[OPEN-ILS-GENERAL] How to Export Authority Records
Dan Scott
dan at coffeecode.net
Thu May 24 11:52:27 EDT 2012
On Thu, May 24, 2012 at 08:43:02AM -0400, Sharp, Chris wrote:
> Hi Arayik,
>
> I honestly don't know if this would work with a 1.4.0.4 database, but you might try using the latest version of the built-in MARC export script that comes with more recent versions of Evergreen:
>
> http://git.evergreen-ils.org/?p=Evergreen.git;a=blob_plain;f=Open-ILS/src/support-scripts/marc_export.in;h=588f8ddd7f022b7d42fbf0b4c377ba4089cf58bc;hb=HEAD
>
> I've had success using this on our 2.1.1 database.
>
> Hope that helps!
I think it will work, having taken just a quick peek at the source code.
To make Arayik's life easier, I'm attaching a built version of the script
(otherwise you would need to configure && make the code to get the
defaults for /openils/conf etc in place).
Hope this helps!
Dan
-------------- next part --------------
#!/usr/bin/perl
# vim:et:sw=4:ts=4:
use strict;
use warnings;
use bytes;
use OpenSRF::System;
use OpenSRF::EX qw/:try/;
use OpenSRF::AppSession;
use OpenSRF::Utils::JSON;
use OpenSRF::Utils::SettingsClient;
use OpenILS::Application::AppUtils;
use OpenILS::Utils::Fieldmapper;
use OpenILS::Utils::CStoreEditor;
use MARC::Record;
use MARC::File::XML ( BinaryEncoding => 'UTF-8' );
use UNIVERSAL::require;
use Time::HiRes qw/time/;
use Getopt::Long;
# Output formats accepted by --format. BRE and ARE are emitted by
# export_record() as JSON dumps of the retrieved record object rather than
# as MARC.
my @formats = qw/USMARC UNIMARC XML BRE ARE/;

# Option defaults; each of these may be overridden on the command line.
my $config      = '/openils/conf/opensrf_core.xml';
my $format      = 'USMARC';
my $encoding    = 'MARC8';
my $location    = '';
my $dollarsign  = '$';
my $idl         = 0;
my $help        = undef;
my $holdings    = undef;
my $timeout     = 0;
my $export_mfhd = undef;
my $type        = 'biblio';
my $all_records = undef;
my $replace_001 = undef;
my @library     = ();

# The short forms advertised by --help are declared explicitly.  Relying on
# Getopt::Long's automatic abbreviation is not enough: "-t" is ambiguous
# between --type and --timeout, and "-l" between --location and --library,
# so both were rejected.  An exact alias match takes precedence over
# abbreviation matching.
#
# GetOptions() returns false after reporting any unknown or malformed option
# on STDERR; stop here rather than exporting with settings the user did not
# ask for.
GetOptions(
    'help|h'       => \$help,
    'items|i'      => \$holdings,
    'mfhd'         => \$export_mfhd,
    'all|a'        => \$all_records,
    'replace_001'  => \$replace_001,
    'location|l=s' => \$location,
    'money=s'      => \$dollarsign,
    'config|c=s'   => \$config,
    'format|f=s'   => \$format,
    'type|t=s'     => \$type,
    'xml-idl|x=s'  => \$idl,
    'encoding|e=s' => \$encoding,
    'timeout=i'    => \$timeout,
    'library=s'    => \@library,
) or die "Invalid command-line options; run $0 --help for usage\n";
# Print the usage screen and stop when --help was requested.
# The --timeout paragraph used to refer to a "--holdings" option that does
# not exist; the option that includes items is --items.
if ($help) {
    print <<"HELP";
This script exports MARC authority, bibliographic, and serial holdings
records from an Evergreen database.
Input to this script can consist of a list of record IDs, with one record ID
per line, corresponding to the record ID in the Evergreen database table of
your requested record type.
Alternately, passing the --all option will attempt to export all records of
the specified type from the Evergreen database. The --all option starts at
record ID 1 and increments the ID by 1 until the largest ID in the database
is retrieved. This may not be very efficient for databases with large gaps
in their ID sequences.
Usage: $0 [options]
--help or -h This screen.
--config or -c Configuration file [/openils/conf/opensrf_core.xml]
--format or -f Output format (USMARC, UNIMARC, XML, BRE, ARE) [USMARC]
--encoding or -e Output encoding (UTF-8, ISO-8859-?, MARC8) [MARC8]
--xml-idl or -x Location of the IDL XML
--timeout Timeout for exporting a single record; increase if you
are using --items and are exporting records that
have a lot of items attached to them.
--type or -t Record type (BIBLIO, AUTHORITY) [BIBLIO]
--all or -a Export all records; ignores input list
--library Export the bibliographic records that have attached
holdings for the listed library or libraries as
identified by shortname
--replace_001 Replace the 001 field value with the record ID
Additional options for type = 'BIBLIO':
--items or -i Include items (holdings) in the output
--money Currency symbol to use in item price field [\$]
--mfhd Export serial MFHD records for associated bib records
Not compatible with --format=BRE
--location or -l MARC Location Code for holdings from
http://www.loc.gov/marc/organizations/orgshome.html
Examples:
To export a set of USMARC records in a file named "output_file" based on the
IDs contained in a file named "list_of_ids":
cat list_of_ids | $0 > output_file
To export a set of MARC21XML authority records in a file named "output.xml"
for all authority records in the database:
$0 --format XML --type AUTHORITY --all > output.xml
To export a set of USMARC bibliographic records encoded in UTF-8 in a file
named "sys1_bibs.mrc" based on records which have attached callnumbers for the
libraries with the short names "BR1" and "BR2":
$0 --library BR1 --library BR2 --encoding UTF-8 > sys1_bibs.mrc
HELP
    exit;
}
# --all walks every record ID, so it cannot be combined with a --library
# restriction.
if ($all_records && @library) {
    die('Incompatible arguments: you cannot combine a request for all ' .
        'records with a request for records by library');
}

# Normalize user input so the comparisons below can be exact.
$type     = lc($type);
$format   = uc($format);
$encoding = uc($encoding);

# UTF-8 output goes through Perl's :utf8 layer; anything else is written as
# raw bytes.
if ($encoding eq 'UTF-8') {
    binmode(STDOUT, ':utf8');
} else {
    binmode(STDOUT, ':raw');
}

# Reject unknown formats up front.  (The list archive had mangled "@formats"
# into " at formats" here, which is a syntax error.)
if (!grep { $format eq $_ } @formats) {
    die "Please select a supported format. ".
        "Right now that means one of [".
        join('|', @formats). "]\n";
}

# Try to load the MARC::File backend named after the output format.  The
# result of require() is not checked, so a format without a matching module
# is not an error at this point.  A distinct variable name is used so the
# record type held in the file-level $type is not shadowed.
if ($format ne 'XML') {
    my $marc_file_class = 'MARC::File::' . $format;
    $marc_file_class->require;
}

if ($timeout <= 0) {
    # set default timeout and/or correct silly user who
    # supplied a negative timeout; default timeout of
    # 300 seconds if exporting items determined empirically.
    $timeout = $holdings ? 300 : 1;
}

# Connect to OpenSRF and load the IDL, taking its location from the
# settings server unless --xml-idl supplied one.
OpenSRF::System->bootstrap_client( config_file => $config );
if (!$idl) {
    $idl = OpenSRF::Utils::SettingsClient->new->config_value("IDL");
}
Fieldmapper->import(IDL => $idl);

# $ses carries the per-record cstore requests; $editor is used for the
# searches that select which records to export.
my $ses = OpenSRF::AppSession->create('open-ils.cstore');
OpenILS::Utils::CStoreEditor::init();
my $editor = OpenILS::Utils::CStoreEditor->new();

# XML output is a single <collection>; it is closed after the export loop.
print <<HEADER if ($format eq 'XML');
<?xml version="1.0" encoding="$encoding"?>
<collection xmlns='http://www.loc.gov/MARC21/slim'>
HEADER

# Lookup tables keyed by ID, filled by get_bib_locations() when items are
# being exported, plus the flesh specification passed with every record
# retrieval.
my %orgs;
my %shelves;
my $flesh = {};
if ($holdings) {
    get_bib_locations();
}
# Progress bookkeeping shared with export_record(), add_bib_holdings() and
# stats(): 'bib' counts records attempted, 'did' counts records written.
my $start = time;
my $last_time = time;
my %count = ('bib' => 0, 'did' => 0);
my $speed = 0;

if ($all_records) {
    # Find the highest non-deleted record ID of the requested type, then
    # try every ID from 1 up to it.
    my $top_record = 0;
    my $newest;
    if ($type eq 'biblio') {
        $newest = $editor->search_biblio_record_entry([
            {deleted => 'f'},
            {order_by => { 'bre' => 'id DESC' }, limit => 1}
        ]);
    } elsif ($type eq 'authority') {
        $newest = $editor->search_authority_record_entry([
            {deleted => 'f'},
            {order_by => { 'are' => 'id DESC' }, limit => 1}
        ]);
    }

    # An empty table or a failed search used to die with "Can't call method
    # id on an undefined value"; report it and export nothing instead.
    if (ref($newest) eq 'ARRAY' && @$newest) {
        $top_record = $newest->[0]->id;
    } else {
        warn "Could not determine the highest $type record ID; nothing to export\n";
    }

    for my $i (1 .. $top_record) {
        export_record($i);
    }
} elsif (@library) {
    # Select the distinct non-deleted bib records that have a non-deleted
    # call number owned by one of the named libraries, in ascending ID order.
    my $recids = $editor->json_query({
        select => { bre => ['id'] },
        from => { bre => 'acn' },
        where => {
            '+bre' => { deleted => 'f' },
            '+acn' => {
                deleted => 'f',
                owning_lib => {
                    in => {
                        select => {'aou' => ['id'] },
                        from => 'aou',
                        where => { shortname => { in => \@library } }
                    }
                }
            }
        },
        distinct => 1,
        order_by => [{
            class => 'bre',
            field => 'id',
            direction => 'ASC'
        }]
    });

    # Fail with a readable message rather than a strict-refs error when the
    # query did not return a list.
    die "Failed to retrieve the list of records for the requested libraries\n"
        unless (ref($recids) eq 'ARRAY');

    foreach my $record (@$recids) {
        export_record($record->{id});
    }
} else {
    # One record ID per line, from STDIN or from files named on the command
    # line.
    while ( my $line = <> ) {
        # Blank or non-numeric lines would otherwise be coerced to record 0
        # and counted as an attempted export.
        next unless ($line =~ /^\s*\d+/);
        export_record($line);
    }
}

print "</collection>\n" if ($format eq 'XML');

# Final summary on STDERR so it never mixes with the exported records.
my $time = time - $start;
$speed = $time > 0 ? $count{did} / $time : 0;
print STDERR <<DONE;
Exports Attempted : $count{bib}
Exports Completed : $count{did}
Overall Speed : $speed
Total Time Elapsed: $time seconds
DONE
# Print one parsed MARC::Record on STDOUT in the selected output format.
# UNIMARC and USMARC are both written with as_usmarc(); other formats print
# nothing here.
sub _print_marc_record {
    my $marc = shift;

    if ($format eq 'XML') {
        my $xml = $marc->as_xml_record;
        # Strip the per-record XML declaration line from the output.
        $xml =~ s/^<\?.+?\?>$//mo;
        print $xml;
    } elsif ($format eq 'UNIMARC' or $format eq 'USMARC') {
        print $marc->as_usmarc;
    }
}

# Retrieve one record of the current $type by ID and write it to STDOUT.
#
# Argument: the record ID (coerced with int(), so a trailing newline from
# the input list is harmless).
#
# Updates $count{bib} for every record that was read and $count{did} for
# every record that was written.  Problems with a single record are reported
# with warn() and do not abort the export.
sub export_record {
    my $id = int(shift);

    my $req = $ses->request( "open-ils.cstore.direct.$type.record_entry.retrieve", $id, $flesh );
    my $resp = $req->recv(timeout => $timeout);

    # Work out what went wrong, if anything, keeping the original order of
    # the checks and the original messages.
    my $problem;
    if (!$resp) {
        $problem = "!!!!! Failed trying to read record $id";
    } elsif ($req->failed) {
        $problem = "!!!!!! Failed trying to read record $id: " . $req->failed->stringify;
    } elsif ($req->timed_out) {
        $problem = "!!!!!! Timed out trying to read record $id";
    }
    if ($problem) {
        warn "\n$problem\n";
        # The request used to be finished only on success, so every failed
        # retrieval left its request open for the rest of the run.
        $req->finish;
        return;
    }

    my $bib = $resp->content;
    $req->finish;

    $count{bib}++;
    return unless $bib;

    # BRE/ARE: dump the record object as JSON; no MARC processing.
    if ($format eq 'ARE' or $format eq 'BRE') {
        print OpenSRF::Utils::JSON->perl2JSON($bib);
        stats();
        $count{did}++;
        return;
    }

    try {
        my $marc = MARC::Record->new_from_xml( $bib->marc, $encoding, $format );

        if ($type eq 'biblio') {
            add_bib_holdings($bib, $marc);
        }

        if ($replace_001) {
            # Overwrite the existing 001, or add one if the record has none.
            my $tcn = $marc->field('001');
            if ($tcn) {
                $tcn->update($id);
            } else {
                my $new_001 = MARC::Field->new('001', $id);
                $marc->insert_fields_ordered($new_001);
            }
        }

        _print_marc_record($marc);
        $count{did}++;
    } otherwise {
        my $e = shift;
        warn "\n$e\n";
        MARC::File::XML->import; # reset SAX parser so that one bad record doesn't kill the entire export
    };

    # Optionally follow the bib record with its non-deleted serial (MFHD)
    # records.  These are not included in the $count{did} total.
    if ($export_mfhd and $type eq 'biblio') {
        my $mfhds = $editor->search_serial_record_entry({record => $id, deleted => 'f'});
        foreach my $mfhd (@$mfhds) {
            try {
                my $marc = MARC::Record->new_from_xml( $mfhd->marc, $encoding, $format );
                _print_marc_record($marc);
            } otherwise {
                my $e = shift;
                warn "\n$e\n";
                MARC::File::XML->import; # reset SAX parser so that one bad record doesn't kill the entire export
            };
        }
    }

    # Refresh the progress line every 50 records read.
    stats() if (! ($count{bib} % 50 ));
}
# Write a one-line progress report to STDERR and remember the current
# totals so the next call can report a rate for the interval in between.
#
# Reads %count and $start; updates $speed, $count{did_last} and
# $count{time_last}.  The line begins and ends with a carriage return so
# successive reports overwrite each other on a terminal.
sub stats {
    my $now     = time;
    my $elapsed = $now - $start;

    # On the first call there is no previous sample.  Measure that interval
    # from the start of the run; treating the missing timestamp as 0 made
    # the "rt" figure a rate since the epoch.
    my $interval = $now - (defined $count{time_last} ? $count{time_last} : $start);

    # Counters that have not been touched yet (call numbers and copies when
    # no holdings are exported) are reported as 0.
    my $did      = $count{did}      || 0;
    my $did_last = $count{did_last} || 0;
    my $cn       = $count{cn}       || 0;
    my $cp       = $count{cp}       || 0;

    # Explicit zero guard instead of relying on an exception handler to
    # swallow a division by zero (and the whole report with it).
    my $rate = sub {
        my ($n, $seconds) = @_;
        return $seconds > 0 ? $n / $seconds : 0;
    };

    $speed = $rate->($did, $elapsed);

    printf STDERR "\r $did of $count{bib} @ \%0.4f/s ttl / \%0.4f/s rt ".
        "($cn CNs @ \%0.4f/s :: $cp CPs @ \%0.4f/s)\r",
        $speed,
        $rate->($did - $did_last, $interval),
        $rate->($cn, $elapsed),
        $rate->($cp, $elapsed);

    $count{did_last}  = $count{did};
    $count{time_last} = $now;
}
# Load the org unit and copy location tables into %orgs and %shelves, keyed
# by ID, and switch $flesh so that later record retrievals ask for call
# numbers on bib records and copies on call numbers.
#
# Progress is reported on STDERR; a failed request is fatal.
sub get_bib_locations {
    # Run one "id is not null" search, announce it with $banner, and store
    # every returned object in the hash referenced by $cache.
    my $load_table = sub {
        my ($banner, $method, $cache) = @_;

        print STDERR $banner;
        my $req = $ses->request( $method, { id => { '!=' => undef } } );
        while (my $resp = $req->recv) {
            die $req->failed->stringify if ($req->failed);
            my $row = $resp->content;
            last unless ($row);
            $cache->{$row->id} = $row;
        }
        $req->finish;
        print STDERR "OK\n";
    };

    $load_table->(
        "Retrieving Org Units ... ",
        'open-ils.cstore.direct.actor.org_unit.search',
        \%orgs
    );
    $load_table->(
        "Retrieving Shelving locations ... ",
        'open-ils.cstore.direct.asset.copy_location.search',
        \%shelves
    );

    $flesh = { flesh => 2, flesh_fields => { bre => [ 'call_numbers' ], acn => [ 'copies' ] } };
}
# Add one 852 field per copy to a MARC record.
#
# Arguments: the bib record object (its call_numbers must have been fleshed,
# each with its copies) and the MARC::Record to extend.  Does nothing when
# the bib has no call numbers or no copies.
#
# Adds to $count{cn} and $count{cp}, and refreshes the progress line after
# every 100 copies.
sub add_bib_holdings {
    my ($bib, $r) = @_;

    my $cn_list = $bib->call_numbers;
    return unless ($cn_list && @$cn_list);

    $count{cn} += scalar @$cn_list;

    # Flatten the copies of all call numbers, then index them by the call
    # number each copy points at.
    my @all_copies = map { @{ $_->copies } } @$cn_list;
    return unless (@all_copies);

    my %copies_for;
    for my $copy (@all_copies) {
        push @{ $copies_for{ $copy->call_number } }, $copy;
    }

    for my $cn (@$cn_list) {
        # A call number without copies simply contributes no fields.
        my $attached = $copies_for{ $cn->id } || [];

        for my $cp (@$attached) {
            $count{cp}++;

            # Build the subfields in output order; optional ones are added
            # only when the copy carries a value for them.
            my @subfields;
            push @subfields, 'a' => $location if ($location);
            push @subfields,
                b => $orgs{$cn->owning_lib}->shortname,
                b => $orgs{$cp->circ_lib}->shortname,
                c => $shelves{$cp->location}->name,
                j => $cn->label;
            push @subfields, g => $cp->circ_modifier if ($cp->circ_modifier);
            push @subfields, p => $cp->barcode;
            push @subfields, y => $dollarsign.$cp->price if ($cp->price);
            push @subfields, t => $cp->copy_number if ($cp->copy_number);

            # Status flags, one "x" subfield each.
            push @subfields, x => 'reference' if ($cp->ref eq 't');
            push @subfields, x => 'unholdable' if ($cp->holdable eq 'f');
            push @subfields, x => 'noncirculating' if ($cp->circulate eq 'f');
            push @subfields, x => 'hidden' if ($cp->opac_visible eq 'f');

            $r->insert_grouped_field(
                MARC::Field->new( '852', '4', ' ', @subfields )
            );

            stats() if (! ($count{cp} % 100 ));
        }
    }
}
More information about the Open-ils-general
mailing list