[open-ils-commits] r20009 - in trunk: . Open-ILS/src Open-ILS/src/extras/import (gmc)
svn at svn.open-ils.org
svn at svn.open-ils.org
Wed Apr 6 18:06:46 EDT 2011
Author: gmc
Date: 2011-04-06 18:06:40 -0400 (Wed, 06 Apr 2011)
New Revision: 20009
Removed:
trunk/Open-ILS/src/extras/import/marc2are.pl.in
trunk/Open-ILS/src/extras/import/marc2bre.pl.in
trunk/Open-ILS/src/extras/import/marc2sre.pl.in
trunk/Open-ILS/src/extras/import/parallel_pg_loader.pl.in
Modified:
trunk/Open-ILS/src/Makefile.am
trunk/Open-ILS/src/extras/import/marc2are.pl
trunk/Open-ILS/src/extras/import/marc2bre.pl
trunk/Open-ILS/src/extras/import/parallel_pg_loader.pl
trunk/configure.ac
Log:
Revert "install command-line MARC import tools in @prefix@/bin"
This reverts commit 7125758576667feeff054ceb4b79d7d1510bbc6f.
Modified: trunk/Open-ILS/src/Makefile.am
===================================================================
--- trunk/Open-ILS/src/Makefile.am 2011-04-06 21:49:06 UTC (rev 20008)
+++ trunk/Open-ILS/src/Makefile.am 2011-04-06 22:06:40 UTC (rev 20009)
@@ -138,7 +138,7 @@
OILSJAVA_DIR = java
endif
-bin_SCRIPTS = $(core_scripts) $(reporter_scripts) $(installautojs) @srcdir@/extras/eg_config @srcdir@/extras/fast-extract @srcdir@/extras/import/marc2are.pl @srcdir@/extras/import/marc2bre.pl @srcdir@/extras/import/marc2sre.pl @srcdir@/extras/import/parallel_pg_loader.pl
+bin_SCRIPTS = $(core_scripts) $(reporter_scripts) $(installautojs) @srcdir@/extras/eg_config @srcdir@/extras/fast-extract
data_DATA = $(core_data) $(reporter_data)
# Take care of which subdirectories to build, and which extra files to include in a distribution.
Modified: trunk/Open-ILS/src/extras/import/marc2are.pl
===================================================================
--- trunk/Open-ILS/src/extras/import/marc2are.pl 2011-04-06 21:49:06 UTC (rev 20008)
+++ trunk/Open-ILS/src/extras/import/marc2are.pl 2011-04-06 22:06:40 UTC (rev 20009)
@@ -2,6 +2,8 @@
use strict;
use warnings;
+use lib '/openils/lib/perl5/';
+
use OpenSRF::System;
use OpenSRF::Application;
use OpenSRF::EX qw/:try/;
Deleted: trunk/Open-ILS/src/extras/import/marc2are.pl.in
===================================================================
--- trunk/Open-ILS/src/extras/import/marc2are.pl.in 2011-04-06 21:49:06 UTC (rev 20008)
+++ trunk/Open-ILS/src/extras/import/marc2are.pl.in 2011-04-06 22:06:40 UTC (rev 20009)
@@ -1,119 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-
-use OpenSRF::System;
-use OpenSRF::Application;
-use OpenSRF::EX qw/:try/;
-use OpenSRF::AppSession;
-use OpenSRF::MultiSession;
-use OpenSRF::Utils::SettingsClient;
-use OpenILS::Application::AppUtils;
-use OpenILS::Utils::Fieldmapper;
-use Digest::MD5 qw/md5_hex/;
-use OpenSRF::Utils::JSON;
-use Data::Dumper;
-use Unicode::Normalize;
-
-use Time::HiRes qw/time/;
-use Getopt::Long;
-use MARC::Batch;
-use MARC::File::XML ( BinaryEncoding => 'utf-8' );
-use MARC::Charset;
-
-MARC::Charset->ignore_errors(1);
-
-my ($count, $user, $password, $config, $marctype, $keyfile, @files, $quiet) =
- (1, 'admin', 'open-ils', '@sysconfdir@/opensrf_core.xml', 'USMARC');
-
-GetOptions(
- 'startid=i' => \$count,
- 'user=s' => \$user,
- 'marctype=s' => \$marctype,
- 'password=s' => \$password,
- 'config=s' => \$config,
- 'file=s' => \@files,
- 'quiet' => \$quiet,
-);
-
-@files = @ARGV if (!@files);
-
-my @ses;
-my @req;
-my %processing_cache;
-
-OpenSRF::System->bootstrap_client( config_file => $config );
-Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
-
-$user = OpenILS::Application::AppUtils->check_user_session( login($user,$password) )->id;
-
-select STDERR; $| = 1;
-select STDOUT; $| = 1;
-
-my $batch = new MARC::Batch ( $marctype, @files );
-$batch->strict_off();
-$batch->warnings_off();
-
-my $starttime = time;
-my $rec;
-while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
- next if ($rec == -1);
- my $id = $count;
-
- (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
- $xml =~ s/^<\?xml.+\?\s*>//go;
- $xml =~ s/>\s+</></go;
- $xml =~ s/\p{Cc}//go;
- $xml = OpenILS::Application::AppUtils->entityize($xml);
- $xml =~ s/[\x00-\x1f]//go;
-
- my $bib = new Fieldmapper::authority::record_entry;
- $bib->id($id);
- $bib->active('t');
- $bib->deleted('f');
- $bib->marc($xml);
- $bib->creator($user);
- $bib->create_date('now');
- $bib->editor($user);
- $bib->edit_date('now');
- $bib->last_xact_id('IMPORT-'.$starttime);
-
- print OpenSRF::Utils::JSON->perl2JSON($bib)."\n";
-
- $count++;
-
- if (!$quiet && !($count % 20)) {
- print STDERR "\r$count\t". $count / (time - $starttime);
- }
-}
-
-sub login {
- my( $username, $password, $type ) = @_;
-
- $type |= "staff";
-
- my $seed = OpenILS::Application::AppUtils->simplereq(
- 'open-ils.auth',
- 'open-ils.auth.authenticate.init',
- $username
- );
-
- die("No auth seed. Couldn't talk to the auth server") unless $seed;
-
- my $response = OpenILS::Application::AppUtils->simplereq(
- 'open-ils.auth',
- 'open-ils.auth.authenticate.complete',
- { username => $username,
- password => md5_hex($seed . md5_hex($password)),
- type => $type });
-
- die("No auth response returned on login.") unless $response;
-
- my $authtime = $response->{payload}->{authtime};
- my $authtoken = $response->{payload}->{authtoken};
-
- die("Login failed for user $username!") unless $authtoken;
-
- return $authtoken;
-}
-
Modified: trunk/Open-ILS/src/extras/import/marc2bre.pl
===================================================================
--- trunk/Open-ILS/src/extras/import/marc2bre.pl 2011-04-06 21:49:06 UTC (rev 20008)
+++ trunk/Open-ILS/src/extras/import/marc2bre.pl 2011-04-06 22:06:40 UTC (rev 20009)
@@ -2,6 +2,8 @@
use strict;
use warnings;
+#use lib '/openils/lib/perl5/';
+
use Error qw/:try/;
use OpenILS::Utils::Fieldmapper;
use Digest::MD5 qw/md5_hex/;
Deleted: trunk/Open-ILS/src/extras/import/marc2bre.pl.in
===================================================================
--- trunk/Open-ILS/src/extras/import/marc2bre.pl.in 2011-04-06 21:49:06 UTC (rev 20008)
+++ trunk/Open-ILS/src/extras/import/marc2bre.pl.in 2011-04-06 22:06:40 UTC (rev 20009)
@@ -1,396 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-
-use Error qw/:try/;
-use OpenILS::Utils::Fieldmapper;
-use Digest::MD5 qw/md5_hex/;
-use OpenSRF::Utils::JSON;
-use OpenILS::Application::AppUtils;
-use Data::Dumper;
-use Unicode::Normalize;
-use Encode;
-
-use FileHandle;
-use Time::HiRes qw/time/;
-use Getopt::Long;
-use MARC::Batch;
-use MARC::File::XML ( BinaryEncoding => 'utf-8' );
-use MARC::Charset;
-use DBI;
-
-#MARC::Charset->ignore_errors(1);
-
-my ($id_field, $id_subfield, $recid, $user, $config, $idlfile, $marctype, $tcn_offset, $tcn_mapfile, $tcn_dumpfile, $used_id_file, $used_tcn_file, $enc, @files, @trash_fields, @req_fields, $use901, $quiet, $tcn_field, $tcn_subfield) =
- ('', 'a', 0, 1, '@sysconfdir@/opensrf_core.xml', '@sysconfdir@/fm_IDL.xml', 'USMARC', 0);
-
-my ($db_driver, $db_host, $db_port, $db_name, $db_user, $db_pw) =
- ('Pg', 'localhost', 5432, 'evergreen', 'postgres', 'postgres');
-
-GetOptions(
- 'marctype=s' => \$marctype, # format of MARC files being processed defaults to USMARC, often set to XML
- 'startid=i' => \$recid, # id number to start with when auto-assigning id numbers, defaults to highest id in database + 1
- 'idfield=s' => \$id_field, # field containing the record's desired internal id, NOT tcn
- 'idsubfield=s' => \$id_subfield, # subfield of above record id field
- 'tcnfield=s' => \$tcn_field, # field containing the record's desired tcn, NOT the internal id
- 'tcnsubfield=s' => \$tcn_subfield, # subfield of above record tcn field
- 'tcnoffset=i' => \$tcn_offset, # optionally skip characters at beginning of supplied tcn (e.g. to remove '(Sirsi)')
- 'user=s' => \$user, # set creator/editor values for records in database
- 'encoding=s' => \$enc, # set assumed MARC encoding for MARC::Charset
- 'keyfile=s' => \$tcn_mapfile, # DEPRECATED, use tcn_mapfile instead
- 'tcn_mapfile=s' => \$tcn_mapfile, # external file which allows for matching specific record tcns to specific record ids, format = one id_number|tcn_number combo per line
- 'tcnfile=s' => \$tcn_dumpfile, # DEPRECATED, use tcn_dumpfile instead
- 'tcn_dumpfile=s' => \$tcn_dumpfile, # allows specification of a dumpfile for all used tcn values
- 'config=s' => \$config, # location of OpenSRF core config file, defaults to @sysconfdir@/opensrf_core.xml
- 'file=s' => \@files, # files to process (or you can simple list the files as unnamed arguments, i.e. @ARGV)
- 'required_fields=s' => \@req_fields, # skip any records missing these fields
- 'trash=s' => \@trash_fields, # fields to remove from all processed records
- 'xml_idl=s' => \$idlfile, # location of XML IDL file, defaults to @sysconfdir@/fm_IDL.xml
- 'dontuse=s' => \$used_id_file, # DEPRECATED, use used_id_file instead
- 'used_id_file=s' => \$used_id_file, # external file which prevents id collisions by specifying ids already in use in the database, format = one id number per line
- 'used_tcn_file=s' => \$used_tcn_file, # external file which prevents tcn collisions by specifying tcns already in use in the database, format = one tcn number per line
- "db_driver=s" => \$db_driver, # database driver type, usually 'Pg'
- "db_host=s" => \$db_host, # database hostname
- "db_port=i" => \$db_port, # database port
- "db_name=s" => \$db_name, # database name
- "db_user=s" => \$db_user, # database username
- "db_pw=s" => \$db_pw, # database password
- 'use901' => \$use901, # use values from previously created 901 fields and skip all other processing
- 'quiet' => \$quiet # do not output progress count
-);
-
-@trash_fields = split(/,/,join(',',@trash_fields));
-@req_fields = split(/,/,join(',',@req_fields));
-
-if ($enc) {
- MARC::Charset->ignore_errors(1);
- MARC::Charset->assume_encoding($enc);
-}
-
-if (uc($marctype) eq 'XML') {
- 'open'->use(':utf8');
-} else {
- bytes->use();
-}
-
-@files = @ARGV if (!@files);
-
-my @ses;
-my @req;
-my %processing_cache;
-
-my $dsn = "dbi:$db_driver:host=$db_host;port=$db_port;dbname=$db_name";
-
-if (!$recid) {
- my $table = 'biblio_record_entry';
- $table = 'biblio.record_entry' if ($db_driver eq 'Pg');
-
- my $dbh = DBI->connect($dsn,$db_user,$db_pw);
- my $sth = $dbh->prepare("SELECT MAX(id) + 1 FROM $table");
-
- $sth->execute;
- $sth->bind_col(1, \$recid);
- $sth->fetch;
- $sth->finish;
- $dbh->disconnect;
-
- # In a clean Evergreen schema, the maximum ID will be -1; but sequences
- # have to start at 1, so handle the clean Evergreen schema situation
- if ($recid == 0) {
- $recid = 1;
- }
-}
-
-my %tcn_source_map = (
- a => 'Sirsi_Auto',
- o => 'OCLC',
- i => 'ISxN',
- l => 'LCCN',
- s => 'System',
- g => 'Gutenberg',
- z => 'Unknown',
-);
-
-Fieldmapper->import(IDL => $idlfile);
-
-my %tcn_map;
-if ($tcn_mapfile) {
- open F, $tcn_mapfile or die "Couldn't open key file $tcn_mapfile";
- while (<F>) {
- if ( /^(\d+)\|(\S+)/o ) {
- $tcn_map{$1} = $2;
- }
- }
- close(F);
-}
-
-my %used_recids;
-if ($used_id_file) {
- open F, $used_id_file or die "Couldn't open used-id file $used_id_file";
- while (<F>) {
- chomp;
- s/^\s*//;
- s/\s*$//;
- $used_recids{$_} = 1;
- }
- close(F);
-}
-
-my %used_tcns;
-if ($used_tcn_file) {
- open F, $used_tcn_file or die "Couldn't open used-tcn file $used_tcn_file";
- while (<F>) {
- chomp;
- s/^\s*//;
- s/\s*$//;
- $used_tcns{$_} = 1;
- }
- close(F);
-}
-
-select STDERR; $| = 1;
-select STDOUT; $| = 1;
-
-my $batch = new MARC::Batch ( $marctype, @files );
-$batch->strict_off();
-$batch->warnings_off();
-
-my $starttime = time;
-my $rec;
-my $count = 0;
-PROCESS: while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
- next if ($rec == -1);
-
- $count++;
-
- # Skip records that don't contain a required field (like '245', for example)
- foreach my $req_field (@req_fields) {
- if (!$rec->field("$req_field")) {
- warn "\n!!! Record $count missing required field $req_field, skipping record.\n";
- next PROCESS;
- }
- }
-
- my $id;
- my $tcn_value = '';
- my $tcn_source = '';
- # If $use901 is set, use it for the id, the tcn, and the tcn source without ANY further processing (i.e. no error checking)
- if ($use901) {
- $rec->delete_field($_) for ($rec->field(@trash_fields));
- $tcn_value = $rec->subfield('901' => 'a');
- $tcn_source = $rec->subfield('901' => 'b');
- $id = $rec->subfield('901' => 'c');
- } else {
- # This section of code deals with the record's 'id', which is a system-level, numeric, internal identifier
- # It is often convenient but not necessary to carry over the internal ids from your previous ILS, so here is where that happens
- if ($id_field) {
- my $field = $rec->field($id_field);
- if ($field) {
- if ($field->is_control_field) {
- $id = $field->data;
- } else {
- $id = $field->subfield($id_subfield);
- }
- # ensure internal record ids are numeric only
- $id =~ s/\D+//gso if $id;
- }
-
- # catch problem ids
- if (!$id) {
- warn "\n!!! Record $count has missing or invalid id field $id_field, assigning new id.\n";
- $id = '';
- } elsif (exists $used_recids{$id}) {
- warn "\n!!! Record $count has a duplicate id in field $id_field, assigning new id.\n";
- $id = '';
- } else {
- $used_recids{$id} = 1;
- }
- }
-
- # id field not specified or found to be invalid, assign auto id
- if (!$id) {
- while (exists $used_recids{$recid}) {
- $recid++;
- }
- $used_recids{$recid} = 1;
- $id = $recid;
- $recid++;
- }
-
- # This section of code deals with the record's 'tcn', or title control number, which is a record-level, possibly alpha-numeric, sometimes user-supplied value
- if ($tcn_field) {
- if ($tcn_mapfile) {
- if (my $tcn = $tcn_map{$id}) {
- $rec->delete_field( $_ ) for ($rec->field($tcn_field));
- $rec->append_fields( MARC::Field->new( $tcn_field, '', '', $tcn_subfield, $tcn ) );
- } else {
- warn "\n!!! ID $id not found in tcn_mapfile, skipping record.\n";
- $count++;
- next;
- }
- }
-
- my $field = $rec->field($tcn_field);
- if ($field) {
- if ($field->is_control_field) {
- $tcn_value = $field->data;
- } else {
- $tcn_value = $field->subfield($tcn_subfield);
- }
- # $tcn_offset is another Sirsi influence, as it will allow you to remove '(Sirsi)'
- # from exported tcns, but was added more generically to perhaps support other use cases
- if ($tcn_value) {
- $tcn_value = substr($tcn_value, $tcn_offset);
- } else {
- $tcn_value = '';
- }
- }
- }
-
- # turn our id and tcn into a 901 field, and also create a tcn and/or figure out the tcn source
- ($tcn_value, $tcn_source) = preprocess($rec, $tcn_value, $id);
- # delete the old identifier and trash fields
- $rec->delete_field($_) for ($rec->field('901', $tcn_field, $id_field, @trash_fields));
- }
-
- (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
- $xml =~ s/^<\?xml.+\?\s*>//go;
- $xml =~ s/>\s+</></go;
- $xml =~ s/\p{Cc}//go;
- $xml = OpenILS::Application::AppUtils->entityize($xml);
- $xml =~ s/[\x00-\x1f]//go;
-
- my $bib = new Fieldmapper::biblio::record_entry;
- $bib->id($id);
- $bib->active('t');
- $bib->deleted('f');
- $bib->marc($xml);
- $bib->creator($user);
- $bib->create_date('now');
- $bib->editor($user);
- $bib->edit_date('now');
- $bib->tcn_source($tcn_source);
- $bib->tcn_value($tcn_value);
- $bib->last_xact_id('IMPORT-'.$starttime);
-
- print OpenSRF::Utils::JSON->perl2JSON($bib)."\n";
- $used_tcns{$tcn_value} = 1;
-
- if (!$quiet && !($count % 50)) {
- print STDERR "\r$count\t". $count / (time - $starttime);
- }
-}
-
-if ($tcn_dumpfile) {
- open TCN_DUMPFILE, '>', $tcn_dumpfile;
- print TCN_DUMPFILE "$_\n" for (keys %used_tcns);
-}
-
-
-sub preprocess {
- my $rec = shift;
- my $tcn_value = shift;
- my $id = shift;
-
- my $tcn_source = '';
- # in the following code, $tcn_number represents the portion of the tcn following the source code-letter
- my $tcn_number = '';
- my $warn = 0;
- my $passed_tcn = '';
-
- # this preprocess subroutine is optimized for Sirsi-created tcns, that is, those with a single letter
- # followed by some digits (and maybe 'x' in older systems). If using user supplied tcns, try to identify
- # the source here, otherwise set to 'z' ('Unknown')
- if ($tcn_value =~ /([a-z])([0-9xX]+)/) {
- $tcn_source = $1;
- $tcn_number = $2;
- } else {
- $tcn_source = 'z';
- }
-
- # save and warn if a passed in TCN is replaced
- if ($tcn_value && exists $used_tcns{$tcn_value}) {
- $passed_tcn = $tcn_value;
- $tcn_value = '';
- $tcn_number = '';
- $tcn_source = '';
- $warn = 1;
- }
-
- # we didn't have a user supplied tcn, or it was a duplicate, so let's derive one from commonly unique record fields
- if (!$tcn_value) {
- my $f = $rec->field('001');
- $tcn_value = despace($f->data) if ($f);
- }
-
- if (!$tcn_value || exists $used_tcns{$tcn_value}) {
- my $f = $rec->field('000');
- if ($f) {
- $tcn_number = despace($f->data);
- $tcn_source = 'g'; # only Project Gutenberg seems to use this
- $tcn_value = $tcn_source.$tcn_number;
- }
- }
-
- if (!$tcn_value || exists $used_tcns{$tcn_value}) {
- my $f = $rec->field('020');
- if ($f) {
- $tcn_number = despace($f->subfield('a'));
- $tcn_source = 'i';
- $tcn_value = $tcn_source.$tcn_number;
- }
- }
-
- if (!$tcn_value || exists $used_tcns{$tcn_value}) {
- my $f = $rec->field('022');
- if ($f) {
- $tcn_number = despace($f->subfield('a'));
- $tcn_source = 'i';
- $tcn_value = $tcn_source.$tcn_number;
- }
- }
-
- if (!$tcn_value || exists $used_tcns{$tcn_value}) {
- my $f = $rec->field('010');
- if ($f) {
- $tcn_number = despace($f->subfield('a'));
- $tcn_source = 'l';
- $tcn_value = $tcn_source.$tcn_number;
- }
- }
-
- # special case to catch possibly passed in full OCLC numbers and those derived from the 001 field
- if ($tcn_value =~ /^oc(m|n)(\d+)$/o) {
- $tcn_source = 'o';
- $tcn_number = $2;
- $tcn_value = $tcn_source.$tcn_number;
- }
-
- if (!$tcn_value || exists $used_tcns{$tcn_value}) {
- $tcn_source = 's';
- $tcn_number = $id;
- $tcn_value = $tcn_source.$tcn_number;
- $warn = 1
- }
-
-
- # expand $tcn_source from code letter to full name
- $tcn_source = do { $tcn_source_map{$tcn_source} || 'Unknown' };
-
- if ($warn) {
- warn "\n!!! TCN $passed_tcn is already in use, using TCN ($tcn_value) derived from $tcn_source ID.\n";
- }
-
- return ($tcn_value, $tcn_source);
-}
-
-sub despace {
- my $value = shift;
-
- # remove all leading/trailing spaces and trucate at first internal space if present
- $value =~ s/\s*$//o;
- $value =~ s/^\s*//o;
- $value =~ s/^(\S+).*$/$1/o;
-
- return $value;
-}
Deleted: trunk/Open-ILS/src/extras/import/marc2sre.pl.in
===================================================================
--- trunk/Open-ILS/src/extras/import/marc2sre.pl.in 2011-04-06 21:49:06 UTC (rev 20008)
+++ trunk/Open-ILS/src/extras/import/marc2sre.pl.in 2011-04-06 22:06:40 UTC (rev 20009)
@@ -1,323 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-
-use OpenSRF::System;
-use OpenSRF::EX qw/:try/;
-use OpenSRF::Utils::SettingsClient;
-use OpenILS::Application::AppUtils;
-use OpenILS::Event;
-use OpenILS::Utils::Fieldmapper;
-use OpenSRF::Utils::JSON;
-use Unicode::Normalize;
-
-use Time::HiRes qw/time/;
-use Getopt::Long;
-use MARC::Batch;
-use MARC::File::XML ( BinaryEncoding => 'utf-8' );
-use MARC::Charset;
-use Pod::Usage;
-
-MARC::Charset->ignore_errors(1);
-
-# Command line options, with applicable defaults
-my ($idsubfield, $bibfield, $bibsubfield, @files, $libmap, $quiet, $help);
-my $idfield = '004';
-my $count = 1;
-my $user = 'admin';
-my $config = '@sysconfdir@/opensrf_core.xml';
-my $marctype = 'USMARC';
-
-my $parse_options = GetOptions(
- 'idfield=s' => \$idfield,
- 'idsubfield=s' => \$idsubfield,
- 'bibfield=s'=> \$bibfield,
- 'bibsubfield=s'=> \$bibsubfield,
- 'startid=i'=> \$count,
- 'user=s' => \$user,
- 'config=s' => \$config,
- 'marctype=s' => \$marctype,
- 'file=s' => \@files,
- 'libmap=s' => \$libmap,
- 'quiet' => \$quiet,
- 'help' => \$help,
-);
-
-if (!$parse_options or $help) {
- pod2usage(0);
-}
-
-@files = @ARGV if (!@files);
-
-my $U = 'OpenILS::Application::AppUtils';
-my @ses;
-my @req;
-my %processing_cache;
-my $lib_id_map;
-if ($libmap) {
- $lib_id_map = map_libraries_to_ID($libmap);
-}
-
-OpenSRF::System->bootstrap_client( config_file => $config );
-Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
-
-my ($result, $evt) = get_user_id($user);
-if ($evt || !$result->id) {
- print("Could not retrieve user with user name '$user'\n");
- exit(0);
-}
-
-$user = $result->id;
-
-select STDERR; $| = 1;
-select STDOUT; $| = 1;
-
-my $batch = new MARC::Batch ( $marctype, @files );
-$batch->strict_off();
-$batch->warnings_off();
-
-my $starttime = time;
-my $rec;
-while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
- next if ($rec == -1);
- my $id = $count;
- my $record_field;
- if ($idsubfield) {
- $record_field = $rec->field($idfield, $idsubfield);
- } else {
- $record_field = $rec->field($idfield);
- }
- my $record = $count;
-
- if ($record_field) {
- $record = $record_field->data;
- }
-
- # If we have been given bibfield / bibsubfield values, use those to find
- # a matching bib record for $record and use _that_ as our record instead
- if ($bibfield) {
- my ($result, $evt) = map_id_to_bib($record);
- if ($evt || !$result->record) {
- print("Could not find matching bibliographic record for $record\n");
- }
- $record = $result->record;
- } else {
- # Strip the identifier down to a usable integer
- $record =~ s/^.*?(\d+).*?$/$1/o;
- }
-
- (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
- $xml =~ s/^<\?xml.+\?\s*>//go;
- $xml =~ s/>\s+</></go;
- $xml =~ s/\p{Cc}//go;
- $xml = OpenILS::Application::AppUtils->entityize($xml);
- $xml =~ s/[\x00-\x1f]//go;
-
- my $bib = new Fieldmapper::serial::record_entry;
- $bib->id($id);
- $bib->record($record);
- $bib->active('t');
- $bib->deleted('f');
- $bib->marc($xml);
- $bib->creator($user);
- $bib->create_date('now');
- $bib->editor($user);
- $bib->edit_date('now');
- $bib->last_xact_id('IMPORT-'.$starttime);
-
- if ($libmap) {
- my $lib_id = get_library_id($rec);
- if ($lib_id) {
- $bib->owning_lib($lib_id);
- }
- }
-
- print OpenSRF::Utils::JSON->perl2JSON($bib)."\n";
-
- $count++;
-
- if (!$quiet && !($count % 20)) {
- print STDERR "\r$count\t". $count / (time - $starttime);
- }
-}
-
-# Generate a hash of library names (as found in the 852b in the MFHD record) to
-# integers representing actor.org_unit ID values
-sub map_libraries_to_ID {
- my $map_filename = shift;
-
- my %lib_id_map;
-
- open(MAP_FH, '<', $map_filename) or die "Could not load [$map_filename] $!";
- while (<MAP_FH>) {
- my ($lib, $id) = $_ =~ /^(.*?)\t(.*?)$/;
- $lib_id_map{$lib} = $id;
- }
-
- return \%lib_id_map;
-}
-
-# Look up the actor.org_unit.id value for this library name
-sub get_library_id {
- my $record = shift;
-
- my $lib_name = $record->field('852')->subfield('b');
- my $lib_id = $lib_id_map->{$lib_name};
-
- return $lib_id;
-}
-
-# Get the actor.usr.id value for the given username
-sub get_user_id {
- my $username = shift;
-
- my ($result, $evt);
-
- $result = $U->cstorereq(
- 'open-ils.cstore.direct.actor.user.search',
- { usrname => $username, deleted => 'f' }
- );
- $evt = OpenILS::Event->new('ACTOR_USR_NOT_FOUND') unless $result;
-
- return ($result, $evt);
-}
-
-# Get the biblio.record_entry.id value for the given identifier; note that this
-# approach uses a wildcard to match anything that precedes the identifier value
-sub map_id_to_bib {
- my $record = shift;
-
- my ($result, $evt);
-
- my %search = (
- tag => $bibfield,
- value => { ilike => '%' . $record }
- );
-
- if ($bibsubfield) {
- $search{'subfield'} = $bibsubfield;
- }
-
- $result = $U->cstorereq(
- 'open-ils.cstore.direct.metabib.full_rec.search', \%search
- );
- $evt = OpenILS::Event->new('METABIB_FULL_REC_NOT_FOUND') unless $record;
-
- return ($result, $evt);
-}
-
-__END__
-
-=head1 NAME
-
-marc2sre.pl - Convert MARC Format for Holdings Data (MFHD) records to SRE
-(serial.record_entry) JSON objects
-
-=head1 SYNOPSIS
-
-C<marc2sre.pl> [B<--config>=I<opensrf_core.conf>]
-[[B<--idfield>=I<MARC-tag>[ B<--idsubfield>=I<MARC-code>]] [B<--start_id>=I<start-ID>]
-[B<--user>=I<db-username>] [B<--marctype>=I<fileformat>]
-[[B<--file>=I<MARC-filename>[, ...]] [B<--libmap>=I<map-file>] [B<--quiet>=I<quiet>]
-[[B<--bibfield>=I<MARC-tag> [B<--bibsubfield>=<MARC-code>]]
-
-=head1 DESCRIPTION
-
-For one or more files containing MFHD records, iterate through the records
-and generate SRE (serial.record_entry) JSON objects.
-
-=head1 OPTIONS
-
-=over
-
-=item * B<-c> I<config-file>, B<--config>=I<config-file>
-
-Specifies the OpenSRF configuration file used to connect to the OpenSRF router.
-Defaults to F<@sysconfdir@/opensrf_core.xml>
-
-=item * B<--idfield> I<MARC-field>
-
-Specifies the MFHD field where the identifier of the corresponding
-bibliographic record is found. Defaults to '004'.
-
-=item * B<--idsubfield> I<MARC-code>
-
-Specifies the MFHD subfield, if any, where the identifier of the corresponding
-bibliographic record is found. This option is ignored unless it is accompanied
-by the B<--idfield> option. Defaults to null.
-
-=item * B<--bibfield> I<MARC-field>
-
-Specifies the field in the bibliographic record that holds the identifier
-value. Defaults to null.
-
-=item * B<--bibsubfield> I<MARC-code>
-
-Specifies the subfield in the bibliographic record, if any, that holds the
-identifier value. This option is ignored unless it is accompanied by the
-B<--bibfield> option. Defaults to null.
-
-=item * B<-u> I<username>, B<--user>=I<username>
-
-Specifies the Evergreen user that will own these serial records.
-
-=item * B<-m> I<file-format>, B<--marctype>=I<file-format>
-
-Specifies whether the files containg the MFHD records are in MARC21 ('MARC21')
-or MARC21XML ('XML') format. Defaults to MARC21.
-
-=item * B<-l> I<map-file>, B<--libmap>=I<map-file>
-
-Points to a file to containing a mapping of library names to integers.
-The integer represents the actor.org_unit.id value of the library. This enables
-us to generate an ingest file that does not subsequently need to manually
-manipulated.
-
-The library name must correspond to the 'b' subfield of the 852 field.
-Well, it does not have to, but you will have to modify this script
-accordingly.
-
-The format of the map file should be the name of the library, followed
-by a tab, followed by the desired numeric ID of the library. For example:
-
-BR1 4
-BR2 5
-
-=item * B<-q>, B<--quiet>
-
-Suppresses the record counter output.
-
-=back
-
-=head1 EXAMPLES
-
- marc2sre.pl --idfield 004 --bibfield 035 --bibsubfield a --user cat1 serial_holding.xml
-
-Processes MFHD records in the B<serial_holding.xml> file. The script pulls the
-bibliographic record identifier from the 004 control field of the MFHD record
-and searches for a matching value in the bibliographic record in data field
-035, subfield a. The "cat1" user will own the processed MFHD records.
-
-=head1 AUTHOR
-
-Dan Scott <dscott at laurentian.ca>
-
-=head1 COPYRIGHT AND LICENSE
-
-Copyright 2010-2011 by Dan Scott
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-=cut
Modified: trunk/Open-ILS/src/extras/import/parallel_pg_loader.pl
===================================================================
--- trunk/Open-ILS/src/extras/import/parallel_pg_loader.pl 2011-04-06 21:49:06 UTC (rev 20008)
+++ trunk/Open-ILS/src/extras/import/parallel_pg_loader.pl 2011-04-06 22:06:40 UTC (rev 20009)
@@ -2,6 +2,8 @@
use strict;
use warnings;
+use lib '/openils/lib/perl5/';
+
use OpenSRF::System;
use OpenSRF::EX qw/:try/;
use OpenSRF::Utils::SettingsClient;
Deleted: trunk/Open-ILS/src/extras/import/parallel_pg_loader.pl.in
===================================================================
--- trunk/Open-ILS/src/extras/import/parallel_pg_loader.pl.in 2011-04-06 21:49:06 UTC (rev 20008)
+++ trunk/Open-ILS/src/extras/import/parallel_pg_loader.pl.in 2011-04-06 22:06:40 UTC (rev 20009)
@@ -1,136 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-
-use OpenSRF::System;
-use OpenSRF::EX qw/:try/;
-use OpenSRF::Utils::SettingsClient;
-use OpenILS::Utils::Fieldmapper;
-use OpenSRF::Utils::JSON;
-use FileHandle;
-
-use Time::HiRes qw/time/;
-use Getopt::Long;
-
-my @files;
-my ($config, $output, @auto, @order, @wipe) =
- ('@sysconfdir@/opensrf_core.xml', 'pg_loader-output');
-my $nocommit = 0;
-
-GetOptions(
- 'config=s' => \$config,
- 'output=s' => \$output,
- 'wipe=s' => \@wipe,
- 'autoprimary=s' => \@auto,
- 'order=s' => \@order,
- 'nocommit=i' => \$nocommit,
-);
-
-my $pwd = `pwd`;
-chop($pwd);
-
-my %lineset;
-my %fieldcache;
-
-OpenSRF::System->bootstrap_client( config_file => $config );
-Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
-
-my $main_out = FileHandle->new(">$output.sql") if ($output);
-
-binmode($main_out,'utf8');
-
-$main_out->print("SET CLIENT_ENCODING TO 'UNICODE';\n\n");
-$main_out->print("BEGIN;\n\n");
-
-my %out_files;
-for my $h (@order) {
- $out_files{$h} = FileHandle->new(">$output.$h.sql");
- binmode($out_files{$h},'utf8');
-}
-
-my $count = 0;
-my $starttime = time;
-my $after_commit = '';
-while ( my $rec = <> ) {
- next unless ($rec);
-
- my $row;
- try {
- $row = OpenSRF::Utils::JSON->JSON2perl($rec);
- } catch Error with {
- my $e = shift;
- warn "\n\n !!! Error : $e \n\n at or around line $count\n";
- };
- next unless ($row);
-
- my $class = $row->class_name;
- my $hint = $row->json_hint;
-
- next unless ( grep /$hint/, @order );
-
- if (!$fieldcache{$hint}) {
- my @cols = $row->real_fields;
- if (grep { $_ eq $hint} @auto) {
- @cols = grep { $_ ne $class->Identity } @cols;
- }
-
- $fieldcache{$hint} =
- { table => $class->Table,
- sequence => $class->Sequence,
- pkey => $class->Identity,
- fields => \@cols,
- };
-
- #XXX it burnnnsssessss
- $fieldcache{$hint}{table} =~ s/\.full_rec/.real_full_rec/o if ($hint eq 'mfr');
-
- my $fields = join(',', @{ $fieldcache{$hint}{fields} });
- $main_out->print( "DELETE FROM $fieldcache{$hint}{table};\n" ) if (grep {$_ eq $hint } @wipe);
- # Speed up loading of bib records
- $main_out->print( "COPY $fieldcache{$hint}{table} ($fields) FROM '$pwd/$output.$hint.sql';\n" );
-
- }
-
- my $line = [map { $row->$_ } @{ $fieldcache{$hint}{fields} }];
- my @data;
- my $x = 0;
- for my $d (@$line) {
- if (!defined($d)) {
- $d = '\N';
- } else {
- $d =~ s/\f/\\f/gos;
- $d =~ s/\n/\\n/gos;
- $d =~ s/\r/\\r/gos;
- $d =~ s/\t/\\t/gos;
- $d =~ s/\\/\\\\/gos;
- }
- if ($hint eq 'bre' and $fieldcache{$hint}{fields}[$x] eq 'quality') {
- $d = int($d) if ($d ne '\N');
- }
- push @data, $d;
- $x++;
- }
- $out_files{$hint}->print( join("\t", @data)."\n" );
-
- if (!($count % 500)) {
- print STDERR "\r$count\t". $count / (time - $starttime);
- }
-
- $count++;
-}
-
-for my $hint (@order) {
- next if (grep { $_ eq $hint} @auto);
- next unless ($fieldcache{$hint}{sequence});
- $after_commit .= "SELECT setval('$fieldcache{$hint}{sequence}'::TEXT, (SELECT MAX($fieldcache{$hint}{pkey}) FROM $fieldcache{$hint}{table}), TRUE);\n";
-}
-
-if (grep /^mfr$/, %out_files) {
- $main_out->print("SELECT reporter.enable_materialized_simple_record_trigger();\n");
- $main_out->print("SELECT reporter.disable_materialized_simple_record_trigger();\n");
-}
-
-$main_out->print("COMMIT;\n\n") unless $nocommit;
-$main_out->print($after_commit);
-$main_out->close;
-
Modified: trunk/configure.ac
===================================================================
--- trunk/configure.ac 2011-04-06 21:49:06 UTC (rev 20008)
+++ trunk/configure.ac 2011-04-06 22:06:40 UTC (rev 20009)
@@ -373,19 +373,12 @@
Open-ILS/updates/Makefile
Open-ILS/xul/staff_client/Makefile
Open-ILS/src/extras/eg_config
- Open-ILS/src/extras/import/marc2are.pl
- Open-ILS/src/extras/import/marc2bre.pl
- Open-ILS/src/extras/import/marc2sre.pl
- Open-ILS/src/extras/import/parallel_pg_loader.pl
+ Open-ILS/src/extras/fast-extract
Open-ILS/src/perlmods/Makefile
Open-ILS/src/perlmods/lib/OpenILS/Utils/Cronscript.pm],
[
if test -e "./Open-ILS/src/extras/eg_config"; then chmod 755 Open-ILS/src/extras/eg_config; fi;
if test -e "./Open-ILS/src/extras/fast-extract"; then chmod 755 Open-ILS/src/extras/fast-extract; fi;
- if test -e "./Open-ILS/src/extras/import/marc2are.pl"; then chmod 755 Open-ILS/src/extras/import/marc2are.pl; fi;
- if test -e "./Open-ILS/src/extras/import/marc2bre.pl"; then chmod 755 Open-ILS/src/extras/import/marc2bre.pl; fi;
- if test -e "./Open-ILS/src/extras/import/marc2sre.pl"; then chmod 755 Open-ILS/src/extras/import/marc2sre.pl; fi;
- if test -e "./Open-ILS/src/extras/import/parallel_pg_loader.pl"; then chmod 755 Open-ILS/src/extras/import/parallel_pg_loader.pl; fi;
])
AC_OUTPUT
More information about the open-ils-commits
mailing list