[open-ils-commits] r69 - import_demo/trunk
svn at svn.open-ils.org
Mon Dec 1 16:55:08 EST 2008
Author: dbs
Date: 2008-12-01 16:55:03 -0500 (Mon, 01 Dec 2008)
New Revision: 69
Removed:
import_demo/trunk/direct_ingest.pl
import_demo/trunk/marc2bre.pl
import_demo/trunk/parallel_pg_loader.pl
import_demo/trunk/quick_metarecord_map.sql
Modified:
import_demo/trunk/generate_copies.sql
import_demo/trunk/import_bibs.sh
Log:
It's a bad idea to carry old versions of scripts here; maybe look at svn:externals for this instead.
Make the import and copy-generation logic a little safer and cleaner.
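A rough sketch of the svn:externals approach, assuming the canonical import scripts live in the main Evergreen repository (the URL below is a guess, not a verified path; adjust it to the real location):

    # Map ./scripts to the canonical import scripts instead of carrying
    # stale copies here ('localdir URL' externals format; URL hypothetical).
    svn propset svn:externals \
        'scripts http://svn.open-ils.org/ILS/trunk/Open-ILS/src/extras/import' .
    svn commit -m 'Pull import scripts via svn:externals'
    svn update    # checks out the external into ./scripts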
Deleted: import_demo/trunk/direct_ingest.pl
===================================================================
--- import_demo/trunk/direct_ingest.pl 2008-12-01 19:01:20 UTC (rev 68)
+++ import_demo/trunk/direct_ingest.pl 2008-12-01 21:55:03 UTC (rev 69)
@@ -1,103 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-
-use lib '/openils/lib/perl5/';
-
-use OpenSRF::System;
-use OpenSRF::EX qw/:try/;
-use OpenSRF::AppSession;
-use OpenSRF::Application;
-use OpenSRF::MultiSession;
-use OpenSRF::Utils::SettingsClient;
-use OpenILS::Application::Ingest;
-use OpenILS::Application::AppUtils;
-use OpenILS::Utils::Fieldmapper;
-use Digest::MD5 qw/md5_hex/;
-use OpenSRF::Utils::JSON;
-use Data::Dumper;
-use FileHandle;
-
-use Time::HiRes qw/time/;
-use Getopt::Long;
-use MARC::Batch;
-use MARC::File::XML;
-use MARC::Charset;
-
-MARC::Charset->ignore_errors(1);
-
-my ($auth, $config, $quiet) =
- (0, '/openils/conf/opensrf_core.xml');
-
-GetOptions(
- 'config=s' => \$config,
- 'authority' => \$auth,
- 'quiet' => \$quiet,
-);
-
-my @ses;
-
-open NEWERR, ">&STDERR";
-
-select NEWERR; $| = 1;
-select STDERR; $| = 1;
-select STDOUT; $| = 1;
-
-OpenSRF::System->bootstrap_client( config_file => $config );
-Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
-
-OpenILS::Application::Ingest->use;
-
-my $meth = 'open-ils.ingest.full.biblio.object.readonly';
-$meth = 'open-ils.ingest.full.authority.object.readonly' if ($auth);
-
-$meth = OpenILS::Application::Ingest->method_lookup( $meth );
-
-my $count = 0;
-my $starttime = time;
-while (my $rec = <>) {
- next unless ($rec);
-
- my $bib = OpenSRF::Utils::JSON->JSON2perl($rec);
- my $data;
-
- try {
- ($data) = $meth->run( $bib );
- } catch Error with {
- my $e = shift;
- warn "Couldn't process record: $e\n >>> $rec\n";
- };
-
- next unless $data;
-
- postprocess( { bib => $bib, ingest_data => $data } );
-
- if (!$quiet && !($count % 20)) {
- print NEWERR "\r$count\t". $count / (time - $starttime);
- }
-
- $count++;
-}
-
-sub postprocess {
- my $data = shift;
-
- my $bib = $data->{bib};
- my $full_rec = $data->{ingest_data}->{full_rec};
-
- my $field_entries = $data->{ingest_data}->{field_entries} unless ($auth);
- my $fp = $data->{ingest_data}->{fingerprint} unless ($auth);
- my $rd = $data->{ingest_data}->{descriptor} unless ($auth);
-
- $bib->fingerprint( $fp->{fingerprint} ) unless ($auth);
- $bib->quality( $fp->{quality} ) unless ($auth);
-
- print( OpenSRF::Utils::JSON->perl2JSON($bib)."\n" );
- unless ($auth) {
- print( OpenSRF::Utils::JSON->perl2JSON($rd)."\n" );
- print( OpenSRF::Utils::JSON->perl2JSON($_)."\n" ) for (@$field_entries);
- }
-
- print( OpenSRF::Utils::JSON->perl2JSON($_)."\n" ) for (@$full_rec);
-}
-
Modified: import_demo/trunk/generate_copies.sql
===================================================================
--- import_demo/trunk/generate_copies.sql 2008-12-01 19:01:20 UTC (rev 68)
+++ import_demo/trunk/generate_copies.sql 2008-12-01 21:55:03 UTC (rev 69)
@@ -1,3 +1,4 @@
+BEGIN;
-- First, we build shelving location
INSERT INTO asset.copy_location (name, owning_lib)
SELECT DISTINCT l.location, ou.id
@@ -2,4 +3,16 @@
FROM staging_items l JOIN actor.org_unit ou
- ON (l.owning_lib = ou.shortname);
+ ON (l.owning_lib = ou.shortname)
+;
+-- Create circ modifiers for in-db circulation
+-- This is very, very crude but satisfies the FK constraints
+INSERT INTO config.circ_modifier (code, name, description, sip2_media_type, magnetic_media)
+ SELECT DISTINCT item_type as code,
+ item_type AS name,
+ LOWER(item_type) AS description,
+ '001' AS sip2_media_type,
+ FALSE AS magnetic_media
+ FROM staging_items
+ WHERE item_type NOT IN (SELECT code FROM config.circ_modifier);
+
-- Import call numbers for bibrecord->library mappings
@@ -16,7 +29,7 @@
circ_lib, creator, editor, create_date, barcode,
status, location, loan_duration,
fine_level, circ_modifier, deposit, ref, call_number)
- SELECT DISTINCT ou.id AS circ_lib,
+ SELECT DISTINCT ou.id AS circ_lib,
1 AS creator,
1 AS editor,
l.createdate AS create_date,
@@ -44,4 +57,6 @@
JOIN asset.copy_location cl
ON (ou.id = cl.owning_lib AND l.location = cl.name)
JOIN asset.call_number cn
- ON (ou.id = cn.owning_lib AND l.bibkey = cn.record AND l.callnum = cn.label);
+ ON (ou.id = cn.owning_lib AND l.bibkey = cn.record AND l.callnum = cn.label)
+;
+COMMIT;
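The new config.circ_modifier INSERT above only creates stub rows to satisfy the foreign key on asset.copy.circ_modifier, so after running the script it is worth confirming that every staged item type was covered. A minimal check, assuming the connection defaults used in import_bibs.sh below:

    # Should return zero rows: each staging item_type needs a circ modifier.
    psql -h localhost -p 5432 -U evergreen -d evergreen -c \
        "SELECT DISTINCT item_type FROM staging_items
         WHERE item_type NOT IN (SELECT code FROM config.circ_modifier);"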
Modified: import_demo/trunk/import_bibs.sh
===================================================================
--- import_demo/trunk/import_bibs.sh 2008-12-01 19:01:20 UTC (rev 68)
+++ import_demo/trunk/import_bibs.sh 2008-12-01 21:55:03 UTC (rev 69)
@@ -2,16 +2,22 @@
# Demonstrates how to import bibliographic records into Evergreen
+# Change to match the location of the import scripts, rather than the bundled
+# versions that might be out of date
+SCRIPTS=./scripts
+
# Change these variables to match your database name, user, and password
DB_NAME=evergreen
DB_USER=evergreen
DB_PW=evergreen
+DB_HOST=localhost
+DB_PORT=5432
# Create the biblio.record_entry (bre) entries in JSON format from the MARC21XML source
-perl marc2bre.pl --marctype XML --db_name $DB_NAME --db_user $DB_USER --db_pw $DB_PW --idfield '035' --idsubfield 'a' sample_marc.xml > sample_marc.bre
+perl $SCRIPTS/marc2bre.pl --marctype XML --db_name $DB_NAME --db_user $DB_USER --db_pw $DB_PW --db_host $DB_HOST --db_port $DB_PORT --idfield '035' --idsubfield 'a' sample_marc.xml > sample_marc.bre
# Run the biblio.record_entry JSON against the IDL to generate a complete set of JSON classes representing indexed fields
-perl direct_ingest.pl sample_marc.bre > sample_marc.ingest
+perl $SCRIPTS/direct_ingest.pl sample_marc.bre > sample_marc.ingest
# Generate SQL insert statements by running the complete set of JSON against the IDL
-perl parallel_pg_loader.pl -order bre -order mrd -order mfr -order mtfe -order mafe -order msfe -order mkfe -order msefe -autoprimary mrd -autoprimary mfr -autoprimary mtfe -autoprimary mafe -autoprimary msfe -autoprimary mkfe -autoprimary msefe < sample_marc.ingest > sample_marc.sql
+perl $SCRIPTS/parallel_pg_loader.pl -order bre -order mrd -order mfr -order mtfe -order mafe -order msfe -order mkfe -order msefe -autoprimary mrd -autoprimary mfr -autoprimary mtfe -autoprimary mafe -autoprimary msfe -autoprimary mkfe -autoprimary msefe --output sample_marc < sample_marc.ingest
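With --output sample_marc, parallel_pg_loader.pl now writes a driver file (sample_marc.sql) plus one tab-delimited data file per class, rather than printing SQL to stdout. Two caveats follow from the deleted loader source later in this commit: the driver's COPY statements reference the data files by absolute server-side path (so they must be readable by the PostgreSQL backend, and COPY FROM a file normally requires superuser rights), and the driver opens a transaction but leaves its COMMIT commented out. A sketch of the final load step, reusing the variables above:

    # The loader emits 'BEGIN;' but writes '-- COMMIT;', so append a real
    # COMMIT once the generated SQL has been inspected, then load it.
    echo 'COMMIT;' >> sample_marc.sql
    psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d $DB_NAME -f sample_marc.sql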
Deleted: import_demo/trunk/marc2bre.pl
===================================================================
--- import_demo/trunk/marc2bre.pl 2008-12-01 19:01:20 UTC (rev 68)
+++ import_demo/trunk/marc2bre.pl 2008-12-01 21:55:03 UTC (rev 69)
@@ -1,300 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-
-use lib '/openils/lib/perl5/';
-
-use Error qw/:try/;
-use OpenILS::Utils::Fieldmapper;
-use Digest::MD5 qw/md5_hex/;
-use OpenSRF::Utils::JSON;
-use Data::Dumper;
-use Unicode::Normalize;
-use Encode;
-
-use FileHandle;
-use Time::HiRes qw/time/;
-use Getopt::Long;
-use MARC::Batch;
-use MARC::File::XML ( BinaryEncoding => 'utf-8' );
-use MARC::Charset;
-use DBI;
-
-#MARC::Charset->ignore_errors(1);
-
-my ($id_field, $id_subfield, $recid, $user, $config, $idlfile, $marctype, $keyfile, $dontuse_file, $enc, $force_enc, @files, @trash_fields, $quiet) =
- ('', 'a', 0, 1, '/openils/conf/opensrf_core.xml', '/openils/conf/fm_IDL.xml', 'USMARC');
-
-my ($db_driver,$db_host,$db_name,$db_user,$db_pw) =
- ('Pg','localhost','evergreen','postgres','postgres');
-
-GetOptions(
- 'marctype=s' => \$marctype,
- 'startid=i' => \$recid,
- 'idfield=s' => \$id_field,
- 'idsubfield=s' => \$id_subfield,
- 'user=s' => \$user,
- 'encoding=s' => \$enc,
- 'hard_encoding' => \$force_enc,
- 'keyfile=s' => \$keyfile,
- 'config=s' => \$config,
- 'file=s' => \@files,
- 'trash=s' => \@trash_fields,
- 'xml_idl=s' => \$idlfile,
- 'dontuse=s' => \$dontuse_file,
- "db_driver=s" => \$db_driver,
- "db_host=s" => \$db_host,
- "db_name=s" => \$db_name,
- "db_user=s" => \$db_user,
- "db_pw=s" => \$db_pw,
- 'quiet' => \$quiet
-);
-
-if ($enc) {
- MARC::Charset->ignore_errors(1);
- MARC::Charset->assume_encoding($enc);
-}
-
-if (uc($marctype) eq 'XML') {
- 'open'->use(':utf8');
-} else {
- bytes->use();
-}
-
-@files = @ARGV if (!@files);
-
-my @ses;
-my @req;
-my %processing_cache;
-
-my $dsn = "dbi:$db_driver:host=$db_host;dbname=$db_name";
-
-if (!$recid) {
- my $table = 'biblio_record_entry';
- $table = 'biblio.record_entry' if ($db_driver eq 'Pg');
-
- my $dbh = DBI->connect($dsn,$db_user,$db_pw);
- my $sth = $dbh->prepare("SELECT MAX(id) + 1 FROM $table");
-
- $sth->execute;
- $sth->bind_col(1, \$recid);
- $sth->fetch;
- $sth->finish;
- $recid++;
- $dbh->disconnect;
-}
-
-my %source_map = (
- o => 'OCLC',
- i => 'ISxN',
- l => 'LCCN',
- s => 'System',
- g => 'Gutenberg',
-);
-
-Fieldmapper->import(IDL => $idlfile);
-
-my %keymap;
-if ($keyfile) {
- open F, $keyfile or die "Couldn't open key file $keyfile";
- while (<F>) {
- if ( /^(\d+)\|(\S+)/o ) {
- $keymap{$1} = $2;
- }
- }
- close(F);
-}
-
-my %dontuse_id;
-if ($dontuse_file) {
- open F, $dontuse_file or die "Couldn't open used-id file $dontuse_file";
- while (<F>) {
- chomp;
- s/^\s*//;
- s/\s*$//;
- $dontuse_id{$_} = 1;
- }
- close(F);
-}
-
-select STDERR; $| = 1;
-select STDOUT; $| = 1;
-
-my $batch = new MARC::Batch ( $marctype, @files );
-$batch->strict_off();
-$batch->warnings_off();
-
-my %used_ids;
-my $starttime = time;
-my $rec;
-my $count = 0;
-while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
- next if ($rec == -1);
- my $id;
-
- $recid++;
- while (exists $used_ids{$recid}) {
- $recid++;
- }
- $used_ids{$recid} = 1;
-
- if ($id_field) {
- my $field = $rec->field($id_field);
- if ($field) {
- if ($field->is_control_field) {
- $id = $field->data;
- } else {
- $id = $field->subfield($id_subfield);
- }
-
- $id =~ s/\D+//gso;
- }
- $id = '' if (exists $dontuse_id{$id});
- }
-
- if (!$id) {
- $id = $recid;
- }
-
- if ($keyfile) {
- if (my $tcn = $keymap{$id}) {
- $rec->delete_field( $_ ) for ($rec->field($id_field));
- $rec->append_fields( MARC::Field->new( $id_field, '', '', $id_subfield, $tcn ) );
- } else {
- $count++;
- next;
- }
- }
-
- my $tcn;
- ($rec, $tcn) = preprocess($rec, $id);
-
- $tcn->add_subfields(c => $id);
-
- $rec->delete_field( $_ ) for ($rec->field($id_field));
- $rec->append_fields( $tcn );
-
- if (!$rec) {
- next;
- }
-
- my $tcn_value = $rec->subfield('901' => 'a') || "SYS$id";
- my $tcn_source = $rec->subfield('901' => 'b') || 'System';
-
- (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
- $xml =~ s/^<\?xml.+\?\s*>//go;
- $xml =~ s/>\s+</></go;
- $xml =~ s/\p{Cc}//go;
- $xml = entityize($xml);
- $xml =~ s/[\x00-\x1f]//go;
-
- my $bib = new Fieldmapper::biblio::record_entry;
- $bib->id($id);
- $bib->active('t');
- $bib->deleted('f');
- $bib->marc($xml);
- $bib->creator($user);
- $bib->create_date('now');
- $bib->editor($user);
- $bib->edit_date('now');
- $bib->tcn_source($tcn_source);
- $bib->tcn_value($tcn_value);
- $bib->last_xact_id('IMPORT-'.$starttime);
-
- print OpenSRF::Utils::JSON->perl2JSON($bib)."\n";
- $dontuse_id{$tcn_value} = 1;
-
- $count++;
-
- if (!$quiet && !($count % 50)) {
- print STDERR "\r$count\t". $count / (time - $starttime);
- }
-}
-
-sub preprocess {
- my $rec = shift;
- my $id = shift;
-
- my ($source, $value) = ('','');
-
- $id = '' if (exists $dontuse_id{$id});
-
- if (!$id) {
- my $f = $rec->field('001');
- $id = $f->data if ($f);
- $id = '' if (exists $dontuse_id{$id});
- }
-
- if (!$id || exists $dontuse_id{$source.$id}) {
- my $f = $rec->field('000');
- $id = $f->data if ($f);
- $source = 'g' if ($f); # only PG seems to use this
- }
-
- if (!$id || exists $dontuse_id{$source.$id}) {
- my $f = $rec->field('020');
- $id = $f->subfield('a') if ($f);
- $source = 'i' if ($f);
- }
-
- if (!$id || exists $dontuse_id{$source.$id}) {
- my $f = $rec->field('022');
- $id = $f->subfield('a') if ($f);
- $source = 'i' if ($f);
- }
-
- if (!$id || exists $dontuse_id{$source.$id}) {
- my $f = $rec->field('010');
- $id = $f->subfield('a') if ($f);
- $source = 'l' if ($f);
- }
-
- $rec->delete_field($_) for ($rec->field('901', $id_field, @trash_fields));
-
- if ($id) {
- $id =~ s/\s*$//o;
- $id =~ s/^\s*//o;
- $id =~ s/^(\S+).*$/$1/o;
-
- $id = $source.$id if ($source);
-
- ($source, $value) = $id =~ /^(.)(.+)$/o;
- if ($id =~ /^o(\d+)$/o) {
- $id = "ocm$1";
- $source = 'o';
- }
- }
-
- if ($id && exists $dontuse_id{$id}) {
- warn "\n!!! TCN $id is already in use. Using the record ID ($recid) as a system-generated TCN.\n";
- $id = '';
- }
-
- if (!$id) {
- $source = 's';
- $id = 's'.$recid;
- }
-
- my $tcn = MARC::Field->new(
- '901' => ('', ''),
- a => $id,
- b => do { $source_map{$source} || 'System' },
- );
-
- return ($rec,$tcn);
-}
-
-sub entityize {
- my $stuff = shift;
- my $form = shift;
-
- if ($form and $form eq 'D') {
- $stuff = NFD($stuff);
- } else {
- $stuff = NFC($stuff);
- }
-
- $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
- return $stuff;
-}
-
Deleted: import_demo/trunk/parallel_pg_loader.pl
===================================================================
--- import_demo/trunk/parallel_pg_loader.pl 2008-12-01 19:01:20 UTC (rev 68)
+++ import_demo/trunk/parallel_pg_loader.pl 2008-12-01 21:55:03 UTC (rev 69)
@@ -1,118 +0,0 @@
-#!/usr/bin/perl
-use strict;
-
-use lib '/openils/lib/perl5/';
-
-use OpenSRF::System;
-use OpenSRF::EX qw/:try/;
-use OpenSRF::Utils::SettingsClient;
-use OpenILS::Utils::Fieldmapper;
-use OpenSRF::Utils::JSON;
-use FileHandle;
-
-use Time::HiRes qw/time/;
-use Getopt::Long;
-
-my @files;
-my ($config, $output, @auto, @order, @wipe) =
- ('/openils/conf/opensrf_core.xml', 'pg_loader-output');
-
-GetOptions(
- 'config=s' => \$config,
- 'output=s' => \$output,
- 'wipe=s' => \@wipe,
- 'autoprimary=s' => \@auto,
- 'order=s' => \@order,
-);
-
-my $pwd = `pwd`;
-chop($pwd);
-
-my %lineset;
-my %fieldcache;
-
-OpenSRF::System->bootstrap_client( config_file => $config );
-Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
-
-my $main_out = FileHandle->new(">$output.sql") if ($output);
-
-binmode($main_out,'utf8');
-
-$main_out->print("SET CLIENT_ENCODING TO 'UNICODE';\n\n");
-$main_out->print("BEGIN;\n\n");
-
-my %out_files;
-my %out_headers;
-for my $h (@order) {
- $out_files{$h} = FileHandle->new(">$output.$h.sql");
- binmode($out_files{$h},'utf8');
-}
-
-my $count = 0;
-my $starttime = time;
-while ( my $rec = <> ) {
- next unless ($rec);
-
- my $row;
- try {
- $row = OpenSRF::Utils::JSON->JSON2perl($rec);
- } catch Error with {
- my $e = shift;
- warn "\n\n !!! Error : $e \n\n at or around line $count\n";
- };
- next unless ($row);
-
- my $class = $row->class_name;
- my $hint = $row->json_hint;
-
- next unless ( grep /$hint/, @order );
-
- if (!$fieldcache{$hint}) {
- my @cols = $row->real_fields;
- if (grep { $_ eq $hint} @auto) {
- @cols = grep { $_ ne $class->Identity } @cols;
- }
-
- $fieldcache{$hint} =
- { table => $class->Table,
- sequence => $class->Sequence,
- pkey => $class->Identity,
- fields => \@cols,
- };
-
- my $fields = join(',', @{ $fieldcache{$hint}{fields} });
- $main_out->print( "DELETE FROM $fieldcache{$hint}{table};\n" ) if (grep {$_ eq $hint } @wipe);
- $main_out->print( "COPY $fieldcache{$hint}{table} ($fields) FROM '$pwd/$output.$hint.sql';\n" );
-
- }
-
- my $line = [map { $row->$_ } @{ $fieldcache{$hint}{fields} }];
- my @data;
- my $x = 0;
- for my $d (@$line) {
- if (!defined($d)) {
- $d = '\N';
- } else {
- $d =~ s/\f/\\f/gos;
- $d =~ s/\n/\\n/gos;
- $d =~ s/\r/\\r/gos;
- $d =~ s/\t/\\t/gos;
- $d =~ s/\\/\\\\/gos;
- }
- if ($hint eq 'bre' and $fieldcache{$hint}{fields}[$x] eq 'quality') {
- $d = int($d);
- }
- push @data, $d;
- $x++;
- }
- $out_files{$hint}->print( join("\t", @data)."\n" );
-
- if (!($count % 500)) {
- print STDERR "\r$count\t". $count / (time - $starttime);
- }
-
- $count++;
-}
-
-$main_out->print("-- COMMIT;\n\n");
-$main_out->close;
Deleted: import_demo/trunk/quick_metarecord_map.sql
===================================================================
--- import_demo/trunk/quick_metarecord_map.sql 2008-12-01 19:01:20 UTC (rev 68)
+++ import_demo/trunk/quick_metarecord_map.sql 2008-12-01 21:55:03 UTC (rev 69)
@@ -1,27 +0,0 @@
-BEGIN;
-
-ALTER TABLE metabib.metarecord_source_map DROP CONSTRAINT metabib_metarecord_source_map_metarecord_fkey;
-
-TRUNCATE metabib.metarecord;
-TRUNCATE metabib.metarecord_source_map;
-
-INSERT INTO metabib.metarecord (fingerprint,master_record)
- SELECT fingerprint,id
- FROM (SELECT DISTINCT ON (fingerprint)
- fingerprint, id, quality
- FROM biblio.record_entry
- ORDER BY fingerprint, quality desc) AS x
- WHERE fingerprint IS NOT NULL;
-
-INSERT INTO metabib.metarecord_source_map (metarecord,source)
- SELECT m.id, b.id
- FROM biblio.record_entry b
- JOIN metabib.metarecord m ON (m.fingerprint = b.fingerprint);
-
-ALTER TABLE metabib.metarecord_source_map ADD CONSTRAINT metabib_metarecord_source_map_metarecord_fkey FOREIGN KEY (metarecord) REFERENCES metabib.metarecord (id);
-
-COMMIT;
-
-VACUUM FULL ANALYZE VERBOSE metabib.metarecord;
-VACUUM FULL ANALYZE VERBOSE metabib.metarecord_source_map;
-