[open-ils-commits] r69 - import_demo/trunk

svn at svn.open-ils.org
Mon Dec 1 16:55:08 EST 2008


Author: dbs
Date: 2008-12-01 16:55:03 -0500 (Mon, 01 Dec 2008)
New Revision: 69

Removed:
   import_demo/trunk/direct_ingest.pl
   import_demo/trunk/marc2bre.pl
   import_demo/trunk/parallel_pg_loader.pl
   import_demo/trunk/quick_metarecord_map.sql
Modified:
   import_demo/trunk/generate_copies.sql
   import_demo/trunk/import_bibs.sh
Log:
It's a bad idea to carry old versions of scripts here; maybe look at svn:externals for this.
Make the import and copy-generation logic a little bit safer and cleaner.


Deleted: import_demo/trunk/direct_ingest.pl
===================================================================
--- import_demo/trunk/direct_ingest.pl	2008-12-01 19:01:20 UTC (rev 68)
+++ import_demo/trunk/direct_ingest.pl	2008-12-01 21:55:03 UTC (rev 69)
@@ -1,103 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-
-use lib '/openils/lib/perl5/';
-
-use OpenSRF::System;
-use OpenSRF::EX qw/:try/;
-use OpenSRF::AppSession;
-use OpenSRF::Application;
-use OpenSRF::MultiSession;
-use OpenSRF::Utils::SettingsClient;
-use OpenILS::Application::Ingest;
-use OpenILS::Application::AppUtils;
-use OpenILS::Utils::Fieldmapper;
-use Digest::MD5 qw/md5_hex/;
-use OpenSRF::Utils::JSON;
-use Data::Dumper;
-use FileHandle;
-
-use Time::HiRes qw/time/;
-use Getopt::Long;
-use MARC::Batch;
-use MARC::File::XML;
-use MARC::Charset;
-
-MARC::Charset->ignore_errors(1);
-
-my ($auth, $config, $quiet) =
-	(0, '/openils/conf/opensrf_core.xml');
-
-GetOptions(
-	'config=s'	=> \$config,
-	'authority'	=> \$auth,
-	'quiet'		=> \$quiet,
-);
-
-my @ses;
-
-open NEWERR,     ">&STDERR";
-
-select NEWERR; $| = 1;
-select STDERR; $| = 1;
-select STDOUT; $| = 1;
-
-OpenSRF::System->bootstrap_client( config_file => $config );
-Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
-
-OpenILS::Application::Ingest->use;
-
-my $meth = 'open-ils.ingest.full.biblio.object.readonly';
-$meth = 'open-ils.ingest.full.authority.object.readonly' if ($auth);
-
-$meth = OpenILS::Application::Ingest->method_lookup( $meth );
-
-my $count = 0;
-my $starttime = time;
-while (my $rec = <>) {
-	next unless ($rec);
-
-	my $bib = OpenSRF::Utils::JSON->JSON2perl($rec);
-	my $data;
-
-	try {
-		($data) = $meth->run( $bib );
-	} catch Error with {
-		my $e = shift;
-		warn "Couldn't process record: $e\n >>> $rec\n";
-	};
-
-	next unless $data;
-
-	postprocess( { bib => $bib, ingest_data => $data } );
-
-	if (!$quiet && !($count % 20)) {
-		print NEWERR "\r$count\t". $count / (time - $starttime);
-	}
-
-	$count++;
-}
-
-sub postprocess {
-	my $data = shift;
-
-	my $bib = $data->{bib};
-	my $full_rec = $data->{ingest_data}->{full_rec};
-
-	my $field_entries = $data->{ingest_data}->{field_entries} unless ($auth);
-	my $fp = $data->{ingest_data}->{fingerprint} unless ($auth);
-	my $rd = $data->{ingest_data}->{descriptor} unless ($auth);
-
-	$bib->fingerprint( $fp->{fingerprint} ) unless ($auth);
-	$bib->quality( $fp->{quality} ) unless ($auth);
-
-	print( OpenSRF::Utils::JSON->perl2JSON($bib)."\n" );
-	unless ($auth) {
-		print( OpenSRF::Utils::JSON->perl2JSON($rd)."\n" );
-		print( OpenSRF::Utils::JSON->perl2JSON($_)."\n" ) for (@$field_entries);
-	}
-
-	print( OpenSRF::Utils::JSON->perl2JSON($_)."\n" ) for (@$full_rec);
-}
-

Modified: import_demo/trunk/generate_copies.sql
===================================================================
--- import_demo/trunk/generate_copies.sql	2008-12-01 19:01:20 UTC (rev 68)
+++ import_demo/trunk/generate_copies.sql	2008-12-01 21:55:03 UTC (rev 69)
@@ -1,3 +1,4 @@
+BEGIN;
 -- First, we build shelving location
 INSERT INTO asset.copy_location (name, owning_lib)
         SELECT  DISTINCT l.location, ou.id
@@ -2,4 +3,16 @@
           FROM  staging_items l JOIN actor.org_unit ou
-                        ON (l.owning_lib = ou.shortname);
+                ON (l.owning_lib = ou.shortname)
+;
 
+-- Create circ modifiers for in-db circulation
+-- This is very, very crude but satisfies the FK constraints
+INSERT INTO config.circ_modifier (code, name, description, sip2_media_type, magnetic_media)
+        SELECT  DISTINCT item_type as code,
+          item_type AS name,
+          LOWER(item_type) AS description,
+          '001' AS sip2_media_type,
+          FALSE AS magnetic_media
+          FROM  staging_items
+          WHERE item_type NOT IN (SELECT code FROM config.circ_modifier);
+
 -- Import call numbers for bibrecord->library mappings
@@ -16,7 +29,7 @@
         circ_lib, creator, editor, create_date, barcode,
         status, location, loan_duration,
         fine_level, circ_modifier, deposit, ref, call_number)
-        SELECT  DISTINCT ou.id AS circ_lib,
+        SELECT  DISTINCT  ou.id AS circ_lib,
                 1 AS creator,
                 1 AS editor,
                 l.createdate AS create_date,
@@ -44,4 +57,6 @@
                 JOIN asset.copy_location cl
                         ON (ou.id = cl.owning_lib AND l.location = cl.name)
                 JOIN asset.call_number cn
-                        ON (ou.id = cn.owning_lib AND l.bibkey = cn.record AND l.callnum = cn.label);
+                        ON (ou.id = cn.owning_lib AND l.bibkey = cn.record AND l.callnum = cn.label)
+;
+COMMIT;
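
For reference, the new circ_modifier INSERT above is idempotent: the anti-join (item_type NOT IN ...) skips codes that already exist, so the script can be re-run without tripping the primary key on config.circ_modifier. One caveat with NOT IN is that it matches nothing if the subquery ever returns a NULL code; a NOT EXISTS form of the same insert avoids that edge case. A minimal sketch, assuming the same staging_items layout used above:

    -- Same idempotent insert, written with NOT EXISTS so a NULL in
    -- config.circ_modifier.code cannot silently suppress every row.
    INSERT INTO config.circ_modifier (code, name, description, sip2_media_type, magnetic_media)
            SELECT  DISTINCT s.item_type, s.item_type, LOWER(s.item_type), '001', FALSE
              FROM  staging_items s
              WHERE NOT EXISTS (SELECT 1 FROM config.circ_modifier cm
                                 WHERE cm.code = s.item_type);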

Modified: import_demo/trunk/import_bibs.sh
===================================================================
--- import_demo/trunk/import_bibs.sh	2008-12-01 19:01:20 UTC (rev 68)
+++ import_demo/trunk/import_bibs.sh	2008-12-01 21:55:03 UTC (rev 69)
@@ -2,16 +2,22 @@
 
 # Demonstrates how to import bibliographic records into Evergreen
 
+# Change to match the location of the import scripts, rather than the bundled
+# versions that might be out of date
+SCRIPTS=./scripts
+
 # Change these variables to match your database name, user, and password
 DB_NAME=evergreen
 DB_USER=evergreen
 DB_PW=evergreen
+DB_HOST=localhost
+DB_PORT=5432
 
 # Create the biblio.record_entry (bre) entries in JSON format from the MARC21XML source
-perl marc2bre.pl --marctype XML --db_name $DB_NAME --db_user $DB_USER --db_pw $DB_PW --idfield '035' --idsubfield 'a' sample_marc.xml > sample_marc.bre
+perl $SCRIPTS/marc2bre.pl --marctype XML --db_name $DB_NAME --db_user $DB_USER --db_pw $DB_PW --db_host $DB_HOST --db_port $DB_PORT --idfield '035' --idsubfield 'a' sample_marc.xml > sample_marc.bre
 
 # Run the biblio.record_entry JSON against the IDL to generate a complete set of JSON classes representing indexed fields
-perl direct_ingest.pl sample_marc.bre > sample_marc.ingest
+perl $SCRIPTS/direct_ingest.pl sample_marc.bre > sample_marc.ingest
 
 # Generate SQL insert statements by running the complete set of JSON against the IDL
-perl parallel_pg_loader.pl -order bre -order mrd -order mfr -order mtfe -order mafe -order msfe -order mkfe -order msefe -autoprimary mrd -autoprimary mfr -autoprimary mtfe -autoprimary mafe -autoprimary msfe -autoprimary mkfe -autoprimary msefe < sample_marc.ingest > sample_marc.sql
+perl $SCRIPTS/parallel_pg_loader.pl -order bre -order mrd -order mfr -order mtfe -order mafe -order msfe -order mkfe -order msefe -autoprimary mrd -autoprimary mfr -autoprimary mtfe -autoprimary mafe -autoprimary msfe -autoprimary mkfe -autoprimary msefe --output sample_marc < sample_marc.ingest 
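
With the --output sample_marc switch, the loader now writes its SQL to sample_marc.sql plus one tab-separated data file per class hint, instead of printing to stdout; the result can then be run through psql, e.g. psql -U $DB_USER -h $DB_HOST -f sample_marc.sql $DB_NAME. Judging from parallel_pg_loader.pl below, the generated wrapper file looks roughly like this (table names and column lists illustrative, not exhaustive):

    SET CLIENT_ENCODING TO 'UNICODE';

    BEGIN;

    -- one COPY per requested class hint, reading the matching data file
    COPY biblio.record_entry (id, active, deleted, marc, ...) FROM '/path/to/sample_marc.bre.sql';
    COPY metabib.rec_descriptor (record, item_type, ...) FROM '/path/to/sample_marc.mrd.sql';

    -- the final COMMIT is written commented out, so the load rolls back
    -- unless you un-comment it or commit by hand after inspecting it
    -- COMMIT;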

Deleted: import_demo/trunk/marc2bre.pl
===================================================================
--- import_demo/trunk/marc2bre.pl	2008-12-01 19:01:20 UTC (rev 68)
+++ import_demo/trunk/marc2bre.pl	2008-12-01 21:55:03 UTC (rev 69)
@@ -1,300 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-
-use lib '/openils/lib/perl5/';
-
-use Error qw/:try/;
-use OpenILS::Utils::Fieldmapper;
-use Digest::MD5 qw/md5_hex/;
-use OpenSRF::Utils::JSON;
-use Data::Dumper;
-use Unicode::Normalize;
-use Encode;
-
-use FileHandle;
-use Time::HiRes qw/time/;
-use Getopt::Long;
-use MARC::Batch;
-use MARC::File::XML ( BinaryEncoding => 'utf-8' );
-use MARC::Charset;
-use DBI;
-
-#MARC::Charset->ignore_errors(1);
-
-my ($id_field, $id_subfield, $recid, $user, $config, $idlfile, $marctype, $keyfile, $dontuse_file, $enc, $force_enc, @files, @trash_fields, $quiet) =
-	('', 'a', 0, 1, '/openils/conf/opensrf_core.xml', '/openils/conf/fm_IDL.xml', 'USMARC');
-
-my ($db_driver,$db_host,$db_name,$db_user,$db_pw) =
-	('Pg','localhost','evergreen','postgres','postgres');
-
-GetOptions(
-	'marctype=s'	=> \$marctype,
-	'startid=i'	=> \$recid,
-	'idfield=s'	=> \$id_field,
-	'idsubfield=s'	=> \$id_subfield,
-	'user=s'	=> \$user,
-	'encoding=s'	=> \$enc,
-	'hard_encoding'	=> \$force_enc,
-	'keyfile=s'	=> \$keyfile,
-	'config=s'	=> \$config,
-	'file=s'	=> \@files,
-	'trash=s'	=> \@trash_fields,
-	'xml_idl=s'	=> \$idlfile,
-	'dontuse=s'	=> \$dontuse_file,
-	"db_driver=s"		=> \$db_driver,
-	"db_host=s"		=> \$db_host,
-	"db_name=s"		=> \$db_name,
-	"db_user=s"		=> \$db_user,
-	"db_pw=s"		=> \$db_pw,
-	'quiet'		=> \$quiet
-);
-
-if ($enc) {
-	MARC::Charset->ignore_errors(1);
-	MARC::Charset->assume_encoding($enc);
-}
-
-if (uc($marctype) eq 'XML') {
-	'open'->use(':utf8');
-} else {
-	bytes->use();
-}
-
-@files = @ARGV if (!@files);
-
-my @ses;
-my @req;
-my %processing_cache;
-
-my $dsn = "dbi:$db_driver:host=$db_host;dbname=$db_name";
-
-if (!$recid) {
-    my $table = 'biblio_record_entry';
-    $table = 'biblio.record_entry' if ($db_driver eq 'Pg');
-
-	my $dbh = DBI->connect($dsn,$db_user,$db_pw);
-	my $sth = $dbh->prepare("SELECT MAX(id) + 1 FROM $table");
-
-	$sth->execute;
-	$sth->bind_col(1, \$recid);
-	$sth->fetch;
-	$sth->finish;
-	$recid++;
-	$dbh->disconnect;
-}
-
-my %source_map = (      
-	o  => 'OCLC',
-	i  => 'ISxN',    
-	l  => 'LCCN',
-	s  => 'System',  
-	g  => 'Gutenberg',  
-);                              
-
-Fieldmapper->import(IDL => $idlfile);
-
-my %keymap;
-if ($keyfile) {
-	open F, $keyfile or die "Couldn't open key file $keyfile";
-	while (<F>) {
-		if ( /^(\d+)\|(\S+)/o ) {
-			$keymap{$1} = $2;
-		}
-	}
-	close(F);
-}
-
-my %dontuse_id;
-if ($dontuse_file) {
-	open F, $dontuse_file or die "Couldn't open used-id file $dontuse_file";
-	while (<F>) {
-		chomp;
-		s/^\s*//;
-		s/\s*$//;
-		$dontuse_id{$_} = 1;
-	}
-	close(F);
-}
-
-select STDERR; $| = 1;
-select STDOUT; $| = 1;
-
-my $batch = new MARC::Batch ( $marctype, @files );
-$batch->strict_off();
-$batch->warnings_off();
-
-my %used_ids;
-my $starttime = time;
-my $rec;
-my $count = 0;
-while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
-	next if ($rec == -1);
-	my $id;
-
-	$recid++;
-	while (exists $used_ids{$recid}) {
-		$recid++;
-	}
-	$used_ids{$recid} = 1;
-
-	if ($id_field) {
-		my $field = $rec->field($id_field);
-		if ($field) {
-			if ($field->is_control_field) {
-				$id = $field->data;
-			} else {
-				$id = $field->subfield($id_subfield);
-			}
-
-			$id =~ s/\D+//gso;
-		}
-		$id = '' if (exists $dontuse_id{$id});
-	}
-
-	if (!$id) {
-		$id = $recid;
-	}
-
-	if ($keyfile) {
-		if (my $tcn = $keymap{$id}) {
-			$rec->delete_field( $_ ) for ($rec->field($id_field));
-			$rec->append_fields( MARC::Field->new( $id_field, '', '', $id_subfield, $tcn ) );
-		} else {
-			$count++;
-			next;
-		}
-	}
-
-	my $tcn;
-	($rec, $tcn) = preprocess($rec, $id);
-
-	$tcn->add_subfields(c => $id);
-
-	$rec->delete_field( $_ ) for ($rec->field($id_field));
-	$rec->append_fields( $tcn );
-
-	if (!$rec) {
-		next;
-	}
-
-	my $tcn_value = $rec->subfield('901' => 'a') || "SYS$id";
-	my $tcn_source = $rec->subfield('901' => 'b') || 'System';
-
-	(my $xml = $rec->as_xml_record()) =~ s/\n//sog;
-	$xml =~ s/^<\?xml.+\?\s*>//go;
-	$xml =~ s/>\s+</></go;
-	$xml =~ s/\p{Cc}//go;
-	$xml = entityize($xml);
-	$xml =~ s/[\x00-\x1f]//go;
-
-	my $bib = new Fieldmapper::biblio::record_entry;
-	$bib->id($id);
-	$bib->active('t');
-	$bib->deleted('f');
-	$bib->marc($xml);
-	$bib->creator($user);
-	$bib->create_date('now');
-	$bib->editor($user);
-	$bib->edit_date('now');
-	$bib->tcn_source($tcn_source);
-	$bib->tcn_value($tcn_value);
-	$bib->last_xact_id('IMPORT-'.$starttime);
-
-	print OpenSRF::Utils::JSON->perl2JSON($bib)."\n";
-	$dontuse_id{$tcn_value} = 1;
-
-	$count++;
-
-	if (!$quiet && !($count % 50)) {
-		print STDERR "\r$count\t". $count / (time - $starttime);
-	}
-}
-
-sub preprocess {
-	my $rec = shift;
-	my $id = shift;
-
-	my ($source, $value) = ('','');
-
-	$id = '' if (exists $dontuse_id{$id});
-
-	if (!$id) {
-		my $f = $rec->field('001');
-		$id = $f->data if ($f);
-		$id = '' if (exists $dontuse_id{$id});
-	}
-
-	if (!$id || exists $dontuse_id{$source.$id}) {
-		my $f = $rec->field('000');
-		$id = $f->data if ($f);
-		$source = 'g' if ($f); # only PG seems to use this
-	}
-
-        if (!$id || exists $dontuse_id{$source.$id}) {
-                my $f = $rec->field('020');
-                $id = $f->subfield('a') if ($f);
-		$source = 'i' if ($f);
-        }
-
-        if (!$id || exists $dontuse_id{$source.$id}) {
-                my $f = $rec->field('022');
-                $id = $f->subfield('a') if ($f);
-		$source = 'i' if ($f);
-        }
-
-        if (!$id || exists $dontuse_id{$source.$id}) {
-                my $f = $rec->field('010');
-                $id = $f->subfield('a') if ($f);
-		$source = 'l' if ($f);
-        }
-
-	$rec->delete_field($_) for ($rec->field('901', $id_field, @trash_fields));
-
-	if ($id) {
-		$id =~ s/\s*$//o;
-		$id =~ s/^\s*//o;
-		$id =~ s/^(\S+).*$/$1/o;
-
-		$id = $source.$id if ($source);
-
-		($source, $value) = $id =~ /^(.)(.+)$/o;
-		if ($id =~ /^o(\d+)$/o) {
-			$id = "ocm$1";
-			$source = 'o';
-		}
-	}
-
-	if ($id && exists $dontuse_id{$id}) {
-		warn "\n!!! TCN $id is already in use.  Using the record ID ($recid) as a system-generated TCN.\n";
-		$id = '';
-	}
-
-	if (!$id) {
-		$source = 's';
-		$id = 's'.$recid;
-	}
-
-	my $tcn = MARC::Field->new(
-		'901' => ('', ''),
-		a => $id,
-		b => do { $source_map{$source} || 'System' },
-	);
-
-	return ($rec,$tcn);
-}
-
-sub entityize {
-        my $stuff = shift;
-        my $form = shift;
-
-        if ($form and $form eq 'D') {
-                $stuff = NFD($stuff);
-        } else {
-                $stuff = NFC($stuff);
-        }
-
-        $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
-        return $stuff;
-}
-

Deleted: import_demo/trunk/parallel_pg_loader.pl
===================================================================
--- import_demo/trunk/parallel_pg_loader.pl	2008-12-01 19:01:20 UTC (rev 68)
+++ import_demo/trunk/parallel_pg_loader.pl	2008-12-01 21:55:03 UTC (rev 69)
@@ -1,118 +0,0 @@
-#!/usr/bin/perl
-use strict;
-
-use lib '/openils/lib/perl5/';
-
-use OpenSRF::System;
-use OpenSRF::EX qw/:try/;
-use OpenSRF::Utils::SettingsClient;
-use OpenILS::Utils::Fieldmapper;
-use OpenSRF::Utils::JSON;
-use FileHandle;
-
-use Time::HiRes qw/time/;
-use Getopt::Long;
-
-my @files;
-my ($config, $output, @auto, @order, @wipe) =
-	('/openils/conf/opensrf_core.xml', 'pg_loader-output');
-
-GetOptions(
-	'config=s'	=> \$config,
-	'output=s'	=> \$output,
-	'wipe=s'	=> \@wipe,
-	'autoprimary=s'	=> \@auto,
-	'order=s'	=> \@order,
-);
-
-my $pwd = `pwd`;
-chop($pwd);
-
-my %lineset;
-my %fieldcache;
-
-OpenSRF::System->bootstrap_client( config_file => $config );
-Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
-
-my $main_out = FileHandle->new(">$output.sql") if ($output);
-
-binmode($main_out,'utf8');
-
-$main_out->print("SET CLIENT_ENCODING TO 'UNICODE';\n\n");
-$main_out->print("BEGIN;\n\n");
-
-my %out_files;
-my %out_headers;
-for my $h (@order) {
-	$out_files{$h} = FileHandle->new(">$output.$h.sql");
-	binmode($out_files{$h},'utf8');
-}
-
-my $count = 0;
-my $starttime = time;
-while ( my $rec = <> ) {
-	next unless ($rec);
-
-	my $row;
-	try {
-		$row = OpenSRF::Utils::JSON->JSON2perl($rec);
-	} catch Error with {
-		my $e = shift;
-		warn "\n\n !!! Error : $e \n\n at or around line $count\n";
-	};
-	next unless ($row);
-
-	my $class = $row->class_name;
-	my $hint = $row->json_hint;
-
-	next unless ( grep /$hint/, @order );
-
-	if (!$fieldcache{$hint}) {
-		my @cols = $row->real_fields;
-		if (grep { $_ eq $hint} @auto) {
-			@cols = grep { $_ ne $class->Identity } @cols;
-		}
-
-		$fieldcache{$hint} =
-			{ table => $class->Table,
-			  sequence => $class->Sequence,
-			  pkey => $class->Identity,
-			  fields => \@cols,
-			};
-
-		my $fields = join(',', @{ $fieldcache{$hint}{fields} });
-		$main_out->print( "DELETE FROM $fieldcache{$hint}{table};\n" ) if (grep {$_ eq $hint } @wipe);
-		$main_out->print( "COPY $fieldcache{$hint}{table} ($fields) FROM '$pwd/$output.$hint.sql';\n" );
-
-	}
-
-	my $line = [map { $row->$_ } @{ $fieldcache{$hint}{fields} }];
-	my @data;
-	my $x = 0;
-	for my $d (@$line) {
-		if (!defined($d)) {
-			$d = '\N';
-		} else {
-			$d =~ s/\f/\\f/gos;
-			$d =~ s/\n/\\n/gos;
-			$d =~ s/\r/\\r/gos;
-			$d =~ s/\t/\\t/gos;
-			$d =~ s/\\/\\\\/gos;
-		}
-		if ($hint eq 'bre' and $fieldcache{$hint}{fields}[$x] eq 'quality') {
-			$d = int($d);
-		}
-		push @data, $d;
-		$x++;
-	}
-	$out_files{$hint}->print( join("\t", @data)."\n" );
-
-	if (!($count % 500)) {
-		print STDERR "\r$count\t". $count / (time - $starttime);
-	}
-
-	$count++;
-}
-
-$main_out->print("-- COMMIT;\n\n");
-$main_out->close; 

Deleted: import_demo/trunk/quick_metarecord_map.sql
===================================================================
--- import_demo/trunk/quick_metarecord_map.sql	2008-12-01 19:01:20 UTC (rev 68)
+++ import_demo/trunk/quick_metarecord_map.sql	2008-12-01 21:55:03 UTC (rev 69)
@@ -1,27 +0,0 @@
-BEGIN;
-
-ALTER TABLE metabib.metarecord_source_map DROP CONSTRAINT metabib_metarecord_source_map_metarecord_fkey;
-
-TRUNCATE metabib.metarecord;
-TRUNCATE metabib.metarecord_source_map;
-
-INSERT INTO metabib.metarecord (fingerprint,master_record)
-	SELECT	fingerprint,id
-	  FROM	(SELECT	DISTINCT ON (fingerprint)
-	  		fingerprint, id, quality
-		  FROM	biblio.record_entry
-		  ORDER BY fingerprint, quality desc) AS x
-	  WHERE	fingerprint IS NOT NULL;
-
-INSERT INTO metabib.metarecord_source_map (metarecord,source)
-	SELECT	m.id, b.id
-	  FROM	biblio.record_entry b
-	  	JOIN metabib.metarecord m ON (m.fingerprint = b.fingerprint);
-
-ALTER TABLE metabib.metarecord_source_map ADD CONSTRAINT metabib_metarecord_source_map_metarecord_fkey FOREIGN KEY (metarecord) REFERENCES metabib.metarecord (id);
-
-COMMIT;
-
-VACUUM FULL ANALYZE VERBOSE metabib.metarecord;
-VACUUM FULL ANALYZE VERBOSE metabib.metarecord_source_map;
-
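
Though it is removed here, quick_metarecord_map.sql documents the standard rebuild: the DISTINCT ON (fingerprint) ... ORDER BY fingerprint, quality DESC subquery elects the highest-quality record for each fingerprint as the metarecord's master, and every bib is then mapped to the metarecord sharing its fingerprint. A quick post-rebuild sanity check, sketched against the usual Evergreen schema (column names assumed from the statements above):

    -- Any rows returned are fingerprinted bibs that failed to get a
    -- metarecord mapping and deserve a closer look.
    SELECT b.id
      FROM biblio.record_entry b
           LEFT JOIN metabib.metarecord_source_map msm ON (msm.source = b.id)
     WHERE b.fingerprint IS NOT NULL
       AND msm.metarecord IS NULL;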


