[open-ils-commits] r7918 - trunk/Evergreen/src/extras/import

svn at svn.open-ils.org svn at svn.open-ils.org
Tue Oct 23 09:45:43 EDT 2007


Author: dbs
Date: 2007-10-23 09:32:07 -0400 (Tue, 23 Oct 2007)
New Revision: 7918

Modified:
   trunk/Evergreen/src/extras/import/import_holdings.pl
Log:
Make TCN parsing match marc2bre.pl.
Remove dependency on external TCN->id map file.

Parse XML using DOM rather than more fragile line-oriented mode.
Because the whole MARC file is now parsed into a single in-memory DOM, this needs to be tested with large sets of MARC records, with memory usage watched closely!


Modified: trunk/Evergreen/src/extras/import/import_holdings.pl
===================================================================
--- trunk/Evergreen/src/extras/import/import_holdings.pl	2007-10-22 21:15:47 UTC (rev 7917)
+++ trunk/Evergreen/src/extras/import/import_holdings.pl	2007-10-23 13:32:07 UTC (rev 7918)
@@ -36,19 +36,18 @@
 
 $|=1;
 
-my ($userid,$cn_id,$cp_id,$cp_file,$cn_file,$map_file,$lib_map_field,$id_tag) =
-	(1, 1, 1, 'asset_copy.sql','asset_volume.sql','record_id_map.pl','shortname','/*/*/*[@tag="035"][1]');
+my ($userid,$cn_id,$cp_id,$cp_file,$cn_file,$lib_map_field,$id_tag, $marc_file) =
+	(1, 1, 1, 'asset_copy.sql','asset_volume.sql','shortname','./controlfield[@tag="035"]');
 
 my ($holding_tag,$bc,$lbl,$own,$pr,$cpn,$avail) =
-	('/*/*/*[@tag="999"]','i','a','m','p','c','k');
+	('./datafield[@tag="999"]','i','a','m','p','c','k');
 
 my ($db_driver,$db_host,$db_name,$db_user,$db_pw) =
-	('Pg','localhost','demo-dev','postgres','postgres');
+	('Pg','localhost','evergreen','postgres','postgres');
 
 GetOptions (	
 	"copy_file=s"		=> \$cp_file,
 	"volume_file=s"		=> \$cn_file,
-	"tcn_map_file=s"	=> \$map_file,
 	"userid=i"		=> \$userid,
 	"first_volume=i"	=> \$cn_id,
 	"first_copy=i"		=> \$cp_id,
@@ -66,6 +65,7 @@
 	"item_price=s"		=> \$pr,
 	"item_copy_number=s"	=> \$cpn,
 	"item_copy_status=s"	=> \$avail,
+	"marc_file=s"	=> \$marc_file,
 
 );
 
@@ -83,10 +83,10 @@
 while (my $lib = $sth->fetchrow_arrayref) {
 	$$lib_map{$$lib[0]} = $$lib[1];
 }
-	
-my $tcn_map;
-eval `cat $map_file`;
 
+my $tcn_sth = $dbh->prepare("SELECT id FROM biblio.record_entry WHERE tcn_value = ?");
+my $rec_id;
+
 open CP, ">$cp_file" or die "Can't open $cp_file!  $!\n";
 open CN, ">$cn_file" or die "Can't open $cn_file!  $!\n";
 
@@ -103,54 +103,52 @@
 
 my $xact_id = time;
 
-my $parser = XML::LibXML->new;
+my $parser = XML::LibXML->new();
 
 my $cn_map;
+my $doc;
 
-my $xml = '';
-while ( $xml .= <STDIN> ) {
-	chomp $xml;
-	next unless $xml;
+$doc = $parser->parse_file( $marc_file );
+my $xc = XML::LibXML::XPathContext->new($doc);
+my @records = $xc->findnodes('//record');
+foreach my $record (@records) {
 
 	my $tcn;
-	my $doc;
 	my $success = 0;
 	try {
-		$doc = $parser->parse_string($xml);;
-		$tcn = $doc->documentElement->findvalue( '//*[@tag="035"][1]' );
+		$tcn = $xc->findvalue( $id_tag, $record );
 		$success = 1;
 	} catch Error with {
 		my $e = shift;
 		warn $e;
-		warn $xml;
 	};	
 	next unless $success;
 
-	$tcn =~ s/^.*?(\w+)\s*$/$1/go;
+	$tcn =~ s/^\s*(\.+)\s*/$1/o;
+	$tcn =~ s/\s+/_/go;
 	
 	unless ($tcn) {
 		warn "\nNo TCN found in rec!!\n";
-		$xml = '';
 		next;
 	}
-	$tcn = "_$tcn";
 
-	unless (exists($$tcn_map{$tcn})) {
+	$tcn_sth->execute($tcn);
+	$tcn_sth->bind_col(1, \$rec_id);
+	$tcn_sth->fetch;
+
+	unless ($rec_id) {
 		warn "\n !! TCN $tcn not in the map!\n";
-		$xml = '';
 		next;
 	}
 
-	my $rec_id = $$tcn_map{$tcn};
+	for my $node ($xc->findnodes($holding_tag, $record)) {
+		my $barcode = $xc->findvalue( "./*[\@code=\"$bc\"]", $node );
+		my $label = $xc->findvalue( "./*[\@code=\"$lbl\"]", $node );
+		my $owning_lib = $$lib_map{ $xc->findvalue( "./*[\@code=\"$own\"]", $node ) };
+		my $price = $xc->findvalue( "./*[\@code=\"$pr\"]", $node );
+		my $copy_number = $xc->findvalue( "./*[\@code=\"$cpn\"]", $node ) || 0;
+		my $available = $xc->findvalue( "./*[\@code=\"$avail\"]", $node ) || '';
 
-	for my $node ($doc->documentElement->findnodes($holding_tag)) {
-		my $barcode = $node->findvalue( "*[\@code=\"$bc\"]" );
-		my $label = $node->findvalue( "*[\@code=\"$lbl\"]" );
-		my $owning_lib = $$lib_map{ $node->findvalue( "*[\@code=\"$own\"]" ) };
-		my $price = $node->findvalue( "*[\@code=\"$pr\"]" );
-		my $copy_number = $node->findvalue( "*[\@code=\"$cpn\"]" ) || 0;
-		my $available = $node->findvalue( "*[\@code=\"$avail\"]" ) || '';
-
 		my $status = $status_map{$available} || 0;
 
 		next unless $barcode;
@@ -181,7 +179,6 @@
 		print 'c';
 		$cp_id++;
 	}
-	$xml = '';
 }
 
 print CN "\\.\n";



More information about the open-ils-commits mailing list