[open-ils-commits] r339 - conifer/trunk/tools/migration-scripts (dbs)

svn at svn.open-ils.org svn at svn.open-ils.org
Thu Apr 16 14:07:08 EDT 2009


Author: dbs
Date: 2009-04-16 14:07:04 -0400 (Thu, 16 Apr 2009)
New Revision: 339

Added:
   conifer/trunk/tools/migration-scripts/fixURIs.pl
Log:
Script for cleaning up URIs to conform to the URIs-as-copies ingest scheme


Added: conifer/trunk/tools/migration-scripts/fixURIs.pl
===================================================================
--- conifer/trunk/tools/migration-scripts/fixURIs.pl	                        (rev 0)
+++ conifer/trunk/tools/migration-scripts/fixURIs.pl	2009-04-16 18:07:04 UTC (rev 339)
@@ -0,0 +1,90 @@
+#!/usr/bin/perl -w
+use strict;
+use MARC::File::XML( BinaryEncoding => 'utf8', RecordFormat => 'USMARC' );
+
+# Clean up URIs prior to batch ingest
+#
+# Usage: fixURIs.pl <input MARCXML file> [output MARCXML file]
+#   The output file defaults to 'bibs_edited.xml'.
+#
+# For every 856 field whose $u matches one of the known proxy hosts:
+#   * Trim leading/trailing whitespace from the URL
+#   * Ensure ind1 = 1 or 4 (anything else becomes 4)
+#   * Ensure ind2 = 0 or 1 (anything else becomes 1)
+#   * Ensure $9 = aou.shortname
+# Every record is written to the output file, edited or not.
+
+# Proxy hostname => aou.shortname, checked in this order
+my @proxies = (
+	[ 'librweb.laurentian.ca'    => 'OSUL'  ],
+	[ 'libproxy.auc.ca'          => 'OSTMA' ],
+	[ 'normedproxy.lakeheadu.ca' => 'OSM'   ],
+);
+
+my ($infile, $outfile) = @ARGV;
+die "Usage: $0 <input MARCXML file> [output MARCXML file]\n"
+	unless defined $infile;
+$outfile = 'bibs_edited.xml' unless defined $outfile;
+
+# in() hands back undef instead of dying when the file cannot be opened
+my $input = MARC::File::XML->in( $infile )
+	or die "Could not open $infile for reading\n";
+my $output = MARC::File::XML->out( $outfile );
+
+my $touched = 0;
+while (my $marc = $input->next()) {
+	my $edited = 0;
+	foreach my $uri ($marc->field('856')) {
+
+		# There's no way we should have multiples, but let's iterate anyway.
+		# NOTE: MARC::Field's update() only rewrites the first matching
+		# subfield, so a trimmed URL always lands in the first $u.
+		foreach my $url ($uri->subfield('u')) {
+
+			# Scoped per URL so a non-proxy URL never inherits an earlier match
+			my $orgunit;
+			foreach my $proxy (@proxies) {
+				my ($host, $shortname) = @$proxy;
+				if ($url =~ m/\Q$host\E/) {
+					$orgunit = $shortname;
+					last;
+				}
+			}
+			next unless $orgunit;
+
+			my $clean_url = $url;
+			$clean_url =~ s/^\s+//;
+			$clean_url =~ s/\s+$//;
+			if ($url ne $clean_url) {
+				$uri->update(u => $clean_url);
+				$edited++;
+			}
+
+			# Compare against the allowed values rather than testing for
+			# truth, so that '0' and empty indicators get corrected too
+			my $ind1 = $uri->indicator(1);
+			if (!defined $ind1 or ($ind1 ne '1' and $ind1 ne '4')) {
+				$uri->update(ind1 => '4');
+				$edited++;
+			}
+
+			my $ind2 = $uri->indicator(2);
+			if (!defined $ind2 or ($ind2 ne '0' and $ind2 ne '1')) {
+				$uri->update(ind2 => '1');
+				$edited++;
+			}
+
+			# Risking that we only have one subfield 9 here
+			my $aou = $uri->subfield('9');
+			if (!defined $aou or $aou ne $orgunit) {
+				$uri->update(9 => $orgunit);
+				$edited++;
+			}
+		}
+	}
+	$touched++ if $edited;
+	$output->write($marc);
+}
+$input->close();
+$output->close();
+print STDERR "Edited $touched record(s); wrote $outfile\n";



More information about the open-ils-commits mailing list