[open-ils-commits] r339 - conifer/trunk/tools/migration-scripts (dbs)
svn at svn.open-ils.org
svn at svn.open-ils.org
Thu Apr 16 14:07:08 EDT 2009
Author: dbs
Date: 2009-04-16 14:07:04 -0400 (Thu, 16 Apr 2009)
New Revision: 339
Added:
conifer/trunk/tools/migration-scripts/fixURIs.pl
Log:
Script for cleaning up URIs to conform to the URIs-as-copies ingest scheme
Added: conifer/trunk/tools/migration-scripts/fixURIs.pl
===================================================================
--- conifer/trunk/tools/migration-scripts/fixURIs.pl (rev 0)
+++ conifer/trunk/tools/migration-scripts/fixURIs.pl 2009-04-16 18:07:04 UTC (rev 339)
@@ -0,0 +1,120 @@
+#!/usr/bin/perl -w
+use strict;
+use warnings;
+use MARC::File::XML( BinaryEncoding => 'utf8', RecordFormat => 'USMARC' );
+use MARC::Field;
+
+# Clean up 856 (URI) fields prior to batch ingest.
+#
+# Usage: fixURIs.pl <input-marcxml-file>
+# Every record read is written to bibs_edited.xml, whether edited or not.
+#
+# For each 856 field with a $u that points at a known proxy server:
+#   * Trim leading/trailing whitespace from the proxied $u
+#   * Ensure ind1 is 1 or 4 (set to 4 otherwise)
+#   * Ensure ind2 is 0 or 1 (set to 1 otherwise)
+#   * Ensure $9 = aou.shortname of the org unit that owns the proxy
+
+# Proxy hostname => org unit shortname, tried in this order.
+my @proxy_orgunits = (
+    [ 'librweb.laurentian.ca',    'OSUL'  ],
+    [ 'libproxy.auc.ca',          'OSTMA' ],
+    [ 'normedproxy.lakeheadu.ca', 'OSM'   ],
+);
+
+# Return the org unit shortname for a proxied URL, or nothing if the URL
+# does not mention one of the known proxy hosts.
+sub orgunit_for_url {
+    my ($url) = @_;
+    foreach my $proxy (@proxy_orgunits) {
+        my ($host, $orgunit) = @$proxy;
+        # \Q..\E makes the dots in the hostname match literal dots only
+        return $orgunit if $url =~ m/\Q$host\E/;
+    }
+    return;
+}
+
+# Bring one 856 field in line with the URIs-as-copies scheme.
+# Returns the number of edits made; 0 means no proxied URL was found and
+# the field was left alone.
+sub fix_uri_field {
+    my ($field) = @_;
+    my $orgunit;
+    my $trimmed = 0;
+    my @rebuilt;
+
+    # There should only be one $u, but walk every subfield anyway.  If
+    # several proxied URLs turn up, the last one decides $9.
+    foreach my $pair ($field->subfields()) {
+        my ($code, $value) = @$pair;
+        if ($code eq 'u' and defined $value) {
+            my $match = orgunit_for_url($value);
+            if ($match) {
+                $orgunit = $match;
+                (my $clean = $value) =~ s/^\s+|\s+$//g;
+                if ($clean ne $value) {
+                    $value = $clean;
+                    $trimmed++;
+                }
+            }
+        }
+        push @rebuilt, $code, $value;
+    }
+    return 0 unless $orgunit;
+
+    my $edits = $trimmed;
+    if ($trimmed) {
+        # update(u => ...) only changes the first $u, so rebuild the field
+        # to be sure each trimmed value lands in its own subfield.
+        $field->replace_with(MARC::Field->new(
+            $field->tag(), $field->indicator(1), $field->indicator(2),
+            @rebuilt
+        ));
+    }
+
+    # Compare against the accepted values explicitly: a plain truth test
+    # would treat an indicator of '0' as absent and never correct it.
+    my $ind1 = $field->indicator(1);
+    if (!defined $ind1 or ($ind1 ne '1' and $ind1 ne '4')) {
+        $field->update(ind1 => '4');
+        $edits++;
+    }
+
+    my $ind2 = $field->indicator(2);
+    if (!defined $ind2 or ($ind2 ne '0' and $ind2 ne '1')) {
+        $field->update(ind2 => '1');
+        $edits++;
+    }
+
+    # Risking that we only have one subfield 9 here
+    my $aou = $field->subfield('9');
+    if (!defined $aou or $aou ne $orgunit) {
+        $field->update(9 => $orgunit);
+        $edits++;
+    }
+
+    return $edits;
+}
+
+my $input_path = shift;
+die "Usage: $0 <input-marcxml-file>\n" unless defined $input_path;
+
+my $input = MARC::File::XML->in($input_path)
+    or die "Unable to open MARCXML input '$input_path'\n";
+my $output = MARC::File::XML->out('bibs_edited.xml');
+
+my $total   = 0;
+my $touched = 0;
+while (my $marc = $input->next()) {
+    $total++;
+    my $edited = 0;
+    foreach my $uri ($marc->field('856')) {
+        $edited += fix_uri_field($uri);
+    }
+    $touched++ if $edited;
+    $output->write($marc);
+}
+$output->close();
+
+# Summary goes to STDERR so it cannot get mixed into redirected output
+print STDERR "Edited $touched of $total records\n";
More information about the open-ils-commits
mailing list