[open-ils-commits] r18811 - in branches/rel_2_0/Open-ILS/src/sql/Pg: . upgrade (dbs)

svn at svn.open-ils.org svn at svn.open-ils.org
Sat Nov 20 14:56:15 EST 2010


Author: dbs
Date: 2010-11-20 14:56:10 -0500 (Sat, 20 Nov 2010)
New Revision: 18811

Added:
   branches/rel_2_0/Open-ILS/src/sql/Pg/upgrade/0465.function.maintain_control_numbers.sql
Modified:
   branches/rel_2_0/Open-ILS/src/sql/Pg/002.functions.config.sql
   branches/rel_2_0/Open-ILS/src/sql/Pg/002.schema.config.sql
   branches/rel_2_0/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql
Log:
Address maintain_control_numbers() database function bug #677160

Jason Stephenson reported a bug handling records with multiple
001 or 003 fields, and supplied a set of test records to
reproduce the condition. The bug caused the ingest process
to throw a database error, rolling back the transaction and
preventing the actual ingest of those records.

The solution was to simplify the logic in maintain_control_numbers().
Now, in the case that there are either multiple 001s or 003s in the
incoming record, we simply delete all of the 003s and 001s and
create the desired 001 and 003. Also, if there are not exactly one
001 and one 003 in the incoming record, we do not try to preserve
one of those values in the 035 as it would be close to meaningless.

Many thanks to Jason for the clear bug report and test cases!


Modified: branches/rel_2_0/Open-ILS/src/sql/Pg/002.functions.config.sql
===================================================================
--- branches/rel_2_0/Open-ILS/src/sql/Pg/002.functions.config.sql	2010-11-20 19:55:03 UTC (rev 18810)
+++ branches/rel_2_0/Open-ILS/src/sql/Pg/002.functions.config.sql	2010-11-20 19:56:10 UTC (rev 18811)
@@ -513,8 +513,6 @@
 
 my ($create, $munge) = (0, 0);
 
-# Incoming MARC records may have multiple 001s or 003s, despite the spec
-my @control_ids = $record->field('003');
 my @scns = $record->field('035');
 
 foreach my $id_field ('001', '003') {
@@ -528,32 +526,27 @@
     }
 
     # Create the 001/003 if none exist
-    if (scalar(@controls) == 0) {
-        $record->insert_fields_ordered(MARC::Field->new($id_field, $spec_value));
-        $create = 1;
-    } elsif (scalar(@controls) > 1) {
-        # Do we already have the right 001/003 value in the existing set?
+    if (scalar(@controls) == 1) {
+        # Only one field; check to see if we need to munge it
         unless (grep $_->data() eq $spec_value, @controls) {
             $munge = 1;
         }
-
+    } else {
         # Delete the other fields, as with more than 1 001/003 we do not know which 003/001 to match
         foreach my $control (@controls) {
             unless ($control->data() eq $spec_value) {
                 $record->delete_field($control);
             }
         }
-    } else {
-        # Only one field; check to see if we need to munge it
-        unless (grep $_->data() eq $spec_value, @controls) {
-            $munge = 1;
-        }
+        $record->insert_fields_ordered(MARC::Field->new($id_field, $spec_value));
+        $create = 1;
     }
 }
 
-# Now, if we need to munge the 001, we will first push the existing 001/003 into the 035;
-# but if the record did not have a 003 to begin with, skip this process
-if ($munge && scalar(@control_ids) > 0) {
+# Now, if we need to munge the 001, we will first push the existing 001/003
+# into the 035; but if the record did not have one (and one only) 001 and 003
+# to begin with, skip this process
+if ($munge and not $create) {
     my $scn = "(" . $record->field('003')->data() . ")" . $record->field('001')->data();
 
     # Do not create duplicate 035 fields

Modified: branches/rel_2_0/Open-ILS/src/sql/Pg/002.schema.config.sql
===================================================================
--- branches/rel_2_0/Open-ILS/src/sql/Pg/002.schema.config.sql	2010-11-20 19:55:03 UTC (rev 18810)
+++ branches/rel_2_0/Open-ILS/src/sql/Pg/002.schema.config.sql	2010-11-20 19:56:10 UTC (rev 18811)
@@ -70,7 +70,7 @@
     install_date    TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
 );
 
-INSERT INTO config.upgrade_log (version) VALUES ('0464'); -- dbs
+INSERT INTO config.upgrade_log (version) VALUES ('0465'); -- dbs
 
 CREATE TABLE config.bib_source (
 	id		SERIAL	PRIMARY KEY,

Modified: branches/rel_2_0/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql
===================================================================
--- branches/rel_2_0/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql	2010-11-20 19:55:03 UTC (rev 18810)
+++ branches/rel_2_0/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql	2010-11-20 19:56:10 UTC (rev 18811)
@@ -16966,8 +16966,6 @@
 
 my ($create, $munge) = (0, 0);
 
-# Incoming MARC records may have multiple 001s or 003s, despite the spec
-my @control_ids = $record->field('003');
 my @scns = $record->field('035');
 
 foreach my $id_field ('001', '003') {
@@ -16981,32 +16979,143 @@
     }
 
     # Create the 001/003 if none exist
-    if (scalar(@controls) == 0) {
-        $record->insert_fields_ordered(MARC::Field->new($id_field, $spec_value));
-        $create = 1;
-    } elsif (scalar(@controls) > 1) {
-        # Do we already have the right 001/003 value in the existing set?
+    if (scalar(@controls) == 1) {
+        # Only one field; check to see if we need to munge it
         unless (grep $_->data() eq $spec_value, @controls) {
             $munge = 1;
         }
-
+    } else {
         # Delete the other fields, as with more than 1 001/003 we do not know which 003/001 to match
         foreach my $control (@controls) {
             unless ($control->data() eq $spec_value) {
                 $record->delete_field($control);
             }
         }
+        $record->insert_fields_ordered(MARC::Field->new($id_field, $spec_value));
+        $create = 1;
+    }
+}
+
+# Now, if we need to munge the 001, we will first push the existing 001/003
+# into the 035; but if the record did not have one (and one only) 001 and 003
+# to begin with, skip this process
+if ($munge and not $create) {
+    my $scn = "(" . $record->field('003')->data() . ")" . $record->field('001')->data();
+
+    # Do not create duplicate 035 fields
+    unless (grep $_->subfield('a') eq $scn, @scns) {
+        $record->insert_fields_ordered(MARC::Field->new('035', '', '', 'a' => $scn));
+    }
+}
+
+# Set the 001/003 and update the MARC
+if ($create or $munge) {
+    $record->field('001')->data($rec_id);
+    $record->field('003')->data($ou_cni);
+
+    my $xml = $record->as_xml_record();
+    $xml =~ s/\n//sgo;
+    $xml =~ s/^<\?xml.+\?\s*>//go;
+    $xml =~ s/>\s+</></go;
+    $xml =~ s/\p{Cc}//go;
+
+    # Embed a version of OpenILS::Application::AppUtils->entityize()
+    # to avoid having to set PERL5LIB for PostgreSQL as well
+
+    # If we are going to convert non-ASCII characters to XML entities,
+    # we had better be dealing with a UTF8 string to begin with
+    $xml = decode_utf8($xml);
+
+    $xml = NFC($xml);
+
+    # Convert raw ampersands to entities
+    $xml =~ s/&(?!\S+;)/&amp;/gso;
+
+    # Convert Unicode characters to entities
+    $xml =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
+
+    $xml =~ s/[\x00-\x1f]//go;
+    $_TD->{new}{marc} = $xml;
+
+    return "MODIFY";
+}
+
+CREATE OR REPLACE FUNCTION maintain_control_numbers() RETURNS TRIGGER AS $func$
+use strict;
+use MARC::Record;
+use MARC::File::XML (BinaryEncoding => 'UTF-8');
+use Encode;
+use Unicode::Normalize;
+
+my $record = MARC::Record->new_from_xml($_TD->{new}{marc});
+my $schema = $_TD->{table_schema};
+my $rec_id = $_TD->{new}{id};
+
+# Short-circuit if maintaining control numbers per MARC21 spec is not enabled
+my $enable = spi_exec_query("SELECT enabled FROM config.global_flag WHERE name = 'cat.maintain_control_numbers'");
+if (!($enable->{processed}) or $enable->{rows}[0]->{enabled} eq 'f') {
+    return;
+}
+
+# Get the control number identifier from an OU setting based on $_TD->{new}{owner}
+my $ou_cni = 'EVRGRN';
+
+my $owner;
+if ($schema eq 'serial') {
+    $owner = $_TD->{new}{owning_lib};
+} else {
+    # are.owner and bre.owner can be null, so fall back to the consortial setting
+    $owner = $_TD->{new}{owner} || 1;
+}
+
+my $ous_rv = spi_exec_query("SELECT value FROM actor.org_unit_ancestor_setting('cat.marc_control_number_identifier', $owner)");
+if ($ous_rv->{processed}) {
+    $ou_cni = $ous_rv->{rows}[0]->{value};
+    $ou_cni =~ s/"//g; # Stupid VIM syntax highlighting"
+} else {
+    # Fall back to the shortname of the OU if there was no OU setting
+    $ous_rv = spi_exec_query("SELECT shortname FROM actor.org_unit WHERE id = $owner");
+    if ($ous_rv->{processed}) {
+        $ou_cni = $ous_rv->{rows}[0]->{shortname};
+    }
+}
+
+my ($create, $munge) = (0, 0);
+
+my @scns = $record->field('035');
+
+foreach my $id_field ('001', '003') {
+    my $spec_value;
+    my @controls = $record->field($id_field);
+
+    if ($id_field eq '001') {
+        $spec_value = $rec_id;
     } else {
+        $spec_value = $ou_cni;
+    }
+
+    # Create the 001/003 if none exist
+    if (scalar(@controls) == 1) {
         # Only one field; check to see if we need to munge it
         unless (grep $_->data() eq $spec_value, @controls) {
             $munge = 1;
         }
+    } else {
+        # Delete the other fields, as with more than 1 001/003 we do not know which 003/001 to match
+        foreach my $control (@controls) {
+            unless ($control->data() eq $spec_value) {
+                $record->delete_field($control);
+            }
+        }
+        $record->insert_fields_ordered(MARC::Field->new($id_field, $spec_value));
+        $create = 1;
     }
 }
 
-# Now, if we need to munge the 001, we will first push the existing 001/003 into the 035;
-# but if the record did not have a 003 to begin with, skip this process
-if ($munge && scalar(@control_ids) > 0) {
+# Now, if we need to munge the 001, we will first push the existing 001/003
+# into the 035; but if the record did not have one (and one only) 001 and 003
+# to begin with, skip this process
+if ($munge and not $create) {
     my $scn = "(" . $record->field('003')->data() . ")" . $record->field('001')->data();
 
     # Do not create duplicate 035 fields

Copied: branches/rel_2_0/Open-ILS/src/sql/Pg/upgrade/0465.function.maintain_control_numbers.sql (from rev 18809, trunk/Open-ILS/src/sql/Pg/upgrade/0465.function.maintain_control_numbers.sql)
===================================================================
--- branches/rel_2_0/Open-ILS/src/sql/Pg/upgrade/0465.function.maintain_control_numbers.sql	                        (rev 0)
+++ branches/rel_2_0/Open-ILS/src/sql/Pg/upgrade/0465.function.maintain_control_numbers.sql	2010-11-20 19:56:10 UTC (rev 18811)
@@ -0,0 +1,124 @@
+BEGIN;
+
+INSERT INTO config.upgrade_log (version) VALUES ('0465'); -- dbs
+
+CREATE OR REPLACE FUNCTION maintain_control_numbers() RETURNS TRIGGER AS $func$
+use strict;
+use MARC::Record;
+use MARC::File::XML (BinaryEncoding => 'UTF-8');
+use Encode;
+use Unicode::Normalize;
+
+my $record = MARC::Record->new_from_xml($_TD->{new}{marc});
+my $schema = $_TD->{table_schema};
+my $rec_id = $_TD->{new}{id};
+
+# Short-circuit if maintaining control numbers per MARC21 spec is not enabled
+my $enable = spi_exec_query("SELECT enabled FROM config.global_flag WHERE name = 'cat.maintain_control_numbers'");
+if (!($enable->{processed}) or $enable->{rows}[0]->{enabled} eq 'f') {
+    return;
+}
+
+# Get the control number identifier from an OU setting based on $_TD->{new}{owner}
+my $ou_cni = 'EVRGRN';
+
+my $owner;
+if ($schema eq 'serial') {
+    $owner = $_TD->{new}{owning_lib};
+} else {
+    # are.owner and bre.owner can be null, so fall back to the consortial setting
+    $owner = $_TD->{new}{owner} || 1;
+}
+
+my $ous_rv = spi_exec_query("SELECT value FROM actor.org_unit_ancestor_setting('cat.marc_control_number_identifier', $owner)");
+if ($ous_rv->{processed}) {
+    $ou_cni = $ous_rv->{rows}[0]->{value};
+    $ou_cni =~ s/"//g; # Stupid VIM syntax highlighting"
+} else {
+    # Fall back to the shortname of the OU if there was no OU setting
+    $ous_rv = spi_exec_query("SELECT shortname FROM actor.org_unit WHERE id = $owner");
+    if ($ous_rv->{processed}) {
+        $ou_cni = $ous_rv->{rows}[0]->{shortname};
+    }
+}
+
+my ($create, $munge) = (0, 0);
+
+my @scns = $record->field('035');
+
+foreach my $id_field ('001', '003') {
+    my $spec_value;
+    my @controls = $record->field($id_field);
+
+    if ($id_field eq '001') {
+        $spec_value = $rec_id;
+    } else {
+        $spec_value = $ou_cni;
+    }
+
+    # Create the 001/003 if none exist
+    if (scalar(@controls) == 1) {
+        # Only one field; check to see if we need to munge it
+        unless (grep $_->data() eq $spec_value, @controls) {
+            $munge = 1;
+        }
+    } else {
+        # Delete the other fields, as with more than 1 001/003 we do not know which 003/001 to match
+        foreach my $control (@controls) {
+            unless ($control->data() eq $spec_value) {
+                $record->delete_field($control);
+            }
+        }
+        $record->insert_fields_ordered(MARC::Field->new($id_field, $spec_value));
+        $create = 1;
+    }
+}
+
+# Now, if we need to munge the 001, we will first push the existing 001/003
+# into the 035; but if the record did not have one (and one only) 001 and 003
+# to begin with, skip this process
+if ($munge and not $create) {
+    my $scn = "(" . $record->field('003')->data() . ")" . $record->field('001')->data();
+
+    # Do not create duplicate 035 fields
+    unless (grep $_->subfield('a') eq $scn, @scns) {
+        $record->insert_fields_ordered(MARC::Field->new('035', '', '', 'a' => $scn));
+    }
+}
+
+# Set the 001/003 and update the MARC
+if ($create or $munge) {
+    $record->field('001')->data($rec_id);
+    $record->field('003')->data($ou_cni);
+
+    my $xml = $record->as_xml_record();
+    $xml =~ s/\n//sgo;
+    $xml =~ s/^<\?xml.+\?\s*>//go;
+    $xml =~ s/>\s+</></go;
+    $xml =~ s/\p{Cc}//go;
+
+    # Embed a version of OpenILS::Application::AppUtils->entityize()
+    # to avoid having to set PERL5LIB for PostgreSQL as well
+
+    # If we are going to convert non-ASCII characters to XML entities,
+    # we had better be dealing with a UTF8 string to begin with
+    $xml = decode_utf8($xml);
+
+    $xml = NFC($xml);
+
+    # Convert raw ampersands to entities
+    $xml =~ s/&(?!\S+;)/&amp;/gso;
+
+    # Convert Unicode characters to entities
+    $xml =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
+
+    $xml =~ s/[\x00-\x1f]//go;
+    $_TD->{new}{marc} = $xml;
+
+    return "MODIFY";
+}
+
+return;
+$func$ LANGUAGE PLPERLU;
+
+COMMIT;



More information about the open-ils-commits mailing list