[open-ils-commits] r19201 - in trunk/Open-ILS/src/sql/Pg: . upgrade (dbs)
svn at svn.open-ils.org
svn at svn.open-ils.org
Wed Jan 19 09:53:51 EST 2011
Author: dbs
Date: 2011-01-19 09:53:48 -0500 (Wed, 19 Jan 2011)
New Revision: 19201
Added:
trunk/Open-ILS/src/sql/Pg/upgrade/0476.schema.authority_normalize_heading.sql
Modified:
trunk/Open-ILS/src/sql/Pg/002.schema.config.sql
trunk/Open-ILS/src/sql/Pg/020.schema.functions.sql
trunk/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql
Log:
Avoid escaping issues in authority.normalize_heading() by parameterizing the query
Long story short: MARC subfield values containing backslashes caused noise
and in some cases painful errors. Using spi_prepare/spi_exec_prepared is the
safest way of handling escaping, rather than adding more regexes and munging
the data before it even gets to naco_normalize().
Most painful case was <subfield code="a">Foo, Bar\</subfield> - the trailing
backslash ended up escaping the enclosing single quote (because PostgreSQL isn't
configured by default with strict conformance to SQL escaping rules yet) and
threw an error.
Modified: trunk/Open-ILS/src/sql/Pg/002.schema.config.sql
===================================================================
--- trunk/Open-ILS/src/sql/Pg/002.schema.config.sql 2011-01-19 04:41:15 UTC (rev 19200)
+++ trunk/Open-ILS/src/sql/Pg/002.schema.config.sql 2011-01-19 14:53:48 UTC (rev 19201)
@@ -70,7 +70,7 @@
install_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);
-INSERT INTO config.upgrade_log (version) VALUES ('0475'); -- dbwells
+INSERT INTO config.upgrade_log (version) VALUES ('0476'); -- dbs
CREATE TABLE config.bib_source (
id SERIAL PRIMARY KEY,
Modified: trunk/Open-ILS/src/sql/Pg/020.schema.functions.sql
===================================================================
--- trunk/Open-ILS/src/sql/Pg/020.schema.functions.sql 2011-01-19 04:41:15 UTC (rev 19200)
+++ trunk/Open-ILS/src/sql/Pg/020.schema.functions.sql 2011-01-19 14:53:48 UTC (rev 19201)
@@ -375,12 +375,12 @@
}
}
- # Perhaps better to parameterize the spi and pass as a parameter
- $auth_txt =~ s/'//go;
-
if ($auth_txt) {
- my $result = spi_exec_query("SELECT public.naco_normalize('$auth_txt') AS norm_text");
+ my $stmt = spi_prepare('SELECT public.naco_normalize($1) AS norm_text', 'TEXT');
+ my $result = spi_exec_prepared($stmt, $auth_txt);
my $norm_txt = $result->{rows}[0]->{norm_text};
+ spi_freeplan($stmt);
+ undef($stmt);
return $head->tag() . "_" . $thes_code . " " . $norm_txt;
}
Modified: trunk/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql
===================================================================
--- trunk/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql 2011-01-19 04:41:15 UTC (rev 19200)
+++ trunk/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql 2011-01-19 14:53:48 UTC (rev 19201)
@@ -18724,7 +18724,7 @@
# Default to "No attempt to code" if the leader is horribly broken
my $fixed_field = $r->field('008');
my $thes_char = '|';
- if ($fixed_field) {
+ if ($fixed_field) {
$thes_char = substr($fixed_field->data(), 11, 1) || '|';
}
@@ -18746,13 +18746,13 @@
$auth_txt .= '‡' . $sf->[0] . ' ' . $sf->[1];
}
}
-
- # Perhaps better to parameterize the spi and pass as a parameter
- $auth_txt =~ s/'//go;
-
+
if ($auth_txt) {
- my $result = spi_exec_query("SELECT public.naco_normalize('$auth_txt') AS norm_text");
+ my $stmt = spi_prepare('SELECT public.naco_normalize($1) AS norm_text', 'TEXT');
+ my $result = spi_exec_prepared($stmt, $auth_txt);
my $norm_txt = $result->{rows}[0]->{norm_text};
+ spi_freeplan($stmt);
+ undef($stmt);
return $head->tag() . "_" . $thes_code . " " . $norm_txt;
}
Added: trunk/Open-ILS/src/sql/Pg/upgrade/0476.schema.authority_normalize_heading.sql
===================================================================
--- trunk/Open-ILS/src/sql/Pg/upgrade/0476.schema.authority_normalize_heading.sql (rev 0)
+++ trunk/Open-ILS/src/sql/Pg/upgrade/0476.schema.authority_normalize_heading.sql 2011-01-19 14:53:48 UTC (rev 19201)
@@ -0,0 +1,83 @@
+-- Use spi_prepare/spi_exec_prepared to delegate escaping issues to the database
+-- (where they belong) and avoid ugly MARC corner cases
+BEGIN;
+
+INSERT INTO config.upgrade_log (version) VALUES ('0476'); -- dbs
+
+CREATE OR REPLACE FUNCTION authority.normalize_heading( TEXT ) RETURNS TEXT AS $func$
+ use strict;
+ use warnings;
+ # Takes one MARCXML string and returns a heading key of the form "<tag>_<thesaurus code> <normalized heading text>"
+ use utf8;
+ use MARC::Record;
+ use MARC::File::XML (BinaryEncoding => 'UTF8');
+ use UUID::Tiny ':std';
+ # A missing or otherwise false argument returns undef instead of a key
+ my $xml = shift() or return undef;
+
+ my $r;
+
+ # Trap XML parsing errors so malformed input yields a BAD_MARCXML_ key instead of raising an error
+ eval {
+ $r = MARC::Record->new_from_xml( $xml );
+ 1;
+ } or do {
+ return 'BAD_MARCXML_' . create_uuid_as_string(UUID_MD5, $xml);
+ };
+ # Parsing can also finish without dying yet leave $r false; report that the same way
+ if (!$r) {
+ return 'BAD_MARCXML_' . create_uuid_as_string(UUID_MD5, $xml);
+ }
+
+ # Maps the one-character code read from the 008 field below to a source code; from http://www.loc.gov/standards/sourcelist/subject.html
+ my $thes_code_map = {
+ a => 'lcsh',
+ b => 'lcshac',
+ c => 'mesh',
+ d => 'nal',
+ k => 'cash',
+ n => 'notapplicable',
+ r => 'aat',
+ s => 'sears',
+ v => 'rvm',
+ };
+
+ # Default to '|' ("No attempt to code") when the 008 field is missing or the character at offset 11 is absent or false
+ my $fixed_field = $r->field('008');
+ my $thes_char = '|';
+ if ($fixed_field) {
+ $thes_char = substr($fixed_field->data(), 11, 1) || '|';
+ }
+
+ my $thes_code = 'UNDEFINED';
+
+ if ($thes_char eq 'z') {
+ # For 'z', take the code from 040 $f per http://www.loc.gov/marc/authority/ad040.html, keeping 'UNDEFINED' if it is absent
+ $thes_code = $r->subfield('040', 'f') || 'UNDEFINED';
+ } elsif ($thes_code_map->{$thes_char}) {
+ $thes_code = $thes_code_map->{$thes_char};
+ }
+ # field('1..') is called in scalar context - presumably this yields the first 1XX field; confirm against MARC::Record
+ my $auth_txt = '';
+ my $head = $r->field('1..');
+ if ($head) {
+ # Concatenate all of these subfields together, prefixed by their code
+ # to prevent collisions along the lines of "Fiction, North Carolina"
+ foreach my $sf ($head->subfields()) {
+ $auth_txt .= '‡' . $sf->[0] . ' ' . $sf->[1];
+ }
+ }
+ # Normalize through a prepared statement so the heading text is passed as a bound parameter, never spliced into the SQL
+ if ($auth_txt) {
+ my $stmt = spi_prepare('SELECT public.naco_normalize($1) AS norm_text', 'TEXT');
+ my $result = spi_exec_prepared($stmt, $auth_txt);
+ my $norm_txt = $result->{rows}[0]->{norm_text};
+ spi_freeplan($stmt);
+ undef($stmt);
+ return $head->tag() . "_" . $thes_code . " " . $norm_txt;
+ }
+ # No heading text was collected: fall back to a key built from an MD5-based UUID of the raw XML
+ return 'NOHEADING_' . $thes_code . ' ' . create_uuid_as_string(UUID_MD5, $xml);
+$func$ LANGUAGE 'plperlu' IMMUTABLE;
+
+COMMIT;
More information about the open-ils-commits
mailing list