[open-ils-commits] r19434 - in branches/rel_2_0/Open-ILS/src: perlmods/OpenILS/Application/Search sql/Pg sql/Pg/upgrade (dbs)

svn at svn.open-ils.org svn at svn.open-ils.org
Fri Feb 11 16:45:26 EST 2011


Author: dbs
Date: 2011-02-11 16:45:20 -0500 (Fri, 11 Feb 2011)
New Revision: 19434

Added:
   branches/rel_2_0/Open-ILS/src/sql/Pg/upgrade/0485.schema.reporter_strip_isbns.sql
Modified:
   branches/rel_2_0/Open-ILS/src/perlmods/OpenILS/Application/Search/Biblio.pm
   branches/rel_2_0/Open-ILS/src/sql/Pg/002.schema.config.sql
   branches/rel_2_0/Open-ILS/src/sql/Pg/reporter-schema.sql
Log:
Normalize ISBNs by stripping hyphens in search methods and in reporter.materialized_simple_record

We weren't normalizing ISBNs in search or in reporter.materialized_simple_record
which required users to exactly match the form of the ISBN as entered in the
MARC record to get a successful search result - pretty hit or miss.

The longer term fix in 2.0 and above is to make the ISBN search search against
the identifier|isbn index, which also gets ISBN10/ISBN13 equivalence goodness.
But this patch will be relatively easy to backport through to 1.6.1 and plugs
some of the holes in our exposed APIs and search points in the short term.


Modified: branches/rel_2_0/Open-ILS/src/perlmods/OpenILS/Application/Search/Biblio.pm
===================================================================
--- branches/rel_2_0/Open-ILS/src/perlmods/OpenILS/Application/Search/Biblio.pm	2011-02-11 21:40:49 UTC (rev 19433)
+++ branches/rel_2_0/Open-ILS/src/perlmods/OpenILS/Application/Search/Biblio.pm	2011-02-11 21:45:20 UTC (rev 19434)
@@ -2095,6 +2095,8 @@
 sub biblio_search_isbn { 
 	my( $self, $client, $isbn ) = @_;
 	$logger->debug("Searching ISBN $isbn");
+	# Strip hyphens from incoming ISBNs
+	$isbn =~ s/-//g;
 	my $recs = $U->storagereq('open-ils.storage.id_list.biblio.record_entry.search.isbn.atomic', $isbn);
 	return { ids => $recs, count => scalar(@$recs) };
 }
@@ -2109,6 +2111,8 @@
 	$logger->debug("Searching ISBNs @$isbn_list");
 	my @recs = (); my %rec_set = ();
 	foreach my $isbn ( @$isbn_list ) {
+		# Strip hyphens from incoming ISBNs
+		$isbn =~ s/-//g;
 		foreach my $rec ( @{ $U->storagereq(
 			'open-ils.storage.id_list.biblio.record_entry.search.isbn.atomic', $isbn )
 		} ) {

Modified: branches/rel_2_0/Open-ILS/src/sql/Pg/002.schema.config.sql
===================================================================
--- branches/rel_2_0/Open-ILS/src/sql/Pg/002.schema.config.sql	2011-02-11 21:40:49 UTC (rev 19433)
+++ branches/rel_2_0/Open-ILS/src/sql/Pg/002.schema.config.sql	2011-02-11 21:45:20 UTC (rev 19434)
@@ -70,7 +70,7 @@
     install_date    TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
 );
 
-INSERT INTO config.upgrade_log (version) VALUES ('0484'); -- miker
+INSERT INTO config.upgrade_log (version) VALUES ('0485'); -- dbs
 
 CREATE TABLE config.bib_source (
 	id		SERIAL	PRIMARY KEY,

Modified: branches/rel_2_0/Open-ILS/src/sql/Pg/reporter-schema.sql
===================================================================
--- branches/rel_2_0/Open-ILS/src/sql/Pg/reporter-schema.sql	2011-02-11 21:40:49 UTC (rev 19433)
+++ branches/rel_2_0/Open-ILS/src/sql/Pg/reporter-schema.sql	2011-02-11 21:45:20 UTC (rev 19434)
@@ -126,8 +126,8 @@
 	series_title.value AS series_title,
 	series_statement.value AS series_statement,
 	summary.value AS summary,
-	ARRAY_ACCUM( SUBSTRING(isbn.value FROM $$^\S+$$) ) AS isbn,
-	ARRAY_ACCUM( REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) AS issn,
+	ARRAY_ACCUM( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) AS isbn,
+	ARRAY_ACCUM( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) AS issn,
 	ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '650' AND subfield = 'a' AND record = r.id)) AS topic_subject,
 	ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '651' AND subfield = 'a' AND record = r.id)) AS geographic_subject,
 	ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '655' AND subfield = 'a' AND record = r.id)) AS genre,
@@ -158,8 +158,8 @@
     FIRST(author.value) AS author,
     ARRAY_TO_STRING(ARRAY_ACCUM( DISTINCT publisher.value), ', ') AS publisher,
     ARRAY_TO_STRING(ARRAY_ACCUM( DISTINCT SUBSTRING(pubdate.value FROM $$\d+$$) ), ', ') AS pubdate,
-    ARRAY_ACCUM( DISTINCT SUBSTRING(isbn.value FROM $$^\S+$$) ) AS isbn,
-    ARRAY_ACCUM( DISTINCT SUBSTRING(issn.value FROM $$^\S+$$) ) AS issn
+    ARRAY_ACCUM( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) AS isbn,
+    ARRAY_ACCUM( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) AS issn
   FROM  biblio.record_entry r
     LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a')
     LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag IN ('100','110','111') AND author.subfield = 'a')

Copied: branches/rel_2_0/Open-ILS/src/sql/Pg/upgrade/0485.schema.reporter_strip_isbns.sql (from rev 19432, trunk/Open-ILS/src/sql/Pg/upgrade/0485.schema.reporter_strip_isbns.sql)
===================================================================
--- branches/rel_2_0/Open-ILS/src/sql/Pg/upgrade/0485.schema.reporter_strip_isbns.sql	                        (rev 0)
+++ branches/rel_2_0/Open-ILS/src/sql/Pg/upgrade/0485.schema.reporter_strip_isbns.sql	2011-02-11 21:45:20 UTC (rev 19434)
@@ -0,0 +1,74 @@
+BEGIN;
+
+INSERT INTO config.upgrade_log (version) VALUES ('0485'); -- dbs
+
+CREATE OR REPLACE VIEW reporter.simple_record AS
+SELECT	r.id,
+	s.metarecord,
+	r.fingerprint,
+	r.quality,
+	r.tcn_source,
+	r.tcn_value,
+	title.value AS title,
+	uniform_title.value AS uniform_title,
+	author.value AS author,
+	publisher.value AS publisher,
+	SUBSTRING(pubdate.value FROM $$\d+$$) AS pubdate,
+	series_title.value AS series_title,
+	series_statement.value AS series_statement,
+	summary.value AS summary,
+	ARRAY_ACCUM( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) AS isbn,
+	ARRAY_ACCUM( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) AS issn,
+	ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '650' AND subfield = 'a' AND record = r.id)) AS topic_subject,
+	ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '651' AND subfield = 'a' AND record = r.id)) AS geographic_subject,
+	ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '655' AND subfield = 'a' AND record = r.id)) AS genre,
+	ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '600' AND subfield = 'a' AND record = r.id)) AS name_subject,
+	ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '610' AND subfield = 'a' AND record = r.id)) AS corporate_subject,
+	ARRAY((SELECT value FROM metabib.full_rec WHERE tag = '856' AND subfield IN ('3','y','u') AND record = r.id ORDER BY CASE WHEN subfield IN ('3','y') THEN 0 ELSE 1 END)) AS external_uri
+  FROM	biblio.record_entry r
+	JOIN metabib.metarecord_source_map s ON (s.source = r.id)
+	LEFT JOIN metabib.full_rec uniform_title ON (r.id = uniform_title.record AND uniform_title.tag = '240' AND uniform_title.subfield = 'a')
+	LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a')
+	LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag = '100' AND author.subfield = 'a')
+	LEFT JOIN metabib.full_rec publisher ON (r.id = publisher.record AND publisher.tag = '260' AND publisher.subfield = 'b')
+	LEFT JOIN metabib.full_rec pubdate ON (r.id = pubdate.record AND pubdate.tag = '260' AND pubdate.subfield = 'c')
+	LEFT JOIN metabib.full_rec isbn ON (r.id = isbn.record AND isbn.tag IN ('024', '020') AND isbn.subfield IN ('a','z'))
+	LEFT JOIN metabib.full_rec issn ON (r.id = issn.record AND issn.tag = '022' AND issn.subfield = 'a')
+	LEFT JOIN metabib.full_rec series_title ON (r.id = series_title.record AND series_title.tag IN ('830','440') AND series_title.subfield = 'a')
+	LEFT JOIN metabib.full_rec series_statement ON (r.id = series_statement.record AND series_statement.tag = '490' AND series_statement.subfield = 'a')
+	LEFT JOIN metabib.full_rec summary ON (r.id = summary.record AND summary.tag = '520' AND summary.subfield = 'a')
+  GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14;
+
+CREATE OR REPLACE VIEW reporter.old_super_simple_record AS
+SELECT  r.id,
+    r.fingerprint,
+    r.quality,
+    r.tcn_source,
+    r.tcn_value,
+    FIRST(title.value) AS title,
+    FIRST(author.value) AS author,
+    ARRAY_TO_STRING(ARRAY_ACCUM( DISTINCT publisher.value), ', ') AS publisher,
+    ARRAY_TO_STRING(ARRAY_ACCUM( DISTINCT SUBSTRING(pubdate.value FROM $$\d+$$) ), ', ') AS pubdate,
+    ARRAY_ACCUM( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) AS isbn,
+    ARRAY_ACCUM( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) AS issn
+  FROM  biblio.record_entry r
+    LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a')
+    LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag IN ('100','110','111') AND author.subfield = 'a')
+    LEFT JOIN metabib.full_rec publisher ON (r.id = publisher.record AND publisher.tag = '260' AND publisher.subfield = 'b')
+    LEFT JOIN metabib.full_rec pubdate ON (r.id = pubdate.record AND pubdate.tag = '260' AND pubdate.subfield = 'c')
+    LEFT JOIN metabib.full_rec isbn ON (r.id = isbn.record AND isbn.tag IN ('024', '020') AND isbn.subfield IN ('a','z'))
+    LEFT JOIN metabib.full_rec issn ON (r.id = issn.record AND issn.tag = '022' AND issn.subfield = 'a')
+  GROUP BY 1,2,3,4,5;
+
+-- Update reporter.materialized_simple_record with normalized ISBN values
+-- This might not get all of them, but most ISBNs will have more than one hyphen
+DELETE FROM reporter.materialized_simple_record WHERE id IN (
+    SELECT record FROM metabib.full_rec WHERE tag = '020' AND subfield IN ('a', 'z') AND value LIKE '%-%-%'
+);
+
+INSERT INTO reporter.materialized_simple_record
+    SELECT DISTINCT rossr.* FROM reporter.old_super_simple_record rossr INNER JOIN metabib.full_rec mfr ON mfr.record = rossr.id
+        WHERE mfr.tag = '020' AND mfr.subfield IN ('a', 'z') AND mfr.value LIKE '%-%-%'
+;
+
+COMMIT;



More information about the open-ils-commits mailing list