[open-ils-commits] [GIT] Evergreen ILS branch master updated. c4d1595fba44d24825f51a5097b7ee7b07523780

Evergreen Git git at git.evergreen-ils.org
Thu Feb 16 10:08:48 EST 2017


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Evergreen ILS".

The branch, master has been updated
       via  c4d1595fba44d24825f51a5097b7ee7b07523780 (commit)
       via  cacb6861baa23d622a36b8b0240b6b96f2b291d1 (commit)
       via  4ff655b82870af27f2b30052442d75ffce40db7c (commit)
       via  abbcc13856d1fe71ab51272b59d534c22b467a2a (commit)
      from  6e2cf57a7e039a1758d2ed219c935a555e7061d6 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit c4d1595fba44d24825f51a5097b7ee7b07523780
Author: Mike Rylander <mrylander at gmail.com>
Date:   Thu Feb 16 10:05:55 2017 -0500

    Stamping upgrade script for separating fingerprint components
    
    Signed-off-by: Mike Rylander <mrylander at gmail.com>

diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql
index bc6502e..b2cb8cb 100644
--- a/Open-ILS/src/sql/Pg/002.schema.config.sql
+++ b/Open-ILS/src/sql/Pg/002.schema.config.sql
@@ -91,7 +91,7 @@ CREATE TRIGGER no_overlapping_deps
     BEFORE INSERT OR UPDATE ON config.db_patch_dependencies
     FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates');
 
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1016', :eg_version); -- kmlussier/miker
+INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1017', :eg_version); -- gmcharlt/miker
 
 CREATE TABLE config.bib_source (
 	id		SERIAL	PRIMARY KEY,
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.update_fingerprinting.sql b/Open-ILS/src/sql/Pg/upgrade/1017.schema.update_fingerprinting.sql
similarity index 97%
rename from Open-ILS/src/sql/Pg/upgrade/XXXX.schema.update_fingerprinting.sql
rename to Open-ILS/src/sql/Pg/upgrade/1017.schema.update_fingerprinting.sql
index 2eb5ac8..ad4fc47 100644
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.update_fingerprinting.sql
+++ b/Open-ILS/src/sql/Pg/upgrade/1017.schema.update_fingerprinting.sql
@@ -1,6 +1,6 @@
 BEGIN;
 
---- SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+SELECT evergreen.upgrade_deps_block_check('1017', :eg_version);
 
 CREATE OR REPLACE FUNCTION biblio.extract_fingerprint ( marc text ) RETURNS TEXT AS $func$
 DECLARE

commit cacb6861baa23d622a36b8b0240b6b96f2b291d1
Author: Galen Charlton <gmc at esilibrary.com>
Date:   Fri Oct 28 13:00:45 2016 -0400

    LP#1528901: avoid accidental bib fingerprint collisions
    
    This patch fixes a problem where the bib fingerprint algorithm
    could end up putting completely different works in the same
    metarecord. For example,
    
    100 $a Steel, Danielle
    245 $a Blue
    
    and
    
    *no 1XX
    245 $a Blue steel
    
    previously (with stock config.biblio_fingerprint settings) got
    a fingerprint of "bluesteel". With this patch, their fingerprints
    are now:
    
    "Title:blue Author:steel"
    
    and
    
    "Title:bluesteel Author:"
    
    The upgrade script supplied with this patch remaps the metarecords
    after updating the fingerprints. While existing metarecord holds
    may get moved, note that there is no known way of ensuring that a
    metarecord hold placed on a collided metarecord will end up attached
    to whatever work the patron intended to request.
    
    To test:
    
    [1] Add records for "Blue" and "Blue steel".
    [2] Note that they end up on the same metarecord.
    [3] Apply the patch and perform the update.
    [4] The two bibs should now be on separate metarecords.
    
    Signed-off-by: Galen Charlton <gmc at esilibrary.com>
    Signed-off-by: Rogan Hamby <rogan.hamby at gmail.com>
    Signed-off-by: Kathy Lussier <klussier at masslnc.org>
    Signed-off-by: Mike Rylander <mrylander at gmail.com>

diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
index 71bab56..f79d09e 100644
--- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql
+++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
@@ -1073,11 +1073,12 @@ BEGIN
             raw_text := REGEXP_REPLACE(raw_text, E'^(\\w+).*?$', E'\\1');
         END IF;
 
-		output_text := output_text || REGEXP_REPLACE(raw_text, E'\\s+', '', 'g');
+		output_text := output_text || idx.name || ':' ||
+					   REGEXP_REPLACE(raw_text, E'\\s+', '', 'g') || ' ';
 
 	END LOOP;
 
-    RETURN output_text;
+    RETURN BTRIM(output_text);
 
 END;
 $func$ LANGUAGE PLPGSQL;
diff --git a/Open-ILS/src/sql/Pg/t/lp1528901_more_precise_fingerprints.pg b/Open-ILS/src/sql/Pg/t/lp1528901_more_precise_fingerprints.pg
new file mode 100644
index 0000000..bb06244
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/t/lp1528901_more_precise_fingerprints.pg
@@ -0,0 +1,50 @@
+-- tests to verify biblio fingerprints avoid conflating
+-- works where words coming from titles and authors might overlap
+BEGIN;
+
+SELECT plan(1);
+
+INSERT INTO biblio.record_entry (last_xact_id, marc) VALUES (
+    'bib-fingerprint-test-1',
+    $record$<record xmlns="http://www.loc.gov/MARC21/slim">
+  <leader>02137cam a2200457 a 4500</leader>
+  <controlfield tag="001">ocn694080497</controlfield>
+  <controlfield tag="005">20160729104757.0</controlfield>
+  <controlfield tag="008">101217s2011    txu      b    001 0 eng  </controlfield>
+  <datafield tag="100" ind1="0" ind2="0">
+    <subfield code="a">Jasper, Frances</subfield>
+  </datafield>
+  <datafield tag="245" ind1="0" ind2="0">
+    <subfield code="a">Gzarniblat</subfield>
+  </datafield>
+</record>$record$);
+
+INSERT INTO biblio.record_entry (last_xact_id, marc) VALUES (
+    'bib-fingerprint-test-2',
+    $record$<record xmlns="http://www.loc.gov/MARC21/slim">
+  <leader>02137cam a2200457 a 4500</leader>
+  <controlfield tag="001">ocn694080497</controlfield>
+  <controlfield tag="005">20160729104757.0</controlfield>
+  <controlfield tag="008">101217s2011    txu      b    001 0 eng  </controlfield>
+  <datafield tag="245" ind1="0" ind2="0">
+    <subfield code="a">Gzarniblat Jasper</subfield>
+  </datafield>
+</record>$record$);
+
+SELECT results_ne(
+    $$
+        SELECT metarecord FROM metabib.metarecord_source_map
+        WHERE source = (
+            SELECT id FROM biblio.record_entry WHERE last_xact_id = 'bib-fingerprint-test-1'
+        )
+    $$,
+    $$ 
+        SELECT metarecord FROM metabib.metarecord_source_map
+        WHERE source = (
+            SELECT id FROM biblio.record_entry WHERE last_xact_id = 'bib-fingerprint-test-2'
+        )
+    $$,
+    'LP#1528901: same words in title and author do not stick different bibs in same metarecord'
+);
+
+ROLLBACK;
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.update_fingerprinting.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.update_fingerprinting.sql
new file mode 100644
index 0000000..2eb5ac8
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.update_fingerprinting.sql
@@ -0,0 +1,86 @@
+BEGIN;
+
+--- SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+
+CREATE OR REPLACE FUNCTION biblio.extract_fingerprint ( marc text ) RETURNS TEXT AS $func$
+DECLARE
+	idx		config.biblio_fingerprint%ROWTYPE;
+	xfrm		config.xml_transform%ROWTYPE;
+	prev_xfrm	TEXT;
+	transformed_xml	TEXT;
+	xml_node	TEXT;
+	xml_node_list	TEXT[];
+	raw_text	TEXT;
+    output_text TEXT := '';
+BEGIN
+
+    IF marc IS NULL OR marc = '' THEN
+        RETURN NULL;
+    END IF;
+
+	-- Loop over the indexing entries
+	FOR idx IN SELECT * FROM config.biblio_fingerprint ORDER BY format, id LOOP
+
+		SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
+
+		-- See if we can skip the XSLT ... it's expensive
+		IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
+			-- Can't skip the transform
+			IF xfrm.xslt <> '---' THEN
+				transformed_xml := oils_xslt_process(marc,xfrm.xslt);
+			ELSE
+				transformed_xml := marc;
+			END IF;
+
+			prev_xfrm := xfrm.name;
+		END IF;
+
+		raw_text := COALESCE(
+            naco_normalize(
+                ARRAY_TO_STRING(
+                    oils_xpath(
+                        '//text()',
+                        (oils_xpath(
+                            idx.xpath,
+                            transformed_xml,
+                            ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] 
+                        ))[1]
+                    ),
+                    ''
+                )
+            ),
+            ''
+        );
+
+        raw_text := REGEXP_REPLACE(raw_text, E'\\[.+?\\]', E'');
+        raw_text := REGEXP_REPLACE(raw_text, E'\\mthe\\M|\\man?d?d\\M', E'', 'g'); -- arg! the pain!
+
+        IF idx.first_word IS TRUE THEN
+            raw_text := REGEXP_REPLACE(raw_text, E'^(\\w+).*?$', E'\\1');
+        END IF;
+
+		output_text := output_text || idx.name || ':' ||
+					   REGEXP_REPLACE(raw_text, E'\\s+', '', 'g') || ' ';
+
+	END LOOP;
+
+    RETURN BTRIM(output_text);
+
+END;
+$func$ LANGUAGE PLPGSQL;
+
+COMMIT;
+
+\qecho Recalculating bib fingerprints
+ALTER TABLE biblio.record_entry DISABLE TRIGGER USER;
+UPDATE biblio.record_entry SET fingerprint = biblio.extract_fingerprint(marc) WHERE NOT deleted;
+ALTER TABLE biblio.record_entry ENABLE TRIGGER USER;
+
+SELECT metabib.remap_metarecord_for_bib(id, fingerprint)
+FROM biblio.record_entry
+WHERE NOT deleted;
+
+\qecho Remapping metarecords
+SELECT metabib.remap_metarecord_for_bib(id, fingerprint)
+FROM biblio.record_entry
+WHERE NOT deleted;

commit 4ff655b82870af27f2b30052442d75ffce40db7c
Author: Mike Rylander <mrylander at gmail.com>
Date:   Thu Feb 16 10:01:33 2017 -0500

    Stamping upgrade script for including parts in bib fingerprints
    
    Signed-off-by: Mike Rylander <mrylander at gmail.com>

diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql
index 9669a87..bc6502e 100644
--- a/Open-ILS/src/sql/Pg/002.schema.config.sql
+++ b/Open-ILS/src/sql/Pg/002.schema.config.sql
@@ -91,7 +91,7 @@ CREATE TRIGGER no_overlapping_deps
     BEFORE INSERT OR UPDATE ON config.db_patch_dependencies
     FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates');
 
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1015', :eg_version); -- Bmagic/kmlussier
+INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1016', :eg_version); -- kmlussier/miker
 
 CREATE TABLE config.bib_source (
 	id		SERIAL	PRIMARY KEY,
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.data.add_parts_for_biblio_fingerprint.sql b/Open-ILS/src/sql/Pg/upgrade/1016.data.add_parts_for_biblio_fingerprint.sql
similarity index 84%
rename from Open-ILS/src/sql/Pg/upgrade/XXXX.data.add_parts_for_biblio_fingerprint.sql
rename to Open-ILS/src/sql/Pg/upgrade/1016.data.add_parts_for_biblio_fingerprint.sql
index 37b2b31..a0728be 100644
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.data.add_parts_for_biblio_fingerprint.sql
+++ b/Open-ILS/src/sql/Pg/upgrade/1016.data.add_parts_for_biblio_fingerprint.sql
@@ -1,6 +1,6 @@
 BEGIN;
 
--- SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+SELECT evergreen.upgrade_deps_block_check('1016', :eg_version);
 
 INSERT INTO config.biblio_fingerprint (name, xpath, format)
     VALUES (

commit abbcc13856d1fe71ab51272b59d534c22b467a2a
Author: Kathy Lussier <klussier at masslnc.org>
Date:   Wed Jun 22 13:59:34 2016 -0400

    LP#1553287: Add part information to biblio.fingerprint
    
    Evergreen metarecord searching will sometimes group together different works
    that are part of the same series because biblio.fingerprint doesn't incorporate
    subfield n or p from the title. For example, bib records for the Mockingjay
    movies list the Hunger Games in the 245a with Mockingjay in subfield p.
    Without the part information in the fingerprint, Evergreen will group these
    movies together with versions of the first Hunger Games book.
    
    This branch adds parts subfields to biblio.fingerprint to allow us to
    distinguish among different parts in a series.
    
    Signed-off-by: Kathy Lussier <klussier at masslnc.org>
    Signed-off-by: Mike Rylander <mrylander at gmail.com>

diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql
index bb14f6c..9669a87 100644
--- a/Open-ILS/src/sql/Pg/002.schema.config.sql
+++ b/Open-ILS/src/sql/Pg/002.schema.config.sql
@@ -170,6 +170,20 @@ INSERT INTO config.biblio_fingerprint (name, xpath, format, first_word)
         TRUE
     );
 
+INSERT INTO config.biblio_fingerprint (name, xpath, format)
+    VALUES (
+        'PartName',
+        '//mods32:mods/mods32:titleInfo/mods32:partName',
+        'mods32'
+    );
+
+INSERT INTO config.biblio_fingerprint (name, xpath, format)
+    VALUES (
+        'PartNumber',
+        '//mods32:mods/mods32:titleInfo/mods32:partNumber',
+        'mods32'
+    );
+
 CREATE TABLE config.metabib_class (
     name     TEXT    PRIMARY KEY,
     label    TEXT    NOT NULL UNIQUE,
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.data.add_parts_for_biblio_fingerprint.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.data.add_parts_for_biblio_fingerprint.sql
new file mode 100644
index 0000000..37b2b31
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.data.add_parts_for_biblio_fingerprint.sql
@@ -0,0 +1,19 @@
+BEGIN;
+
+-- SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+
+INSERT INTO config.biblio_fingerprint (name, xpath, format)
+    VALUES (
+        'PartName',
+        '//mods32:mods/mods32:titleInfo/mods32:partName',
+        'mods32'
+    );
+
+INSERT INTO config.biblio_fingerprint (name, xpath, format)
+    VALUES (
+        'PartNumber',
+        '//mods32:mods/mods32:titleInfo/mods32:partNumber',
+        'mods32'
+    );
+
+COMMIT;
diff --git a/docs/RELEASE_NOTES_NEXT/Administration/add-parts-to-biblio-fingerprint.adoc b/docs/RELEASE_NOTES_NEXT/Administration/add-parts-to-biblio-fingerprint.adoc
new file mode 100644
index 0000000..3d872ea
--- /dev/null
+++ b/docs/RELEASE_NOTES_NEXT/Administration/add-parts-to-biblio-fingerprint.adoc
@@ -0,0 +1,9 @@
+Bibliographic Fingerprint Improvement
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+The bibliographic fingerprint will now incorporate subfields n and p from MARC
+title fields to better distinguish between records of the same series that
+may share the same title but have a different part. With this change, these
+MARC records will no longer be grouped together in a 'Group Formats & Editions'
+search.
+
+

-----------------------------------------------------------------------

Summary of changes:
 Open-ILS/src/sql/Pg/002.schema.config.sql          |   16 ++++-
 Open-ILS/src/sql/Pg/030.schema.metabib.sql         |    5 +-
 .../Pg/t/lp1528901_more_precise_fingerprints.pg    |   50 +++++++++++
 .../1016.data.add_parts_for_biblio_fingerprint.sql |   19 +++++
 .../upgrade/1017.schema.update_fingerprinting.sql  |   86 ++++++++++++++++++++
 .../add-parts-to-biblio-fingerprint.adoc           |    9 ++
 6 files changed, 182 insertions(+), 3 deletions(-)
 create mode 100644 Open-ILS/src/sql/Pg/t/lp1528901_more_precise_fingerprints.pg
 create mode 100644 Open-ILS/src/sql/Pg/upgrade/1016.data.add_parts_for_biblio_fingerprint.sql
 create mode 100644 Open-ILS/src/sql/Pg/upgrade/1017.schema.update_fingerprinting.sql
 create mode 100644 docs/RELEASE_NOTES_NEXT/Administration/add-parts-to-biblio-fingerprint.adoc


hooks/post-receive
-- 
Evergreen ILS


More information about the open-ils-commits mailing list