[open-ils-commits] [GIT] Evergreen ILS branch master updated. c4d1595fba44d24825f51a5097b7ee7b07523780
Evergreen Git
git at git.evergreen-ils.org
Thu Feb 16 10:08:48 EST 2017
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Evergreen ILS".
The branch, master has been updated
via c4d1595fba44d24825f51a5097b7ee7b07523780 (commit)
via cacb6861baa23d622a36b8b0240b6b96f2b291d1 (commit)
via 4ff655b82870af27f2b30052442d75ffce40db7c (commit)
via abbcc13856d1fe71ab51272b59d534c22b467a2a (commit)
from 6e2cf57a7e039a1758d2ed219c935a555e7061d6 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit c4d1595fba44d24825f51a5097b7ee7b07523780
Author: Mike Rylander <mrylander at gmail.com>
Date: Thu Feb 16 10:05:55 2017 -0500
Stamping upgrade script for separating fingerprint components
Signed-off-by: Mike Rylander <mrylander at gmail.com>
diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql
index bc6502e..b2cb8cb 100644
--- a/Open-ILS/src/sql/Pg/002.schema.config.sql
+++ b/Open-ILS/src/sql/Pg/002.schema.config.sql
@@ -91,7 +91,7 @@ CREATE TRIGGER no_overlapping_deps
BEFORE INSERT OR UPDATE ON config.db_patch_dependencies
FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates');
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1016', :eg_version); -- kmlussier/miker
+INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1017', :eg_version); -- gmcharlt/miker
CREATE TABLE config.bib_source (
id SERIAL PRIMARY KEY,
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.update_fingerprinting.sql b/Open-ILS/src/sql/Pg/upgrade/1017.schema.update_fingerprinting.sql
similarity index 97%
rename from Open-ILS/src/sql/Pg/upgrade/XXXX.schema.update_fingerprinting.sql
rename to Open-ILS/src/sql/Pg/upgrade/1017.schema.update_fingerprinting.sql
index 2eb5ac8..ad4fc47 100644
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.update_fingerprinting.sql
+++ b/Open-ILS/src/sql/Pg/upgrade/1017.schema.update_fingerprinting.sql
@@ -1,6 +1,6 @@
BEGIN;
---- SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+SELECT evergreen.upgrade_deps_block_check('1017', :eg_version);
CREATE OR REPLACE FUNCTION biblio.extract_fingerprint ( marc text ) RETURNS TEXT AS $func$
DECLARE
commit cacb6861baa23d622a36b8b0240b6b96f2b291d1
Author: Galen Charlton <gmc at esilibrary.com>
Date: Fri Oct 28 13:00:45 2016 -0400
LP#1528901: avoid accidental bib fingerprint collisions
This patch fixes a problem where the bib fingerprint algorithm
could end up putting completely different works in the same
metarecord. For example,
100 $a Steel, Danielle
245 $a Blue
and
*no 1XX
245 $a Blue steel
previously (with stock config.biblio_fingerprint settings) got
a fingerprint of "bluesteel". With this patch, their fingerprints
are now:
"Title:blue Author:steel"
and
"Title:bluesteel Author:"
The upgrade script supplied with this patch remaps the metarecords
after updating the fingerprints. While existing metarecord holds
may get moved, note that there is no known way of ensuring that a
metarecord hold placed on a collided metarecord will end up attached
to whatever work the patron intended to request.
To test:
[1] Add records for "Blue" and "Blue steel".
[2] Note that they end up on the same metarecord.
[3] Apply the patch and perform the update.
[4] The two bibs should now be on separate metarecords.
Signed-off-by: Galen Charlton <gmc at esilibrary.com>
Signed-off-by: Rogan Hamby <rogan.hamby at gmail.com>
Signed-off-by: Kathy Lussier <klussier at masslnc.org>
Signed-off-by: Mike Rylander <mrylander at gmail.com>
diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
index 71bab56..f79d09e 100644
--- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql
+++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
@@ -1073,11 +1073,12 @@ BEGIN
raw_text := REGEXP_REPLACE(raw_text, E'^(\\w+).*?$', E'\\1');
END IF;
- output_text := output_text || REGEXP_REPLACE(raw_text, E'\\s+', '', 'g');
+ output_text := output_text || idx.name || ':' ||
+ REGEXP_REPLACE(raw_text, E'\\s+', '', 'g') || ' ';
END LOOP;
- RETURN output_text;
+ RETURN BTRIM(output_text);
END;
$func$ LANGUAGE PLPGSQL;
diff --git a/Open-ILS/src/sql/Pg/t/lp1528901_more_precise_fingerprints.pg b/Open-ILS/src/sql/Pg/t/lp1528901_more_precise_fingerprints.pg
new file mode 100644
index 0000000..bb06244
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/t/lp1528901_more_precise_fingerprints.pg
@@ -0,0 +1,50 @@
+-- tests to verify biblio fingerprints avoid conflating
+-- works where words coming from titles and authors might overlap
+BEGIN;
+
+SELECT plan(1);
+
+INSERT INTO biblio.record_entry (last_xact_id, marc) VALUES (
+ 'bib-fingerprint-test-1',
+ $record$<record xmlns="http://www.loc.gov/MARC21/slim">
+ <leader>02137cam a2200457 a 4500</leader>
+ <controlfield tag="001">ocn694080497</controlfield>
+ <controlfield tag="005">20160729104757.0</controlfield>
+ <controlfield tag="008">101217s2011 txu b 001 0 eng </controlfield>
+ <datafield tag="100" ind1="0" ind2="0">
+ <subfield code="a">Jasper, Frances</subfield>
+ </datafield>
+ <datafield tag="245" ind1="0" ind2="0">
+ <subfield code="a">Gzarniblat</subfield>
+ </datafield>
+</record>$record$);
+
+INSERT INTO biblio.record_entry (last_xact_id, marc) VALUES (
+ 'bib-fingerprint-test-2',
+ $record$<record xmlns="http://www.loc.gov/MARC21/slim">
+ <leader>02137cam a2200457 a 4500</leader>
+ <controlfield tag="001">ocn694080497</controlfield>
+ <controlfield tag="005">20160729104757.0</controlfield>
+ <controlfield tag="008">101217s2011 txu b 001 0 eng </controlfield>
+ <datafield tag="245" ind1="0" ind2="0">
+ <subfield code="a">Gzarniblat Jasper</subfield>
+ </datafield>
+</record>$record$);
+
+SELECT results_ne(
+ $$
+ SELECT metarecord FROM metabib.metarecord_source_map
+ WHERE source = (
+ SELECT id FROM biblio.record_entry WHERE last_xact_id = 'bib-fingerprint-test-1'
+ )
+ $$,
+ $$
+ SELECT metarecord FROM metabib.metarecord_source_map
+ WHERE source = (
+ SELECT id FROM biblio.record_entry WHERE last_xact_id = 'bib-fingerprint-test-2'
+ )
+ $$,
+ 'LP#1528901: same words in title and author do not stick different bibs in same metarecord'
+);
+
+ROLLBACK;
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.update_fingerprinting.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.update_fingerprinting.sql
new file mode 100644
index 0000000..2eb5ac8
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.update_fingerprinting.sql
@@ -0,0 +1,86 @@
+BEGIN;
+
+--- SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+
+CREATE OR REPLACE FUNCTION biblio.extract_fingerprint ( marc text ) RETURNS TEXT AS $func$
+DECLARE
+ idx config.biblio_fingerprint%ROWTYPE;
+ xfrm config.xml_transform%ROWTYPE;
+ prev_xfrm TEXT;
+ transformed_xml TEXT;
+ xml_node TEXT;
+ xml_node_list TEXT[];
+ raw_text TEXT;
+ output_text TEXT := '';
+BEGIN
+
+ IF marc IS NULL OR marc = '' THEN
+ RETURN NULL;
+ END IF;
+
+ -- Loop over the indexing entries
+ FOR idx IN SELECT * FROM config.biblio_fingerprint ORDER BY format, id LOOP
+
+ SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
+
+ -- See if we can skip the XSLT ... it's expensive
+ IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
+ -- Can't skip the transform
+ IF xfrm.xslt <> '---' THEN
+ transformed_xml := oils_xslt_process(marc,xfrm.xslt);
+ ELSE
+ transformed_xml := marc;
+ END IF;
+
+ prev_xfrm := xfrm.name;
+ END IF;
+
+ raw_text := COALESCE(
+ naco_normalize(
+ ARRAY_TO_STRING(
+ oils_xpath(
+ '//text()',
+ (oils_xpath(
+ idx.xpath,
+ transformed_xml,
+ ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
+ ))[1]
+ ),
+ ''
+ )
+ ),
+ ''
+ );
+
+ raw_text := REGEXP_REPLACE(raw_text, E'\\[.+?\\]', E'');
+ raw_text := REGEXP_REPLACE(raw_text, E'\\mthe\\M|\\man?d?d\\M', E'', 'g'); -- arg! the pain!
+
+ IF idx.first_word IS TRUE THEN
+ raw_text := REGEXP_REPLACE(raw_text, E'^(\\w+).*?$', E'\\1');
+ END IF;
+
+ output_text := output_text || idx.name || ':' ||
+ REGEXP_REPLACE(raw_text, E'\\s+', '', 'g') || ' ';
+
+ END LOOP;
+
+ RETURN BTRIM(output_text);
+
+END;
+$func$ LANGUAGE PLPGSQL;
+
+COMMIT;
+
+\qecho Recalculating bib fingerprints
+ALTER TABLE biblio.record_entry DISABLE TRIGGER USER;
+UPDATE biblio.record_entry SET fingerprint = biblio.extract_fingerprint(marc) WHERE NOT deleted;
+ALTER TABLE biblio.record_entry ENABLE TRIGGER USER;
+
+SELECT metabib.remap_metarecord_for_bib(id, fingerprint)
+FROM biblio.record_entry
+WHERE NOT deleted;
+
+\qecho Remapping metarecords
+SELECT metabib.remap_metarecord_for_bib(id, fingerprint)
+FROM biblio.record_entry
+WHERE NOT deleted;
commit 4ff655b82870af27f2b30052442d75ffce40db7c
Author: Mike Rylander <mrylander at gmail.com>
Date: Thu Feb 16 10:01:33 2017 -0500
Stamping upgrade script for including parts in bib fingerprints
Signed-off-by: Mike Rylander <mrylander at gmail.com>
diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql
index 9669a87..bc6502e 100644
--- a/Open-ILS/src/sql/Pg/002.schema.config.sql
+++ b/Open-ILS/src/sql/Pg/002.schema.config.sql
@@ -91,7 +91,7 @@ CREATE TRIGGER no_overlapping_deps
BEFORE INSERT OR UPDATE ON config.db_patch_dependencies
FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates');
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1015', :eg_version); -- Bmagic/kmlussier
+INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1016', :eg_version); -- kmlussier/miker
CREATE TABLE config.bib_source (
id SERIAL PRIMARY KEY,
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.data.add_parts_for_biblio_fingerprint.sql b/Open-ILS/src/sql/Pg/upgrade/1016.data.add_parts_for_biblio_fingerprint.sql
similarity index 84%
rename from Open-ILS/src/sql/Pg/upgrade/XXXX.data.add_parts_for_biblio_fingerprint.sql
rename to Open-ILS/src/sql/Pg/upgrade/1016.data.add_parts_for_biblio_fingerprint.sql
index 37b2b31..a0728be 100644
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.data.add_parts_for_biblio_fingerprint.sql
+++ b/Open-ILS/src/sql/Pg/upgrade/1016.data.add_parts_for_biblio_fingerprint.sql
@@ -1,6 +1,6 @@
BEGIN;
--- SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+SELECT evergreen.upgrade_deps_block_check('1016', :eg_version);
INSERT INTO config.biblio_fingerprint (name, xpath, format)
VALUES (
commit abbcc13856d1fe71ab51272b59d534c22b467a2a
Author: Kathy Lussier <klussier at masslnc.org>
Date: Wed Jun 22 13:59:34 2016 -0400
LP#1553287: Add part information to biblio.fingerprint
Evergreen metarecord searching will sometimes group together different works
that are part of the same series because biblio.fingerprint doesn't incorporate
subfield n or p from the title. For example, bib records for the Mockingjay
movies list the Hunger Games in the 245a with Mockingjay in subfield p.
Without the part information in the fingerprint, Evergreen will group these
movies together with versions of the first Hunger Games book.
This branch adds parts subfields to biblio.fingerprint to allow us to
distinguish among different parts in a series.
Signed-off-by: Kathy Lussier <klussier at masslnc.org>
Signed-off-by: Mike Rylander <mrylander at gmail.com>
diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql
index bb14f6c..9669a87 100644
--- a/Open-ILS/src/sql/Pg/002.schema.config.sql
+++ b/Open-ILS/src/sql/Pg/002.schema.config.sql
@@ -170,6 +170,20 @@ INSERT INTO config.biblio_fingerprint (name, xpath, format, first_word)
TRUE
);
+INSERT INTO config.biblio_fingerprint (name, xpath, format)
+ VALUES (
+ 'PartName',
+ '//mods32:mods/mods32:titleInfo/mods32:partName',
+ 'mods32'
+ );
+
+INSERT INTO config.biblio_fingerprint (name, xpath, format)
+ VALUES (
+ 'PartNumber',
+ '//mods32:mods/mods32:titleInfo/mods32:partNumber',
+ 'mods32'
+ );
+
CREATE TABLE config.metabib_class (
name TEXT PRIMARY KEY,
label TEXT NOT NULL UNIQUE,
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.data.add_parts_for_biblio_fingerprint.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.data.add_parts_for_biblio_fingerprint.sql
new file mode 100644
index 0000000..37b2b31
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.data.add_parts_for_biblio_fingerprint.sql
@@ -0,0 +1,19 @@
+BEGIN;
+
+-- SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+
+INSERT INTO config.biblio_fingerprint (name, xpath, format)
+ VALUES (
+ 'PartName',
+ '//mods32:mods/mods32:titleInfo/mods32:partName',
+ 'mods32'
+ );
+
+INSERT INTO config.biblio_fingerprint (name, xpath, format)
+ VALUES (
+ 'PartNumber',
+ '//mods32:mods/mods32:titleInfo/mods32:partNumber',
+ 'mods32'
+ );
+
+COMMIT;
diff --git a/docs/RELEASE_NOTES_NEXT/Administration/add-parts-to-biblio-fingerprint.adoc b/docs/RELEASE_NOTES_NEXT/Administration/add-parts-to-biblio-fingerprint.adoc
new file mode 100644
index 0000000..3d872ea
--- /dev/null
+++ b/docs/RELEASE_NOTES_NEXT/Administration/add-parts-to-biblio-fingerprint.adoc
@@ -0,0 +1,9 @@
+Bibliographic Fingerprint Improvement
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+The bibliographic fingerprint will now incorporate subfields n and p from MARC
+title fields to better distinguish between records of the same series that
+may share the same title but have a different part. With this change, these
+MARC records will no longer be grouped together in a 'Group Formats & Editions'
+search.
+
+
-----------------------------------------------------------------------
Summary of changes:
Open-ILS/src/sql/Pg/002.schema.config.sql | 16 ++++-
Open-ILS/src/sql/Pg/030.schema.metabib.sql | 5 +-
.../Pg/t/lp1528901_more_precise_fingerprints.pg | 50 +++++++++++
.../1016.data.add_parts_for_biblio_fingerprint.sql | 19 +++++
.../upgrade/1017.schema.update_fingerprinting.sql | 86 ++++++++++++++++++++
.../add-parts-to-biblio-fingerprint.adoc | 9 ++
6 files changed, 182 insertions(+), 3 deletions(-)
create mode 100644 Open-ILS/src/sql/Pg/t/lp1528901_more_precise_fingerprints.pg
create mode 100644 Open-ILS/src/sql/Pg/upgrade/1016.data.add_parts_for_biblio_fingerprint.sql
create mode 100644 Open-ILS/src/sql/Pg/upgrade/1017.schema.update_fingerprinting.sql
create mode 100644 docs/RELEASE_NOTES_NEXT/Administration/add-parts-to-biblio-fingerprint.adoc
hooks/post-receive
--
Evergreen ILS
More information about the open-ils-commits
mailing list