[open-ils-commits] [GIT] Evergreen ILS branch user/senator/extract-metabib-field-entry-fix created. bf469ab6242d396dd3ca8e40dec2732246cd1073
Evergreen Git
git at git.evergreen-ils.org
Thu Jun 30 14:32:47 EDT 2011
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Evergreen ILS".
The branch, user/senator/extract-metabib-field-entry-fix has been created
at bf469ab6242d396dd3ca8e40dec2732246cd1073 (commit)
- Log -----------------------------------------------------------------
commit bf469ab6242d396dd3ca8e40dec2732246cd1073
Author: Lebbeous Fogle-Weekley <lebbeous at esilibrary.com>
Date: Thu Jun 30 14:31:00 2011 -0400
Correct facet entry extraction from bibs upon ingest
Previously, any normalizers that should apply to facets (defined
by rows in config.metabib_field_index_norm_map) were forgotten.
Signed-off-by: Lebbeous Fogle-Weekley <lebbeous at esilibrary.com>
diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql
index 4c4b95a..6128dbc 100644
--- a/Open-ILS/src/sql/Pg/002.schema.config.sql
+++ b/Open-ILS/src/sql/Pg/002.schema.config.sql
@@ -86,7 +86,7 @@ CREATE TRIGGER no_overlapping_deps
BEFORE INSERT OR UPDATE ON config.db_patch_dependencies
FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates');
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0569', :eg_version); -- miker
+INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0xxx', :eg_version); -- senator
CREATE TABLE config.bib_source (
id SERIAL PRIMARY KEY,
diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
index 17a2383..24fa87d 100644
--- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql
+++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
@@ -276,6 +276,7 @@ DECLARE
facet_text TEXT;
raw_text TEXT;
curr_text TEXT;
+ normalizer RECORD;
joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
output_row metabib.field_entry_template%ROWTYPE;
BEGIN
@@ -331,7 +332,7 @@ BEGIN
raw_text := COALESCE(raw_text,'') || curr_text;
- -- insert raw node text for faceting
+ -- insert node text for faceting
IF idx.facet_field THEN
IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
@@ -340,6 +341,26 @@ BEGIN
facet_text := curr_text;
END IF;
+ -- apply any normalizers
+ FOR normalizer IN
+ SELECT n.func AS func,
+ n.param_count AS param_count,
+ m.params AS params
+ FROM config.index_normalizer n
+ JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
+ WHERE m.field = idx.id AND m.pos < 0
+ ORDER BY m.pos LOOP
+ EXECUTE 'SELECT ' || normalizer.func || '(' ||
+ quote_literal( facet_text ) ||
+ CASE
+ WHEN normalizer.param_count > 0
+ THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
+ ELSE ''
+ END ||
+ ')' INTO facet_text;
+
+ END LOOP;
+
output_row.field_class = idx.field_class;
output_row.field = -1 * idx.id;
output_row.source = rid;
diff --git a/Open-ILS/src/sql/Pg/upgrade/0xxx.schema.extract-metabib-field-entry-fix.sql b/Open-ILS/src/sql/Pg/upgrade/0xxx.schema.extract-metabib-field-entry-fix.sql
new file mode 100644
index 0000000..cf8195f
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/upgrade/0xxx.schema.extract-metabib-field-entry-fix.sql
@@ -0,0 +1,129 @@
+BEGIN;
+
+SELECT evergreen.upgrade_deps_block_check('0xxx', :eg_version);
+
+CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
+DECLARE
+ bib biblio.record_entry%ROWTYPE;
+ idx config.metabib_field%ROWTYPE;
+ xfrm config.xml_transform%ROWTYPE;
+ prev_xfrm TEXT;
+ transformed_xml TEXT;
+ xml_node TEXT;
+ xml_node_list TEXT[];
+ facet_text TEXT;
+ raw_text TEXT;
+ curr_text TEXT;
+ normalizer RECORD;
+ joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
+ output_row metabib.field_entry_template%ROWTYPE;
+BEGIN
+
+ -- Get the record
+ SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
+
+ -- Loop over the indexing entries
+ FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP
+
+ SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
+
+ -- See if we can skip the XSLT ... it's expensive
+ IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
+ -- Can't skip the transform
+ IF xfrm.xslt <> '---' THEN
+ transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
+ ELSE
+ transformed_xml := bib.marc;
+ END IF;
+
+ prev_xfrm := xfrm.name;
+ END IF;
+
+ xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+
+ raw_text := NULL;
+ FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
+ CONTINUE WHEN xml_node !~ E'^\\s*<';
+
+ curr_text := ARRAY_TO_STRING(
+ oils_xpath( '//text()',
+ REGEXP_REPLACE( -- This escapes all &s not followed by "amp;". Data ise returned from oils_xpath (above) in UTF-8, not entity encoded
+ REGEXP_REPLACE( -- This escapes embeded <s
+ xml_node,
+ $re$(>[^<]+)(<)([^>]+<)$re$,
+ E'\\1<\\3',
+ 'g'
+ ),
+ '&(?!amp;)',
+ '&',
+ 'g'
+ )
+ ),
+ ' '
+ );
+
+ CONTINUE WHEN curr_text IS NULL OR curr_text = '';
+
+ IF raw_text IS NOT NULL THEN
+ raw_text := raw_text || joiner;
+ END IF;
+
+ raw_text := COALESCE(raw_text,'') || curr_text;
+
+ -- insert node text for faceting
+ IF idx.facet_field THEN
+
+ IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
+ facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+ ELSE
+ facet_text := curr_text;
+ END IF;
+
+ -- apply any normalizers
+ FOR normalizer IN
+ SELECT n.func AS func,
+ n.param_count AS param_count,
+ m.params AS params
+ FROM config.index_normalizer n
+ JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
+ WHERE m.field = idx.id AND m.pos < 0
+ ORDER BY m.pos LOOP
+ EXECUTE 'SELECT ' || normalizer.func || '(' ||
+ quote_literal( facet_text ) ||
+ CASE
+ WHEN normalizer.param_count > 0
+ THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
+ ELSE ''
+ END ||
+ ')' INTO facet_text;
+
+ END LOOP;
+
+ output_row.field_class = idx.field_class;
+ output_row.field = -1 * idx.id;
+ output_row.source = rid;
+ output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
+
+ RETURN NEXT output_row;
+ END IF;
+
+ END LOOP;
+
+ CONTINUE WHEN raw_text IS NULL OR raw_text = '';
+
+ -- insert combined node text for searching
+ IF idx.search_field THEN
+ output_row.field_class = idx.field_class;
+ output_row.field = idx.id;
+ output_row.source = rid;
+ output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
+
+ RETURN NEXT output_row;
+ END IF;
+
+ END LOOP;
+
+END;
+$func$ LANGUAGE PLPGSQL;
+
+COMMIT;
-----------------------------------------------------------------------
hooks/post-receive
--
Evergreen ILS
More information about the open-ils-commits
mailing list