[open-ils-commits] r16009 - in trunk/Open-ILS: examples src/perlmods/OpenILS/Application/Storage/Driver/Pg src/sql/Pg src/sql/Pg/upgrade (miker)

svn at svn.open-ils.org svn at svn.open-ils.org
Fri Mar 26 13:23:47 EDT 2010


Author: miker
Date: 2010-03-26 13:23:43 -0400 (Fri, 26 Mar 2010)
New Revision: 16009

Added:
   trunk/Open-ILS/src/sql/Pg/upgrade/0217.schema.facet_xpath.sql
Modified:
   trunk/Open-ILS/examples/fm_IDL.xml
   trunk/Open-ILS/src/perlmods/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm
   trunk/Open-ILS/src/sql/Pg/002.schema.config.sql
   trunk/Open-ILS/src/sql/Pg/030.schema.metabib.sql
   trunk/Open-ILS/src/sql/Pg/950.data.seed-values.sql
Log:
add support for an XPath expression to pull out just what is needed for facet values

Modified: trunk/Open-ILS/examples/fm_IDL.xml
===================================================================
--- trunk/Open-ILS/examples/fm_IDL.xml	2010-03-26 16:03:02 UTC (rev 16008)
+++ trunk/Open-ILS/examples/fm_IDL.xml	2010-03-26 17:23:43 UTC (rev 16009)
@@ -1514,6 +1514,7 @@
 			<field reporter:label="Format" name="format" reporter:datatype="link"/>
 			<field reporter:label="Search Field" name="search_field" reporter:datatype="bool" />
 			<field reporter:label="Facet Field" name="facet_field" reporter:datatype="bool" />
+			<field reporter:label="Facet XPath" name="facet_xpath" reporter:datatype="text" />
 		</fields>
 		<links>
 			<link field="field_class" reltype="has_a" key="name" map="" class="cmc"/>

Modified: trunk/Open-ILS/src/perlmods/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm
===================================================================
--- trunk/Open-ILS/src/perlmods/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm	2010-03-26 16:03:02 UTC (rev 16008)
+++ trunk/Open-ILS/src/perlmods/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm	2010-03-26 17:23:43 UTC (rev 16009)
@@ -209,8 +209,6 @@
             __PACKAGE__->add_search_field_alias( $cmsa->field_class, $c->{field}, $cmsa->alias );
         }
     }
-
-    return $self->relevance_bumps;
 }
 
 sub initialize_relevance_bumps {

Modified: trunk/Open-ILS/src/sql/Pg/002.schema.config.sql
===================================================================
--- trunk/Open-ILS/src/sql/Pg/002.schema.config.sql	2010-03-26 16:03:02 UTC (rev 16008)
+++ trunk/Open-ILS/src/sql/Pg/002.schema.config.sql	2010-03-26 17:23:43 UTC (rev 16009)
@@ -60,7 +60,7 @@
     install_date    TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
 );
 
-INSERT INTO config.upgrade_log (version) VALUES ('0216'); -- miker
+INSERT INTO config.upgrade_log (version) VALUES ('0217'); -- miker
 
 CREATE TABLE config.bib_source (
 	id		SERIAL	PRIMARY KEY,
@@ -204,7 +204,8 @@
 	weight		INT	NOT NULL DEFAULT 1,
 	format		TEXT	NOT NULL DEFAULT 'mods33',
 	search_field	BOOL	NOT NULL DEFAULT TRUE,
-	facet_field	BOOL	NOT NULL DEFAULT FALSE
+	facet_field	BOOL	NOT NULL DEFAULT FALSE,
+    facet_xpath TEXT
 );
 COMMENT ON TABLE config.metabib_field IS $$
 /*

Modified: trunk/Open-ILS/src/sql/Pg/030.schema.metabib.sql
===================================================================
--- trunk/Open-ILS/src/sql/Pg/030.schema.metabib.sql	2010-03-26 16:03:02 UTC (rev 16008)
+++ trunk/Open-ILS/src/sql/Pg/030.schema.metabib.sql	2010-03-26 17:23:43 UTC (rev 16009)
@@ -240,6 +240,7 @@
     transformed_xml TEXT;
     xml_node    TEXT;
     xml_node_list   TEXT[];
+    facet_text  TEXT;
     raw_text    TEXT;
     curr_text   TEXT;
     joiner      TEXT := default_joiner; -- XXX will index defs supply a joiner?
@@ -300,10 +301,16 @@
             -- insert raw node text for faceting
             IF idx.facet_field THEN
 
+                IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
+                    facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+                ELSE
+                    facet_text := curr_text;
+                END IF;
+
                 output_row.field_class = idx.field_class;
-                output_row.field = -1 *idx.id;
+                output_row.field = -1 * idx.id;
                 output_row.source = rid;
-                output_row.value = BTRIM(REGEXP_REPLACE(curr_text, E'\\s+', ' ', 'g'));
+                output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
 
                 RETURN NEXT output_row;
             END IF;

Modified: trunk/Open-ILS/src/sql/Pg/950.data.seed-values.sql
===================================================================
--- trunk/Open-ILS/src/sql/Pg/950.data.seed-values.sql	2010-03-26 16:03:02 UTC (rev 16008)
+++ trunk/Open-ILS/src/sql/Pg/950.data.seed-values.sql	2010-03-26 17:23:43 UTC (rev 16009)
@@ -17,9 +17,10 @@
 INSERT INTO config.metabib_class ( name, label ) VALUES ( 'subject', oils_i18n_gettext('subject', 'Subject', 'cmc', 'name') );
 INSERT INTO config.metabib_class ( name, label ) VALUES ( 'series', oils_i18n_gettext('series', 'Series', 'cmc', 'name') );
 
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, facet_field ) VALUES 
+    (1, 'series', 'seriestitle', oils_i18n_gettext(1, 'Series Title', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:relatedItem[@type="series"]/mods32:titleInfo$$, TRUE );
+
 INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES 
-    (1, 'series', 'seriestitle', oils_i18n_gettext(1, 'Series Title', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:relatedItem[@type="series"]/mods32:titleInfo$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES 
     (2, 'title', 'abbreviated', oils_i18n_gettext(2, 'Abbreviated Title', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='abbreviated')]$$ );
 INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES 
     (3, 'title', 'translated', oils_i18n_gettext(3, 'Translated Title', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='translated')]$$ );
@@ -29,22 +30,24 @@
     (5, 'title', 'uniform', oils_i18n_gettext(5, 'Uniform Title', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='uniform')]$$ );
 INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES 
     (6, 'title', 'proper', oils_i18n_gettext(6, 'Title Proper', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and not (@type)]$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES 
-    (7, 'author', 'corporate', oils_i18n_gettext(7, 'Corporate Author', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:name[@type='corporate']/mods32:namePart[../mods32:role/mods32:roleTerm[text()='creator']]$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES 
-    (8, 'author', 'personal', oils_i18n_gettext(8, 'Personal Author', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:name[@type='personal']/mods32:namePart[../mods32:role/mods32:roleTerm[text()='creator']]$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES 
-    (9, 'author', 'conference', oils_i18n_gettext(9, 'Conference Author', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:name[@type='conference']/mods32:namePart[../mods32:role/mods32:roleTerm[text()='creator']]$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES 
-    (10, 'author', 'other', oils_i18n_gettext(10, 'Other Author', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:name[@type='personal']/mods32:namePart[not(../mods32:role)]$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES 
-    (11, 'subject', 'geographic', oils_i18n_gettext(11, 'Geographic Subject', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:subject/mods32:geographic$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES 
-    (12, 'subject', 'name', oils_i18n_gettext(12, 'Name Subject', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:subject/mods32:name$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES 
-    (13, 'subject', 'temporal', oils_i18n_gettext(13, 'Temporal Subject', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:subject/mods32:temporal$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES 
-    (14, 'subject', 'topic', oils_i18n_gettext(14, 'Topic Subject', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:subject/mods32:topic$$ );
+
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, facet_xpath, facet_field ) VALUES 
+    (7, 'author', 'corporate', oils_i18n_gettext(7, 'Corporate Author', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:name[@type='corporate' and mods32:role/mods32:roleTerm[text()='creator']]$$, $$*[local-name()='namePart']$$, TRUE );
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, facet_xpath, facet_field ) VALUES 
+    (8, 'author', 'personal', oils_i18n_gettext(8, 'Personal Author', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:name[@type='personal' and mods32:role/mods32:roleTerm[text()='creator']]$$, $$*[local-name()='namePart']$$, TRUE );
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, facet_xpath, facet_field ) VALUES 
+    (9, 'author', 'conference', oils_i18n_gettext(9, 'Conference Author', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:name[@type='conference' and mods32:role/mods32:roleTerm[text()='creator']]$$, $$*[local-name()='namePart']$$, TRUE );
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, facet_xpath, facet_field ) VALUES 
+    (10, 'author', 'other', oils_i18n_gettext(10, 'Other Author', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:name[@type='personal' and not(mods32:role)]$$, $$*[local-name()='namePart']$$, TRUE );
+
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, facet_field ) VALUES 
+    (11, 'subject', 'geographic', oils_i18n_gettext(11, 'Geographic Subject', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:subject/mods32:geographic$$, TRUE );
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, facet_xpath, facet_field ) VALUES 
+    (12, 'subject', 'name', oils_i18n_gettext(12, 'Name Subject', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:subject/mods32:name$$, $$*[local-name()='namePart']$$, TRUE );
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, x, facet_fieldpath ) VALUES 
+    (13, 'subject', 'temporal', oils_i18n_gettext(13, 'Temporal Subject', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:subject/mods32:temporal$$, TRUE );
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, facet_field ) VALUES 
+    (14, 'subject', 'topic', oils_i18n_gettext(14, 'Topic Subject', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:subject/mods32:topic$$, TRUE );
 --INSERT INTO config.metabib_field ( id, field_class, name, format, xpath ) VALUES 
 --  ( id, field_class, name, xpath ) VALUES ( 'subject', 'genre', 'mods32', $$//mods32:mods/mods32:genre$$ );
 INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES 

Added: trunk/Open-ILS/src/sql/Pg/upgrade/0217.schema.facet_xpath.sql
===================================================================
--- trunk/Open-ILS/src/sql/Pg/upgrade/0217.schema.facet_xpath.sql	                        (rev 0)
+++ trunk/Open-ILS/src/sql/Pg/upgrade/0217.schema.facet_xpath.sql	2010-03-26 17:23:43 UTC (rev 16009)
@@ -0,0 +1,123 @@
+
+BEGIN;
+
+INSERT INTO config.upgrade_log (version) VALUES ('0217'); --miker
+
+ALTER TABLE config.metabib_field ADD COLUMN facet_xpath TEXT;
+
+UPDATE config.metabib_field SET facet_field=TRUE WHERE id = 1;
+UPDATE config.metabib_field SET xpath=$$//mods32:mods/mods32:name[@type='corporate' and mods32:role/mods32:roleTerm[text()='creator']]$$, facet_field=TRUE, facet_xpath=$$*[local-name()='namePart']$$ WHERE id = 7;
+UPDATE config.metabib_field SET xpath=$$//mods32:mods/mods32:name[@type='personal' and mods32:role/mods32:roleTerm[text()='creator']]$$, facet_field=TRUE, facet_xpath=$$*[local-name()='namePart']$$ WHERE id = 8;
+UPDATE config.metabib_field SET xpath=$$//mods32:mods/mods32:name[@type='conference' and mods32:role/mods32:roleTerm[text()='creator']]$$, facet_field=TRUE, facet_xpath=$$*[local-name()='namePart']$$ WHERE id = 9;
+UPDATE config.metabib_field SET xpath=$$//mods32:mods/mods32:name[@type='personal' and not(mods32:role)]$$, facet_field=TRUE, facet_xpath=$$*[local-name()='namePart']$$ WHERE id = 10;
+
+UPDATE config.metabib_field SET facet_field=TRUE WHERE id = 11;
+UPDATE config.metabib_field SET facet_field=TRUE , facet_xpath=$$*[local-name()='namePart']$$ WHERE id = 12;
+UPDATE config.metabib_field SET facet_field=TRUE WHERE id = 13;
+UPDATE config.metabib_field SET facet_field=TRUE WHERE id = 14;
+
+CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
+DECLARE
+    bib     biblio.record_entry%ROWTYPE;
+    idx     config.metabib_field%ROWTYPE;
+    xfrm        config.xml_transform%ROWTYPE;
+    prev_xfrm   TEXT;
+    transformed_xml TEXT;
+    xml_node    TEXT;
+    xml_node_list   TEXT[];
+    facet_text  TEXT;
+    raw_text    TEXT;
+    curr_text   TEXT;
+    joiner      TEXT := default_joiner; -- XXX will index defs supply a joiner?
+    output_row  metabib.field_entry_template%ROWTYPE;
+BEGIN
+
+    -- Get the record
+    SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
+
+    -- Loop over the indexing entries
+    FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP
+
+        SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
+
+        -- See if we can skip the XSLT ... it's expensive
+        IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
+            -- Can't skip the transform
+            IF xfrm.xslt <> '---' THEN
+                transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
+            ELSE
+                transformed_xml := bib.marc;
+            END IF;
+
+            prev_xfrm := xfrm.name;
+        END IF;
+
+        xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+
+        raw_text := NULL;
+        FOR xml_node IN SELECT x FROM explode_array(xml_node_list) AS x LOOP
+            CONTINUE WHEN xml_node !~ E'^\\s*<';
+
+            curr_text := ARRAY_TO_STRING(
+                oils_xpath( '//text()',
+                    REGEXP_REPLACE( -- This escapes all &s not followed by "amp;".  Data ise returned from oils_xpath (above) in UTF-8, not entity encoded
+                        REGEXP_REPLACE( -- This escapes embeded <s
+                            xml_node,
+                            $re$(>[^<]+)(<)([^>]+<)$re$,
+                            E'\\1&lt;\\3',
+                            'g'
+                        ),
+                        '&(?!amp;)',
+                        '&amp;',
+                        'g'
+                    )
+                ),
+                ' '
+            );
+
+            CONTINUE WHEN curr_text IS NULL OR curr_text = '';
+
+            IF raw_text IS NOT NULL THEN
+                raw_text := raw_text || joiner;
+            END IF;
+
+            raw_text := COALESCE(raw_text,'') || curr_text;
+
+            -- insert raw node text for faceting
+            IF idx.facet_field THEN
+
+                IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
+                    facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+                ELSE
+                    facet_text := curr_text;
+                END IF;
+
+                output_row.field_class = idx.field_class;
+                output_row.field = -1 * idx.id;
+                output_row.source = rid;
+                output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
+
+                RETURN NEXT output_row;
+            END IF;
+
+        END LOOP;
+
+        CONTINUE WHEN raw_text IS NULL OR raw_text = '';
+
+        -- insert combined node text for searching
+        IF idx.search_field THEN
+            output_row.field_class = idx.field_class;
+            output_row.field = idx.id;
+            output_row.source = rid;
+            output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
+
+            RETURN NEXT output_row;
+        END IF;
+
+    END LOOP;
+
+END;
+$func$ LANGUAGE PLPGSQL;
+
+COMMIT;
+



More information about the open-ils-commits mailing list