[open-ils-commits] ***SPAM*** [GIT] Evergreen ILS branch master updated. 7e488141a61b30f431a6cf551922aaa2863bc77d

Evergreen Git git at git.evergreen-ils.org
Tue Mar 18 21:40:16 EDT 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Evergreen ILS".

The branch, master has been updated
       via  7e488141a61b30f431a6cf551922aaa2863bc77d (commit)
       via  8e5dc73ba37421898f3d5d8bfa0e35b74715d8de (commit)
       via  624e0dc7066033051b5715afe7dbf1e3fbb88749 (commit)
       via  cdb49401755d803d51b863de06d1df08345342c4 (commit)
       via  a3a81ab4f00db4207f559cf309a59d2b30cac3ec (commit)
       via  d3987e2030981ea8882a6d688f81e4eb7be3397b (commit)
       via  6d079ea592f5982225689ec6abce9e602eff7b10 (commit)
      from  d59d8f95af806bf218f3c45fc558a582a91393da (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 7e488141a61b30f431a6cf551922aaa2863bc77d
Author: Ben Shum <bshum at biblio.org>
Date:   Tue Mar 18 21:39:10 2014 -0400

    LP#1243023: Stamping upgrade script for oils_xpath-tweaks
    
    Signed-off-by: Ben Shum <bshum at biblio.org>

diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql
index 5efa89b..7c30be7 100644
--- a/Open-ILS/src/sql/Pg/002.schema.config.sql
+++ b/Open-ILS/src/sql/Pg/002.schema.config.sql
@@ -91,7 +91,7 @@ CREATE TRIGGER no_overlapping_deps
     BEFORE INSERT OR UPDATE ON config.db_patch_dependencies
     FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates');
 
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0873', :eg_version); -- miker/dbwells
+INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0874', :eg_version); -- dbwells/bshum
 
 CREATE TABLE config.bib_source (
 	id		SERIAL	PRIMARY KEY,
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.function.oils_xpath-tweaks-for-newer-pg.sql b/Open-ILS/src/sql/Pg/upgrade/0874.function.oils_xpath-tweaks-for-newer-pg.sql
similarity index 99%
rename from Open-ILS/src/sql/Pg/upgrade/XXXX.function.oils_xpath-tweaks-for-newer-pg.sql
rename to Open-ILS/src/sql/Pg/upgrade/0874.function.oils_xpath-tweaks-for-newer-pg.sql
index d017e75..ee531e0 100644
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.function.oils_xpath-tweaks-for-newer-pg.sql
+++ b/Open-ILS/src/sql/Pg/upgrade/0874.function.oils_xpath-tweaks-for-newer-pg.sql
@@ -1,6 +1,6 @@
 BEGIN;
 
---SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+SELECT evergreen.upgrade_deps_block_check('0874', :eg_version);
 
 DROP FUNCTION IF EXISTS evergreen.oils_xpath( TEXT, TEXT, ANYARRAY);
 DROP FUNCTION IF EXISTS public.oils_xpath(TEXT, TEXT, ANYARRAY);

commit 8e5dc73ba37421898f3d5d8bfa0e35b74715d8de
Author: Dan Wells <dbw2 at calvin.edu>
Date:   Tue Mar 18 21:15:25 2014 -0400

    LP#1243023: Add optional quick reingest to upgrade script
    
    Signed-off-by: Dan Wells <dbw2 at calvin.edu>
    Signed-off-by: Ben Shum <bshum at biblio.org>

diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.function.oils_xpath-tweaks-for-newer-pg.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.oils_xpath-tweaks-for-newer-pg.sql
index 14cd1a0..d017e75 100644
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.function.oils_xpath-tweaks-for-newer-pg.sql
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.oils_xpath-tweaks-for-newer-pg.sql
@@ -601,3 +601,11 @@ END;
 $func$ LANGUAGE PLPGSQL;
 
 COMMIT;
+
+\qecho This script will now attempt a "quick fix" of browse_entry only.
+\qecho If you have issues, a browse or full reingest is recommended.
+\qecho You may cancel now without losing the effect of the rest of the
+\qecho upgrade script, and arrange the reingest later.
+
+UPDATE metabib.browse_entry SET value=evergreen.xml_famous5_to_text(value) WHERE value LIKE '%&%';
+UPDATE metabib.browse_entry SET sort_value=evergreen.xml_famous5_to_text(sort_value) WHERE sort_value LIKE '%&%';

commit 624e0dc7066033051b5715afe7dbf1e3fbb88749
Author: Dan Wells <dbw2 at calvin.edu>
Date:   Mon Mar 10 17:02:33 2014 -0400

    LP#1243023 Add upgrade script and pgTAP test
    
    For fixes to XPATH extraction encoding.
    
    Signed-off-by: Dan Wells <dbw2 at calvin.edu>
    Signed-off-by: Remington Steed <rjs7 at calvin.edu>
    Signed-off-by: Ben Shum <bshum at biblio.org>

diff --git a/Open-ILS/src/sql/Pg/t/regress/lp1243023_decoded_xpath_extracts.pg b/Open-ILS/src/sql/Pg/t/regress/lp1243023_decoded_xpath_extracts.pg
new file mode 100644
index 0000000..095c8b9
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/t/regress/lp1243023_decoded_xpath_extracts.pg
@@ -0,0 +1,10 @@
+BEGIN;
+
+SELECT plan(4);
+
+SELECT is((oils_xpath('//*', '<field attr="&quot;quoted&quot;">this &amp; that &lt; &gt;</field>'))[1], '<field attr="&quot;quoted&quot;">this &amp; that &lt; &gt;</field>', 'oils_xpath - encoded node extracts');
+SELECT is((oils_xpath('//text()', '<field attr="&quot;quoted&quot;">this &amp; that &lt; &gt;</field>'))[1], 'this & that < >', 'oils_xpath - decoded text() extracts');
+SELECT is((oils_xpath('//*/@attr', '<field attr="&quot;quoted&quot;">this &amp; that &lt; &gt;</field>'))[1], '"quoted"', 'oils_xpath - decoded &quot; in attribute extracts');
+SELECT is((oils_xpath('//*/@attr', E'<field attr=\'&apos;quoted&apos;\'>this &amp; that &lt; &gt;</field>'))[1], E'\'quoted\'', 'oils_xpath - decoded &apos; in attribute extracts');
+
+ROLLBACK;
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.function.oils_xpath-tweaks-for-newer-pg.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.oils_xpath-tweaks-for-newer-pg.sql
new file mode 100644
index 0000000..14cd1a0
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.oils_xpath-tweaks-for-newer-pg.sql
@@ -0,0 +1,603 @@
+BEGIN;
+
+--SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+
+DROP FUNCTION IF EXISTS evergreen.oils_xpath( TEXT, TEXT, ANYARRAY);
+DROP FUNCTION IF EXISTS public.oils_xpath(TEXT, TEXT, ANYARRAY);
+DROP FUNCTION IF EXISTS public.oils_xpath(TEXT, TEXT);
+DROP FUNCTION IF EXISTS public.oils_xslt_process(TEXT, TEXT);
+
+CREATE OR REPLACE FUNCTION evergreen.xml_famous5_to_text( TEXT ) RETURNS TEXT AS $f$
+ SELECT REPLACE(
+            REPLACE(
+                REPLACE(
+                    REPLACE(
+                        REPLACE( $1, '&lt;', '<'),
+                        '&gt;',
+                        '>'
+                    ),
+                    '&apos;',
+                    $$'$$
+                ), -- ' ... vim
+                '&quot;',
+                '"'
+            ),
+            '&amp;',
+            '&'
+        );
+$f$ LANGUAGE SQL IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION evergreen.oils_xpath ( TEXT, TEXT, TEXT[] ) RETURNS TEXT[] AS $f$
+    SELECT  ARRAY_AGG(
+                CASE WHEN strpos(x,'<') = 1 THEN -- It's an element node
+                    x
+                ELSE -- it's text-ish
+                    evergreen.xml_famous5_to_text(x)
+                END
+            )
+      FROM  UNNEST(XPATH( $1, $2::XML, $3 )::TEXT[]) x;
+$f$ LANGUAGE SQL IMMUTABLE;
+
+-- Trust me, it's just simpler to duplicate these...
+CREATE OR REPLACE FUNCTION evergreen.oils_xpath ( TEXT, TEXT ) RETURNS TEXT[] AS $f$
+    SELECT  ARRAY_AGG(
+                CASE WHEN strpos(x,'<') = 1 THEN -- It's an element node
+                    x
+                ELSE -- it's text-ish
+                    evergreen.xml_famous5_to_text(x)
+                END
+            )
+      FROM  UNNEST(XPATH( $1, $2::XML)::TEXT[]) x;
+$f$ LANGUAGE SQL IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION evergreen.oils_xslt_process(TEXT, TEXT) RETURNS TEXT AS $func$
+  use strict;
+
+  use XML::LibXSLT;
+  use XML::LibXML;
+
+  my $doc = shift;
+  my $xslt = shift;
+
+  # The following approach uses the older XML::LibXML 1.69 / XML::LibXSLT 1.68
+  # methods of parsing XML documents and stylesheets, in the hopes of broader
+  # compatibility with distributions
+  my $parser = $_SHARED{'_xslt_process'}{parsers}{xml} || XML::LibXML->new();
+
+  # Cache the XML parser, if we do not already have one
+  $_SHARED{'_xslt_process'}{parsers}{xml} = $parser
+    unless ($_SHARED{'_xslt_process'}{parsers}{xml});
+
+  my $xslt_parser = $_SHARED{'_xslt_process'}{parsers}{xslt} || XML::LibXSLT->new();
+
+  # Cache the XSLT processor, if we do not already have one
+  $_SHARED{'_xslt_process'}{parsers}{xslt} = $xslt_parser
+    unless ($_SHARED{'_xslt_process'}{parsers}{xslt});
+
+  my $stylesheet = $_SHARED{'_xslt_process'}{stylesheets}{$xslt} ||
+    $xslt_parser->parse_stylesheet( $parser->parse_string($xslt) );
+
+  $_SHARED{'_xslt_process'}{stylesheets}{$xslt} = $stylesheet
+    unless ($_SHARED{'_xslt_process'}{stylesheets}{$xslt});
+
+  return $stylesheet->output_string(
+    $stylesheet->transform(
+      $parser->parse_string($doc)
+    )
+  );
+
+$func$ LANGUAGE 'plperlu' STRICT IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION authority.simple_heading_set( marcxml TEXT ) RETURNS SETOF authority.simple_heading AS $func$
+DECLARE
+    res             authority.simple_heading%ROWTYPE;
+    acsaf           authority.control_set_authority_field%ROWTYPE;
+    tag_used        TEXT;
+    nfi_used        TEXT;
+    sf              TEXT;
+    cset            INT;
+    heading_text    TEXT;
+    joiner_text     TEXT;
+    sort_text       TEXT;
+    tmp_text        TEXT;
+    tmp_xml         TEXT;
+    first_sf        BOOL;
+    auth_id         INT DEFAULT COALESCE(NULLIF(oils_xpath_string('//*[@tag="901"]/*[local-name()="subfield" and @code="c"]', marcxml), ''), '0')::INT; 
+BEGIN
+
+    SELECT control_set INTO cset FROM authority.record_entry WHERE id = auth_id;
+
+    IF cset IS NULL THEN
+        SELECT  control_set INTO cset
+          FROM  authority.control_set_authority_field
+          WHERE tag IN ( SELECT  UNNEST(XPATH('//*[starts-with(@tag,"1")]/@tag',marcxml::XML)::TEXT[]))
+          LIMIT 1;
+    END IF;
+
+    res.record := auth_id;
+
+    FOR acsaf IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset LOOP
+
+        res.atag := acsaf.id;
+        tag_used := acsaf.tag;
+        nfi_used := acsaf.nfi;
+        joiner_text := COALESCE(acsaf.joiner, ' ');
+
+        FOR tmp_xml IN SELECT UNNEST(XPATH('//*[@tag="'||tag_used||'"]', marcxml::XML)::TEXT[]) LOOP
+
+            heading_text := COALESCE(
+                oils_xpath_string('./*[contains("'||acsaf.display_sf_list||'",@code)]', tmp_xml, joiner_text),
+                ''
+            );
+
+            IF nfi_used IS NOT NULL THEN
+
+                sort_text := SUBSTRING(
+                    heading_text FROM
+                    COALESCE(
+                        NULLIF(
+                            REGEXP_REPLACE(
+                                oils_xpath_string('./@ind'||nfi_used, tmp_xml::TEXT),
+                                $$\D+$$,
+                                '',
+                                'g'
+                            ),
+                            ''
+                        )::INT,
+                        0
+                    ) + 1
+                );
+
+            ELSE
+                sort_text := heading_text;
+            END IF;
+
+            IF heading_text IS NOT NULL AND heading_text <> '' THEN
+                res.value := heading_text;
+                res.sort_value := public.naco_normalize(sort_text);
+                res.index_vector = to_tsvector('keyword'::regconfig, res.sort_value);
+                RETURN NEXT res;
+            END IF;
+
+        END LOOP;
+
+    END LOOP;
+
+    RETURN;
+END;
+$func$ LANGUAGE PLPGSQL IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION url_verify.extract_urls ( session_id INT, item_id INT ) RETURNS INT AS $$
+DECLARE
+    last_seen_tag TEXT;
+    current_tag TEXT;
+    current_sf TEXT;
+    current_url TEXT;
+    current_ord INT;
+    current_url_pos INT;
+    current_selector url_verify.url_selector%ROWTYPE;
+BEGIN
+    current_ord := 1;
+
+    FOR current_selector IN SELECT * FROM url_verify.url_selector s WHERE s.session = session_id LOOP
+        current_url_pos := 1;
+        LOOP
+            SELECT  (oils_xpath(current_selector.xpath || '/text()', b.marc))[current_url_pos] INTO current_url
+              FROM  biblio.record_entry b
+                    JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
+              WHERE c.id = item_id;
+
+            EXIT WHEN current_url IS NULL;
+
+            SELECT  (oils_xpath(current_selector.xpath || '/../@tag', b.marc))[current_url_pos] INTO current_tag
+              FROM  biblio.record_entry b
+                    JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
+              WHERE c.id = item_id;
+
+            IF current_tag IS NULL THEN
+                current_tag := last_seen_tag;
+            ELSE
+                last_seen_tag := current_tag;
+            END IF;
+
+            SELECT  (oils_xpath(current_selector.xpath || '/@code', b.marc))[current_url_pos] INTO current_sf
+              FROM  biblio.record_entry b
+                    JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
+              WHERE c.id = item_id;
+
+            INSERT INTO url_verify.url (session, item, url_selector, tag, subfield, ord, full_url)
+              VALUES ( session_id, item_id, current_selector.id, current_tag, current_sf, current_ord, current_url);
+
+            current_url_pos := current_url_pos + 1;
+            current_ord := current_ord + 1;
+        END LOOP;
+    END LOOP;
+
+    RETURN current_ord - 1;
+END;
+$$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
+DECLARE
+    bib     biblio.record_entry%ROWTYPE;
+    idx     config.metabib_field%ROWTYPE;
+    xfrm        config.xml_transform%ROWTYPE;
+    prev_xfrm   TEXT;
+    transformed_xml TEXT;
+    xml_node    TEXT;
+    xml_node_list   TEXT[];
+    facet_text  TEXT;
+    browse_text TEXT;
+    sort_value  TEXT;
+    raw_text    TEXT;
+    curr_text   TEXT;
+    joiner      TEXT := default_joiner; -- XXX will index defs supply a joiner?
+    authority_text TEXT;
+    authority_link BIGINT;
+    output_row  metabib.field_entry_template%ROWTYPE;
+BEGIN
+
+    -- Start out with no field-use bools set
+    output_row.browse_field = FALSE;
+    output_row.facet_field = FALSE;
+    output_row.search_field = FALSE;
+
+    -- Get the record
+    SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
+
+    -- Loop over the indexing entries
+    FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP
+
+        joiner := COALESCE(idx.joiner, default_joiner);
+
+        SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
+
+        -- See if we can skip the XSLT ... it's expensive
+        IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
+            -- Can't skip the transform
+            IF xfrm.xslt <> '---' THEN
+                transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
+            ELSE
+                transformed_xml := bib.marc;
+            END IF;
+
+            prev_xfrm := xfrm.name;
+        END IF;
+
+        xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+
+        raw_text := NULL;
+        FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
+            CONTINUE WHEN xml_node !~ E'^\\s*<';
+
+            -- XXX much of this should be moved into oils_xpath_string...
+            curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
+                oils_xpath( '//text()', -- get the content of all the nodes within the main selected node
+                    REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space
+                ), ' '), ''),  -- throw away morally empty (bankrupt?) strings
+                joiner
+            );
+
+            CONTINUE WHEN curr_text IS NULL OR curr_text = '';
+
+            IF raw_text IS NOT NULL THEN
+                raw_text := raw_text || joiner;
+            END IF;
+
+            raw_text := COALESCE(raw_text,'') || curr_text;
+
+            -- autosuggest/metabib.browse_entry
+            IF idx.browse_field THEN
+
+                IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
+                    browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+                ELSE
+                    browse_text := curr_text;
+                END IF;
+
+                IF idx.browse_sort_xpath IS NOT NULL AND
+                    idx.browse_sort_xpath <> '' THEN
+
+                    sort_value := oils_xpath_string(
+                        idx.browse_sort_xpath, xml_node, joiner,
+                        ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
+                    );
+                ELSE
+                    sort_value := browse_text;
+                END IF;
+
+                output_row.field_class = idx.field_class;
+                output_row.field = idx.id;
+                output_row.source = rid;
+                output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
+                output_row.sort_value :=
+                    public.naco_normalize(sort_value);
+
+                output_row.authority := NULL;
+
+                IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
+                    authority_text := oils_xpath_string(
+                        idx.authority_xpath, xml_node, joiner,
+                        ARRAY[
+                            ARRAY[xfrm.prefix, xfrm.namespace_uri],
+                            ARRAY['xlink','http://www.w3.org/1999/xlink']
+                        ]
+                    );
+
+                    IF authority_text ~ '^\d+$' THEN
+                        authority_link := authority_text::BIGINT;
+                        PERFORM * FROM authority.record_entry WHERE id = authority_link;
+                        IF FOUND THEN
+                            output_row.authority := authority_link;
+                        END IF;
+                    END IF;
+
+                END IF;
+
+                output_row.browse_field = TRUE;
+                -- Returning browse rows with search_field = true for search+browse
+                -- configs allows us to retain granularity of being able to search
+                -- browse fields with "starts with" type operators (for example, for
+                -- titles of songs in music albums)
+                IF idx.search_field THEN
+                    output_row.search_field = TRUE;
+                END IF;
+                RETURN NEXT output_row;
+                output_row.browse_field = FALSE;
+                output_row.search_field = FALSE;
+                output_row.sort_value := NULL;
+            END IF;
+
+            -- insert raw node text for faceting
+            IF idx.facet_field THEN
+
+                IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
+                    facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+                ELSE
+                    facet_text := curr_text;
+                END IF;
+
+                output_row.field_class = idx.field_class;
+                output_row.field = -1 * idx.id;
+                output_row.source = rid;
+                output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
+
+                output_row.facet_field = TRUE;
+                RETURN NEXT output_row;
+                output_row.facet_field = FALSE;
+            END IF;
+
+        END LOOP;
+
+        CONTINUE WHEN raw_text IS NULL OR raw_text = '';
+
+        -- insert combined node text for searching
+        IF idx.search_field THEN
+            output_row.field_class = idx.field_class;
+            output_row.field = idx.id;
+            output_row.source = rid;
+            output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
+
+            output_row.search_field = TRUE;
+            RETURN NEXT output_row;
+            output_row.search_field = FALSE;
+        END IF;
+
+    END LOOP;
+
+END;
+
+$func$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION metabib.reingest_record_attributes (rid BIGINT, pattr_list TEXT[] DEFAULT NULL, prmarc TEXT DEFAULT NULL, rdeleted BOOL DEFAULT TRUE) RETURNS VOID AS $func$
+DECLARE
+    transformed_xml TEXT;
+    rmarc           TEXT := prmarc;
+    tmp_val         TEXT;
+    prev_xfrm       TEXT;
+    normalizer      RECORD;
+    xfrm            config.xml_transform%ROWTYPE;
+    attr_vector     INT[] := '{}'::INT[];
+    attr_vector_tmp INT[];
+    attr_list       TEXT[] := pattr_list;
+    attr_value      TEXT[];
+    norm_attr_value TEXT[];
+    tmp_xml         TEXT;
+    attr_def        config.record_attr_definition%ROWTYPE;
+    ccvm_row        config.coded_value_map%ROWTYPE;
+BEGIN
+
+    IF attr_list IS NULL OR rdeleted THEN -- need to do the full dance on INSERT or undelete
+        SELECT ARRAY_AGG(name) INTO attr_list FROM config.record_attr_definition;
+    END IF;
+
+    IF rmarc IS NULL THEN
+        SELECT marc INTO rmarc FROM biblio.record_entry WHERE id = rid;
+    END IF;
+
+    FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE NOT composite AND name = ANY( attr_list ) ORDER BY format LOOP
+
+        attr_value := '{}'::TEXT[];
+        norm_attr_value := '{}'::TEXT[];
+        attr_vector_tmp := '{}'::INT[];
+
+        SELECT * INTO ccvm_row FROM config.coded_value_map c WHERE c.ctype = attr_def.name LIMIT 1; 
+
+        -- tag+sf attrs only support SVF
+        IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection
+            SELECT  ARRAY[ARRAY_TO_STRING(ARRAY_AGG(value), COALESCE(attr_def.joiner,' '))] INTO attr_value
+              FROM  (SELECT * FROM metabib.full_rec ORDER BY tag, subfield) AS x
+              WHERE record = rid
+                    AND tag LIKE attr_def.tag
+                    AND CASE
+                        WHEN attr_def.sf_list IS NOT NULL 
+                            THEN POSITION(subfield IN attr_def.sf_list) > 0
+                        ELSE TRUE
+                    END
+              GROUP BY tag
+              ORDER BY tag
+              LIMIT 1;
+
+        ELSIF attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field
+            attr_value := vandelay.marc21_extract_fixed_field_list(rmarc, attr_def.fixed_field);
+
+            IF NOT attr_def.multi THEN
+                attr_value := ARRAY[attr_value[1]];
+            END IF;
+
+        ELSIF attr_def.xpath IS NOT NULL THEN -- and xpath expression
+
+            SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format;
+        
+            -- See if we can skip the XSLT ... it's expensive
+            IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
+                -- Can't skip the transform
+                IF xfrm.xslt <> '---' THEN
+                    transformed_xml := oils_xslt_process(rmarc,xfrm.xslt);
+                ELSE
+                    transformed_xml := rmarc;
+                END IF;
+    
+                prev_xfrm := xfrm.name;
+            END IF;
+
+            IF xfrm.name IS NULL THEN
+                -- just grab the marcxml (empty) transform
+                SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1;
+                prev_xfrm := xfrm.name;
+            END IF;
+
+            FOR tmp_xml IN SELECT oils_xpath(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]) LOOP
+                tmp_val := oils_xpath_string(
+                                '//*',
+                                tmp_xml,
+                                COALESCE(attr_def.joiner,' '),
+                                ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
+                            );
+                IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
+                    attr_value := attr_value || tmp_val;
+                    EXIT WHEN NOT attr_def.multi;
+                END IF;
+            END LOOP;
+
+        ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map
+            SELECT  ARRAY_AGG(m.value) INTO attr_value
+              FROM  vandelay.marc21_physical_characteristics(rmarc) v
+                    LEFT JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value)
+              WHERE v.subfield = attr_def.phys_char_sf AND (m.value IS NOT NULL AND BTRIM(m.value) <> '')
+                    AND ( ccvm_row.id IS NULL OR ( ccvm_row.id IS NOT NULL AND v.id IS NOT NULL) );
+
+            IF NOT attr_def.multi THEN
+                attr_value := ARRAY[attr_value[1]];
+            END IF;
+
+        END IF;
+
+                -- apply index normalizers to attr_value
+        FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value) LOOP
+            FOR normalizer IN
+                SELECT  n.func AS func,
+                        n.param_count AS param_count,
+                        m.params AS params
+                  FROM  config.index_normalizer n
+                        JOIN config.record_attr_index_norm_map m ON (m.norm = n.id)
+                  WHERE attr = attr_def.name
+                  ORDER BY m.pos LOOP
+                    EXECUTE 'SELECT ' || normalizer.func || '(' ||
+                    COALESCE( quote_literal( tmp_val ), 'NULL' ) ||
+                        CASE
+                            WHEN normalizer.param_count > 0
+                                THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
+                                ELSE ''
+                            END ||
+                    ')' INTO tmp_val;
+
+            END LOOP;
+            IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
+                norm_attr_value := norm_attr_value || tmp_val;
+            END IF;
+        END LOOP;
+        
+        IF attr_def.filter THEN
+            -- Create unknown uncontrolled values and find the IDs of the values
+            IF ccvm_row.id IS NULL THEN
+                FOR tmp_val IN SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP
+                    IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
+                        BEGIN -- use subtransaction to isolate unique constraint violations
+                            INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val );
+                        EXCEPTION WHEN unique_violation THEN END;
+                    END IF;
+                END LOOP;
+
+                SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.uncontrolled_record_attr_value WHERE attr = attr_def.name AND value = ANY( norm_attr_value );
+            ELSE
+                SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND code = ANY( norm_attr_value );
+            END IF;
+
+            -- Add the new value to the vector
+            attr_vector := attr_vector || attr_vector_tmp;
+        END IF;
+
+        IF attr_def.sorter AND norm_attr_value[1] IS NOT NULL THEN
+            DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
+            INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, norm_attr_value[1]);
+        END IF;
+
+    END LOOP;
+
+/* We may need to rewrite the vlist to contain
+   the intersection of new values for requested
+   attrs and old values for ignored attrs. To
+   do this, we take the old attr vlist and
+   subtract any values that are valid for the
+   requested attrs, and then add back the new
+   set of attr values. */
+
+    IF ARRAY_LENGTH(pattr_list, 1) > 0 THEN 
+        SELECT vlist INTO attr_vector_tmp FROM metabib.record_attr_vector_list WHERE source = rid;
+        SELECT attr_vector_tmp - ARRAY_AGG(id::INT) INTO attr_vector_tmp FROM metabib.full_attr_id_map WHERE attr = ANY (pattr_list);
+        attr_vector := attr_vector || attr_vector_tmp;
+    END IF;
+
+    -- On to composite attributes, now that the record attrs have been pulled.  Processed in name order, so later composite
+    -- attributes can depend on earlier ones.
+    PERFORM metabib.compile_composite_attr_cache_init();
+    FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE composite AND name = ANY( attr_list ) ORDER BY name LOOP
+
+        FOR ccvm_row IN SELECT * FROM config.coded_value_map c WHERE c.ctype = attr_def.name ORDER BY value LOOP
+
+            tmp_val := metabib.compile_composite_attr( ccvm_row.id );
+            CONTINUE WHEN tmp_val IS NULL OR tmp_val = ''; -- nothing to do
+
+            IF attr_def.filter THEN
+                IF attr_vector @@ tmp_val::query_int THEN
+                    attr_vector = attr_vector + intset(ccvm_row.id);
+                    EXIT WHEN NOT attr_def.multi;
+                END IF;
+            END IF;
+
+            IF attr_def.sorter THEN
+                IF attr_vector @@ tmp_val THEN
+                    DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name;
+                    INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, ccvm_row.code);
+                END IF;
+            END IF;
+
+        END LOOP;
+
+    END LOOP;
+
+    IF ARRAY_LENGTH(attr_vector, 1) > 0 THEN
+        IF rdeleted THEN -- initial insert OR revivication
+            DELETE FROM metabib.record_attr_vector_list WHERE source = rid;
+            INSERT INTO metabib.record_attr_vector_list (source, vlist) VALUES (rid, attr_vector);
+        ELSE
+            UPDATE metabib.record_attr_vector_list SET vlist = attr_vector WHERE source = rid;
+        END IF;
+    END IF;
+
+END;
+
+$func$ LANGUAGE PLPGSQL;
+
+COMMIT;

commit cdb49401755d803d51b863de06d1df08345342c4
Author: Mike Rylander <mrylander at gmail.com>
Date:   Mon Mar 3 14:34:30 2014 -0500

    LP#1243023: Clean up string handling variable types
    
    Similar to the previous commit, this was really TEXT, no need to flip
    back and forth between that and XML.
    
    Signed-off-by: Mike Rylander <mrylander at gmail.com>
    Signed-off-by: Dan Wells <dbw2 at calvin.edu>
    Signed-off-by: Ben Shum <bshum at biblio.org>

diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
index b8ad51a..fccde87 100644
--- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql
+++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
@@ -606,24 +606,9 @@ BEGIN
 
             -- XXX much of this should be moved into oils_xpath_string...
             curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
-                oils_xpath( '//text()',
-                    REGEXP_REPLACE(
-                        REGEXP_REPLACE( -- This escapes all &s not followed by "amp;".  Data ise returned from oils_xpath (above) in UTF-8, not entity encoded
-                            REGEXP_REPLACE( -- This escapes embeded <s
-                                xml_node,
-                                $re$(>[^<]+)(<)([^>]+<)$re$,
-                                E'\\1&lt;\\3',
-                                'g'
-                            ),
-                            '&(?!amp;)',
-                            '&amp;',
-                            'g'
-                        ),
-                        E'\\s+',
-                        ' ',
-                        'g'
-                    )
-                ), ' '), ''),
+                oils_xpath( '//text()', -- get the content of all the nodes within the main selected node
+                    REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space
+                ), ' '), ''),  -- throw away morally empty (bankrupt?) strings
                 joiner
             );
 
@@ -1475,7 +1460,7 @@ DECLARE
     attr_list       TEXT[] := pattr_list;
     attr_value      TEXT[];
     norm_attr_value TEXT[];
-    tmp_xml         XML;
+    tmp_xml         TEXT;
     attr_def        config.record_attr_definition%ROWTYPE;
     ccvm_row        config.coded_value_map%ROWTYPE;
 BEGIN
@@ -1540,10 +1525,10 @@ BEGIN
                 prev_xfrm := xfrm.name;
             END IF;
 
-            FOR tmp_xml IN SELECT XPATH(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]) LOOP
+            FOR tmp_xml IN SELECT oils_xpath(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]) LOOP
                 tmp_val := oils_xpath_string(
                                 '//*',
-                                tmp_xml::TEXT,
+                                tmp_xml,
                                 COALESCE(attr_def.joiner,' '),
                                 ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
                             );

commit a3a81ab4f00db4207f559cf309a59d2b30cac3ec
Author: Mike Rylander <mrylander at gmail.com>
Date:   Mon Mar 3 14:28:13 2014 -0500

    LP#1243023: Make sure URLs are not broken
    
    We need to make sure that the URLs we extract contain decoded Famous Five
    characters, not entities.
    
    Signed-off-by: Mike Rylander <mrylander at gmail.com>
    Signed-off-by: Dan Wells <dbw2 at calvin.edu>
    Signed-off-by: Ben Shum <bshum at biblio.org>

diff --git a/Open-ILS/src/sql/Pg/076.functions.url_verify.sql b/Open-ILS/src/sql/Pg/076.functions.url_verify.sql
index dda7fbc..68fc51a 100644
--- a/Open-ILS/src/sql/Pg/076.functions.url_verify.sql
+++ b/Open-ILS/src/sql/Pg/076.functions.url_verify.sql
@@ -75,14 +75,14 @@ BEGIN
     FOR current_selector IN SELECT * FROM url_verify.url_selector s WHERE s.session = session_id LOOP
         current_url_pos := 1;
         LOOP
-            SELECT  (XPATH(current_selector.xpath || '/text()', b.marc::XML))[current_url_pos]::TEXT INTO current_url
+            SELECT  (oils_xpath(current_selector.xpath || '/text()', b.marc))[current_url_pos] INTO current_url
               FROM  biblio.record_entry b
                     JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
               WHERE c.id = item_id;
 
             EXIT WHEN current_url IS NULL;
 
-            SELECT  (XPATH(current_selector.xpath || '/../@tag', b.marc::XML))[current_url_pos]::TEXT INTO current_tag
+            SELECT  (oils_xpath(current_selector.xpath || '/../@tag', b.marc))[current_url_pos] INTO current_tag
               FROM  biblio.record_entry b
                     JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
               WHERE c.id = item_id;
@@ -93,7 +93,7 @@ BEGIN
                 last_seen_tag := current_tag;
             END IF;
 
-            SELECT  (XPATH(current_selector.xpath || '/@code', b.marc::XML))[current_url_pos]::TEXT INTO current_sf
+            SELECT  (oils_xpath(current_selector.xpath || '/@code', b.marc))[current_url_pos] INTO current_sf
               FROM  biblio.record_entry b
                     JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
               WHERE c.id = item_id;

commit d3987e2030981ea8882a6d688f81e4eb7be3397b
Author: Mike Rylander <mrylander at gmail.com>
Date:   Mon Mar 3 14:26:46 2014 -0500

    LP#1243023: Clean up string handling variable types
    
    It was already TEXT, no need to flip back and forth between that
    and XML.
    
    Signed-off-by: Mike Rylander <mrylander at gmail.com>
    Signed-off-by: Dan Wells <dbw2 at calvin.edu>
    Signed-off-by: Ben Shum <bshum at biblio.org>

diff --git a/Open-ILS/src/sql/Pg/011.schema.authority.sql b/Open-ILS/src/sql/Pg/011.schema.authority.sql
index 7033773..d3527f2 100644
--- a/Open-ILS/src/sql/Pg/011.schema.authority.sql
+++ b/Open-ILS/src/sql/Pg/011.schema.authority.sql
@@ -350,10 +350,10 @@ BEGIN
         nfi_used := acsaf.nfi;
         joiner_text := COALESCE(acsaf.joiner, ' ');
 
-        FOR tmp_xml IN SELECT UNNEST(XPATH('//*[@tag="'||tag_used||'"]', marcxml::XML)) LOOP
+        FOR tmp_xml IN SELECT UNNEST(XPATH('//*[@tag="'||tag_used||'"]', marcxml::XML)::TEXT[]) LOOP
 
             heading_text := COALESCE(
-                oils_xpath_string('./*[contains("'||acsaf.display_sf_list||'",@code)]', tmp_xml::TEXT, joiner_text),
+                oils_xpath_string('./*[contains("'||acsaf.display_sf_list||'",@code)]', tmp_xml, joiner_text),
                 ''
             );
 

commit 6d079ea592f5982225689ec6abce9e602eff7b10
Author: Mike Rylander <mrylander at gmail.com>
Date:   Mon Mar 3 14:07:42 2014 -0500

    LP#1243023: Teach oils_xpath() to decode specific entities in text nodes
    
    Because of a behavioral change in Postgres' XML code (specifically, when
    casting XML as TEXT, the Famous Five XML entities are not decoded), we
    are seeing doubled encodings in XML and HTML output, as well as in indexed
    data.  To combat this, we will now check the first character of each
    array element returned by XPATH() and, if it is not '<', we will decode
    the entities ourselves.
    
    Also included in this commit is some cleanup of the surrounding file
    content, which was just confusing and useless in the modern era.
    
    The contents of this commit, followed by:
    
     =# drop function evergreen.oils_xpath(text,text,anyarray)
    
    are enough to address the OP's complaint about browse data.
    
    Signed-off-by: Mike Rylander <mrylander at gmail.com>
    Signed-off-by: Dan Wells <dbw2 at calvin.edu>
    Signed-off-by: Ben Shum <bshum at biblio.org>

diff --git a/Open-ILS/src/sql/Pg/002.functions.config.sql b/Open-ILS/src/sql/Pg/002.functions.config.sql
index 13f7b35..3dab7d9 100644
--- a/Open-ILS/src/sql/Pg/002.functions.config.sql
+++ b/Open-ILS/src/sql/Pg/002.functions.config.sql
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2004-2008  Georgia Public Library Service
- * Copyright (C) 2008  Equinox Software, Inc.
+ * Copyright (C) 2008-2014  Equinox Software, Inc.
  * Mike Rylander <miker at esilibrary.com>
  *
  * This program is free software; you can redistribute it and/or
@@ -18,172 +18,50 @@
 
 BEGIN;
 
-/*
-CREATE OR REPLACE FUNCTION oils_xml_transform ( TEXT, TEXT ) RETURNS TEXT AS $_$
-	SELECT	CASE	WHEN (SELECT COUNT(*) FROM config.xml_transform WHERE name = $2 AND xslt = '---') > 0 THEN $1
-			ELSE xslt_process($1, (SELECT xslt FROM config.xml_transform WHERE name = $2))
-		END;
-$_$ LANGUAGE SQL STRICT IMMUTABLE;
-
-CREATE OR REPLACE FUNCTION public.extract_marc_field ( TEXT, BIGINT, TEXT, TEXT ) RETURNS TEXT AS $$
-    SELECT regexp_replace(string_agg(output,' '),$4,'','g') FROM oils_xpath_table('id', 'marc', $1, $3, 'id='||$2)x(id INT, output TEXT);
-$$ LANGUAGE SQL;
-
-CREATE OR REPLACE FUNCTION oils_xml_uncache (xml TEXT) RETURNS BOOL AS $func$
-  delete $_SHARED{'_xslt_process'}{docs}{shift()};
-  return 1;
-$func$ LANGUAGE PLPERLU;
-
-CREATE OR REPLACE FUNCTION oils_xml_cache (xml TEXT) RETURNS BOOL AS $func$
-  use strict;
-  use XML::LibXML;
-
-  my $doc = shift;
-
-  # The following approach uses the older XML::LibXML 1.69 / XML::LibXSLT 1.68
-  # methods of parsing XML documents and stylesheets, in the hopes of broader
-  # compatibility with distributions
-  my $parser = $_SHARED{'_xslt_process'}{parsers}{xml} || XML::LibXML->new();
-
-  # Cache the XML parser, if we do not already have one
-  $_SHARED{'_xslt_process'}{parsers}{xml} = $parser
-    unless ($_SHARED{'_xslt_process'}{parsers}{xml});
-
-  # Parse and cache the doc
-  eval { $_SHARED{'_xslt_process'}{docs}{$doc} = $parser->parse_string($doc) };
-
-  return 0 if ($@);
-  return 1;
-$func$ LANGUAGE PLPERLU;
-
--- if we use these, we need to ...
-drop function oils_xpath(text, text, anyarray);
-
-CREATE OR REPLACE FUNCTION oils_xpath (xpath TEXT, xml TEXT, ns TEXT[][]) RETURNS TEXT[] AS $func$
-  use strict;
-  use XML::LibXML;
-
-  my $xpath = shift;
-  my $doc = shift;
-  my $ns_string = shift || '';
-  #elog(NOTICE,"ns_string: $ns_string");
-
-  my %ns_list = $ns_string =~ m/\{([^{,]+),([^}]+)\}/g;
-  #elog(NOTICE,"NS Prefix $_: $ns_list{$_}") for (keys %ns_list);
-
-  # The following approach uses the older XML::LibXML 1.69 / XML::LibXSLT 1.68
-  # methods of parsing XML documents and stylesheets, in the hopes of broader
-  # compatibility with distributions
-  my $parser = eval { $_SHARED{'_xslt_process'}{parsers}{xml} || XML::LibXML->new() };
-
-  return undef if ($@);
-
-  # Cache the XML parser, if we do not already have one
-  $_SHARED{'_xslt_process'}{parsers}{xml} = $parser
-    unless ($_SHARED{'_xslt_process'}{parsers}{xml});
-
-  # Look for a cached version of the doc, or parse it if none
-  my $dom = eval { $_SHARED{'_xslt_process'}{docs}{$doc} || $parser->parse_string($doc) };
-
-  return undef if ($@);
-
-  # Cache the parsed XML doc, if already there
-  $_SHARED{'_xslt_process'}{docs}{$doc} = $dom
-    unless ($_SHARED{'_xslt_process'}{docs}{$doc});
-
-  # Register the requested namespaces
-  $dom->documentElement->setNamespace( $ns_list{$_} => $_ ) for ( keys %ns_list );
-
-  # Gather and return nodes
-  my @nodes = $dom->findnodes($xpath);
-  #elog(NOTICE,"nodes found by $xpath: ". scalar(@nodes));
-
-  return [ map { $_->toString } @nodes ];
-$func$ LANGUAGE PLPERLU;
-
-CREATE OR REPLACE FUNCTION oils_xpath ( TEXT, TEXT ) RETURNS TEXT[] AS $$SELECT oils_xpath( $1, $2, '{}'::TEXT[] );$$ LANGUAGE SQL IMMUTABLE;
-
-*/
-
-CREATE FUNCTION version_specific_xpath () RETURNS TEXT AS $wrapper_function$
-DECLARE
-    out_text TEXT;
-BEGIN
-    
-    IF REGEXP_REPLACE(VERSION(),E'^.+?(\\d+\\.\\d+).*?$',E'\\1')::FLOAT < 8.3 THEN
-        out_text := 'Creating XPath functions that work like the native XPATH function in 8.3+';
-        
-        EXECUTE $create_82_funcs$
-                        
-CREATE OR REPLACE FUNCTION oils_xpath ( xpath TEXT, xml TEXT, ns ANYARRAY ) RETURNS TEXT[] AS $func$
-DECLARE
-    node_text   TEXT;
-    ns_regexp   TEXT;
-    munged_xpath    TEXT;
-BEGIN
-
-    munged_xpath := xpath;
-
-    IF ns IS NOT NULL AND array_upper(ns, 1) IS NOT NULL THEN
-        FOR namespace IN 1 .. array_upper(ns, 1) LOOP
-            munged_xpath := REGEXP_REPLACE(
-                munged_xpath,
-                E'(' || ns[namespace][1] || E'):(\\w+)',
-                E'*[local-name() = "\\2" and namespace-uri() = "' || ns[namespace][2] || E'"]',
-                'g'
-            );
-        END LOOP;
-
-        munged_xpath := REGEXP_REPLACE( munged_xpath, E'\\]\\[(\\D)',E' and \\1', 'g');
-    END IF;
-
-    -- RAISE NOTICE 'munged xpath: %', munged_xpath;
-
-    node_text := xpath_nodeset(xml, munged_xpath, 'XXX_OILS_NODESET');
-    -- RAISE NOTICE 'node_text: %', node_text;
-
-    IF munged_xpath ~ $re$/[^/[]*@[^/]+$$re$ THEN
-        node_text := REGEXP_REPLACE(node_text,'<XXX_OILS_NODESET>[^"]+"', '<XXX_OILS_NODESET>', 'g');
-        node_text := REGEXP_REPLACE(node_text,'"</XXX_OILS_NODESET>', '</XXX_OILS_NODESET>', 'g');
-    END IF;
-
-    node_text := REGEXP_REPLACE(node_text,'^<XXX_OILS_NODESET>', '');
-    node_text := REGEXP_REPLACE(node_text,'</XXX_OILS_NODESET>$', '');
-
-    RETURN  STRING_TO_ARRAY(node_text, '</XXX_OILS_NODESET><XXX_OILS_NODESET>');
-END;
-$func$ LANGUAGE PLPGSQL IMMUTABLE;
-
-CREATE OR REPLACE FUNCTION oils_xpath ( TEXT, TEXT ) RETURNS TEXT[] AS $$SELECT oils_xpath( $1, $2, '{}'::TEXT[] );$$ LANGUAGE SQL IMMUTABLE;
-
-CREATE OR REPLACE FUNCTION oils_xslt_process(TEXT, TEXT) RETURNS TEXT AS $$
-    SELECT xslt_process( $1, $2 );
-$$ LANGUAGE SQL IMMUTABLE;
-
-        $create_82_funcs$;
-    ELSIF REGEXP_REPLACE(VERSION(),E'^.+?(\\d+\\.\\d+).*?$',E'\\1')::FLOAT = 8.3 THEN
-        out_text := 'Creating XPath wrapper functions around the native XPATH function in 8.3.  contrib/xml2 still required!';
-
-        EXECUTE $create_83_funcs$
--- 8.3 or after
-CREATE OR REPLACE FUNCTION oils_xpath ( TEXT, TEXT, ANYARRAY ) RETURNS TEXT[] AS 'SELECT XPATH( $1, $2::XML, $3 )::TEXT[];' LANGUAGE SQL IMMUTABLE;
-CREATE OR REPLACE FUNCTION oils_xpath ( TEXT, TEXT ) RETURNS TEXT[] AS 'SELECT XPATH( $1, $2::XML )::TEXT[];' LANGUAGE SQL IMMUTABLE;
-
-CREATE OR REPLACE FUNCTION oils_xslt_process(TEXT, TEXT) RETURNS TEXT AS $$
-    SELECT xslt_process( $1, $2 );
-$$ LANGUAGE SQL IMMUTABLE;
-
-        $create_83_funcs$;
-
-    ELSE
-        out_text := 'Creating XPath wrapper functions around the native XPATH function in 8.4+, and plperlu-based xslt processor.  No contrib/xml2 needed!';
-
-        EXECUTE $create_84_funcs$
--- 8.4 or after
-CREATE OR REPLACE FUNCTION oils_xpath ( TEXT, TEXT, ANYARRAY ) RETURNS TEXT[] AS 'SELECT XPATH( $1, $2::XML, $3 )::TEXT[];' LANGUAGE SQL IMMUTABLE;
-CREATE OR REPLACE FUNCTION oils_xpath ( TEXT, TEXT ) RETURNS TEXT[] AS 'SELECT XPATH( $1, $2::XML )::TEXT[];' LANGUAGE SQL IMMUTABLE;
-
-CREATE OR REPLACE FUNCTION oils_xslt_process(TEXT, TEXT) RETURNS TEXT AS $func$
+CREATE OR REPLACE FUNCTION evergreen.xml_famous5_to_text( TEXT ) RETURNS TEXT AS $f$
+ SELECT REPLACE(
+            REPLACE(
+                REPLACE(
+                    REPLACE(
+                        REPLACE( $1, '&lt;', '<'),
+                        '&gt;',
+                        '>'
+                    ),
+                    '&apos;',
+                    $$'$$
+                ), -- ' ... vim
+                '&quot;',
+                '"'
+            ),
+            '&amp;',
+            '&'
+        );
+$f$ LANGUAGE SQL IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION evergreen.oils_xpath ( TEXT, TEXT, TEXT[] ) RETURNS TEXT[] AS $f$
+    SELECT  ARRAY_AGG(
+                CASE WHEN strpos(x,'<') = 1 THEN -- It's an element node
+                    x
+                ELSE -- it's text-ish
+                    evergreen.xml_famous5_to_text(x)
+                END
+            )
+      FROM  UNNEST(XPATH( $1, $2::XML, $3 )::TEXT[]) x;
+$f$ LANGUAGE SQL IMMUTABLE;
+
+-- Trust me, it's just simpler to duplicate these...
+CREATE OR REPLACE FUNCTION evergreen.oils_xpath ( TEXT, TEXT ) RETURNS TEXT[] AS $f$
+    SELECT  ARRAY_AGG(
+                CASE WHEN strpos(x,'<') = 1 THEN -- It's an element node
+                    x
+                ELSE -- it's text-ish
+                    evergreen.xml_famous5_to_text(x)
+                END
+            )
+      FROM  UNNEST(XPATH( $1, $2::XML)::TEXT[]) x;
+$f$ LANGUAGE SQL IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION evergreen.oils_xslt_process(TEXT, TEXT) RETURNS TEXT AS $func$
   use strict;
 
   use XML::LibXSLT;
@@ -221,18 +99,6 @@ CREATE OR REPLACE FUNCTION oils_xslt_process(TEXT, TEXT) RETURNS TEXT AS $func$
 
 $func$ LANGUAGE 'plperlu' STRICT IMMUTABLE;
 
-        $create_84_funcs$;
-
-    END IF;
-
-    RETURN out_text;
-END;
-$wrapper_function$ LANGUAGE PLPGSQL;
-
-SELECT version_specific_xpath();
-DROP FUNCTION version_specific_xpath();
-
-
 CREATE OR REPLACE FUNCTION oils_xpath_string ( TEXT, TEXT, TEXT, ANYARRAY ) RETURNS TEXT AS $func$
     SELECT  ARRAY_TO_STRING(
                 oils_xpath(

-----------------------------------------------------------------------

Summary of changes:
 Open-ILS/src/sql/Pg/002.functions.config.sql       |  224 ++------
 Open-ILS/src/sql/Pg/002.schema.config.sql          |    2 +-
 Open-ILS/src/sql/Pg/011.schema.authority.sql       |    4 +-
 Open-ILS/src/sql/Pg/030.schema.metabib.sql         |   27 +-
 Open-ILS/src/sql/Pg/076.functions.url_verify.sql   |    6 +-
 .../t/regress/lp1243023_decoded_xpath_extracts.pg  |   10 +
 ...874.function.oils_xpath-tweaks-for-newer-pg.sql |  611 ++++++++++++++++++++
 7 files changed, 678 insertions(+), 206 deletions(-)
 create mode 100644 Open-ILS/src/sql/Pg/t/regress/lp1243023_decoded_xpath_extracts.pg
 create mode 100644 Open-ILS/src/sql/Pg/upgrade/0874.function.oils_xpath-tweaks-for-newer-pg.sql


hooks/post-receive
-- 
Evergreen ILS


More information about the open-ils-commits mailing list