[open-ils-commits] [GIT] Evergreen ILS branch rel_2_2 updated. d4cc86cca92296e5af37312a8d4aee6e23b0c602

Evergreen Git git at git.evergreen-ils.org
Tue Sep 4 17:21:39 EDT 2012


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Evergreen ILS".

The branch, rel_2_2 has been updated
       via  d4cc86cca92296e5af37312a8d4aee6e23b0c602 (commit)
       via  a9f8d2c6c2dd143be427f6281e93447f9e69bf10 (commit)
       via  0a2007b4efb151e5221c653f217869eec1b2b80e (commit)
      from  304d7065458cfc6c01043c2c6f529abaa50e19f7 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit d4cc86cca92296e5af37312a8d4aee6e23b0c602
Author: Dan Wells <dbw2 at calvin.edu>
Date:   Tue Sep 4 17:09:44 2012 -0400

    Upgrade bits for Vandelay Overlay Changes
    
    Signed-off-by: Dan Wells <dbw2 at calvin.edu>

diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql
index fea872b..2c4b5fb 100644
--- a/Open-ILS/src/sql/Pg/002.schema.config.sql
+++ b/Open-ILS/src/sql/Pg/002.schema.config.sql
@@ -87,7 +87,7 @@ CREATE TRIGGER no_overlapping_deps
     BEFORE INSERT OR UPDATE ON config.db_patch_dependencies
     FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates');
 
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0736', :eg_version); -- miker/tsbere
+INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0738', :eg_version); -- senator/dbwells
 
 CREATE TABLE config.bib_source (
 	id		SERIAL	PRIMARY KEY,
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.vandelay.import-match-no-like-any.sql b/Open-ILS/src/sql/Pg/upgrade/0738.schema.vandelay.import-match-no-like-any.sql
similarity index 98%
rename from Open-ILS/src/sql/Pg/upgrade/XXXX.schema.vandelay.import-match-no-like-any.sql
rename to Open-ILS/src/sql/Pg/upgrade/0738.schema.vandelay.import-match-no-like-any.sql
index 1ec6032..e00cb4d 100644
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.vandelay.import-match-no-like-any.sql
+++ b/Open-ILS/src/sql/Pg/upgrade/0738.schema.vandelay.import-match-no-like-any.sql
@@ -1,8 +1,8 @@
 BEGIN;
 
--- XXXX.schema.vandelay.import-match-no-like-any.sql
+-- 0738.schema.vandelay.import-match-no-like-any.sql
 
-SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+SELECT evergreen.upgrade_deps_block_check('0738', :eg_version);
 
 CREATE OR REPLACE FUNCTION vandelay.match_set_test_marcxml(
     match_set_id INTEGER, record_xml TEXT

commit a9f8d2c6c2dd143be427f6281e93447f9e69bf10
Author: Dan Wells <dbw2 at calvin.edu>
Date:   Thu Aug 30 15:26:15 2012 -0400

    Increase Overlay Speed for Standard Identifiers
    
    Due to an inefficient query order and a confused query planner,
    some overlay matching can end up going abysmally slow.  See LP
    bug #1024095 for elaboration on the issue.
    
    This commit reorders the joins as originally suggested by Lebbeous,
    then adds a specific index to coax the query planner into making the
    best choices.  The example index here only targets the 02x identifier
    fields, so other tag/subfield/substring(value) indexes will likely
    be necessary for maximum benefit when matching on other fields
    (e.g. 010).
    
    These changes combined with the previous commit which replaces 'LIKE
    ANY' with 'LIKE (... OR ...)' has shown great promise in testing.
    
    Signed-off-by: Dan Wells <dbw2 at calvin.edu>
    Signed-off-by: Lebbeous Fogle-Weekley <lebbeous at esilibrary.com>

diff --git a/Open-ILS/src/sql/Pg/012.schema.vandelay.sql b/Open-ILS/src/sql/Pg/012.schema.vandelay.sql
index 96c555f..ee64e7f 100644
--- a/Open-ILS/src/sql/Pg/012.schema.vandelay.sql
+++ b/Open-ILS/src/sql/Pg/012.schema.vandelay.sql
@@ -508,7 +508,7 @@ BEGIN
     -- a side-effect, populate the _vandelay_tmp_[qj]rows tables.
     wq := vandelay.get_expr_from_match_set(match_set_id, tags_rstore);
 
-    query_ := 'SELECT DISTINCT(bre.id) AS record, ';
+    query_ := 'SELECT DISTINCT(record), ';
 
     -- qrows table is for the quality bits we add to the SELECT clause
     SELECT ARRAY_TO_STRING(
@@ -517,15 +517,14 @@ BEGIN
 
     -- our query string so far is the SELECT clause and the inital FROM.
     -- no JOINs yet nor the WHERE clause
-    query_ := query_ || coal || ' AS quality ' || E'\n' ||
-        'FROM biblio.record_entry bre ';
+    query_ := query_ || coal || ' AS quality ' || E'\n';
 
     -- jrows table is for the joins we must make (and the real text conditions)
     SELECT ARRAY_TO_STRING(ARRAY_ACCUM(j), E'\n') INTO joins
         FROM _vandelay_tmp_jrows;
 
     -- add those joins and the where clause to our query.
-    query_ := query_ || joins || E'\n' || 'WHERE ' || wq || ' AND not bre.deleted';
+    query_ := query_ || joins || E'\n' || 'JOIN biblio.record_entry bre ON (bre.id = record) ' || 'WHERE ' || wq || ' AND not bre.deleted';
 
     -- this will return rows of record,quality
     FOR rec IN EXECUTE query_ USING tags_rstore, svf_rstore LOOP
@@ -640,10 +639,21 @@ DECLARE
     op          TEXT;
     tagkey      TEXT;
     caseless    BOOL;
+    jrow_count  INT;
+    my_using    TEXT;
+    my_join     TEXT;
 BEGIN
     -- remember $1 is tags_rstore, and $2 is svf_rstore
 
     caseless := FALSE;
+    SELECT COUNT(*) INTO jrow_count FROM _vandelay_tmp_jrows;
+    IF jrow_count > 0 THEN
+        my_using := ' USING (record)';
+        my_join := 'FULL OUTER JOIN';
+    ELSE
+        my_using := '';
+        my_join := 'FROM';
+    END IF;
 
     IF node.tag IS NOT NULL THEN
         caseless := (node.tag IN ('020', '022', '024'));
@@ -669,25 +679,23 @@ BEGIN
 
     my_alias := 'n' || node.id::TEXT;
 
-    jrow := 'LEFT JOIN (SELECT *, ' || node.quality ||
-        ' AS quality FROM metabib.';
+    jrow := my_join || ' (SELECT *, ';
     IF node.tag IS NOT NULL THEN
-        jrow := jrow || 'full_rec) ' || my_alias || ' ON (' ||
-            my_alias || '.record = bre.id AND ' || my_alias || '.tag = ''' ||
+        jrow := jrow  || node.quality ||
+            ' AS quality FROM metabib.full_rec mfr WHERE mfr.tag = ''' ||
             node.tag || '''';
         IF node.subfield IS NOT NULL THEN
-            jrow := jrow || ' AND ' || my_alias || '.subfield = ''' ||
+            jrow := jrow || ' AND mfr.subfield = ''' ||
                 node.subfield || '''';
         END IF;
         jrow := jrow || ' AND (';
-
-        jrow := jrow || vandelay._node_tag_comparisons(caseless, my_alias, op, tags_rstore, tagkey);
-        jrow := jrow || '))';
+        jrow := jrow || vandelay._node_tag_comparisons(caseless, op, tags_rstore, tagkey);
+        jrow := jrow || ')) ' || my_alias || my_using || E'\n';
     ELSE    -- svf
-        jrow := jrow || 'record_attr) ' || my_alias || ' ON (' ||
-            my_alias || '.id = bre.id AND (' ||
-            my_alias || '.attrs->''' || node.svf ||
-            ''' ' || op || ' $2->''' || node.svf || '''))';
+        jrow := jrow || 'id AS record, ' || node.quality ||
+            ' AS quality FROM metabib.record_attr mra WHERE mra.attrs->''' ||
+            node.svf || ''' ' || op || ' $2->''' || node.svf || ''') ' ||
+            my_alias || my_using || E'\n';
     END IF;
     INSERT INTO _vandelay_tmp_jrows (j) VALUES (jrow);
 END;
@@ -695,7 +703,6 @@ $$ LANGUAGE PLPGSQL;
 
 CREATE OR REPLACE FUNCTION vandelay._node_tag_comparisons(
     caseless BOOLEAN,
-    my_alias TEXT,
     op TEXT,
     tags_rstore HSTORE,
     tagkey TEXT
@@ -719,9 +726,9 @@ BEGIN
         END IF;
 
         IF caseless THEN
-            result := result || 'LOWER(' || my_alias || '.value) ' || op;
+            result := result || 'LOWER(mfr.value) ' || op;
         ELSE
-            result := result || my_alias || '.value ' || op;
+            result := result || 'mfr.value ' || op;
         END IF;
 
         result := result || ' ' || COALESCE('''' || vals[i] || '''', 'NULL');
diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
index dbf3ba8..5bcb8f8 100644
--- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql
+++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
@@ -263,6 +263,20 @@ CREATE INDEX metabib_full_rec_index_vector_idx ON metabib.real_full_rec USING GI
 CREATE INDEX metabib_full_rec_isxn_caseless_idx
     ON metabib.real_full_rec (LOWER(value))
     WHERE tag IN ('020', '022', '024');
+-- This next index might fully supplant the one above, but leaving both for now.
+-- (they are not too large)
+-- The reason we need this index is to ensure that the query parser always
+-- prefers this index over the simpler tag/subfield index, as this greatly
+-- increases Vandelay overlay speed for these identifiers, especially when
+-- a record has many of these fields (around > 4-6 seems like the cutoff
+-- on at least one PG9.1 system)
+-- A similar index could be added for other fields (e.g. 010), but one should
+-- leave out the LOWER() in all other cases.
+-- TODO: verify whether we can discard the non tag/subfield/substring version
+-- above (metabib_full_rec_isxn_caseless_idx)
+CREATE INDEX metabib_full_rec_02x_tag_subfield_lower_substring
+    ON metabib.real_full_rec (tag, subfield, LOWER(substring(value, 1, 1024)))
+    WHERE tag IN ('020', '022', '024');
 
 
 CREATE TRIGGER metabib_full_rec_fti_trigger
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.vandelay.import-match-no-like-any.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.vandelay.import-match-no-like-any.sql
index 34a2ee0..1ec6032 100644
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.vandelay.import-match-no-like-any.sql
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.vandelay.import-match-no-like-any.sql
@@ -27,7 +27,7 @@ BEGIN
     -- a side-effect, populate the _vandelay_tmp_[qj]rows tables.
     wq := vandelay.get_expr_from_match_set(match_set_id, tags_rstore);
 
-    query_ := 'SELECT DISTINCT(bre.id) AS record, ';
+    query_ := 'SELECT DISTINCT(record), ';
 
     -- qrows table is for the quality bits we add to the SELECT clause
     SELECT ARRAY_TO_STRING(
@@ -36,18 +36,16 @@ BEGIN
 
     -- our query string so far is the SELECT clause and the inital FROM.
     -- no JOINs yet nor the WHERE clause
-    query_ := query_ || coal || ' AS quality ' || E'\n' ||
-        'FROM biblio.record_entry bre ';
+    query_ := query_ || coal || ' AS quality ' || E'\n';
 
     -- jrows table is for the joins we must make (and the real text conditions)
     SELECT ARRAY_TO_STRING(ARRAY_ACCUM(j), E'\n') INTO joins
         FROM _vandelay_tmp_jrows;
 
     -- add those joins and the where clause to our query.
-    query_ := query_ || joins || E'\n' || 'WHERE ' || wq || ' AND not bre.deleted';
+    query_ := query_ || joins || E'\n' || 'JOIN biblio.record_entry bre ON (bre.id = record) ' || 'WHERE ' || wq || ' AND not bre.deleted';
 
     -- this will return rows of record,quality
-    RAISE WARNING '%', query_;
     FOR rec IN EXECUTE query_ USING tags_rstore, svf_rstore LOOP
         RETURN NEXT rec;
     END LOOP;
@@ -122,10 +120,21 @@ DECLARE
     op          TEXT;
     tagkey      TEXT;
     caseless    BOOL;
+    jrow_count  INT;
+    my_using    TEXT;
+    my_join     TEXT;
 BEGIN
     -- remember $1 is tags_rstore, and $2 is svf_rstore
 
     caseless := FALSE;
+    SELECT COUNT(*) INTO jrow_count FROM _vandelay_tmp_jrows;
+    IF jrow_count > 0 THEN
+        my_using := ' USING (record)';
+        my_join := 'FULL OUTER JOIN';
+    ELSE
+        my_using := '';
+        my_join := 'FROM';
+    END IF;
 
     IF node.tag IS NOT NULL THEN
         caseless := (node.tag IN ('020', '022', '024'));
@@ -151,25 +160,23 @@ BEGIN
 
     my_alias := 'n' || node.id::TEXT;
 
-    jrow := 'LEFT JOIN (SELECT *, ' || node.quality ||
-        ' AS quality FROM metabib.';
+    jrow := my_join || ' (SELECT *, ';
     IF node.tag IS NOT NULL THEN
-        jrow := jrow || 'full_rec) ' || my_alias || ' ON (' ||
-            my_alias || '.record = bre.id AND ' || my_alias || '.tag = ''' ||
+        jrow := jrow  || node.quality ||
+            ' AS quality FROM metabib.full_rec mfr WHERE mfr.tag = ''' ||
             node.tag || '''';
         IF node.subfield IS NOT NULL THEN
-            jrow := jrow || ' AND ' || my_alias || '.subfield = ''' ||
+            jrow := jrow || ' AND mfr.subfield = ''' ||
                 node.subfield || '''';
         END IF;
         jrow := jrow || ' AND (';
-
-        jrow := jrow || vandelay._node_tag_comparisons(caseless, my_alias, op, tags_rstore, tagkey);
-        jrow := jrow || '))';
+        jrow := jrow || vandelay._node_tag_comparisons(caseless, op, tags_rstore, tagkey);
+        jrow := jrow || ')) ' || my_alias || my_using || E'\n';
     ELSE    -- svf
-        jrow := jrow || 'record_attr) ' || my_alias || ' ON (' ||
-            my_alias || '.id = bre.id AND (' ||
-            my_alias || '.attrs->''' || node.svf ||
-            ''' ' || op || ' $2->''' || node.svf || '''))';
+        jrow := jrow || 'id AS record, ' || node.quality ||
+            ' AS quality FROM metabib.record_attr mra WHERE mra.attrs->''' ||
+            node.svf || ''' ' || op || ' $2->''' || node.svf || ''') ' ||
+            my_alias || my_using || E'\n';
     END IF;
     INSERT INTO _vandelay_tmp_jrows (j) VALUES (jrow);
 END;
@@ -177,7 +184,6 @@ $$ LANGUAGE PLPGSQL;
 
 CREATE OR REPLACE FUNCTION vandelay._node_tag_comparisons(
     caseless BOOLEAN,
-    my_alias TEXT,
     op TEXT,
     tags_rstore HSTORE,
     tagkey TEXT
@@ -201,9 +207,9 @@ BEGIN
         END IF;
 
         IF caseless THEN
-            result := result || 'LOWER(' || my_alias || '.value) ' || op;
+            result := result || 'LOWER(mfr.value) ' || op;
         ELSE
-            result := result || my_alias || '.value ' || op;
+            result := result || 'mfr.value ' || op;
         END IF;
 
         result := result || ' ' || COALESCE('''' || vals[i] || '''', 'NULL');
@@ -224,5 +230,20 @@ DROP FUNCTION vandelay.get_expr_from_match_set( INTEGER );
 DROP FUNCTION vandelay.get_expr_from_match_set_point( vandelay.match_set_point );
 DROP FUNCTION vandelay._get_expr_push_jrow( vandelay.match_set_point );
 
+-- This next index might fully supplant an existing one but leaving both for now
+-- (they are not too large)
+-- The reason we need this index is to ensure that the query parser always
+-- prefers this index over the simpler tag/subfield index, as this greatly
+-- increases Vandelay overlay speed for these identifiers, especially when
+-- a record has many of these fields (around > 4-6 seems like the cutoff
+-- on at least one PG9.1 system)
+-- A similar index could be added for other fields (e.g. 010), but one should
+-- leave out the LOWER() in all other cases.
+-- TODO: verify whether we can discard the non tag/subfield/substring version
+-- (metabib_full_rec_isxn_caseless_idx)
+CREATE INDEX metabib_full_rec_02x_tag_subfield_lower_substring
+    ON metabib.real_full_rec (tag, subfield, LOWER(substring(value, 1, 1024)))
+    WHERE tag IN ('020', '022', '024');
+
 COMMIT;
 

commit 0a2007b4efb151e5221c653f217869eec1b2b80e
Author: Lebbeous Fogle-Weekley <lebbeous at esilibrary.com>
Date:   Mon Jun 25 20:00:33 2012 -0400

    Vandelay: Fix index-miss with MARC Imports using Match Sets
    
    In some Postgres installations, an expression such as:
    
    (value LIKE '13423488%' OR value LIKE '245425%') will use a btree index,
    but
    
    value LIKE ANY('{13423488%,245425%}'::TEXT[])
    
    will not.
    
    Missing such an index can make matching incoming bibs based on a field
    that's present in most of your existing records terrifically slow.
    
    Signed-off-by: Lebbeous Fogle-Weekley <lebbeous at esilibrary.com>
    Signed-off-by: Dan Wells <dbw2 at calvin.edu>

diff --git a/Open-ILS/src/sql/Pg/012.schema.vandelay.sql b/Open-ILS/src/sql/Pg/012.schema.vandelay.sql
index 65e2ca7..96c555f 100644
--- a/Open-ILS/src/sql/Pg/012.schema.vandelay.sql
+++ b/Open-ILS/src/sql/Pg/012.schema.vandelay.sql
@@ -506,7 +506,7 @@ BEGIN
 
     -- generate the where clause and return that directly (into wq), and as
     -- a side-effect, populate the _vandelay_tmp_[qj]rows tables.
-    wq := vandelay.get_expr_from_match_set(match_set_id);
+    wq := vandelay.get_expr_from_match_set(match_set_id, tags_rstore);
 
     query_ := 'SELECT DISTINCT(bre.id) AS record, ';
 
@@ -569,7 +569,8 @@ END;
 $func$ LANGUAGE PLPGSQL;
 
 CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set(
-    match_set_id INTEGER
+    match_set_id INTEGER,
+    tags_rstore HSTORE
 ) RETURNS TEXT AS $$
 DECLARE
     root    vandelay.match_set_point;
@@ -577,12 +578,13 @@ BEGIN
     SELECT * INTO root FROM vandelay.match_set_point
         WHERE parent IS NULL AND match_set = match_set_id;
 
-    RETURN vandelay.get_expr_from_match_set_point(root);
+    RETURN vandelay.get_expr_from_match_set_point(root, tags_rstore);
 END;
 $$  LANGUAGE PLPGSQL;
 
 CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set_point(
-    node vandelay.match_set_point
+    node vandelay.match_set_point,
+    tags_rstore HSTORE
 ) RETURNS TEXT AS $$
 DECLARE
     q           TEXT;
@@ -605,13 +607,13 @@ BEGIN
                 q := q || ' ' || this_op || ' ';
             END IF;
             i := i + 1;
-            q := q || vandelay.get_expr_from_match_set_point(child);
+            q := q || vandelay.get_expr_from_match_set_point(child, tags_rstore);
         END LOOP;
         q := q || ')';
         RETURN q;
     ELSIF node.bool_op IS NULL THEN
         PERFORM vandelay._get_expr_push_qrow(node);
-        PERFORM vandelay._get_expr_push_jrow(node);
+        PERFORM vandelay._get_expr_push_jrow(node, tags_rstore);
         RETURN vandelay._get_expr_render_one(node);
     ELSE
         RETURN '';
@@ -629,7 +631,8 @@ END;
 $$ LANGUAGE PLPGSQL;
 
 CREATE OR REPLACE FUNCTION vandelay._get_expr_push_jrow(
-    node vandelay.match_set_point
+    node vandelay.match_set_point,
+    tags_rstore HSTORE
 ) RETURNS VOID AS $$
 DECLARE
     jrow        TEXT;
@@ -678,13 +681,8 @@ BEGIN
         END IF;
         jrow := jrow || ' AND (';
 
-        IF caseless THEN
-            jrow := jrow || 'LOWER(' || my_alias || '.value) ' || op;
-        ELSE
-            jrow := jrow || my_alias || '.value ' || op;
-        END IF;
-
-        jrow := jrow || ' ANY(($1->''' || tagkey || ''')::TEXT[])))';
+        jrow := jrow || vandelay._node_tag_comparisons(caseless, my_alias, op, tags_rstore, tagkey);
+        jrow := jrow || '))';
     ELSE    -- svf
         jrow := jrow || 'record_attr) ' || my_alias || ' ON (' ||
             my_alias || '.id = bre.id AND (' ||
@@ -695,6 +693,50 @@ BEGIN
 END;
 $$ LANGUAGE PLPGSQL;
 
+CREATE OR REPLACE FUNCTION vandelay._node_tag_comparisons(
+    caseless BOOLEAN,
+    my_alias TEXT,
+    op TEXT,
+    tags_rstore HSTORE,
+    tagkey TEXT
+) RETURNS TEXT AS $$
+DECLARE
+    result  TEXT;
+    i       INT;
+    vals    TEXT[];
+BEGIN
+    i := 1;
+    vals := tags_rstore->tagkey;
+    result := '';
+
+    WHILE TRUE LOOP
+        IF i > 1 THEN
+            IF vals[i] IS NULL THEN
+                EXIT;
+            ELSE
+                result := result || ' OR ';
+            END IF;
+        END IF;
+
+        IF caseless THEN
+            result := result || 'LOWER(' || my_alias || '.value) ' || op;
+        ELSE
+            result := result || my_alias || '.value ' || op;
+        END IF;
+
+        result := result || ' ' || COALESCE('''' || vals[i] || '''', 'NULL');
+
+        IF vals[i] IS NULL THEN
+            EXIT;
+        END IF;
+        i := i + 1;
+    END LOOP;
+
+    RETURN result;
+
+END;
+$$ LANGUAGE PLPGSQL;
+
 CREATE OR REPLACE FUNCTION vandelay._get_expr_render_one(
     node vandelay.match_set_point
 ) RETURNS TEXT AS $$
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.vandelay.import-match-no-like-any.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.vandelay.import-match-no-like-any.sql
new file mode 100644
index 0000000..34a2ee0
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.vandelay.import-match-no-like-any.sql
@@ -0,0 +1,228 @@
+BEGIN;
+
+-- XXXX.schema.vandelay.import-match-no-like-any.sql
+
+SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+
+CREATE OR REPLACE FUNCTION vandelay.match_set_test_marcxml(
+    match_set_id INTEGER, record_xml TEXT
+) RETURNS SETOF vandelay.match_set_test_result AS $$
+DECLARE
+    tags_rstore HSTORE;
+    svf_rstore  HSTORE;
+    coal        TEXT;
+    joins       TEXT;
+    query_      TEXT;
+    wq          TEXT;
+    qvalue      INTEGER;
+    rec         RECORD;
+BEGIN
+    tags_rstore := vandelay.flatten_marc_hstore(record_xml);
+    svf_rstore := vandelay.extract_rec_attrs(record_xml);
+
+    CREATE TEMPORARY TABLE _vandelay_tmp_qrows (q INTEGER);
+    CREATE TEMPORARY TABLE _vandelay_tmp_jrows (j TEXT);
+
+    -- generate the where clause and return that directly (into wq), and as
+    -- a side-effect, populate the _vandelay_tmp_[qj]rows tables.
+    wq := vandelay.get_expr_from_match_set(match_set_id, tags_rstore);
+
+    query_ := 'SELECT DISTINCT(bre.id) AS record, ';
+
+    -- qrows table is for the quality bits we add to the SELECT clause
+    SELECT ARRAY_TO_STRING(
+        ARRAY_ACCUM('COALESCE(n' || q::TEXT || '.quality, 0)'), ' + '
+    ) INTO coal FROM _vandelay_tmp_qrows;
+
+    -- our query string so far is the SELECT clause and the inital FROM.
+    -- no JOINs yet nor the WHERE clause
+    query_ := query_ || coal || ' AS quality ' || E'\n' ||
+        'FROM biblio.record_entry bre ';
+
+    -- jrows table is for the joins we must make (and the real text conditions)
+    SELECT ARRAY_TO_STRING(ARRAY_ACCUM(j), E'\n') INTO joins
+        FROM _vandelay_tmp_jrows;
+
+    -- add those joins and the where clause to our query.
+    query_ := query_ || joins || E'\n' || 'WHERE ' || wq || ' AND not bre.deleted';
+
+    -- this will return rows of record,quality
+    RAISE WARNING '%', query_;
+    FOR rec IN EXECUTE query_ USING tags_rstore, svf_rstore LOOP
+        RETURN NEXT rec;
+    END LOOP;
+
+    DROP TABLE _vandelay_tmp_qrows;
+    DROP TABLE _vandelay_tmp_jrows;
+    RETURN;
+END;
+
+$$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set(
+    match_set_id INTEGER,
+    tags_rstore HSTORE
+) RETURNS TEXT AS $$
+DECLARE
+    root    vandelay.match_set_point;
+BEGIN
+    SELECT * INTO root FROM vandelay.match_set_point
+        WHERE parent IS NULL AND match_set = match_set_id;
+
+    RETURN vandelay.get_expr_from_match_set_point(root, tags_rstore);
+END;
+$$  LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set_point(
+    node vandelay.match_set_point,
+    tags_rstore HSTORE
+) RETURNS TEXT AS $$
+DECLARE
+    q           TEXT;
+    i           INTEGER;
+    this_op     TEXT;
+    children    INTEGER[];
+    child       vandelay.match_set_point;
+BEGIN
+    SELECT ARRAY_ACCUM(id) INTO children FROM vandelay.match_set_point
+        WHERE parent = node.id;
+
+    IF ARRAY_LENGTH(children, 1) > 0 THEN
+        this_op := vandelay._get_expr_render_one(node);
+        q := '(';
+        i := 1;
+        WHILE children[i] IS NOT NULL LOOP
+            SELECT * INTO child FROM vandelay.match_set_point
+                WHERE id = children[i];
+            IF i > 1 THEN
+                q := q || ' ' || this_op || ' ';
+            END IF;
+            i := i + 1;
+            q := q || vandelay.get_expr_from_match_set_point(child, tags_rstore);
+        END LOOP;
+        q := q || ')';
+        RETURN q;
+    ELSIF node.bool_op IS NULL THEN
+        PERFORM vandelay._get_expr_push_qrow(node);
+        PERFORM vandelay._get_expr_push_jrow(node, tags_rstore);
+        RETURN vandelay._get_expr_render_one(node);
+    ELSE
+        RETURN '';
+    END IF;
+END;
+$$  LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay._get_expr_push_jrow(
+    node vandelay.match_set_point,
+    tags_rstore HSTORE
+) RETURNS VOID AS $$
+DECLARE
+    jrow        TEXT;
+    my_alias    TEXT;
+    op          TEXT;
+    tagkey      TEXT;
+    caseless    BOOL;
+BEGIN
+    -- remember $1 is tags_rstore, and $2 is svf_rstore
+
+    caseless := FALSE;
+
+    IF node.tag IS NOT NULL THEN
+        caseless := (node.tag IN ('020', '022', '024'));
+        tagkey := node.tag;
+        IF node.subfield IS NOT NULL THEN
+            tagkey := tagkey || node.subfield;
+        END IF;
+    END IF;
+
+    IF node.negate THEN
+        IF caseless THEN
+            op := 'NOT LIKE';
+        ELSE
+            op := '<>';
+        END IF;
+    ELSE
+        IF caseless THEN
+            op := 'LIKE';
+        ELSE
+            op := '=';
+        END IF;
+    END IF;
+
+    my_alias := 'n' || node.id::TEXT;
+
+    jrow := 'LEFT JOIN (SELECT *, ' || node.quality ||
+        ' AS quality FROM metabib.';
+    IF node.tag IS NOT NULL THEN
+        jrow := jrow || 'full_rec) ' || my_alias || ' ON (' ||
+            my_alias || '.record = bre.id AND ' || my_alias || '.tag = ''' ||
+            node.tag || '''';
+        IF node.subfield IS NOT NULL THEN
+            jrow := jrow || ' AND ' || my_alias || '.subfield = ''' ||
+                node.subfield || '''';
+        END IF;
+        jrow := jrow || ' AND (';
+
+        jrow := jrow || vandelay._node_tag_comparisons(caseless, my_alias, op, tags_rstore, tagkey);
+        jrow := jrow || '))';
+    ELSE    -- svf
+        jrow := jrow || 'record_attr) ' || my_alias || ' ON (' ||
+            my_alias || '.id = bre.id AND (' ||
+            my_alias || '.attrs->''' || node.svf ||
+            ''' ' || op || ' $2->''' || node.svf || '''))';
+    END IF;
+    INSERT INTO _vandelay_tmp_jrows (j) VALUES (jrow);
+END;
+$$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay._node_tag_comparisons(
+    caseless BOOLEAN,
+    my_alias TEXT,
+    op TEXT,
+    tags_rstore HSTORE,
+    tagkey TEXT
+) RETURNS TEXT AS $$
+DECLARE
+    result  TEXT;
+    i       INT;
+    vals    TEXT[];
+BEGIN
+    i := 1;
+    vals := tags_rstore->tagkey;
+    result := '';
+
+    WHILE TRUE LOOP
+        IF i > 1 THEN
+            IF vals[i] IS NULL THEN
+                EXIT;
+            ELSE
+                result := result || ' OR ';
+            END IF;
+        END IF;
+
+        IF caseless THEN
+            result := result || 'LOWER(' || my_alias || '.value) ' || op;
+        ELSE
+            result := result || my_alias || '.value ' || op;
+        END IF;
+
+        result := result || ' ' || COALESCE('''' || vals[i] || '''', 'NULL');
+
+        IF vals[i] IS NULL THEN
+            EXIT;
+        END IF;
+        i := i + 1;
+    END LOOP;
+
+    RETURN result;
+
+END;
+$$ LANGUAGE PLPGSQL;
+
+-- drop old versions of these functions with fewer args
+DROP FUNCTION vandelay.get_expr_from_match_set( INTEGER );
+DROP FUNCTION vandelay.get_expr_from_match_set_point( vandelay.match_set_point );
+DROP FUNCTION vandelay._get_expr_push_jrow( vandelay.match_set_point );
+
+COMMIT;
+

-----------------------------------------------------------------------

Summary of changes:
 Open-ILS/src/sql/Pg/002.schema.config.sql          |    2 +-
 Open-ILS/src/sql/Pg/012.schema.vandelay.sql        |  101 ++++++--
 Open-ILS/src/sql/Pg/030.schema.metabib.sql         |   14 +
 ...38.schema.vandelay.import-match-no-like-any.sql |  249 ++++++++++++++++++++
 4 files changed, 339 insertions(+), 27 deletions(-)
 create mode 100644 Open-ILS/src/sql/Pg/upgrade/0738.schema.vandelay.import-match-no-like-any.sql


hooks/post-receive
-- 
Evergreen ILS


More information about the open-ils-commits mailing list