[open-ils-commits] [GIT] Evergreen ILS branch master updated. 7f1ae7e5d686e9f4d6ee62b9d22aa88ac3eb116d

Evergreen Git git at git.evergreen-ils.org
Wed Jun 26 23:45:43 EDT 2013


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Evergreen ILS".

The branch, master has been updated
       via  7f1ae7e5d686e9f4d6ee62b9d22aa88ac3eb116d (commit)
       via  8508a0d89efb0305e590bec9d6b94c1236a5e641 (commit)
       via  6205d9b43d60939fdc173ff77e661fa7ebe3dbdd (commit)
       via  68d9a2111f6087a1377ff4bed8fa5a485fec4932 (commit)
       via  2f9c80ae6b6a3a8ed1f7d2b91f310a6faf1d69fa (commit)
      from  fceea3d2e12537ccb540a2a950a5ada22f73b7bf (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 7f1ae7e5d686e9f4d6ee62b9d22aa88ac3eb116d
Author: Dan Scott <dscott at laurentian.ca>
Date:   Wed Jun 26 23:01:36 2013 -0400

    Sign off on apostrophe upgrade script
    
    Also had to quote the \qecho commands to prevent them from being
    executed instead of simply set to STDOUT.
    
    Signed-off-by: Dan Scott <dscott at laurentian.ca>
    
    Conflicts:
    	Open-ILS/src/sql/Pg/002.schema.config.sql

diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql
index 7180d01..00daaee 100644
--- a/Open-ILS/src/sql/Pg/002.schema.config.sql
+++ b/Open-ILS/src/sql/Pg/002.schema.config.sql
@@ -91,7 +91,7 @@ CREATE TRIGGER no_overlapping_deps
     BEFORE INSERT OR UPDATE ON config.db_patch_dependencies
     FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates');
 
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0799', :eg_version); -- mrpeters/paxed/bshum
+INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0800', :eg_version); -- miker/dbs
 
 CREATE TABLE config.bib_source (
 	id		SERIAL	PRIMARY KEY,
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.function.apostrophe-search.sql b/Open-ILS/src/sql/Pg/upgrade/0800.function.apostrophe-search.sql
similarity index 96%
rename from Open-ILS/src/sql/Pg/upgrade/XXXX.function.apostrophe-search.sql
rename to Open-ILS/src/sql/Pg/upgrade/0800.function.apostrophe-search.sql
index 3b7e26f..b5c2fc2 100644
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.function.apostrophe-search.sql
+++ b/Open-ILS/src/sql/Pg/upgrade/0800.function.apostrophe-search.sql
@@ -1,6 +1,6 @@
 BEGIN;
 
-SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+SELECT evergreen.upgrade_deps_block_check('0800', :eg_version);
 
 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$
 DECLARE
@@ -193,10 +193,10 @@ COMMIT;
 \qecho **** your entire bibliographic data set, consider generating an SQL script
 \qecho **** with the following query, and running that via psql:
 \qecho
-\qecho  \t
-\qecho  \o /tmp/reingest-2.4.1.sql
-\qecho  SELECT 'select metabib.reingest_metabib_field_entries(' || id || ');' FROM biblio.record_entry WHERE NOT DELETED AND id > 0;
-\qecho  \o
-\qecho  \t
+\qecho '\\t'
+\qecho '\\o /tmp/reingest-2.4.1.sql'
+\qecho 'SELECT ''select metabib.reingest_metabib_field_entries('' || id || '');'' FROM biblio.record_entry WHERE NOT DELETED AND id > 0;'
+\qecho '\\o'
+\qecho '\\t'
 \qecho
 

commit 8508a0d89efb0305e590bec9d6b94c1236a5e641
Author: Mike Rylander <mrylander at gmail.com>
Date:   Mon Jun 24 15:48:07 2013 -0400

    Create upgrade script for ingest normalization changes
    
    This provides the upgrade script for the ingest changes outlined
    in LP 1187433.  In addition, the version-upgrade script for 2.3 to
    2.4.0 has been modified to move the final reingest step until after
    this incremental change has been applied.
    
    Signed-off-by: Mike Rylander <mrylander at gmail.com>
    Signed-off-by: Dan Scott <dscott at laurentian.ca>

diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.function.apostrophe-search.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.apostrophe-search.sql
new file mode 100644
index 0000000..3b7e26f
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.apostrophe-search.sql
@@ -0,0 +1,202 @@
+BEGIN;
+
+SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+
+CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$
+DECLARE
+    fclass          RECORD;
+    ind_data        metabib.field_entry_template%ROWTYPE;
+    mbe_row         metabib.browse_entry%ROWTYPE;
+    mbe_id          BIGINT;
+    b_skip_facet    BOOL;
+    b_skip_browse   BOOL;
+    b_skip_search   BOOL;
+BEGIN
+
+    SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name =  'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
+    SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name =  'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
+    SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name =  'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;
+
+    PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
+    IF NOT FOUND THEN
+        IF NOT b_skip_search THEN
+            FOR fclass IN SELECT * FROM config.metabib_class LOOP
+                -- RAISE NOTICE 'Emptying out %', fclass.name;
+                EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
+            END LOOP;
+        END IF;
+        IF NOT b_skip_facet THEN
+            DELETE FROM metabib.facet_entry WHERE source = bib_id;
+        END IF;
+        IF NOT b_skip_browse THEN
+            DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
+        END IF;
+    END IF;
+
+    FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP
+        IF ind_data.field < 0 THEN
+            ind_data.field = -1 * ind_data.field;
+        END IF;
+
+        IF ind_data.facet_field AND NOT b_skip_facet THEN
+            INSERT INTO metabib.facet_entry (field, source, value)
+                VALUES (ind_data.field, ind_data.source, ind_data.value);
+        END IF;
+
+        IF ind_data.browse_field AND NOT b_skip_browse THEN
+            -- A caveat about this SELECT: this should take care of replacing
+            -- old mbe rows when data changes, but not if normalization (by
+            -- which I mean specifically the output of
+            -- evergreen.oils_tsearch2()) changes.  It may or may not be
+            -- expensive to add a comparison of index_vector to index_vector
+            -- to the WHERE clause below.
+            SELECT INTO mbe_row * FROM metabib.browse_entry WHERE value = ind_data.value;
+            IF FOUND THEN
+                mbe_id := mbe_row.id;
+            ELSE
+                INSERT INTO metabib.browse_entry (value) VALUES
+                    (metabib.browse_normalize(ind_data.value, ind_data.field));
+                mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
+            END IF;
+
+            INSERT INTO metabib.browse_entry_def_map (entry, def, source)
+                VALUES (mbe_id, ind_data.field, ind_data.source);
+        END IF;
+
+        -- Avoid inserting duplicate rows, but retain granularity of being
+        -- able to search browse fields with "starts with" type operators
+        -- (for example, for titles of songs in music albums)
+        IF (ind_data.search_field OR ind_data.browse_field) AND NOT b_skip_search THEN
+            EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
+                '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
+                INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
+                -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
+            IF mbe_id IS NULL THEN
+                EXECUTE $$
+                INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
+                    VALUES ($$ ||
+                        quote_literal(ind_data.field) || $$, $$ ||
+                        quote_literal(ind_data.source) || $$, $$ ||
+                        quote_literal(ind_data.value) ||
+                    $$);$$;
+            END IF;
+        END IF;
+
+    END LOOP;
+
+    IF NOT b_skip_search THEN
+        PERFORM metabib.update_combined_index_vectors(bib_id);
+    END IF;
+
+    RETURN;
+END;
+$func$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION public.oils_tsearch2 () RETURNS TRIGGER AS $$
+DECLARE
+    normalizer      RECORD;
+    value           TEXT := '';
+    temp_vector     TEXT := '';
+    ts_rec          RECORD;
+    cur_weight      "char";
+BEGIN
+
+    value := NEW.value;
+    NEW.index_vector = ''::tsvector;
+
+    IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
+        FOR normalizer IN
+            SELECT  n.func AS func,
+                    n.param_count AS param_count,
+                    m.params AS params
+              FROM  config.index_normalizer n
+                    JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
+              WHERE field = NEW.field AND m.pos < 0
+              ORDER BY m.pos LOOP
+                EXECUTE 'SELECT ' || normalizer.func || '(' ||
+                    quote_literal( value ) ||
+                    CASE
+                        WHEN normalizer.param_count > 0
+                            THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
+                            ELSE ''
+                        END ||
+                    ')' INTO value;
+
+        END LOOP;
+
+        NEW.value = value;
+
+        FOR normalizer IN
+            SELECT  n.func AS func,
+                    n.param_count AS param_count,
+                    m.params AS params
+              FROM  config.index_normalizer n
+                    JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
+              WHERE field = NEW.field AND m.pos >= 0
+              ORDER BY m.pos LOOP
+                EXECUTE 'SELECT ' || normalizer.func || '(' ||
+                    quote_literal( value ) ||
+                    CASE
+                        WHEN normalizer.param_count > 0
+                            THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
+                            ELSE ''
+                        END ||
+                    ')' INTO value;
+
+        END LOOP;
+   END IF;
+
+    IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
+        value :=  ARRAY_TO_STRING(
+            evergreen.regexp_split_to_array(value, E'\\W+'), ' '
+        );
+        value := public.search_normalize(value);
+        NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
+    ELSIF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
+        FOR ts_rec IN
+            SELECT ts_config, index_weight
+            FROM config.metabib_class_ts_map
+            WHERE field_class = TG_ARGV[0]
+                AND index_lang IS NULL OR EXISTS (SELECT 1 FROM metabib.record_attr WHERE id = NEW.source AND index_lang IN(attrs->'item_lang',attrs->'language'))
+                AND always OR NOT EXISTS (SELECT 1 FROM config.metabib_field_ts_map WHERE metabib_field = NEW.field)
+            UNION
+            SELECT ts_config, index_weight
+            FROM config.metabib_field_ts_map
+            WHERE metabib_field = NEW.field
+               AND index_lang IS NULL OR EXISTS (SELECT 1 FROM metabib.record_attr WHERE id = NEW.source AND index_lang IN(attrs->'item_lang',attrs->'language'))
+            ORDER BY index_weight ASC
+        LOOP
+            IF cur_weight IS NOT NULL AND cur_weight != ts_rec.index_weight THEN
+                NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
+                temp_vector = '';
+            END IF;
+            cur_weight = ts_rec.index_weight;
+            SELECT INTO temp_vector temp_vector || ' ' || to_tsvector(ts_rec.ts_config::regconfig, value)::TEXT;
+        END LOOP;
+        NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
+    ELSE
+        NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
+    END IF;
+
+    RETURN NEW;
+END;
+$$ LANGUAGE PLPGSQL;
+
+COMMIT;
+
+\qecho **** If upgrading from Evergreen 2.3 or before, now is the time to run
+\qecho **** Open-ILS/src/sql/Pg/version-upgrade/2.3-2.4-supplemental.sh, which
+\qecho **** contains additional required SQL to complete your Evergreen upgrade!
+\qecho
+\qecho **** If upgrading from Evergreen 2.4.0, you will need to reingest your
+\qecho **** full data set.  In order to allow this to continue without locking
+\qecho **** your entire bibliographic data set, consider generating an SQL script
+\qecho **** with the following query, and running that via psql:
+\qecho
+\qecho  \t
+\qecho  \o /tmp/reingest-2.4.1.sql
+\qecho  SELECT 'select metabib.reingest_metabib_field_entries(' || id || ');' FROM biblio.record_entry WHERE NOT DELETED AND id > 0;
+\qecho  \o
+\qecho  \t
+\qecho
+
diff --git a/Open-ILS/src/sql/Pg/version-upgrade/2.3-2.4.0-upgrade-db.sql b/Open-ILS/src/sql/Pg/version-upgrade/2.3-2.4.0-upgrade-db.sql
index 28ce7bd..6c172a9 100644
--- a/Open-ILS/src/sql/Pg/version-upgrade/2.3-2.4.0-upgrade-db.sql
+++ b/Open-ILS/src/sql/Pg/version-upgrade/2.3-2.4.0-upgrade-db.sql
@@ -3993,5 +3993,3 @@ CREATE TEXT SEARCH CONFIGURATION subject ( COPY = english_nostop );
 CREATE TEXT SEARCH CONFIGURATION series ( COPY = english_nostop );
 CREATE TEXT SEARCH CONFIGURATION identifier ( COPY = english_nostop );
 
-\qecho Please run Open-ILS/src/sql/Pg/version-upgrade/2.3-2.4-supplemental.sh now, which contains additional required SQL to complete your Evergreen upgrade!
-

commit 6205d9b43d60939fdc173ff77e661fa7ebe3dbdd
Author: Dan Scott <dscott at laurentian.ca>
Date:   Thu Jun 13 01:16:23 2013 -0400

    Retain index granularity with minimal bloat
    
    The previous approach to reducing index bloat arguably went too far, in
    that analytics such as separately catalogued songs for an album were all
    simply aggregated together in a single metabib.title_field_entry row,
    rather than being added as separated metabib.title_field_entry rows for
    each unique value.
    
    To avoid the original problem of exact duplicate rows being inserted, we
    now check for an existing matching row before inserting into the index.
    A good test record is title "Cello concerto." which results in 1 title
    proper row and 4 added entry title rows in metabib.title_field_entry
    after this change.
    
    Signed-off-by: Dan Scott <dscott at laurentian.ca>

diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
index a989a4f..68d44fd 100644
--- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql
+++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
@@ -641,14 +641,23 @@ BEGIN
                 VALUES (mbe_id, ind_data.field, ind_data.source);
         END IF;
 
-        IF ind_data.search_field AND NOT b_skip_search THEN
-            EXECUTE $$
+        -- Avoid inserting duplicate rows, but retain granularity of being
+        -- able to search browse fields with "starts with" type operators
+        -- (for example, for titles of songs in music albums)
+        IF (ind_data.search_field OR ind_data.browse_field) AND NOT b_skip_search THEN
+            EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
+                '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
+                INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
+                -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
+            IF mbe_id IS NULL THEN
+                EXECUTE $$
                 INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
                     VALUES ($$ ||
                         quote_literal(ind_data.field) || $$, $$ ||
                         quote_literal(ind_data.source) || $$, $$ ||
                         quote_literal(ind_data.value) ||
                     $$);$$;
+            END IF;
         END IF;
 
     END LOOP;

commit 68d9a2111f6087a1377ff4bed8fa5a485fec4932
Author: Mike Rylander <mrylander at gmail.com>
Date:   Wed Jun 12 11:17:08 2013 -0400

    Bring back "split-brain" indexing normalization
    
    Before 2.4-era changes to indexing and search, normalizers were applied
    in a way that resulted in different effects on the value and index_vector
    columns of metabib.*field_entry tables.  This behavior was lost during
    the changes mentioned above, but we need it for several things to work
    properly, phrase search and (likely) facets among them.  This commit
    brings that behavior back.
    
    See here for the original behavioral documentation:
    
    http://evergreen-ils.org/dokuwiki/doku.php?id=documentation:indexing#field_normalization_settings
    
    Signed-off-by: Mike Rylander <mrylander at gmail.com>
    Signed-off-by: Dan Scott <dscott at laurentian.ca>

diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
index a047e1e..a989a4f 100644
--- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql
+++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
@@ -1638,7 +1638,7 @@ BEGIN
                     m.params AS params
               FROM  config.index_normalizer n
                     JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
-              WHERE field = NEW.field
+              WHERE field = NEW.field AND m.pos < 0
               ORDER BY m.pos LOOP
                 EXECUTE 'SELECT ' || normalizer.func || '(' ||
                     quote_literal( value ) ||
@@ -1650,8 +1650,28 @@ BEGIN
                     ')' INTO value;
 
         END LOOP;
+
         NEW.value = value;
-    END IF;
+
+        FOR normalizer IN
+            SELECT  n.func AS func,
+                    n.param_count AS param_count,
+                    m.params AS params
+              FROM  config.index_normalizer n
+                    JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
+              WHERE field = NEW.field AND m.pos >= 0
+              ORDER BY m.pos LOOP
+                EXECUTE 'SELECT ' || normalizer.func || '(' ||
+                    quote_literal( value ) ||
+                    CASE
+                        WHEN normalizer.param_count > 0
+                            THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
+                            ELSE ''
+                        END ||
+                    ')' INTO value;
+
+        END LOOP;
+   END IF;
 
     IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
         value :=  ARRAY_TO_STRING(

commit 2f9c80ae6b6a3a8ed1f7d2b91f310a6faf1d69fa
Author: Bob Wicksall <bwicksall at pls-net.org>
Date:   Thu Jun 13 01:06:10 2013 -0400

    Reduce index bloat involving non-search_field values
    
    Rows in metabib.title_field_entry, subject_field_entry,
    series_field_entry, and author_field_entry are doubled or tripled due to
    bad logic in biblio.extract_metabib_field_entry. This results in these
    tables being 2 or more times their correct size.
    
    This was introduced in 2.2.0 when the logic for browse_field and
    facet_field were added to biblio.extract_metabib_field_entry. 2.1 is not
    affected.
    
    The duplicates are caused when biblio.extract_metabib_field_entry
    returns TRUE in the search_field column for all rows even if they should
    just be facet_field or browse_field after the first search_field value
    is returned.
    
    Signed-off-by: Mike Rylander <mrylander at gmail.com>
    Signed-off-by: Dan Scott <dscott at laurentian.ca>

diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
index e14c239..a047e1e 100644
--- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql
+++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
@@ -411,6 +411,11 @@ DECLARE
     output_row  metabib.field_entry_template%ROWTYPE;
 BEGIN
 
+    -- Start out with no field-use bools set
+    output_row.browse_field = FALSE;
+    output_row.facet_field = FALSE;
+    output_row.search_field = FALSE;
+
     -- Get the record
     SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
 
@@ -513,6 +518,7 @@ BEGIN
 
             output_row.search_field = TRUE;
             RETURN NEXT output_row;
+            output_row.search_field = FALSE;
         END IF;
 
     END LOOP;

-----------------------------------------------------------------------

Summary of changes:
 Open-ILS/src/sql/Pg/002.schema.config.sql          |    2 +-
 Open-ILS/src/sql/Pg/030.schema.metabib.sql         |   43 ++++-
 .../Pg/upgrade/0800.function.apostrophe-search.sql |  202 ++++++++++++++++++++
 .../Pg/version-upgrade/2.3-2.4.0-upgrade-db.sql    |    2 -
 4 files changed, 242 insertions(+), 7 deletions(-)
 create mode 100644 Open-ILS/src/sql/Pg/upgrade/0800.function.apostrophe-search.sql


hooks/post-receive
-- 
Evergreen ILS


More information about the open-ils-commits mailing list