[open-ils-commits] [GIT] Evergreen ILS branch master updated. d3bf88739ea88768d92831062219f76f58b6a1e3

Evergreen Git git at git.evergreen-ils.org
Sun Mar 4 12:55:36 EST 2012


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Evergreen ILS".

The branch, master has been updated
       via  d3bf88739ea88768d92831062219f76f58b6a1e3 (commit)
       via  cefee449af9bb3c7bb1d245fd352ac5d2d0ced52 (commit)
       via  dd21cdc01d2b3e6139714b2196c8c9d274c856a5 (commit)
      from  043aa913fc4233eaf6f96b46491ce7643acd396b (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit d3bf88739ea88768d92831062219f76f58b6a1e3
Author: Dan Scott <dscott at laurentian.ca>
Date:   Sun Mar 4 12:28:51 2012 -0500

    Number the autosuggest normalization upgrade script
    
    Signed-off-by: Dan Scott <dscott at laurentian.ca>

diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql
index 66d37a8..4c1bc1e 100644
--- a/Open-ILS/src/sql/Pg/002.schema.config.sql
+++ b/Open-ILS/src/sql/Pg/002.schema.config.sql
@@ -86,7 +86,7 @@ CREATE TRIGGER no_overlapping_deps
     BEFORE INSERT OR UPDATE ON config.db_patch_dependencies
     FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates');
 
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0678', :eg_version); -- berick/miker
+INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0679', :eg_version); -- berick/miker
 
 CREATE TABLE config.bib_source (
 	id		SERIAL	PRIMARY KEY,
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.autosuggest.search-normalize.sql b/Open-ILS/src/sql/Pg/upgrade/0679.schema.autosuggest.search-normalize.sql
similarity index 96%
rename from Open-ILS/src/sql/Pg/upgrade/XXXX.schema.autosuggest.search-normalize.sql
rename to Open-ILS/src/sql/Pg/upgrade/0679.schema.autosuggest.search-normalize.sql
index 4aca713..d82af47 100644
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.autosuggest.search-normalize.sql
+++ b/Open-ILS/src/sql/Pg/upgrade/0679.schema.autosuggest.search-normalize.sql
@@ -1,6 +1,11 @@
 BEGIN;
 
-SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+SELECT evergreen.upgrade_deps_block_check('0679', :eg_version);
+
+-- Address typo in column name
+ALTER TABLE config.metabib_class ADD COLUMN buoyant BOOL DEFAULT FALSE NOT NULL;
+UPDATE config.metabib_class SET buoyant = bouyant;
+ALTER TABLE config.metabib_class DROP COLUMN bouyant;
 
 CREATE OR REPLACE FUNCTION oils_tsearch2 () RETURNS TRIGGER AS $$
 DECLARE

commit cefee449af9bb3c7bb1d245fd352ac5d2d0ced52
Author: Dan Scott <dscott at laurentian.ca>
Date:   Sun Mar 4 12:06:47 2012 -0500

    Address "bouyancy" typo (should be "buoyancy")
    
    Let us avoid a typo making it into a major release and condemning us to
    the hell of having to remember to misspell this thing until we can break
    backwards compatibility...
    
    Drop metabib.suggest_browse_entries() in the upgrade script, as the OUT
    parameters have changed and a mere "CREATE OR REPLACE" will not suffice.
    
    Signed-off-by: Dan Scott <dscott at laurentian.ca>

diff --git a/Open-ILS/examples/fm_IDL.xml b/Open-ILS/examples/fm_IDL.xml
index f53638e..5260a76 100644
--- a/Open-ILS/examples/fm_IDL.xml
+++ b/Open-ILS/examples/fm_IDL.xml
@@ -2133,7 +2133,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 		<fields oils_persist:primary="name">
 			<field reporter:label="Name" name="name" reporter:datatype="text"/>
 			<field reporter:label="Label" name="label" reporter:datatype="text" oils_persist:i18n="true"/>
-			<field reporter:label="Bouyant?" name="bouyant" reporter:datatype="bool" />
+			<field reporter:label="Buoyant?" name="buoyant" reporter:datatype="bool" />
 			<field reporter:label="Restrict?" name="restrict" reporter:datatype="bool" />
 			<field reporter:label="Fields" name="fields" reporter:datatype="link" oils_persist:virtual="true"/>
 		</fields>
diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql
index 40e3af0..66d37a8 100644
--- a/Open-ILS/src/sql/Pg/002.schema.config.sql
+++ b/Open-ILS/src/sql/Pg/002.schema.config.sql
@@ -167,7 +167,7 @@ INSERT INTO config.biblio_fingerprint (name, xpath, format, first_word)
 CREATE TABLE config.metabib_class (
     name     TEXT    PRIMARY KEY,
     label    TEXT    NOT NULL UNIQUE,
-    bouyant  BOOL    DEFAULT FALSE NOT NULL,
+    buoyant  BOOL    DEFAULT FALSE NOT NULL,
     restrict BOOL    DEFAULT FALSE NOT NULL
 );
 
diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
index 04b9f56..b956ab8 100644
--- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql
+++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
@@ -1349,11 +1349,11 @@ CREATE OR REPLACE
     ) RETURNS TABLE (
         value                   TEXT,   -- plain
         field                   INTEGER,
-        bouyant_and_class_match BOOL,
+        buoyant_and_class_match BOOL,
         field_match             BOOL,
         field_weight            INTEGER,
         rank                    REAL,
-        bouyant                 BOOL,
+        buoyant                 BOOL,
         match                   TEXT    -- marked up
     ) AS $func$
 DECLARE
@@ -1423,11 +1423,11 @@ BEGIN
     RETURN QUERY EXECUTE 'SELECT *, TS_HEADLINE(value, $7, $3) FROM (SELECT DISTINCT
         mbe.value,
         cmf.id,
-        cmc.bouyant AND _registered.field_class IS NOT NULL,
+        cmc.buoyant AND _registered.field_class IS NOT NULL,
         _registered.field = cmf.id,
         cmf.weight,
         TS_RANK_CD(mbe.index_vector, $1, $6),
-        cmc.bouyant
+        cmc.buoyant
     FROM metabib.browse_entry_def_map mbedm
     JOIN metabib.browse_entry mbe ON (mbe.id = mbedm.entry)
     JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
@@ -1444,11 +1444,11 @@ BEGIN
         ;
 
     -- sort order:
-    --  bouyant AND chosen class = match class
+    --  buoyant AND chosen class = match class
     --  chosen field = match field
     --  field weight
     --  rank
-    --  bouyancy
+    --  buoyancy
     --  value itself
 
 END;
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.autosuggest.search-normalize.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.autosuggest.search-normalize.sql
index 14284b5..4aca713 100644
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.autosuggest.search-normalize.sql
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.autosuggest.search-normalize.sql
@@ -112,6 +112,9 @@ END;
 $$ LANGUAGE PLPGSQL;
 
 
+-- Definition of OUT parameters changes, so must drop first
+DROP FUNCTION IF EXISTS metabib.suggest_browse_entries (TEXT, TEXT, TEXT, INTEGER, INTEGER, INTEGER);
+
 CREATE OR REPLACE
     FUNCTION metabib.suggest_browse_entries(
         raw_query_text  TEXT,   -- actually typed by humans at the UI level
@@ -123,11 +126,11 @@ CREATE OR REPLACE
     ) RETURNS TABLE (
         value                   TEXT,   -- plain
         field                   INTEGER,
-        bouyant_and_class_match BOOL,
+        buoyant_and_class_match BOOL,
         field_match             BOOL,
         field_weight            INTEGER,
         rank                    REAL,
-        bouyant                 BOOL,
+        buoyant                 BOOL,
         match                   TEXT    -- marked up
     ) AS $func$
 DECLARE
@@ -197,11 +200,11 @@ BEGIN
     RETURN QUERY EXECUTE 'SELECT *, TS_HEADLINE(value, $7, $3) FROM (SELECT DISTINCT
         mbe.value,
         cmf.id,
-        cmc.bouyant AND _registered.field_class IS NOT NULL,
+        cmc.buoyant AND _registered.field_class IS NOT NULL,
         _registered.field = cmf.id,
         cmf.weight,
         TS_RANK_CD(mbe.index_vector, $1, $6),
-        cmc.bouyant
+        cmc.buoyant
     FROM metabib.browse_entry_def_map mbedm
     JOIN metabib.browse_entry mbe ON (mbe.id = mbedm.entry)
     JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
@@ -218,11 +221,11 @@ BEGIN
         ;
 
     -- sort order:
-    --  bouyant AND chosen class = match class
+    --  buoyant AND chosen class = match class
     --  chosen field = match field
     --  field weight
     --  rank
-    --  bouyancy
+    --  buoyancy
     --  value itself
 
 END;

commit dd21cdc01d2b3e6139714b2196c8c9d274c856a5
Author: Lebbeous Fogle-Weekley <lebbeous at esilibrary.com>
Date:   Wed Feb 29 14:30:59 2012 -0500

    AutoSuggest: suggest "josé" when the user types "jose"
    
    "josé" will serve as my example of a term with diacritics in it.  I
    don't mean that there is special handling just for the word "josé" per se.
    
    Before now, the user could type "josé" and see suggestions containing
    exactly that.  You could not type "jose" and expect to see "josé".  Now
    you can.
    
    In other words, typing "jose" and "josé" should now produce the exact same
    set of suggestions.  Only the version you actually typed will be
    *highlighted* in the suggestions, however.
    
    Signed-off-by: Lebbeous Fogle-Weekley <lebbeous at esilibrary.com>
    Signed-off-by: Dan Scott <dscott at laurentian.ca>

diff --git a/Open-ILS/src/perlmods/lib/OpenILS/WWW/AutoSuggest.pm b/Open-ILS/src/perlmods/lib/OpenILS/WWW/AutoSuggest.pm
index cf8644b..7ed9bf7 100644
--- a/Open-ILS/src/perlmods/lib/OpenILS/WWW/AutoSuggest.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/WWW/AutoSuggest.pm
@@ -49,18 +49,6 @@ my @_output_handler_types = sort {
 
 # END package globals
 
-# Given a string such as a user might type into a search box, prepare
-# it for to_tsquery(). See
-# http://www.postgresql.org/docs/9.0/static/textsearch-controls.html
-sub prepare_for_tsquery {
-    my ($str) = shift;
-
-    $str =~ s/[^\w\s]/ /ig;
-    $str .= ":*" unless $str =~ /\s$/;
-
-    return join(" & ", grep(length, split(/\s+/, $str)));
-}
-
 # The third argument to our stored procedure, metabib.suggest_browse_entries(),
 # is passed through directly to ts_headline() as the 'options' arugment.
 sub prepare_headline_opts {
@@ -106,7 +94,7 @@ sub get_suggestions {
     return $editor->json_query({
         "from" => [
             "metabib.suggest_browse_entries",
-            prepare_for_tsquery($query),
+            $query,
             $search_class,
             $headline_opts,
             $org_unit,
diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql
index 3eba010..40e3af0 100644
--- a/Open-ILS/src/sql/Pg/002.schema.config.sql
+++ b/Open-ILS/src/sql/Pg/002.schema.config.sql
@@ -811,6 +811,7 @@ BEGIN
         value :=  ARRAY_TO_STRING(
             evergreen.regexp_split_to_array(value, E'\\W+'), ' '
         );
+        value := public.search_normalize(value);
     END IF;
 
     NEW.index_vector = to_tsvector((TG_ARGV[0])::regconfig, value);
diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
index 4cc72c0..04b9f56 100644
--- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql
+++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
@@ -1297,9 +1297,50 @@ END;
 $func$ LANGUAGE PLPGSQL;
 
 
+-- Given a string such as a user might type into a search box, prepare
+-- two changed variants for TO_TSQUERY(). See
+-- http://www.postgresql.org/docs/9.0/static/textsearch-controls.html
+-- The first variant is normalized to match indexed documents regardless
+-- of diacritics.  The second variant keeps its diacritics for proper
+-- highlighting via TS_HEADLINE().
+CREATE OR REPLACE
+    FUNCTION metabib.autosuggest_prepare_tsquery(orig TEXT) RETURNS TEXT[] AS
+$$
+DECLARE
+    orig_ended_in_space     BOOLEAN;
+    result                  RECORD;
+    plain                   TEXT;
+    normalized              TEXT;
+BEGIN
+    orig_ended_in_space := orig ~ E'\\s$';
+
+    orig := ARRAY_TO_STRING(
+        evergreen.regexp_split_to_array(orig, E'\\W+'), ' '
+    );
+
+    normalized := public.search_normalize(orig); -- also trim()s
+    plain := trim(orig);
+
+    IF NOT orig_ended_in_space THEN
+        plain := plain || ':*';
+        normalized := normalized || ':*';
+    END IF;
+
+    plain := ARRAY_TO_STRING(
+        evergreen.regexp_split_to_array(plain, E'\\s+'), ' & '
+    );
+    normalized := ARRAY_TO_STRING(
+        evergreen.regexp_split_to_array(normalized, E'\\s+'), ' & '
+    );
+
+    RETURN ARRAY[normalized, plain];
+END;
+$$ LANGUAGE PLPGSQL;
+
+
 CREATE OR REPLACE
     FUNCTION metabib.suggest_browse_entries(
-        query_text      TEXT,   -- 'foo' or 'foo & ba:*',ready for to_tsquery()
+        raw_query_text  TEXT,   -- actually typed by humans at the UI level
         search_class    TEXT,   -- 'alias' or 'class' or 'class|field..', etc
         headline_opts   TEXT,   -- markup options for ts_headline()
         visibility_org  INTEGER,-- null if you don't want opac visibility test
@@ -1316,12 +1357,17 @@ CREATE OR REPLACE
         match                   TEXT    -- marked up
     ) AS $func$
 DECLARE
+    prepared_query_texts    TEXT[];
     query                   TSQUERY;
+    plain_query             TSQUERY;
     opac_visibility_join    TEXT;
     search_class_join       TEXT;
     r_fields                RECORD;
 BEGIN
-    query := TO_TSQUERY('keyword', query_text);
+    prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text);
+
+    query := TO_TSQUERY('keyword', prepared_query_texts[1]);
+    plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]);
 
     IF visibility_org IS NOT NULL THEN
         opac_visibility_join := '
@@ -1374,7 +1420,7 @@ BEGIN
     ';
     END IF;
 
-    RETURN QUERY EXECUTE 'SELECT *, TS_HEADLINE(value, $1, $3) FROM (SELECT DISTINCT
+    RETURN QUERY EXECUTE 'SELECT *, TS_HEADLINE(value, $7, $3) FROM (SELECT DISTINCT
         mbe.value,
         cmf.id,
         cmc.bouyant AND _registered.field_class IS NOT NULL,
@@ -1394,7 +1440,7 @@ BEGIN
     '   -- sic, repeat the order by clause in the outer select too
     USING
         query, search_class, headline_opts,
-        visibility_org, query_limit, normalization
+        visibility_org, query_limit, normalization, plain_query
         ;
 
     -- sort order:
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.autosuggest.search-normalize.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.autosuggest.search-normalize.sql
new file mode 100644
index 0000000..14284b5
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.autosuggest.search-normalize.sql
@@ -0,0 +1,255 @@
+BEGIN;
+
+SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+
+CREATE OR REPLACE FUNCTION oils_tsearch2 () RETURNS TRIGGER AS $$
+DECLARE
+    normalizer      RECORD;
+    value           TEXT := '';
+BEGIN
+
+    value := NEW.value;
+
+    IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
+        FOR normalizer IN
+            SELECT  n.func AS func,
+                    n.param_count AS param_count,
+                    m.params AS params
+              FROM  config.index_normalizer n
+                    JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
+              WHERE field = NEW.field AND m.pos < 0
+              ORDER BY m.pos LOOP
+                EXECUTE 'SELECT ' || normalizer.func || '(' ||
+                    quote_literal( value ) ||
+                    CASE
+                        WHEN normalizer.param_count > 0
+                            THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
+                            ELSE ''
+                        END ||
+                    ')' INTO value;
+
+        END LOOP;
+
+        NEW.value := value;
+    END IF;
+
+    IF NEW.index_vector = ''::tsvector THEN
+        RETURN NEW;
+    END IF;
+
+    IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
+        FOR normalizer IN
+            SELECT  n.func AS func,
+                    n.param_count AS param_count,
+                    m.params AS params
+              FROM  config.index_normalizer n
+                    JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
+              WHERE field = NEW.field AND m.pos >= 0
+              ORDER BY m.pos LOOP
+                EXECUTE 'SELECT ' || normalizer.func || '(' ||
+                    quote_literal( value ) ||
+                    CASE
+                        WHEN normalizer.param_count > 0
+                            THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
+                            ELSE ''
+                        END ||
+                    ')' INTO value;
+
+        END LOOP;
+    END IF;
+
+    IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
+        value :=  ARRAY_TO_STRING(
+            evergreen.regexp_split_to_array(value, E'\\W+'), ' '
+        );
+        value := public.search_normalize(value);
+    END IF;
+
+    NEW.index_vector = to_tsvector((TG_ARGV[0])::regconfig, value);
+
+    RETURN NEW;
+END;
+$$ LANGUAGE PLPGSQL;
+
+-- Given a string such as a user might type into a search box, prepare
+-- two changed variants for TO_TSQUERY(). See
+-- http://www.postgresql.org/docs/9.0/static/textsearch-controls.html
+-- The first variant is normalized to match indexed documents regardless
+-- of diacritics.  The second variant keeps its diacritics for proper
+-- highlighting via TS_HEADLINE().
+CREATE OR REPLACE
+    FUNCTION metabib.autosuggest_prepare_tsquery(orig TEXT) RETURNS TEXT[] AS
+$$
+DECLARE
+    orig_ended_in_space     BOOLEAN;
+    result                  RECORD;
+    plain                   TEXT;
+    normalized              TEXT;
+BEGIN
+    orig_ended_in_space := orig ~ E'\\s$';
+
+    orig := ARRAY_TO_STRING(
+        evergreen.regexp_split_to_array(orig, E'\\W+'), ' '
+    );
+
+    normalized := public.search_normalize(orig); -- also trim()s
+    plain := trim(orig);
+
+    IF NOT orig_ended_in_space THEN
+        plain := plain || ':*';
+        normalized := normalized || ':*';
+    END IF;
+
+    plain := ARRAY_TO_STRING(
+        evergreen.regexp_split_to_array(plain, E'\\s+'), ' & '
+    );
+    normalized := ARRAY_TO_STRING(
+        evergreen.regexp_split_to_array(normalized, E'\\s+'), ' & '
+    );
+
+    RETURN ARRAY[normalized, plain];
+END;
+$$ LANGUAGE PLPGSQL;
+
+
+CREATE OR REPLACE
+    FUNCTION metabib.suggest_browse_entries(
+        raw_query_text  TEXT,   -- actually typed by humans at the UI level
+        search_class    TEXT,   -- 'alias' or 'class' or 'class|field..', etc
+        headline_opts   TEXT,   -- markup options for ts_headline()
+        visibility_org  INTEGER,-- null if you don't want opac visibility test
+        query_limit     INTEGER,-- use in LIMIT clause of internal query
+        normalization   INTEGER -- argument to TS_RANK_CD()
+    ) RETURNS TABLE (
+        value                   TEXT,   -- plain
+        field                   INTEGER,
+        bouyant_and_class_match BOOL,
+        field_match             BOOL,
+        field_weight            INTEGER,
+        rank                    REAL,
+        bouyant                 BOOL,
+        match                   TEXT    -- marked up
+    ) AS $func$
+DECLARE
+    prepared_query_texts    TEXT[];
+    query                   TSQUERY;
+    plain_query             TSQUERY;
+    opac_visibility_join    TEXT;
+    search_class_join       TEXT;
+    r_fields                RECORD;
+BEGIN
+    prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text);
+
+    query := TO_TSQUERY('keyword', prepared_query_texts[1]);
+    plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]);
+
+    IF visibility_org IS NOT NULL THEN
+        opac_visibility_join := '
+    JOIN asset.opac_visible_copies aovc ON (
+        aovc.record = mbedm.source AND
+        aovc.circ_lib IN (SELECT id FROM actor.org_unit_descendants($4))
+    )';
+    ELSE
+        opac_visibility_join := '';
+    END IF;
+
+    -- The following determines whether we only provide suggestions matching
+    -- the user's selected search_class, or whether we show other suggestions
+    -- too. The reason for MIN() is that for search_classes like
+    -- 'title|proper|uniform' you would otherwise get multiple rows.  The
+    -- implication is that if title as a class doesn't have restrict,
+    -- nor does the proper field, but the uniform field does, you're going
+    -- to get 'false' for your overall evaluation of 'should we restrict?'
+    -- To invert that, change from MIN() to MAX().
+
+    SELECT
+        INTO r_fields
+            MIN(cmc.restrict::INT) AS restrict_class,
+            MIN(cmf.restrict::INT) AS restrict_field
+        FROM metabib.search_class_to_registered_components(search_class)
+            AS _registered (field_class TEXT, field INT)
+        JOIN
+            config.metabib_class cmc ON (cmc.name = _registered.field_class)
+        LEFT JOIN
+            config.metabib_field cmf ON (cmf.id = _registered.field);
+
+    -- evaluate 'should we restrict?'
+    IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN
+        search_class_join := '
+    JOIN
+        metabib.search_class_to_registered_components($2)
+        AS _registered (field_class TEXT, field INT) ON (
+            (_registered.field IS NULL AND
+                _registered.field_class = cmf.field_class) OR
+            (_registered.field = cmf.id)
+        )
+    ';
+    ELSE
+        search_class_join := '
+    LEFT JOIN
+        metabib.search_class_to_registered_components($2)
+        AS _registered (field_class TEXT, field INT) ON (
+            _registered.field_class = cmc.name
+        )
+    ';
+    END IF;
+
+    RETURN QUERY EXECUTE 'SELECT *, TS_HEADLINE(value, $7, $3) FROM (SELECT DISTINCT
+        mbe.value,
+        cmf.id,
+        cmc.bouyant AND _registered.field_class IS NOT NULL,
+        _registered.field = cmf.id,
+        cmf.weight,
+        TS_RANK_CD(mbe.index_vector, $1, $6),
+        cmc.bouyant
+    FROM metabib.browse_entry_def_map mbedm
+    JOIN metabib.browse_entry mbe ON (mbe.id = mbedm.entry)
+    JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
+    JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name)
+    '  || search_class_join || opac_visibility_join ||
+    ' WHERE $1 @@ mbe.index_vector
+    ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
+    LIMIT $5) x
+    ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
+    '   -- sic, repeat the order by clause in the outer select too
+    USING
+        query, search_class, headline_opts,
+        visibility_org, query_limit, normalization, plain_query
+        ;
+
+    -- sort order:
+    --  bouyant AND chosen class = match class
+    --  chosen field = match field
+    --  field weight
+    --  rank
+    --  bouyancy
+    --  value itself
+
+END;
+$func$ LANGUAGE PLPGSQL;
+
+
+\qecho 
+\qecho The following takes about a minute per 100,000 rows in
+\qecho metabib.browse_entry on my development system, which is only a VM with
+\qecho 4 GB of memory and 2 cores.
+\qecho 
+\qecho The following is a very loose estimate of how long the next UPDATE
+\qecho statement would take to finish on MY machine, based on YOUR number
+\qecho of rows in metabib.browse_entry:
+\qecho 
+
+SELECT (COUNT(id) / 100000.0) * INTERVAL '1 minute'
+    AS "approximate duration of following UPDATE statement"
+    FROM metabib.browse_entry;
+
+UPDATE metabib.browse_entry SET index_vector = TO_TSVECTOR(
+    'keyword',
+    public.search_normalize(
+        ARRAY_TO_STRING(
+            evergreen.regexp_split_to_array(value, E'\\W+'), ' '
+        )
+    )
+);
+
+COMMIT;

-----------------------------------------------------------------------

Summary of changes:
 Open-ILS/examples/fm_IDL.xml                       |    2 +-
 .../src/perlmods/lib/OpenILS/WWW/AutoSuggest.pm    |   14 +-
 Open-ILS/src/sql/Pg/002.schema.config.sql          |    5 +-
 Open-ILS/src/sql/Pg/030.schema.metabib.sql         |   66 ++++-
 .../0679.schema.autosuggest.search-normalize.sql   |  263 ++++++++++++++++++++
 5 files changed, 324 insertions(+), 26 deletions(-)
 create mode 100644 Open-ILS/src/sql/Pg/upgrade/0679.schema.autosuggest.search-normalize.sql


hooks/post-receive
-- 
Evergreen ILS


More information about the open-ils-commits mailing list