[open-ils-commits] [GIT] Evergreen ILS branch master updated. 70c1da0e63abb278b9c642ca6230b4ecd2279dc7

Evergreen Git git at git.evergreen-ils.org
Tue Feb 23 23:50:51 EST 2016


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Evergreen ILS".

The branch, master has been updated
       via  70c1da0e63abb278b9c642ca6230b4ecd2279dc7 (commit)
       via  9cf7350c02df1f4a2c213942edb62d89b468a568 (commit)
       via  24a7665db9713837148ce1ccd5480e46b63e3d4a (commit)
       via  01ffba4e5e8ed9abb640f288a997f72804b77ebb (commit)
      from  49590f0f0600be28ab28c6f55487c9992993a398 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 70c1da0e63abb278b9c642ca6230b4ecd2279dc7
Author: Ben Shum <ben at evergreener.net>
Date:   Tue Feb 23 23:50:11 2016 -0500

    LP#1505286: stamping upgrade script for limit facets retrieved
    
    Signed-off-by: Ben Shum <ben at evergreener.net>

diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql
index 1559923..a4f3851 100644
--- a/Open-ILS/src/sql/Pg/002.schema.config.sql
+++ b/Open-ILS/src/sql/Pg/002.schema.config.sql
@@ -91,7 +91,7 @@ CREATE TRIGGER no_overlapping_deps
     BEFORE INSERT OR UPDATE ON config.db_patch_dependencies
     FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates');
 
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0957', :eg_version); --berick/kmlussier
+INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0958', :eg_version); -- gmcharlt/bshum
 
 CREATE TABLE config.bib_source (
 	id		SERIAL	PRIMARY KEY,
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.limit_facets.sql b/Open-ILS/src/sql/Pg/upgrade/0958.schema.limit_facets.sql
similarity index 97%
rename from Open-ILS/src/sql/Pg/upgrade/XXXX.schema.limit_facets.sql
rename to Open-ILS/src/sql/Pg/upgrade/0958.schema.limit_facets.sql
index 3140634..a68b448 100644
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.limit_facets.sql
+++ b/Open-ILS/src/sql/Pg/upgrade/0958.schema.limit_facets.sql
@@ -1,6 +1,6 @@
 BEGIN;
 
---- SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+SELECT evergreen.upgrade_deps_block_check('0958', :eg_version);
 
 CREATE OR REPLACE FUNCTION search.facets_for_record_set(ignore_facet_classes TEXT[], hits BIGINT[]) RETURNS TABLE (id INT, value TEXT, count BIGINT) AS $$
     SELECT id, value, count FROM (

commit 9cf7350c02df1f4a2c213942edb62d89b468a568
Author: Galen Charlton <gmc at esilibrary.com>
Date:   Fri Oct 23 17:18:19 2015 +0000

    LP#1505286: add release notes
    
    Signed-off-by: Galen Charlton <gmc at esilibrary.com>
    Signed-off-by: Ben Shum <ben at evergreener.net>

diff --git a/docs/RELEASE_NOTES_NEXT/OPAC/Limit_number_of_facets_retrieved.txt b/docs/RELEASE_NOTES_NEXT/OPAC/Limit_number_of_facets_retrieved.txt
new file mode 100644
index 0000000..bd0c7ea
--- /dev/null
+++ b/docs/RELEASE_NOTES_NEXT/OPAC/Limit_number_of_facets_retrieved.txt
@@ -0,0 +1,14 @@
+Limit number of facets retrieved during search
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Catalog search now sets a limit on the number of facets retrieved
+per defined facet field. Setting a limit is useful so that
+`open-ils.cstore` backends don't end up needlessly consuming
+memory when fetching facets for a large result set; if a broad
+search retrieves over 10,000 author facets (say), even the most
+persistent user is not going to actually look at all of them. Fetching
+fewer facets can also slightly speed up generation of search results.
+
+The limit is controlled by a new global flag, `search.max_facets_per_field`,
+whose label is "Search: maximum number of facet values to retrieve for
+each facet field".  The default limit value is 1,000, but lower values
+(e.g., 100) are perhaps even better for most catalogs.

commit 24a7665db9713837148ce1ccd5480e46b63e3d4a
Author: Galen Charlton <gmc at esilibrary.com>
Date:   Fri Oct 23 17:04:17 2015 +0000

    LP#1505286: add pgTAP tests
    
    Signed-off-by: Galen Charlton <gmc at esilibrary.com>
    Signed-off-by: Ben Shum <ben at evergreener.net>

diff --git a/Open-ILS/src/sql/Pg/t/search_limit_facet_fetch.pg b/Open-ILS/src/sql/Pg/t/search_limit_facet_fetch.pg
new file mode 100644
index 0000000..86023ce
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/t/search_limit_facet_fetch.pg
@@ -0,0 +1,85 @@
+BEGIN;
+
+SELECT plan(4);
+
+INSERT INTO biblio.record_entry (id, last_xact_id, marc)
+VALUES (999999998, 'pgtap', '<record    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"    xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"    xmlns="http://www.loc.gov/MARC21/slim">
+  <leader>00531nam a2200157 a 4500</leader>
+  <controlfield tag="005">20080729170300.0</controlfield>
+  <controlfield tag="008">      t19981999enka              0 eng  </controlfield>
+  <datafield tag="245" ind1="1" ind2="4">
+    <subfield code="a">test-value</subfield>
+  </datafield>
+  <datafield tag="650" ind1=" " ind2=" ">
+    <subfield code="a">subject 1</subfield>
+  </datafield>
+  <datafield tag="650" ind1=" " ind2=" ">
+    <subfield code="a">subject 2</subfield>
+  </datafield>
+  <datafield tag="650" ind1=" " ind2=" ">
+    <subfield code="a">subject 3</subfield>
+  </datafield>
+  <datafield tag="650" ind1=" " ind2=" ">
+    <subfield code="a">subject 4</subfield>
+  </datafield>
+  <datafield tag="650" ind1=" " ind2=" ">
+    <subfield code="a">subject 5</subfield>
+  </datafield>
+  <datafield tag="650" ind1=" " ind2=" ">
+    <subfield code="a">subject 6</subfield>
+  </datafield>
+  <datafield tag="650" ind1=" " ind2=" ">
+    <subfield code="a">subject 7</subfield>
+  </datafield>
+  <datafield tag="650" ind1=" " ind2=" ">
+    <subfield code="a">subject 8</subfield>
+  </datafield>
+  <datafield tag="650" ind1=" " ind2=" ">
+    <subfield code="a">subject 9</subfield>
+  </datafield>
+  <datafield tag="650" ind1=" " ind2=" ">
+    <subfield code="a">subject 10</subfield>
+  </datafield>
+</record>');
+
+-- These tests assume that one subject facet field is defined
+
+SELECT is(
+    (
+        SELECT COUNT(*) FROM search.facets_for_record_set('{}', '{999999998}')
+    ),
+    10::BIGINT,
+    '10 subject facets retrieved'
+);
+
+UPDATE config.global_flag SET value = '5' WHERE name = 'search.max_facets_per_field';
+SELECT is(
+    (
+        SELECT COUNT(*) FROM search.facets_for_record_set('{}', '{999999998}')
+    ),
+    5::BIGINT,
+    '5 subject facets retrieved after setting limit'
+);
+
+SELECT is(
+    (
+        SELECT COUNT(*) FROM (
+            SELECT search.facets_for_metarecord_set('{}', array_accum(metarecord))
+            FROM metabib.metarecord_source_map
+            WHERE source = 999999998
+        ) x
+    ),
+    5::BIGINT,
+    'Works for metarecord sets too'
+);
+
+SELECT is(
+    (
+        SELECT COUNT(*) FROM search.facets_for_record_set('{subject}', '{999999998}')
+    ),
+    0::BIGINT,
+    'Zero subject facets retrieved after excluding subject facets'
+);
+
+SELECT * FROM finish();
+ROLLBACK;

commit 01ffba4e5e8ed9abb640f288a997f72804b77ebb
Author: Galen Charlton <gmc at esilibrary.com>
Date:   Fri Oct 23 16:29:38 2015 +0000

    LP#1505286: limit number of facets retrieved
    
    This patch teaches search how to limit the number of facets retrieved
    per defined facet field. Setting a limit is useful so that
    open-ils.cstore backends don't end up needlessly consuming
    memory when fetching facets for a large result set; if a broad
    search retrieves over 10,000 author facets (say), even the most
    persistent user is not going to actually look at all of them. Fetching
    fewer facets can also slightly speed up generation of search
    results.
    
    The limit is controlled by a new global flag, search.max_facets_per_field,
    whose label is "Search: maximum number of facet values to retrieve for
    each facet field".  The default limit value is 1,000, but lower values
    (e.g., 100) are perhaps even better for most catalogs.
    
    To test:
    
    [1] Upon applying the patch, set the value of the
        search.max_facets_per_field global flag to a small
        value.
    [2] Perform some searches and verify that the number
        of facets retrieved doesn't exceed the limit; note
        that the limit is per facet *field*, not overall
        or per field class.
    
    Signed-off-by: Galen Charlton <gmc at esilibrary.com>
    Signed-off-by: Ben Shum <ben at evergreener.net>

diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm
index 777ef7e..1e6a485 100644
--- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm
@@ -1565,43 +1565,15 @@ sub cache_facets {
 
     return undef unless (@$results);
 
-    # The query we're constructing
-    #
-    # select  mfae.field as id,
-    #         mfae.value,
-    #         count(distinct mmrsm.appropriate-id-field )
-    #   from  metabib.facet_entry mfae
-    #         join metabib.metarecord_sourc_map mmrsm on (mfae.source = mmrsm.source)
-    #   where mmrsm.appropriate-id-field in IDLIST
-    #   group by 1,2;
-
-    my $count_field = $metabib ? 'metarecord' : 'source';
+    my $facets_function = $metabib ? 'search.facets_for_metarecord_set'
+                                   : 'search.facets_for_record_set';
+    my $results_str = '{' . join(',', @$results) . '}';
+    my $ignore_str = ref($ignore) ? '{' . join(',', @$ignore) . '}'
+                                  : '{}';
     my $query = {   
-        select  => {
-            mfae => [ { column => 'field', alias => 'id'}, 'value' ],
-            mmrsm => [{
-                transform => 'count',
-                distinct => 1,
-                column => $count_field,
-                alias => 'count',
-                aggregate => 1
-            }]
-        },
-        from    => {
-            mfae => {
-                mmrsm => { field => 'source', fkey => 'source' },
-                cmf   => { field => 'id', fkey => 'field' }
-            }
-        },
-        where   => {
-            '+mmrsm' => { $count_field => $results },
-            '+cmf'   => { facet_field => 't' }
-        }
+        from => [ $facets_function, $ignore_str, $results_str ]
     };
 
-    $query->{where}->{'+cmf'}->{field_class} = {'not in' => $ignore}
-        if ref($ignore) and @$ignore > 0;
-
     my $facets = OpenILS::Utils::CStoreEditor->new->json_query($query, {substream => 1});
 
     for my $facet (@$facets) {
diff --git a/Open-ILS/src/sql/Pg/300.schema.staged_search.sql b/Open-ILS/src/sql/Pg/300.schema.staged_search.sql
index 9fa639f..e3f96c0 100644
--- a/Open-ILS/src/sql/Pg/300.schema.staged_search.sql
+++ b/Open-ILS/src/sql/Pg/300.schema.staged_search.sql
@@ -429,5 +429,42 @@ BEGIN
 END;
 $func$ LANGUAGE PLPGSQL;
 
- 
+CREATE OR REPLACE FUNCTION search.facets_for_record_set(ignore_facet_classes TEXT[], hits BIGINT[]) RETURNS TABLE (id INT, value TEXT, count BIGINT) AS $$
+    SELECT id, value, count FROM (
+        SELECT mfae.field AS id,
+               mfae.value,
+               COUNT(DISTINCT mmrsm.source),
+               row_number() OVER (
+                PARTITION BY mfae.field ORDER BY COUNT(distinct mmrsm.source) DESC
+               ) AS rownum
+        FROM metabib.facet_entry mfae
+        JOIN metabib.metarecord_source_map mmrsm ON (mfae.source = mmrsm.source)
+        JOIN config.metabib_field cmf ON (cmf.id = mfae.field)
+        WHERE mmrsm.source IN (SELECT * FROM unnest(hits))
+        AND cmf.facet_field
+        AND cmf.field_class NOT IN (SELECT * FROM unnest(ignore_facet_classes))
+        GROUP by 1, 2
+    ) all_facets
+    WHERE rownum <= (SELECT COALESCE((SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled), 1000));
+$$ LANGUAGE SQL;
+
+CREATE OR REPLACE FUNCTION search.facets_for_metarecord_set(ignore_facet_classes TEXT[], hits BIGINT[]) RETURNS TABLE (id INT, value TEXT, count BIGINT) AS $$
+    SELECT id, value, count FROM (
+        SELECT mfae.field AS id,
+               mfae.value,
+               COUNT(DISTINCT mmrsm.metarecord),
+               row_number() OVER (
+                PARTITION BY mfae.field ORDER BY COUNT(distinct mmrsm.metarecord) DESC
+               ) AS rownum
+        FROM metabib.facet_entry mfae
+        JOIN metabib.metarecord_source_map mmrsm ON (mfae.source = mmrsm.source)
+        JOIN config.metabib_field cmf ON (cmf.id = mfae.field)
+        WHERE mmrsm.metarecord IN (SELECT * FROM unnest(hits))
+        AND cmf.facet_field
+        AND cmf.field_class NOT IN (SELECT * FROM unnest(ignore_facet_classes))
+        GROUP by 1, 2
+    ) all_facets
+    WHERE rownum <= (SELECT COALESCE((SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled), 1000));
+$$ LANGUAGE SQL;
+
 COMMIT;
diff --git a/Open-ILS/src/sql/Pg/950.data.seed-values.sql b/Open-ILS/src/sql/Pg/950.data.seed-values.sql
index 2505f50..5b8fcce 100644
--- a/Open-ILS/src/sql/Pg/950.data.seed-values.sql
+++ b/Open-ILS/src/sql/Pg/950.data.seed-values.sql
@@ -14683,6 +14683,19 @@ INSERT INTO config.global_flag (name, label, value, enabled) VALUES (
     TRUE
 );
 
+INSERT INTO config.global_flag (name, value, label, enabled)
+    VALUES (
+        'search.max_facets_per_field',
+        '1000',
+        oils_i18n_gettext(
+            'search.max_facets_per_field',
+            'Search: maximum number of facet values to retrieve for each facet field',
+            'cgf', 
+            'label'
+        ),
+        TRUE
+    );
+
 INSERT INTO config.org_unit_setting_type
     (name, grp, label, description, datatype)
     VALUES
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.limit_facets.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.limit_facets.sql
new file mode 100644
index 0000000..3140634
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.limit_facets.sql
@@ -0,0 +1,56 @@
+BEGIN;
+
+--- SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+
+CREATE OR REPLACE FUNCTION search.facets_for_record_set(ignore_facet_classes TEXT[], hits BIGINT[]) RETURNS TABLE (id INT, value TEXT, count BIGINT) AS $$
+    SELECT id, value, count FROM (
+        SELECT mfae.field AS id,
+               mfae.value,
+               COUNT(DISTINCT mmrsm.source),
+               row_number() OVER (
+                PARTITION BY mfae.field ORDER BY COUNT(distinct mmrsm.source) DESC
+               ) AS rownum
+        FROM metabib.facet_entry mfae
+        JOIN metabib.metarecord_source_map mmrsm ON (mfae.source = mmrsm.source)
+        JOIN config.metabib_field cmf ON (cmf.id = mfae.field)
+        WHERE mmrsm.source IN (SELECT * FROM unnest(hits))
+        AND cmf.facet_field
+        AND cmf.field_class NOT IN (SELECT * FROM unnest(ignore_facet_classes))
+        GROUP by 1, 2
+    ) all_facets
+    WHERE rownum <= (SELECT COALESCE((SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled), 1000));
+$$ LANGUAGE SQL;
+
+CREATE OR REPLACE FUNCTION search.facets_for_metarecord_set(ignore_facet_classes TEXT[], hits BIGINT[]) RETURNS TABLE (id INT, value TEXT, count BIGINT) AS $$
+    SELECT id, value, count FROM (
+        SELECT mfae.field AS id,
+               mfae.value,
+               COUNT(DISTINCT mmrsm.metarecord),
+               row_number() OVER (
+                PARTITION BY mfae.field ORDER BY COUNT(distinct mmrsm.metarecord) DESC
+               ) AS rownum
+        FROM metabib.facet_entry mfae
+        JOIN metabib.metarecord_source_map mmrsm ON (mfae.source = mmrsm.source)
+        JOIN config.metabib_field cmf ON (cmf.id = mfae.field)
+        WHERE mmrsm.metarecord IN (SELECT * FROM unnest(hits))
+        AND cmf.facet_field
+        AND cmf.field_class NOT IN (SELECT * FROM unnest(ignore_facet_classes))
+        GROUP by 1, 2
+    ) all_facets
+    WHERE rownum <= (SELECT COALESCE((SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled), 1000));
+$$ LANGUAGE SQL;
+
+INSERT INTO config.global_flag (name, value, label, enabled)
+    VALUES (
+        'search.max_facets_per_field',
+        '1000',
+        oils_i18n_gettext(
+            'search.max_facets_per_field',
+            'Search: maximum number of facet values to retrieve for each facet field',
+            'cgf',
+            'label'
+        ),
+        TRUE
+    );
+
+COMMIT;

-----------------------------------------------------------------------

Summary of changes:
 .../lib/OpenILS/Application/Search/Biblio.pm       |   40 ++--------
 Open-ILS/src/sql/Pg/002.schema.config.sql          |    2 +-
 Open-ILS/src/sql/Pg/300.schema.staged_search.sql   |   39 +++++++++-
 Open-ILS/src/sql/Pg/950.data.seed-values.sql       |   13 +++
 Open-ILS/src/sql/Pg/t/search_limit_facet_fetch.pg  |   85 ++++++++++++++++++++
 .../sql/Pg/upgrade/0958.schema.limit_facets.sql    |   56 +++++++++++++
 .../OPAC/Limit_number_of_facets_retrieved.txt      |   14 +++
 7 files changed, 213 insertions(+), 36 deletions(-)
 create mode 100644 Open-ILS/src/sql/Pg/t/search_limit_facet_fetch.pg
 create mode 100644 Open-ILS/src/sql/Pg/upgrade/0958.schema.limit_facets.sql
 create mode 100644 docs/RELEASE_NOTES_NEXT/OPAC/Limit_number_of_facets_retrieved.txt


hooks/post-receive
-- 
Evergreen ILS


More information about the open-ils-commits mailing list