[open-ils-commits] [GIT] Evergreen ILS branch master updated. 70c1da0e63abb278b9c642ca6230b4ecd2279dc7
Evergreen Git
git at git.evergreen-ils.org
Tue Feb 23 23:50:51 EST 2016
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Evergreen ILS".
The branch, master has been updated
via 70c1da0e63abb278b9c642ca6230b4ecd2279dc7 (commit)
via 9cf7350c02df1f4a2c213942edb62d89b468a568 (commit)
via 24a7665db9713837148ce1ccd5480e46b63e3d4a (commit)
via 01ffba4e5e8ed9abb640f288a997f72804b77ebb (commit)
from 49590f0f0600be28ab28c6f55487c9992993a398 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit 70c1da0e63abb278b9c642ca6230b4ecd2279dc7
Author: Ben Shum <ben at evergreener.net>
Date: Tue Feb 23 23:50:11 2016 -0500
LP#1505286: stamping upgrade script for limit facets retrieved
Signed-off-by: Ben Shum <ben at evergreener.net>
diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql
index 1559923..a4f3851 100644
--- a/Open-ILS/src/sql/Pg/002.schema.config.sql
+++ b/Open-ILS/src/sql/Pg/002.schema.config.sql
@@ -91,7 +91,7 @@ CREATE TRIGGER no_overlapping_deps
BEFORE INSERT OR UPDATE ON config.db_patch_dependencies
FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates');
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0957', :eg_version); --berick/kmlussier
+INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0958', :eg_version); -- gmcharlt/bshum
CREATE TABLE config.bib_source (
id SERIAL PRIMARY KEY,
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.limit_facets.sql b/Open-ILS/src/sql/Pg/upgrade/0958.schema.limit_facets.sql
similarity index 97%
rename from Open-ILS/src/sql/Pg/upgrade/XXXX.schema.limit_facets.sql
rename to Open-ILS/src/sql/Pg/upgrade/0958.schema.limit_facets.sql
index 3140634..a68b448 100644
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.limit_facets.sql
+++ b/Open-ILS/src/sql/Pg/upgrade/0958.schema.limit_facets.sql
@@ -1,6 +1,6 @@
BEGIN;
---- SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+SELECT evergreen.upgrade_deps_block_check('0958', :eg_version);
CREATE OR REPLACE FUNCTION search.facets_for_record_set(ignore_facet_classes TEXT[], hits BIGINT[]) RETURNS TABLE (id INT, value TEXT, count BIGINT) AS $$
SELECT id, value, count FROM (
commit 9cf7350c02df1f4a2c213942edb62d89b468a568
Author: Galen Charlton <gmc at esilibrary.com>
Date: Fri Oct 23 17:18:19 2015 +0000
LP#1505286: add release notes
Signed-off-by: Galen Charlton <gmc at esilibrary.com>
Signed-off-by: Ben Shum <ben at evergreener.net>
diff --git a/docs/RELEASE_NOTES_NEXT/OPAC/Limit_number_of_facets_retrieved.txt b/docs/RELEASE_NOTES_NEXT/OPAC/Limit_number_of_facets_retrieved.txt
new file mode 100644
index 0000000..bd0c7ea
--- /dev/null
+++ b/docs/RELEASE_NOTES_NEXT/OPAC/Limit_number_of_facets_retrieved.txt
@@ -0,0 +1,14 @@
+Limit number of facets retrieved during search
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Catalog search now sets a limit on the number of facets retrieved
+per defined facet field. Setting a limit is useful so that
+`open-ils.cstore` backends don't end up needlessly consuming
+memory when fetching facets for a large result set; if a broad
+search retrieves over 10,000 author facets (say), even the most
+persistent user is not going to actually look at all of them. Fetching
+fewer facets can also slightly speed up generation of search results.
+
+The limit is controlled by a new global flag, `search.max_facets_per_field`,
+whose label is "Search: maximum number of facet values to retrieve for
+each facet field". The default limit value is 1,000, but lower values
+(e.g., 100) are perhaps even better for most catalogs.
commit 24a7665db9713837148ce1ccd5480e46b63e3d4a
Author: Galen Charlton <gmc at esilibrary.com>
Date: Fri Oct 23 17:04:17 2015 +0000
LP#1505286: add pgTAP tests
Signed-off-by: Galen Charlton <gmc at esilibrary.com>
Signed-off-by: Ben Shum <ben at evergreener.net>
diff --git a/Open-ILS/src/sql/Pg/t/search_limit_facet_fetch.pg b/Open-ILS/src/sql/Pg/t/search_limit_facet_fetch.pg
new file mode 100644
index 0000000..86023ce
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/t/search_limit_facet_fetch.pg
@@ -0,0 +1,85 @@
+BEGIN;
+
+SELECT plan(4);
+
+INSERT INTO biblio.record_entry (id, last_xact_id, marc)
+VALUES (999999998, 'pgtap', '<record xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd" xmlns="http://www.loc.gov/MARC21/slim">
+ <leader>00531nam a2200157 a 4500</leader>
+ <controlfield tag="005">20080729170300.0</controlfield>
+ <controlfield tag="008"> t19981999enka 0 eng </controlfield>
+ <datafield tag="245" ind1="1" ind2="4">
+ <subfield code="a">test-value</subfield>
+ </datafield>
+ <datafield tag="650" ind1=" " ind2=" ">
+ <subfield code="a">subject 1</subfield>
+ </datafield>
+ <datafield tag="650" ind1=" " ind2=" ">
+ <subfield code="a">subject 2</subfield>
+ </datafield>
+ <datafield tag="650" ind1=" " ind2=" ">
+ <subfield code="a">subject 3</subfield>
+ </datafield>
+ <datafield tag="650" ind1=" " ind2=" ">
+ <subfield code="a">subject 4</subfield>
+ </datafield>
+ <datafield tag="650" ind1=" " ind2=" ">
+ <subfield code="a">subject 5</subfield>
+ </datafield>
+ <datafield tag="650" ind1=" " ind2=" ">
+ <subfield code="a">subject 6</subfield>
+ </datafield>
+ <datafield tag="650" ind1=" " ind2=" ">
+ <subfield code="a">subject 7</subfield>
+ </datafield>
+ <datafield tag="650" ind1=" " ind2=" ">
+ <subfield code="a">subject 8</subfield>
+ </datafield>
+ <datafield tag="650" ind1=" " ind2=" ">
+ <subfield code="a">subject 9</subfield>
+ </datafield>
+ <datafield tag="650" ind1=" " ind2=" ">
+ <subfield code="a">subject 10</subfield>
+ </datafield>
+</record>');
+
+-- These tests assume that one subject facet field is defined
+
+SELECT is(
+ (
+ SELECT COUNT(*) FROM search.facets_for_record_set('{}', '{999999998}')
+ ),
+ 10::BIGINT,
+ '10 subject facets retrieved'
+);
+
+UPDATE config.global_flag SET value = '5' WHERE name = 'search.max_facets_per_field';
+SELECT is(
+ (
+ SELECT COUNT(*) FROM search.facets_for_record_set('{}', '{999999998}')
+ ),
+ 5::BIGINT,
+ '5 subject facets retrieved after setting limit'
+);
+
+SELECT is(
+ (
+ SELECT COUNT(*) FROM (
+ SELECT search.facets_for_metarecord_set('{}', array_accum(metarecord))
+ FROM metabib.metarecord_source_map
+ WHERE source = 999999998
+ ) x
+ ),
+ 5::BIGINT,
+ 'Works for metarecord sets too'
+);
+
+SELECT is(
+ (
+ SELECT COUNT(*) FROM search.facets_for_record_set('{subject}', '{999999998}')
+ ),
+ 0::BIGINT,
+ 'Zero subject facets retrieved after excluding subject facets'
+);
+
+SELECT * FROM finish();
+ROLLBACK;
commit 01ffba4e5e8ed9abb640f288a997f72804b77ebb
Author: Galen Charlton <gmc at esilibrary.com>
Date: Fri Oct 23 16:29:38 2015 +0000
LP#1505286: limit number of facets retrieved
This patch teaches search how to limit the number of facets retrieved
per defined facet field. Setting a limit is useful so that
open-ils.cstore backends don't end up needlessly consuming
memory when fetching facets for a large result set; if a broad
search retrieves over 10,000 author facets (say), even the most
persistent user is not going to actually look at all of them. Fetching
fewer facets can also slightly speed up generation of search
results.
The limit is controlled by a new global flag, search.max_facets_per_field,
whose label is "Search: maximum number of facet values to retrieve for
each facet field". The default limit value is 1,000, but lower values
(e.g., 100) are perhaps even better for most catalogs.
To test:
[1] Upon applying the patch, set the value of the
search.max_facets_per_field global flag to a small
value.
[2] Perform some searches and verify that the number
of facets retrieved doesn't exceed the limit; note
that the limit is per facet *field*, not overall
or per field class.
Signed-off-by: Galen Charlton <gmc at esilibrary.com>
Signed-off-by: Ben Shum <ben at evergreener.net>
diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm
index 777ef7e..1e6a485 100644
--- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm
@@ -1565,43 +1565,15 @@ sub cache_facets {
return undef unless (@$results);
- # The query we're constructing
- #
- # select mfae.field as id,
- # mfae.value,
- # count(distinct mmrsm.appropriate-id-field )
- # from metabib.facet_entry mfae
- # join metabib.metarecord_sourc_map mmrsm on (mfae.source = mmrsm.source)
- # where mmrsm.appropriate-id-field in IDLIST
- # group by 1,2;
-
- my $count_field = $metabib ? 'metarecord' : 'source';
+ my $facets_function = $metabib ? 'search.facets_for_metarecord_set'
+ : 'search.facets_for_record_set';
+ my $results_str = '{' . join(',', @$results) . '}';
+ my $ignore_str = ref($ignore) ? '{' . join(',', @$ignore) . '}'
+ : '{}';
my $query = {
- select => {
- mfae => [ { column => 'field', alias => 'id'}, 'value' ],
- mmrsm => [{
- transform => 'count',
- distinct => 1,
- column => $count_field,
- alias => 'count',
- aggregate => 1
- }]
- },
- from => {
- mfae => {
- mmrsm => { field => 'source', fkey => 'source' },
- cmf => { field => 'id', fkey => 'field' }
- }
- },
- where => {
- '+mmrsm' => { $count_field => $results },
- '+cmf' => { facet_field => 't' }
- }
+ from => [ $facets_function, $ignore_str, $results_str ]
};
- $query->{where}->{'+cmf'}->{field_class} = {'not in' => $ignore}
- if ref($ignore) and @$ignore > 0;
-
my $facets = OpenILS::Utils::CStoreEditor->new->json_query($query, {substream => 1});
for my $facet (@$facets) {
diff --git a/Open-ILS/src/sql/Pg/300.schema.staged_search.sql b/Open-ILS/src/sql/Pg/300.schema.staged_search.sql
index 9fa639f..e3f96c0 100644
--- a/Open-ILS/src/sql/Pg/300.schema.staged_search.sql
+++ b/Open-ILS/src/sql/Pg/300.schema.staged_search.sql
@@ -429,5 +429,42 @@ BEGIN
END;
$func$ LANGUAGE PLPGSQL;
-
+CREATE OR REPLACE FUNCTION search.facets_for_record_set(ignore_facet_classes TEXT[], hits BIGINT[]) RETURNS TABLE (id INT, value TEXT, count BIGINT) AS $$
+ SELECT id, value, count FROM (
+ SELECT mfae.field AS id,
+ mfae.value,
+ COUNT(DISTINCT mmrsm.source),
+ row_number() OVER (
+ PARTITION BY mfae.field ORDER BY COUNT(distinct mmrsm.source) DESC
+ ) AS rownum
+ FROM metabib.facet_entry mfae
+ JOIN metabib.metarecord_source_map mmrsm ON (mfae.source = mmrsm.source)
+ JOIN config.metabib_field cmf ON (cmf.id = mfae.field)
+ WHERE mmrsm.source IN (SELECT * FROM unnest(hits))
+ AND cmf.facet_field
+ AND cmf.field_class NOT IN (SELECT * FROM unnest(ignore_facet_classes))
+ GROUP by 1, 2
+ ) all_facets
+ WHERE rownum <= (SELECT COALESCE((SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled), 1000));
+$$ LANGUAGE SQL;
+
+CREATE OR REPLACE FUNCTION search.facets_for_metarecord_set(ignore_facet_classes TEXT[], hits BIGINT[]) RETURNS TABLE (id INT, value TEXT, count BIGINT) AS $$
+ SELECT id, value, count FROM (
+ SELECT mfae.field AS id,
+ mfae.value,
+ COUNT(DISTINCT mmrsm.metarecord),
+ row_number() OVER (
+ PARTITION BY mfae.field ORDER BY COUNT(distinct mmrsm.metarecord) DESC
+ ) AS rownum
+ FROM metabib.facet_entry mfae
+ JOIN metabib.metarecord_source_map mmrsm ON (mfae.source = mmrsm.source)
+ JOIN config.metabib_field cmf ON (cmf.id = mfae.field)
+ WHERE mmrsm.metarecord IN (SELECT * FROM unnest(hits))
+ AND cmf.facet_field
+ AND cmf.field_class NOT IN (SELECT * FROM unnest(ignore_facet_classes))
+ GROUP by 1, 2
+ ) all_facets
+ WHERE rownum <= (SELECT COALESCE((SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled), 1000));
+$$ LANGUAGE SQL;
+
COMMIT;
diff --git a/Open-ILS/src/sql/Pg/950.data.seed-values.sql b/Open-ILS/src/sql/Pg/950.data.seed-values.sql
index 2505f50..5b8fcce 100644
--- a/Open-ILS/src/sql/Pg/950.data.seed-values.sql
+++ b/Open-ILS/src/sql/Pg/950.data.seed-values.sql
@@ -14683,6 +14683,19 @@ INSERT INTO config.global_flag (name, label, value, enabled) VALUES (
TRUE
);
+INSERT INTO config.global_flag (name, value, label, enabled)
+ VALUES (
+ 'search.max_facets_per_field',
+ '1000',
+ oils_i18n_gettext(
+ 'search.max_facets_per_field',
+ 'Search: maximum number of facet values to retrieve for each facet field',
+ 'cgf',
+ 'label'
+ ),
+ TRUE
+ );
+
INSERT INTO config.org_unit_setting_type
(name, grp, label, description, datatype)
VALUES
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.limit_facets.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.limit_facets.sql
new file mode 100644
index 0000000..3140634
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.limit_facets.sql
@@ -0,0 +1,56 @@
+BEGIN;
+
+--- SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+
+CREATE OR REPLACE FUNCTION search.facets_for_record_set(ignore_facet_classes TEXT[], hits BIGINT[]) RETURNS TABLE (id INT, value TEXT, count BIGINT) AS $$
+ SELECT id, value, count FROM (
+ SELECT mfae.field AS id,
+ mfae.value,
+ COUNT(DISTINCT mmrsm.source),
+ row_number() OVER (
+ PARTITION BY mfae.field ORDER BY COUNT(distinct mmrsm.source) DESC
+ ) AS rownum
+ FROM metabib.facet_entry mfae
+ JOIN metabib.metarecord_source_map mmrsm ON (mfae.source = mmrsm.source)
+ JOIN config.metabib_field cmf ON (cmf.id = mfae.field)
+ WHERE mmrsm.source IN (SELECT * FROM unnest(hits))
+ AND cmf.facet_field
+ AND cmf.field_class NOT IN (SELECT * FROM unnest(ignore_facet_classes))
+ GROUP by 1, 2
+ ) all_facets
+ WHERE rownum <= (SELECT COALESCE((SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled), 1000));
+$$ LANGUAGE SQL;
+
+CREATE OR REPLACE FUNCTION search.facets_for_metarecord_set(ignore_facet_classes TEXT[], hits BIGINT[]) RETURNS TABLE (id INT, value TEXT, count BIGINT) AS $$
+ SELECT id, value, count FROM (
+ SELECT mfae.field AS id,
+ mfae.value,
+ COUNT(DISTINCT mmrsm.metarecord),
+ row_number() OVER (
+ PARTITION BY mfae.field ORDER BY COUNT(distinct mmrsm.metarecord) DESC
+ ) AS rownum
+ FROM metabib.facet_entry mfae
+ JOIN metabib.metarecord_source_map mmrsm ON (mfae.source = mmrsm.source)
+ JOIN config.metabib_field cmf ON (cmf.id = mfae.field)
+ WHERE mmrsm.metarecord IN (SELECT * FROM unnest(hits))
+ AND cmf.facet_field
+ AND cmf.field_class NOT IN (SELECT * FROM unnest(ignore_facet_classes))
+ GROUP by 1, 2
+ ) all_facets
+ WHERE rownum <= (SELECT COALESCE((SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled), 1000));
+$$ LANGUAGE SQL;
+
+INSERT INTO config.global_flag (name, value, label, enabled)
+ VALUES (
+ 'search.max_facets_per_field',
+ '1000',
+ oils_i18n_gettext(
+ 'search.max_facets_per_field',
+ 'Search: maximum number of facet values to retrieve for each facet field',
+ 'cgf',
+ 'label'
+ ),
+ TRUE
+ );
+
+COMMIT;
-----------------------------------------------------------------------
Summary of changes:
.../lib/OpenILS/Application/Search/Biblio.pm | 40 ++--------
Open-ILS/src/sql/Pg/002.schema.config.sql | 2 +-
Open-ILS/src/sql/Pg/300.schema.staged_search.sql | 39 +++++++++-
Open-ILS/src/sql/Pg/950.data.seed-values.sql | 13 +++
Open-ILS/src/sql/Pg/t/search_limit_facet_fetch.pg | 85 ++++++++++++++++++++
.../sql/Pg/upgrade/0958.schema.limit_facets.sql | 56 +++++++++++++
.../OPAC/Limit_number_of_facets_retrieved.txt | 14 +++
7 files changed, 213 insertions(+), 36 deletions(-)
create mode 100644 Open-ILS/src/sql/Pg/t/search_limit_facet_fetch.pg
create mode 100644 Open-ILS/src/sql/Pg/upgrade/0958.schema.limit_facets.sql
create mode 100644 docs/RELEASE_NOTES_NEXT/OPAC/Limit_number_of_facets_retrieved.txt
hooks/post-receive
--
Evergreen ILS
More information about the open-ils-commits
mailing list