[open-ils-commits] r10319 - in trunk/Open-ILS: examples
src/perlmods/OpenILS/Application/Search
src/perlmods/OpenILS/Application/Storage/Publisher
svn at svn.open-ils.org
svn at svn.open-ils.org
Sat Aug 9 23:49:23 EDT 2008
Author: miker
Date: 2008-08-09 23:49:21 -0400 (Sat, 09 Aug 2008)
New Revision: 10319
Modified:
trunk/Open-ILS/examples/opensrf.xml.example
trunk/Open-ILS/src/perlmods/OpenILS/Application/Search/Biblio.pm
trunk/Open-ILS/src/perlmods/OpenILS/Application/Storage/Publisher/metabib.pm
Log:
make staged search result calculation more configurable
Modified: trunk/Open-ILS/examples/opensrf.xml.example
===================================================================
--- trunk/Open-ILS/examples/opensrf.xml.example 2008-08-08 21:22:39 UTC (rev 10318)
+++ trunk/Open-ILS/examples/opensrf.xml.example 2008-08-10 03:49:21 UTC (rev 10319)
@@ -324,10 +324,32 @@
</unix_config>
<app_settings>
<marc_html_xsl>oilsMARC21slim2HTML.xsl</marc_html_xsl>
+
<!-- Default to using staged search -->
<use_staged_search>true</use_staged_search>
- <superpage_size>500</superpage_size>
- <max_superpages>20</max_superpages>
+
+ <!--
+ For staged search, we estimate hits based on inclusion or exclusion.
+
+ Valid settings:
+ inclusion - visible ratio on superpage
+ exclusion - excluded ratio on superpage
+ delete_adjusted_inclusion - included ratio on superpage, ratio adjusted by deleted count
+ delete_adjusted_exclusion - excluded ratio on superpage, ratio adjusted by deleted count
+
+ Under normal circumstances, inclusion is the best strategy, and both delete_adjusted variants
+ will return the same value +/- 1. The exclusion strategy is the original, and works well
+ when there are few deleted or excluded records, in other words, when the superpage is not
+ sparsely populated with visible records.
+ -->
+ <estimation_strategy>inclusion</estimation_strategy>
+
+ <!-- Baseline number of records to check for hit estimation. -->
+ <superpage_size>1000</superpage_size>
+
+ <!-- How many superpages to consider for searching overall. -->
+ <max_superpages>10</max_superpages>
+
<!-- zip code database file -->
<!--<zips_file>/openils/var/data/zips.txt</zips_file>-->
</app_settings>
Modified: trunk/Open-ILS/src/perlmods/OpenILS/Application/Search/Biblio.pm
===================================================================
--- trunk/Open-ILS/src/perlmods/OpenILS/Application/Search/Biblio.pm 2008-08-08 21:22:39 UTC (rev 10318)
+++ trunk/Open-ILS/src/perlmods/OpenILS/Application/Search/Biblio.pm 2008-08-10 03:49:21 UTC (rev 10319)
@@ -780,6 +780,13 @@
# restrict total tested to superpage size * number of superpages
$search_hash->{core_limit} = $superpage_size * $max_superpages;
+ # Set the configured estimation strategy, defaults to 'inclusion'.
+ $search_hash->{estimation_strategy} = OpenSRF::Utils::SettingsClient
+ ->new
+ ->config_value(
+ apps => 'open-ils.search', app_settings => 'estimation_strategy'
+ );
+
# pull any existing results from the cache
my $key = search_cache_key($method, $search_hash);
my $cache_data = $cache->get_cache($key) || {};
Modified: trunk/Open-ILS/src/perlmods/OpenILS/Application/Storage/Publisher/metabib.pm
===================================================================
--- trunk/Open-ILS/src/perlmods/OpenILS/Application/Storage/Publisher/metabib.pm 2008-08-08 21:22:39 UTC (rev 10318)
+++ trunk/Open-ILS/src/perlmods/OpenILS/Application/Storage/Publisher/metabib.pm 2008-08-10 03:49:21 UTC (rev 10319)
@@ -2369,6 +2369,9 @@
}
+ # inclusion, exclusion, delete_adjusted_inclusion, delete_adjusted_exclusion
+ my $estimation_strategy = $args{estimation_strategy} || 'inclusion';
+
my $ou = $args{org_unit};
my $limit = $args{limit} || 10;
my $offset = $args{offset} || 0;
@@ -2566,11 +2569,10 @@
my $estimate = $visible;
if ( $total > $checked && $checked ) {
- my $deleted_ratio = $deleted / $checked;
- my $exclution_ratio = $excluded / $checked;
- my $delete_adjusted_total = $total - ( $total * $deleted_ratio );
- $estimate = $$summary_row{estimated_hit_count} = int($delete_adjusted_total - ( $delete_adjusted_total * $exclution_ratio ));
+ $$summary_row{hit_estimate} = FTS_paging_estimate($self, $client, $checked, $visible, $excluded, $deleted, $total);
+ $estimate = $$summary_row{estimated_hit_count} = $$summary_row{hit_estimate}{$estimation_strategy};
+
}
delete $$summary_row{id};
@@ -2622,7 +2624,79 @@
cachable => 1,
);
+# Estimate the overall hit count of a staged search from the counts seen on
+# the records checked so far.
+#
+# Positional arguments after $self and $client:
+#   $checked  - number of records checked
+#   $visible  - number of checked records that were visible
+#   $excluded - number of checked records that were excluded
+#   $deleted  - number of checked records that were deleted
+#   $total    - total number of candidate records
+#
+# Returns a hashref holding one integer estimate per strategy, keyed by
+# exclusion, inclusion, delete_adjusted_exclusion and
+# delete_adjusted_inclusion.
+#
+# $checked must be non-zero; the staged search code only asks for an
+# estimate when that holds.
+sub FTS_paging_estimate {
+    my ($self, $client, $checked, $visible, $excluded, $deleted, $total) = @_;
+
+    # Shrink the total by the share of checked records that were deleted.
+    my $deleted_ratio         = $deleted / $checked;
+    my $delete_adjusted_total = $total - ( $total * $deleted_ratio );
+
+    # Checked records that are not deleted. This is zero when every checked
+    # record was deleted, so it must never be used as a divisor unguarded.
+    my $live_checked = $checked - $deleted;
+
+    # In the all-deleted case the adjusted total is already zero, so the
+    # fallback ratios below only exist to avoid the division by zero; every
+    # estimate comes out as 0 either way.
+    my $exclusion_ratio = $excluded / $checked;
+    my $delete_adjusted_exclusion_ratio =
+        $live_checked ? $excluded / $live_checked : 1;
+
+    my $inclusion_ratio = $visible / $checked;
+    my $delete_adjusted_inclusion_ratio =
+        $live_checked ? $visible / $live_checked : 0;
+
+    return {
+        exclusion                 => int($delete_adjusted_total - ( $delete_adjusted_total * $exclusion_ratio )),
+        inclusion                 => int($delete_adjusted_total * $inclusion_ratio),
+        delete_adjusted_exclusion => int($delete_adjusted_total - ( $delete_adjusted_total * $delete_adjusted_exclusion_ratio )),
+        delete_adjusted_inclusion => int($delete_adjusted_total * $delete_adjusted_inclusion_ratio)
+    };
+}
+# Publish the paging estimator as open-ils.storage.fts_paging_estimate.
+# The signature below (five numeric params, hash of four estimates) describes
+# FTS_paging_estimate, so 'method' must name that sub rather than staged_fts.
+__PACKAGE__->register_method(
+    api_name  => "open-ils.storage.fts_paging_estimate",
+    method    => 'FTS_paging_estimate',
+    argc      => 5,
+    strict    => 1,
+    api_level => 1,
+    signature => {
+        'return'=> q#
+            Hash of estimation values based on four variant estimation strategies:
+                exclusion -- Estimate based on the ratio of excluded records on the current superpage;
+                inclusion -- Estimate based on the ratio of visible records on the current superpage;
+                delete_adjusted_exclusion -- Same as exclusion strategy, but the ratio is adjusted by deleted count;
+                delete_adjusted_inclusion -- Same as inclusion strategy, but the ratio is adjusted by deleted count;
+        #,
+        desc => q#
+            Helper method used to determine the approximate number of
+            hits for a search that spans multiple superpages. For
+            sparse superpages, the inclusion estimate will likely be the
+            best estimate. The exclusion strategy is the original, but
+            inclusion is the default.
+        #,
+        params => [
+            { name => 'checked',
+              desc => 'Number of records checked -- nominally the size of a superpage, or a remaining amount from the last superpage.',
+              type => 'number'
+            },
+            { name => 'visible',
+              desc => 'Number of records visible to the search location on the current superpage.',
+              type => 'number'
+            },
+            { name => 'excluded',
+              desc => 'Number of records excluded from the search location on the current superpage.',
+              type => 'number'
+            },
+            { name => 'deleted',
+              desc => 'Number of deleted records on the current superpage.',
+              type => 'number'
+            },
+            { name => 'total',
+              desc => 'Total number of records up to core_limit (superpage_size * max_superpages).',
+              type => 'number'
+            }
+        ]
+    }
+);
+
+
sub xref_count {
my $self = shift;
my $client = shift;
More information about the open-ils-commits
mailing list