[open-ils-commits] r10319 - in trunk/Open-ILS: examples src/perlmods/OpenILS/Application/Search src/perlmods/OpenILS/Application/Storage/Publisher

svn at svn.open-ils.org svn at svn.open-ils.org
Sat Aug 9 23:49:23 EDT 2008


Author: miker
Date: 2008-08-09 23:49:21 -0400 (Sat, 09 Aug 2008)
New Revision: 10319

Modified:
   trunk/Open-ILS/examples/opensrf.xml.example
   trunk/Open-ILS/src/perlmods/OpenILS/Application/Search/Biblio.pm
   trunk/Open-ILS/src/perlmods/OpenILS/Application/Storage/Publisher/metabib.pm
Log:
make staged search result calculation more configurable

Modified: trunk/Open-ILS/examples/opensrf.xml.example
===================================================================
--- trunk/Open-ILS/examples/opensrf.xml.example	2008-08-08 21:22:39 UTC (rev 10318)
+++ trunk/Open-ILS/examples/opensrf.xml.example	2008-08-10 03:49:21 UTC (rev 10319)
@@ -324,10 +324,32 @@
                 </unix_config>
                 <app_settings>
                     <marc_html_xsl>oilsMARC21slim2HTML.xsl</marc_html_xsl>
+
                     <!-- Default to using staged search -->
                     <use_staged_search>true</use_staged_search>
-                    <superpage_size>500</superpage_size>
-                    <max_superpages>20</max_superpages>
+
+                    <!--
+                        For staged search, we estimate hits based on inclusion or exclusion.
+
+                        Valid settings:
+                            inclusion - visible ratio on superpage
+                            exclusion - excluded ratio on superpage
+                            delete_adjusted_inclusion - included ratio on superpage, ratio adjusted by deleted count
+                            delete_adjusted_exclusion - excluded ratio on superpage, ratio adjusted by deleted count
+
+                        Under normal circumstances, inclusion is the best strategy, and both delete_adjusted variants
+                        will return the same value +/- 1.  The exclusion strategy is the original, and works well
+                        when there are few deleted or excluded records, in other words, when the superpage is not
+                        sparsely populated with visible records.
+                    -->
+                    <estimation_strategy>inclusion</estimation_strategy>
+
+                    <!-- Baseline number of records to check for hit estimation. -->
+                    <superpage_size>1000</superpage_size>
+
+                    <!-- How many superpages to consider for searching overall. -->
+                    <max_superpages>10</max_superpages>
+
                     <!-- zip code database file -->
                     <!--<zips_file>/openils/var/data/zips.txt</zips_file>-->
                 </app_settings>

Modified: trunk/Open-ILS/src/perlmods/OpenILS/Application/Search/Biblio.pm
===================================================================
--- trunk/Open-ILS/src/perlmods/OpenILS/Application/Search/Biblio.pm	2008-08-08 21:22:39 UTC (rev 10318)
+++ trunk/Open-ILS/src/perlmods/OpenILS/Application/Search/Biblio.pm	2008-08-10 03:49:21 UTC (rev 10319)
@@ -780,6 +780,13 @@
     # restrict total tested to superpage size * number of superpages
     $search_hash->{core_limit} = $superpage_size * $max_superpages;
 
+    # Set the configured estimation strategy, defaults to 'inclusion'.
+	$search_hash->{estimation_strategy} = OpenSRF::Utils::SettingsClient
+        ->new
+        ->config_value(
+            apps => 'open-ils.search', app_settings => 'estimation_strategy'
+        );
+
     # pull any existing results from the cache
     my $key = search_cache_key($method, $search_hash);
     my $cache_data = $cache->get_cache($key) || {};

Modified: trunk/Open-ILS/src/perlmods/OpenILS/Application/Storage/Publisher/metabib.pm
===================================================================
--- trunk/Open-ILS/src/perlmods/OpenILS/Application/Storage/Publisher/metabib.pm	2008-08-08 21:22:39 UTC (rev 10318)
+++ trunk/Open-ILS/src/perlmods/OpenILS/Application/Storage/Publisher/metabib.pm	2008-08-10 03:49:21 UTC (rev 10319)
@@ -2369,6 +2369,9 @@
 
     }
 
+    # inclusion, exclusion, delete_adjusted_inclusion, delete_adjusted_exclusion
+    my $estimation_strategy = $args{estimation_strategy} || 'inclusion';
+
 	my $ou = $args{org_unit};
 	my $limit = $args{limit} || 10;
 	my $offset = $args{offset} || 0;
@@ -2566,11 +2569,10 @@
 
     my $estimate = $visible;
     if ( $total > $checked && $checked ) {
-        my $deleted_ratio = $deleted / $checked;
-        my $exclution_ratio = $excluded / $checked;
-        my $delete_adjusted_total = $total - ( $total * $deleted_ratio );
 
-        $estimate = $$summary_row{estimated_hit_count} = int($delete_adjusted_total - ( $delete_adjusted_total * $exclution_ratio ));
+        $$summary_row{hit_estimate} = FTS_paging_estimate($self, $client, $checked, $visible, $excluded, $deleted, $total);
+        $estimate = $$summary_row{estimated_hit_count} = $$summary_row{hit_estimate}{$estimation_strategy};
+
     }
 
     delete $$summary_row{id};
@@ -2622,7 +2624,79 @@
 	cachable	=> 1,
 );
 
+sub FTS_paging_estimate {
+	my $self = shift;
+	my $client = shift;
 
+    # Estimate the overall hit count from one superpage's tallies.  Arguments:
+    # records checked, visible, excluded and deleted on the superpage, then the
+    # total candidate count.  Returns a hashref with one estimate per strategy.
+    my ($checked, $visible, $excluded, $deleted, $total) = @_;
+    # $checked must be non-zero; staged_fts only calls this when it is.
+    my $deleted_ratio = $deleted / $checked;
+    my $delete_adjusted_total = $total - ( $total * $deleted_ratio );
+    # If every checked record is deleted there is no live sample to take a
+    # ratio from: count everything as excluded and nothing as visible rather
+    # than dying with a division by zero.
+    my $live = $checked - $deleted;
+    my $exclusion_ratio = $excluded / $checked;
+    my $delete_adjusted_exclusion_ratio = $live ? $excluded / $live : 1;
+    my $inclusion_ratio = $visible / $checked;
+    my $delete_adjusted_inclusion_ratio = $live ? $visible / $live : 0;
+    return {
+        exclusion                   => int($delete_adjusted_total - ( $delete_adjusted_total * $exclusion_ratio )),
+        inclusion                   => int($delete_adjusted_total * $inclusion_ratio),
+        delete_adjusted_exclusion   => int($delete_adjusted_total - ( $delete_adjusted_total * $delete_adjusted_exclusion_ratio )),
+        delete_adjusted_inclusion   => int($delete_adjusted_total * $delete_adjusted_inclusion_ratio)
+    };
+}
+# Register FTS_paging_estimate under its API name; 'method' must name that sub, not staged_fts.
+__PACKAGE__->register_method(
+	api_name	=> "open-ils.storage.fts_paging_estimate",
+	method		=> 'FTS_paging_estimate',
+    argc        => 5, strict => 1,
+	api_level	=> 1,
+    signature   => {
+        'return'=> q#
+            Hash of estimation values based on four variant estimation strategies:
+                exclusion -- Estimate based on the ratio of excluded records on the current superpage;
+                inclusion -- Estimate based on the ratio of visible records on the current superpage;
+                delete_adjusted_exclusion -- Same as exclusion strategy, but the ratio is adjusted by deleted count;
+                delete_adjusted_inclusion -- Same as inclusion strategy, but the ratio is adjusted by deleted count;
+        #,
+        desc    => q#
+            Helper method used to determin the approximate number of
+            hits for a search that spans multiple superpages.  For
+            sparse superpages, the inclusion estimate will likely be the
+            best estimate.  The exclusion strategy is the original, but
+            inclusion is the default.
+        #,
+        params  => [
+            {   name    => 'checked',
+                desc    => 'Number of records check -- nominally the size of a superpage, or a remaining amount from the last superpage.',
+                type    => 'number'
+            },
+            {   name    => 'visible',
+                desc    => 'Number of records visible to the search location on the current superpage.',
+                type    => 'number'
+            },
+            {   name    => 'excluded',
+                desc    => 'Number of records excluded from the search location on the current superpage.',
+                type    => 'number'
+            },
+            {   name    => 'deleted',
+                desc    => 'Number of deleted records on the current superpage.',
+                type    => 'number'
+            },
+            {   name    => 'total',
+                desc    => 'Total number of records up to core_limit (superpage_size * max_superpages).',
+                type    => 'number'
+            }
+        ]
+    }
+);
+
+
 sub xref_count {
 	my $self = shift;
 	my $client = shift;



More information about the open-ils-commits mailing list