[open-ils-commits] SPAM: r9095 - in branches/rel_1_2/Open-ILS/src: perlmods/OpenILS/Application/Storage/Publisher perlmods/OpenILS/WWW sql/Pg

svn at svn.open-ils.org svn at svn.open-ils.org
Wed Mar 19 23:34:47 EDT 2008


Author: miker
Date: 2008-03-19 23:00:23 -0400 (Wed, 19 Mar 2008)
New Revision: 9095

Added:
   branches/rel_1_2/Open-ILS/src/sql/Pg/300.schema.staged_search.sql
Modified:
   branches/rel_1_2/Open-ILS/src/perlmods/OpenILS/Application/Storage/Publisher/metabib.pm
   branches/rel_1_2/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm
   branches/rel_1_2/Open-ILS/src/sql/Pg/build-db.sh
Log:
backporting staged search

Modified: branches/rel_1_2/Open-ILS/src/perlmods/OpenILS/Application/Storage/Publisher/metabib.pm
===================================================================
--- branches/rel_1_2/Open-ILS/src/perlmods/OpenILS/Application/Storage/Publisher/metabib.pm	2008-03-20 02:50:12 UTC (rev 9094)
+++ branches/rel_1_2/Open-ILS/src/perlmods/OpenILS/Application/Storage/Publisher/metabib.pm	2008-03-20 03:00:23 UTC (rev 9095)
@@ -6,6 +6,7 @@
 use OpenILS::Utils::Fieldmapper;
 use OpenSRF::Utils::Logger qw/:level/;
 use OpenSRF::Utils::Cache;
+use OpenSRF::Utils::JSON;
 use Data::Dumper;
 use Digest::MD5 qw/md5_hex/;
 
@@ -1912,10 +1913,6 @@
 		$ou = actor::org_unit->search( { parent_ou => undef } )->next->id;
 	}
 
-	if (!defined($args{org_unit})) {
-		die "No target organizational unit passed to ".$self->api_name;
-	}
-
 	if (! scalar( keys %{$args{searches}} )) {
 		die "No search arguments were passed to ".$self->api_name;
 	}
@@ -2306,6 +2303,228 @@
 	cachable	=> 1,
 );
 
+# XXX factored most of the PG dependant stuff out of here... need to find a way to do "dependants".
+sub staged_fts {
+	my $self = shift;
+	my $client = shift;
+	my %args = @_;
+	
+	my $ou = $args{org_unit};
+	my $limit = $args{limit} || 10;
+	my $offset = $args{offset} || 0;
+
+	if (!$ou) {
+		$ou = actor::org_unit->search( { parent_ou => undef } )->next->id;
+	}
+
+	if (! scalar( keys %{$args{searches}} )) {
+		die "No search arguments were passed to ".$self->api_name;
+	}
+
+	my (@statuses, at types, at forms, at lang, at aud, at lit_form, at vformats);
+
+	if ($args{available}) {
+		@statuses = (0,7);
+	}
+
+	if (my $s = $args{statuses}) {
+		$s = [$s] if (!ref($s));
+		@statuses = @$s;
+	}
+
+	if (my $a = $args{audience}) {
+		$a = [$a] if (!ref($a));
+		@aud = @$a;
+	}
+
+	if (my $l = $args{language}) {
+		$l = [$l] if (!ref($l));
+		@lang = @$l;
+	}
+
+	if (my $f = $args{lit_form}) {
+		$f = [$f] if (!ref($f));
+		@lit_form = @$f;
+	}
+
+	if (my $f = $args{item_form}) {
+		$f = [$f] if (!ref($f));
+		@forms = @$f;
+	}
+
+	if (my $t = $args{item_type}) {
+		$t = [$t] if (!ref($t));
+		@types = @$t;
+	}
+
+	if (my $v = $args{vr_format}) {
+		$v = [$v] if (!ref($v));
+		@vformats = @$v;
+	}
+
+	# XXX legacy format and item type support
+	if ($args{format}) {
+		my ($t, $f) = split '-', $args{format};
+		@types = split '', $t;
+		@forms = split '', $f;
+	}
+
+    my %stored_proc_search_args;
+	for my $search_group (sort keys %{$args{searches}}) {
+		(my $search_group_name = $search_group) =~ s/\|/_/gso;
+		my ($search_class,$search_field) = split /\|/, $search_group;
+		$log->debug("Searching class [$search_class] and field [$search_field]",DEBUG);
+
+		if ($search_field) {
+			unless ( config::metabib_field->search( field_class => $search_class, name => $search_field )->next ) {
+				$log->warn("Requested class [$search_class] or field [$search_field] does not exist!");
+				return undef;
+			}
+		}
+
+		my $class = $_cdbi->{$search_class};
+		my $search_table = $class->table;
+
+		my ($index_col) = $class->columns('FTS');
+		$index_col ||= 'value';
+
+		
+		my $fts = OpenILS::Application::Storage::FTS->compile(
+            $search_class => $args{searches}{$search_group}{term},
+            $search_group_name.'.value',
+            "$search_group_name.$index_col"
+        );
+		$fts->sql_where_clause; # this builds the ranks for us
+
+		my @fts_ranks = $fts->fts_rank;
+		my @fts_queries = $fts->fts_query;
+		my @phrases = map { lc($_) } $fts->phrases;
+		my @words = map { lc($_) } $fts->words;
+
+        $stored_proc_search_args{$search_group} = {
+            fts_rank    => \@fts_ranks,
+            fts_query   => \@fts_queries,
+            phrase      => \@phrases,
+            word        => \@words,
+        };
+
+	}
+
+	my $param_search_ou = $ou;
+	my $param_depth = $args{depth}; $param_depth = 'NULL' unless (defined($param_depth) and length($param_depth) > 0 );
+    my $param_searches = OpenSRF::Utils::JSON->perl2JSON( \%stored_proc_search_args ); $param_searches =~ s/\$//go; $param_searches = '$$'.$param_searches.'$$';
+    my $param_statuses = '$${' . join(',', map { s/\$//go; $_ } @statuses) . '}$$';
+    my $param_audience = '$${' . join(',', map { s/\$//go; $_ } @aud) . '}$$';
+    my $param_language = '$${' . join(',', map { s/\$//go; $_ } @lang) . '}$$';
+    my $param_lit_form = '$${' . join(',', map { s/\$//go; $_ } @lit_form) . '}$$';
+    my $param_types = '$${' . join(',', map { s/\$//go; $_ } @types) . '}$$';
+    my $param_forms = '$${' . join(',', map { s/\$//go; $_ } @forms) . '}$$';
+    my $param_vformats = '$${' . join(',', map { s/\$//go; $_ } @vformats) . '}$$';
+	my $param_pref_lang = $args{preferred_language}; $param_pref_lang =~ s/\$//go; $param_pref_lang = '$$'.$param_pref_lang.'$$';
+	my $param_pref_lang_multiplier = $args{preferred_language_weight}; $param_pref_lang_multiplier ||= 'NULL';
+	my $param_sort = $args{'sort'}; $param_sort =~ s/\$//go; $param_sort = '$$'.$param_sort.'$$';
+	my $param_sort_desc = defined($args{sort_dir}) && $args{sort_dir} =~ /^d/io ? "'t'" : "'f'";
+	my $metarecord = $self->api_name =~ /metabib/o ? "'t'" : "'f'";
+	my $staff = $self->api_name =~ /staff/o ? "'t'" : "'f'";
+    my $param_rel_limit = $args{core_limit}; $param_rel_limit ||= 'NULL';
+    my $param_chk_limit = $args{check_limit}; $param_chk_limit ||= 'NULL';
+    my $param_skip_chk = $args{skip_check}; $param_skip_chk ||= 'NULL';
+
+	my $sth = metabib::metarecord_source_map->db_Main->prepare(<<"    SQL");
+        SELECT  *
+          FROM  search.staged_fts(
+                    $param_search_ou,
+                    $param_depth,
+                    $param_searches,
+                    $param_statuses,
+                    $param_audience,
+                    $param_language,
+                    $param_lit_form,
+                    $param_types,
+                    $param_forms,
+                    $param_vformats,
+                    $param_pref_lang,
+                    $param_pref_lang_multiplier,
+                    $param_sort,
+                    $param_sort_desc,
+                    $metarecord,
+                    $staff,
+                    $param_rel_limit,
+                    $param_chk_limit,
+                    $param_skip_chk
+                );
+    SQL
+
+    $sth->execute;
+
+    my $recs = $sth->fetchall_arrayref({});
+    my $summary_row = pop @$recs;
+
+    my $total = $$summary_row{total};
+    my $checked = $$summary_row{checked};
+    my $visible = $$summary_row{visible};
+    my $deleted = $$summary_row{deleted};
+    my $excluded = $$summary_row{excluded};
+
+    my $estimate = $visible;
+    if ( $total > $checked && $checked ) {
+        my $deleted_ratio = $deleted / $checked;
+        my $exclution_ratio = $excluded / $checked;
+        my $delete_adjusted_total = $total - ( $total * $deleted_ratio );
+
+        $estimate = $$summary_row{estimated_hit_count} = int($delete_adjusted_total - ( $delete_adjusted_total * $exclution_ratio ));
+    }
+
+    delete $$summary_row{id};
+    delete $$summary_row{rel};
+    delete $$summary_row{record};
+
+    $client->respond( $summary_row );
+
+	$log->debug("Search yielded ".scalar(@$recs)." checked, visible results with an approximate visible total of $estimate.",DEBUG);
+
+	for my $rec (@$recs[$offset .. $offset + $limit - 1]) {
+        delete $$rec{checked};
+        delete $$rec{visible};
+        delete $$rec{excluded};
+        delete $$rec{deleted};
+        delete $$rec{total};
+        $$rec{rel} = sprintf('%0.3f',$$rec{rel});
+
+		$client->respond( $rec );
+	}
+	return undef;
+}
+__PACKAGE__->register_method(
+	api_name	=> "open-ils.storage.biblio.multiclass.staged.search_fts",
+	method		=> 'staged_fts',
+	api_level	=> 1,
+	stream		=> 1,
+	cachable	=> 1,
+);
+__PACKAGE__->register_method(
+	api_name	=> "open-ils.storage.biblio.multiclass.staged.search_fts.staff",
+	method		=> 'staged_fts',
+	api_level	=> 1,
+	stream		=> 1,
+	cachable	=> 1,
+);
+__PACKAGE__->register_method(
+	api_name	=> "open-ils.storage.metabib.multiclass.staged.search_fts",
+	method		=> 'staged_fts',
+	api_level	=> 1,
+	stream		=> 1,
+	cachable	=> 1,
+);
+__PACKAGE__->register_method(
+	api_name	=> "open-ils.storage.metabib.multiclass.staged.search_fts.staff",
+	method		=> 'staged_fts',
+	api_level	=> 1,
+	stream		=> 1,
+	cachable	=> 1,
+);
+
+
 sub xref_count {
 	my $self = shift;
 	my $client = shift;

Modified: branches/rel_1_2/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm
===================================================================
--- branches/rel_1_2/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm	2008-03-20 02:50:12 UTC (rev 9094)
+++ branches/rel_1_2/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm	2008-03-20 03:00:23 UTC (rev 9095)
@@ -912,9 +912,13 @@
 	$lang = '' if ($lang eq '*');
 
 	$sort = $cgi->param('searchSort') if $cgi->param('searchSort');
+	$sort ||= '';
 	$sortdir = $cgi->param('searchSortDir') if $cgi->param('searchSortDir');
-	$terms .= " " . $cgi->param('searchTerms') if $cgi->param('searchTerms');
+	$sortdir ||= '';
 
+	$terms .= " " if ($terms && $cgi->param('searchTerms'));
+	$terms .= $cgi->param('searchTerms') if $cgi->param('searchTerms');
+
 	$class = $cgi->param('searchClass') if $cgi->param('searchClass');
 	$class ||= '-';
 
@@ -931,11 +935,16 @@
 	my $sut = $cgi->param('su');
 	my $set = $cgi->param('se');
 
-	$terms .= " keyword: $kwt" if ($kwt);
-	$terms .= " title: $tit" if ($tit);
-	$terms .= " author: $aut" if ($aut);
-	$terms .= " subject: $sut" if ($sut);
-	$terms .= " series: $set" if ($set);
+	$terms .= " " if ($terms && $kwt);
+	$terms .= "keyword: $kwt" if ($kwt);
+	$terms .= " " if ($terms && $tit);
+	$terms .= "title: $tit" if ($tit);
+	$terms .= " " if ($terms && $aut);
+	$terms .= "author: $aut" if ($aut);
+	$terms .= " " if ($terms && $sut);
+	$terms .= "subject: $sut" if ($sut);
+	$terms .= " " if ($terms && $set);
+	$terms .= "series: $set" if ($set);
 
 	if ($version eq '1.0') {
 		$type = 'rss2';
@@ -944,13 +953,6 @@
 	}
 	my $flesh_feed = ($type =~ /-full$/o) ? 1 : 0;
 
-	$terms = decode_utf8($terms);
-	$terms =~ s/\+/ /go;
-	$terms =~ s/'//go;
-	$terms =~ s/^\s+//go;
-	my $term_copy = $terms;
-
-	my $complex_terms = 0;
 	if ($terms eq 'help') {
 		print $cgi->header(-type => 'text/html');
 		print <<"		HTML";
@@ -966,93 +968,56 @@
 		return Apache2::Const::OK;
 	}
 
-	my $cache_key = '';
-	my $searches = {};
-	while ($term_copy =~ s/((?:keyword(?:\|\w+)?|title(?:\|\w+)?|author(?:\|\w+)?|subject(?:\|\w+)?|series(?:\|\w+)?|site|dir|sort|lang):[^:]+)$//so) {
-		my ($c,$t) = split ':' => $1;
-		if ($c eq 'site') {
-			$org = $t;
-			$org =~ s/^\s*//o;
-			$org =~ s/\s*$//o;
-		} elsif ($c eq 'sort') {
-			($sort = lc($t)) =~ s/^\s*(\w+)\s*$/$1/go;
-		} elsif ($c eq 'dir') {
-			($sortdir = lc($t)) =~ s/^\s*(\w+)\s*$/$1/go;
-		} elsif ($c eq 'lang') {
-			($lang = lc($t)) =~ s/^\s*(\w+)\s*$/$1/go;
-		} else {
-			$$searches{$c}{term} .= ' '.$t;
-			$cache_key .= $c . $t;
-			$complex_terms = 1;
-		}
-	}
-
+	$terms = decode_utf8($terms);
 	$lang = 'eng' if ($lang eq 'en-US');
 
-	if ($term_copy) {
-		no warnings;
-		$class = 'keyword' if ($class eq '-');
-		$$searches{$class}{term} .= " $term_copy";
-		$cache_key .= $class . $term_copy;
-	}
+	$log->debug("OpenSearch terms: $terms");
 
 	my $org_unit;
 	if ($org eq '-') {
 	 	$org_unit = $actor->request(
 			'open-ils.actor.org_unit_list.search' => parent_ou => undef
 		)->gather(1);
-	} else {
+	} elsif ($org !~ /^\d+$/o) {
 	 	$org_unit = $actor->request(
 			'open-ils.actor.org_unit_list.search' => shortname => uc($org)
 		)->gather(1);
+	} else {
+	 	$org_unit = $actor->request(
+			'open-ils.actor.org_unit_list.search' => id => $org
+		)->gather(1);
 	}
 
-	{ no warnings; $cache_key .= $org.$sort.$sortdir.$lang; }
-
-	my $rs_name = $cgi->cookie('os_session');
-	my $cached_res = OpenSRF::Utils::Cache->new->get_cache( "os_session:$rs_name" ) if ($rs_name);
-
-	my $recs;
-	if (!($recs = $$cached_res{os_results}{$cache_key})) {
-		$rs_name = $cgi->remote_host . '::' . rand(time);
-		$recs = $search->request(
-			'open-ils.search.biblio.multiclass' => {
-				searches	=> $searches,
+    my $recs = $search->request(
+        'open-ils.search.biblio.multiclass.query' => {
 				org_unit	=> $org_unit->[0]->id,
-				offset		=> 0,
-				limit		=> 5000,
-				($sort ?    ( 'sort'     => $sort    ) : ()),
-				($sortdir ? ( 'sort_dir' => $sortdir ) : ($sort ? (sort_dir => 'asc') : (sort_dir => 'desc') )),
+			offset		=> $offset - 1,
+			limit		=> $limit,
+			sort		=> $sort,
+			sort_dir	=> $sortdir,
 				($lang ?    ( 'language' => $lang    ) : ()),
-			}
+		} => $terms => 1
 		)->gather(1);
-		try {
-			$$cached_res{os_results}{$cache_key} = $recs;
-			OpenSRF::Utils::Cache->new->put_cache( "os_session:$rs_name", $cached_res, 1800 );
-		} catch Error with {
-			warn "supercat unable to store IDs in memcache server\n";
-			$logger->error("supercat unable to store IDs in memcache server");
-		};
-	}
 
+	$log->debug("Hits for [$terms]: $recs->{count}");
+
 	my $feed = create_record_feed(
 		'record',
 		$type,
-		[ map { $_->[0] } @{$recs->{ids}}[$offset .. $offset + $limit - 1] ],
+		[ map { $_->[0] } @{$recs->{ids}} ],
 		$unapi,
 		$org,
 		$flesh_feed
 	);
+
+	$log->debug("Feed created...");
+
 	$feed->root($root);
 	$feed->lib($org);
 	$feed->search($terms);
 	$feed->class($class);
 
-	if ($complex_terms) {
 		$feed->title("Search results for [$terms] at ".$org_unit->[0]->name);
-	} else {
-		$feed->title("Search results for [$class => $terms] at ".$org_unit->[0]->name);
-	}
 
 	$feed->creator($host);
 	$feed->update_ts(gmtime_ISO8601());
@@ -1078,6 +1043,8 @@
 		$limit,
 	);
 
+	$log->debug("...basic feed data added...");
+
 	$feed->link(
 		next =>
 		$base . "/$version/$org/$type/$class?searchTerms=$terms&searchSort=$sort&searchSortDir=$sortdir&searchLang=$lang&startIndex=" . int($offset + $limit + 1) . "&count=" . $limit =>
@@ -1122,6 +1089,8 @@
 
 	$feed->link( 'unapi-server' => $unapi);
 
+	$log->debug("...feed links added...");
+
 #	$feed->link(
 #		opac =>
 #		$root . "../$lang/skin/default/xml/rresult.xml?rt=list&" .
@@ -1129,13 +1098,9 @@
 #		'text/html'
 #	);
 
-	print $cgi->header(
-		-type		=> $feed->type,
-		-charset	=> 'UTF-8',
-		-cookie		=> $cgi->cookie( -name => 'os_session', -value => $rs_name, -expires => '+30m' ),
-	);
+	print $cgi->header( -type => $feed->type, -charset => 'UTF-8') . entityize($feed->toString) . "\n";
 
-	print entityize($feed->toString) . "\n";
+	$log->debug("...and feed returned.");
 
 	return Apache2::Const::OK;
 }

Added: branches/rel_1_2/Open-ILS/src/sql/Pg/300.schema.staged_search.sql
===================================================================
--- branches/rel_1_2/Open-ILS/src/sql/Pg/300.schema.staged_search.sql	                        (rev 0)
+++ branches/rel_1_2/Open-ILS/src/sql/Pg/300.schema.staged_search.sql	2008-03-20 03:00:23 UTC (rev 9095)
@@ -0,0 +1,562 @@
+
+DROP SCHEMA search CASCADE;
+
+BEGIN;
+
+CREATE SCHEMA search;
+
+CREATE TABLE search.relevance_adjustment (
+    id          SERIAL  PRIMARY KEY,
+    active      BOOL    NOT NULL DEFAULT TRUE,
+    field       INT     NOT NULL REFERENCES config.metabib_field (id),
+    bump_type   TEXT    NOT NULL CHECK (bump_type IN ('word_order','first_word','full_match')),
+    multiplier  NUMERIC NOT NULL DEFAULT 1.0
+);
+CREATE UNIQUE INDEX bump_once_per_field_idx ON search.relevance_adjustment ( field, bump_type );
+
+INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(1, 'first_word', 1.5);
+INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(1, 'full_match', 20);
+INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(2, 'first_word', 1.5);
+INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(2, 'word_order', 10);
+INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(2, 'full_match', 20);
+INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(3, 'first_word', 1.5);
+INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(3, 'word_order', 10);
+INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(3, 'full_match', 20);
+INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(4, 'first_word', 1.5);
+INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(4, 'word_order', 10);
+INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(4, 'full_match', 20);
+INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(5, 'first_word', 1.5);
+INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(5, 'word_order', 10);
+INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(5, 'full_match', 20);
+INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(6, 'first_word', 1.5);
+INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(7, 'first_word', 1.5);
+INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(8, 'first_word', 1.5);
+INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(9, 'first_word', 1.5);
+INSERT INTO search.relevance_adjustment (field, bump_type, multiplier) VALUES(14, 'word_order', 10);
+
+CREATE OR REPLACE FUNCTION search.pick_table (TEXT) RETURNS TEXT AS $$
+    SELECT  CASE
+                WHEN $1 = 'author'  THEN 'metabib.author_field_entry'
+                WHEN $1 = 'title'   THEN 'metabib.title_field_entry'
+                WHEN $1 = 'subject' THEN 'metabib.subject_field_entry'
+                WHEN $1 = 'keyword' THEN 'metabib.keyword_field_entry'
+                WHEN $1 = 'series'  THEN 'metabib.series_field_entry'
+            END;
+$$ LANGUAGE SQL;
+
+CREATE TYPE search.search_result AS ( id BIGINT, rel NUMERIC, record INT, total INT, checked INT, visible INT, deleted INT, excluded INT );
+CREATE TYPE search.search_args AS ( id INT, field_class TEXT, field_name TEXT, table_alias TEXT, term TEXT, term_type TEXT );
+
+CREATE OR REPLACE FUNCTION search.staged_fts (
+
+    param_search_ou INT,
+    param_depth     INT,
+    param_searches  TEXT, -- JSON hash, to be turned into a resultset via search.parse_search_args
+    param_statuses  INT[],
+    param_audience  TEXT[],
+    param_language  TEXT[],
+    param_lit_form  TEXT[],
+    param_types     TEXT[],
+    param_forms     TEXT[],
+    param_vformats  TEXT[],
+    param_pref_lang TEXT,
+    param_pref_lang_multiplier REAL,
+    param_sort      TEXT,
+    param_sort_desc BOOL,
+    metarecord      BOOL,
+    staff           BOOL,
+    param_rel_limit INT,
+    param_chk_limit INT,
+    param_skip_chk  INT
+ 
+) RETURNS SETOF search.search_result AS $func$
+DECLARE
+
+    current_res         search.search_result%ROWTYPE;
+    query_part          search.search_args%ROWTYPE;
+    phrase_query_part   search.search_args%ROWTYPE;
+    rank_adjust_id      INT;
+    core_rel_limit      INT;
+    core_chk_limit      INT;
+    core_skip_chk       INT;
+    rank_adjust         search.relevance_adjustment%ROWTYPE;
+    query_table         TEXT;
+    tmp_text            TEXT;
+    tmp_int             INT;
+    current_rank        TEXT;
+    ranks               TEXT[] := '{}';
+    query_table_alias   TEXT;
+    from_alias_array    TEXT[] := '{}';
+    used_ranks          TEXT[] := '{}';
+    mb_field            INT;
+    mb_field_list       INT[];
+    search_org_list     INT[];
+    select_clause       TEXT := 'SELECT';
+    from_clause         TEXT := ' FROM  metabib.metarecord_source_map m JOIN metabib.rec_descriptor mrd ON (m.source = mrd.record) ';
+    where_clause        TEXT := ' WHERE 1=1 ';
+    mrd_used            BOOL := FALSE;
+    sort_desc           BOOL := FALSE;
+
+    core_result         RECORD;
+    core_cursor         REFCURSOR;
+    core_rel_query      TEXT;
+    vis_limit_query     TEXT;
+    inner_where_clause  TEXT;
+
+    total_count         INT := 0;
+    check_count         INT := 0;
+    deleted_count       INT := 0;
+    visible_count       INT := 0;
+    excluded_count      INT := 0;
+
+BEGIN
+
+    core_rel_limit := COALESCE( param_rel_limit, 25000 );
+    core_chk_limit := COALESCE( param_chk_limit, 1000 );
+    core_skip_chk := COALESCE( param_skip_chk, 1 );
+
+    IF metarecord THEN
+        select_clause := select_clause || ' m.metarecord as id, array_accum(distinct m.source) as records,';
+    ELSE
+        select_clause := select_clause || ' m.source as id, array_accum(distinct m.source) as records,';
+    END IF;
+
+    -- first we need to construct the base query
+    FOR query_part IN SELECT * FROM search.parse_search_args(param_searches) WHERE term_type = 'fts_query' LOOP
+
+        inner_where_clause := 'index_vector @@ ' || query_part.term;
+
+        IF query_part.field_name IS NOT NULL THEN
+
+           SELECT  id INTO mb_field
+             FROM  config.metabib_field
+             WHERE field_class = query_part.field_class
+                   AND name = query_part.field_name;
+
+            IF FOUND THEN
+                inner_where_clause := inner_where_clause ||
+                    ' AND ' || 'field = ' || mb_field;
+            END IF;
+
+        END IF;
+
+        -- moving on to the rank ...
+        SELECT  * INTO query_part
+          FROM  search.parse_search_args(param_searches)
+          WHERE term_type = 'fts_rank'
+                AND table_alias = query_part.table_alias;
+
+        current_rank := query_part.term || ' * ' || query_part.table_alias || '_weight.weight';
+
+        IF query_part.field_name IS NOT NULL THEN
+
+           SELECT  array_accum(distinct id) INTO mb_field_list
+             FROM  config.metabib_field
+             WHERE field_class = query_part.field_class
+                   AND name = query_part.field_name;
+
+        ELSE
+
+           SELECT  array_accum(distinct id) INTO mb_field_list
+             FROM  config.metabib_field
+             WHERE field_class = query_part.field_class;
+
+        END IF;
+
+        FOR rank_adjust IN SELECT * FROM search.relevance_adjustment WHERE active AND field IN ( SELECT * FROM search.explode_array( mb_field_list ) ) LOOP
+
+            IF NOT rank_adjust.bump_type = ANY (used_ranks) THEN
+
+                IF rank_adjust.bump_type = 'first_word' THEN
+                    SELECT  term INTO tmp_text
+                      FROM  search.parse_search_args(param_searches)
+                      WHERE table_alias = query_part.table_alias AND term_type = 'word'
+                      ORDER BY id
+                      LIMIT 1;
+
+                    tmp_text := query_part.table_alias || '.value ILIKE ' || quote_literal( tmp_text || '%' );
+
+                ELSIF rank_adjust.bump_type = 'word_order' THEN
+                    SELECT  array_to_string( array_accum( term ), '%' ) INTO tmp_text
+                      FROM  search.parse_search_args(param_searches)
+                      WHERE table_alias = query_part.table_alias AND term_type = 'word';
+
+                    tmp_text := query_part.table_alias || '.value ILIKE ' || quote_literal( '%' || tmp_text || '%' );
+
+                ELSIF rank_adjust.bump_type = 'full_match' THEN
+                    SELECT  array_to_string( array_accum( term ), E'\\s+' ) INTO tmp_text
+                      FROM  search.parse_search_args(param_searches)
+                      WHERE table_alias = query_part.table_alias AND term_type = 'word';
+
+                    tmp_text := query_part.table_alias || '.value  ~ ' || quote_literal( '^' || tmp_text || E'\\W*$' );
+
+                END IF;
+
+
+                current_rank := current_rank || ' * ( CASE WHEN ' || tmp_text ||
+                    ' THEN ' || rank_adjust.multiplier || '::REAL ELSE 1.0 END )';
+
+                used_ranks := array_append( used_ranks, rank_adjust.bump_type );
+
+            END IF;
+
+        END LOOP;
+
+        ranks := array_append( ranks, current_rank );
+        used_ranks := '{}';
+
+        FOR phrase_query_part IN
+            SELECT  * 
+              FROM  search.parse_search_args(param_searches)
+              WHERE term_type = 'phrase'
+                    AND table_alias = query_part.table_alias LOOP
+
+            inner_where_clause := inner_where_clause || ' AND ' || 'value  ~* ' || quote_literal( E'(^|\\W+)' || regexp_replace(phrase_query_part.term, E'\\s+',E'\\\\s+','g') || E'(\\W+|\$)' );
+
+        END LOOP;
+
+        query_table := search.pick_table(query_part.field_class);
+
+        from_clause := from_clause ||
+            ' JOIN ( SELECT * FROM ' || query_table || ' WHERE ' || inner_where_clause ||
+                    CASE WHEN core_rel_limit > 0 THEN ' LIMIT ' || core_rel_limit::TEXT ELSE '' END || ' ) AS ' || query_part.table_alias ||
+                ' ON ( m.source = ' || query_part.table_alias || '.source )' ||
+            ' JOIN config.metabib_field AS ' || query_part.table_alias || '_weight' ||
+                ' ON ( ' || query_part.table_alias || '.field = ' || query_part.table_alias || '_weight.id  AND  ' || query_part.table_alias || '_weight.search_field)';
+
+        from_alias_array := array_append(from_alias_array, query_part.table_alias);
+
+    END LOOP;
+
+    IF param_pref_lang IS NOT NULL AND param_pref_lang_multiplier IS NOT NULL THEN
+        current_rank := ' CASE WHEN mrd.item_lang = ' || quote_literal( param_pref_lang ) ||
+            ' THEN ' || param_pref_lang_multiplier || '::REAL ELSE 1.0 END ';
+
+        --ranks := array_append( ranks, current_rank );
+    END IF;
+
+    current_rank := ' AVG( ( (' || array_to_string( ranks, ') + (' ) || ') ) * ' || current_rank || ' ) ';
+    select_clause := select_clause || current_rank || ' AS rel,';
+
+    sort_desc = param_sort_desc;
+
+    IF param_sort = 'pubdate' THEN
+
+        tmp_text := '999999';
+        IF param_sort_desc THEN tmp_text := '0'; END IF;
+
+        current_rank := $$
+            ( COALESCE( FIRST ((
+                SELECT  SUBSTRING(frp.value FROM E'\\d{4}')
+                  FROM  metabib.full_rec frp
+                  WHERE frp.record = m.source
+                    AND frp.tag = '260'
+                    AND frp.subfield = 'c'
+                  LIMIT 1
+            )), $$ || quote_literal(tmp_text) || $$ )::INT )
+        $$;
+
+    ELSIF param_sort = 'title' THEN
+
+        tmp_text := 'zzzzzz';
+        IF param_sort_desc THEN tmp_text := '    '; END IF;
+
+        current_rank := $$
+            ( COALESCE( FIRST ((
+                SELECT  LTRIM(SUBSTR( frt.value, COALESCE(SUBSTRING(frt.ind2 FROM E'\\d+'),'0')::INT + 1 ))
+                  FROM  metabib.full_rec frt
+                  WHERE frt.record = m.source
+                    AND frt.tag = '245'
+                    AND frt.subfield = 'a'
+                  LIMIT 1
+            )),$$ || quote_literal(tmp_text) || $$))
+        $$;
+
+    ELSIF param_sort = 'author' THEN
+
+        tmp_text := 'zzzzzz';
+        IF param_sort_desc THEN tmp_text := '    '; END IF;
+
+        current_rank := $$
+            ( COALESCE( FIRST ((
+                SELECT  LTRIM(fra.value)
+                  FROM  metabib.full_rec fra
+                  WHERE fra.record = m.source
+                    AND fra.tag LIKE '1%'
+                    AND fra.subfield = 'a'
+                  ORDER BY fra.tag::text::int
+                  LIMIT 1
+            )),$$ || quote_literal(tmp_text) || $$))
+        $$;
+
+    ELSIF param_sort = 'create_date' THEN
+            current_rank := $$( FIRST (( SELECT create_date FROM biblio.record_entry rbr WHERE rbr.id = m.source)) )$$;
+    ELSIF param_sort = 'edit_date' THEN
+            current_rank := $$( FIRST (( SELECT edit_date FROM biblio.record_entry rbr WHERE rbr.id = m.source)) )$$;
+    ELSE
+        sort_desc := NOT COALESCE(param_sort_desc, FALSE);
+    END IF;
+
+    select_clause := select_clause || current_rank || ' AS rank';
+
+    -- now add the other qualifiers
+    IF param_audience IS NOT NULL AND array_upper(param_audience, 1) > 0 THEN
+        where_clause = where_clause || $$ AND mrd.audience IN ('$$ || array_to_string(param_audience, $$','$$) || $$') $$;
+    END IF;
+
+    IF param_language IS NOT NULL AND array_upper(param_language, 1) > 0 THEN
+        where_clause = where_clause || $$ AND mrd.item_lang IN ('$$ || array_to_string(param_language, $$','$$) || $$') $$;
+    END IF;
+
+    IF param_lit_form IS NOT NULL AND array_upper(param_lit_form, 1) > 0 THEN
+        where_clause = where_clause || $$ AND mrd.lit_form IN ('$$ || array_to_string(param_lit_form, $$','$$) || $$') $$;
+    END IF;
+
+    IF param_types IS NOT NULL AND array_upper(param_types, 1) > 0 THEN
+        where_clause = where_clause || $$ AND mrd.item_type IN ('$$ || array_to_string(param_types, $$','$$) || $$') $$;
+    END IF;
+
+    IF param_forms IS NOT NULL AND array_upper(param_forms, 1) > 0 THEN
+        where_clause = where_clause || $$ AND mrd.item_form IN ('$$ || array_to_string(param_forms, $$','$$) || $$') $$;
+    END IF;
+
+    IF param_vformats IS NOT NULL AND array_upper(param_vformats, 1) > 0 THEN
+        where_clause = where_clause || $$ AND mrd.vr_format IN ('$$ || array_to_string(param_vformats, $$','$$) || $$') $$;
+    END IF;
+
+    core_rel_query := select_clause || from_clause || where_clause ||
+                        ' GROUP BY 1 ORDER BY 4' || CASE WHEN sort_desc THEN ' DESC' ELSE ' ASC' END || ';';
+    --RAISE NOTICE 'Base Query:  %', core_rel_query;
+
+    IF param_depth IS NOT NULL THEN
+        SELECT array_accum(distinct id) INTO search_org_list FROM actor.org_unit_descendants( param_search_ou, param_depth );
+    ELSE
+        SELECT array_accum(distinct id) INTO search_org_list FROM actor.org_unit_descendants( param_search_ou );
+    END IF;
+
+    OPEN core_cursor FOR EXECUTE core_rel_query;
+
+    LOOP
+
+        FETCH core_cursor INTO core_result;
+        EXIT WHEN NOT FOUND;
+
+
+        IF total_count % 1000 = 0 THEN
+            -- RAISE NOTICE ' % total, % checked so far ... ', total_count, check_count;
+        END IF;
+
+        IF core_chk_limit > 0 AND total_count - core_skip_chk + 1 >= core_chk_limit THEN
+            total_count := total_count + 1;
+            CONTINUE;
+        END IF;
+
+        total_count := total_count + 1;
+
+        CONTINUE WHEN param_skip_chk IS NOT NULL and total_count < param_skip_chk;
+
+        check_count := check_count + 1;
+
+        PERFORM 1 FROM biblio.record_entry b WHERE NOT b.deleted AND b.id IN ( SELECT * FROM search.explode_array( core_result.records ) );
+        IF NOT FOUND THEN
+            -- RAISE NOTICE ' % were all deleted ... ', core_result.records;
+            deleted_count := deleted_count + 1;
+            CONTINUE;
+        END IF;
+
+        PERFORM 1
+          FROM  biblio.record_entry b
+                JOIN config.bib_source s ON (b.source = s.id)
+          WHERE s.transcendant
+                AND b.id IN ( SELECT * FROM search.explode_array( core_result.records ) );
+
+        IF FOUND THEN
+            -- RAISE NOTICE ' % were all transcendant ... ', core_result.records;
+            visible_count := visible_count + 1;
+
+            current_res.id = core_result.id;
+            current_res.rel = core_result.rel;
+
+            tmp_int := 1;
+            IF metarecord THEN
+                SELECT COUNT(DISTINCT s.source) INTO tmp_int FROM metabib.metarecord_source_map s WHERE s.metarecord = core_result.id;
+            END IF;
+
+            IF tmp_int = 1 THEN
+                current_res.record = core_result.records[1];
+            ELSE
+                current_res.record = NULL;
+            END IF;
+
+            RETURN NEXT current_res;
+
+            CONTINUE;
+        END IF;
+
+        IF param_statuses IS NOT NULL AND array_upper(param_statuses, 1) > 0 THEN
+
+            PERFORM 1
+              FROM  asset.call_number cn
+                    JOIN asset.copy cp ON (cp.call_number = cn.id)
+              WHERE NOT cn.deleted
+                    AND NOT cp.deleted
+                    AND cp.status IN ( SELECT * FROM search.explode_array( param_statuses ) )
+                    AND cn.record IN ( SELECT * FROM search.explode_array( core_result.records ) )
+                    AND cp.circ_lib IN ( SELECT * FROM search.explode_array( search_org_list ) )
+              LIMIT 1;
+
+            IF NOT FOUND THEN
+                -- RAISE NOTICE ' % were all status-excluded ... ', core_result.records;
+                excluded_count := excluded_count + 1;
+                CONTINUE;
+            END IF;
+
+        END IF;
+
+        IF staff IS NULL OR NOT staff THEN
+
+            PERFORM 1
+              FROM  asset.call_number cn
+                    JOIN asset.copy cp ON (cp.call_number = cn.id)
+                    JOIN actor.org_unit a ON (cp.circ_lib = a.id)
+                    JOIN asset.copy_location cl ON (cp.location = cl.id)
+                    JOIN config.copy_status cs ON (cp.status = cs.id)
+              WHERE NOT cn.deleted
+                    AND NOT cp.deleted
+                    AND cs.holdable
+                    AND cl.opac_visible
+                    AND cp.opac_visible
+                    AND a.opac_visible
+                    AND cp.circ_lib IN ( SELECT * FROM search.explode_array( search_org_list ) )
+                    AND cn.record IN ( SELECT * FROM search.explode_array( core_result.records ) )
+              LIMIT 1;
+
+            IF NOT FOUND THEN
+                -- RAISE NOTICE ' % were all visibility-excluded ... ', core_result.records;
+                excluded_count := excluded_count + 1;
+                CONTINUE;
+            END IF;
+
+        ELSE
+
+            PERFORM 1
+              FROM  asset.call_number cn
+                    JOIN asset.copy cp ON (cp.call_number = cn.id)
+                    JOIN actor.org_unit a ON (cp.circ_lib = a.id)
+                    JOIN asset.copy_location cl ON (cp.location = cl.id)
+                    JOIN config.copy_status cs ON (cp.status = cs.id)
+              WHERE NOT cn.deleted
+                    AND NOT cp.deleted
+                    AND cp.circ_lib IN ( SELECT * FROM search.explode_array( search_org_list ) )
+                    AND cn.record IN ( SELECT * FROM search.explode_array( core_result.records ) )
+              LIMIT 1;
+
+            IF NOT FOUND THEN
+
+                PERFORM 1
+                  FROM  asset.call_number cn
+                  WHERE cn.record IN ( SELECT * FROM search.explode_array( core_result.records ) )
+                  LIMIT 1;
+
+                IF FOUND THEN
+                    -- RAISE NOTICE ' % were all visibility-excluded ... ', core_result.records;
+                    excluded_count := excluded_count + 1;
+                    CONTINUE;
+                END IF;
+
+            END IF;
+
+        END IF;
+
+        visible_count := visible_count + 1;
+
+        current_res.id = core_result.id;
+        current_res.rel = core_result.rel;
+
+        tmp_int := 1;
+        IF metarecord THEN
+            SELECT COUNT(DISTINCT s.source) INTO tmp_int FROM metabib.metarecord_source_map s WHERE s.metarecord = core_result.id;
+        END IF;
+
+        IF tmp_int = 1 THEN
+            current_res.record = core_result.records[1];
+        ELSE
+            current_res.record = NULL;
+        END IF;
+
+        RETURN NEXT current_res;
+
+        IF visible_count % 1000 = 0 THEN
+            -- RAISE NOTICE ' % visible so far ... ', visible_count;
+        END IF;
+
+    END LOOP;
+
+    current_res.id = NULL;
+    current_res.rel = NULL;
+    current_res.record = NULL;
+    current_res.total = total_count;
+    current_res.checked = check_count;
+    current_res.deleted = deleted_count;
+    current_res.visible = visible_count;
+    current_res.excluded = excluded_count;
+
+    CLOSE core_cursor;
+
+    RETURN NEXT current_res;
+
+END;
+$func$ LANGUAGE PLPGSQL;
+
+/*
+    param_statuses  INT[],
+    param_audience  TEXT[], x
+    param_language  TEXT[], x
+    param_lit_form  TEXT[], x
+    param_types     TEXT[], x
+    param_forms     TEXT[], x
+    param_vformats  TEXT[], x
+*/
+
+CREATE OR REPLACE FUNCTION search.explode_array(anyarray) RETURNS SETOF anyelement AS $BODY$
+    SELECT ($1)[s] FROM generate_series(1, array_upper($1, 1)) AS s;
+$BODY$
+LANGUAGE 'sql' IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION search.parse_search_args (TEXT) RETURNS SETOF search.search_args AS $perlcode$
+    use JSON::XS;
+    my $json = shift;
+
+    my $args = decode_json( $json );
+
+    my $id = 1;
+
+    for my $k ( keys %$args ) {
+        (my $alias = $k) =~ s/\|/_/gso;
+        my ($class, $field) = split /\|/, $k;
+        my $part = $args->{$k};
+        for my $p ( keys %$part ) {
+            my $data = $part->{$p};
+            $data = [$data] if (!ref($data));
+            for my $datum ( @$data ) {
+                return_next(
+                    {   field_class => $class,
+                        field_name  => $field,
+                        term        => $datum,
+                        table_alias => $alias,
+                        term_type   => $p,
+                        id          => $id,
+                    }
+                );
+                $id++;
+            }
+        }
+    }
+
+    return undef;
+
+$perlcode$ LANGUAGE PLPERLU;
+
+
+COMMIT;
+

Modified: branches/rel_1_2/Open-ILS/src/sql/Pg/build-db.sh
===================================================================
--- branches/rel_1_2/Open-ILS/src/sql/Pg/build-db.sh	2008-03-20 02:50:12 UTC (rev 9094)
+++ branches/rel_1_2/Open-ILS/src/sql/Pg/build-db.sh	2008-03-20 03:00:23 UTC (rev 9095)
@@ -18,6 +18,8 @@
 PGPASSWORD=$5 PGUSER=$4 PGHOST=$1 PGPORT=$2 PGDATABASE=$3 psql -f 080.schema.money.sql
 PGPASSWORD=$5 PGUSER=$4 PGHOST=$1 PGPORT=$2 PGDATABASE=$3 psql -f 090.schema.action.sql
 
+PGPASSWORD=$5 PGUSER=$4 PGHOST=$1 PGPORT=$2 PGDATABASE=$3 psql -f 300.schema.staged_search.sql
+
 PGPASSWORD=$5 PGUSER=$4 PGHOST=$1 PGPORT=$2 PGDATABASE=$3 psql -f 500.view.cross-schema.sql
 
 PGPASSWORD=$5 PGUSER=$4 PGHOST=$1 PGPORT=$2 PGDATABASE=$3 psql -f 800.fkeys.sql



More information about the open-ils-commits mailing list