[open-ils-commits] r18826 - trunk/Open-ILS/src/sql/Pg (gmc)

svn at svn.open-ils.org svn at svn.open-ils.org
Mon Nov 22 12:18:02 EST 2010


Author: gmc
Date: 2010-11-22 12:17:59 -0500 (Mon, 22 Nov 2010)
New Revision: 18826

Added:
   trunk/Open-ILS/src/sql/Pg/reingest-1.6-2.0.pl
Modified:
   trunk/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql
Log:
bug #680096 - upgrade script to partially reingest bibs after upgrade to 2.0

This solves the problem of the new facets sidebar showing up empty in
OPAC search results.  Since the process of populating metabib.facet_entry
and updating metabib.*_field_entry can take a long time on large databases,
the update SQL is generated by a separate script, reingest-1.6-2.0.pl.  The usage
output from an example run follows:

./reingest-1.6-2.0.pl: generate SQL script to reingest bibs during an upgrade to Evergreen 2.0

By default, the script writes to the file reingest-1.6-2.0.sql.  To modify
this script's behavior, you can supply the following options:

--config /path/to/opensrf_core.xml  used to get connection information to
                                    the Evergreen database
--chunk_size n                      number of bibs to reingest in a chunk;
                                    specify if you don't want all of the
                                    bibs in the database to be reindexes
                                    in a single transaction
--output /path/to/output_file.sql   path of output SQL file

Writing output to file reingest-1.6-2.0.sql

SQL script complete.  To perform the reingest, please run the script using
the psql program, e.g.,

psql {connection parameters}  < reingest-1.6-2.0.sql

If you are running a large Evergreen installation, it is recommend that you
examine the script first; note that a reingest of a large Evergreen database
can take several hours.

Signed-off-by: Galen Charlton <gmc at esilibrary.com>

Modified: trunk/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql
===================================================================
--- trunk/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql	2010-11-22 17:17:13 UTC (rev 18825)
+++ trunk/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql	2010-11-22 17:17:59 UTC (rev 18826)
@@ -19139,4 +19139,8 @@
 
 
 \qecho Upgrade script completed.
-
+\qecho But wait, there's more: please run reingest-1.6-2.0.pl
+\qecho in order to create an SQL script to run to partially reindex 
+\qecho the bib records; this is required to make the new facet
+\qecho sidebar in OPAC search results work and to upgrade the keyword 
+\qecho indexes to use the revised NACO normalization routine.

Added: trunk/Open-ILS/src/sql/Pg/reingest-1.6-2.0.pl
===================================================================
--- trunk/Open-ILS/src/sql/Pg/reingest-1.6-2.0.pl	                        (rev 0)
+++ trunk/Open-ILS/src/sql/Pg/reingest-1.6-2.0.pl	2010-11-22 17:17:59 UTC (rev 18826)
@@ -0,0 +1,134 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use Getopt::Long;
+use DBI;
+use OpenSRF::EX qw/:try/;
+use OpenSRF::Utils::Logger qw/$logger/;
+use OpenSRF::System;
+use OpenSRF::AppSession;
+use OpenSRF::Utils::SettingsClient;
+
+my ($config, $chunk_size, $output) = ('/openils/conf/opensrf_core.xml', 0, 'reingest-1.6-2.0.sql');  # defaults; chunk_size 0 means "no chunking"
+
+GetOptions(  # ':' after each option name makes its value optional (Getopt::Long)
+        "config:s"      => \$config,
+        "chunk_size:i"  => \$chunk_size,
+        "output:s"      => \$output,
+);
+
+print qq{$0: generate SQL script to reingest bibs during an upgrade to Evergreen 2.0
+
+By default, the script writes to the file reingest-1.6-2.0.sql.  To modify
+this script's behavior, you can supply the following options:
+
+--config /path/to/opensrf_core.xml  used to get connection information to
+                                    the Evergreen database
+--chunk_size n                      number of bibs to reingest in a chunk;
+                                    specify if you don't want all of the 
+                                    bibs in the database to be reindexes
+                                    in a single transaction
+--output /path/to/output_file.sql   path of output SQL file
+
+};
+
+open OUT, ">$output" or die "$0: cannot open output file $output: $!\n";  # NOTE(review): bareword handle + 2-arg-style mode; 3-arg lexical open is the modern idiom
+print "Writing output to file $output\n";
+my $num_bibs;  # stays undef unless chunking is requested
+$num_bibs = fetch_num_bibs_from_database($config) if $chunk_size;  # bib count is only needed to compute chunk boundaries
+
+header();  # writes the CREATE FUNCTION wrapper into OUT
+body($num_bibs, $chunk_size);  # writes the SELECT call(s) that perform the reingest
+footer();  # drops the wrapper and VACUUMs touched tables; NOTE(review): OUT is never explicitly closed
+
+print qq{
+SQL script complete.  To perform the reingest, please run the script using
+the psql program, e.g.,
+
+psql {connection parameters}  < $output
+
+If you are running a large Evergreen installation, it is recommend that you
+examine the script first; note that a reingest of a large Evergreen database
+can take several hours.
+};
+
+sub fetch_num_bibs_from_database {  # query the Evergreen DB (located via OpenSRF settings) for the count of live bibs
+    my $config = shift;  # path to opensrf_core.xml
+    OpenSRF::System->bootstrap_client( config_file => $config );  # connect to OpenSRF so the settings client can be used
+    my $sc = OpenSRF::Utils::SettingsClient->new;
+    my $db_driver = $sc->config_value( reporter => setup => database => 'driver' );
+    my $db_host = $sc->config_value( reporter => setup => database => 'host' );
+    my $db_port = $sc->config_value( reporter => setup => database => 'port' );
+    my $db_name = $sc->config_value( reporter => setup => database => 'db' );
+    if (!$db_name) {  # fall back: some configurations store the database name under 'name' rather than 'db'
+        $db_name = $sc->config_value( reporter => setup => database => 'name' );
+    }
+    my $db_user = $sc->config_value( reporter => setup => database => 'user' );
+    my $db_pw = $sc->config_value( reporter => setup => database => 'pw' );
+    die "Unable to retrieve database connection information from the settings server" 
+        unless ($db_driver && $db_host && $db_port && $db_name && $db_user);  # $db_pw deliberately not required — presumably an empty password is legitimate
+
+    my $dsn = "dbi:" . $db_driver . ":dbname=" . $db_name .';host=' . $db_host . ';port=' . $db_port;
+    my $dbh = DBI->connect($dsn, $db_user, $db_pw, {AutoCommit => 1, pg_enable_utf8 => 1, RaiseError => 1});  # RaiseError: any DB failure dies here
+    my $count = $dbh->selectrow_array('SELECT COUNT(*) FROM biblio.record_entry WHERE id > -1 AND NOT deleted');  # id > -1 skips negative-id records — presumably internal placeholders; verify against schema
+    return $count;
+}
+
+sub header {  # emit the SQL preamble: \qecho notices plus the upgrade_2_0_partial_bib_reingest(offset, limit) wrapper function
+    print OUT q {
+\qecho Do a partial reingest to fully populate metabib.facet_entry
+\qecho and update the keyword indexes to reflect changes in the default
+\qecho NACO normalization.  This can be time consuming on large databases.
+
+\qecho public.upgrade_2_0_partial_bib_reingest is a thin wrapper around
+\qecho metabib.reingest_metabib_field_entries, which does the actual
+\qecho work; its main purpose is to suggest a means of doing the reingest
+\qecho piecemeal if there isn't sufficient time in the upgrade window
+\qecho to do it in one fell swoop.
+CREATE OR REPLACE FUNCTION public.upgrade_2_0_partial_bib_reingest( l_offset BIGINT, l_limit BIGINT ) RETURNS VOID AS $func$
+BEGIN
+   PERFORM metabib.reingest_metabib_field_entries(id)
+       FROM biblio.record_entry
+       WHERE id IN (
+       SELECT id  
+       FROM biblio.record_entry
+       WHERE id > -1
+       AND NOT deleted
+       ORDER BY id
+       OFFSET l_offset
+       LIMIT l_limit
+   );
+END;
+$func$ LANGUAGE PLPGSQL;
+};  # end of q{} literal — everything above is written verbatim to the SQL file
+}
+
+sub body {  # emit the SELECT statement(s) that invoke the wrapper — chunked if both a count and a chunk size were given
+    my ($num_bibs, $chunk_size) = @_;
+    if ($num_bibs && $chunk_size) {  # chunked mode: one wrapper call per chunk, each a separate statement/transaction
+        for (my $i = 0; $i <= $num_bibs / $chunk_size; $i++) {  # NOTE(review): '<=' emits one extra (empty) chunk when num_bibs divides evenly — harmless but redundant
+            print OUT qq{SELECT public.upgrade_2_0_partial_bib_reingest($i * $chunk_size, $chunk_size);\n};  # interpolates to e.g. "3 * 500"; Postgres evaluates the product as the offset
+        }
+    } else {  # unchunked: a single call covering every live bib in one transaction
+        print OUT q{
+SELECT public.upgrade_2_0_partial_bib_reingest(0, (SELECT COUNT(*) FROM biblio.record_entry WHERE id > -1 AND NOT deleted));
+        };  # note: the q{} literal's leading whitespace before '}' is written to the file as-is
+    }
+}
+
+sub footer {  # emit teardown SQL: drop the temporary wrapper function, then VACUUM ANALYZE every table the reingest touched
+    print OUT q{
+-- clean up after ourselves
+DROP FUNCTION public.upgrade_2_0_partial_bib_reingest(BIGINT, BIGINT);
+
+VACUUM ANALYZE metabib.keyword_field_entry;
+VACUUM ANALYZE metabib.author_field_entry;
+VACUUM ANALYZE metabib.subject_field_entry;
+VACUUM ANALYZE metabib.title_field_entry;
+VACUUM ANALYZE metabib.series_field_entry;
+VACUUM ANALYZE metabib.identifier_field_entry;
+VACUUM ANALYZE metabib.facet_entry;
+};  # end of q{} literal — written verbatim to the SQL file
+}
+ 


Property changes on: trunk/Open-ILS/src/sql/Pg/reingest-1.6-2.0.pl
___________________________________________________________________
Name: svn:executable
   + *



More information about the open-ils-commits mailing list