[open-ils-commits] r16428 - in trunk/Open-ILS/src/sql/Pg: . upgrade (miker)
svn at svn.open-ils.org
svn at svn.open-ils.org
Thu May 13 12:44:18 EDT 2010
Author: miker
Date: 2010-05-13 12:44:14 -0400 (Thu, 13 May 2010)
New Revision: 16428
Added:
trunk/Open-ILS/src/sql/Pg/upgrade/0261.schema.vandelay.record_overlay.sql
Modified:
trunk/Open-ILS/src/sql/Pg/002.schema.config.sql
trunk/Open-ILS/src/sql/Pg/012.schema.vandelay.sql
Log:
teach the overlay functions about data matching REs; add a template-based merge instead of requiring a vandelay record, and use that for bib merging; minor logic collapsing
Modified: trunk/Open-ILS/src/sql/Pg/002.schema.config.sql
===================================================================
--- trunk/Open-ILS/src/sql/Pg/002.schema.config.sql 2010-05-13 15:20:36 UTC (rev 16427)
+++ trunk/Open-ILS/src/sql/Pg/002.schema.config.sql 2010-05-13 16:44:14 UTC (rev 16428)
@@ -65,7 +65,7 @@
install_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);
-INSERT INTO config.upgrade_log (version) VALUES ('0260'); -- berick
+INSERT INTO config.upgrade_log (version) VALUES ('0261'); -- miker
CREATE TABLE config.bib_source (
id SERIAL PRIMARY KEY,
Modified: trunk/Open-ILS/src/sql/Pg/012.schema.vandelay.sql
===================================================================
--- trunk/Open-ILS/src/sql/Pg/012.schema.vandelay.sql 2010-05-13 15:20:36 UTC (rev 16427)
+++ trunk/Open-ILS/src/sql/Pg/012.schema.vandelay.sql 2010-05-13 16:44:14 UTC (rev 16428)
@@ -311,7 +311,6 @@
my $target_xml = shift;
my $source_xml = shift;
my $field_spec = shift;
- $field_spec =~ s/\s+//sg;
my $target_r = MARC::Record->new_from_xml( $target_xml );
my $source_r = MARC::Record->new_from_xml( $source_xml );
@@ -322,8 +321,23 @@
my %fields;
for my $f (@field_list) {
- if ($f =~ /^(.{3})(.*)$/) {
- $fields{$1} = [ split('', $2) ];
+ $f =~ s/^\s*//; $f =~ s/\s*$//;
+ if ($f =~ /^(.{3})(\w*)(?:\[([^]]*)\])?$/) {
+ my $field = $1;
+ $field =~ s/\s+//;
+ my $sf = $2;
+ $sf =~ s/\s+//;
+ my $match = $3;
+ $match =~ s/^\s*//; $match =~ s/\s*$//;
+ $fields{$field} = { sf => [ split('', $sf) ] };
+ if ($match) {
+ my ($msf,$mre) = split('~', $match);
+ if (length($msf) > 0 and length($mre) > 0) {
+ $msf =~ s/^\s*//; $msf =~ s/\s*$//;
+ $mre =~ s/^\s*//; $mre =~ s/\s*$//;
+ $fields{$field}{match} = { sf => $msf, re => qr/$mre/ };
+ }
+ }
}
}
@@ -331,6 +345,9 @@
if ( @{$fields{$f}} ) {
for my $from_field ($source_r->field( $f )) {
for my $to_field ($target_r->field( $f )) {
+ if (exists($fields{$f}{match})) {
+ next unless (grep { $_ =~ $field{$f}{match}{re} } $to_field->subfield($field{$f}{match}{sf}));
+ }
my @new_sf = map { ($_ => $from_field->subfield($_)) } @{$fields{$f}};
$to_field->add_subfields( @new_sf );
}
@@ -361,22 +378,41 @@
return $xml unless ($r);
my $field_spec = shift;
- $field_spec =~ s/\s+//sg;
-
my @field_list = split(',', $field_spec);
my %fields;
for my $f (@field_list) {
- if ($f =~ /^(.{3})(.*)$/) {
- $fields{$1} = [ split('', $2) ];
+ $f =~ s/^\s*//; $f =~ s/\s*$//;
+ if ($f =~ /^(.{3})(\w*)(?:\[([^]]*)\])?$/) {
+ my $field = $1;
+ $field =~ s/\s+//;
+ my $sf = $2;
+ $sf =~ s/\s+//;
+ my $match = $3;
+ $match =~ s/^\s*//; $match =~ s/\s*$//;
+ $fields{$field} = { sf => [ split('', $sf) ] };
+ if ($match) {
+ my ($msf,$mre) = split('~', $match);
+ if (length($msf) > 0 and length($mre) > 0) {
+ $msf =~ s/^\s*//; $msf =~ s/\s*$//;
+ $mre =~ s/^\s*//; $mre =~ s/\s*$//;
+ $fields{$field}{match} = { sf => $msf, re => qr/$mre/ };
+ }
+ }
}
}
for my $f ( keys %fields) {
- if ( @{$fields{$f}} ) {
- $_->delete_subfield(code => $fields{$f}) for ($r->field( $f ));
- } else {
- $r->delete_field( $_ ) for ( $r->field( $f ) );
+ for my $to_field ($r->field( $f )) {
+ if (exists($fields{$f}{match})) {
+ next unless (grep { $_ =~ $field{$f}{match}{re} } $to_field->subfield($field{$f}{match}{sf}));
+ }
+
+ if ( @{$fields{$f}} ) {
+ $to_field->delete_subfield(code => $fields{$f}{sf});
+ } else {
+ $r->delete_field( $to_field );
+ }
}
}
@@ -443,7 +479,7 @@
END;
$_$ LANGUAGE PLPGSQL;
-CREATE OR REPLACE FUNCTION vandelay.overlay_bib_record ( import_id BIGINT, eg_id BIGINT, merge_profile_id INT ) RETURNS BOOL AS $$
+CREATE OR REPLACE FUNCTION vandelay.template_overlay_bib_record ( v_marc TEXT, eg_id BIGINT, merge_profile_id INT ) RETURNS BOOL AS $$
DECLARE
merge_profile vandelay.merge_profile%ROWTYPE;
dyn_profile vandelay.compile_profile%ROWTYPE;
@@ -452,7 +488,6 @@
source_marc TEXT;
target_marc TEXT;
eg_marc TEXT;
- v_marc TEXT;
replace_rule TEXT;
match_count INT;
BEGIN
@@ -462,13 +497,8 @@
JOIN vandelay.bib_match m ON (m.eg_record = b.id AND m.queued_record = import_id)
LIMIT 1;
- SELECT q.marc INTO v_marc
- FROM vandelay.queued_record q
- JOIN vandelay.bib_match m ON (m.queued_record = q.id AND q.id = import_id)
- LIMIT 1;
-
IF eg_marc IS NULL OR v_marc IS NULL THEN
- -- RAISE NOTICE 'no marc for vandelay or bib record';
+ -- RAISE NOTICE 'no marc for template or bib record';
RETURN FALSE;
END IF;
@@ -503,7 +533,45 @@
SET marc = vandelay.merge_record_xml( target_marc, source_marc, dyn_profile.add_rule, replace_rule, dyn_profile.strip_rule )
WHERE id = eg_id;
- IF FOUND THEN
+ IF NOT FOUND THEN
+ -- RAISE NOTICE 'update of biblio.record_entry failed';
+ RETURN FALSE;
+ END IF;
+
+ RETURN TRUE;
+
+END;
+$$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay.template_overlay_bib_record ( v_marc TEXT, eg_id BIGINT) RETURNS BOOL AS $$
+ SELECT vandelay.template_overlay_bib_record( $1, $2, NULL);
+$$ LANGUAGE SQL;
+
+CREATE OR REPLACE FUNCTION vandelay.overlay_bib_record ( import_id BIGINT, eg_id BIGINT, merge_profile_id INT ) RETURNS BOOL AS $$
+DECLARE
+ merge_profile vandelay.merge_profile%ROWTYPE;
+ dyn_profile vandelay.compile_profile%ROWTYPE;
+ editor_string TEXT;
+ editor_id INT;
+ source_marc TEXT;
+ target_marc TEXT;
+ eg_marc TEXT;
+ v_marc TEXT;
+ replace_rule TEXT;
+ match_count INT;
+BEGIN
+
+ SELECT q.marc INTO v_marc
+ FROM vandelay.queued_record q
+ JOIN vandelay.bib_match m ON (m.queued_record = q.id AND q.id = import_id)
+ LIMIT 1;
+
+ IF v_marc IS NULL THEN
+ -- RAISE NOTICE 'no marc for vandelay or bib record';
+ RETURN FALSE;
+ END IF;
+
+ IF vandelay.template_overlay_bib_record( v_marc, eg_id, merge_profile_id) THEN
UPDATE vandelay.queued_bib_record
SET imported_as = eg_id,
import_time = NOW()
@@ -581,13 +649,11 @@
CREATE OR REPLACE FUNCTION vandelay.auto_overlay_bib_queue ( queue_id BIGINT, merge_profile_id INT ) RETURNS SETOF BIGINT AS $$
DECLARE
queued_record vandelay.queued_bib_record%ROWTYPE;
- success BOOL;
BEGIN
FOR queued_record IN SELECT * FROM vandelay.queued_bib_record WHERE queue = queue_id AND import_time IS NULL LOOP
- success := vandelay.auto_overlay_bib_record( queued_record.id, merge_profile_id );
- IF success THEN
+ IF vandelay.auto_overlay_bib_record( queued_record.id, merge_profile_id ) THEN
RETURN NEXT queued_record.id;
END IF;
@@ -1226,13 +1292,11 @@
CREATE OR REPLACE FUNCTION vandelay.auto_overlay_authority_queue ( queue_id BIGINT, merge_profile_id INT ) RETURNS SETOF BIGINT AS $$
DECLARE
queued_record vandelay.queued_authority_record%ROWTYPE;
- success BOOL;
BEGIN
FOR queued_record IN SELECT * FROM vandelay.queued_authority_record WHERE queue = queue_id AND import_time IS NULL LOOP
- success := vandelay.auto_overlay_authority_record( queued_record.id, merge_profile_id );
- IF success THEN
+ IF vandelay.auto_overlay_authority_record( queued_record.id, merge_profile_id ) THEN
RETURN NEXT queued_record.id;
END IF;
Added: trunk/Open-ILS/src/sql/Pg/upgrade/0261.schema.vandelay.record_overlay.sql
===================================================================
--- trunk/Open-ILS/src/sql/Pg/upgrade/0261.schema.vandelay.record_overlay.sql (rev 0)
+++ trunk/Open-ILS/src/sql/Pg/upgrade/0261.schema.vandelay.record_overlay.sql 2010-05-13 16:44:14 UTC (rev 16428)
@@ -0,0 +1,290 @@
+BEGIN;
+
+INSERT INTO config.upgrade_log (version) VALUES ('0261'); --miker
+
+-- Teach add/delete field/subfield stored procs about data matching
+
+CREATE OR REPLACE FUNCTION vandelay.add_field ( target_xml TEXT, source_xml TEXT, field TEXT ) RETURNS TEXT AS $_$
+
+ use MARC::Record;
+ use MARC::File::XML;
+
+ my $target_xml = shift;
+ my $source_xml = shift;
+ my $field_spec = shift;
+
+ my $target_r = MARC::Record->new_from_xml( $target_xml );
+ my $source_r = MARC::Record->new_from_xml( $source_xml );
+
+ return $target_xml unless ($target_r && $source_r);
+
+ my @field_list = split(',', $field_spec);
+
+ my %fields;
+ for my $f (@field_list) {
+ $f =~ s/^\s*//; $f =~ s/\s*$//;
+ if ($f =~ /^(.{3})(\w*)(?:\[([^]]*)\])?$/) {
+ my $field = $1;
+ $field =~ s/\s+//;
+ my $sf = $2;
+ $sf =~ s/\s+//;
+ my $match = $3;
+ $match =~ s/^\s*//; $match =~ s/\s*$//;
+ $fields{$field} = { sf => [ split('', $sf) ] };
+ if ($match) {
+ my ($msf,$mre) = split('~', $match);
+ if (length($msf) > 0 and length($mre) > 0) {
+ $msf =~ s/^\s*//; $msf =~ s/\s*$//;
+ $mre =~ s/^\s*//; $mre =~ s/\s*$//;
+ $fields{$field}{match} = { sf => $msf, re => qr/$mre/ };
+ }
+ }
+ }
+ }
+
+ for my $f ( keys %fields) {
+ if ( @{$fields{$f}} ) {
+ for my $from_field ($source_r->field( $f )) {
+ for my $to_field ($target_r->field( $f )) {
+ if (exists($fields{$f}{match})) {
+ next unless (grep { $_ =~ $field{$f}{match}{re} } $to_field->subfield($field{$f}{match}{sf}));
+ }
+ my @new_sf = map { ($_ => $from_field->subfield($_)) } @{$fields{$f}};
+ $to_field->add_subfields( @new_sf );
+ }
+ }
+ } else {
+ my @new_fields = map { $_->clone } $source_r->field( $f );
+ $target_r->insert_fields_ordered( @new_fields );
+ }
+ }
+
+ $target_xml = $target_r->as_xml_record;
+ $target_xml =~ s/^<\?.+?\?>$//mo;
+ $target_xml =~ s/\n//sgo;
+ $target_xml =~ s/>\s+</></sgo;
+
+ return $target_xml;
+
+$_$ LANGUAGE PLPERLU;
+
+CREATE OR REPLACE FUNCTION vandelay.strip_field ( xml TEXT, field TEXT ) RETURNS TEXT AS $_$
+
+ use MARC::Record;
+ use MARC::File::XML;
+
+ my $xml = shift;
+ my $r = MARC::Record->new_from_xml( $xml );
+
+ return $xml unless ($r);
+
+ my $field_spec = shift;
+ my @field_list = split(',', $field_spec);
+
+ my %fields;
+ for my $f (@field_list) {
+ $f =~ s/^\s*//; $f =~ s/\s*$//;
+ if ($f =~ /^(.{3})(\w*)(?:\[([^]]*)\])?$/) {
+ my $field = $1;
+ $field =~ s/\s+//;
+ my $sf = $2;
+ $sf =~ s/\s+//;
+ my $match = $3;
+ $match =~ s/^\s*//; $match =~ s/\s*$//;
+ $fields{$field} = { sf => [ split('', $sf) ] };
+ if ($match) {
+ my ($msf,$mre) = split('~', $match);
+ if (length($msf) > 0 and length($mre) > 0) {
+ $msf =~ s/^\s*//; $msf =~ s/\s*$//;
+ $mre =~ s/^\s*//; $mre =~ s/\s*$//;
+ $fields{$field}{match} = { sf => $msf, re => qr/$mre/ };
+ }
+ }
+ }
+ }
+
+ for my $f ( keys %fields) {
+ for my $to_field ($r->field( $f )) {
+ if (exists($fields{$f}{match})) {
+ next unless (grep { $_ =~ $field{$f}{match}{re} } $to_field->subfield($field{$f}{match}{sf}));
+ }
+
+ if ( @{$fields{$f}} ) {
+ $to_field->delete_subfield(code => $fields{$f}{sf});
+ } else {
+ $r->delete_field( $to_field );
+ }
+ }
+ }
+
+ $xml = $r->as_xml_record;
+ $xml =~ s/^<\?.+?\?>$//mo;
+ $xml =~ s/\n//sgo;
+ $xml =~ s/>\s+</></sgo;
+
+ return $xml;
+
+$_$ LANGUAGE PLPERLU;
+
+-- Template-based marc munging functions
+CREATE OR REPLACE FUNCTION vandelay.template_overlay_bib_record ( v_marc TEXT, eg_id BIGINT, merge_profile_id INT ) RETURNS BOOL AS $$
+DECLARE
+ merge_profile vandelay.merge_profile%ROWTYPE;
+ dyn_profile vandelay.compile_profile%ROWTYPE;
+ editor_string TEXT;
+ editor_id INT;
+ source_marc TEXT;
+ target_marc TEXT;
+ eg_marc TEXT;
+ replace_rule TEXT;
+ match_count INT;
+BEGIN
+
+ SELECT b.marc INTO eg_marc
+ FROM biblio.record_entry b
+ JOIN vandelay.bib_match m ON (m.eg_record = b.id AND m.queued_record = import_id)
+ LIMIT 1;
+
+ IF eg_marc IS NULL OR v_marc IS NULL THEN
+ -- RAISE NOTICE 'no marc for template or bib record';
+ RETURN FALSE;
+ END IF;
+
+ dyn_profile := vandelay.compile_profile( v_marc );
+
+ IF merge_profile_id IS NOT NULL THEN
+ SELECT * INTO merge_profile FROM vandelay.merge_profile WHERE id = merge_profile_id;
+ IF FOUND THEN
+ dyn_profile.add_rule := BTRIM( dyn_profile.add_rule || ',' || COALESCE(merge_profile.add_spec,''), ',');
+ dyn_profile.strip_rule := BTRIM( dyn_profile.strip_rule || ',' || COALESCE(merge_profile.strip_spec,''), ',');
+ dyn_profile.replace_rule := BTRIM( dyn_profile.replace_rule || ',' || COALESCE(merge_profile.replace_spec,''), ',');
+ dyn_profile.preserve_rule := BTRIM( dyn_profile.preserve_rule || ',' || COALESCE(merge_profile.preserve_spec,''), ',');
+ END IF;
+ END IF;
+
+ IF dyn_profile.replace_rule <> '' AND dyn_profile.preserve_rule <> '' THEN
+ -- RAISE NOTICE 'both replace [%] and preserve [%] specified', dyn_profile.replace_rule, dyn_profile.preserve_rule;
+ RETURN FALSE;
+ END IF;
+
+ IF dyn_profile.replace_rule <> '' THEN
+ source_marc = v_marc;
+ target_marc = eg_marc;
+ replace_rule = dyn_profile.replace_rule;
+ ELSE
+ source_marc = eg_marc;
+ target_marc = v_marc;
+ replace_rule = dyn_profile.preserve_rule;
+ END IF;
+
+ UPDATE biblio.record_entry
+ SET marc = vandelay.merge_record_xml( target_marc, source_marc, dyn_profile.add_rule, replace_rule, dyn_profile.strip_rule )
+ WHERE id = eg_id;
+
+ IF NOT FOUND THEN
+ -- RAISE NOTICE 'update of biblio.record_entry failed';
+ RETURN FALSE;
+ END IF;
+
+ RETURN TRUE;
+
+END;
+$$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay.template_overlay_bib_record ( v_marc TEXT, eg_id BIGINT) RETURNS BOOL AS $$
+ SELECT vandelay.template_overlay_bib_record( $1, $2, NULL);
+$$ LANGUAGE SQL;
+
+CREATE OR REPLACE FUNCTION vandelay.overlay_bib_record ( import_id BIGINT, eg_id BIGINT, merge_profile_id INT ) RETURNS BOOL AS $$
+DECLARE
+ merge_profile vandelay.merge_profile%ROWTYPE;
+ dyn_profile vandelay.compile_profile%ROWTYPE;
+ editor_string TEXT;
+ editor_id INT;
+ source_marc TEXT;
+ target_marc TEXT;
+ eg_marc TEXT;
+ v_marc TEXT;
+ replace_rule TEXT;
+ match_count INT;
+BEGIN
+
+ SELECT q.marc INTO v_marc
+ FROM vandelay.queued_record q
+ JOIN vandelay.bib_match m ON (m.queued_record = q.id AND q.id = import_id)
+ LIMIT 1;
+
+ IF v_marc IS NULL THEN
+ -- RAISE NOTICE 'no marc for vandelay or bib record';
+ RETURN FALSE;
+ END IF;
+
+ IF vandelay.template_overlay_bib_record( v_marc, eg_id, merge_profile_id) THEN
+ UPDATE vandelay.queued_bib_record
+ SET imported_as = eg_id,
+ import_time = NOW()
+ WHERE id = import_id;
+
+ editor_string := (oils_xpath('//*[@tag="905"]/*[@code="u"]/text()',v_marc))[1];
+
+ IF editor_string IS NOT NULL AND editor_string <> '' THEN
+ SELECT usr INTO editor_id FROM actor.card WHERE barcode = editor_string;
+
+ IF editor_id IS NULL THEN
+ SELECT id INTO editor_id FROM actor.usr WHERE usrname = editor_string;
+ END IF;
+
+ IF editor_id IS NOT NULL THEN
+ UPDATE biblio.record_entry SET editor = editor_id WHERE id = eg_id;
+ END IF;
+ END IF;
+
+ RETURN TRUE;
+ END IF;
+
+ -- RAISE NOTICE 'update of biblio.record_entry failed';
+
+ RETURN FALSE;
+
+END;
+$$ LANGUAGE PLPGSQL;
+
+-- Minor cleanup of overlay test logic
+CREATE OR REPLACE FUNCTION vandelay.auto_overlay_bib_queue ( queue_id BIGINT, merge_profile_id INT ) RETURNS SETOF BIGINT AS $$
+DECLARE
+ queued_record vandelay.queued_bib_record%ROWTYPE;
+BEGIN
+
+ FOR queued_record IN SELECT * FROM vandelay.queued_bib_record WHERE queue = queue_id AND import_time IS NULL LOOP
+
+ IF vandelay.auto_overlay_bib_record( queued_record.id, merge_profile_id ) THEN
+ RETURN NEXT queued_record.id;
+ END IF;
+
+ END LOOP;
+
+ RETURN;
+
+END;
+$$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay.auto_overlay_authority_queue ( queue_id BIGINT, merge_profile_id INT ) RETURNS SETOF BIGINT AS $$
+DECLARE
+ queued_record vandelay.queued_authority_record%ROWTYPE;
+BEGIN
+
+ FOR queued_record IN SELECT * FROM vandelay.queued_authority_record WHERE queue = queue_id AND import_time IS NULL LOOP
+
+ IF vandelay.auto_overlay_authority_record( queued_record.id, merge_profile_id ) THEN
+ RETURN NEXT queued_record.id;
+ END IF;
+
+ END LOOP;
+
+ RETURN;
+
+END;
+$$ LANGUAGE PLPGSQL;
+
+COMMIT;
+
More information about the open-ils-commits
mailing list