[open-ils-commits] r14830 - in branches/rel_1_6/Open-ILS/src: perlmods/OpenILS/Application sql/Pg (dbs)

svn at svn.open-ils.org svn at svn.open-ils.org
Sun Nov 8 20:11:09 EST 2009


Author: dbs
Date: 2009-11-08 20:11:08 -0500 (Sun, 08 Nov 2009)
New Revision: 14830

Modified:
   branches/rel_1_6/Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
   branches/rel_1_6/Open-ILS/src/sql/Pg/reporter-schema.sql
Log:
Backport r14825 and r14826 to address ISSN ingest and retrieval bugs


Modified: branches/rel_1_6/Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
===================================================================
--- branches/rel_1_6/Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm	2009-11-09 01:10:07 UTC (rev 14829)
+++ branches/rel_1_6/Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm	2009-11-09 01:11:08 UTC (rev 14830)
@@ -1025,7 +1025,8 @@
     }
 
     $string =~ s/(\w+)\/(\w+)/$1 $2/sgo;
-    $string =~ s/(\d{4})-(\d{4})/ $1 $2 /sgo;
+    # Split date ranges and ISSNs on the hyphen
+    $string =~ s/(\d{4})-(\d{3,4}x?)/ $1 $2 /goi;
 
     return NFD($string);
 }
@@ -1181,92 +1182,88 @@
 
     for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
         next unless $tagline;
+        _special_tag_to_full_rows($type, $tagline, \@ns_list, 'LDR');
+    }
 
-        my $ns = $type->new;
+    for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
+        next unless $tagline;
+        _special_tag_to_full_rows($type, $tagline, \@ns_list, $tagline->getAttribute( "tag" ));
+    }
 
-        $ns->tag( 'LDR' );
-        my $val = $tagline->textContent;
-        $val = NFD($val);
-        $val =~ s/\pM+//sgo;
-        $val =~ s/\pC+//sgo;
-        $val =~ s/\W+$//sgo;
-        $ns->value( $val );
+    for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
+        next unless $tagline;
+        _data_tag_to_full_rows($type, $tagline, \@ns_list, $tagline->getAttribute( "tag" ));
 
-        push @ns_list, $ns;
+        if ($xmltype eq 'metabib' and $tag eq '245') {
+            _data_tag_to_full_rows($type, $tagline, \@ns_list, 'tnf');
+        }
     }
 
-    for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
-        next unless $tagline;
+    $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
+    return @ns_list;
+}
 
-        my $ns = $type->new;
+=head2 _special_tag_to_full_rows
 
-        $ns->tag( $tagline->getAttribute( "tag" ) );
-        my $val = $tagline->textContent;
-        $val = NFD($val);
-        $val =~ s/\pM+//sgo;
-        $val =~ s/\pC+//sgo;
-        $val =~ s/\W+$//sgo;
-        $ns->value( $val );
+Converts a leader or control field to a set of normalized values
 
-        push @ns_list, $ns;
-    }
+=cut
 
-    for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
-        next unless $tagline;
+sub _special_tag_to_full_rows {
+    my $type = shift;
+    my $tagline = shift;
+    my $ns_list = shift;
+    my $tagname = shift;
 
-        my $tag = $tagline->getAttribute( "tag" );
-        my $ind1 = $tagline->getAttribute( "ind1" );
-        my $ind2 = $tagline->getAttribute( "ind2" );
+    my $ns = $type->new;
 
-        for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
-            next unless $data;
+    $ns->tag( $tagname );
+    my $val = $tagline->textContent;
+    $val = NFD($val);
+    $val =~ s/\pM+//sgo;
+    $val =~ s/\pC+//sgo;
+    $val =~ s/\W+$//sgo;
+    $ns->value( $val );
 
-            my $ns = $type->new;
+    push @$ns_list, $ns;
+}
 
-            $ns->tag( $tag );
-            $ns->ind1( $ind1 );
-            $ns->ind2( $ind2 );
-            $ns->subfield( $data->getAttribute( "code" ) );
-            my $val = $data->textContent;
-            $val = NFD($val);
-            $val =~ s/\pM+//sgo;
-            $val =~ s/\pC+//sgo;
-            $val =~ s/\W+$//sgo;
-            $val =~ s/(\d{4})-(\d{4})/ $1 $2 /sgo;
-            $val =~ s/(\w+)\/(\w+)/$1 $2/sgo;
-            $ns->value( lc($val) );
+=head2 _data_tag_to_full_rows
 
-            push @ns_list, $ns;
-        }
+Converts a data field to a set of normalized values
 
-        if ($xmltype eq 'metabib' and $tag eq '245') {
-               $tag = 'tnf';
-    
-            for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
-                next unless ($data and $data->getAttribute( "code" ) eq 'a');
-    
-                $ns = $type->new;
-    
-                $ns->tag( $tag );
-                $ns->ind1( $ind1 );
-                $ns->ind2( $ind2 );
-                $ns->subfield( $data->getAttribute( "code" ) );
-                my $val = substr( $data->textContent, $ind2 );
-                $val = NFD($val);
-                $val =~ s/\pM+//sgo;
-                $val =~ s/\pC+//sgo;
-                $val =~ s/\W+$//sgo;
-                $val =~ s/(\w+)\/(\w+)/$1 $2/sgo;
-                $val =~ s/(\d{4})-(\d{4})/ $1 $2 /sgo;
-                $ns->value( lc($val) );
-    
-                push @ns_list, $ns;
-            }
-        }
-    }
+=cut
 
-    $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
-    return @ns_list;
+sub _data_tag_to_full_rows {
+    my $type = shift;
+    my $tagline = shift;
+    my $ns_list = shift;
+    my $tag = shift;
+
+    my $ind1 = $tagline->getAttribute( "ind1" );
+    my $ind2 = $tagline->getAttribute( "ind2" );
+
+    for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
+        next unless $data;
+
+        my $ns = $type->new;
+
+        $ns->tag( $tag );
+        $ns->ind1( $ind1 );
+        $ns->ind2( $ind2 );
+        $ns->subfield( $data->getAttribute( "code" ) );
+        my $val = $data->textContent;
+        $val = NFD($val);
+        $val =~ s/\pM+//sgo;
+        $val =~ s/\pC+//sgo;
+        $val =~ s/\W+$//sgo;
+        # Split date ranges and ISSNs on the hyphen
+        $val =~ s/(\d{4})-(\d{3,4}x?)/ $1 $2 /goi;
+        $val =~ s/(\w+)\/(\w+)/$1 $2/sgo;
+        $ns->value( lc($val) );
+
+        push @$ns_list, $ns;
+    }
 }
 
 sub flat_marc_xml {

Modified: branches/rel_1_6/Open-ILS/src/sql/Pg/reporter-schema.sql
===================================================================
--- branches/rel_1_6/Open-ILS/src/sql/Pg/reporter-schema.sql	2009-11-09 01:10:07 UTC (rev 14829)
+++ branches/rel_1_6/Open-ILS/src/sql/Pg/reporter-schema.sql	2009-11-09 01:11:08 UTC (rev 14830)
@@ -127,7 +127,7 @@
 	series_statement.value AS series_statement,
 	summary.value AS summary,
 	ARRAY_ACCUM( SUBSTRING(isbn.value FROM $$^\S+$$) ) AS isbn,
-	ARRAY_ACCUM( SUBSTRING(issn.value FROM $$^\S+$$) ) AS issn,
+	ARRAY_ACCUM( REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) AS issn,
 	ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '650' AND subfield = 'a' AND record = r.id)) AS topic_subject,
 	ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '651' AND subfield = 'a' AND record = r.id)) AS geographic_subject,
 	ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '655' AND subfield = 'a' AND record = r.id)) AS genre,



More information about the open-ils-commits mailing list