[open-ils-commits] r14825 - trunk/Open-ILS/src/perlmods/OpenILS/Application (dbs)
svn at svn.open-ils.org
svn at svn.open-ils.org
Sat Nov 7 20:29:05 EST 2009
Author: dbs
Date: 2009-11-07 20:29:04 -0500 (Sat, 07 Nov 2009)
New Revision: 14825
Modified:
trunk/Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
Log:
Normalize ISSNs on ingest so that "1972-156X" gets added as " 1972 156x " to mfr / mkfe.
This enables keyword searching of this subset of ISSNs to work because it
sidesteps the full text search tokenizer that would otherwise have indexed
the values as "1972", "-156", and "x".
Modified: trunk/Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
===================================================================
--- trunk/Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm 2009-11-07 20:02:32 UTC (rev 14824)
+++ trunk/Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm 2009-11-08 01:29:04 UTC (rev 14825)
@@ -1025,7 +1025,8 @@
}
$string =~ s/(\w+)\/(\w+)/$1 $2/sgo;
- $string =~ s/(\d{4})-(\d{4})/ $1 $2 /sgo;
+ # Split date ranges and ISSNs on the hyphen
+ $string =~ s/(\d{4})-(\d{3,4}x?)/ $1 $2 /goi;
return NFD($string);
}
@@ -1181,92 +1182,88 @@
for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
next unless $tagline;
+ _special_tag_to_full_rows($type, $tagline, \@ns_list, 'LDR');
+ }
- my $ns = $type->new;
+ for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
+ next unless $tagline;
+ _special_tag_to_full_rows($type, $tagline, \@ns_list, $tagline->getAttribute( "tag" ));
+ }
- $ns->tag( 'LDR' );
- my $val = $tagline->textContent;
- $val = NFD($val);
- $val =~ s/\pM+//sgo;
- $val =~ s/\pC+//sgo;
- $val =~ s/\W+$//sgo;
- $ns->value( $val );
+ for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
+ next unless $tagline;
+ _data_tag_to_full_rows($type, $tagline, \@ns_list, $tagline->getAttribute( "tag" ));
- push @ns_list, $ns;
+ if ($xmltype eq 'metabib' and $tag eq '245') {
+ _data_tag_to_full_rows($type, $tagline, \@ns_list, 'tnf');
+ }
}
- for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
- next unless $tagline;
+ $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
+ return @ns_list;
+}
- my $ns = $type->new;
+=head2 _special_tag_to_full_rows
- $ns->tag( $tagline->getAttribute( "tag" ) );
- my $val = $tagline->textContent;
- $val = NFD($val);
- $val =~ s/\pM+//sgo;
- $val =~ s/\pC+//sgo;
- $val =~ s/\W+$//sgo;
- $ns->value( $val );
+Converts a leader or control field to a set of normalized values
- push @ns_list, $ns;
- }
+=cut
- for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
- next unless $tagline;
+sub _special_tag_to_full_rows {
+ my $type = shift;
+ my $tagline = shift;
+ my $ns_list = shift;
+ my $tagname = shift;
- my $tag = $tagline->getAttribute( "tag" );
- my $ind1 = $tagline->getAttribute( "ind1" );
- my $ind2 = $tagline->getAttribute( "ind2" );
+ my $ns = $type->new;
- for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
- next unless $data;
+ $ns->tag( $tagname );
+ my $val = $tagline->textContent;
+ $val = NFD($val);
+ $val =~ s/\pM+//sgo;
+ $val =~ s/\pC+//sgo;
+ $val =~ s/\W+$//sgo;
+ $ns->value( $val );
- my $ns = $type->new;
+ push @$ns_list, $ns;
+}
- $ns->tag( $tag );
- $ns->ind1( $ind1 );
- $ns->ind2( $ind2 );
- $ns->subfield( $data->getAttribute( "code" ) );
- my $val = $data->textContent;
- $val = NFD($val);
- $val =~ s/\pM+//sgo;
- $val =~ s/\pC+//sgo;
- $val =~ s/\W+$//sgo;
- $val =~ s/(\d{4})-(\d{4})/ $1 $2 /sgo;
- $val =~ s/(\w+)\/(\w+)/$1 $2/sgo;
- $ns->value( lc($val) );
+=head2 _data_tag_to_full_rows
- push @ns_list, $ns;
- }
+Converts a data field to a set of normalized values
- if ($xmltype eq 'metabib' and $tag eq '245') {
- $tag = 'tnf';
-
- for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
- next unless ($data and $data->getAttribute( "code" ) eq 'a');
-
- $ns = $type->new;
-
- $ns->tag( $tag );
- $ns->ind1( $ind1 );
- $ns->ind2( $ind2 );
- $ns->subfield( $data->getAttribute( "code" ) );
- my $val = substr( $data->textContent, $ind2 );
- $val = NFD($val);
- $val =~ s/\pM+//sgo;
- $val =~ s/\pC+//sgo;
- $val =~ s/\W+$//sgo;
- $val =~ s/(\w+)\/(\w+)/$1 $2/sgo;
- $val =~ s/(\d{4})-(\d{4})/ $1 $2 /sgo;
- $ns->value( lc($val) );
-
- push @ns_list, $ns;
- }
- }
- }
+=cut
- $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
- return @ns_list;
+sub _data_tag_to_full_rows {
+ my $type = shift;
+ my $tagline = shift;
+ my $ns_list = shift;
+ my $tag = shift;
+
+ my $ind1 = $tagline->getAttribute( "ind1" );
+ my $ind2 = $tagline->getAttribute( "ind2" );
+
+ for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
+ next unless $data;
+
+ my $ns = $type->new;
+
+ $ns->tag( $tag );
+ $ns->ind1( $ind1 );
+ $ns->ind2( $ind2 );
+ $ns->subfield( $data->getAttribute( "code" ) );
+ my $val = $data->textContent;
+ $val = NFD($val);
+ $val =~ s/\pM+//sgo;
+ $val =~ s/\pC+//sgo;
+ $val =~ s/\W+$//sgo;
+ # Split date ranges and ISSNs on the hyphen
+ $val =~ s/(\d{4})-(\d{3,4}x?)/ $1 $2 /goi;
+ $val =~ s/(\w+)\/(\w+)/$1 $2/sgo;
+ $ns->value( lc($val) );
+
+ push @$ns_list, $ns;
+ }
}
sub flat_marc_xml {
More information about the open-ils-commits
mailing list