[open-ils-commits] r12664 - in trunk/Open-ILS/src: extras/import perlmods/OpenILS/Application perlmods/OpenILS/Application/Search perlmods/OpenILS/WWW (dbs)
svn at svn.open-ils.org
svn at svn.open-ils.org
Tue Mar 24 23:30:10 EDT 2009
Author: dbs
Date: 2009-03-24 23:30:07 -0400 (Tue, 24 Mar 2009)
New Revision: 12664
Modified:
trunk/Open-ILS/src/extras/import/marc2are.pl
trunk/Open-ILS/src/extras/import/marc2bre.pl
trunk/Open-ILS/src/perlmods/OpenILS/Application/AppUtils.pm
trunk/Open-ILS/src/perlmods/OpenILS/Application/Search/Z3950.pm
trunk/Open-ILS/src/perlmods/OpenILS/Application/SuperCat.pm
trunk/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm
Log:
Dedupe code by moving to a common implementation of entityize()
Well, almost common. Perhaps we should make strip_ctrl_chars() and ampersize() a standard part of entityize.
Modified: trunk/Open-ILS/src/extras/import/marc2are.pl
===================================================================
--- trunk/Open-ILS/src/extras/import/marc2are.pl 2009-03-25 03:14:33 UTC (rev 12663)
+++ trunk/Open-ILS/src/extras/import/marc2are.pl 2009-03-25 03:30:07 UTC (rev 12664)
@@ -69,7 +69,7 @@
$xml =~ s/^<\?xml.+\?\s*>//go;
$xml =~ s/>\s+</></go;
$xml =~ s/\p{Cc}//go;
- $xml = entityize($xml,'D');
+ $xml = OpenILS::Application::AppUtils->entityize($xml,'D');
$xml =~ s/[\x00-\x1f]//go;
my $bib = new Fieldmapper::authority::record_entry;
@@ -124,17 +124,3 @@
return $authtoken;
}
-sub entityize {
- my $stuff = shift;
- my $form = shift;
-
- if ($form and $form eq 'D') {
- $stuff = NFD($stuff);
- } else {
- $stuff = NFC($stuff);
- }
-
- $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
- return $stuff;
-}
-
Modified: trunk/Open-ILS/src/extras/import/marc2bre.pl
===================================================================
--- trunk/Open-ILS/src/extras/import/marc2bre.pl 2009-03-25 03:14:33 UTC (rev 12663)
+++ trunk/Open-ILS/src/extras/import/marc2bre.pl 2009-03-25 03:30:07 UTC (rev 12664)
@@ -8,6 +8,7 @@
use OpenILS::Utils::Fieldmapper;
use Digest::MD5 qw/md5_hex/;
use OpenSRF::Utils::JSON;
+use OpenILS::Application::AppUtils;
use Data::Dumper;
use Unicode::Normalize;
use Encode;
@@ -260,7 +261,7 @@
$xml =~ s/^<\?xml.+\?\s*>//go;
$xml =~ s/>\s+</></go;
$xml =~ s/\p{Cc}//go;
- $xml = entityize($xml,'D');
+ $xml = OpenILS::Application::AppUtils->entityize($xml,'D');
$xml =~ s/[\x00-\x1f]//go;
my $bib = new Fieldmapper::biblio::record_entry;
@@ -392,20 +393,6 @@
return ($field901, $tcn_value, $tcn_source);
}
-sub entityize {
- my $stuff = shift;
- my $form = shift;
-
- if ($form and $form eq 'D') {
- $stuff = NFD($stuff);
- } else {
- $stuff = NFC($stuff);
- }
-
- $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
- return $stuff;
-}
-
sub despace {
my $value = shift;
Modified: trunk/Open-ILS/src/perlmods/OpenILS/Application/AppUtils.pm
===================================================================
--- trunk/Open-ILS/src/perlmods/OpenILS/Application/AppUtils.pm 2009-03-25 03:14:33 UTC (rev 12663)
+++ trunk/Open-ILS/src/perlmods/OpenILS/Application/AppUtils.pm 2009-03-25 03:30:07 UTC (rev 12664)
@@ -1399,7 +1399,23 @@
return $string;
}
+# x0000-x0008 isn't legal in XML documents
+# XXX Perhaps this should just go into our standard entityize method
+sub strip_ctrl_chars {
+ my ($self, $string) = @_;
+ $string =~ s/([\x{0000}-\x{0008}])//sgoe;
+ return $string;
+}
+
+# Ampersands are special, mmmkay?
+# XXX Perhaps this should go into our standard entityize method
+sub ampersize {
+ my $stuff = shift();
+ $stuff =~ s/&(?!\S+;)/&amp;/gso;
+ return $stuff;
+}
+
sub get_copy_price {
my($self, $e, $copy, $volume) = @_;
Modified: trunk/Open-ILS/src/perlmods/OpenILS/Application/Search/Z3950.pm
===================================================================
--- trunk/Open-ILS/src/perlmods/OpenILS/Application/Search/Z3950.pm 2009-03-25 03:14:33 UTC (rev 12663)
+++ trunk/Open-ILS/src/perlmods/OpenILS/Application/Search/Z3950.pm 2009-03-25 03:30:07 UTC (rev 12664)
@@ -379,9 +379,11 @@
die "Unsupported record transmission format $tformat"
}
- $marcs = entityize($marc->as_xml_record);
+ $marcs = $U->entityize($marc->as_xml_record);
+ $marcs = $U->strip_ctrl_chars($marcs);
my $doc = XML::LibXML->new->parse_string($marcs);
- $marcxml = entityize( $doc->documentElement->toString );
+ $marcxml = $U->entityize($doc->documentElement->toString);
+ $marcxml = $U->strip_ctrl_chars($marcxml);
my $u = OpenILS::Utils::ModsParser->new();
$u->start_mods_batch( $marcxml );
@@ -434,28 +436,4 @@
return $str;
}
-
-
-# -------------------------------------------------------------------
-# Handles the unicode
-# -------------------------------------------------------------------
-sub entityize {
- my $stuff = shift;
- my $form = shift || "";
-
- if ($form eq 'D') {
- $stuff = NFD($stuff);
- } else {
- $stuff = NFC($stuff);
- }
-
- $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
-
- # strip some other unfriendly chars that may leak in
- $stuff =~ s/([\x{0000}-\x{0008}])//sgoe;
-
- return $stuff;
-}
-
-
1;
Modified: trunk/Open-ILS/src/perlmods/OpenILS/Application/SuperCat.pm
===================================================================
--- trunk/Open-ILS/src/perlmods/OpenILS/Application/SuperCat.pm 2009-03-25 03:14:33 UTC (rev 12663)
+++ trunk/Open-ILS/src/perlmods/OpenILS/Application/SuperCat.pm 2009-03-25 03:30:07 UTC (rev 12664)
@@ -23,6 +23,8 @@
# ... and this is our OpenILS object (en|de)coder and psuedo-ORM package.
use OpenILS::Utils::Fieldmapper;
+# ... and this has some handy common methods
+use OpenILS::Application::AppUtils;
# We'll be working with XML, so...
use XML::LibXML;
@@ -39,6 +41,8 @@
%holdings_data_cache,
);
+my $U = 'OpenILS::Application::AppUtils';
+
sub child_init {
# we need an XML parser
$_parser = new XML::LibXML;
@@ -217,13 +221,6 @@
}
}
-
-sub entityize {
- my $stuff = NFC(shift());
- $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
- return $stuff;
-}
-
sub tree_walker {
my $tree = shift;
my $field = shift;
@@ -880,7 +877,7 @@
my $_storage = OpenSRF::AppSession->create( 'open-ils.cstore' );
my $record = $_storage->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rid )->gather(1);
- return entityize( $record->marc ) if ($record);
+ return $U->entityize( $record->marc ) if ($record);
return undef;
}
@@ -920,7 +917,7 @@
return undef unless (@$recs);
my $record = $_storage->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $recs->[0]->record )->gather(1);
- return entityize( $record->marc ) if ($record);
+ return $U->entityize( $record->marc ) if ($record);
return undef;
}
@@ -962,7 +959,7 @@
return undef unless ($record);
- return entityize($record_xslt{$transform}{xslt}->transform( $_parser->parse_string( $record->marc ) )->toString);
+ return $U->entityize($record_xslt{$transform}{xslt}->transform( $_parser->parse_string( $record->marc ) )->toString);
}
sub retrieve_isbn_transform {
@@ -985,7 +982,7 @@
return undef unless ($record);
- return entityize($record_xslt{$transform}{xslt}->transform( $_parser->parse_string( $record->marc ) )->toString);
+ return $U->entityize($record_xslt{$transform}{xslt}->transform( $_parser->parse_string( $record->marc ) )->toString);
}
sub retrieve_record_objects {
@@ -1215,7 +1212,7 @@
$_storage->disconnect;
- return entityize($mods->toString);
+ return $U->entityize($mods->toString);
}
__PACKAGE__->register_method(
Modified: trunk/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm
===================================================================
--- trunk/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm 2009-03-25 03:14:33 UTC (rev 12663)
+++ trunk/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm 2009-03-25 03:30:07 UTC (rev 12664)
@@ -25,11 +25,13 @@
use OpenILS::Utils::Fieldmapper;
use OpenILS::WWW::SuperCat::Feed;
use OpenSRF::Utils::Logger qw/$logger/;
+use OpenILS::Application::AppUtils;
use MARC::Record;
use MARC::File::XML;
my $log = 'OpenSRF::Utils::Logger';
+my $U = 'OpenILS::Application::AppUtils';
# set the bootstrap config when this module is loaded
my ($bootstrap, $supercat, $actor, $parser, $search, $xslt, $cn_browse_xslt, %browse_types);
@@ -72,7 +74,7 @@
my $r_doc = $parser->parse_string($cn->record->marc);
$r_doc->documentElement->setAttribute( id => $rec_tag );
- $content .= entityize($r_doc->documentElement->toString);
+ $content .= $U->entityize($U->ampersize($r_doc->documentElement->toString));
$content .= "</hold:volume>";
}
@@ -101,13 +103,13 @@
return (
"Content-type: text/html\n\n",
- entityize(
+ $U->entityize($U->ampersize(
$cn_browse_xslt->transform(
$parser->parse_string( $xml ),
'prev' => "'$p'",
'next' => "'$n'"
)->toString(1)
- )
+ ))
);
};
@@ -451,7 +453,7 @@
$feed->link( unapi => $base) if ($flesh_feed);
print "Content-type: ". $feed->type ."; charset=utf-8\n\n";
- print entityize($feed->toString) . "\n";
+ print $U->entityize($U->ampersize($feed->toString)) . "\n";
return Apache2::Const::OK;
}
@@ -697,7 +699,7 @@
$feed->link( unapi => $base) if ($flesh_feed);
print "Content-type: ". $feed->type ."; charset=utf-8\n\n";
- print entityize($feed->toString) . "\n";
+ print $U->entityize($U->ampersize($feed->toString)) . "\n";
return Apache2::Const::OK;
}
@@ -722,7 +724,7 @@
}
print "Content-type: application/xml; charset=utf-8\n\n";
- print entityize( $parser->parse_string( $req->gather(1) )->documentElement->toString );
+ print $U->entityize($U->ampersize( $parser->parse_string( $req->gather(1) )->documentElement->toString ));
return Apache2::Const::OK;
}
@@ -798,7 +800,7 @@
print "Content-type: ". $feed->type ."; charset=utf-8\n\n";
- print entityize($feed->toString) . "\n";
+ print $U->entityize($U->ampersize($feed->toString)) . "\n";
return Apache2::Const::OK;
}
@@ -867,7 +869,7 @@
print "Content-type: ". $feed->type ."; charset=utf-8\n\n";
- print entityize($feed->toString) . "\n";
+ print $U->entityize($U->ampersize($feed->toString)) . "\n";
return Apache2::Const::OK;
}
@@ -1244,13 +1246,6 @@
return $feed;
}
-sub entityize {
- my $stuff = NFC(shift());
- $stuff =~ s/&(?!\S+;)/&amp;/gso;
- $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
- return $stuff;
-}
-
sub string_browse {
my $apache = shift;
return Apache2::Const::DECLINED if (-e $apache->filename);
@@ -1636,7 +1631,7 @@
}
print $cgi->header( -type => 'application/xml' );
- print entityize($resp->asXML) . "\n";
+ print $U->entityize($U->ampersize($resp->asXML)) . "\n";
return Apache2::Const::OK;
}
More information about the open-ils-commits
mailing list