[open-ils-commits] r12664 - in trunk/Open-ILS/src: extras/import perlmods/OpenILS/Application perlmods/OpenILS/Application/Search perlmods/OpenILS/WWW (dbs)

svn at svn.open-ils.org svn at svn.open-ils.org
Tue Mar 24 23:30:10 EDT 2009


Author: dbs
Date: 2009-03-24 23:30:07 -0400 (Tue, 24 Mar 2009)
New Revision: 12664

Modified:
   trunk/Open-ILS/src/extras/import/marc2are.pl
   trunk/Open-ILS/src/extras/import/marc2bre.pl
   trunk/Open-ILS/src/perlmods/OpenILS/Application/AppUtils.pm
   trunk/Open-ILS/src/perlmods/OpenILS/Application/Search/Z3950.pm
   trunk/Open-ILS/src/perlmods/OpenILS/Application/SuperCat.pm
   trunk/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm
Log:
Dedupe code by moving to a common implementation of entityize()
Well, almost common. Perhaps we should make strip_ctrl_chars() and ampersize() a standard part of entityize.


Modified: trunk/Open-ILS/src/extras/import/marc2are.pl
===================================================================
--- trunk/Open-ILS/src/extras/import/marc2are.pl	2009-03-25 03:14:33 UTC (rev 12663)
+++ trunk/Open-ILS/src/extras/import/marc2are.pl	2009-03-25 03:30:07 UTC (rev 12664)
@@ -69,7 +69,7 @@
 	$xml =~ s/^<\?xml.+\?\s*>//go;
 	$xml =~ s/>\s+</></go;
 	$xml =~ s/\p{Cc}//go;
-	$xml = entityize($xml,'D');
+	$xml = OpenILS::Application::AppUtils->entityize($xml,'D');
 	$xml =~ s/[\x00-\x1f]//go;
 
 	my $bib = new Fieldmapper::authority::record_entry;
@@ -124,17 +124,3 @@
         return $authtoken;
 }       
 
-sub entityize {
-        my $stuff = shift;
-        my $form = shift;
-
-        if ($form and $form eq 'D') {
-                $stuff = NFD($stuff);
-        } else {
-                $stuff = NFC($stuff);
-        }
-
-        $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
-        return $stuff;
-}
-

Modified: trunk/Open-ILS/src/extras/import/marc2bre.pl
===================================================================
--- trunk/Open-ILS/src/extras/import/marc2bre.pl	2009-03-25 03:14:33 UTC (rev 12663)
+++ trunk/Open-ILS/src/extras/import/marc2bre.pl	2009-03-25 03:30:07 UTC (rev 12664)
@@ -8,6 +8,7 @@
 use OpenILS::Utils::Fieldmapper;
 use Digest::MD5 qw/md5_hex/;
 use OpenSRF::Utils::JSON;
+use OpenILS::Application::AppUtils;
 use Data::Dumper;
 use Unicode::Normalize;
 use Encode;
@@ -260,7 +261,7 @@
 	$xml =~ s/^<\?xml.+\?\s*>//go;
 	$xml =~ s/>\s+</></go;
 	$xml =~ s/\p{Cc}//go;
-	$xml = entityize($xml,'D');
+	$xml = OpenILS::Application::AppUtils->entityize($xml,'D');
 	$xml =~ s/[\x00-\x1f]//go;
 
 	my $bib = new Fieldmapper::biblio::record_entry;
@@ -392,20 +393,6 @@
 	return ($field901, $tcn_value, $tcn_source);
 }
 
-sub entityize {
-        my $stuff = shift;
-        my $form = shift;
-
-        if ($form and $form eq 'D') {
-                $stuff = NFD($stuff);
-        } else {
-                $stuff = NFC($stuff);
-        }
-
-        $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
-        return $stuff;
-}
-
 sub despace {
 	my $value = shift;
 

Modified: trunk/Open-ILS/src/perlmods/OpenILS/Application/AppUtils.pm
===================================================================
--- trunk/Open-ILS/src/perlmods/OpenILS/Application/AppUtils.pm	2009-03-25 03:14:33 UTC (rev 12663)
+++ trunk/Open-ILS/src/perlmods/OpenILS/Application/AppUtils.pm	2009-03-25 03:30:07 UTC (rev 12664)
@@ -1399,7 +1399,23 @@
 	return $string;
 }
 
+# Removes the characters U+0000-U+0008 (not legal in XML documents) from $string and returns the result
+# XXX Perhaps this should just go into our standard entityize method
+sub strip_ctrl_chars {
+	my ($self, $string) = @_;
 
+	$string =~ s/([\x{0000}-\x{0008}])//sgoe; 
+	return $string;
+}
+
+# Escapes bare ampersands as &amp;, leaving anything shaped like an entity (&...;) alone; returns the new string
+# XXX Perhaps this should go into our standard entityize method
+sub ampersize {
+	my $stuff = pop;	# last argument, so both $U->ampersize($x) and ampersize($x) get the string, not the invocant
+	$stuff =~ s/&(?!\S+;)/&amp;/gso;
+	return $stuff;
+}
+
 sub get_copy_price {
 	my($self, $e, $copy, $volume) = @_;
 

Modified: trunk/Open-ILS/src/perlmods/OpenILS/Application/Search/Z3950.pm
===================================================================
--- trunk/Open-ILS/src/perlmods/OpenILS/Application/Search/Z3950.pm	2009-03-25 03:14:33 UTC (rev 12663)
+++ trunk/Open-ILS/src/perlmods/OpenILS/Application/Search/Z3950.pm	2009-03-25 03:30:07 UTC (rev 12664)
@@ -379,9 +379,11 @@
                 die "Unsupported record transmission format $tformat"
             }
 
-			$marcs	= entityize($marc->as_xml_record);
+			$marcs	= $U->entityize($marc->as_xml_record);
+			$marcs	= $U->strip_ctrl_chars($marcs);
 			my $doc	= XML::LibXML->new->parse_string($marcs);
-			$marcxml = entityize( $doc->documentElement->toString );
+			$marcxml = $U->entityize($doc->documentElement->toString);
+			$marcxml = $U->strip_ctrl_chars($marcxml);
 	
 			my $u = OpenILS::Utils::ModsParser->new();
 			$u->start_mods_batch( $marcxml );
@@ -434,28 +436,4 @@
 	return $str;
 }
 
-
-
-# -------------------------------------------------------------------
-# Handles the unicode
-# -------------------------------------------------------------------
-sub entityize {
-	my $stuff = shift;
-	my $form = shift || "";
-	
-	if ($form eq 'D') {
-		$stuff = NFD($stuff);
-	} else {
-		$stuff = NFC($stuff);
-	}
-	
-	$stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
-
-	# strip some other unfriendly chars that may leak in
-   $stuff =~ s/([\x{0000}-\x{0008}])//sgoe; 
-
-	return $stuff;
-}
-
-
 1;

Modified: trunk/Open-ILS/src/perlmods/OpenILS/Application/SuperCat.pm
===================================================================
--- trunk/Open-ILS/src/perlmods/OpenILS/Application/SuperCat.pm	2009-03-25 03:14:33 UTC (rev 12663)
+++ trunk/Open-ILS/src/perlmods/OpenILS/Application/SuperCat.pm	2009-03-25 03:30:07 UTC (rev 12664)
@@ -23,6 +23,8 @@
 # ... and this is our OpenILS object (en|de)coder and psuedo-ORM package.
 use OpenILS::Utils::Fieldmapper;
 
+# ... and this has some handy common methods
+use OpenILS::Application::AppUtils;
 
 # We'll be working with XML, so...
 use XML::LibXML;
@@ -39,6 +41,8 @@
   %holdings_data_cache,
 );
 
+my $U = 'OpenILS::Application::AppUtils';
+
 sub child_init {
 	# we need an XML parser
 	$_parser = new XML::LibXML;
@@ -217,13 +221,6 @@
 	}
 }
 
-
-sub entityize {
-	my $stuff = NFC(shift());
-	$stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
-	return $stuff;
-}
-
 sub tree_walker {
 	my $tree = shift;
 	my $field = shift;
@@ -880,7 +877,7 @@
 	my $_storage = OpenSRF::AppSession->create( 'open-ils.cstore' );
 
 	my $record = $_storage->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rid )->gather(1);
-	return entityize( $record->marc ) if ($record);
+	return $U->entityize( $record->marc ) if ($record);
 	return undef;
 }
 
@@ -920,7 +917,7 @@
 	return undef unless (@$recs);
 
 	my $record = $_storage->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $recs->[0]->record )->gather(1);
-	return entityize( $record->marc ) if ($record);
+	return $U->entityize( $record->marc ) if ($record);
 	return undef;
 }
 
@@ -962,7 +959,7 @@
 
 	return undef unless ($record);
 
-	return entityize($record_xslt{$transform}{xslt}->transform( $_parser->parse_string( $record->marc ) )->toString);
+	return $U->entityize($record_xslt{$transform}{xslt}->transform( $_parser->parse_string( $record->marc ) )->toString);
 }
 
 sub retrieve_isbn_transform {
@@ -985,7 +982,7 @@
 
 	return undef unless ($record);
 
-	return entityize($record_xslt{$transform}{xslt}->transform( $_parser->parse_string( $record->marc ) )->toString);
+	return $U->entityize($record_xslt{$transform}{xslt}->transform( $_parser->parse_string( $record->marc ) )->toString);
 }
 
 sub retrieve_record_objects {
@@ -1215,7 +1212,7 @@
 
 	$_storage->disconnect;
 
-	return entityize($mods->toString);
+	return $U->entityize($mods->toString);
 
 }
 __PACKAGE__->register_method(

Modified: trunk/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm
===================================================================
--- trunk/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm	2009-03-25 03:14:33 UTC (rev 12663)
+++ trunk/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm	2009-03-25 03:30:07 UTC (rev 12664)
@@ -25,11 +25,13 @@
 use OpenILS::Utils::Fieldmapper;
 use OpenILS::WWW::SuperCat::Feed;
 use OpenSRF::Utils::Logger qw/$logger/;
+use OpenILS::Application::AppUtils;
 
 use MARC::Record;
 use MARC::File::XML;
 
 my $log = 'OpenSRF::Utils::Logger';
+my $U = 'OpenILS::Application::AppUtils';
 
 # set the bootstrap config when this module is loaded
 my ($bootstrap, $supercat, $actor, $parser, $search, $xslt, $cn_browse_xslt, %browse_types);
@@ -72,7 +74,7 @@
 
 		my $r_doc = $parser->parse_string($cn->record->marc);
 		$r_doc->documentElement->setAttribute( id => $rec_tag );
-		$content .= entityize($r_doc->documentElement->toString);
+		$content .= $U->entityize($U->ampersize($r_doc->documentElement->toString));
 
 		$content .= "</hold:volume>";
 	}
@@ -101,13 +103,13 @@
 
 	return (
 		"Content-type: text/html\n\n",
-		entityize(
+		$U->entityize($U->ampersize(
 			$cn_browse_xslt->transform(
 				$parser->parse_string( $xml ),
 				'prev' => "'$p'",
 				'next' => "'$n'"
 			)->toString(1)
-		)
+		))
 	);
 };
 
@@ -451,7 +453,7 @@
 		$feed->link( unapi => $base) if ($flesh_feed);
 
 		print "Content-type: ". $feed->type ."; charset=utf-8\n\n";
-		print entityize($feed->toString) . "\n";
+		print $U->entityize($U->ampersize($feed->toString)) . "\n";
 
 		return Apache2::Const::OK;
 	}
@@ -697,7 +699,7 @@
 		$feed->link( unapi => $base) if ($flesh_feed);
 
 		print "Content-type: ". $feed->type ."; charset=utf-8\n\n";
-		print entityize($feed->toString) . "\n";
+		print $U->entityize($U->ampersize($feed->toString)) . "\n";
 
 		return Apache2::Const::OK;
 	}
@@ -722,7 +724,7 @@
 	}
 
 	print "Content-type: application/xml; charset=utf-8\n\n";
-	print entityize( $parser->parse_string( $req->gather(1) )->documentElement->toString );
+	print $U->entityize($U->ampersize( $parser->parse_string( $req->gather(1) )->documentElement->toString ));
 
 	return Apache2::Const::OK;
 }
@@ -798,7 +800,7 @@
 
 
 	print "Content-type: ". $feed->type ."; charset=utf-8\n\n";
-	print entityize($feed->toString) . "\n";
+	print $U->entityize($U->ampersize($feed->toString)) . "\n";
 
 	return Apache2::Const::OK;
 }
@@ -867,7 +869,7 @@
 
 
 	print "Content-type: ". $feed->type ."; charset=utf-8\n\n";
-	print entityize($feed->toString) . "\n";
+	print $U->entityize($U->ampersize($feed->toString)) . "\n";
 
 	return Apache2::Const::OK;
 }
@@ -1244,13 +1246,6 @@
 	return $feed;
 }
 
-sub entityize {
-	my $stuff = NFC(shift());
-	$stuff =~ s/&(?!\S+;)/&amp;/gso;
-	$stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
-	return $stuff;
-}
-
 sub string_browse {
 	my $apache = shift;
 	return Apache2::Const::DECLINED if (-e $apache->filename);
@@ -1636,7 +1631,7 @@
 	}
 
    	print $cgi->header( -type => 'application/xml' );
-   	print entityize($resp->asXML) . "\n";
+   	print $U->entityize($U->ampersize($resp->asXML)) . "\n";
     return Apache2::Const::OK;
 }
 



More information about the open-ils-commits mailing list