[open-ils-commits] r15283 - in branches/rel_1_6_0/Open-ILS: examples src/perlmods/OpenILS src/perlmods/OpenILS/SIP (dbs)

svn at svn.open-ils.org svn at svn.open-ils.org
Fri Jan 8 10:28:30 EST 2010


Author: dbs
Date: 2010-01-08 10:28:28 -0500 (Fri, 08 Jan 2010)
New Revision: 15283

Modified:
   branches/rel_1_6_0/Open-ILS/examples/oils_sip.xml.example
   branches/rel_1_6_0/Open-ILS/src/perlmods/OpenILS/SIP.pm
   branches/rel_1_6_0/Open-ILS/src/perlmods/OpenILS/SIP/Item.pm
   branches/rel_1_6_0/Open-ILS/src/perlmods/OpenILS/SIP/Patron.pm
Log:
Backport SIP Server patches from trunk: r15260, r15267, r15268

r15260: Enable the SIP server to speak UTF8 or ASCII, defaulting to ASCII

Some SIP clients have implemented UTF8 support, and it is desirable to
avoid mangling non-ASCII characters when possible, so a new <encoding>
option in the SIPServer config file enables sites to specify an
alternate encoding (such as "utf8"). The example oils_sip.xml file
will continue to default to ASCII, and if no <encoding> option is
supplied in the config file, ASCII will be the default.

In addition, this patch improves the ASCII option by applying the
ASCII mangling to all text fields, as well as ensuring that the
incoming data is in Normalization Form D mode to make the mangling
work cleanly.

Other encodings such as iso-8859-1 can be specified, but to properly
handle other encodings a transliteration phase would have to be added
to OpenILS::SIP::clean_text().

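r15267: Typo fix: __patron_items_info tested $self->{items_info} but caches
its result in $self->{item_info}; in theory fixing this will eliminate some
repeated calls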

r15268: We don't actually have access to the config from $self, but we can get it from OpenILS::SIP 


Modified: branches/rel_1_6_0/Open-ILS/examples/oils_sip.xml.example
===================================================================
--- branches/rel_1_6_0/Open-ILS/examples/oils_sip.xml.example	2010-01-08 15:27:21 UTC (rev 15282)
+++ branches/rel_1_6_0/Open-ILS/examples/oils_sip.xml.example	2010-01-08 15:28:28 UTC (rev 15283)
@@ -57,6 +57,17 @@
 				<bootstrap>SYSCONFDIR/opensrf_core.xml</bootstrap>
 				<currency>USD</currency>
 
+				<!-- The default encoding defined in the SIP specification is -->
+				<!-- ASCII, which isn't great for French, Spanish, Armenian. -->
+				<!-- You can specify a different encoding here, based on the -->
+				<!-- encodings supported by your SIP client and your Encode -->
+				<!-- module; run the following command to get a list of supported -->
+				<!-- encodings: -->
+				<!--   perl -MEncode -le "print for Encode->encodings(':all')" -->
+
+				<!-- UTF-8 is the recommended encoding if your SIP client supports it -->
+				<encoding>ascii</encoding>
+
 				<!-- These defines what this SIP code has the ability to support -->
 				<supports>
 					<item name='magnetic media' value='true'/>

Modified: branches/rel_1_6_0/Open-ILS/src/perlmods/OpenILS/SIP/Item.pm
===================================================================
--- branches/rel_1_6_0/Open-ILS/src/perlmods/OpenILS/SIP/Item.pm	2010-01-08 15:27:21 UTC (rev 15282)
+++ branches/rel_1_6_0/Open-ILS/src/perlmods/OpenILS/SIP/Item.pm	2010-01-08 15:28:28 UTC (rev 15283)
@@ -147,18 +147,19 @@
 sub title_id {
     my $self = shift;
     my $t =  ($self->{mods}) ? $self->{mods}->title : $self->{copy}->dummy_title;
-    $t =~ s/\pM+//og;
+    $t = OpenILS::SIP::clean_text($t);
+
     return $t;
 }
 
 sub permanent_location {
     my $self = shift;
-	 return $self->{volume}->owning_lib->name;
+    return OpenILS::SIP::clean_text($self->{volume}->owning_lib->name);
 }
 
 sub current_location {
     my $self = shift;
-	 return $self->{copy}->circ_lib->name;
+    return OpenILS::SIP::clean_text($self->{copy}->circ_lib->name);
 }
 
 
@@ -214,7 +215,7 @@
 
 sub owner {
     my $self = shift;
-	 return $self->{volume}->owning_lib->name;
+    return OpenILS::SIP::clean_text($self->{volume}->owning_lib->name);
 }
 
 sub hold_queue {
@@ -263,14 +264,14 @@
 # message to display on console
 sub screen_msg {
     my $self = shift;
-    return $self->{screen_msg} || '';
+    return OpenILS::SIP::clean_text($self->{screen_msg}) || '';
 }
 
 
 # reciept printer
 sub print_line {
      my $self = shift;
-     return $self->{print_line} || '';
+     return OpenILS::SIP::clean_text($self->{print_line}) || '';
 }
 
 

Modified: branches/rel_1_6_0/Open-ILS/src/perlmods/OpenILS/SIP/Patron.pm
===================================================================
--- branches/rel_1_6_0/Open-ILS/src/perlmods/OpenILS/SIP/Patron.pm	2010-01-08 15:27:21 UTC (rev 15282)
+++ branches/rel_1_6_0/Open-ILS/src/perlmods/OpenILS/SIP/Patron.pm	2010-01-08 15:28:28 UTC (rev 15283)
@@ -92,9 +92,9 @@
 
 sub name {
     my $self = shift;
-	 my $u = $self->{user};
-	 return $u->first_given_name . ' ' . 
-		$u->second_given_name . ' ' . $u->family_name;
+    my $u = $self->{user};
+    return OpenILS::SIP::clean_text($u->first_given_name . ' ' . 
+            $u->second_given_name . ' ' . $u->family_name);
 }
 
 sub home_library {
@@ -105,15 +105,15 @@
 }
 
 sub __addr_string {
-	my $addr = shift;
-	return "" unless $addr;
-	return $addr->street1 .' '. 
-		$addr->street2 .' '.
-		$addr->city .' '.
-		$addr->county .' '.
-		$addr->state .' '.
-		$addr->country .' '.
-		$addr->post_code;
+    my $addr = shift;
+    return "" unless $addr;
+    return OpenILS::SIP::clean_text($addr->street1 .' '. 
+        $addr->street2 .' '.
+        $addr->city .' '.
+        $addr->county .' '.
+        $addr->state .' '.
+        $addr->country .' '.
+        $addr->post_code);
 }
 
 sub address {
@@ -128,12 +128,12 @@
 
 sub email_addr {
     my $self = shift;
-	return $self->{user}->email;
+    return OpenILS::SIP::clean_text($self->{user}->email);
 }
 
 sub home_phone {
     my $self = shift;
-	return $self->{user}->day_phone;
+    return $self->{user}->day_phone;
 }
 
 sub sip_birthdate {
@@ -145,7 +145,7 @@
 
 sub ptype {
     my $self = shift;
-	return $self->{user}->profile->name;
+    return OpenILS::SIP::clean_text($self->{user}->profile->name);
 }
 
 sub language {
@@ -323,7 +323,7 @@
 	 );
 
 	my @holds;
-	push( @holds, $self->__hold_to_title($_) ) for @$holds;
+	push( @holds, OpenILS::SIP::clean_text($self->__hold_to_title($_)) ) for @$holds;
 
 	return (defined $start and defined $end) ? 
 		[ $holds[($start-1)..($end-1)] ] : 
@@ -402,7 +402,7 @@
 
 sub __patron_items_info {
 	my $self = shift;
-	return if $self->{items_info};
+	return if $self->{item_info};
 	$self->{item_info} = 
 		OpenILS::Application::Actor::_checked_out(
 			0, $self->{editor}, $self->{user}->id);;
@@ -419,14 +419,14 @@
 	syslog('LOG_DEBUG', "OILS: overdue_items() fleshing circs @overdues");
 	
 	
-	my @return_datatype = grep { $_->{name} eq 'msg64_summary_datatype' } @{$self->{config}->{implementation_config}->{options}->{option}};
+	my @return_datatype = grep { $_->{name} eq 'msg64_summary_datatype' } OpenILS::SIP::config()->{implementation_config}->{options}->{option};
 	
 	for my $circid (@overdues) {
 		next unless $circid;
 		if(@return_datatype and $return_datatype[0]->{value} eq 'barcode') {
 			push( @o, __circ_to_barcode($self->{editor}, $circid));
 		} else {
-			push( @o, __circ_to_title($self->{editor}, $circid));
+			push( @o, OpenILS::SIP::clean_text(__circ_to_title($self->{editor}, $circid)));
 		}
 	}
 	@overdues = @o;
@@ -466,14 +466,14 @@
 	my @c;
 	syslog('LOG_DEBUG', "OILS: charged_items() fleshing circs @charges");
 
-	my @return_datatype = grep { $_->{name} eq 'msg64_summary_datatype' } @{$self->{config}->{implementation_config}->{options}->{option}};
+	my @return_datatype = grep { $_->{name} eq 'msg64_summary_datatype' } OpenILS::SIP::config()->{implementation_config}->{options}->{option};
 
 	for my $circid (@charges) {
 		next unless $circid;
 		if(@return_datatype and $return_datatype[0]->{value} eq 'barcode') {
 			push( @c, __circ_to_barcode($self->{editor}, $circid));
 		} else {
-			push( @c, __circ_to_title($self->{editor}, $circid));
+			push( @c, OpenILS::SIP::clean_text(__circ_to_title($self->{editor}, $circid)));
 		}
 	}
 
@@ -525,7 +525,7 @@
 
 	# retrieve the un-fleshed user object for update
 	$u = $e->retrieve_actor_user($u->id);
-	my $note = $u->alert_message || "";
+	my $note = OpenILS::SIP::clean_text($u->alert_message) || "";
 	$note = "CARD BLOCKED BY SELF-CHECK MACHINE\n$note"; # XXX Config option
 
 	$u->alert_message($note);
@@ -569,7 +569,7 @@
 	my $e = OpenILS::SIP->editor();
 	$INET_PRIVS = $e->retrieve_all_config_net_access_level() unless $INET_PRIVS;
 	my ($level) = grep { $_->id eq $self->{user}->net_access_level } @$INET_PRIVS;
-	my $name = $level->name;
+	my $name = OpenILS::SIP::clean_text($level->name);
 	syslog('LOG_DEBUG', "OILS: Patron inet_privs = $name");
 	return $name;
 }

Modified: branches/rel_1_6_0/Open-ILS/src/perlmods/OpenILS/SIP.pm
===================================================================
--- branches/rel_1_6_0/Open-ILS/src/perlmods/OpenILS/SIP.pm	2010-01-08 15:27:21 UTC (rev 15282)
+++ branches/rel_1_6_0/Open-ILS/src/perlmods/OpenILS/SIP.pm	2010-01-08 15:28:28 UTC (rev 15283)
@@ -20,10 +20,13 @@
 use OpenILS::Application::AppUtils;
 use OpenSRF::Utils qw/:datetime/;
 use DateTime::Format::ISO8601;
+use Encode;
+use Unicode::Normalize;
 my $U = 'OpenILS::Application::AppUtils';
 
 my $editor;
 my $config;
+my $target_encoding;
 
 use Digest::MD5 qw(md5_hex);
 
@@ -39,6 +42,7 @@
 	$self->{institution} = $institution;
 
 	my $bsconfig = $institution->{implementation_config}->{bootstrap};
+	$target_encoding = $institution->{implementation_config}->{encoding} || 'ascii';
 
 	syslog('LOG_DEBUG', "OILS: loading bootstrap config: $bsconfig");
 	
@@ -104,6 +108,48 @@
 	return $e;
 }
 
+=head2 clean_text(scalar)
+
+Evergreen uses the UTF8 encoding for everything from the database up. Perl
+doesn't know this, however, so we have to convince it to treat our UTF8 strings
+as UTF8 strings. This may enable OpenNCIP to correctly calculate the checksums
+for UTF8 text for SIP clients that support such modern options.
+
+The target encoding is set in the <encoding> element of the SIPServer.pm
+configuration file.
+
+=cut
+
+sub clean_text {
+    my $text = shift || '';
+
+    # Convert our incoming UTF8 data into Perl's internal string format
+
+    # Also convert to Normalization Form D, as the ASCII, iso-8859-1,
+    # and latin-1 encodings (at least) require this to substitute
+    # characters rather than simply returning a string truncated
+    # after the first non-ASCII character
+    $text = NFD(decode_utf8($text));
+
+    if ($target_encoding eq 'ascii') {
+
+        # Try to maintain a reasonable version of the content by
+        # stripping diacritics from the text, given that the SIP client
+        # wants just plain ASCII. This is the base requirement according
+        # to the SIP2 specification.
+
+        # Stripping the combining characters converts "béè♁ts"
+        # into "bee?ts" instead of "b???ts" - better, eh?
+        $text =~ s/\pM+//og;
+    }
+
+    # Characters that cannot be represented in the target encoding will
+    # generally be replaced with a question mark (?) character.
+    $text = encode($target_encoding, $text);
+
+    return $text;
+}
+
 sub format_date {
 	my $class = shift;
 	my $date = shift;



More information about the open-ils-commits mailing list