[open-ils-commits] r8942 - trunk/Open-ILS/src/perlmods/OpenILS/Application/Storage

svn at svn.open-ils.org svn at svn.open-ils.org
Sun Mar 9 21:18:34 EDT 2008


Author: miker
Date: 2008-03-09 20:45:35 -0400 (Sun, 09 Mar 2008)
New Revision: 8942

Modified:
   trunk/Open-ILS/src/perlmods/OpenILS/Application/Storage/FTS.pm
Log:
NACO normalization is handy to have around

Modified: trunk/Open-ILS/src/perlmods/OpenILS/Application/Storage/FTS.pm
===================================================================
--- trunk/Open-ILS/src/perlmods/OpenILS/Application/Storage/FTS.pm	2008-03-10 00:19:58 UTC (rev 8941)
+++ trunk/Open-ILS/src/perlmods/OpenILS/Application/Storage/FTS.pm	2008-03-10 00:45:35 UTC (rev 8942)
@@ -5,6 +5,7 @@
 package OpenILS::Application::Storage::FTS;
 use OpenSRF::Utils::Logger qw/:level/;
 use Parse::RecDescent;
+use Unicode::Normalize;
 
 my $_default_grammar_parser = new Parse::RecDescent ( <<'GRAMMAR' );
 
@@ -26,6 +27,49 @@
 
 GRAMMAR
 
+sub naco_normalize {
+    # Normalize a string roughly per the NACO rules: strip diacritics, map special characters, collapse whitespace.
+    my $txt = lc(shift);  # text to normalize
+    my $sf = shift;       # optional subfield code; a value starting with "a" preserves the first comma
+    # NOTE(review): text is lowercased here, yet the mappings below emit uppercase letters -- confirm the intended case.
+    $txt = NFD($txt);
+    $txt =~ s/\pM+//go; # Remove diacritics
+    # NFD applies canonical decompositions only, so digraphs and super/subscripts are mapped by hand below.
+    $txt =~ s/\xE6/AE/go;   # Convert ae digraph
+    $txt =~ s/\x{153}/OE/go;# Convert oe digraph
+    $txt =~ s/\xFE/TH/go;   # Convert Icelandic thorn
+    # Superscript 1-3 are U+00B9, U+00B2 and U+00B3 (Latin-1), outside the U+2070 block, so list them explicitly.
+    $txt =~ tr/\xB9\xB2\xB3\x{2070}\x{2071}\x{2072}\x{2073}\x{2074}\x{2075}\x{2076}\x{2077}\x{2078}\x{2079}\x{207A}\x{207B}/1230123456789+-/;# Convert superscript numbers
+    $txt =~ tr/\x{2080}\x{2081}\x{2082}\x{2083}\x{2084}\x{2085}\x{2086}\x{2087}\x{2088}\x{2089}\x{208A}\x{208B}/0123456789+-/;# Convert subscript numbers
+    # In the "Convert Misc" tr below the replacement list is shorter, so its final space replaces every remaining character.
+    $txt =~ tr/\x{0251}\x{03B1}\x{03B2}\x{0262}\x{03B3}/AABGG/;     # Convert Latin and Greek
+    $txt =~ tr/\x{2113}\xF0\!\"\(\)\-\{\}\<\>\;\:\.\?\xA1\xBF\/\\\@\*\%\=\xB1\+\xAE\xA9\x{2117}\$\xA3\x{FFE1}\xB0\^\_\~\`/LD /; # Convert Misc
+    $txt =~ tr/\'\[\]\|//d;                         # Remove Misc
+    # For subfield "a" keep the first comma (unless it is the last character); every other comma becomes a space.
+    if ($sf && $sf =~ /^a/o) {
+        my $commapos = index($txt,',');
+        if ($commapos > -1) {
+            if ($commapos != length($txt) - 1) {
+                my @list = split /,/, $txt;
+                my $first = shift @list;
+                $txt = $first . ',' . join(' ', @list);
+            } else {
+                $txt =~ s/,/ /go;
+            }
+        }
+    } else {
+        $txt =~ s/,/ /go;
+    }
+
+    $txt =~ s/\s+/ /go; # Compress multiple spaces
+    $txt =~ s/^\s+//o;  # Remove leading space
+    $txt =~ s/\s+$//o;  # Remove trailing space
+
+    return $txt;        # the normalized string
+}
+
+#' stupid vim syntax highlighting ...
+
 sub compile {
 
 	$log->debug("You must override me somewhere, or I will make searching really slow!!!!",ERROR);;



More information about the open-ils-commits mailing list