[open-ils-commits] r8944 - trunk/Open-ILS/src/sql/Pg

svn at svn.open-ils.org svn at svn.open-ils.org
Sun Mar 9 21:39:22 EDT 2008


Author: miker
Date: 2008-03-09 21:06:23 -0400 (Sun, 09 Mar 2008)
New Revision: 8944

Modified:
   trunk/Open-ILS/src/sql/Pg/020.schema.functions.sql
Log:
some normalization functions for use in in-DB ingest, when it happens

Modified: trunk/Open-ILS/src/sql/Pg/020.schema.functions.sql
===================================================================
--- trunk/Open-ILS/src/sql/Pg/020.schema.functions.sql	2008-03-10 01:03:32 UTC (rev 8943)
+++ trunk/Open-ILS/src/sql/Pg/020.schema.functions.sql	2008-03-10 01:06:23 UTC (rev 8944)
@@ -17,9 +17,12 @@
 $$ LANGUAGE SQL STRICT IMMUTABLE;
 
 CREATE OR REPLACE FUNCTION public.naco_normalize( TEXT, TEXT ) RETURNS TEXT AS $func$
+    use Unicode::Normalize;
+
 	my $txt = lc(shift);
 	my $sf = shift;
 
+    $txt = NFD($txt);
 	$txt =~ s/\pM+//go;	# Remove diacritics
 
 	$txt =~ s/\xE6/AE/go;	# Convert ae digraph
@@ -33,7 +36,7 @@
 	$txt =~ tr/\x{2113}\xF0\!\"\(\)\-\{\}\<\>\;\:\.\?\xA1\xBF\/\\\@\*\%\=\xB1\+\xAE\xA9\x{2117}\$\xA3\x{FFE1}\xB0\^\_\~\`/LD /;	# Convert Misc
 	$txt =~ tr/\'\[\]\|//d;							# Remove Misc
 
-	if ($sf =~ /^a/o) {
+	if ($sf && $sf =~ /^a/o) {
 		my $commapos = index($txt,',');
 		if ($commapos > -1) {
 			if ($commapos != length($txt) - 1) {
@@ -59,6 +62,36 @@
 	SELECT public.naco_normalize($1,'');
 $func$ LANGUAGE 'sql' STRICT IMMUTABLE;
 
+CREATE OR REPLACE FUNCTION public.normalize_space( TEXT ) RETURNS TEXT AS $$ -- newlines become spaces, both ends are trimmed, whitespace runs collapse to one space
+    SELECT regexp_replace(regexp_replace(regexp_replace($1, E'\\n', ' ', 'g'), E'(?:^\\s+)|(\\s+$)', '', 'g'), E'\\s+', ' ', 'g');
+$$ LANGUAGE SQL STRICT IMMUTABLE; -- matches the other SQL helpers in this file; result depends only on $1
+
+CREATE OR REPLACE FUNCTION public.lowercase( TEXT ) RETURNS TEXT AS $$
+    return lc(shift);    # lower-case the single TEXT argument with Perl's lc()
+$$ LANGUAGE PLPERLU STRICT IMMUTABLE; -- STRICT: NULL stays NULL instead of reaching lc() as undef and coming back as ''
+
+CREATE OR REPLACE FUNCTION public.uppercase( TEXT ) RETURNS TEXT AS $$
+    return uc(shift);    # upper-case the single TEXT argument with Perl's uc()
+$$ LANGUAGE PLPERLU STRICT IMMUTABLE; -- STRICT: NULL stays NULL instead of reaching uc() as undef and coming back as ''
+
+CREATE OR REPLACE FUNCTION public.remove_diacritics( TEXT ) RETURNS TEXT AS $$
+    use Unicode::Normalize;
+    # Decompose (NFD) first so accents become separate combining marks, then strip every \pM run.
+    my $x = NFD(shift);
+    $x =~ s/\pM+//go;
+    return $x;    # the result is left in decomposed form; nothing recomposes it here
+
+$$ LANGUAGE PLPERLU STRICT IMMUTABLE; -- STRICT: a NULL argument yields NULL rather than being handed to NFD() as undef
+
+CREATE OR REPLACE FUNCTION public.entityize( TEXT ) RETURNS TEXT AS $$
+    use Unicode::Normalize;
+    # Compose (NFC) first, then rewrite each character in U+0080..U+FFFD as an &#xHEX; entity.
+    my $x = NFC(shift);
+    $x =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
+    return $x;    # ASCII and anything above U+FFFD pass through unchanged
+
+$$ LANGUAGE PLPERLU STRICT IMMUTABLE; -- STRICT: a NULL argument yields NULL rather than being handed to NFC() as undef
+
 CREATE OR REPLACE FUNCTION public.call_number_dewey( TEXT ) RETURNS TEXT AS $$
 	my $txt = shift;
 	$txt =~ s/^\s+//o;



More information about the open-ils-commits mailing list