[open-ils-commits] r8944 - trunk/Open-ILS/src/sql/Pg
svn at svn.open-ils.org
svn at svn.open-ils.org
Sun Mar 9 21:39:22 EDT 2008
Author: miker
Date: 2008-03-09 21:06:23 -0400 (Sun, 09 Mar 2008)
New Revision: 8944
Modified:
trunk/Open-ILS/src/sql/Pg/020.schema.functions.sql
Log:
some normalization functions for use in in-DB ingest, when it happens
Modified: trunk/Open-ILS/src/sql/Pg/020.schema.functions.sql
===================================================================
--- trunk/Open-ILS/src/sql/Pg/020.schema.functions.sql 2008-03-10 01:03:32 UTC (rev 8943)
+++ trunk/Open-ILS/src/sql/Pg/020.schema.functions.sql 2008-03-10 01:06:23 UTC (rev 8944)
@@ -17,9 +17,12 @@
$$ LANGUAGE SQL STRICT IMMUTABLE;
CREATE OR REPLACE FUNCTION public.naco_normalize( TEXT, TEXT ) RETURNS TEXT AS $func$
+ use Unicode::Normalize;
+
my $txt = lc(shift);
my $sf = shift;
+ $txt = NFD($txt);
$txt =~ s/\pM+//go; # Remove diacritics
$txt =~ s/\xE6/AE/go; # Convert ae digraph
@@ -33,7 +36,7 @@
$txt =~ tr/\x{2113}\xF0\!\"\(\)\-\{\}\<\>\;\:\.\?\xA1\xBF\/\\\@\*\%\=\xB1\+\xAE\xA9\x{2117}\$\xA3\x{FFE1}\xB0\^\_\~\`/LD /; # Convert Misc
$txt =~ tr/\'\[\]\|//d; # Remove Misc
- if ($sf =~ /^a/o) {
+ if ($sf && $sf =~ /^a/o) {
my $commapos = index($txt,',');
if ($commapos > -1) {
if ($commapos != length($txt) - 1) {
@@ -59,6 +62,36 @@
SELECT public.naco_normalize($1,'');
$func$ LANGUAGE 'sql' STRICT IMMUTABLE;
+-- public.normalize_space( TEXT ): whitespace normalization.
+--   1. every newline is replaced by a single space,
+--   2. leading and trailing whitespace is removed,
+--   3. each remaining run of whitespace is collapsed to one space.
+-- Returns NULL for NULL input.
+-- Declared STRICT IMMUTABLE, like the other SQL-language functions in this
+-- file: the body consists solely of regexp_replace() calls on the argument,
+-- so the result depends on nothing but the input.
+CREATE OR REPLACE FUNCTION public.normalize_space( TEXT ) RETURNS TEXT AS $$
+    SELECT regexp_replace(
+               regexp_replace(
+                   regexp_replace($1, E'\\n', ' ', 'g'),
+                   E'(?:^\\s+)|(\\s+$)', '', 'g'),
+               E'\\s+', ' ', 'g');
+$$ LANGUAGE SQL STRICT IMMUTABLE;
+
+-- public.lowercase( TEXT ): lower-case the argument with Perl's lc().
+-- STRICT: without it a NULL argument reaches Perl as undef, and lc(undef)
+--         evaluates to '', so NULL would come back as an empty string.
+-- IMMUTABLE: the result depends only on the argument.
+CREATE OR REPLACE FUNCTION public.lowercase( TEXT ) RETURNS TEXT AS $$
+    return lc(shift);
+$$ LANGUAGE PLPERLU STRICT IMMUTABLE;
+
+-- public.uppercase( TEXT ): upper-case the argument with Perl's uc().
+-- STRICT: without it a NULL argument reaches Perl as undef, and uc(undef)
+--         evaluates to '', so NULL would come back as an empty string.
+-- IMMUTABLE: the result depends only on the argument.
+CREATE OR REPLACE FUNCTION public.uppercase( TEXT ) RETURNS TEXT AS $$
+    return uc(shift);
+$$ LANGUAGE PLPERLU STRICT IMMUTABLE;
+
+-- public.remove_diacritics( TEXT ): strip combining marks from the argument.
+-- The text is first decomposed with NFD so that an accented character
+-- becomes a base character followed by separate combining marks; every run
+-- of \pM (Unicode "Mark") characters is then deleted.
+-- STRICT: NULL yields NULL instead of being handed to NFD() as undef.
+-- IMMUTABLE: the result depends only on the argument.
+CREATE OR REPLACE FUNCTION public.remove_diacritics( TEXT ) RETURNS TEXT AS $$
+    use Unicode::Normalize;
+
+    my $x = NFD(shift);
+    $x =~ s/\pM+//go;
+    return $x;
+
+$$ LANGUAGE PLPERLU STRICT IMMUTABLE;
+
+-- public.entityize( TEXT ): replace non-ASCII characters with XML numeric
+-- character references of the form &#xHEX; after composing the text (NFC).
+-- The character class covers U+0080..U+FFFD and the supplementary planes
+-- U+10000..U+10FFFF.  U+FFFE and U+FFFF are deliberately left out: XML 1.0
+-- does not allow them, not even as character references.
+-- STRICT: NULL yields NULL instead of being handed to NFC() as undef.
+-- IMMUTABLE: the result depends only on the argument.
+CREATE OR REPLACE FUNCTION public.entityize( TEXT ) RETURNS TEXT AS $$
+    use Unicode::Normalize;
+
+    my $x = NFC(shift);
+    $x =~ s/([\x{0080}-\x{fffd}\x{10000}-\x{10ffff}])/sprintf('&#x%X;',ord($1))/sgoe;
+    return $x;
+
+$$ LANGUAGE PLPERLU STRICT IMMUTABLE;
+
CREATE OR REPLACE FUNCTION public.call_number_dewey( TEXT ) RETURNS TEXT AS $$
my $txt = shift;
$txt =~ s/^\s+//o;
More information about the open-ils-commits
mailing list