[open-ils-commits] [GIT] Evergreen ILS branch master updated. 9bdb4c597ed8adbfec115c528d3d348bf595e070

Evergreen Git git at git.evergreen-ils.org
Mon Feb 27 10:24:21 EST 2017


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Evergreen ILS".

The branch, master has been updated
       via  9bdb4c597ed8adbfec115c528d3d348bf595e070 (commit)
       via  d318a3afa3f543e4dd4006621709f8eb54656ae5 (commit)
       via  24b31926958715692dcbb0538099a26152f53bca (commit)
      from  a989b3b70295fad8fdeeacf5f181d10b2ed24f87 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 9bdb4c597ed8adbfec115c528d3d348bf595e070
Author: Galen Charlton <gmc at equinoxinitiative.org>
Date:   Mon Feb 27 10:22:48 2017 -0500

    LP#1607487: stamp schema update
    
    Signed-off-by: Galen Charlton <gmc at equinoxinitiative.org>

diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql
index 06928cd..5f52ec3 100644
--- a/Open-ILS/src/sql/Pg/002.schema.config.sql
+++ b/Open-ILS/src/sql/Pg/002.schema.config.sql
@@ -91,7 +91,7 @@ CREATE TRIGGER no_overlapping_deps
     BEFORE INSERT OR UPDATE ON config.db_patch_dependencies
     FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates');
 
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1028', :eg_version); -- jeffdavis/kmlussier
+INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1029', :eg_version); -- csharp/gmcharlt
 
 CREATE TABLE config.bib_source (
 	id		SERIAL	PRIMARY KEY,
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.data.update-broken-naco-links.sql b/Open-ILS/src/sql/Pg/upgrade/1029.schema.update-broken-naco-links.sql
similarity index 98%
rename from Open-ILS/src/sql/Pg/upgrade/XXXX.data.update-broken-naco-links.sql
rename to Open-ILS/src/sql/Pg/upgrade/1029.schema.update-broken-naco-links.sql
index 9b0820f..a024d74 100644
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.data.update-broken-naco-links.sql
+++ b/Open-ILS/src/sql/Pg/upgrade/1029.schema.update-broken-naco-links.sql
@@ -1,6 +1,6 @@
 BEGIN;
 
-SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+SELECT evergreen.upgrade_deps_block_check('1029', :eg_version); -- csharp/gmcharlt
 
 UPDATE config.index_normalizer SET description = 'Apply NACO normalization rules to the extracted text.  See https://www.loc.gov/aba/pcc/naco/normrule-2.html for details.' WHERE func = 'naco_normalize';
 UPDATE config.index_normalizer SET description = 'Apply NACO normalization rules to the extracted text, retaining the first comma.  See https://www.loc.gov/aba/pcc/naco/normrule-2.html for details.' WHERE func = 'naco_normalize_keep_comma';

commit d318a3afa3f543e4dd4006621709f8eb54656ae5
Author: Galen Charlton <gmc at equinoxinitiative.org>
Date:   Mon Feb 27 10:19:30 2017 -0500

    LP#1607487: make schema update script use same whitespace
    
    This patch ensures that the naco_normalize() and search_normalize()
    functions preserve the exact whitespace used in the baseline
    function definitions; this makes no functional difference, but
    may help folks using diffs to compare schema versions.
    
    Signed-off-by: Galen Charlton <gmc at equinoxinitiative.org>

diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.data.update-broken-naco-links.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.data.update-broken-naco-links.sql
index ecf6dff..9b0820f 100644
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.data.update-broken-naco-links.sql
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.data.update-broken-naco-links.sql
@@ -40,14 +40,14 @@ CREATE OR REPLACE FUNCTION public.naco_normalize( TEXT, TEXT ) RETURNS TEXT AS $
     # transformations based on Unicode category codes
     $str =~ s/[\p{Cc}\p{Cf}\p{Co}\p{Cs}\p{Lm}\p{Mc}\p{Me}\p{Mn}]//g;
 
-    if ($sf && $sf =~ /^a/o) {
-        my $commapos = index($str, ',');
-        if ($commapos > -1) {
-            if ($commapos != length($str) - 1) {
+	if ($sf && $sf =~ /^a/o) {
+		my $commapos = index($str, ',');
+		if ($commapos > -1) {
+			if ($commapos != length($str) - 1) {
                 $str =~ s/,/\x07/; # preserve first comma
-            }
-        }
-    }
+			}
+		}
+	}
 
     # since we've stripped out the control characters, we can now
     # use a few as placeholders temporarily
@@ -69,7 +69,8 @@ CREATE OR REPLACE FUNCTION public.naco_normalize( TEXT, TEXT ) RETURNS TEXT AS $
     return lc $str;
 $func$ LANGUAGE 'plperlu' STRICT IMMUTABLE;
 
-
+-- Currently, the only difference from naco_normalize is that search_normalize
+-- turns apostrophes into spaces, while naco_normalize collapses them.
 CREATE OR REPLACE FUNCTION public.search_normalize( TEXT, TEXT ) RETURNS TEXT AS $func$
 
     use strict;
@@ -105,14 +106,14 @@ CREATE OR REPLACE FUNCTION public.search_normalize( TEXT, TEXT ) RETURNS TEXT AS
     # transformations based on Unicode category codes
     $str =~ s/[\p{Cc}\p{Cf}\p{Co}\p{Cs}\p{Lm}\p{Mc}\p{Me}\p{Mn}]//g;
 
-    if ($sf && $sf =~ /^a/o) {
-        my $commapos = index($str, ',');
-        if ($commapos > -1) {
-            if ($commapos != length($str) - 1) {
+	if ($sf && $sf =~ /^a/o) {
+		my $commapos = index($str, ',');
+		if ($commapos > -1) {
+			if ($commapos != length($str) - 1) {
                 $str =~ s/,/\x07/; # preserve first comma
-            }
-        }
-    }
+			}
+		}
+	}
 
     # since we've stripped out the control characters, we can now
     # use a few as placeholders temporarily

commit 24b31926958715692dcbb0538099a26152f53bca
Author: Chris Sharp <csharp at georgialibraries.org>
Date:   Thu Aug 18 14:52:59 2016 -0400

    LP#1607487 - Fix broken URLs to NACO resources
    
    Remington Steed pointed out that a couple of URLs are now
    broken.  This corrects them.
    
    Signed-off-by: Chris Sharp <csharp at georgialibraries.org>
    Signed-off-by: Galen Charlton <gmc at equinoxinitiative.org>

diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm b/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm
index 1102d10..4770b43 100644
--- a/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm
@@ -16,7 +16,7 @@ sub naco_normalize {
     my $sf = shift;
 
     # Apply NACO normalization to input string; based on
-    # http://www.loc.gov/catdir/pcc/naco/SCA_PccNormalization_Final_revised.pdf
+    # https://www.loc.gov/aba/pcc/naco/documents/SCA_PccNormalization_Final_revised.pdf 
     #
     # Note that unlike a strict reading of the NACO normalization rules,
     # output is returned as lowercase instead of uppercase for compatibility
diff --git a/Open-ILS/src/sql/Pg/002.functions.config.sql b/Open-ILS/src/sql/Pg/002.functions.config.sql
index 77876fe..6ebdf1d 100644
--- a/Open-ILS/src/sql/Pg/002.functions.config.sql
+++ b/Open-ILS/src/sql/Pg/002.functions.config.sql
@@ -611,7 +611,7 @@ CREATE OR REPLACE FUNCTION public.naco_normalize( TEXT, TEXT ) RETURNS TEXT AS $
     my $sf = shift;
 
     # Apply NACO normalization to input string; based on
-    # http://www.loc.gov/catdir/pcc/naco/SCA_PccNormalization_Final_revised.pdf
+    # https://www.loc.gov/aba/pcc/naco/documents/SCA_PccNormalization_Final_revised.pdf
     #
     # Note that unlike a strict reading of the NACO normalization rules,
     # output is returned as lowercase instead of uppercase for compatibility
@@ -677,7 +677,7 @@ CREATE OR REPLACE FUNCTION public.search_normalize( TEXT, TEXT ) RETURNS TEXT AS
     my $sf = shift;
 
     # Apply NACO normalization to input string; based on
-    # http://www.loc.gov/catdir/pcc/naco/SCA_PccNormalization_Final_revised.pdf
+    # https://www.loc.gov/aba/pcc/naco/documents/SCA_PccNormalization_Final_revised.pdf
     #
     # Note that unlike a strict reading of the NACO normalization rules,
     # output is returned as lowercase instead of uppercase for compatibility
diff --git a/Open-ILS/src/sql/Pg/950.data.seed-values.sql b/Open-ILS/src/sql/Pg/950.data.seed-values.sql
index a1f036d..ce5e03f 100644
--- a/Open-ILS/src/sql/Pg/950.data.seed-values.sql
+++ b/Open-ILS/src/sql/Pg/950.data.seed-values.sql
@@ -10023,7 +10023,7 @@ INSERT INTO action_trigger.hook (key,core_type,description) VALUES (
 -- in-db indexing normalizers
 INSERT INTO config.index_normalizer (name, description, func, param_count) VALUES (
 	'NACO Normalize',
-	'Apply NACO normalization rules to the extracted text.  See http://www.loc.gov/catdir/pcc/naco/normrule-2.html for details.',
+	'Apply NACO normalization rules to the extracted text.  See https://www.loc.gov/aba/pcc/naco/normrule-2.html for details.',
 	'naco_normalize',
 	0
 );
@@ -10037,7 +10037,7 @@ INSERT INTO config.index_normalizer (name, description, func, param_count) VALUE
 
 INSERT INTO config.index_normalizer (name, description, func, param_count) VALUES (
 	'NACO Normalize -- retain first comma',
-	'Apply NACO normalization rules to the extracted text, retaining the first comma.  See http://www.loc.gov/catdir/pcc/naco/normrule-2.html for details.',
+	'Apply NACO normalization rules to the extracted text, retaining the first comma.  See https://www.loc.gov/aba/pcc/naco/normrule-2.html for details.',
 	'naco_normalize_keep_comma',
 	0
 );
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.data.update-broken-naco-links.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.data.update-broken-naco-links.sql
new file mode 100644
index 0000000..ecf6dff
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.data.update-broken-naco-links.sql
@@ -0,0 +1,137 @@
+BEGIN;
+
+SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+
+UPDATE config.index_normalizer SET description = 'Apply NACO normalization rules to the extracted text.  See https://www.loc.gov/aba/pcc/naco/normrule-2.html for details.' WHERE func = 'naco_normalize';
+UPDATE config.index_normalizer SET description = 'Apply NACO normalization rules to the extracted text, retaining the first comma.  See https://www.loc.gov/aba/pcc/naco/normrule-2.html for details.' WHERE func = 'naco_normalize_keep_comma';
+
+CREATE OR REPLACE FUNCTION public.naco_normalize( TEXT, TEXT ) RETURNS TEXT AS $func$
+
+    use strict;
+    use Unicode::Normalize;
+    use Encode;
+
+    my $str = shift;
+    my $sf = shift;
+
+    # Apply NACO normalization to input string; based on
+    # https://www.loc.gov/aba/pcc/naco/documents/SCA_PccNormalization_Final_revised.pdf
+    #
+    # Note that unlike a strict reading of the NACO normalization rules,
+    # output is returned as lowercase instead of uppercase for compatibility
+    # with previous versions of the Evergreen naco_normalize routine.
+
+    # Convert to upper-case first; even though final output will be lowercase, doing this will
+    # ensure that the German eszett (ß) and certain ligatures (ff, fi, ffl, etc.) will be handled correctly.
+    # If there are any bugs in Perl's implementation of upcasing, they will be passed through here.
+    $str = uc $str;
+
+    # remove non-filing strings
+    $str =~ s/\x{0098}.*?\x{009C}//g;
+
+    $str = NFKD($str);
+
+    # additional substitutions - 3.6.
+    $str =~ s/\x{00C6}/AE/g;
+    $str =~ s/\x{00DE}/TH/g;
+    $str =~ s/\x{0152}/OE/g;
+    $str =~ tr/\x{0110}\x{00D0}\x{00D8}\x{0141}\x{2113}\x{02BB}\x{02BC}]['/DDOLl/d;
+
+    # transformations based on Unicode category codes
+    $str =~ s/[\p{Cc}\p{Cf}\p{Co}\p{Cs}\p{Lm}\p{Mc}\p{Me}\p{Mn}]//g;
+
+    if ($sf && $sf =~ /^a/o) {
+        my $commapos = index($str, ',');
+        if ($commapos > -1) {
+            if ($commapos != length($str) - 1) {
+                $str =~ s/,/\x07/; # preserve first comma
+            }
+        }
+    }
+
+    # since we've stripped out the control characters, we can now
+    # use a few as placeholders temporarily
+    $str =~ tr/+&@\x{266D}\x{266F}#/\x01\x02\x03\x04\x05\x06/;
+    $str =~ s/[\p{Pc}\p{Pd}\p{Pe}\p{Pf}\p{Pi}\p{Po}\p{Ps}\p{Sk}\p{Sm}\p{So}\p{Zl}\p{Zp}\p{Zs}]/ /g;
+    $str =~ tr/\x01\x02\x03\x04\x05\x06\x07/+&@\x{266D}\x{266F}#,/;
+
+    # decimal digits
+    $str =~ tr/\x{0660}-\x{0669}\x{06F0}-\x{06F9}\x{07C0}-\x{07C9}\x{0966}-\x{096F}\x{09E6}-\x{09EF}\x{0A66}-\x{0A6F}\x{0AE6}-\x{0AEF}\x{0B66}-\x{0B6F}\x{0BE6}-\x{0BEF}\x{0C66}-\x{0C6F}\x{0CE6}-\x{0CEF}\x{0D66}-\x{0D6F}\x{0E50}-\x{0E59}\x{0ED0}-\x{0ED9}\x{0F20}-\x{0F29}\x{1040}-\x{1049}\x{1090}-\x{1099}\x{17E0}-\x{17E9}\x{1810}-\x{1819}\x{1946}-\x{194F}\x{19D0}-\x{19D9}\x{1A80}-\x{1A89}\x{1A90}-\x{1A99}\x{1B50}-\x{1B59}\x{1BB0}-\x{1BB9}\x{1C40}-\x{1C49}\x{1C50}-\x{1C59}\x{A620}-\x{A629}\x{A8D0}-\x{A8D9}\x{A900}-\x{A909}\x{A9D0}-\x{A9D9}\x{AA50}-\x{AA59}\x{ABF0}-\x{ABF9}\x{FF10}-\x{FF19}/0-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-9/;
+
+    # intentionally skipping step 8 of the NACO algorithm; if the string
+    # gets normalized away, that's fine.
+
+    # leading and trailing spaces
+    $str =~ s/\s+/ /g;
+    $str =~ s/^\s+//;
+    $str =~ s/\s+$//g;
+
+    return lc $str;
+$func$ LANGUAGE 'plperlu' STRICT IMMUTABLE;
+
+
+CREATE OR REPLACE FUNCTION public.search_normalize( TEXT, TEXT ) RETURNS TEXT AS $func$
+
+    use strict;
+    use Unicode::Normalize;
+    use Encode;
+
+    my $str = shift;
+    my $sf = shift;
+
+    # Apply NACO normalization to input string; based on
+    # https://www.loc.gov/aba/pcc/naco/documents/SCA_PccNormalization_Final_revised.pdf
+    #
+    # Note that unlike a strict reading of the NACO normalization rules,
+    # output is returned as lowercase instead of uppercase for compatibility
+    # with previous versions of the Evergreen naco_normalize routine.
+
+    # Convert to upper-case first; even though final output will be lowercase, doing this will
+    # ensure that the German eszett (ß) and certain ligatures (ff, fi, ffl, etc.) will be handled correctly.
+    # If there are any bugs in Perl's implementation of upcasing, they will be passed through here.
+    $str = uc $str;
+
+    # remove non-filing strings
+    $str =~ s/\x{0098}.*?\x{009C}//g;
+
+    $str = NFKD($str);
+
+    # additional substitutions - 3.6.
+    $str =~ s/\x{00C6}/AE/g;
+    $str =~ s/\x{00DE}/TH/g;
+    $str =~ s/\x{0152}/OE/g;
+    $str =~ tr/\x{0110}\x{00D0}\x{00D8}\x{0141}\x{2113}\x{02BB}\x{02BC}][/DDOLl/d;
+
+    # transformations based on Unicode category codes
+    $str =~ s/[\p{Cc}\p{Cf}\p{Co}\p{Cs}\p{Lm}\p{Mc}\p{Me}\p{Mn}]//g;
+
+    if ($sf && $sf =~ /^a/o) {
+        my $commapos = index($str, ',');
+        if ($commapos > -1) {
+            if ($commapos != length($str) - 1) {
+                $str =~ s/,/\x07/; # preserve first comma
+            }
+        }
+    }
+
+    # since we've stripped out the control characters, we can now
+    # use a few as placeholders temporarily
+    $str =~ tr/+&@\x{266D}\x{266F}#/\x01\x02\x03\x04\x05\x06/;
+    $str =~ s/[\p{Pc}\p{Pd}\p{Pe}\p{Pf}\p{Pi}\p{Po}\p{Ps}\p{Sk}\p{Sm}\p{So}\p{Zl}\p{Zp}\p{Zs}]/ /g;
+    $str =~ tr/\x01\x02\x03\x04\x05\x06\x07/+&@\x{266D}\x{266F}#,/;
+
+    # decimal digits
+    $str =~ tr/\x{0660}-\x{0669}\x{06F0}-\x{06F9}\x{07C0}-\x{07C9}\x{0966}-\x{096F}\x{09E6}-\x{09EF}\x{0A66}-\x{0A6F}\x{0AE6}-\x{0AEF}\x{0B66}-\x{0B6F}\x{0BE6}-\x{0BEF}\x{0C66}-\x{0C6F}\x{0CE6}-\x{0CEF}\x{0D66}-\x{0D6F}\x{0E50}-\x{0E59}\x{0ED0}-\x{0ED9}\x{0F20}-\x{0F29}\x{1040}-\x{1049}\x{1090}-\x{1099}\x{17E0}-\x{17E9}\x{1810}-\x{1819}\x{1946}-\x{194F}\x{19D0}-\x{19D9}\x{1A80}-\x{1A89}\x{1A90}-\x{1A99}\x{1B50}-\x{1B59}\x{1BB0}-\x{1BB9}\x{1C40}-\x{1C49}\x{1C50}-\x{1C59}\x{A620}-\x{A629}\x{A8D0}-\x{A8D9}\x{A900}-\x{A909}\x{A9D0}-\x{A9D9}\x{AA50}-\x{AA59}\x{ABF0}-\x{ABF9}\x{FF10}-\x{FF19}/0-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-9/;
+
+    # intentionally skipping step 8 of the NACO algorithm; if the string
+    # gets normalized away, that's fine.
+
+    # leading and trailing spaces
+    $str =~ s/\s+/ /g;
+    $str =~ s/^\s+//;
+    $str =~ s/\s+$//g;
+
+    return lc $str;
+$func$ LANGUAGE 'plperlu' STRICT IMMUTABLE;
+
+COMMIT;

-----------------------------------------------------------------------

Summary of changes:
 .../src/perlmods/lib/OpenILS/Utils/Normalize.pm    |    2 +-
 Open-ILS/src/sql/Pg/002.functions.config.sql       |    4 +-
 Open-ILS/src/sql/Pg/002.schema.config.sql          |    2 +-
 Open-ILS/src/sql/Pg/950.data.seed-values.sql       |    4 +-
 .../1029.schema.update-broken-naco-links.sql       |  138 ++++++++++++++++++++
 5 files changed, 144 insertions(+), 6 deletions(-)
 create mode 100644 Open-ILS/src/sql/Pg/upgrade/1029.schema.update-broken-naco-links.sql


hooks/post-receive
-- 
Evergreen ILS


More information about the open-ils-commits mailing list