[open-ils-commits] r17130 - in trunk/Open-ILS: examples src/extras src/perlmods/OpenILS/Application src/sql/Pg src/sql/Pg/upgrade (dbs)

svn at svn.open-ils.org svn at svn.open-ils.org
Fri Aug 6 23:53:04 EDT 2010


Author: dbs
Date: 2010-08-06 23:53:01 -0400 (Fri, 06 Aug 2010)
New Revision: 17130

Added:
   trunk/Open-ILS/src/sql/Pg/upgrade/0364.schema.call_number_normalization.sql
Modified:
   trunk/Open-ILS/examples/fm_IDL.xml
   trunk/Open-ILS/src/extras/Makefile.install
   trunk/Open-ILS/src/perlmods/OpenILS/Application/SuperCat.pm
   trunk/Open-ILS/src/sql/Pg/002.schema.config.sql
   trunk/Open-ILS/src/sql/Pg/040.schema.asset.sql
Log:
Basic call-number sorting by classification scheme

Until now, call numbers like 'K 22 .U748 v.18' and 'K 215 .E53 W37 1997' would
sort incorrectly in Evergreen, making call number browsing painful.

The following patch adds the following to Evergreen to support better call
number sorting:

  * asset.call_number gains two new columns:
    - label_class - identifies the classification scheme for the given callnumber
    - label_sortkey - holds the normalized callnumber suitable for sorting

  * a new table, asset.call_number_class, which holds the names of each
    classification scheme and the name of the assorted normalization function

  * implementations of three normalization functions:
    * a generic normalizer
    * a  Dewey Decimal Classification normalizer (courtesy the Koha project)
    * an LC classification normalizer (courtesy Bill Dueber's Library::CallNumber::LC)

  * a routine in Makefile.install to checkout and install Library::CallNumber::LC

  * call number sorting routines were modified to use label_sortkey instead of
    label, where I could find them

  * corresponding changes to fm_IDL.xml for the new columns + table

A suggested means for applying this to an existing library would be to issue
update statements against asset.call_number setting label_class to the value
in asset.call_number_class that corresponds to owning_lib's scheme, for example:

UPDATE asset.call_number SET label_class = 3 WHERE owning_lib IN (4,5);

We need to teach the staff client holdings maintenance interfaces how to set
the classification scheme for a given call number, and we should also create
an org_unit setting that can set the preferred default classification scheme
that the staff client should inherit. Or maybe we could just make that a
sticky value and have a BEFORE trigger check the incoming value of label_class
and only set it to the org_unit default value if it is NULL (to support bulk
loads).



Modified: trunk/Open-ILS/examples/fm_IDL.xml
===================================================================
--- trunk/Open-ILS/examples/fm_IDL.xml	2010-08-07 03:26:11 UTC (rev 17129)
+++ trunk/Open-ILS/examples/fm_IDL.xml	2010-08-07 03:53:01 UTC (rev 17130)
@@ -1623,6 +1623,12 @@
             </actions>
         </permacrud>
 	</class>
+    <class id="acnc" controller="open-ils.cstore" oils_obj:fieldmapper="asset::call_number_class" oils_persist:tablename="asset.call_number_class" reporter:label="Call number classification scheme">
+        <fields oils_persist:primary="id" oils_persist:sequence="asset.call_number_class_id_seq">
+            <field reporter:label="Name" name="name" reporter:datatype="text"/>
+            <field reporter:label="Normalizer function" name="normalizer" reporter:datatype="text"/>
+        </fields>
+    </class>
 	<class id="acn" controller="open-ils.cstore open-ils.pcrud" oils_obj:fieldmapper="asset::call_number" oils_persist:tablename="asset.call_number" reporter:label="Call Number/Volume">
 		<fields oils_persist:primary="id" oils_persist:sequence="asset.call_number_id_seq">
 			<field reporter:label="Copies" name="copies" oils_persist:virtual="true" reporter:datatype="link"/>
@@ -1638,6 +1644,8 @@
 			<field reporter:label="Notes" name="notes" oils_persist:virtual="true" reporter:datatype="link"/>
 			<field reporter:label="URI Maps" name="uri_maps" oils_persist:virtual="true" reporter:datatype="link"/>
 			<field reporter:label="URIs" name="uris" oils_persist:virtual="true" reporter:datatype="link"/>
+			<field reporter:label="Sort Key" name="label_sortkey" reporter:datatype="text"/>
+			<field reporter:label="Classification Scheme" name="label_class" reporter:datatype="link"/>
 		</fields>
 		<links>
 			<link field="editor" reltype="has_a" key="id" map="" class="au"/>
@@ -1648,6 +1656,7 @@
 			<link field="copies" reltype="has_many" key="call_number" map="" class="acp"/>
 			<link field="uris" reltype="has_many" key="call_number" map="uri" class="auricnm"/>
 			<link field="uri_maps" reltype="has_many" key="call_number" map="" class="auricnm"/>
+			<link field="label_class" reltype="has_a" key="id" map="" class="acnc"/>
 		</links>
         <permacrud xmlns="http://open-ils.org/spec/opensrf/IDL/permacrud/v1">
             <actions>

Modified: trunk/Open-ILS/src/extras/Makefile.install
===================================================================
--- trunk/Open-ILS/src/extras/Makefile.install	2010-08-07 03:26:11 UTC (rev 17129)
+++ trunk/Open-ILS/src/extras/Makefile.install	2010-08-07 03:53:01 UTC (rev 17130)
@@ -266,12 +266,12 @@
 	@echo "please specify an OS" && exit 0
 
 # these should be the same for any distro
-install: install_cpan install_js_sm install_libdbi 
+install: install_cpan install_js_sm install_libdbi install_library_callnumber_lc
 
 centos: install_centos_pgsql install_centos_rpms install_yaz install_cpan_marc install install_centos_perl create_ld_local install_cpan_safe
 rhel: install_redhat_pgsql install_centos_rpms install_yaz install_cpan_marc install install_centos_perl create_ld_local install_cpan_safe
 
-fedora-13: install_fedora_13_rpms install_cpan install_cpan_marc install_cpan_fedora install_spidermonkey
+fedora-13: install_fedora_13_rpms install_cpan install_cpan_marc install_cpan_fedora install_spidermonkey install_library_callnumber_lc
 
 debian-etch: etch generic_debian
 debian-lenny: lenny generic_debian
@@ -343,6 +343,9 @@
 	cd $(LIBDBI_DRIVERS) && ./configure  \
 		--disable-docs --with-pgsql --enable-libdbi && make all install  
 
+install_library_callnumber_lc:
+	if [ ! -d Library-CallNumber-LC ]; then svn checkout http://library-callnumber-lc.googlecode.com/svn/trunk/perl/Library-CallNumber-LC; fi;
+	cd Library-CallNumber-LC && perl Makefile.PL && make install
 
 clean:
 	make -C $(LIBDBI) clean

Modified: trunk/Open-ILS/src/perlmods/OpenILS/Application/SuperCat.pm
===================================================================
--- trunk/Open-ILS/src/perlmods/OpenILS/Application/SuperCat.pm	2010-08-07 03:26:11 UTC (rev 17129)
+++ trunk/Open-ILS/src/perlmods/OpenILS/Application/SuperCat.pm	2010-08-07 03:53:01 UTC (rev 17130)
@@ -305,7 +305,7 @@
 			},
 			{ flesh		=> 1,
 			  flesh_fields	=> { acn => [qw/record owning_lib/] },
-			  order_by	=> { acn => "upper(label) desc, id desc, owning_lib desc" },
+			  order_by	=> { acn => "label_sortkey, upper(label) desc, id desc, owning_lib desc" },
 			  limit		=> $before_limit,
 			  offset	=> abs($page) * $page_size - $before_offset,
 			}
@@ -323,7 +323,7 @@
 			},
 			{ flesh		=> 1,
 			  flesh_fields	=> { acn => [qw/record owning_lib/] },
-			  order_by	=> { acn => "upper(label), id, owning_lib" },
+			  order_by	=> { acn => "label_sortkey, upper(label), id, owning_lib" },
 			  limit		=> $after_limit,
 			  offset	=> abs($page) * $page_size - $after_offset,
 			}
@@ -428,7 +428,7 @@
 			},
 			{ flesh		=> 1,
 			  flesh_fields	=> { acn => [qw/record owning_lib/] },
-			  order_by	=> { acn => "upper(label) desc, id desc, owning_lib desc" },
+			  order_by	=> { acn => "label_sortkey, upper(label) desc, id desc, owning_lib desc" },
 			  limit		=> $limit,
 			  offset	=> $offset,
 			}
@@ -446,7 +446,7 @@
 			},
 			{ flesh		=> 1,
 			  flesh_fields	=> { acn => [qw/record owning_lib/] },
-			  order_by	=> { acn => "upper(label), id, owning_lib" },
+			  order_by	=> { acn => "label_sortkey, upper(label), id, owning_lib" },
 			  limit		=> $limit,
 			  offset	=> $offset,
 			}
@@ -1760,7 +1760,7 @@
 				},
           ( $limit > -1 ? ( limit  => $limit  ) : () ),
           ( $offset     ? ( offset => $offset ) : () ),
-          order_by  => { acn => { label => {} } }
+          order_by  => { acn => { label_sortkey => {} } }
 		}
 	)->gather(1);
 

Modified: trunk/Open-ILS/src/sql/Pg/002.schema.config.sql
===================================================================
--- trunk/Open-ILS/src/sql/Pg/002.schema.config.sql	2010-08-07 03:26:11 UTC (rev 17129)
+++ trunk/Open-ILS/src/sql/Pg/002.schema.config.sql	2010-08-07 03:53:01 UTC (rev 17130)
@@ -68,7 +68,7 @@
     install_date    TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
 );
 
-INSERT INTO config.upgrade_log (version) VALUES ('0363'); -- miker
+INSERT INTO config.upgrade_log (version) VALUES ('0364'); -- dbs
 
 CREATE TABLE config.bib_source (
 	id		SERIAL	PRIMARY KEY,

Modified: trunk/Open-ILS/src/sql/Pg/040.schema.asset.sql
===================================================================
--- trunk/Open-ILS/src/sql/Pg/040.schema.asset.sql	2010-08-07 03:26:11 UTC (rev 17129)
+++ trunk/Open-ILS/src/sql/Pg/040.schema.asset.sql	2010-08-07 03:53:01 UTC (rev 17130)
@@ -167,6 +167,103 @@
     active  BOOL    NOT NULL DEFAULT TRUE
 );
 
+CREATE TABLE asset.call_number_class (
+    id             bigserial     PRIMARY KEY,
+    name           TEXT          NOT NULL,
+    normalizer     TEXT          NOT NULL DEFAULT 'asset.normalize_generic'
+);
+
+CREATE OR REPLACE FUNCTION asset.label_normalizer() RETURNS TRIGGER AS $func$
+DECLARE
+    sortkey        TEXT := '';
+BEGIN
+    sortkey := NEW.label_sortkey;
+
+    EXECUTE 'SELECT ' || acnc.normalizer || '(' || 
+       quote_literal( NEW.label ) || ')'
+       FROM asset.call_number_class acnc
+       WHERE acnc.id = NEW.label_class
+       INTO sortkey;
+
+    NEW.label_sortkey = sortkey;
+
+    RETURN NEW;
+END;
+$func$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION asset.label_normalizer_generic(TEXT) RETURNS TEXT AS $func$
+    # Created after looking at the Koha C4::ClassSortRoutine::Generic module,
+    # thus could probably be considered a derived work, although nothing was
+    # directly copied - but to err on the safe side of providing attribution:
+    # Copyright (C) 2007 LibLime
+    # Licensed under the GPL v2 or later
+
+    use strict;
+    use warnings;
+
+    # Converts the callnumber to uppercase
+    # Strips spaces from start and end of the call number
+    # Converts anything other than letters, digits, and periods into underscores
+    # Collapses multiple underscores into a single underscore
+    my $callnum = uc(shift);
+    $callnum =~ s/^\s//g;
+    $callnum =~ s/\s$//g;
+    $callnum =~ s/[^A-Z0-9_.]/_/g;
+    $callnum =~ s/_{2,}/_/g;
+
+    return $callnum;
+$func$ LANGUAGE PLPERLU;
+
+CREATE OR REPLACE FUNCTION asset.label_normalizer_dewey(TEXT) RETURNS TEXT AS $func$
+    # Derived from the Koha C4::ClassSortRoutine::Dewey module
+    # Copyright (C) 2007 LibLime
+    # Licensed under the GPL v2 or later
+
+    use strict;
+    use warnings;
+
+    my $init = uc(shift);
+    $init =~ s/^\s+//;
+    $init =~ s/\s+$//;
+    $init =~ s!/!!g;
+    $init =~ s/^([\p{IsAlpha}]+)/$1 /;
+    my @tokens = split /\.|\s+/, $init;
+    my $digit_group_count = 0;
+    for (my $i = 0; $i <= $#tokens; $i++) {
+        if ($tokens[$i] =~ /^\d+$/) {
+            $digit_group_count++;
+            if (2 == $digit_group_count) {
+                $tokens[$i] = sprintf("%-15.15s", $tokens[$i]);
+                $tokens[$i] =~ tr/ /0/;
+            }
+        }
+    }
+    my $key = join("_", @tokens);
+    $key =~ s/[^\p{IsAlnum}_]//g;
+
+    return $key;
+
+$func$ LANGUAGE PLPERLU;
+
+CREATE OR REPLACE FUNCTION asset.label_normalizer_lc(TEXT) RETURNS TEXT AS $func$
+    use strict;
+    use warnings;
+
+    # Library::CallNumber::LC is currently hosted at http://code.google.com/p/library-callnumber-lc/
+    # The author hopes to upload it to CPAN some day, which would make our lives easier
+    use Library::CallNumber::LC;
+
+    my $callnum = Library::CallNumber::LC->new(shift);
+    return $callnum->normalize();
+
+$func$ LANGUAGE PLPERLU;
+
+INSERT INTO asset.call_number_class (name, normalizer) VALUES 
+    ('Generic', 'asset.label_normalizer_generic'),
+    ('Dewey (DDC)', 'asset.label_normalizer_dewey'),
+    ('Library of Congress (LC)', 'asset.label_normalizer_lc')
+;
+
 CREATE TABLE asset.call_number (
 	id		bigserial PRIMARY KEY,
 	creator		BIGINT				NOT NULL,
@@ -176,15 +273,23 @@
 	record		bigint				NOT NULL,
 	owning_lib	INT				NOT NULL,
 	label		TEXT				NOT NULL,
-	deleted		BOOL				NOT NULL DEFAULT FALSE
+	deleted		BOOL				NOT NULL DEFAULT FALSE,
+	label_class	BIGINT				DEFAULT 1 NOT NULL
+							REFERENCES asset.call_number_class(id)
+							DEFERRABLE INITIALLY DEFERRED,
+	label_sortkey	TEXT
 );
 CREATE INDEX asset_call_number_record_idx ON asset.call_number (record);
 CREATE INDEX asset_call_number_creator_idx ON asset.call_number (creator);
 CREATE INDEX asset_call_number_editor_idx ON asset.call_number (editor);
 CREATE INDEX asset_call_number_dewey_idx ON asset.call_number (public.call_number_dewey(label));
 CREATE INDEX asset_call_number_upper_label_id_owning_lib_idx ON asset.call_number (upper(label),id,owning_lib);
+CREATE INDEX asset_call_number_label_sortkey ON asset.call_number(label_sortkey);
 CREATE UNIQUE INDEX asset_call_number_label_once_per_lib ON asset.call_number (record, owning_lib, label) WHERE deleted = FALSE OR deleted IS FALSE;
 CREATE RULE protect_cn_delete AS ON DELETE TO asset.call_number DO INSTEAD UPDATE asset.call_number SET deleted = TRUE WHERE OLD.id = asset.call_number.id;
+CREATE TRIGGER asset_label_sortkey_trigger
+    BEFORE UPDATE OR INSERT ON asset.call_number
+    FOR EACH ROW EXECUTE PROCEDURE asset.label_normalizer();
 
 CREATE TABLE asset.uri_call_number_map (
     id          BIGSERIAL   PRIMARY KEY,

Added: trunk/Open-ILS/src/sql/Pg/upgrade/0364.schema.call_number_normalization.sql
===================================================================
--- trunk/Open-ILS/src/sql/Pg/upgrade/0364.schema.call_number_normalization.sql	                        (rev 0)
+++ trunk/Open-ILS/src/sql/Pg/upgrade/0364.schema.call_number_normalization.sql	2010-08-07 03:53:01 UTC (rev 17130)
@@ -0,0 +1,108 @@
+BEGIN;
+
+INSERT INTO config.upgrade_log (version) VALUES ('0364'); -- dbs
+
+CREATE TABLE asset.call_number_class (
+    id             bigserial     PRIMARY KEY,
+    name           TEXT          NOT NULL,
+    normalizer     TEXT          NOT NULL DEFAULT 'asset.normalize_generic'
+);
+
+ALTER TABLE asset.call_number ADD COLUMN label_class BIGINT DEFAULT 1 NOT NULL REFERENCES asset.call_number_class(id) DEFERRABLE INITIALLY DEFERRED;
+ALTER TABLE asset.call_number ADD COLUMN label_sortkey TEXT;
+CREATE INDEX asset_call_number_label_sortkey ON asset.call_number(label_sortkey);
+ALTER TABLE auditor.asset_call_number_history ADD COLUMN label_class BIGINT NOT NULL;
+ALTER TABLE auditor.asset_call_number_history ADD COLUMN label_sortkey TEXT;
+
+CREATE OR REPLACE FUNCTION asset.label_normalizer() RETURNS TRIGGER AS $func$
+DECLARE
+    sortkey        TEXT := '';
+BEGIN
+    sortkey := NEW.label_sortkey;
+
+    EXECUTE 'SELECT ' || acnc.normalizer || '(' || 
+       quote_literal( NEW.label ) || ')'
+       FROM asset.call_number_class acnc
+       WHERE acnc.id = NEW.label_class
+       INTO sortkey;
+
+    NEW.label_sortkey = sortkey;
+
+    RETURN NEW;
+END;
+$func$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION asset.label_normalizer_generic(TEXT) RETURNS TEXT AS $func$
+    # Created after looking at the Koha C4::ClassSortRoutine::Generic module,
+    # thus could probably be considered a derived work, although nothing was
+    # directly copied - but to err on the safe side of providing attribution:
+    # Copyright (C) 2007 LibLime
+    # Licensed under the GPL v2 or later
+
+    use strict;
+    use warnings;
+
+    # Converts the callnumber to uppercase
+    # Strips spaces from start and end of the call number
+    # Converts anything other than letters, digits, and periods into underscores
+    # Collapses multiple underscores into a single underscore
+    my $callnum = uc(shift);
+    $callnum =~ s/^\s//g;
+    $callnum =~ s/\s$//g;
+    $callnum =~ s/[^A-Z0-9_.]/_/g;
+    $callnum =~ s/_{2,}/_/g;
+
+    return $callnum;
+$func$ LANGUAGE PLPERLU;
+
+CREATE OR REPLACE FUNCTION asset.label_normalizer_dewey(TEXT) RETURNS TEXT AS $func$
+    # Derived from the Koha C4::ClassSortRoutine::Dewey module
+    # Copyright (C) 2007 LibLime
+    # Licensed under the GPL v2 or later
+
+    use strict;
+    use warnings;
+
+    my $init = uc(shift);
+    $init =~ s/^\s+//;
+    $init =~ s/\s+$//;
+    $init =~ s!/!!g;
+    $init =~ s/^([\p{IsAlpha}]+)/$1 /;
+    my @tokens = split /\.|\s+/, $init;
+    my $digit_group_count = 0;
+    for (my $i = 0; $i <= $#tokens; $i++) {
+        if ($tokens[$i] =~ /^\d+$/) {
+            $digit_group_count++;
+            if (2 == $digit_group_count) {
+                $tokens[$i] = sprintf("%-15.15s", $tokens[$i]);
+                $tokens[$i] =~ tr/ /0/;
+            }
+        }
+    }
+    my $key = join("_", @tokens);
+    $key =~ s/[^\p{IsAlnum}_]//g;
+
+    return $key;
+
+$func$ LANGUAGE PLPERLU;
+
+CREATE OR REPLACE FUNCTION asset.label_normalizer_lc(TEXT) RETURNS TEXT AS $func$
+    use strict;
+    use warnings;
+
+    # Library::CallNumber::LC is currently hosted at http://code.google.com/p/library-callnumber-lc/
+    # The author hopes to upload it to CPAN some day, which would make our lives easier
+    use Library::CallNumber::LC;
+
+    my $callnum = Library::CallNumber::LC->new(shift);
+    return $callnum->normalize();
+
+$func$ LANGUAGE PLPERLU;
+
+INSERT INTO asset.call_number_class (name, normalizer) VALUES 
+    ('Generic', 'asset.label_normalizer_generic'),
+    ('Dewey (DDC)', 'asset.label_normalizer_dewey'),
+    ('Library of Congress (LC)', 'asset.label_normalizer_lc')
+;
+
+COMMIT;



More information about the open-ils-commits mailing list