[open-ils-commits] r1056 - in conifer/branches/rel_1_6_1/tools: . patron-load (dbs)

svn at svn.open-ils.org svn at svn.open-ils.org
Fri Oct 29 15:55:10 EDT 2010


Author: dbs
Date: 2010-10-29 15:55:05 -0400 (Fri, 29 Oct 2010)
New Revision: 1056

Added:
   conifer/branches/rel_1_6_1/tools/patron-load/
   conifer/branches/rel_1_6_1/tools/patron-load/lu_student_data.pl
Log:
Basic script for munging patron data of the format provided by LU's Datatel instance


Added: conifer/branches/rel_1_6_1/tools/patron-load/lu_student_data.pl
===================================================================
--- conifer/branches/rel_1_6_1/tools/patron-load/lu_student_data.pl	                        (rev 0)
+++ conifer/branches/rel_1_6_1/tools/patron-load/lu_student_data.pl	2010-10-29 19:55:05 UTC (rev 1056)
@@ -0,0 +1,166 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+my $lsd = 'scratchpad.lu_student_data';
+my $lda = 'scratchpad.lu_deleted_accounts'; 
+my $lec = 'scratchpad.lu_email_changes';
+
+sub mod10_checksum {
+    my $barcode = shift;
+    my $total = 0;
+    my $position = 0;
+    foreach my $digit (split('', $barcode)) {
+        $position++;
+        if ($position % 2) {
+            # Double it
+            $digit *= 2;
+            # If less than 10, add to the total
+            if ($digit < 10) {
+                $total += $digit;
+            } else {
+                $total += $digit - 9;
+            }
+        } else {
+            $total += $digit;
+        }
+    }
+    my $rem = $total % 10;
+    if ($rem) {
+        return 10 - $rem;
+    }
+    return $rem;
+}
+
+# Data structure from LU CTS:
+# "STUDENTS_ID"	"STU_LAST_NAME"	"STU_L09_FIRST_NAME"	"STU_L09_CAST_PROGRAM"	"STU_CURRENT_ACAD_LEVELS"	"STU_L09_GET_LAU_EMAIL"	"STU_L09_GET_NON_LAU_EMAIL"	"PERSON_L09_PERM_ADDR_LINES"	"PERSON_L09_PERM_ZIP"	"PERSON_L09_PERM_CITY"	"PERSON_PRIMARY_LANGUAGE"	"STU_TERMS"
+
+print <<HERE;
+CREATE SCHEMA scratchpad;
+DROP TABLE IF EXISTS $lsd;
+CREATE TABLE $lsd (barcode TEXT, family_name TEXT, first_given_name TEXT, program TEXT, academic_level TEXT, lu_email TEXT, non_lu_email TEXT, street_address TEXT, postal_code TEXT, city TEXT, lang TEXT, term TEXT);
+CREATE TABLE $lda (usr BIGINT, delete_date timestamp DEFAULT NOW());
+CREATE TABLE $lec (usr BIGINT, before TEXT, after TEXT, edit_date timestamp DEFAULT NOW());
+COPY $lsd FROM STDIN;
+HERE
+
+my $line = 0;
+while (<>) {
+    if (!$line) {
+        $line++;
+        next;
+    }
+    # Remove quotes around columns
+    s/^"//;    
+    s/"$//;    
+    s/"\t"/\t/g;
+    s/"\t/\t/g;
+    s/\t"/\t/g;
+    # Represent NULL values
+    s/\t\t/\t\\N\t/g;
+    # Escape single-quotes
+    s/'/''/g;
+    chomp;
+
+    my @studata = split(/\t/);
+    my $barcode = '000070' . shift(@studata);
+    
+    print $barcode, mod10_checksum($barcode) . "\t";
+    print join("\t", @studata) . "\n";
+}
+print "\\.\n";
+
+print <<HERE;
+ALTER TABLE $lsd ADD COLUMN usr BIGINT;
+UPDATE $lsd SET usr = (SELECT usr FROM actor.card WHERE actor.card.barcode = $lsd.barcode);
+UPDATE $lsd SET lu_email = LOWER(lu_email);
+UPDATE $lsd SET lu_email = replace(lu_email, 'laurentian.ca', 'laurentienne.ca') WHERE lang = 'F';
+UPDATE $lsd SET non_lu_email = LOWER(non_lu_email);
+CREATE INDEX lsd_usr ON $lsd(usr);
+CREATE INDEX lsd_lu_email ON $lsd(lu_email);
+
+--Things we can do - inside a transaction!
+BEGIN;
+
+-- Update email address based on Datatel
+-- Non-LU email addresses are currently truncated, so don't use them
+--UPDATE actor.usr SET email = COALESCE($lsd.lu_email, $lsd.non_lu_email) FROM $lsd WHERE $lsd.usr = actor.usr.id;
+SELECT 'Number of users with mismatched email addresses (before): ', COUNT(*)
+    FROM actor.usr au
+        INNER JOIN $lsd lsd ON au.id = lsd.usr
+    WHERE au.email <> lsd.lu_email
+        AND lsd.lu_email IS NOT NULL
+        AND au.email NOT ILIKE '%normed.ca%'
+;
+INSERT INTO $lec (usr, before, after)
+    SELECT lsd.usr, au.email, lsd.lu_email
+    FROM actor.usr au
+        INNER JOIN $lsd lsd ON lsd.usr = au.id
+    WHERE au.email <> lsd.lu_email
+        AND lsd.lu_email IS NOT NULL
+        AND au.email NOT ILIKE '%normed.ca%'
+;
+UPDATE actor.usr SET email = lu_email
+    FROM $lsd
+    WHERE $lsd.usr = actor.usr.id
+        AND $lsd.lu_email IS NOT NULL
+        AND email NOT ILIKE '%normed.ca%'
+;
+SELECT 'Number of users with mismatched email addresses (after): ', COUNT(*)
+    FROM actor.usr au
+        INNER JOIN $lsd lsd ON au.id = lsd.usr
+    WHERE au.email <> lsd.lu_email
+        AND lsd.lu_email IS NOT NULL
+        AND au.email NOT ILIKE '%normed.ca%'
+;
+
+-- Update active state based on Datatel - for undergrad (13) and graduate students (12) only, registered at Desmarais (103)
+SELECT 'Number of active UG and GRAD students (before): ', COUNT(*)
+    FROM actor.usr
+    WHERE profile IN (12,13) AND home_ou = 103 AND active = TRUE
+;
+INSERT INTO $lda (usr) SELECT usr FROM $lsd lsd INNER JOIN actor.usr au ON au.id = lsd.usr WHERE au.home_ou = 103 AND au.profile IN (12, 13) AND term <> '2010AW';
+UPDATE actor.usr SET active = FALSE, expire_date = NOW() WHERE home_ou = 103 AND profile IN (12, 13);
+
+-- Set expiry date to Sept 30th of next year 
+UPDATE actor.usr SET active = TRUE, expire_date = (extract(year FROM NOW()) + 1 || '-09-30')::date
+    WHERE home_ou = 103 AND profile IN (12, 13)
+    AND id IN (SELECT usr FROM $lsd WHERE term = '2010AW')
+;
+SELECT 'Number of active UG and GRAD students (after): ', COUNT(*)
+    FROM actor.usr
+    WHERE profile IN (12,13) AND home_ou = 103 AND active = TRUE
+;
+
+-- Create or update language preference - currently stored as a actor.stat_cat
+SELECT 'Language preferences before:';
+SELECT au.home_ou, stat_cat_entry, COUNT(stat_cat_entry)
+    FROM actor.stat_cat_entry_usr_map INNER JOIN actor.usr au ON au.id = target_usr
+    GROUP BY au.home_ou, stat_cat_entry
+    ORDER BY 1, 2, 3 DESC
+;
+-- Clean up language inconsistencies
+UPDATE actor.stat_cat_entry_usr_map
+    SET stat_cat_entry = 'English'
+    WHERE stat_cat_entry = 'ENGLISH'
+;
+UPDATE actor.stat_cat_entry_usr_map
+    SET stat_cat_entry = 'français'
+    WHERE stat_cat_entry IN ('FRANCAIS', 'French', 'french')
+;
+INSERT INTO actor.stat_cat_entry_usr_map (stat_cat, target_usr, stat_cat_entry)
+    SELECT DISTINCT 2, usr, CASE WHEN lang = 'F' THEN 'français' ELSE 'English' END
+        FROM $lsd
+        WHERE usr NOT IN (SELECT target_usr FROM actor.stat_cat_entry_usr_map)
+;
+UPDATE actor.stat_cat_entry_usr_map SET stat_cat_entry = 'français'
+     WHERE target_usr IN (SELECT usr FROM $lsd WHERE lang = 'F')
+;
+SELECT 'Language preferences after:';
+SELECT au.home_ou, stat_cat_entry, COUNT(stat_cat_entry)
+    FROM actor.stat_cat_entry_usr_map INNER JOIN actor.usr au ON au.id = target_usr
+    GROUP BY au.home_ou, stat_cat_entry
+    ORDER BY 1, 2, 3 DESC
+;
+HERE
+



More information about the open-ils-commits mailing list