[OPEN-ILS-DEV] More Quick Utilities
Travis Schafer
tschafer at carsoncity.k12.mi.us
Thu Nov 15 15:11:31 EST 2007
Skipped content of type multipart/alternative-------------- next part --------------
//
// import2.c
//
// Carson City - Crystal Area Schools
// Technology Department
// 115 E Main
// Carson City, MI 48811
// (989) 584-3138
//
// This program synchronizes patron records within an Evergreen (aka Open-ILS)
// Integrated Libary System database with an external file. Specifically,
// this program is used to create/update/inactivate patron records to
// match the CIMS Student Management System. It uses the CMT/CIMS Excelsior
// export file format to do this, which is a simple file format to generate.
//
// The program only deals with records within a given user group. This
// is to prevent student changes from affecting staff/public patron records.
// As such, the program takes the primary key of an Evergreen usergroup
// as an argument. It additionally requires the primary key of the
// Organizational Unit that should be set as the "home library."
//
// The program matches the import file's student ID field to the Evergreen
// database's username field. The address is created with a label of "CIMS,"
// we leave all other address records alone when we update records, but we
// will always reset the "CIMS" label to match the import file. Also, after
// the initial creation of the record, we leave password and barcode data
// alone.
//
// Copyright 2007 Travis Schafer
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
//
//
// Initial Program:
// 2007-10-26 T.Schafer
//
// Changes:
//
// 2007-11-01 T.Schafer - Added student_print function for diagnostics
// - Modified Makefile because, strangely, we segfault on
// DeWitt data if we compile at optimization level 2.
//
// 2007-11-07 T.Schafer - Noticed that we had some lines switched that meant:
// We exit the program before freeing the DB connection
// if the inital connection fails. This doesn't really
// matter, but it's good to be good...
// - Modified record insertions to be concurrency safe
// (ie, reserve an OID, instead of selecting the max
// id in the current table and assume it will work
// as a foreign key).
//
// 2007-11-15 T.Schafer - Added comments to make program GPL
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libpq-fe.h>
#define _GNU_SOURCE
#include <getopt.h>
#define MAXLINE 4096 // Maximum length of line in import file
#define MAXFIELD 30 // Maximum length of field in import file
#define MAXSTMT 4096 // Maximum length of an SQL statement
// Birthdates are given to us with a two digit year. If those two digits are
// LESS than CENTURYCUT, we set the century to CENTURYHIGH. Else, we set
// CENTURYLOW. These settings give any two digit birthyear greater than 69
// a century of 19, and anything else a century of 20
#define CENTURYCUT 70
#define CENTURYHIGH 20
#define CENTURYLOW 19
// Structure for student records
struct student
{
char *id; // Student ID
char *fname; // First Name
char *mname; // Middle Name
char *lname; // Last Name
char *guardian; // Guardian Full Name
char *street; // Street (eg 115 E Main St)
char *city; // City
char *state; // State
char *zip; // Zip
char *bdate; // Birthdate (YYYY-MM-DD)
char *phone; // Phone (NXX-NXX-XXXX)
struct student *next; // Next student in linked list
};
// Initialize a linked list of students (IE, create a guard)
//
// Arguments: (void)
// Returns: Pointer to new list (ie, guard record)
struct student *student_initlist( void );
// Free a list of students
//
// Arguments: student *stulist - Student list to free
// Returns: void
void student_freelist( struct student* stulist );
// Add a student to the end of a student list
//
// Arguments: struct student * stulist - List to which we insert
// char *id - Student ID
// char *fname - First Name
// char *mname - Middle Name
// char *lname - Last Name
// char *guardian - Guardian Full Name
// char *street - Street (eg 115 E Main St)
// char *city - City
// char *state - State
// char *zip - Zip
// char *bdate - Birthdate (YYYY-MM-DD)
// char *phone - Phone (NXX-NXX-XXXX)
//
// Returns: Void
void student_insert( struct student *stulist, char *id, char *fname,
char *mname, char *lname, char *guardian, char *street,
char *city, char *state, char *zip, char *bdate,
char *phone );
// Insert students from file at end of given list of students
//
// Arguments: PGconn *conn - Database connection (needed to escape
// strings for SQL, ie, handle single quotes)
// student *stulist - Student list to populate
// FILE *ifd - File descriptor for input file
// Returns: Number of students inserted in stulist
unsigned student_readlist( PGconn *conn, struct student* stulist, FILE *ifd );
// Prints the contents of the current student structures records
//
// Arguments: FILE* ofd - File descriptor to which we print records.
// struct *currstu - Student to print
// Returns: void
void student_print( FILE* ofd, struct student *currstu );
// Update database to reflect import file.
//
// This function first sets all users who are in both the specified
// organizational unit AND group to inactive. It then scans the student
// list, matching student ID in the list to username in the database.
// If a match is found, that record is marked as active and all fields are
// updated to reflect the import file. If a record is not found, one is
// inserted and made active. Marking everyone inactive ahead of time is
// brute force... but it does handle end-of-year promotion to another building
// (and hence OU). It is important to note that when we update a record, we
// do so without regaurd to OU. This facilitates students who move between
// OUs. This is all done in a single transaction.
//
// Arguments: PGconn *conn - Connection to Evergreen database
// struct student *stulist - List of students
// unsigned ou - Organizational Unit ID
// unsigned profile - Profile (Group) ID
void evergreen_update( PGconn *conn, struct student *stulist, unsigned ou,
unsigned group );
int main( int argc, char **argv )
{
// Runtime ptions - Filled by getopts_long_only()
int currOpt; // Current Option
char *dbhost; // Database Host
char *dbname; // Database Name
char *dbuser; // Database User
char *dbpass; // Password for Database User
unsigned egprof; // Touch/Create only users with this profile ID
unsigned egou; // Create users in the following OU ID
char *infile; // Import file name
// Number of options that we processed. Probably provided already by
// getopt(), but I don't know where or how.
unsigned optcount=0;
// File descriptor for input file
FILE *ifd;
// Connection to database
PGconn *conn;
// List of students in import file
struct student *stulist;
// Process options
while( 1 )
{
static struct option options[] =
{
{ "dbhost", required_argument, 0, 'h' }, // Database Host
{ "dbname", required_argument, 0, 'd' }, // Database Name
{ "dbuser", required_argument, 0, 'u' }, // Database User
{ "dbpass", required_argument, 0, 'p' }, // Database Password
{ "egprof", required_argument, 0, 'g' }, // Evergreen Profile ID
{ "egou", required_argument, 0, 'l' }, // Evergreen home OU ID
{ "infile", required_argument, 0, 'f' }, // Import File
{ 0, 0, 0, 0 }
};
int option_index = 0;
currOpt = getopt_long_only( argc, argv, "", options, &option_index );
// Are we done processing options?
if( currOpt == -1 )
{
break;
}
switch( currOpt )
{
case 'h':
{
dbhost = strdup( optarg );
optcount++;
break;
}
case 'd':
{
dbname = strdup( optarg );
optcount++;
break;
}
case 'u':
{
dbuser = strdup( optarg );
optcount++;
break;
}
case 'p':
{
dbpass = strdup( optarg );
optcount++;
break;
}
case 'g':
{
sscanf( optarg, "%d", &egprof );
optcount++;
break;
}
case 'l':
{
sscanf( optarg, "%d", &egou );
optcount++;
break;
}
case 'f':
{
infile = strdup( optarg );
optcount++;
break;
}
case '?':
{
/* Error message printed by getopt */
break;
}
}
}
// Did we get enough options?
if( optcount != 7 )
{
fprintf( stderr, "%s: Required options\n"
"--dbhost= : Database Host\n"
"--dbname= : Database Username\n"
"--dbuser= : Database User\n"
"--dbpass= : Database Password\n"
"--egprof= : Evergreen Profile ID for Students\n"
"--egou= : OU ID of Home Library for this import\n"
"--infile= : Import file\n", argv[0] );
exit( 1 );
}
// Open the input file
if( !( ifd = fopen( infile, "r" ) ) )
{
fprintf( stderr, "%s: Can not open input file. Exiting!\n", argv[0] );
exit( 1 );
}
// Connect to database
conn = PQsetdbLogin( dbhost, NULL, NULL, NULL, dbname, dbuser, dbpass );
if( PQstatus( conn ) == CONNECTION_BAD )
{
fprintf( stderr, "%s: Cannot connect to database!\n"
"Error is: %s\n\n***Exiting!***\n",
argv[0], PQerrorMessage( conn ) );
PQfinish( conn );
exit( 1 );
}
// Build list of students from file
stulist = student_initlist();
student_readlist( conn, stulist, ifd );
fclose( ifd );
// Use student list to update database
evergreen_update( conn, stulist, egou, egprof );
// End of run cleanup
student_freelist( stulist );
PQfinish( conn );
return 0;
}
struct student *student_initlist( void )
{
// Pointer to return
struct student *retPtr;
retPtr = (struct student*)malloc( sizeof( struct student ) );
retPtr->next = 0;
return retPtr;
}
void student_freelist( struct student *stulist )
{
// Pointer to current student
struct student *currStu;
// Pointer to next student
struct student *nextStu;
// Advance past guard
currStu = stulist->next;
while( currStu )
{
nextStu = currStu->next;
free( currStu->id );
free( currStu->fname );
free( currStu->mname );
free( currStu->lname );
free( currStu->guardian );
free( currStu->street );
free( currStu->city );
free( currStu->state );
free( currStu->zip );
free( currStu->bdate );
free( currStu->phone );
free( currStu );
currStu = nextStu;
}
// Free the guard
free( stulist );
return;
}
void student_insert( struct student *stulist, char *id,
char *fname, char *mname, char *lname,
char *guardian, char *street, char *city,
char *state, char *zip, char *bdate,
char *phone )
{
// Current student in list
struct student *currStu;
currStu = stulist;
// Find the end of the list
while( currStu )
{
if( !currStu->next )
{
currStu->next = (struct student*)malloc( sizeof( struct student ) );
currStu = currStu->next;
currStu->id = strdup( id );
currStu->fname = strdup( fname );
currStu->mname = strdup( mname );
currStu->lname = strdup( lname );
currStu->guardian = strdup( guardian );
currStu->street = strdup( street );
currStu->city = strdup( city );
currStu->state = strdup( state );
currStu->zip = strdup( zip );
currStu->bdate = strdup( bdate );
currStu->phone = strdup( phone );
currStu->next = 0;
}
currStu = currStu->next;
}
return;
}
unsigned student_readlist( PGconn *conn, struct student* stulist, FILE *ifd )
{
// Buffer for reading from input file
char buff[MAXLINE+1];
// Contents of current field, properly escaped
char currField[MAXFIELD+1];
// Counter of records processed
unsigned rp;
rp = 0;
while( fgets( buff, MAXLINE, ifd ) )
{
// Fields from file
char id[MAXFIELD+1]; // ID
char fullname[MAXFIELD+1]; // Full Name
char grade[MAXFIELD+1]; // Grade
char bmon[MAXFIELD+1]; // Birthday Month (MM)
char bday[MAXFIELD+1]; // Birthday Day (DD)
char byear[MAXFIELD+1]; // Birthday Year (YYYY)
char gender[MAXFIELD+1]; // Gender
char guardian[MAXFIELD+1]; // Guardian
char street[MAXFIELD+1]; // Street Address
char city[MAXFIELD+1]; // City
char state[MAXFIELD+1]; // State
char zip[MAXFIELD+1]; // Zip
char phone[MAXFIELD+1]; // Phone
// Fields we build (because we dont have the correct format, yet)
char fname[MAXFIELD+1]; // First Name
char mname[MAXFIELD+1]; // Middle Name
char lname[MAXFIELD+1]; // Last Name
char birthdate[MAXFIELD+1]; // Birthday YYYY-MM-DD
unsigned century; // Birth Century
// Cleanup Middle name (otherwise, if current record has no
// middle name, we inherit the previous middle name)
mname[0]='\0';
// Extract Records
sscanf( buff, "\"%[^\"]\",\"%[^\"]\",\"%[^\"]\",\"%[^/]/%[^/]/%[^\"]\",\"%[^\"]\",\"%[^\"]\",\"%[^\"]\",\"%[^,], %s %[^\"]\",\"%[^\"]",
id,
fullname,
grade,
bmon,
bday,
byear,
gender,
guardian,
street,
city,
state,
zip,
phone
);
// Seperate first, middle, last names
sscanf( fullname,"%[^,], %s%s", lname, fname, mname );
// Add a century to the birth year
sscanf( byear, "%d", ¢ury );
if( century < CENTURYCUT )
{
century = CENTURYHIGH;
}
else
{
century = CENTURYLOW;
}
sprintf( birthdate, "%d%s-%s-%s", century, byear, bmon, bday );
// Clean up strings for use in SQL (ie, escape special characters).
// This function uses the database connection to determine special
// properties of the database that may effect the conversion.
PQescapeStringConn( conn, currField, id, strlen(id), NULL );
strcpy( id, currField );
PQescapeStringConn( conn, currField, fname, strlen(fname), NULL );
strcpy( fname, currField );
PQescapeStringConn( conn, currField, mname, strlen(mname), NULL );
strcpy( mname, currField );
PQescapeStringConn( conn, currField, lname, strlen(lname), NULL );
strcpy( lname, currField );
PQescapeStringConn( conn, currField, guardian, strlen(guardian), NULL );
strcpy( guardian, currField );
PQescapeStringConn( conn, currField, street, strlen(street), NULL );
strcpy( street, currField );
PQescapeStringConn( conn, currField, city, strlen(city), NULL );
strcpy( city, currField );
PQescapeStringConn( conn, currField, state, strlen(state), NULL );
strcpy( state, currField );
PQescapeStringConn( conn, currField, zip, strlen(zip), NULL );
strcpy( zip, currField );
PQescapeStringConn( conn, currField, birthdate, strlen(birthdate), NULL );
strcpy( birthdate, currField );
PQescapeStringConn( conn, currField, phone, strlen(phone), NULL );
strcpy( phone, currField );
student_insert( stulist, id, fname, mname, lname, guardian, street,
city, state, zip, birthdate, phone );
rp++;
}
return rp;
}
void student_print( FILE* ofd, struct student *currstu )
{
fprintf( ofd, "\n***Student Record***\n" );
fprintf( ofd, "ID: %s\n", currstu->id );
fprintf( ofd, "First Name: %s\n", currstu->fname );
fprintf( ofd, "Middle Name: %s\n", currstu->mname );
fprintf( ofd, "Last Name: %s\n", currstu->lname );
fprintf( ofd, "Guardian: %s\n", currstu->guardian );
fprintf( ofd, "Street: %s\n", currstu->street );
fprintf( ofd, "City: %s\n", currstu->city );
fprintf( ofd, "State: %s\n", currstu->state );
fprintf( ofd, "Zip: %s\n", currstu->zip );
fprintf( ofd, "Bdate: %s\n", currstu->bdate );
fprintf( ofd, "Phone: %s\n\n\n", currstu->phone );
return;
}
void evergreen_update( PGconn *conn, struct student *stulist, unsigned ou,
unsigned group )
{
// SQL Statement Buffer
char stmt[MAXSTMT+1];
// Result of most recently submitted statement
PGresult *sres;
// Current Student
struct student *currStu;
// Advance past guard
currStu = stulist->next;
sres = PQexec( conn, "BEGIN;" );
PQclear( sres );
// Mark everyone as inactive
sprintf( stmt, "UPDATE actor.usr SET active = 'f' WHERE profile = %d "
"AND home_ou = %d;", group, ou );
sres = PQexec( conn, stmt );
PQclear( sres );
while( currStu )
{
// Get ID of current student
sprintf( stmt, "SELECT id FROM actor.usr WHERE usrname='%s';",
currStu->id );
sres = PQexec( conn, stmt );
if( PQntuples( sres ) )
{
// There is a user record in the database already, this will
// be an update.
// Primary Key of existing user record
unsigned ukey;
// Primary Key of existing address record
unsigned akey;
// We found an existing record, get the ID.
sscanf( PQgetvalue( sres, 0, 0 ), "%d", &ukey );
PQclear( sres );
// Okay, lets try an update
// We leave a few fields alone, to avoid clobbering legitimately
// changed values (uhh, password)
sprintf( stmt, "UPDATE actor.usr SET profile=%d, "
"ident_type = 3, ident_value = '%s', ident_value2 = '%s', "
"first_given_name = '%s', second_given_name = '%s', "
"family_name = '%s', day_phone = '%s', evening_phone='%s', "
"home_ou = %d, dob = '%s' WHERE id = %d; ",
group, currStu->id, currStu->guardian,
currStu->fname, currStu->mname,
currStu->lname, currStu->phone, currStu->phone,
ou, currStu->bdate, ukey );
sres = PQexec( conn, stmt );
PQclear( sres );
// And address...
sprintf( stmt, "SELECT id FROM actor.usr_address "
"WHERE usr = %d AND address_type = 'CIMS';", ukey );
sres = PQexec( conn, stmt );
sscanf( PQgetvalue( sres, 0, 0 ), "%d", &akey );
PQclear( sres );
sprintf( stmt, "UPDATE actor.usr_address "
"SET street1 = '%s', city = '%s', state = '%s', "
"country = 'USA', post_code = '%s' WHERE id = %d;",
currStu->street, currStu->city, currStu->state,
currStu->zip, akey );
sres = PQexec( conn, stmt );
PQclear( sres );
// Mark the record as active...
sprintf( stmt, "UPDATE actor.usr SET active='t' WHERE id= %d;",
ukey );
sres = PQexec( conn, stmt );
PQclear( sres );
}
else
{
// Primary key of new user record
unsigned ukey;
// Primary key of new card record
unsigned ckey;
// Primary key of new address record
unsigned akey;
// Reserve primary keys for new entries
sprintf( stmt, "SELECT nextval( 'actor.usr_id_seq' );" );
sres = PQexec( conn, stmt );
sscanf( PQgetvalue( sres, 0, 0 ), "%d", &ukey );
sprintf( stmt, "SELECT nextval( 'actor.usr_address_id_seq' );" );
sres = PQexec( conn, stmt );
sscanf( PQgetvalue( sres, 0, 0 ), "%d", &akey );
sprintf( stmt, "SELECT nextval( 'actor.card_id_seq' );" );
sres = PQexec( conn, stmt );
sscanf( PQgetvalue( sres, 0, 0 ), "%d", &ckey );
// Insert into actor.usr
sprintf( stmt, "INSERT INTO actor.usr "
"( id, profile, usrname, passwd, ident_type, ident_value, "
"ident_value2, "
"first_given_name, second_given_name, family_name, "
"day_phone, evening_phone, "
"home_ou, dob, claims_returned_count ) "
"VALUES "
"( %d, %d, '%s', '%s', 3, '%s', "
"'%s', "
"'%s', '%s', '%s', "
"'%s', '%s', "
" %d, '%s', 0 );",
ukey, group, currStu->id, currStu->id, currStu->id,
currStu->guardian,
currStu->fname, currStu->mname, currStu->lname,
currStu->phone, currStu->phone,
ou, currStu->bdate );
sres = PQexec( conn, stmt );
PQclear( sres );
// Insert into actor.usr_address
sprintf( stmt, "INSERT INTO actor.usr_address "
"( id, address_type, usr, street1, "
"city, state, country, post_code ) "
"VALUES "
"( %d, 'CIMS', %d, '%s', "
"'%s', '%s', 'USA', '%s' );",
akey, ukey, currStu->street,
currStu->city, currStu->state, currStu->zip );
sres = PQexec( conn, stmt );
PQclear( sres );
// And the card
sprintf( stmt, "INSERT INTO actor.card( id, usr, barcode ) "
"VALUES ( %d, %d, '%s' );",
ckey, ukey, currStu->id );
sres = PQexec( conn, stmt );
PQclear( sres );
// Set address for user...
sprintf( stmt, "UPDATE actor.usr SET mailing_address=%d, "
"billing_address=%d WHERE id=%d;",
akey, akey, ukey );
sres = PQexec( conn, stmt );
PQclear( sres );
// Set card for user...
sprintf( stmt, "UPDATE actor.usr SET card=%d "
" where id = %d;", ckey, ukey );
sres = PQexec( conn, stmt );
PQclear( sres );
}
currStu = currStu->next;
}
sres = PQexec( conn, "COMMIT;" );
PQclear( sres );
return;
}
-------------- next part --------------
//
// cleantcn.c
//
// Given an XML file of MARC records, removes spaces from the TCN (tag 001)
//
// Carson City - Crystal Area Schools
// Technology Department
// 115 E Main
// Carson City, MI 48811
// (989) 584-3138
//
// Copyright 2007 Travis Schafer
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// Initial Program:
// 2007-11-05 T.Schafer
//
// Changes:
// 2007-11-15 T.Schafer - Added comments to make program GPL
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#define _GNU_SOURCE
#include <getopt.h>
#define FALSE 0
#define TRUE 1
// Extract, clean, and replace TCNs
void tcn_clean( xmlNode *collection );
// Return a string identical to inStr, with the exception that
// the returned string has no spaces. It is up to the caller to free the
// returned string.
char *no_spaces( char *inStr );
// Return a string indentical to inStr, with the exception that all alpha
// characters are in lowercase. It is up to the caller to free the returned
// string
char *no_caps( char *inStr );
int main( int argc, char **argv )
{
char* xfname; // XML File to process (name)
int currOpt; // Current Option
unsigned optCount=0; // Number of command line options parsed
xmlDocPtr doc; // Tree representation of XML file
while( TRUE )
{
static struct option options[] =
{
{ "xmlfile", required_argument, 0, 'x' },
{ 0, 0, 0, 0 }
};
int option_index = 0;
currOpt = getopt_long_only( argc, argv, "", options, &option_index );
if( currOpt == -1 )
{
break;
}
switch( currOpt )
{
case 'x':
{
xfname = strdup( optarg );
optCount++;
break;
}
case '?':
{
// Error message printed by getopt()
break;
}
}
}
// Did we get enough options
if( optCount != 1 )
{
fprintf( stderr, "%s: Clean TCNs in XML MARC file\n"
"--xmlfile : XML File to Process\n", argv[0] );
exit( 1 );
}
if( !( doc = xmlReadFile( xfname, NULL, 0 ) ) )
{
fprintf( stderr, "%s: Failed to read XML MARC file! Exiting\n",
argv[0] );
exit( 1 );
}
free( xfname );
tcn_clean( xmlDocGetRootElement( doc ) );
xmlDocDump( stdout, doc );
xmlCleanupParser();
return 0;
}
void tcn_clean( xmlNode *collection )
{
// Current Record in XML Tree
xmlNode *currRec = NULL;
// Current Field in XML Tree
xmlNode *currField = NULL;
// Current Tag in XML Tree
xmlChar *tag = NULL;
// Current Value in XML Tree
xmlNode *currVal = NULL;
// Tag Value
xmlChar *val;
// Tag Value without spaces
xmlChar *valnospace;
// Tag Value converted to lowercase
xmlChar *valnocaps;
// We were given the root (collection) node. The children will be records,
// so we need to walk the records...
for( currRec = collection->children; currRec; currRec = currRec->next )
{
// Okay, we're looking at a record. Now we need to run through the
// children to find the controlfield with tag="001"
for( currField = currRec->children; currField; currField=currField->next)
{
tag = xmlGetProp( currField, "tag" );
if( !xmlStrcmp( tag, "001" ) )
{
// Get the child, which will be the text node
currVal = currField->children;
// Get the actuall text
val = xmlNodeGetContent( currVal );
// Clean spaces and replace value
valnospace = no_spaces( val );
// Convert to all lower
valnocaps = no_caps( valnospace );
xmlNodeSetContent( currVal, valnocaps );
// Cleanup
xmlFree( val );
//free( valnospace );
}
xmlFree( tag );
}
}
return;
}
char *no_spaces( char *inStr )
{
// String to return
char *retStr;
// Current position in retStr
char *currRet;
// Generic counter
unsigned i;
retStr = (char*)malloc( strlen( inStr) * sizeof( char ) );
currRet = retStr;
for( i=0; i < strlen( inStr ); i++ )
{
if( inStr[i] != ' ' )
{
*currRet = inStr[i];
currRet += sizeof( char );
}
}
// We should already be at the end of the string (we get an advance above),
// so cap it.
*currRet = '\0';
return retStr;
}
char *no_caps( char *inStr )
{
// String to return
char *retStr;
// Generic counter
unsigned i;
retStr = (char *)malloc( strlen( inStr) * sizeof(char) );
for( i = 0; i < strlen( inStr ); i++ )
{
retStr[i] = tolower( inStr[i] );
}
// Cap the retStr
retStr[i] = '\0';
return retStr;
}
-------------- next part --------------
//
// deltcn.c
//
// Carson City - Crystal Area Schools
// Technology Department
// 115 E Main
// Carson City, MI 48811
// (989) 584-3138
//
// Given a list of TCNs tc_list, remove all entries in the BRE file with
// a TCN found on tc_list.
//
// When adding a new organization to an Evergreen installation, it is
// necessary to import that organization's Bibliographic MARC records.
// Unfortunately, there is a great certainty that a subset of the new
// organization's records already exist in the Evergreen database. A direct
// import, therfore, will result the uniqeness constraint on TCN in the
// Evergreen database.
//
// As a result, this program takes a list of TCNs already present in the
// Evergreen database, with a format of on TCN per line. The program
// also takes an Evergreen BRE file. The program then prints, to STDOUT,
// only those records that were not matched with the provided list of
// existing TCNs.
//
// Copyright 2007 Travis Schafer
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// Initial Program
// 2007-11-04 T.Schafer
//
// Changes:
// 2007-11-15 T.Schafer - Made program GPL
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define _GNU_SOURCE
#include <getopt.h>
#define MAXBRE 1048576 // Maximum length of line in BRE file
#define MAXTCN 128 // Maximum length of line in TCN file
#define TRUE 1
#define FALSE 0
int main( int argc, char **argv )
{
unsigned optcount=0; // Number of options returned by getopt
unsigned i; // Generic Counter
int currOpt; // Current Option on Commandline
char *tfname; // TCN File Name
char *bfname; // BRE File Name
FILE *tfd; // TCN File Descriptor
FILE *bfd; // BRE File Descriptor
char tcnbuff[MAXTCN+1]; // TCN buffer
char brebuff[MAXBRE+1]; // BRE Buffer
char **tcnList; // TCN List
unsigned tcnListSz; // Number of entries in TCN list
while( 1 )
{
static struct option options[] =
{
{ "tcnfile", required_argument, 0, 't' }, // TCN File Name
{ "tcncount", required_argument, 0, 's' }, // Number of TCN File recs
{ "brefile", required_argument, 0, 'b' }, // BRE File Name
{ 0, 0, 0, 0 }
};
int option_index = 0;
currOpt = getopt_long_only( argc, argv, "", options, &option_index );
// Are we done processing options?
if( currOpt == -1 )
{
break;
}
switch( currOpt )
{
case 't':
{
tfname = strdup( optarg );
optcount++;
break;
}
case 's':
{
sscanf( optarg, "%d", &tcnListSz );
optcount++;
break;
}
case 'b':
{
bfname = strdup( optarg );
optcount++;
break;
}
case '?':
{
// Error message printed by getopt()
break;
}
}
}
// Did we get enough options
if( optcount != 3 )
{
fprintf( stderr, "%s: Remove records from BRE file based on TCN file\n"
"--tcnfile= : TCN Filename\n"
"--tcncount= : Number of entires in TCN file\n"
"--brefile= : BRE Filename\n", argv[0] );
exit( 1 );
}
if( !( tfd = fopen( tfname, "r" ) ) )
{
fprintf( stderr, "%s: Can not open TCN file. Exiting!\n", argv[0] );
exit( 1 );
}
if( !( bfd = fopen( bfname, "r" ) ) )
{
fprintf( stderr, "%s: Can not open BRE file. Exiting!\n", argv[0] );
exit( 1 );
}
free( tfname );
free( bfname );
// Load and close TCN List
tcnList = (char**)malloc( sizeof(char*) * tcnListSz );
for( i = 0; fgets( tcnbuff, MAXTCN, tfd); i++ )
{
// Pointer to newline character in TCN
char *n;
tcnList[i] = strdup( tcnbuff );
// Trim ending newline, if there is one
if( (n = strstr( tcnList[i], "\n" ) ) )
{
*n = '\0';
}
}
fclose( tfd );
// Now that we have a list of TCNs, we will scan the BRE file. If a BRE
// record (line) does not contain a TCN that is also present in the TCN
// list, we print the line to STDOUT
while( fgets( brebuff, MAXBRE, bfd ) )
{
// Current position in buffer
char *curr;
// Next position in buffer
char *next;
// Length of Current TCN
unsigned currTcnLen;
// Current TCN as extracted from BRE
char currTcn[MAXTCN+1];
// Did we find the current BRE's TCN on the list to remove?
unsigned match = FALSE;
// The TCN is contained in the BRE between the last comma of the
// record, and the last ']' of the record.
// Set the next position to the last character in the buffer
next = brebuff + sizeof( char ) * strlen( brebuff );
// Now, slide next back until we hit ']'
while( *next != ']' )
{
next -= sizeof( char );
}
// Now, set current position in buffer to last ',' in the string
// before ']'
curr = next;
while( *curr != ',' )
{
curr -= sizeof( char );
}
// Advance curr past ','
curr += sizeof( char );
// Decrement next past ']'
next -= sizeof( char );
currTcnLen = (next - curr) / sizeof( char );
currTcnLen++;
// Extract the current TCN from the BRE
strncpy( currTcn, curr, currTcnLen );
// Terminat string
currTcn[currTcnLen] = '\0';
// Okay, now, we decide if the record should be printed
for( i = 0; i < tcnListSz; i++ )
{
if( !strcmp( tcnList[i], currTcn ) )
{
match = TRUE;
}
}
if( !match )
{
fprintf( stdout, "%s", brebuff );
}
}
return 0;
}
-------------- next part --------------
//
// rebar.c
//
// Carson City - Crystal Area Schools
// Technology Department
// 115 E Main
// Carson City, MI 48811
// (989) 584-3138
//
// Apparently, the standard barcode for libraries is a CODABAR encoded 14
// digit barcode with a format of ABBBBCCCCCCCCD, where 'A' indicates the
// item type, BBBB is a four digit organization code, CCCCCCCC is a "series
// number," which identifies an item, and D is a Mod-10 check digit.
//
// If you are converting between systems, you may want to change nonstandard
// "simple" barcodes to the above format. This program scans a MARC XML
// file, and does just that. A is set to '3' to indicate a book, BBBB
// is set to the orgcode value defined on the command line, the first 'C'
// is set to the sleader value defined on the command line, and the
// remaining Cs take the value of the barcode present in the MARC XML file. D
// is calculated and set appropriately.
//
// The program prints a modified MARC XML file to stdout, with new barcodes,
// which will be your old barcodes, "wrapped" in the above format. If you
// plan to use a single BBBB for your entire organization (good idea), but
// are migrating multiple libraries that have duplicate barcodes... set sleader
// to a different value for each barcode.
//
// Please note that this program strips all non-numeric characters from
// the barcode present in the MARC XML file before "wrapping" the barcode.
// This is because a certain legacy system prefixes all Bibliographic
// barcodes with a 'T'.
//
// Copyright 2007 Travis Schafer
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// Initial Program: 2007-11-05 T.Schafer
//
// Changes:
// 2007-11-11 T.Schafer - 1. Modified program to strip non-numerics
// from original barcode BEFORE recoding.
//
// 2007-11-15 T.Scharer - 2. Added GPL
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#define _GNU_SOURCE
#include <getopt.h>
#define FALSE 0
#define TRUE 1
// Maximum Length of Replacement Barcode
#define MAXBARCODE 14
// Maximum length of Organization code
#define MAXORG 4
// SLEADER length copied is always 1
// Maximum length of series
#define MAXSER 7
//
void process_doc( xmlNode *collection, char *holdfield, char *bcodetag,
char *orgcode, char *sleader );
// Given a field, return a pointer to a subfield specified by tag
//
// Arguments: xmlNode *field - Field to search
// char *tag - Target Tag
//
// Returns: xmlNode * - Subfield Node
xmlNode *get_subfield( xmlNode *field, char *tag );
// Replace barcode in file
//
// Arguments: xmlNode *node - XML Node that is the barcode subfield
// char *orgcode - Orginzation Code for new barcode
// char *sleader - Series Leader for new barcode
//
// Returns: (void)
void modify_barcode( xmlNode *node, char *orgcode, char *sleader );
// Function to calculate MOD-10 check digit, which is
// a "non-standard" library standard
//
// Arguments: char *codestring - The barcode, less check digit
//
// Returns: char * - One character string, consisting of
// MOD-10 checkdigit calculated for argument
char *checkdigit( char *codestring );
// Remove all non-numeric characters (if any) from a string
//
// Arguments: char *str - String from which non-numerics will be removed
//
// Outputs: char *str - String with non-numberic removed
//
// Returns: (void) - Operates as "side-effect"
void remove_nondigit( char *str );
int main( int argc, char **argv )
{
// Current Command Line Option
int currOpt;
// Count of Command Line Options
unsigned optCount;
// Name of XML MARC file
char *xfname;
// Field with holdings
char *holdfield;
// Barcode Tag
char *bcodetag;
// Orginization Code
char *orgcode;
// Series leader (IE, Smart Barcodes)
char *sleader;
// XML Document
xmlDocPtr doc;
optCount = 0;
while( TRUE )
{
static struct option options[] =
{
{ "xmlfile", required_argument, 0, 'x' }, // XML MARC File
{ "holdfield", required_argument, 0, 'f' },
{ "bcodetag", required_argument, 0, 'b' },
{ "orgcode", required_argument, 0, 'o' },
{ "sleader", required_argument, 0, 's' },
{ 0, 0, 0, 0 }
};
int option_index = 0;
currOpt = getopt_long_only( argc, argv, "", options, &option_index );
if( currOpt == -1 )
{
break;
}
switch( currOpt )
{
case 'x' :
{
xfname = strdup( optarg );
optCount++;
break;
}
case 'f' :
{
holdfield = strdup( optarg );
optCount++;
break;
}
case 'b':
{
bcodetag = strdup( optarg );
optCount++;
break;
}
case 'o':
{
orgcode = strdup( optarg );
optCount++;
break;
}
case 's':
{
sleader = strdup( optarg );
optCount++;
break;
}
case '?':
{
// Error printed by getopt()
break;
}
}
}
// Did we get enough options?
if( optCount != 5 )
{
fprintf( stderr, "%s: Modify barcode in holdings information\n"
"--xmlfile : XML MARC file to modify\n"
"--holdfield : Field used for holdings\n"
"--bcodetag : Tag that contains barcode\n"
"--orgcode : Organization for Barcode\n"
"--sleader : Leading digit in series for Barcode\n",
argv[0] );
exit( 1 );
}
if( !( doc = xmlReadFile( xfname, NULL, 0 ) ) )
{
fprintf( stderr, "%s: Failed to read XML MARC file! Exiting.\n",
argv[0] );
exit( 1 );
}
free( xfname );
process_doc( xmlDocGetRootElement( doc ), holdfield, bcodetag, orgcode,
sleader );
xmlDocDump( stdout, doc );
xmlCleanupParser();
return 0;
}
void process_doc( xmlNode *collection, char *holdfield, char *bcodetag,
char *orgcode, char *sleader )
{
// Current record in XML Tree
xmlNode *currRec = NULL;
// Current field in XML Tree
xmlNode *currField = NULL;
for( currRec = collection->children; currRec; currRec=currRec->next )
{
// Okay, we're looking at a record. Scan the fields to find the
// field with tag = holdfield
for( currField = currRec->children; currField; currField=currField->next)
{
if( !xmlStrcmp( xmlGetProp( currField, (xmlChar*)"tag"),
(xmlChar*)holdfield ) )
{
modify_barcode( get_subfield( currField, bcodetag ), orgcode,
sleader );
}
}
}
return;
}
xmlNode *get_subfield( xmlNode *field, char *tag )
{
// Current Subfield
xmlNode *currSub;
// Pointer to return
xmlNode *retNode;
// Have we found the correct subfield?
unsigned found = FALSE;
for( currSub=field->children; currSub && !found; currSub=currSub->next )
{
if( !xmlStrcmp( xmlGetProp( currSub, (xmlChar*)"code" ), (xmlChar*)tag ))
{
found = TRUE;
retNode = currSub;
}
}
return retNode;
}
void modify_barcode( xmlNode *node, char *orgcode, char *sleader )
{
// Current Text Node
xmlNode *currTextNode;
// Current Barcode
xmlChar* currBar;
// Original Series
unsigned orgSeries;
// Barcode Buffer
xmlChar buff[MAXBARCODE+1];
// Current position in barcode buffer
xmlChar *currPos;
// Advance to child text node
currTextNode = node->children;
currBar = xmlNodeGetContent( node );
// Clean barcode
remove_nondigit( (char*)currBar );
// Get current barcode (series) as text
sscanf( (char*)currBar, "%d", &orgSeries );
// Set initial position
currPos = buff;
// Set barcode for "title"
*currPos = '3';
currPos += sizeof( xmlChar );
// Copy appropriate portion of organization code
strncpy( (char*)currPos, (char*)orgcode, sizeof( xmlChar ) * MAXORG );
// Advance past organization code
currPos += sizeof( xmlChar ) * MAXORG;
// Insert leader
*currPos = *sleader;
// Advance one position, fill existing barcode
currPos += sizeof( xmlChar );
sprintf( (char*)currPos, "%07d", orgSeries );
// Advance past series
currPos += sizeof( xmlChar ) * MAXSER;
strcpy( (char*)currPos, checkdigit( (char*)buff ) );
xmlNodeSetContent( node, buff );
return;
}
char *checkdigit( char *codestring )
{
// The algorithm to generate the check
// Stolen from http://montanalibraries.org/MLNresources/MLNbarcodes.htm,
// Modified for clarity
// Algorithm.
// 1. If the digit is in an even position, add it to the total
// 2. If the digit is in an odd-numbered position, multiply it by two.
// 2a. If the result of 2 greater than or equal to 10, subtract 9 and
// add to total.
// 2b. If the result of 2 is less than 10, add it to the total
// 3. After all digits are added, divide total by 10 and take the remainder.
// 4a. If the remainder is 0, then check digit is 0.
// 4b. If the remainder is nonzero, check digit is 10-remainder.
// Running total for the checksum
int runningTotal;
// Generic Counter
int i;
// String to return
char *retStr;
retStr = (char*)malloc( sizeof(char) * 2 );
runningTotal = 0;
for( i = 1; i <= strlen( codestring ); i++ )
{
// Current subtotal
int subtotal=0;
if( i%2 )
{
subtotal += ( codestring[i-1] - '0' ) * 2;
if( 10 <= subtotal )
{
subtotal -= 9;
}
runningTotal += subtotal;
}
else
{
subtotal = ( codestring[i-1] - '0' );
runningTotal += subtotal;
}
}
runningTotal = runningTotal % 10;
if( runningTotal )
{
runningTotal = 10 - runningTotal;
}
sprintf( retStr,"%d", runningTotal );
return retStr;
}
void remove_nondigit( char *str )
{
// Work Buffer
char *buff;
// Current Position in input/output string
char *curr;
// Generic counter
unsigned i;
buff = strdup( str );
curr = str;
for( i = 0; i < strlen( buff ); i++ )
{
if( isdigit( buff[i] ) )
{
*curr = buff[i];
curr += sizeof( char );
}
}
*curr = '\0';
free( buff );
return;
}
-------------- next part --------------
//
// gettcn.c
//
// Carson City - Crystal Area Schools
// Technology Department
// 115 E Main
// Carson City, MI 48811
// (989) 584-3138
//
// For each MARC record in a MARC XML file, this program will find a matching
// MARC record in an Evergreen Database. It will then replace the MARC XML
// file TCN with the TCN of the matching database MARC record, and output
// an updated MARC XML file to stdout.
//
// Additionally, it will produce an exception file for records in the MARC
// XML file that could not be matched with the database. Records with no
// match are excluded from the output XML document.
//
// Why? Let's say that you are migrating from a legacy system to an
// Evergreen system, and your legacy MARC records are of poor quality. The
// Evergreen database, for one reason or another, has high quality MARC
// records. You still need to import holdings, which is based on TCN.
// This program would let you import your existing holdings against the higher
// quality Evergreen MARC records
//
// Copyright 2007 Travis Schafer
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// Initial Program:
// 2007-11-11 T.Schafer
//
// Changes:
// 2007-11-15 T.Schafer 1) Added GPL
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <libpq-fe.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#define _GNU_SOURCE
#include <getopt.h>
#define FALSE 0
#define TRUE 1
#define ISBNLEN 10 // Length of an ISBN (used when cleaning ISBNs)
#define MAXSTMT 4096 // Max length of SQL stmt
#define STATUSRECS 25 // Print status every STATUSRECS records during
// DB processing
// Structure for MARC records (ie, titles)
struct title
{
char *filetcn; // TCN from file - Field 001
char *dbtcn; // TCN from database - Field 001
char *isbn; // ISBN from file - Field 020 Tag A
char *author; // Author from file - Field 100 Tag A
char *title; // Title from file - Field 245 Tag A
char *pub; // Publisher from file - Field 260 Tag A
xmlNode *record; // Pointer to this record in XML doc tree
xmlNode *tcntext; // Pointer to this record's TCN Text in doc tree
struct title *next; // Next title in linked list
};
// Initialize a linked list of titles (ie, create front guard)
//
// Arguments: (void)
//
// Returns: Pointer to new list (ie, pointer to front guard)
struct title *title_initlist( void );
// Free a list of titles
//
// Arguments: struct copy *copylist - Title list to free
// Returns: (void)
void title_freelist( struct title *titlelist );
// Append a title to the end of a linked list of titles
//
// Arguments: struct title *titlelist - List to which record is appended
// char *filetcn - File TCN
// char *isbn - ISBN
// char *author - Author
// char *title - Title
// char *pub - Publisher
// xmlNode *rec - Pointer to corresponding
// xmlNode for record
// xmlNode *tcnText - Pointer to TCN text XML Node
// The dbtcn member of the struct is initialized to a null string...
// Returns: (void)
void title_insert( struct title *titlelist, char *filetcn, char *isbn,
char *author, char *title, char *pub,
xmlNode* record, xmlNode *text);
// Fill a title list with all possible information from MARC XML file
// (ie, everthing but the database TCN)
//
// Arguments: xmlNode *collection - Collection element of XML
// doc to be processed
// struct title *titlelist - List of titles to be filled
void process_xml( xmlNode *collection, struct title *titlelist );
// Given a list of titles, fill in the database TCN where possible
//
// Arguments: PGconn *conn - Evergreen DB Connection
// struct title *titlelist - Title list to process
void process_db( PGconn *conn, struct title *titlelist );
// Run through a Title List that has been updated with database TCNs
// If there is no Database TCN present, print an error to efd, and
// remove the node from the XML tree. If there is a Database TCN present,
// update the TCN in the XML Tree
//
// Arguments: FILE *efd - Error File Descriptor
// struct title *titlelist - Fully Processed Title List
// Returns: (void)
void finalize_xml( FILE *efd, struct title *titlelist );
// Given an XML node, return a pointer to the child node whose "property"
// is set to "value"
//
// Arguments: xmlNode *parent - Node whose children will be searched
// char *property - Property that will be searched
// char *value - Value for which we are searching
//
// Returns: Pointer to matching child node, or pointer to 0 if no matching
// child is found
xmlNode *get_child_node( xmlNode *parent, char *property, char *value );
// Return the content of a given subfield (ie, child of xmlNode *field)
// where the subfield has "property" set to "value"
//
// Note: Caller must free returned string, as appropriate
//
// Arguments: xmlNode *field - Field whose subfields are searched
// char *property - Propert to search
// char *value - Property is set to this value for match
//
// Returns: Text of subfield, or a zero length string if nothing is found
char *get_subfield_contents( xmlNode *field, char *property, char *value );
// Given an ISBN, remove all characters that are not a digit or an alpha,
// and truncate resulting string.
//
// Arguments: char *isbn - ISBN to be "cleaned"
// Outputs: char *isbn - Cleaned ISBN
// Returns: (void)
void clean_isbn( char *isbn );
int main( int argc, char **argv )
{
// Current command line option
int currOpt;
// Number of command line options processed
unsigned optCount;
// Name of XML Marc File
char *xfname;
// Name of Exception File
char *efname;
// Exception file descriptor
FILE *efd;
// Evergreen database host
char *dbhost;
// Evergreen database name
char *dbname;
// Evergreen database user
char *dbuser;
// Evergreen database password
char *dbpass;
// Evergreen database connection
PGconn *conn;
// XML Document
xmlDocPtr doc;
// Title List
struct title *titlelist;
optCount = 0;
while( TRUE )
{
static struct option options[] =
{
{ "xmlfile", required_argument, 0, 'x' },
{ "expfile", required_argument, 0, 'e' },
{ "dbhost", required_argument, 0, 'h' },
{ "dbname", required_argument, 0, 'd' },
{ "dbuser", required_argument, 0, 'u' },
{ "dbpass", required_argument, 0, 'p' },
{ 0, 0, 0, 0 }
};
int option_index = 0;
currOpt = getopt_long_only( argc, argv, "", options, &option_index );
if( currOpt == -1 )
{
break;
}
switch( currOpt )
{
case 'x' :
{
xfname = strdup( optarg );
optCount++;
break;
}
case 'e' :
{
efname = strdup( optarg );
optCount++;
break;
}
case 'h' :
{
dbhost = strdup( optarg );
optCount++;
break;
}
case 'd' :
{
dbname = strdup( optarg );
optCount++;
break;
}
case 'u' :
{
dbuser = strdup( optarg );
optCount++;
break;
}
case 'p' :
{
dbpass = strdup( optarg );
optCount++;
break;
}
case '?' :
{
// Error printed by getopt()
break;
}
}
}
if( optCount != 6 )
{
fprintf( stderr, "%s: Replace TCNs in a MARC XML File with TCN of\n"
"matching record in Evergreen database.\n"
"--xmlfile : MARC XML file to process\n"
"--expfile : Exceptions Report File\n"
"--dbhost : Evergreen DB Host\n"
"--dbname : Evergreen DB Name\n"
"--dbuser : Evergreen DB User\n"
"--dbpass : Evergreen DB Pass\n",
argv[0] );
exit( 1 );
}
// Okay, we have enough arguments, try to open the exception report
// file
if( !( efd = fopen( efname, "w" ) ) )
{
fprintf( stderr, "%s: Failed to create exception file! Exiting.\n",
argv[0] );
exit( 1 );
}
// Attempt DB Connection
conn = PQsetdbLogin( dbhost, NULL, NULL, NULL, dbname, dbuser, dbpass );
if( PQstatus( conn ) == CONNECTION_BAD )
{
fprintf( stderr, "%s: Can not connet to database!\n"
"Error is :%s\n\n"
"***Exiting***\n",
argv[0], PQerrorMessage( conn ) );
PQfinish( conn );
exit( 1 );
}
if( !( doc = xmlReadFile( xfname, NULL, 0 ) ) )
{
// Error printed by xmlReadFile
exit( 1 );
}
titlelist = title_initlist();
fprintf( stderr, "Processing XML File\n" );
process_xml( xmlDocGetRootElement( doc ), titlelist );
fprintf( stderr, "Matching Records to DB\n" );
process_db( conn, titlelist );
fprintf( stderr, "Finalizing Output Document\n" );
finalize_xml( efd, titlelist );
xmlDocDump( stdout, doc );
PQfinish( conn );
xmlFreeDoc( doc );
xmlCleanupParser();
return 0;
}
struct title *title_initlist( void )
{
// List to return
struct title *retList;
retList = (struct title*)malloc( sizeof( struct title ) );
retList->next = 0;
return retList;
}
void title_freelist( struct title *titlelist )
{
// Pointer to current copy in list
struct title *curr;
// Pointer to next copy in list
struct title *next;
// Advance past guard
curr = titlelist->next;
while( curr )
{
next = curr->next;
free( curr->filetcn );
free( curr->dbtcn );
free( curr->isbn );
free( curr->author );
free( curr->title );
free( curr->pub );
free( curr );
curr = next;
}
// Free the guard
free( titlelist );
return;
}
void title_insert( struct title *titlelist, char *filetcn, char *isbn,
char *author, char *title, char *pub,
xmlNode *record, xmlNode *tcntext )
{
// Current Title in List
struct title *curr;
curr = titlelist;
// Find end of list
while( curr )
{
if( !curr->next )
{
curr->next = (struct title*)malloc( sizeof( struct title ) );
curr = curr->next;
curr->filetcn = strdup( filetcn );
curr->isbn = strdup( isbn );
curr->author = strdup( author );
curr->title = strdup( title );
curr->pub = strdup( pub );
curr->record = record;
curr->tcntext = tcntext;
curr->next = 0;
curr->dbtcn = (char*)malloc( sizeof( char ) );
*(curr->dbtcn) = '\0';
}
curr = curr->next;
}
return;
}
void process_xml( xmlNode *collection, struct title *titlelist )
{
// Current Record in XML File
xmlNode *currRec = NULL;
for( currRec = collection->children; currRec; currRec=currRec->next )
{
if( currRec->type == XML_ELEMENT_NODE )
{
xmlNode *field001; // Pointer to Field 001 element
xmlNode *field020; // Pointer to Field 020 element
xmlNode *field100; // Pointer to Field 100 element
xmlNode *field245; // Pointer to Field 245 element
xmlNode *field260; // Pointer to Field 260 element
char *filetcn; // TCN from File
char *isbn; // ISBN from File
char *author; // Author
char *title; // Title from File
char *pub; // Publisher Info from file
xmlNode *tcnText; // TCN Text Node
field001 = get_child_node( currRec, "tag", "001" );
field020 = get_child_node( currRec, "tag", "020" );
field100 = get_child_node( currRec, "tag", "100" );
field245 = get_child_node( currRec, "tag", "245" );
field260 = get_child_node( currRec, "tag", "260" );
// Great, let's get the text (if any) associated with each field,
// and load up an entry in the title list.
tcnText = field001->children;
filetcn = get_subfield_contents( currRec, "tag", "001" );
isbn = get_subfield_contents( field020, "code", "a" );
author = get_subfield_contents( field100, "code", "a" );
title = get_subfield_contents( field245, "code", "a" );
pub = get_subfield_contents( field260, "code", "a" );
title_insert( titlelist, filetcn, isbn, author, title, pub,
currRec, tcnText );
free( filetcn );
free( isbn );
free( author );
free( title );
free( pub );
}
}
return;
}
void process_db( PGconn *conn, struct title *titlelist )
{
// Current record in Title list
struct title *curr;
// SQL statement buffer
char stmt[MAXSTMT+1];
// Counter for status updates
unsigned statCount;
// Number of matched records, for status updates
unsigned matchCount;
// Current match percentage
double matchPercent;
// Result of most recently submitted statement
PGresult *sres;
// Error for most recent SQL statement
char *err;
statCount = 0;
matchCount = 0;
for( curr = titlelist->next; curr; curr=curr->next )
{
// We really only need to search for an ISBN if there is one
if( strlen( curr->isbn ) == ISBNLEN )
{
// ISBN, formatted for search
char *searchIsbn;
searchIsbn = strdup( curr->isbn );
clean_isbn( searchIsbn );
sprintf( stmt, "SELECT tcn_value, "
"CASE tcn_source "
"WHEN 'OCLC' THEN 1 "
"WHEN 'System' THEN 2 "
"WHEN 'System Local' THEN 3 "
"WHEN 'LCCN' THEN 4 "
"WHEN 'ISxN' THEN 5 "
"ELSE 9 END "
"AS sort "
"FROM "
"biblio.record_entry, metabib.full_rec "
"WHERE "
"record_entry.id = full_rec.record AND "
"tag = '020' AND "
"subfield = 'a' AND "
"tcn_source <> 'ISxN' AND "
"value ILIKE '%s%%' ORDER BY sort;",
searchIsbn );
sres = PQexec( conn, stmt );
free( searchIsbn );
err = PQresultErrorMessage( sres );
if( strlen( err ) )
{
fprintf( stderr, "\n\n\n***BEGIN SQL ERROR***\n\n" );
fprintf( stderr, "Statment: %s", stmt );
fprintf( stderr, "%s\n", err );
fprintf( stderr, "\n\nPROGRAM HALTED\n" );
exit( 1 );
}
if( PQntuples( sres ) )
{
// Valid record...
free( curr->dbtcn );
curr->dbtcn = strdup( PQgetvalue( sres, 0, 0 ) );
matchCount++;
}
}
statCount++;
if( !(statCount % STATUSRECS ) )
{
matchPercent = ((double)matchCount/(double)statCount)*100;
fprintf( stderr, "Records Processed: %d\tMatches: %d\tPercent: %f\n",
statCount, matchCount, matchPercent );
}
//PQclear( sres );
}
return;
}
void finalize_xml( FILE *efd, struct title *titlelist )
{
// Current entry in Title List
struct title *curr;
for( curr = titlelist->next; curr; curr=curr->next )
{
// There are really two ways to procede here
if( strlen( curr->dbtcn ) )
{
xmlNodeSetContent( curr->tcntext, (xmlChar*)curr->dbtcn );
}
else
{
fprintf( efd, "*** RECORD NOT FOUND IN EVERGREEN DB ***\n" );
fprintf( efd, "No TCNs found for ISBN\n" );
fprintf( efd, "File TCN: %s\n", curr->filetcn );
fprintf( efd, "ISBN: %s\n", curr->isbn );
fprintf( efd, "Title: %s\n", curr->title );
fprintf( efd, "Author: %s\n", curr->author );
fprintf( efd, "Publisher: %s\n\n\n", curr->pub );
xmlUnlinkNode( curr->record );
}
}
return;
}
xmlNode *get_child_node( xmlNode *parent, char *property, char *value )
{
// Current Child Node
xmlNode *currChild = NULL;
// Pointer to Return
xmlNode *retNode;
// Current Property
xmlChar *currProp;
// Have we found the child?
unsigned found = FALSE;
for( currChild = parent->children; currChild && !found;
currChild=currChild->next )
{
currProp = xmlGetProp( currChild, (xmlChar*)property );
if( !xmlStrcmp( currProp, (xmlChar*)value ) )
{
found = TRUE;
retNode = currChild;
}
xmlFree( currProp );
}
if( !found )
{
retNode = 0;
}
return retNode;
}
char *get_subfield_contents( xmlNode *field, char *property, char *value )
{
// Current Subfield
xmlNode *currSub = NULL;
// Current Tag
xmlChar *currTag;
// String to return
char *retStr;
// Did we find the subfield?
unsigned found = FALSE;
if ( field )
{
for( currSub = field->children; currSub && !found; currSub = currSub->next )
{
currTag = xmlGetProp( currSub, (xmlChar*)property );
if( !xmlStrcmp( currTag, (xmlChar*)value ) )
{
found = TRUE;
retStr = (char*)xmlNodeListGetString( currSub->doc,
currSub->children,
1 );
}
xmlFree( currTag );
}
}
if( !found )
{
retStr = (char*)malloc( sizeof( char ) );
*retStr = '\0';
}
return retStr;
}
void clean_isbn( char *isbn )
{
// Work buffer
char *buff;
// Current Position in "cleaned" string
char *curr;
// Generic Counter
unsigned i;
buff = strdup( isbn );
curr = isbn;
for( i = 0; i < strlen( buff ); i++ )
{
if( isdigit( buff[i] ) || isalpha( buff[i] ) )
{
*curr = buff[i];
curr += sizeof( char );
}
}
if( strlen( isbn ) < ISBNLEN )
{
*curr = '\0';
}
else
{
isbn[ISBNLEN] = '\0';
}
return;
}
-------------- next part --------------
//
// create_holdings.c
//
// Carson City - Crystal Area Schools
// Technology Department
// 115 E Main
// Carson City, MI 48811
// (989) 584-3138
//
// Given an XML MARC file, this program creates holdings information
// for records whose bibliographic data is already present in the Evergreen
// database (based on TCN).
//
// This program DOES NOT pull the "holding libary" from the the MARC XML
// file. Instead, this option is set on the command line by specifying the
// actor.org_unit.id value for the owning and circulating libraries. This
// is because the program was originally developed for K-12 migrations from
// systems that were "islands," and in many cases didn't have a holdings
// tag that indicated the owning or circulating library. Serveral other
// items of information that would be pulled from MARC during conversion
// from a "sane" system are simalarly defined as command line options. Again,
// the incumbant K-12 systems aren't sane.
//
// However, it would be trivial to modify this program to extract additional
// information from a holdings tag. Note also that several functions are
// declared and defined, but not used... they are present for debugging.
//
// This program is poorly written. It probably leaks memory... it actually causes
// permanent damage to RAM. It's been known to cause server farms to burst
// into flames. It stole money from my sock drawer. You've been warned...
//
// There are two tables involved in this enterprise. The relavent fields
// (the ones we will be filling out) are as follows (FK indicates a foreign
// key):
//
// asset.call_number
// creator : FK - User who created this entry (actor.usr.id)
// editor : FK - User who last edited this record (actor.usr.id)
// record : FK - Biblio Data for copy (biblio.record_entry.id)
// owning_lib : FK - Owning library (actor.org_unit.id)
// label : The call number!
//
//
// asset.copy
// circ_lib : FK - Circulating Library (actor.org_unit.id)
// creator : FK - User who created this entry (actor.usr.id)
// call_number : FK - Item Call Number (asset.call_number.id)
// editor : FK - User who last edited this record (actor.usr.id)
// status : FK - Item Status (config.copy_status.id)
// location : FK - Location (ie, Stacks) of copy (asset.copy_location.id)
// loan_duration : Required, but not an FK... '2' is popular
// fine_level : Required, but not an FK... '2' is popular
// price : Item Price
// barcode : Not suprisingly, the item barcode
//
// So, basically, we extract the Barcode, Price, Call Number, and TCN of
// each record in the MARC File. Then, we use the TCN to find the value for
// asset.call_number.record, create a call number record, then create a copy
// record.
//
// We get editor, owning_lib, circ_lib, creator, status, location, etc from
// the command line...
//
// Or at least that's the plan
//
//
// Copyright 2007 Travis Schafer
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 3
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// Initial Program:
// 2007-11-07 T.Schafer
//
// Changes:
//
// 2007-11-13 T.Schafer - 1) Initialized variable in wrong spot. This meant
// that if any copy was found to have an existing
// barcode, no subsequent entires would be inserted
// to the database (basically, we didn't reset and
// "exists" flag at the top of a loop)
//
// 2007-11-14 T.Schafer 2) Inserted GPL/Copyright Notice
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <libpq-fe.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#define _GNU_SOURCE
#include <getopt.h>
#define TRUE 1
#define FALSE 0
#define MAXFIELD 512 // Maximum field length in copy struct (used to
// construct a buffer for escaping of strings).
#define MAXSTMT 4096 // Maximum length of an SQL statement
#define STATUSINTVL 250 // On database update, give status every STATUSINTVL
// records.
// Structure for copy records
struct copy
{
char *tcn; // TCN of biblio to which copy is associated
char *call; // Copy Call Number
char *bcode; // Copy Barcode
char *price; // Copy Price
struct copy *next; // Next copy in a linked list
};
// Initialize a linked list of copies (ie, create front guard)
//
// Arguments: (void)
//
// Returns: Pointer to new list (ie, pointer to front guard)
struct copy *copy_initlist( void );
// Free a list of copies
//
// Arguments: struct copy *copylist - Copy list to free
// Returns: (void)
void copy_freelist( struct copy *copylist );
// Insert a copy at the end of a list
//
// Arguments: struct copy *copylist - List into which new record is inserted
// char *tcn - TCN of new record
// char *call - Call Number of new record
// char *bcode - Barcode of new record
// char *price - Price of new record
//
// Returns: (void)
void copy_insert( struct copy *copylist, char *tcn, char *call, char *bcode,
char *price );
// Link copy lists lista and listb
//
// Note: This function literally links the linked lists... so neither list
// may be freed after a call to this function. However, the list B start guard
// is freed, so all references to listb become invalid.
//
// Arguments: struct copy* lista - First List
// struct copy* listb - Second List
//
// Returns: (void)
void copy_linklists( struct copy *lista, struct copy *listb );
// Print the contents of a list of copies
//
// Arguments: FILE* ofd - File Descriptor to which list
// is printed
// struct copy *copylist - Copy list to print
// Returns: (void)
void copy_printlist( FILE* ofd, struct copy *copylist );
// Count the number of elements in a linked list of copies
//
// Arguments: struct copy *copylist - List to count
// Returns: unsigned - Number of copies in list
unsigned copy_countlist( struct copy *copylist );
// Return a linked list of all holdings found in a MARC XML document
//
// Arguments: PGconn *conn - Needed for escaping string for SQL
// (ie, get DB localization info)
// xmlNode *collection - Collection node from XML Marc doc
// char *tcnfield - Field that holds item TCN
// char *holdfield - (sub)Field that has holdings data
// char *bcodefield - (sub)Field that has barcode
// char *calltag - (sub)Field that price
//
// Returns: Linked list of all holdings found in document
struct copy *process_doc( PGconn *conn, xmlNode *collection, char *tcnfield,
char *holdfield, char *bcodetag, char *calltag,
char *pricetag );
// Given a record, return a linked list of all holdings found within that
// record
//
// Arguments: PGconn *conn - DB Connection for escaping strings
// xmlNode *record - Record to process
// char *tcnfield - Tag of TCN Field
// char *holdfield - Tag of holdings field
// char *bcodetag - Tag of barcode subfield
// char *calltag - Tag of call number subfield
// char *pricetag - Tag of price subfield
//
// Returns: struct copy* - Linked list of copies found in record,
// or NULL if no copies are found
struct copy *process_record( PGconn *conn, xmlNode *record, char *tcnfield,
char *holdfield, char *bcodetag, char *calltag,
char *pricetag );
// Return the content of a given subfield (ie, child of xmlNode *field)
// where the subfield has "property" set to "value"
//
// Note: Caller must free returned string, as appropriate
//
// Arguments: xmlNode *field - Field whose subfields are searched
// char *property - Propert to search
// char *value - Property is set to this value for match
//
// Returns: Text of subfield, or a zero length string if nothing is found
char *get_subfield_contents( xmlNode *field, char *property, char *value );
// Remove any character that is not a digit or a '.'
//
// Arguments: char *price - Price string to be cleaned
//
// Outputs: char *price - Cleaned price string
// Returns: (void)
void clean_price( char *price );
// Given a linked list of copies, update the Evergreen database...
//
// Arguments: PGconn *conn - Connection to Evergreen DB
// struct copy *copylist - List of copies for which records must
// be created
// unsigned egcreator - Database ID of Creating User
//
// Returns: (void)
void update_database( PGconn *conn, struct copy *copylist, unsigned egcreator,
unsigned egeditor, unsigned egstatus,
unsigned egownlib, unsigned egcirclib,
unsigned eglocation );
// Print the statement, error message contained in a statement result (if any),
// and halt execution immediately.
//
// Arguments: FILE *ofd - Descriptor to which error is printed
// char *stmt - Statement associated with result
// PGresult *sres - Statement Result
//
// Returns: (void)
void database_printerr( FILE *ofd, char *stmt, PGresult *sres );
int main( int argc, char **argv )
{
// Current Command Line Option
int currOpt;
// Count of Command Line Options Process
unsigned optCount;
// Name of XML MARC file
char *xfname;
// Database Host
char *dbhost;
// Database Name
char *dbname;
// Database User
char *dbuser;
// Database Password
char *dbpass;
// Evergreen Creator ID
unsigned egcreator;
// Evergreen Editor ID
unsigned egeditor;
// Evergreen Status ID
unsigned egstatus;
// Evergreen Owning Lib ID
unsigned egownlib;
// Evergreen Cirgulating Lib ID
unsigned egcirclib;
// Evergreen Location ID
unsigned eglocation;
// Tag of TCN Field
char *tcnfield;
// Tag of Holdings Field
char *holdfield;
// Tag of Barcode Field
char *bcodetag;
// Tag of Call Number Subfield
char *calltag;
// Tag of Price Subfield
char *pricetag;
// Linked list of copies
struct copy *copylist;
// XML Document
xmlDocPtr doc;
// Connection to database
PGconn *conn;
optCount=0;
while( TRUE )
{
static struct option options [] =
{
{ "xmlfile", required_argument, 0, 'x' },
{ "dbhost", required_argument, 0, 's' },
{ "dbname", required_argument, 0, 'd' },
{ "dbuser", required_argument, 0, 'u' },
{ "dbpass", required_argument, 0, 'w' },
{ "egcreator", required_argument, 0, 'r' },
{ "egeditor", required_argument, 0, 'e' },
{ "egownlib", required_argument, 0, 'o' },
{ "egcirclib", required_argument, 0, 'i' },
{ "egstatus", required_argument, 0, 'a' },
{ "eglocation", required_argument, 0, 'l' },
{ "tcnfield", required_argument, 0, 't' },
{ "holdfield", required_argument, 0, 'h' },
{ "bcodetag", required_argument, 0, 'b' },
{ "calltag", required_argument, 0, 'c' },
{ "pricetag", required_argument, 0, 'p' },
{ 0, 0, 0, 0 }
};
int option_index = 0;
currOpt = getopt_long_only( argc, argv, "", options, &option_index );
if( currOpt == -1 )
{
break;
}
switch( currOpt )
{
case 'x' :
{
xfname = strdup( optarg );
optCount++;
break;
}
case 's' :
{
dbhost = strdup( optarg );
optCount++;
break;
}
case 'd' :
{
dbname = strdup( optarg );
optCount++;
break;
}
case 'u' :
{
dbuser = strdup( optarg );
optCount++;
break;
}
case 'w' :
{
dbpass = strdup( optarg );
optCount++;
break;
}
case 'r' :
{
sscanf( optarg, "%d", &egcreator );
optCount++;
break;
}
case 'e' :
{
sscanf( optarg, "%d", &egeditor );
optCount++;
break;
}
case 'a' :
{
sscanf( optarg, "%d", &egstatus );
optCount++;
break;
}
case 'o' :
{
sscanf( optarg, "%d", &egownlib );
optCount++;
break;
}
case 'i' :
{
sscanf( optarg, "%d", &egcirclib );
optCount++;
break;
}
case 'l' :
{
sscanf( optarg, "%d", &eglocation );
optCount++;
break;
}
case 't' :
{
tcnfield = strdup( optarg );
optCount++;
break;
}
case 'h' :
{
holdfield = strdup( optarg );
optCount++;
break;
}
case 'b' :
{
bcodetag = strdup( optarg );
optCount++;
break;
}
case 'c' :
{
calltag = strdup( optarg );
optCount++;
break;
}
case 'p' :
{
pricetag = strdup( optarg );
optCount++;
break;
}
case '?':
{
// Error printed by getopt()
break;
}
}
}
if( optCount != 16 )
{
fprintf( stderr, "%s: Given an XML MARC file, create holding\n"
"records in Evergreen.\n\n"
"Arguments:\n"
"--xmlfile : XML MARC File\n"
"--dbhost : Evergreen Database Host\n"
"--dbname : Evergreen Database Name\n"
"--dbuser : Evergreen Database User\n"
"--dbpass : Evergreen Database Password\n"
"--egcreator : Primary Key of Evergreen Record Creator\n"
" (ie, record is marked as created by this user\n"
"--egeditor : Primary Key of Evergreen Editing User\n"
"--egstatus : Evergreen Status to set on creation\n"
"--egownlib : Primary Key of Owning Library\n"
"--egcirclib : Primary Key of Circulating Library\n"
"--eglocation : Primary Key of Copy Location (ie, stacks)\n"
"--tcnfield : Tag of TCN Field (001 is common)\n"
"--holdfield : Tag of holdings field (852 is common)\n"
"--bcodetag : Tag of barcode subfield\n"
"--calltag : Tag of callnumber subfield\n"
"--pricetag : Tag of item price subfield\n",
argv[ 0 ] );
exit( 1 );
}
// Attempt to parse XML File
if( !(doc = xmlReadFile( xfname, NULL, 0 ) ) )
{
// Error printed by xmlReadFile
exit( 1 );
}
// Attempt DB Connection
conn = PQsetdbLogin( dbhost, NULL, NULL, NULL, dbname, dbuser, dbpass );
if( PQstatus( conn ) == CONNECTION_BAD )
{
fprintf( stderr, "%s: Can not connet to database!\n"
"Error is :%s\n\n"
"***Exiting***\n",
argv[0], PQerrorMessage( conn ) );
PQfinish( conn );
exit( 1 );
}
// Extract linked list of copy structures from XML doc tree
copylist = process_doc( conn, xmlDocGetRootElement( doc ), tcnfield,
holdfield, bcodetag, calltag, pricetag );
xmlFreeDoc( doc );
xmlCleanupParser();
// Update database to reflect linked list of copies
update_database( conn, copylist, egcreator, egeditor, egstatus, egownlib,
egcirclib, eglocation );
copy_freelist( copylist );
PQfinish( conn );
free( xfname );
free( dbhost );
free( dbname );
free( dbuser );
free( dbpass );
free( tcnfield );
free( holdfield );
free( bcodetag );
free( calltag );
free( pricetag );
return 0;
}
struct copy *copy_initlist( void )
{
// List to return
struct copy *retList;
retList = (struct copy*)malloc( sizeof( struct copy ) );
retList->next = 0;
return retList;
}
void copy_freelist( struct copy *copylist )
{
// Pointer to current copy in list
struct copy *curr;
// Pointer to next copy in list
struct copy *next;
// Advance past guard
curr = copylist->next;
while( curr )
{
next = curr->next;
free( curr->tcn );
free( curr->call );
free( curr->bcode );
free( curr->price );
free( curr );
curr = next;
}
// Free the guard
free( copylist );
return;
}
void copy_insert( struct copy *copylist, char *tcn, char *call, char *bcode,
char *price )
{
// Current Copy in List
struct copy* curr;
curr = copylist;
// Find the end of the list
while( curr )
{
if( !curr->next )
{
curr->next = (struct copy*)malloc( sizeof( struct copy ) );
curr = curr->next;
curr->tcn = strdup( tcn );
curr->call = strdup( call );
curr->bcode = strdup( bcode );
curr->price = strdup( price );
curr->next = 0;
}
curr = curr->next;
}
return;
}
void copy_linklists( struct copy *lista, struct copy *listb )
{
// Current position in list a
struct copy *curr;
// Advance curr to the last record
for( curr = lista; curr->next; curr=curr->next );
// Okay, curr is pointing to the end record in list A. Link the lists.
curr->next = listb->next;
// Dont forget to free the guard.
free( listb );
return;
}
void copy_printlist( FILE* ofd, struct copy *copylist )
{
// Current item in copylist
struct copy* curr;
for( curr = copylist->next; curr; curr=curr->next )
{
fprintf( ofd, "\n*** Copy Record ***\n" );
fprintf( ofd, "TCN: %s\n", curr->tcn );
fprintf( ofd, "Call: %s\n", curr->call );
fprintf( ofd, "Barcode: %s\n", curr->bcode );
fprintf( ofd, "Price: %s\n", curr->price );
}
return;
}
unsigned copy_countlist( struct copy *copylist )
{
// Current copy in list
struct copy *curr;
// Count of copies
unsigned count=0;
for( curr = copylist->next; curr; curr=curr->next )
{
count++;
}
return count;
}
struct copy *process_doc( PGconn *conn, xmlNode *collection, char *tcnfield,
char *holdfield, char *bcodetag, char *calltag,
char *pricetag )
{
// Current Record in XML Tree
xmlNode *currRec = NULL;
// Linked list of holdings found...
struct copy *retList;
retList = copy_initlist();
for( currRec = collection->children; currRec; currRec=currRec->next )
{
copy_linklists( retList,
process_record( conn, currRec, tcnfield, holdfield,
bcodetag, calltag, pricetag ) );
}
return retList;
}
struct copy *process_record( PGconn *conn, xmlNode *record, char *tcnfield,
char *holdfield, char *bcodetag, char *calltag,
char *pricetag )
{
// Pointer to current field
xmlNode *currField = NULL;
// Tag of current Field
xmlChar *currTag;
// Current TCN
char *currTcn;
// Current Call Number
char *currCall;
// Current Barcode
char *currBcode;
// Current Price
char *currPrice;
// Copy list to return
struct copy *retList;
// Did we find good copy records?
unsigned goodRecs = TRUE;
// Initialize return list
retList = copy_initlist();
// Get the TCN
currTcn = get_subfield_contents( record, "tag", tcnfield );
if( !strlen( currTcn ) )
{
goodRecs = FALSE;
}
// Now we need to find any holdings tags and get the necessary subfields.
// There is really no reason to do this if we didn't get a TCN.
if( goodRecs )
{
for( currField=record->children; currField; currField=currField->next )
{
currTag = xmlGetProp( currField, (xmlChar*)"tag" );
if( !xmlStrcmp( currTag, (xmlChar*)holdfield ) )
{
currCall = get_subfield_contents( currField, "code", calltag );
currBcode = get_subfield_contents( currField, "code", bcodetag );
currPrice = get_subfield_contents( currField, "code", pricetag );
// Okay, call number and barcode are required... we can live
// without price. Therefore, if we have both currCall and
// currBcode, we should enter a new copy in our copylist to
// return
if( strlen( currCall ) && strlen( currBcode ) )
{
// Buffer for escaping of strings
char buff[MAXFIELD+1];
// If we didn't get a price, default to '0.00'. Otherwise,
// clean up existing price
if( !strlen( currPrice ) )
{
free( currPrice );
currPrice = strdup( "0.00" );
}
else
{
clean_price( currPrice );
}
// We really need to escape these, as well as the TCN,
// before we insert into the struct... since the struct
// is later used for SQL purposes.
PQescapeStringConn( conn, buff, currTcn, strlen(currTcn),
NULL );
free( currTcn );
currTcn = strdup( buff );
PQescapeStringConn( conn, buff, currCall, strlen(currCall),
NULL );
free( currCall );
currCall = strdup( buff );
PQescapeStringConn( conn, buff, currBcode, strlen(currBcode),
NULL );
free( currBcode );
currBcode = strdup( buff );
PQescapeStringConn( conn, buff, currPrice, strlen(currPrice),
NULL );
free( currPrice );
currPrice = strdup( buff );
copy_insert( retList, currTcn, currCall, currBcode,
currPrice );
free( currCall );
free( currBcode );
free( currPrice );
}
}
xmlFree( currTag );
}
}
free( currTcn );
return retList;
}
char *get_subfield_contents( xmlNode *field, char *property, char *value )
{
// Current Subfield
xmlNode *currSub = NULL;
// Current Tag
xmlChar *currTag;
// String to return
char *retStr;
// Did we find the subfield?
unsigned found = FALSE;
for( currSub = field->children; currSub && !found; currSub = currSub->next )
{
currTag = xmlGetProp( currSub, (xmlChar*)property );
if( !xmlStrcmp( currTag, (xmlChar*)value ) )
{
found = TRUE;
retStr = (char*)xmlNodeListGetString( currSub->doc,
currSub->children,
1 );
}
xmlFree( currTag );
}
if( !found )
{
retStr = (char*)malloc( sizeof( char ) );
*retStr = '\0';
}
return retStr;
}
void clean_price( char *price )
{
// Buffer
char *buff;
// Current position in input/output string (we're a side-effect)
char *currPos;
// Generic counter
unsigned i;
// Initialize buffer and set current position in output string
buff = strdup( price );
currPos = price;
*price = '\0';
for( i = 0; i < strlen( buff ); i++ )
{
if( isdigit( buff[i] ) || (buff[i] == '.' ) )
{
*currPos = buff[i];
currPos += sizeof( char );
}
}
*currPos = '\0';
free( buff );
}
void update_database( PGconn *conn, struct copy *copylist, unsigned egcreator,
unsigned egeditor, unsigned egstatus,
unsigned egownlib, unsigned egcirclib,
unsigned eglocation )
{
// SQL statement Buffer
char stmt[MAXSTMT+1];
// Result of most recently submitted statement
PGresult *sres;
// Current Copy
struct copy *curr;
// This could take a while. Keep track of the number of records that
// we've processed. Give a status update every once in a while.
unsigned processed = 0;
// Advance past guard
curr = copylist->next;
// Start Transaction
sres = PQexec( conn, "BEGIN;" );
database_printerr( stderr, stmt, sres );
PQclear( sres );
while( curr )
{
// Does copy already exist, based on barcode?
unsigned exists = FALSE;
// See if a copy record already exists in the database, based on barcode
sprintf( stmt, "SELECT id FROM asset.copy WHERE barcode = '%s';",
curr->bcode );
sres = PQexec( conn, stmt );
database_printerr( stderr, stmt, sres );
if( PQntuples( sres ) )
{
fprintf( stdout, "Barcode already exists: Bcode %s\tTCN %s\n",
curr->bcode, curr->tcn );
exists = TRUE;
}
PQclear( sres );
// Get biblio.record_entry key for this record
sprintf( stmt, "SELECT id FROM biblio.record_entry WHERE "
"tcn_value = '%s';", curr->tcn );
sres = PQexec( conn, stmt );
database_printerr( stderr, stmt, sres );
if( PQntuples( sres ) && !exists )
{
// Primary key of existing bibliographic record entry
unsigned biblioKey;
// Primary key of new/existing call number record
unsigned callKey;
// We have a bibliographic record entry. Get the key
sscanf( PQgetvalue( sres, 0, 0 ), "%d", &biblioKey );
PQclear( sres );
// This is worth a short discussion. The "right" way to do this
// is to figure out how many records we are going to insert
// ahead of time, and reserve space in the primary key sequence.
// However, there is a damned fine chance that we'll end up
// inserting far fewer records than are present in our list.
// To compensate for that, we could check all of the records
// ahead of time, to make sure that there is a matching bibliographic
// entry in the database... but then we aren't saving any queries..
// Okay, the record, owning_lib, label tuple must be unique in
// asset.call_number. This implies that asset.call_number is the
// "volume" that appears in the user interface. SO, we need to see
// if an appropriate asset.call_number record exists. If it does,
// we use that... if it doesn't, we create one
sprintf( stmt, "SELECT id FROM asset.call_number WHERE "
"record = %d AND owning_lib = %d AND label = '%s';",
biblioKey, egownlib, curr->call );
sres = PQexec( conn, stmt );
database_printerr( stderr, stmt, sres );
if( PQntuples( sres ) )
{
// Existing asset.call_number
sscanf( PQgetvalue( sres, 0, 0 ), "%d", &callKey );
PQclear( sres );
}
else
{
// New asset.call_number
sprintf( stmt, "SELECT nextval( 'asset.call_number_id_seq' );" );
sres = PQexec( conn, stmt );
database_printerr( stderr, stmt, sres );
sscanf( PQgetvalue( sres, 0, 0 ), "%d", &callKey );
PQclear( sres );
sprintf( stmt, "INSERT INTO asset.call_number "
"( id, creator, editor, record, "
" owning_lib, label ) "
"VALUES "
"( %d, %d, %d, %d, "
" %d, '%s' );",
callKey, egcreator, egeditor, biblioKey,
egownlib, curr->call );
sres = PQexec( conn, stmt );
database_printerr( stderr, stmt, sres );
PQclear( sres );
}
// Create the asset.copy entry
sprintf( stmt, "INSERT INTO asset.copy "
"( circ_lib, creator, call_number, "
" editor, status, location, "
" loan_duration, fine_level, price, "
" barcode ) "
"VALUES "
"( %d, %d, %d, "
" %d, %d, %d, "
" %d, %d, '%s', "
" %s );",
egcirclib, egcreator, callKey,
egeditor, egstatus, eglocation,
2, 2, curr->price,
curr->bcode );
sres = PQexec( conn, stmt );
database_printerr( stderr, stmt, sres );
PQclear( sres );
}
else if( !exists )
{
fprintf( stdout, "TCN Not Found: %s\n", curr->tcn );
PQclear( sres );
}
curr = curr->next;
processed++;
if( !(processed % STATUSINTVL ) )
{
fprintf( stderr, "Processed: %d records\n", processed );
}
}
// End Transaction
sres = PQexec( conn, "COMMIT;" );
database_printerr( stderr, stmt, sres );
PQclear( sres );
return;
}
void database_printerr( FILE *ofd, char *stmt, PGresult *sres )
{
// Pointer to error message within sres
char *err;
err = PQresultErrorMessage( sres );
if( strlen( err ) )
{
fprintf( stderr, "\n\n\n***Begin SQL Error**\n\n" );
fprintf( stderr, "Statement: %s\n", stmt );
fprintf( stderr, "%s\n", err );
fprintf( stderr, "IMPORT ABORTED. TRANSACTION ROLLED BACK.\n" );
fprintf( stderr, "DATABASE REMAINS UNCHANGED.\n" );
exit( 1 );
}
return;
}
More information about the Open-ils-dev
mailing list