[OPEN-ILS-DEV] More Quick Utilities

Travis Schafer tschafer at carsoncity.k12.mi.us
Thu Nov 15 15:11:31 EST 2007


Skipped content of type multipart/alternative-------------- next part --------------
//
// import2.c
//
// Carson City - Crystal Area Schools
// Technology Department
// 115 E Main
// Carson City, MI 48811
// (989) 584-3138
//
// This program synchronizes patron records within an Evergreen (aka Open-ILS)
// Integrated Libary System database with an external file. Specifically,
// this program is used to create/update/inactivate patron records to
// match the CIMS Student Management System. It uses the CMT/CIMS Excelsior
// export file format to do this, which is a simple file format to generate.
//
// The program only deals with records within a given user group. This
// is to prevent student changes from affecting staff/public patron records.
// As such, the program takes the primary key of an Evergreen usergroup
// as an argument. It additionally requires the primary key of the
// Organizational Unit that should be set as the "home library."
//
// The program matches the import file's student ID field to the Evergreen
// database's username field. The address is created with a label of "CIMS,"
// we leave all other address records alone when we update records, but we
// will always reset the "CIMS" label to match the import file. Also, after
// the initial creation of the record, we leave password and barcode data
// alone.
//
// Copyright 2007 Travis Schafer
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
//
//
// Initial Program:
// 2007-10-26 T.Schafer
//
// Changes:
//
// 2007-11-01 T.Schafer - Added student_print function for diagnostics
//                      - Modified Makefile because, strangely, we segfault on
//                        DeWitt data if we compile at optimization level 2.
//
// 2007-11-07 T.Schafer - Noticed that we had some lines switched that meant:
//                        We exit the program before freeing the DB connection
//                        if the inital connection fails. This doesn't really
//                        matter, but it's good to be good...
//                      - Modified record insertions to be concurrency safe
//                        (ie, reserve an OID, instead of selecting the max
//                         id in the current table and assume it will work
//                         as a foreign key).
//
// 2007-11-15 T.Schafer - Added comments to make program GPL

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libpq-fe.h>

#define _GNU_SOURCE
#include <getopt.h>

#define MAXLINE 4096       // Maximum length of line in import file
#define MAXFIELD 30        // Maximum length of field in import file
#define MAXSTMT 4096       // Maximum length of an SQL statement

// Birthdates are given to us with a two digit year. If those two digits are
// LESS than CENTURYCUT, we set the century to CENTURYHIGH. Else, we set
// CENTURYLOW. These settings give any two digit birthyear greater than 69
// a century of 19, and anything else a century of 20
#define CENTURYCUT 70
#define CENTURYHIGH 20
#define CENTURYLOW 19

// Structure for student records
struct student
{
  char *id;             // Student ID
  char *fname;          // First Name
  char *mname;          // Middle Name
  char *lname;          // Last Name
  char *guardian;       // Guardian Full Name
  char *street;         // Street (eg 115 E Main St)
  char *city;           // City
  char *state;          // State
  char *zip;            // Zip
  char *bdate;          // Birthdate (YYYY-MM-DD)
  char *phone;          // Phone (NXX-NXX-XXXX)
  struct student *next; // Next student in linked list
};


// Initialize a linked list of students (IE, create a guard)
//
// Arguments: (void)
// Returns: Pointer to new list (ie, guard record)
struct student *student_initlist( void );

// Free a list of students
//
// Arguments: student *stulist   - Student list to free
// Returns: void
void student_freelist( struct student* stulist );

// Add a student to the end of a student list
//
// Arguments: struct student * stulist   - List to which we insert
//            char *id                   - Student ID
//            char *fname                - First Name
//            char *mname                - Middle Name
//            char *lname                - Last Name
//            char *guardian             - Guardian Full Name
//            char *street               - Street (eg 115 E Main St)
//            char *city                 - City
//            char *state                - State
//            char *zip                  - Zip
//            char *bdate                - Birthdate (YYYY-MM-DD)
//            char *phone                - Phone (NXX-NXX-XXXX)
//
// Returns: Void
void student_insert( struct student *stulist, char *id, char *fname,
		     char *mname, char *lname, char *guardian, char *street,
		     char *city, char *state, char *zip, char *bdate,
		     char *phone );

// Insert students from file at end of given list of students
//
// Arguments: PGconn *conn       - Database connection (needed to escape
//                                 strings  for SQL, ie, handle single quotes)
//            student *stulist   - Student list to populate
//            FILE *ifd          - File descriptor for input file
// Returns: Number of students inserted in stulist
unsigned student_readlist( PGconn *conn, struct student* stulist, FILE *ifd );

// Prints the contents of the current student structures records
//
// Arguments: FILE* ofd          - File descriptor to which we print records.
//            struct *currstu    - Student to print
// Returns:   void
void student_print( FILE* ofd, struct student *currstu );

// Update database to reflect import file.
//
// This function first sets all users who are in both the specified
// organizational unit AND group to inactive. It then scans the student
// list, matching student ID in the list to username in the database.
// If a match is found, that record is marked as active and all fields are
// updated to reflect the import file. If a record is not found, one is
// inserted and made active. Marking everyone inactive ahead of time is
// brute force... but it does handle end-of-year promotion to another building
// (and hence OU). It is important to note that when we update a record, we
// do so without regaurd to OU. This facilitates students who move between
// OUs. This is all done in a single transaction.
//
// Arguments: PGconn *conn              - Connection to Evergreen database
//            struct student *stulist   - List of students
//            unsigned ou               - Organizational Unit ID
//            unsigned profile          - Profile (Group) ID
void evergreen_update( PGconn *conn, struct student *stulist, unsigned ou,
		       unsigned group );

int main( int argc, char **argv )
{
  // Runtime ptions - Filled by getopts_long_only()
  int currOpt;      // Current Option
  char *dbhost;     // Database Host
  char *dbname;     // Database Name
  char *dbuser;     // Database User
  char *dbpass;     // Password for Database User
  unsigned egprof;  // Touch/Create only users with this profile ID
  unsigned egou;    // Create users in the following OU ID
  char *infile;     // Import file name

  // Number of options that we processed. Probably provided already by
  // getopt(), but I don't know where or how.
  unsigned optcount=0;

  // File descriptor for input file
  FILE *ifd;

  // Connection to database
  PGconn *conn;

  // List of students in import file
  struct student *stulist;
  
  // Process options
  while( 1 )
    {
      static struct option options[] =
	{
	  { "dbhost", required_argument, 0, 'h' },    // Database Host
	  { "dbname", required_argument, 0, 'd' },    // Database Name
	  { "dbuser", required_argument, 0, 'u' },    // Database User
	  { "dbpass", required_argument, 0, 'p' },    // Database Password
	  { "egprof", required_argument, 0, 'g' },    // Evergreen Profile ID
	  { "egou",   required_argument, 0, 'l' },    // Evergreen home OU ID
	  { "infile", required_argument, 0, 'f' },    // Import File	  
	  { 0, 0, 0, 0 }
	};
      int option_index = 0;
      
      currOpt = getopt_long_only( argc, argv, "", options, &option_index );

      // Are we done processing options?
      if( currOpt == -1 )
	{
	  break;
	}

      switch( currOpt )
	{
	case 'h':
	  {
	    dbhost =  strdup( optarg );
	    optcount++;
	    break;
	  }
	case 'd':
	  {
	    dbname = strdup( optarg );
	    optcount++;
	    break;
	  }
	case 'u':
	  {
	    dbuser = strdup( optarg );
	    optcount++;
	    break;
	  }
	case 'p':
	  {
	    dbpass = strdup( optarg );
	    optcount++;
	    break;
	  }
	case 'g':
	  {
	    sscanf( optarg, "%d", &egprof );
	    optcount++;
	    break;
	  }
	case 'l':
	  {
	    sscanf( optarg, "%d", &egou );
	    optcount++;
	    break;
	  }
	case 'f':
	  {
	    infile = strdup( optarg  );
	    optcount++;
	    break;
	  }
	case '?':
	  {
	    /* Error message printed by getopt */
	    break;
	  }
	}    
    }
  
  // Did we get enough options?
  if( optcount != 7 )
    {
      fprintf( stderr, "%s: Required options\n"
	       "--dbhost=          : Database Host\n"
	       "--dbname=          : Database Username\n"
	       "--dbuser=          : Database User\n"
	       "--dbpass=          : Database Password\n"
	       "--egprof=          : Evergreen Profile ID for Students\n"
	       "--egou=            : OU ID of Home Library for this import\n"
	       "--infile=          : Import file\n", argv[0] );
      exit( 1 );
    }
  
  // Open the input file
  if( !( ifd = fopen( infile, "r" ) ) )
    {
      fprintf( stderr, "%s: Can not open input file. Exiting!\n", argv[0] );
      exit( 1 );
    }
  
  // Connect to database
  conn = PQsetdbLogin( dbhost, NULL, NULL, NULL, dbname, dbuser, dbpass );
  if( PQstatus( conn ) == CONNECTION_BAD )
    {
      fprintf( stderr, "%s: Cannot connect to database!\n"
	       "Error is: %s\n\n***Exiting!***\n",
	       argv[0], PQerrorMessage( conn ) );
      PQfinish( conn );
      exit( 1 );
    }

  // Build list of students from file
  stulist = student_initlist();
  student_readlist( conn, stulist, ifd );
  fclose( ifd );
  
  // Use student list to update database
  evergreen_update( conn, stulist, egou, egprof );
  
  // End of run cleanup
  student_freelist( stulist );
  PQfinish( conn );
  
  return 0;
}

struct student *student_initlist( void )
{
  // Pointer to return
  struct student *retPtr;

  retPtr = (struct student*)malloc( sizeof( struct student ) );

  retPtr->next = 0;

  return retPtr;
}

void student_freelist( struct student *stulist )
{
  // Pointer to current student
  struct student *currStu;

  // Pointer to next student
  struct  student *nextStu;

  // Advance past guard
  currStu = stulist->next;

  while( currStu )
    {
      nextStu = currStu->next;
      free( currStu->id );
      free( currStu->fname );
      free( currStu->mname );
      free( currStu->lname );
      free( currStu->guardian );
      free( currStu->street );
      free( currStu->city );
      free( currStu->state );
      free( currStu->zip );
      free( currStu->bdate );
      free( currStu->phone );
      free( currStu );
      currStu = nextStu;
    }

  // Free the guard
  free( stulist );
  
  return;
}

void student_insert( struct student *stulist, char *id,
				char *fname, char *mname, char *lname,
				char *guardian, char *street, char *city,
				char *state, char *zip, char *bdate,
				char *phone )
{
  // Current student in list
  struct student *currStu;

  currStu = stulist;
    
  // Find the end of the list
  while( currStu )
    {
      if( !currStu->next )
	{
	  currStu->next = (struct student*)malloc( sizeof( struct student ) );
	  currStu = currStu->next;
	  
	  currStu->id = strdup( id );
	  currStu->fname = strdup( fname );
	  currStu->mname = strdup( mname );
	  currStu->lname = strdup( lname );
	  currStu->guardian = strdup( guardian );
	  currStu->street = strdup( street );
	  currStu->city = strdup( city );
	  currStu->state = strdup( state );
	  currStu->zip = strdup( zip );
	  currStu->bdate = strdup( bdate );
	  currStu->phone = strdup( phone );
	  currStu->next = 0;
	}
      currStu = currStu->next;
    }
  return;
}

unsigned student_readlist( PGconn *conn, struct student* stulist, FILE *ifd )
{
  // Buffer for reading from input file
  char buff[MAXLINE+1];

  // Contents of current field, properly escaped
  char currField[MAXFIELD+1];
  
  // Counter of records processed
  unsigned rp;

  rp = 0;
  while( fgets( buff, MAXLINE, ifd ) )
  {
    // Fields from file
    char id[MAXFIELD+1];              // ID
    char fullname[MAXFIELD+1];        // Full Name
    char grade[MAXFIELD+1];           // Grade
    char bmon[MAXFIELD+1];            // Birthday Month (MM)
    char bday[MAXFIELD+1];            // Birthday Day   (DD)
    char byear[MAXFIELD+1];           // Birthday Year  (YYYY)
    char gender[MAXFIELD+1];          // Gender
    char guardian[MAXFIELD+1];        // Guardian
    char street[MAXFIELD+1];          // Street Address
    char city[MAXFIELD+1];            // City
    char state[MAXFIELD+1];           // State
    char zip[MAXFIELD+1];             // Zip
    char phone[MAXFIELD+1];           // Phone
    
    // Fields we build (because we dont have the correct format, yet)
    char fname[MAXFIELD+1];           // First Name
    char mname[MAXFIELD+1];           // Middle Name
    char lname[MAXFIELD+1];           // Last Name
    char birthdate[MAXFIELD+1];       // Birthday YYYY-MM-DD
    unsigned century;                 // Birth Century
    
    // Cleanup Middle name (otherwise, if current record has no
    // middle name, we inherit the previous middle name)
    mname[0]='\0';
    // Extract Records
    sscanf( buff, "\"%[^\"]\",\"%[^\"]\",\"%[^\"]\",\"%[^/]/%[^/]/%[^\"]\",\"%[^\"]\",\"%[^\"]\",\"%[^\"]\",\"%[^,], %s %[^\"]\",\"%[^\"]",
	    id,
	    fullname,
	    grade,
	    bmon,
	    bday,
	    byear,
	    gender,
	    guardian,
	    street,
	    city,
	    state,
	    zip,
	    phone
	    );
    
    
    // Seperate first, middle, last names
    sscanf( fullname,"%[^,], %s%s", lname, fname, mname );
    
    // Add a century to the birth year
    sscanf( byear, "%d", &century );
    
    if( century < CENTURYCUT )
      {
	century = CENTURYHIGH;
      }
    else
      {
	century = CENTURYLOW;
      }
    sprintf( birthdate, "%d%s-%s-%s", century, byear, bmon, bday );

    // Clean up strings for use in SQL (ie, escape special characters).
    // This function uses the database connection to determine special
    // properties of the database that may effect the conversion.
    PQescapeStringConn( conn, currField, id, strlen(id), NULL );
    strcpy( id, currField );
    
    PQescapeStringConn( conn, currField, fname, strlen(fname), NULL );
    strcpy( fname, currField );

    PQescapeStringConn( conn, currField, mname, strlen(mname), NULL );
    strcpy( mname, currField );

    PQescapeStringConn( conn, currField, lname, strlen(lname), NULL );
    strcpy( lname, currField );
    
    PQescapeStringConn( conn, currField, guardian, strlen(guardian), NULL );
    strcpy( guardian, currField );
    
    PQescapeStringConn( conn, currField, street, strlen(street), NULL );
    strcpy( street, currField );

    PQescapeStringConn( conn, currField, city, strlen(city), NULL );
    strcpy( city, currField );

    PQescapeStringConn( conn, currField, state, strlen(state), NULL );
    strcpy( state, currField );
    
    PQescapeStringConn( conn, currField, zip, strlen(zip), NULL );
    strcpy( zip, currField );
    
    PQescapeStringConn( conn, currField, birthdate, strlen(birthdate), NULL );
    strcpy( birthdate, currField );

    PQescapeStringConn( conn, currField, phone, strlen(phone), NULL );
    strcpy( phone, currField );
    
    student_insert( stulist, id, fname, mname, lname, guardian, street,
		    city, state, zip, birthdate, phone );

    rp++;
  }
  
  return rp;
}

void student_print( FILE* ofd, struct student *currstu )
{
  fprintf( ofd, "\n***Student Record***\n" );
  fprintf( ofd, "ID: %s\n", currstu->id );
  fprintf( ofd, "First Name: %s\n", currstu->fname );
  fprintf( ofd, "Middle Name: %s\n", currstu->mname );
  fprintf( ofd, "Last Name: %s\n", currstu->lname );
  fprintf( ofd, "Guardian: %s\n", currstu->guardian );
  fprintf( ofd, "Street: %s\n", currstu->street );
  fprintf( ofd, "City: %s\n", currstu->city );
  fprintf( ofd, "State: %s\n", currstu->state );
  fprintf( ofd, "Zip: %s\n", currstu->zip );
  fprintf( ofd, "Bdate: %s\n", currstu->bdate );
  fprintf( ofd, "Phone: %s\n\n\n", currstu->phone );
 
  return;
}


void evergreen_update( PGconn *conn, struct student *stulist, unsigned ou,
		       unsigned group )
{

  // SQL Statement Buffer
  char stmt[MAXSTMT+1];

  // Result of most recently submitted statement
  PGresult *sres;

  // Current Student
  struct student *currStu;

  // Advance past guard
  currStu = stulist->next;
  sres = PQexec( conn, "BEGIN;" );
  PQclear( sres );

  // Mark everyone as inactive
  sprintf( stmt, "UPDATE actor.usr SET active = 'f' WHERE profile = %d "
	   "AND home_ou = %d;", group, ou );
  sres = PQexec( conn, stmt );
  PQclear( sres );

  while( currStu )
    {
      // Get ID of current student
      sprintf( stmt, "SELECT id FROM actor.usr WHERE usrname='%s';",
	       currStu->id );
      sres = PQexec( conn, stmt );

      if( PQntuples( sres ) )
	{
	  // There is a user record in the database already, this will
	  // be an update.
	  
	  // Primary Key of existing user record
	  unsigned ukey;

	  // Primary Key of existing address record
	  unsigned akey;
	  
	  // We found an existing record, get the ID.
	  sscanf( PQgetvalue( sres, 0, 0 ), "%d", &ukey );
	  PQclear( sres );
	  
	  // Okay, lets try an update
	  // We leave a few fields alone, to avoid clobbering legitimately
	  // changed values (uhh, password)
	  sprintf( stmt, "UPDATE actor.usr SET profile=%d, "
		   "ident_type = 3, ident_value = '%s', ident_value2 = '%s', "
		   "first_given_name = '%s', second_given_name = '%s', "
		   "family_name = '%s', day_phone = '%s', evening_phone='%s', "
		   "home_ou = %d, dob = '%s' WHERE  id = %d; ",
		   group, currStu->id, currStu->guardian,
		   currStu->fname, currStu->mname,
		   currStu->lname, currStu->phone, currStu->phone,
		   ou, currStu->bdate, ukey );
	  sres = PQexec( conn, stmt );
	  PQclear( sres );
	  
	  // And address...
	  sprintf( stmt, "SELECT id FROM actor.usr_address "
		   "WHERE  usr = %d AND address_type = 'CIMS';", ukey );
	  sres = PQexec( conn, stmt );
	  sscanf( PQgetvalue( sres, 0, 0 ), "%d", &akey );
	  PQclear( sres );

	  sprintf( stmt, "UPDATE actor.usr_address "
		   "SET street1 = '%s', city = '%s', state = '%s', "
		   "country = 'USA', post_code = '%s' WHERE id = %d;",
		   currStu->street, currStu->city, currStu->state,
		   currStu->zip, akey );
	  sres = PQexec( conn, stmt );
	  PQclear( sres );

	  // Mark the record as active...
	  sprintf( stmt, "UPDATE actor.usr SET active='t' WHERE id= %d;",
		   ukey );
	  sres = PQexec( conn, stmt );
	  PQclear( sres );
	}
      else
	{
	  // Primary key of new user record
	  unsigned ukey;

	  // Primary key of new card record
	  unsigned ckey;

	  // Primary key of new address record
	  unsigned akey;

	  // Reserve primary keys for new entries
	  sprintf( stmt, "SELECT nextval( 'actor.usr_id_seq' );" );
	  sres = PQexec( conn, stmt );
	  sscanf( PQgetvalue( sres, 0, 0 ), "%d", &ukey );

	  sprintf( stmt, "SELECT nextval( 'actor.usr_address_id_seq' );" );
	  sres = PQexec( conn, stmt );
	  sscanf( PQgetvalue( sres, 0, 0 ), "%d", &akey );

	  sprintf( stmt, "SELECT nextval( 'actor.card_id_seq' );" );
	  sres = PQexec( conn, stmt );
	  sscanf( PQgetvalue( sres, 0, 0 ), "%d", &ckey );
	  
	  // Insert into actor.usr
	  sprintf( stmt, "INSERT INTO actor.usr "
		   "( id, profile, usrname, passwd, ident_type, ident_value, "
		   "ident_value2, "
		   "first_given_name, second_given_name, family_name, "
		   "day_phone, evening_phone, "
		   "home_ou, dob, claims_returned_count ) "
		   "VALUES "
		   "( %d, %d, '%s', '%s', 3, '%s', "
		   "'%s', "
		   "'%s', '%s', '%s', "
		   "'%s', '%s', "
		   " %d, '%s', 0 );",
		   ukey, group, currStu->id, currStu->id, currStu->id,
		   currStu->guardian,
		   currStu->fname, currStu->mname, currStu->lname,
		   currStu->phone, currStu->phone,
		   ou, currStu->bdate );
	  sres = PQexec( conn, stmt );
	  PQclear( sres );

	  // Insert into actor.usr_address
	  sprintf( stmt, "INSERT INTO actor.usr_address "
		   "( id, address_type, usr, street1, "
		   "city, state, country, post_code ) "
		   "VALUES "
		   "( %d, 'CIMS', %d, '%s', "
		   "'%s', '%s', 'USA', '%s' );",
		   akey, ukey, currStu->street,
		   currStu->city, currStu->state, currStu->zip );
	  sres = PQexec( conn, stmt );
	  PQclear( sres );
	  
	  // And the card
	  sprintf( stmt, "INSERT INTO actor.card( id, usr, barcode ) "
		   "VALUES ( %d, %d, '%s' );",
		   ckey, ukey, currStu->id );
	  sres = PQexec( conn, stmt );
	  PQclear( sres );

	  // Set address for user...
	  sprintf( stmt, "UPDATE actor.usr SET mailing_address=%d, "
		   "billing_address=%d WHERE id=%d;",
		   akey, akey, ukey );
	  sres = PQexec( conn, stmt );
	  PQclear( sres );

	  // Set card for user...
	  sprintf( stmt, "UPDATE actor.usr SET card=%d "
		   " where id = %d;", ckey, ukey );
	  sres = PQexec( conn, stmt );
	  PQclear( sres );
	  
	}
      currStu = currStu->next;
      
    }

  sres = PQexec( conn, "COMMIT;" );
  PQclear( sres );
	  
  return;
}
-------------- next part --------------
//
// cleantcn.c
//
// Given an XML file of MARC records, removes spaces from the TCN (tag 001)
//
// Carson City - Crystal Area Schools
// Technology Department
// 115 E Main
// Carson City, MI 48811
// (989) 584-3138
//
// Copyright 2007 Travis Schafer
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
// Initial Program:
// 2007-11-05 T.Schafer
//
// Changes:
// 2007-11-15 T.Schafer  - Added comments to make program GPL
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

#include <libxml/parser.h>
#include <libxml/tree.h>

#define _GNU_SOURCE
#include <getopt.h>

#define FALSE 0
#define TRUE 1

// Extract, clean, and replace TCNs
void tcn_clean( xmlNode *collection );

// Return a string identical to inStr, with the exception that
// the returned string has no spaces. It is up to the caller to free the
// returned string.
char *no_spaces( char *inStr );

// Return a string indentical to inStr, with the exception that all alpha
// characters are in lowercase. It is up to the caller to free the returned
// string
char *no_caps( char *inStr );

int main( int argc, char **argv )
{
  char* xfname;          // XML File to process (name)
  int currOpt;           // Current Option
  unsigned optCount=0;   // Number of command line options parsed
  xmlDocPtr doc;         // Tree representation of XML file

  while( TRUE )
    {
      static struct option options[] =
	{
	  { "xmlfile", required_argument, 0, 'x' },
	  { 0, 0, 0, 0 }
	};

      int option_index = 0;
      currOpt = getopt_long_only( argc, argv, "", options, &option_index );

      if( currOpt == -1 )
	{
	  break;
	}

      switch( currOpt )
	{
	case 'x':
	  {
	    xfname = strdup( optarg );
	    optCount++;
	    break;
	  }
	case '?':
	  {
	    // Error message printed by getopt()
	    break;
	  }
	}
    }
  // Did we get enough options
  if( optCount != 1 )
    {
      fprintf( stderr, "%s: Clean TCNs in XML MARC file\n"
	       "--xmlfile        : XML File to Process\n", argv[0] );
      exit( 1 );
    }

  if( !( doc = xmlReadFile( xfname, NULL, 0 ) ) )
    {
      fprintf( stderr, "%s: Failed to read XML MARC file! Exiting\n",
	       argv[0] );
      exit( 1 );
    }

  free( xfname );

  tcn_clean( xmlDocGetRootElement( doc ) );

  xmlDocDump( stdout, doc );
  
  xmlCleanupParser();
    
  return 0;
}

void tcn_clean( xmlNode *collection )
{
  // Current Record in XML Tree
  xmlNode *currRec = NULL;

  // Current Field in XML Tree
  xmlNode *currField = NULL;
  
  // Current Tag in XML Tree
  xmlChar *tag = NULL;

  // Current Value in XML Tree
  xmlNode *currVal = NULL;
  
  // Tag Value
  xmlChar *val;

  // Tag Value without spaces
  xmlChar *valnospace;

  // Tag Value converted to lowercase
  xmlChar *valnocaps;
  
  // We were given the root (collection) node. The children will be records,
  // so we need to walk the records...
  for( currRec = collection->children; currRec; currRec = currRec->next )
    {
      // Okay, we're looking at a record. Now we need to run through the
      // children to find the controlfield with tag="001"

      for( currField = currRec->children; currField; currField=currField->next)
	{
	  tag = xmlGetProp( currField, "tag" );
	  if( !xmlStrcmp( tag, "001" ) )
	    {
     
	      // Get the child, which will be the text node
	      currVal = currField->children;

	      // Get the actuall text
	      val = xmlNodeGetContent( currVal );

	      // Clean spaces and replace value
	      valnospace = no_spaces( val );

	      // Convert to all lower
	      valnocaps = no_caps( valnospace );
	      
	      xmlNodeSetContent( currVal, valnocaps );

	      // Cleanup
	      xmlFree( val );
	      //free( valnospace );
	      
	    }
	  xmlFree( tag );
	
	  
	}
      
	  
    }
  
  
  return;
}

char *no_spaces( char *inStr )
{
  // String to return
  char *retStr;

  // Current position in retStr
  char *currRet;

  // Generic counter
  unsigned i;
    
  retStr = (char*)malloc( strlen( inStr) * sizeof( char ) );

  currRet = retStr;

  for( i=0; i < strlen( inStr ); i++ )
    {
      if( inStr[i] != ' ' )
	{
	  *currRet = inStr[i];
	  currRet += sizeof( char );
	}
    }

  // We should already be at the end of the string (we get an advance above),
  // so cap it.
  *currRet = '\0';
  
    
  return retStr;
}

char *no_caps( char *inStr )
{
  // String to return
  char *retStr;

  // Generic counter
  unsigned i;
  
  retStr =  (char *)malloc( strlen( inStr) * sizeof(char) );

  for( i = 0; i < strlen( inStr ); i++ )
    {
      retStr[i] = tolower( inStr[i] );
    }

  // Cap the retStr
  retStr[i] = '\0';
    
  return retStr;
}

-------------- next part --------------
//
// deltcn.c
//
// Carson City - Crystal Area Schools
// Technology Department
// 115 E Main
// Carson City, MI 48811
// (989) 584-3138
//
// Given a list of TCNs tc_list, remove all entries in the BRE file with
// a TCN found on tc_list.
//
// When adding a new organization to an Evergreen installation, it is
// necessary to import that organization's Bibliographic MARC records.
// Unfortunately, there is a great certainty that a subset of the new
// organization's records already exist in the Evergreen database. A direct
// import, therfore, will result the uniqeness constraint on TCN in the
// Evergreen database.
//
// As a result, this program takes a list of TCNs already present in the
// Evergreen database, with a format of on TCN per line. The program
// also takes an Evergreen BRE file. The program then prints, to STDOUT,
// only those records that were not matched with the provided list of
// existing TCNs.
//
// Copyright 2007 Travis Schafer
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
// Initial Program
// 2007-11-04 T.Schafer
//
// Changes:
// 2007-11-15 T.Schafer   - Made program GPL

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define _GNU_SOURCE
#include <getopt.h>

#define MAXBRE 1048576            // Maximum length of line in BRE file
#define MAXTCN 128                // Maximum length of line in TCN file

#define TRUE 1
#define FALSE 0

int main( int argc, char **argv )
{
  unsigned optcount=0;           // Number of options returned by getopt
  unsigned i;                    // Generic Counter
  int currOpt;                   // Current Option on Commandline
  char *tfname;                  // TCN File Name
  char *bfname;                  // BRE File Name
  FILE *tfd;                     // TCN File Descriptor
  FILE *bfd;                     // BRE File Descriptor
  char tcnbuff[MAXTCN+1];        // TCN buffer
  char brebuff[MAXBRE+1];        // BRE Buffer
  char **tcnList;                // TCN List
  unsigned tcnListSz;            // Number of entries in TCN list
  
  while( 1 )
    {
      static struct option options[] =
	{
	  { "tcnfile", required_argument, 0, 't' },  // TCN File Name
	  { "tcncount", required_argument, 0, 's' }, // Number of TCN File recs
	  { "brefile", required_argument, 0, 'b' },  // BRE File Name
	  { 0, 0, 0, 0 }
	};
      int option_index = 0;

      currOpt = getopt_long_only( argc, argv, "", options, &option_index );

      // Are we done processing options?
      if( currOpt == -1 )
	{
	  break;
	}

      switch( currOpt )
	{
	case 't':
	  {
	    tfname = strdup( optarg );
	    optcount++;
	    break;
	  }
	case 's':
	  {
	    sscanf( optarg, "%d", &tcnListSz );
	    optcount++;
	    break;
	  }
	case 'b':
	  {
	    bfname = strdup( optarg );
	    optcount++;
	    break;
	  }
	case '?':
	  {
	    // Error message printed by getopt()
	    break;
	  }
	}
    }

  // Did we get enough options
  if( optcount != 3  )
    {
      fprintf( stderr, "%s: Remove records from BRE file based on TCN file\n"
	       "--tcnfile=            : TCN Filename\n"
	       "--tcncount=           : Number of entires in TCN file\n"
	       "--brefile=            : BRE Filename\n", argv[0] );
      exit( 1 );
    }

  if( !( tfd = fopen( tfname, "r" ) ) )
    {
      fprintf( stderr, "%s: Can not open TCN file. Exiting!\n", argv[0] );
      exit( 1 );
    }

  if( !( bfd = fopen( bfname, "r" ) ) )
    {
      fprintf( stderr, "%s: Can not open BRE file. Exiting!\n", argv[0] );
      exit( 1 );
    }

  free( tfname );
  free( bfname );

  // Load and close TCN List
  tcnList = (char**)malloc( sizeof(char*) * tcnListSz );
  
  for( i = 0; fgets( tcnbuff, MAXTCN, tfd); i++ )
    {
      // Pointer to newline character in TCN
      char *n;
      
      tcnList[i] = strdup( tcnbuff );
      
      // Trim ending newline, if there is one
      if( (n = strstr( tcnList[i], "\n" ) ) )
	{
	  *n = '\0';
	}
    }

  fclose( tfd );
  

  // Now that we have a list of TCNs, we will scan the BRE file. If a BRE
  // record (line) does not contain a TCN that is also present in the TCN
  // list, we print the line to STDOUT
  while( fgets( brebuff, MAXBRE, bfd ) )
    {
      // Current position in buffer
      char *curr;

      // Next position in buffer
      char *next;

      // Length of Current TCN
      unsigned currTcnLen;
     
      // Current TCN as extracted from BRE
      char currTcn[MAXTCN+1];

      // Did we find the current BRE's TCN on the list to remove?
      unsigned match = FALSE;
      
     
      // The TCN is contained in the BRE between the last comma of the
      // record, and the last ']' of the record.

      // Set the next position to the last character in the buffer
      next = brebuff + sizeof( char ) * strlen( brebuff );

      // Now, slide next back until we hit ']'
      while( *next != ']' )
	{
	  next -= sizeof( char );
	}

      // Now, set current position in buffer to last ',' in the string
      // before ']'
      curr = next;

      while( *curr != ',' )
	{
	  curr -= sizeof( char );
	}

      // Advance curr past ','
      curr += sizeof( char );

      // Decrement next past ']'
      next -= sizeof( char );

      currTcnLen = (next - curr) / sizeof( char );
      currTcnLen++;
      
      // Extract the current TCN from the BRE
      strncpy( currTcn, curr, currTcnLen );

      // Terminat string
      currTcn[currTcnLen] = '\0';

      // Okay, now, we decide if the record should be printed
      for( i = 0; i < tcnListSz; i++ )
	{
	  if( !strcmp( tcnList[i], currTcn ) )
	    {
	      match = TRUE;
	    }
	}

      if( !match )
	{
	  fprintf( stdout, "%s", brebuff );
	}
    }
  
	 
  return 0;
}
-------------- next part --------------
//
// rebar.c
//
// Carson City - Crystal Area Schools
// Technology Department
// 115 E Main
// Carson City, MI 48811
// (989) 584-3138
//
// Apparently, the standard barcode for libraries is a CODABAR encoded 14
// digit barcode with a format of ABBBBCCCCCCCCD, where 'A' indicates the
// item type, BBBB is a four digit organization code, CCCCCCCC is a "series
// number," which identifies an item, and D is a Mod-10 check digit.
//
// If you are converting between systems, you may want to change nonstandard
// "simple" barcodes to the above format. This program scans a MARC XML
// file, and does just that. A is set to '3' to indicate a book, BBBB
// is set to the orgcode value defined on the command line, the first  'C'
// is set to the sleader value defined on the command line, and the
// remaining Cs take the value of the barcode present in the MARC XML file. D
// is calculated and set appropriately.
//
// The program prints a modified MARC XML file to stdout, with new barcodes,
// which will be your old barcodes, "wrapped" in the above format. If you
// plan to use a single BBBB for your entire organization (good idea), but
// are migrating multiple libraries that have duplicate barcodes... set sleader
// to a different value for each barcode.
//
// Please note that this program strips all non-numeric characters from
// the barcode present in the MARC XML file before "wrapping" the barcode.
// This is because a certain legacy system prefixes all Bibliographic
// barcodes with a 'T'.
//
// Copyright 2007 Travis Schafer
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
// 
// Initial Program:  2007-11-05 T.Schafer
//
// Changes:
// 2007-11-11 T.Schafer   - 1. Modified program to strip non-numerics
//                             from original barcode BEFORE recoding.
//
// 2007-11-15 T.Scharer   - 2. Added GPL
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

#include <libxml/parser.h>
#include <libxml/tree.h>

#define _GNU_SOURCE
#include <getopt.h>

#define FALSE 0
#define TRUE 1

// Maximum Length of Replacement Barcode
#define MAXBARCODE 14

// Maximum length of Organization code
#define MAXORG 4

// SLEADER length copied is always 1
// Maximum length of series
#define MAXSER 7

//
void process_doc( xmlNode *collection, char *holdfield, char *bcodetag,
		  char *orgcode, char *sleader );

// Given a field, return a pointer to a subfield specified by tag
//
// Arguments:     xmlNode *field    - Field to search
//                char    *tag      - Target Tag
//
// Returns:       xmlNode *         - Subfield Node
xmlNode *get_subfield( xmlNode *field, char *tag );

// Replace barcode in file
//
// Arguments:     xmlNode *node     - XML Node that is the barcode subfield
//                char    *orgcode  - Orginzation Code for new barcode
//                char    *sleader  - Series Leader for new barcode
//
// Returns: (void)
void modify_barcode( xmlNode *node, char *orgcode, char *sleader );

// Function to calculate MOD-10 check digit, which is
// a "non-standard" library standard
//
// Arguments: char *codestring  - The barcode, less check digit
//
// Returns:   char *            - One character string, consisting of
//                                MOD-10 checkdigit calculated for argument
char *checkdigit( char *codestring );

// Remove all non-numeric characters (if any) from a string
//
// Arguments:    char *str     - String from which non-numerics will be removed
//
// Outputs:      char *str     - String with non-numberic removed
//
// Returns: (void) - Operates as "side-effect"
void remove_nondigit( char *str );


int main( int argc, char **argv )
{
  // Current Command Line Option
  int currOpt;

  // Count of Command Line Options
  unsigned optCount;

  // Name of XML MARC file
  char *xfname;

  // Field with holdings
  char *holdfield;

  // Barcode Tag
  char *bcodetag;

  // Orginization Code
  char *orgcode;

  // Series leader (IE, Smart Barcodes)
  char *sleader;
  
  // XML Document
  xmlDocPtr doc;
  
  optCount = 0;
  while( TRUE )
    {
      static struct option options[] =
	{
	  { "xmlfile",   required_argument, 0, 'x' },    // XML MARC File
	  { "holdfield", required_argument, 0, 'f' },
	  { "bcodetag",  required_argument, 0, 'b' },
	  { "orgcode",   required_argument, 0, 'o' },
	  { "sleader",   required_argument, 0, 's' },
	  { 0, 0, 0, 0 }
	};

      int option_index = 0;

      currOpt = getopt_long_only( argc, argv, "", options, &option_index );

      if( currOpt == -1 )
	{
	  break;
	}

      switch( currOpt )
	{
	case 'x' :
	  {
	    xfname = strdup( optarg );
	    optCount++;
	    break;
	  }
	case 'f' :
	  {
	    holdfield = strdup( optarg );
	    optCount++;
	    break;
	  }
	case 'b':
	  {
	    bcodetag = strdup( optarg );
	    optCount++;
	    break;
	  }
	case 'o':
	  {
	    orgcode = strdup( optarg );
	    optCount++;
	    break;
	  }
	case 's':
	  {
	    sleader = strdup( optarg );
	    optCount++;
	    break;
	  }
	case '?':
	  {
	    // Error printed by getopt()
	    break;
	  }
	}
    }
  
  // Did we get enough options?
  if( optCount != 5 )
    {
      fprintf( stderr, "%s: Modify barcode in holdings information\n"
	       "--xmlfile            : XML MARC file to modify\n"
	       "--holdfield          : Field used for holdings\n"
	       "--bcodetag           : Tag that contains barcode\n"
	       "--orgcode            : Organization for Barcode\n"
	       "--sleader            : Leading digit in series for Barcode\n",
	       argv[0] );
      exit( 1 );
    }
  
  if( !( doc = xmlReadFile( xfname, NULL, 0 ) ) )
    {
      fprintf( stderr, "%s: Failed to read XML MARC file! Exiting.\n",
	       argv[0] );
      exit( 1 );
    }

  free( xfname );

  process_doc( xmlDocGetRootElement( doc ), holdfield, bcodetag, orgcode,
	       sleader );

  xmlDocDump( stdout, doc );
  
  xmlCleanupParser();
  
  
  return 0;
}

void process_doc( xmlNode *collection, char *holdfield, char *bcodetag,
		  char *orgcode, char *sleader )
{
  // Current record in XML Tree
  xmlNode *currRec = NULL;

  // Current field in XML Tree
  xmlNode *currField = NULL;

  for( currRec = collection->children; currRec; currRec=currRec->next )
    {
      // Okay, we're looking at a  record. Scan the fields to find the
      // field with tag = holdfield
      for( currField = currRec->children; currField; currField=currField->next)
	{
	  if( !xmlStrcmp( xmlGetProp( currField, (xmlChar*)"tag"),
			  (xmlChar*)holdfield ) )
	    {
	      modify_barcode( get_subfield( currField, bcodetag ), orgcode,
			      sleader );
	     
	      
	    }
	}
    }
  
  return;
}

xmlNode *get_subfield( xmlNode *field, char *tag )
{
  // Current Subfield
  xmlNode *currSub;

  // Pointer to return
  xmlNode *retNode;
  
  // Have we found the correct subfield?
  unsigned found = FALSE;
      
  for( currSub=field->children; currSub && !found; currSub=currSub->next  )
    {
      if( !xmlStrcmp( xmlGetProp( currSub, (xmlChar*)"code" ), (xmlChar*)tag ))
	{
	  found = TRUE;
	  retNode = currSub;
	}
    }
  
  return retNode;
}

void modify_barcode( xmlNode *node, char *orgcode, char *sleader )
{
  // Current Text Node
  xmlNode *currTextNode;

  // Current Barcode
  xmlChar* currBar;

  // Original Series
  unsigned orgSeries;
  
  // Barcode Buffer
  xmlChar buff[MAXBARCODE+1];

  // Current position in barcode buffer
  xmlChar *currPos;
  
  // Advance to child text node
  currTextNode = node->children;
  currBar = xmlNodeGetContent( node );

  // Clean barcode
  remove_nondigit( (char*)currBar );
  
  // Get current barcode (series) as text
  sscanf( (char*)currBar, "%d", &orgSeries );
    
  // Set initial position
  currPos = buff;

  // Set barcode for "title"
  *currPos = '3';
  currPos += sizeof( xmlChar );

  // Copy appropriate portion of organization code
  strncpy( (char*)currPos, (char*)orgcode, sizeof( xmlChar ) * MAXORG );
  
  // Advance past organization code
  currPos += sizeof( xmlChar ) * MAXORG;

  // Insert leader
  *currPos = *sleader;

  // Advance one position, fill existing barcode
  currPos += sizeof( xmlChar );
  sprintf( (char*)currPos, "%07d", orgSeries );

  // Advance past series
  currPos += sizeof( xmlChar ) * MAXSER;

  strcpy( (char*)currPos, checkdigit( (char*)buff ) );

  xmlNodeSetContent( node, buff );
    
  return;
}


char *checkdigit( char *codestring )
{
  // The algorithm to generate the check 
  // Stolen from http://montanalibraries.org/MLNresources/MLNbarcodes.htm,
  // Modified for clarity

  // Algorithm.
  // 1. If the digit is in an even position, add it to the total
  // 2. If the digit is in an odd-numbered position, multiply it by two.
  // 2a. If the result of 2 greater than or equal to 10, subtract 9 and
  //     add to total.
  // 2b. If the result of 2 is less than 10, add it to the total
  // 3. After all digits are added, divide total by 10 and take the remainder.
  // 4a. If the remainder is 0, then check digit is 0.
  // 4b. If the remainder is nonzero, check digit is 10-remainder.

  // Running total for the checksum
  int runningTotal;
  
  // Generic Counter
  int i;

  // String to return
  char *retStr;

  retStr = (char*)malloc( sizeof(char) * 2 );
  
  runningTotal = 0;
  for( i = 1; i <= strlen( codestring ); i++ )
    {
      // Current subtotal
      int subtotal=0;
      
      if( i%2 )
	{
	  subtotal +=  ( codestring[i-1] - '0' ) * 2;
	  if( 10 <= subtotal )
	    {
	      subtotal -= 9;
	    }
	  runningTotal += subtotal;
	}
      else
	{
	  subtotal = ( codestring[i-1] - '0' );
	  runningTotal += subtotal;
	  
	}
    }

  runningTotal = runningTotal % 10;

  if( runningTotal )
    {
      runningTotal = 10 - runningTotal;
    }
  
  sprintf( retStr,"%d", runningTotal );
  
  return retStr;
}
	    
void remove_nondigit( char *str )
{
  // Work Buffer
  char *buff;

  // Current Position in input/output string
  char *curr;

  // Generic counter
  unsigned i;

  buff = strdup( str );
  curr = str;

  for( i = 0; i < strlen( buff ); i++ )
    {
      if( isdigit( buff[i] ) )
	{
	  *curr = buff[i];
	  curr += sizeof( char );
	}
    }
  *curr = '\0';

  free( buff );
  
  return;
}


-------------- next part --------------
//
// gettcn.c
//
// Carson City - Crystal Area Schools
// Technology Department
// 115 E Main
// Carson City, MI 48811
// (989) 584-3138
//
// For each MARC record in a MARC XML file, this program will find a matching
// MARC record in an Evergreen Database. It will then replace the MARC XML
// file TCN with the TCN of the matching database MARC record, and output
// an updated MARC XML file to stdout.
//
// Additionally, it will produce an exception file for records in the MARC
// XML file that could not be matched with the database. Records with no
// match are excluded from the output XML document.
//
// Why? Let's say that you are migrating from a legacy system to an
// Evergreen system, and your legacy MARC records are of poor quality. The
// Evergreen database, for one reason or another, has high quality MARC
// records. You still need to import holdings, which is based on TCN.
// This program would let you import your existing holdings against the higher
// quality Evergreen MARC records
//
// Copyright 2007 Travis Schafer
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
// Initial Program:
// 2007-11-11 T.Schafer
//
// Changes:
// 2007-11-15 T.Schafer 1) Added GPL
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include <libpq-fe.h>

#include <libxml/parser.h>
#include <libxml/tree.h>

#define _GNU_SOURCE
#include <getopt.h>

#define FALSE 0
#define TRUE 1

#define ISBNLEN 10       // Length of an ISBN (used when cleaning ISBNs)
#define MAXSTMT 4096     // Max length of SQL stmt
#define STATUSRECS 25   // Print status every STATUSRECS records during
                         // DB processing

// Structure for MARC records (ie, titles)
struct title
{
  char *filetcn;        // TCN from file       - Field 001
  char *dbtcn;          // TCN from database   - Field 001
  char *isbn;           // ISBN from file      - Field 020 Tag A
  char *author;         // Author from file    - Field 100 Tag A
  char *title;          // Title from file     - Field 245 Tag A
  char *pub;            // Publisher from file - Field 260 Tag A
  xmlNode *record;      // Pointer to this record in XML doc tree
  xmlNode *tcntext;     // Pointer to this record's TCN Text in doc tree
  struct title *next;   // Next title in linked list
};

// Initialize a linked list of titles (ie, create front guard)
//
// Arguments: (void)
//
// Returns: Pointer to new list (ie, pointer to front guard)
struct title *title_initlist( void );

// Free a list of titles
//
// Arguments: struct copy *copylist  - Title list to free
// Returns: (void)
void title_freelist( struct title *titlelist );


// Append a title to the end of a linked list of titles
//
// Arguments:    struct title *titlelist   - List to which record is appended
//               char         *filetcn     - File TCN
//               char         *isbn        - ISBN
//               char         *author      - Author
//               char         *title       - Title
//               char         *pub         - Publisher
//               xmlNode      *rec         - Pointer to corresponding
//                                           xmlNode for record
//               xmlNode      *tcnText     - Pointer to TCN text XML Node
// The dbtcn member of the struct is initialized to a null string...
// Returns: (void)
void title_insert( struct title *titlelist, char *filetcn, char *isbn,
		  char *author, char *title, char *pub,
		  xmlNode* record, xmlNode *text);



// Fill a title list with all possible information from MARC XML file
// (ie, everthing but the database TCN)
//
// Arguments:      xmlNode *collection      - Collection element of XML
//                                            doc to be processed
//                 struct title *titlelist  - List of titles to be filled
void process_xml( xmlNode *collection, struct title *titlelist );

// Given a list of titles, fill in the database TCN where possible
//
// Arguments:     PGconn *conn              - Evergreen DB Connection
//                struct title *titlelist   - Title list to process
void process_db( PGconn *conn, struct title *titlelist );

// Run through a Title List that has been updated with database TCNs
// If there is no Database TCN present, print an error to efd, and
// remove the node from the XML tree. If there is a Database TCN present,
// update the TCN in the XML Tree
//
// Arguments:     FILE *efd                 - Error File Descriptor
//                struct title *titlelist   - Fully Processed Title List
// Returns:       (void)
void finalize_xml( FILE *efd, struct title *titlelist );


// Given an XML node, return a pointer to the child node whose "property"
// is set to "value"
//
// Arguments:   xmlNode *parent    - Node whose children will be searched
//              char    *property  - Property that will be searched
//              char    *value     - Value for which we are searching
//
// Returns:     Pointer to matching child node, or pointer to 0 if no matching
//              child is found
xmlNode *get_child_node( xmlNode *parent, char *property, char *value );

// Return the content of a given subfield (ie, child of xmlNode *field)
// where the subfield has "property" set to "value"
//
// Note: Caller must free returned string, as appropriate
//
// Arguments:    xmlNode *field      - Field whose subfields are searched
//               char    *property   - Propert to search
//               char    *value      - Property is set to this value for match
//
// Returns: Text of subfield, or a zero length string if nothing is found
char *get_subfield_contents( xmlNode *field, char *property, char *value );

// Given an ISBN, remove all characters that are not a digit or an alpha,
// and truncate resulting string.
//
// Arguments:   char *isbn      - ISBN to be "cleaned"
// Outputs:     char *isbn      - Cleaned ISBN
// Returns:     (void)
void clean_isbn( char *isbn );


int main( int argc, char **argv )
{
  // Current command line option
  int currOpt;

  // Number of command line options processed
  unsigned optCount;

  // Name of XML Marc File
  char *xfname;

  // Name of Exception File
  char *efname;

  // Exception file descriptor
  FILE *efd;

  // Evergreen database host
  char *dbhost;

  // Evergreen database name
  char *dbname;
  
  // Evergreen database user
  char *dbuser;

  // Evergreen database password
  char *dbpass;

  // Evergreen database connection
  PGconn *conn;
  
  // XML Document
  xmlDocPtr doc;

  // Title List
  struct title *titlelist;
  
  optCount = 0;
  while( TRUE )
    {
      static struct option options[] =
	{
	  { "xmlfile",   required_argument, 0, 'x' },
	  { "expfile",   required_argument, 0, 'e' },
	  { "dbhost",    required_argument, 0, 'h' },
	  { "dbname",    required_argument, 0, 'd' },
	  { "dbuser",    required_argument, 0, 'u' },
	  { "dbpass",    required_argument, 0, 'p' },
	  { 0, 0, 0, 0 }
	};

      int option_index = 0;

      currOpt = getopt_long_only( argc, argv, "", options, &option_index );

      if( currOpt == -1 )
	{
	  break;
	}

      switch( currOpt )
	{
	case 'x' :
	  {
	    xfname = strdup( optarg );
	    optCount++;
	    break;
	  }
	case 'e' :
	  {
	    efname = strdup( optarg );
	    optCount++;
	    break;
	  }
	case 'h' :
	  {
	    dbhost =  strdup( optarg );
	    optCount++;
	    break;
	  }
	case 'd' :
	  {
	    dbname = strdup( optarg );
	    optCount++;
	    break;
	  }
	case 'u' :
	  {
	    dbuser = strdup( optarg );
	    optCount++;
	    break;
	  }
	case 'p' :
	  {
	    dbpass = strdup( optarg );
	    optCount++;
	    break;
	  }
	case '?' :
	  {
	    // Error printed by getopt()
	    break;
	  }
	}
    }

  if( optCount != 6 )
    {
      fprintf( stderr, "%s: Replace TCNs in a MARC XML File with TCN of\n"
	       "matching record in Evergreen database.\n"
	       "--xmlfile             : MARC XML file to process\n"
	       "--expfile             : Exceptions Report File\n"
	       "--dbhost              : Evergreen DB Host\n"
	       "--dbname              : Evergreen DB Name\n"
	       "--dbuser              : Evergreen DB User\n"
	       "--dbpass              : Evergreen DB Pass\n",
	       argv[0] );
      exit( 1 );
    }

  // Okay, we have enough arguments, try to open the exception report
  // file
  if( !( efd = fopen( efname, "w" ) ) )
    {
      fprintf( stderr, "%s: Failed to create exception file! Exiting.\n",
	       argv[0] );
      exit( 1 );
    }

  // Attempt DB Connection
  conn = PQsetdbLogin( dbhost, NULL, NULL, NULL, dbname, dbuser, dbpass );
  if( PQstatus( conn ) == CONNECTION_BAD )
    {
      fprintf( stderr, "%s: Can not connet to database!\n"
	       "Error is :%s\n\n"
	       "***Exiting***\n",
	       argv[0], PQerrorMessage( conn ) );
      PQfinish( conn );
      exit( 1 );
    }
  
  if( !( doc = xmlReadFile( xfname, NULL, 0 ) ) )
    {
      // Error printed by xmlReadFile
      exit( 1 );
    }

  titlelist = title_initlist();

  fprintf( stderr, "Processing XML File\n" );
  process_xml( xmlDocGetRootElement( doc ), titlelist );

  fprintf( stderr, "Matching Records to DB\n" );
  process_db( conn, titlelist );

  fprintf( stderr, "Finalizing Output Document\n" );
  finalize_xml( efd, titlelist );
  xmlDocDump( stdout, doc );
  
  PQfinish( conn );
  xmlFreeDoc( doc );
  xmlCleanupParser();
  
  return 0;
}

struct title *title_initlist( void )
{
  // List to return
  struct title *retList;

  retList = (struct title*)malloc( sizeof( struct title ) );
  retList->next = 0;

  return retList;
}

void title_freelist( struct title *titlelist )
{
  // Pointer to current copy in list
  struct title *curr;

  // Pointer to next copy in list
  struct title *next;

  // Advance past guard
  curr = titlelist->next;

  while( curr )
    {
      next = curr->next;
      free( curr->filetcn );
      free( curr->dbtcn );
      free( curr->isbn );
      free( curr->author );
      free( curr->title );
      free( curr->pub );
      free( curr );
      curr = next;
    }

  // Free the guard
  free( titlelist );
  
  return;
}

void title_insert( struct title *titlelist, char *filetcn, char *isbn,
		   char *author, char *title, char *pub,
		   xmlNode *record, xmlNode *tcntext )
{
  // Current Title in List
  struct title *curr;

  curr = titlelist;

  // Find end of  list
  while( curr )
    {
      
      if( !curr->next )
	{
	  curr->next = (struct title*)malloc( sizeof( struct title ) );
	  curr = curr->next;
	  
	  curr->filetcn = strdup( filetcn );
	  curr->isbn = strdup( isbn );
	  curr->author = strdup( author );
	  curr->title = strdup( title );
	  curr->pub = strdup( pub );
	  curr->record = record;
	  curr->tcntext = tcntext;
	  
	  curr->next = 0;
	  curr->dbtcn = (char*)malloc( sizeof( char ) );
	  *(curr->dbtcn) = '\0';
	}
      curr = curr->next;
    }
  
  return;
}


void process_xml( xmlNode *collection, struct title *titlelist )
{
  // Current Record in XML File
  xmlNode *currRec = NULL;
  
  for( currRec = collection->children; currRec; currRec=currRec->next )
    {
      if( currRec->type == XML_ELEMENT_NODE )
	{
	  xmlNode *field001;          // Pointer to Field 001 element
	  xmlNode *field020;          // Pointer to Field 020 element
	  xmlNode *field100;          // Pointer to Field 100 element
	  xmlNode *field245;          // Pointer to Field 245 element
	  xmlNode *field260;          // Pointer to Field 260 element
	  char *filetcn;              // TCN from File
	  char *isbn;                 // ISBN from File
	  char *author;               // Author
	  char *title;                // Title from File
	  char *pub;                  // Publisher Info from file
	  xmlNode *tcnText;           // TCN Text Node
	  
	  
	  field001 = get_child_node( currRec, "tag", "001" );
	  field020 = get_child_node( currRec, "tag", "020" );
	  field100 = get_child_node( currRec, "tag", "100" );
	  field245 = get_child_node( currRec, "tag", "245" );
	  field260 = get_child_node( currRec, "tag", "260" );

	  // Great, let's get the text (if any) associated with each field,
	  // and load up an entry in the title list.
	  tcnText = field001->children;
	  filetcn = get_subfield_contents( currRec, "tag", "001" );
	  isbn = get_subfield_contents( field020, "code", "a" );
	  author = get_subfield_contents( field100, "code", "a" );
	  title = get_subfield_contents( field245, "code", "a" );
	  pub = get_subfield_contents( field260, "code", "a" );
	  	  
	  title_insert( titlelist, filetcn, isbn, author, title, pub,
			currRec, tcnText );
	  free( filetcn );
	  free( isbn );
	  free( author );
	  free( title );
	  free( pub );
	  
	}
    }
  return;
}

void process_db( PGconn *conn, struct title *titlelist )
{
  // Current record in Title list
  struct title *curr;

  // SQL statement buffer
  char stmt[MAXSTMT+1];

  // Counter for status updates
  unsigned statCount;

  // Number of matched records, for status updates
  unsigned matchCount;

  // Current match percentage
  double matchPercent;
  
  // Result of most recently submitted statement
  PGresult *sres;

  // Error for most recent SQL statement
  char *err;
  
  statCount = 0;
  matchCount = 0;
  
  for( curr = titlelist->next; curr; curr=curr->next )
    {
      // We really only need to search for an ISBN if there is one
      if( strlen( curr->isbn ) == ISBNLEN )
	{
	  
	  // ISBN, formatted for search
	  char *searchIsbn;
	  
	  searchIsbn = strdup( curr->isbn );
	  clean_isbn( searchIsbn );
	  
	  sprintf( stmt, "SELECT tcn_value, "
		   "CASE tcn_source "
		   "WHEN 'OCLC' THEN 1 "
		   "WHEN 'System' THEN 2 "
		   "WHEN 'System Local' THEN 3 "
		   "WHEN 'LCCN' THEN 4 "
		   "WHEN 'ISxN' THEN 5 "
		   "ELSE 9 END "
		   "AS sort "
		   "FROM "
		   "biblio.record_entry, metabib.full_rec "
		   "WHERE "
		   "record_entry.id = full_rec.record AND "
		   "tag = '020' AND "
		   "subfield = 'a' AND "
		   "tcn_source <> 'ISxN' AND "
		   "value ILIKE '%s%%' ORDER BY sort;",
		   searchIsbn );
	  sres = PQexec( conn, stmt );
	  free( searchIsbn );

	  err = PQresultErrorMessage( sres );
	  if( strlen( err ) )
	    {
	      fprintf( stderr, "\n\n\n***BEGIN SQL ERROR***\n\n" );
	      fprintf( stderr, "Statment: %s", stmt );
	      fprintf( stderr, "%s\n", err );
	      fprintf( stderr, "\n\nPROGRAM HALTED\n" );
	      exit( 1 );
	    }
	  
	  
	  if( PQntuples( sres ) )
	    {
	      // Valid record...
	      free( curr->dbtcn );
	      curr->dbtcn = strdup( PQgetvalue( sres, 0, 0 ) );
	      matchCount++;
	    }
	 
	}
      statCount++;

      if( !(statCount % STATUSRECS ) )
	{
	  matchPercent = ((double)matchCount/(double)statCount)*100;
	  
	  fprintf( stderr, "Records Processed: %d\tMatches: %d\tPercent: %f\n",
		   statCount, matchCount, matchPercent );
	}

      //PQclear( sres );
      
    }
  
  
  return;
}

void finalize_xml( FILE *efd, struct title *titlelist )
{
  // Current entry in Title List
  struct title *curr;

  for( curr = titlelist->next; curr; curr=curr->next )
    {
      // There are really two ways to procede here
      if( strlen( curr->dbtcn ) )
	{
	  xmlNodeSetContent( curr->tcntext, (xmlChar*)curr->dbtcn );
	}
      else
	{
	  fprintf( efd, "*** RECORD NOT FOUND IN EVERGREEN DB ***\n" );
	  fprintf( efd, "No TCNs found for ISBN\n" );
	  fprintf( efd, "File TCN: %s\n", curr->filetcn );
	  fprintf( efd, "ISBN: %s\n", curr->isbn );
	  fprintf( efd, "Title: %s\n", curr->title );
	  fprintf( efd, "Author: %s\n", curr->author );
	  fprintf( efd, "Publisher: %s\n\n\n", curr->pub );
	  xmlUnlinkNode( curr->record );
	}
    }
  
	  
  return;
}


xmlNode *get_child_node( xmlNode *parent, char *property, char *value )
{
  // Current Child Node
  xmlNode *currChild = NULL;

  // Pointer to Return
  xmlNode *retNode;

  // Current Property
  xmlChar *currProp;
  
  // Have we found the child?
  unsigned found = FALSE;

  for( currChild = parent->children; currChild && !found;
       currChild=currChild->next )
    {
      currProp = xmlGetProp( currChild, (xmlChar*)property );
      if( !xmlStrcmp( currProp, (xmlChar*)value ) )
	{
	  found = TRUE;
	  retNode = currChild;
	}
      xmlFree( currProp );
    }

  if( !found )
    {
      retNode = 0;
    }
  
  return retNode;
}

char *get_subfield_contents( xmlNode *field, char *property, char *value )
{
  // Current Subfield
  xmlNode *currSub = NULL;

  // Current Tag
  xmlChar *currTag;

  // String to return
  char *retStr;
  
  // Did we find the subfield?
  unsigned found = FALSE;
  if ( field )
    {
      for( currSub = field->children; currSub && !found; currSub = currSub->next )
	{
	  currTag = xmlGetProp( currSub, (xmlChar*)property );
	  if( !xmlStrcmp( currTag, (xmlChar*)value ) )
	    {
	      found = TRUE;
	      retStr = (char*)xmlNodeListGetString( currSub->doc,
						    currSub->children,
						    1 );
	    }
	  
	  xmlFree( currTag );
	}
      
    }
  

  if( !found )
    {
      retStr = (char*)malloc( sizeof( char ) );
      *retStr = '\0';
    }
  
  return retStr;
}

void clean_isbn( char *isbn )
{
  // Work buffer
  char *buff;

  // Current Position in "cleaned" string
  char *curr;

  // Generic Counter
  unsigned i;

  buff = strdup( isbn );
  curr = isbn;

  for( i = 0; i < strlen( buff ); i++ )
    {
      if( isdigit( buff[i] ) || isalpha( buff[i] ) )
	{
	  *curr = buff[i];
	  curr += sizeof( char );
	}
    }

  if( strlen( isbn ) < ISBNLEN )
    {
      *curr = '\0';
    }
  else
    {
      isbn[ISBNLEN] = '\0';
    }
  
  return;
}
-------------- next part --------------
//
// create_holdings.c
//
// Carson City - Crystal Area Schools
// Technology Department
// 115 E Main
// Carson City, MI 48811
// (989) 584-3138
//
// Given an XML MARC file, this program creates holdings information
// for records whose bibliographic data is already present in the Evergreen
// database (based on TCN).
//
// This program DOES NOT pull the "holding libary" from the the MARC XML
// file. Instead, this option is set on the command line by specifying the
// actor.org_unit.id value for the owning and circulating libraries. This
// is because the program was originally developed for K-12 migrations from
// systems that were "islands," and in many cases didn't have a holdings
// tag that indicated the owning or circulating library. Serveral other
// items of information that would be pulled from MARC during conversion
// from a "sane" system are simalarly defined as command line options. Again,
// the incumbant K-12 systems aren't sane.
//
// However, it would be trivial to modify this program to extract additional
// information from a holdings tag. Note also that several functions are
// declared and defined, but not used... they are present for debugging.
//
// This program is poorly written. It probably leaks memory... it actually causes
// permanent damage to RAM. It's been known to cause server farms to burst
// into flames. It stole money from my sock drawer. You've been warned...
//
// There are two tables involved in this enterprise. The relavent fields
// (the ones we will be filling out) are as follows (FK indicates a foreign
// key):
//
// asset.call_number
// creator       : FK - User who created this entry (actor.usr.id)
// editor        : FK - User who last edited this record (actor.usr.id)
// record        : FK - Biblio Data for copy (biblio.record_entry.id)
// owning_lib    : FK - Owning library (actor.org_unit.id)
// label         : The call number!
// 
//
// asset.copy
// circ_lib      :  FK - Circulating Library (actor.org_unit.id)
// creator       :  FK - User who created this entry (actor.usr.id)
// call_number   :  FK - Item Call Number (asset.call_number.id)
// editor        :  FK - User who last edited this record (actor.usr.id)
// status        :  FK - Item Status (config.copy_status.id)
// location      :  FK - Location (ie, Stacks) of copy (asset.copy_location.id)
// loan_duration :  Required, but not an FK... '2' is popular
// fine_level    :  Required, but not an FK... '2' is popular
// price         :  Item Price
// barcode       :  Not suprisingly, the item barcode
//
// So, basically, we extract the Barcode, Price, Call Number, and TCN of
// each record in the MARC File. Then, we use the TCN to  find the value for
// asset.call_number.record, create a call number record, then create a copy
// record.
//
// We get editor, owning_lib, circ_lib, creator, status, location, etc from
// the command line...
//
// Or at least that's the plan
//
//
// Copyright 2007 Travis Schafer
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 3
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
// Initial Program:
// 2007-11-07 T.Schafer
//
// Changes:
//
// 2007-11-13 T.Schafer - 1) Initialized variable in wrong spot. This meant
//                           that if any copy was found to have an existing
//                           barcode, no subsequent entires would be inserted
//                           to the database (basically, we didn't reset and
//                           "exists" flag at the top of a loop)
//
// 2007-11-14 T.Schafer   2) Inserted GPL/Copyright Notice


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

#include <libpq-fe.h>
#include <libxml/parser.h>
#include <libxml/tree.h>

#define _GNU_SOURCE
#include <getopt.h>

#define TRUE 1
#define FALSE 0

#define MAXFIELD 512     // Maximum field length in copy struct (used to
                         // construct a buffer for escaping of strings).
#define MAXSTMT 4096     // Maximum length of an SQL statement
#define STATUSINTVL 250  // On database update, give status every STATUSINTVL
                         // records.

// Structure for copy records
struct copy
{
  char *tcn;             // TCN of biblio to which copy is associated
  char *call;            // Copy Call Number
  char *bcode;           // Copy Barcode
  char *price;           // Copy Price
  struct copy *next;     // Next copy in a linked list
};


// Initialize a linked list of copies (ie, create front guard)
//
// Arguments: (void)
//
// Returns: Pointer to new list (ie, pointer to front guard)
struct copy *copy_initlist( void );

// Free a list of copies
//
// Arguments: struct copy *copylist  - Copy list to free
// Returns: (void)
void copy_freelist( struct copy *copylist );

// Insert a copy at the end of a list
//
// Arguments: struct copy *copylist  - List into which new record is inserted
//            char *tcn              - TCN of new record
//            char *call             - Call Number of new record
//            char *bcode            - Barcode of new record
//            char *price            - Price of new record
//
// Returns: (void)
void copy_insert( struct copy *copylist, char *tcn, char *call, char *bcode,
		  char *price );


// Link copy lists lista and listb
//
// Note: This function literally links the linked lists... so neither list
// may be freed after a call to this function. However, the list B start guard
// is freed, so all references to listb become invalid.
//
// Arguments:      struct copy* lista  - First List
//                 struct copy* listb  - Second List
//
// Returns:        (void)
void copy_linklists( struct copy *lista, struct copy *listb );

// Print the contents of a list of copies
//
// Arguments:     FILE* ofd              - File Descriptor to which list
//                                         is printed
//                struct copy *copylist  - Copy list to print
// Returns:       (void)
void copy_printlist( FILE* ofd, struct copy *copylist );

// Count the number of elements in a linked list of copies
//
// Arguments:     struct copy *copylist  - List to count
// Returns:       unsigned               - Number of copies in list
unsigned copy_countlist( struct copy *copylist );

// Return a linked list of all holdings found in a MARC XML document
//
// Arguments:     PGconn  *conn          - Needed for escaping string for SQL
//                                         (ie, get DB localization info)
//                xmlNode *collection    - Collection node from XML Marc doc
//                char    *tcnfield      - Field that holds item TCN
//                char    *holdfield     - (sub)Field that has holdings data
//                char    *bcodefield    - (sub)Field that has barcode
//                char    *calltag       - (sub)Field that price
//
// Returns: Linked list of all holdings found in document
struct copy *process_doc( PGconn *conn, xmlNode *collection, char *tcnfield,
			  char *holdfield, char *bcodetag, char *calltag,
			  char *pricetag );

// Given a record, return a linked list of all holdings found within that
// record
//
// Arguments:    PGconn *conn      - DB Connection for escaping strings
//               xmlNode *record   - Record to process
//               char *tcnfield    - Tag of TCN Field
//               char *holdfield   - Tag of holdings field
//               char *bcodetag    - Tag of barcode subfield
//               char *calltag     - Tag of call number subfield
//               char *pricetag    - Tag of price subfield
//
// Returns: struct copy*           - Linked list of copies found in record,
//                                   or NULL if no copies are found
struct copy *process_record( PGconn *conn, xmlNode *record, char *tcnfield,
			     char *holdfield, char *bcodetag, char *calltag,
			     char *pricetag );

// Return the content of a given subfield (ie, child of xmlNode *field)
// where the subfield has "property" set to "value"
//
// Note: Caller must free returned string, as appropriate
//
// Arguments:    xmlNode *field      - Field whose subfields are searched
//               char    *property   - Propert to search
//               char    *value      - Property is set to this value for match
//
// Returns: Text of subfield, or a zero length string if nothing is found
char *get_subfield_contents( xmlNode *field, char *property, char *value );

// Remove any character that is not a digit or a '.'
//
// Arguments:  char *price   - Price string to be cleaned
//
// Outputs:    char *price   - Cleaned price string
// Returns: (void)
void clean_price( char *price );

// Given a linked list of copies, update the Evergreen database...
//
// Arguments:  PGconn *conn           - Connection to Evergreen DB
//             struct copy *copylist  - List of copies for which records must
//                                      be created
//             unsigned egcreator     - Database ID of Creating User
//
// Returns: (void)
void update_database( PGconn *conn, struct copy *copylist, unsigned egcreator,
		      unsigned egeditor, unsigned egstatus,
		      unsigned egownlib, unsigned egcirclib,
		      unsigned eglocation );

// Print the statement, error message contained in a statement result (if any),
// and halt execution immediately.
//
// Arguments: FILE *ofd         - Descriptor to which error is printed
//            char *stmt        - Statement associated with result
//            PGresult *sres    - Statement Result
//
// Returns: (void)
void database_printerr( FILE *ofd, char *stmt, PGresult *sres );

int main( int argc, char **argv )
{
  // Current Command Line Option
  int currOpt;
  // Count of Command Line Options Process
  unsigned optCount;

  // Name of XML MARC file
  char *xfname;

  // Database Host
  char *dbhost;

  // Database Name
  char *dbname;

  // Database User
  char *dbuser;
  
  // Database Password
  char *dbpass;

  // Evergreen Creator ID
  unsigned egcreator;

  // Evergreen Editor ID
  unsigned egeditor;

  // Evergreen Status ID
  unsigned egstatus;

  // Evergreen Owning Lib ID
  unsigned egownlib;

  // Evergreen Cirgulating Lib ID
  unsigned egcirclib;

  // Evergreen Location ID
  unsigned eglocation;
  
  // Tag of TCN Field
  char *tcnfield;
  
  // Tag of Holdings Field
  char *holdfield;

  // Tag of Barcode Field
  char *bcodetag;

  // Tag of Call Number Subfield
  char *calltag;

  // Tag of Price Subfield
  char *pricetag;

  // Linked list of copies
  struct copy *copylist;
 
  // XML Document
  xmlDocPtr doc;

  // Connection to database
  PGconn *conn;
  
  optCount=0;
  while( TRUE )
    {
      static struct option options [] =
	{	  
	  { "xmlfile",     required_argument, 0, 'x' },
	  { "dbhost",      required_argument, 0, 's' }, 
	  { "dbname",      required_argument, 0, 'd' },
	  { "dbuser",      required_argument, 0, 'u' },
	  { "dbpass",      required_argument, 0, 'w' },
	  { "egcreator",   required_argument, 0, 'r' },
	  { "egeditor",    required_argument, 0, 'e' },
	  { "egownlib",    required_argument, 0, 'o' },
	  { "egcirclib",   required_argument, 0, 'i' },
	  { "egstatus",    required_argument, 0, 'a' },
	  { "eglocation",  required_argument, 0, 'l' },
  	  { "tcnfield",    required_argument, 0, 't' },
	  { "holdfield",   required_argument, 0, 'h' },
	  { "bcodetag",    required_argument, 0, 'b' },
	  { "calltag",     required_argument, 0, 'c' },
	  { "pricetag",    required_argument, 0, 'p' },
	  { 0, 0, 0, 0 }
	};

      int option_index = 0;
      
      currOpt = getopt_long_only( argc, argv, "", options, &option_index );
      
      if( currOpt == -1 )
	{
	  break;
	}

      switch( currOpt )
	{
	case 'x' :
	  {
	    xfname = strdup( optarg );
	    optCount++;
	    break;
	  }
	case 's' :
	  {
	    dbhost = strdup( optarg );
	    optCount++;
	    break;
	  }
	case 'd' :
	  {
	    dbname = strdup( optarg );
	    optCount++;
	    break;
	  }
	case 'u' :
	  {
	    dbuser = strdup( optarg );
	    optCount++;
	    break;
	  }
	case 'w' :
	  {
	    dbpass = strdup( optarg );
	    optCount++;
	    break;
	  }
	case 'r' :
	  {
	    sscanf( optarg, "%d", &egcreator );
	    optCount++;
	    break;
	  }
	case 'e' :
	  {
	    sscanf( optarg, "%d", &egeditor );
	    optCount++;
	    break;
	  }
	case 'a' :
	  {
	    sscanf( optarg, "%d", &egstatus );
	    optCount++;
	    break;
	  }
	case 'o' :
	  {
	    sscanf( optarg, "%d", &egownlib );
	    optCount++;
	    break;
	  }
	case 'i' :
	  {
	    sscanf( optarg, "%d", &egcirclib );
	    optCount++;
	    break;
	  }
	case 'l' :
	  {
	    sscanf( optarg, "%d", &eglocation );
	    optCount++;
	    break;
	  }
	case 't' :
	  {
	    tcnfield = strdup( optarg );
	    optCount++;
	    break;
	  }
	case 'h' :
	  {
	    holdfield = strdup( optarg );
	    optCount++;
	    break;
	  }
	case 'b' :
	  {
	    bcodetag = strdup( optarg );
	    optCount++;
	    break;
	  }
	case 'c' :
	  {
	    calltag = strdup( optarg );
	    optCount++;
	    break;
	  }
	case 'p' :
	  {
	    pricetag = strdup( optarg );
	    optCount++;
	    break;
	  }
	case '?':
	  {
	    // Error printed by getopt()
	    break;
	  }
	}
    }

  if( optCount != 16 )
    {
      fprintf( stderr, "%s: Given an XML MARC file, create holding\n"
	       "records in Evergreen.\n\n"
	       "Arguments:\n"
	       "--xmlfile    : XML MARC File\n"
	       "--dbhost     : Evergreen Database Host\n"
	       "--dbname     : Evergreen Database Name\n"
	       "--dbuser     : Evergreen Database User\n"
	       "--dbpass     : Evergreen Database Password\n"
	       "--egcreator  : Primary Key of Evergreen Record Creator\n"
	       "               (ie, record is marked as created by this user\n"
	       "--egeditor   : Primary Key of Evergreen Editing User\n"
	       "--egstatus   : Evergreen Status to set on creation\n"
	       "--egownlib   : Primary Key of Owning Library\n"
	       "--egcirclib  : Primary Key of Circulating Library\n"
	       "--eglocation : Primary Key of Copy Location (ie, stacks)\n"
	       "--tcnfield   : Tag of TCN Field (001 is common)\n"
	       "--holdfield  : Tag of holdings field (852 is common)\n"
	       "--bcodetag   : Tag of barcode subfield\n"
	       "--calltag    : Tag of callnumber subfield\n"
	       "--pricetag   : Tag of item price subfield\n",
	       argv[ 0 ] );
      exit( 1 );
    }

  // Attempt to parse XML File
  if( !(doc = xmlReadFile( xfname, NULL, 0 ) ) )
    {
      // Error printed by xmlReadFile
      exit( 1 );
    }

  // Attempt DB Connection
  conn = PQsetdbLogin( dbhost, NULL, NULL, NULL, dbname, dbuser, dbpass );
  if( PQstatus( conn ) == CONNECTION_BAD )
    {
      fprintf( stderr, "%s: Can not connet to database!\n"
	       "Error is :%s\n\n"
	       "***Exiting***\n",
	       argv[0], PQerrorMessage( conn ) );
      PQfinish( conn );
      exit( 1 );
    }
  
  // Extract linked list of copy structures from XML doc tree
  copylist = process_doc( conn, xmlDocGetRootElement( doc ), tcnfield,
			  holdfield, bcodetag, calltag, pricetag );
  xmlFreeDoc( doc );
  xmlCleanupParser();

  // Update database to reflect linked list of copies
  update_database( conn, copylist, egcreator, egeditor, egstatus, egownlib,
		   egcirclib, eglocation );
  copy_freelist( copylist );

  PQfinish( conn );
  
  free( xfname );
  free( dbhost );
  free( dbname );
  free( dbuser );
  free( dbpass );
  free( tcnfield );
  free( holdfield );
  free( bcodetag );
  free( calltag );
  free( pricetag );
    
  return 0;
}

struct copy *copy_initlist( void )
{
  // List to return
  struct copy *retList;

  retList = (struct copy*)malloc( sizeof( struct copy ) );
  retList->next = 0;

  return retList;
}

void copy_freelist( struct copy *copylist )
{
  // Pointer to current copy in list
  struct copy *curr;

  // Pointer to next copy in list
  struct copy *next;

  // Advance past guard
  curr = copylist->next;

  while( curr )
    {
      next = curr->next;
      free( curr->tcn );
      free( curr->call );
      free( curr->bcode );
      free( curr->price );
      free( curr );
      curr = next;
    }

  // Free the guard
  free( copylist );
  
  return;
}

void copy_insert( struct copy *copylist, char *tcn, char *call, char *bcode,
		  char *price )
{
  // Current Copy in List
  struct copy* curr;

  curr = copylist;

  // Find the end of the list
  while( curr )
    {
      if( !curr->next )
	{
	  curr->next = (struct copy*)malloc( sizeof( struct copy ) );
	  curr = curr->next;

	  curr->tcn = strdup( tcn );
	  curr->call = strdup( call );
	  curr->bcode = strdup( bcode );
	  curr->price = strdup( price );
	  curr->next = 0;
	}
      curr = curr->next;
    }
  return;
}

void copy_linklists( struct copy *lista, struct copy *listb )
{
  // Current position in list a
  struct copy *curr;

  // Advance curr to the last record
  for( curr = lista; curr->next; curr=curr->next );

  // Okay, curr is pointing to the end record in list A. Link the lists.
  curr->next = listb->next;

  // Dont forget to free the guard.
  free( listb );
  
  return;
}

void copy_printlist( FILE* ofd, struct copy *copylist )
{
  // Current item in copylist
  struct copy* curr;

  for( curr = copylist->next; curr; curr=curr->next )
    {
      fprintf( ofd, "\n*** Copy Record ***\n" );
      fprintf( ofd, "TCN: %s\n", curr->tcn );
      fprintf( ofd, "Call: %s\n", curr->call );
      fprintf( ofd, "Barcode: %s\n", curr->bcode );
      fprintf( ofd, "Price: %s\n", curr->price );
    }

  return;
}

unsigned copy_countlist( struct copy *copylist )
{
  // Current copy in list
  struct copy *curr;

  // Count of copies
  unsigned count=0;

  for( curr = copylist->next; curr; curr=curr->next )
    {
      count++;
    }

  return count;
}


struct copy *process_doc( PGconn *conn, xmlNode *collection, char *tcnfield,
			  char *holdfield, char *bcodetag, char *calltag,
			  char *pricetag )
{
  // Current Record in XML Tree
  xmlNode *currRec = NULL;

  // Linked list of holdings found...
  struct copy *retList;

  retList = copy_initlist();
   
  for( currRec = collection->children; currRec; currRec=currRec->next )
    {
      copy_linklists( retList,
		      process_record( conn, currRec, tcnfield, holdfield,
				      bcodetag, calltag, pricetag ) );
    }
  return retList;
}

struct copy *process_record( PGconn *conn, xmlNode *record, char *tcnfield,
			     char *holdfield, char *bcodetag, char *calltag,
			     char *pricetag )
{
  // Pointer to current field
  xmlNode *currField = NULL;

  // Tag of current Field
  xmlChar *currTag;

  // Current TCN
  char *currTcn;

  // Current Call Number
  char *currCall;

  // Current Barcode
  char *currBcode;

  // Current Price
  char *currPrice;
  
  // Copy list to return
  struct copy *retList;

  // Did we find good copy records?
  unsigned goodRecs = TRUE;

  // Initialize return list
  retList = copy_initlist();
  
  // Get the TCN
  currTcn = get_subfield_contents( record, "tag", tcnfield );

  if( !strlen( currTcn ) )
    {
      goodRecs = FALSE;
    }

  // Now we need to find any holdings tags and get the necessary subfields.
  // There is really no reason to do this if we didn't get a TCN.
  if( goodRecs )
    {
      for( currField=record->children; currField; currField=currField->next )
	{
	  currTag = xmlGetProp( currField, (xmlChar*)"tag" );
	  if( !xmlStrcmp( currTag, (xmlChar*)holdfield ) )
	    {
	      currCall = get_subfield_contents( currField, "code", calltag );
	      currBcode = get_subfield_contents( currField, "code", bcodetag );
	      currPrice = get_subfield_contents( currField, "code", pricetag );

	      // Okay, call number and barcode are required... we can live
	      // without price. Therefore, if we have both currCall and
	      // currBcode, we should enter a new copy in our copylist to
	      // return
	      if( strlen( currCall ) && strlen( currBcode ) )
		{
		  // Buffer for escaping of strings
		  char buff[MAXFIELD+1];
		  
		  // If we didn't get a price, default to '0.00'. Otherwise,
		  // clean up existing price
		  if( !strlen( currPrice ) )
		    {
		      free( currPrice );
		      currPrice = strdup( "0.00" );
		    }
		  else
		    {
		      clean_price( currPrice );
		    }
		  

		  // We really need to escape these, as well as the TCN,
		  // before we insert into the struct... since the struct
		  // is later used for SQL purposes.
		  PQescapeStringConn( conn, buff, currTcn, strlen(currTcn),
				      NULL );
		  free( currTcn );
		  currTcn = strdup( buff );

		  PQescapeStringConn( conn, buff, currCall, strlen(currCall),
				      NULL );
		  free( currCall );
		  currCall = strdup( buff );

		  PQescapeStringConn( conn, buff, currBcode, strlen(currBcode),
				      NULL );
		  free( currBcode );
		  currBcode = strdup( buff );

		  PQescapeStringConn( conn, buff, currPrice, strlen(currPrice),
				      NULL );
		  free( currPrice );
		  currPrice = strdup( buff );
		  
		  copy_insert( retList, currTcn, currCall, currBcode,
			       currPrice );
		  free( currCall );
		  free( currBcode );
		  free( currPrice );
		}
	    }
	  xmlFree( currTag );
	}
    }
  
  free( currTcn );
    
  return retList;
}

char *get_subfield_contents( xmlNode *field, char *property, char *value )
{
  // Current Subfield
  xmlNode *currSub = NULL;

  // Current Tag
  xmlChar *currTag;

  // String to return
  char *retStr;
  
  // Did we find the subfield?
  unsigned found = FALSE;

  for( currSub = field->children; currSub && !found; currSub = currSub->next )
    {
      currTag = xmlGetProp( currSub, (xmlChar*)property );
      if( !xmlStrcmp( currTag, (xmlChar*)value ) )
	{
	  found = TRUE;
	  retStr = (char*)xmlNodeListGetString( currSub->doc,
						currSub->children,
						1 );
	}
      
      xmlFree( currTag );
    }

  if( !found )
    {
      retStr = (char*)malloc( sizeof( char ) );
      *retStr = '\0';
    }
  
  return retStr;
}

void clean_price( char *price )
{
  // Buffer
  char *buff;

  // Current position in input/output string (we're a side-effect)
  char *currPos;
  
  // Generic counter
  unsigned i;

  // Initialize buffer and set current position in output string
  buff = strdup( price );
  currPos = price;
  *price = '\0';
  
  for( i = 0; i < strlen( buff ); i++ )
    {
      if( isdigit( buff[i] ) || (buff[i] == '.' ) )
	{
	  *currPos = buff[i];
	  currPos += sizeof( char );
	}
    }
  *currPos = '\0';

  free( buff );  
}

void update_database( PGconn *conn, struct copy *copylist, unsigned egcreator,
		      unsigned egeditor, unsigned egstatus,
		      unsigned egownlib, unsigned egcirclib,
		      unsigned eglocation )
{
  // SQL statement Buffer
  char stmt[MAXSTMT+1];

  // Result of most recently submitted statement
  PGresult *sres;

  // Current Copy
  struct copy *curr;
  
  // This could take a while. Keep track of the number of records that
  // we've processed. Give a status update every once in a while.
  unsigned processed = 0;
  
  // Advance past guard
  curr = copylist->next;

  // Start Transaction
  sres = PQexec( conn, "BEGIN;" );
  database_printerr( stderr, stmt, sres );
  PQclear( sres );
  
  while( curr )
    {
      // Does copy already exist, based on barcode?
      unsigned exists = FALSE;
      
      // See if a copy record already exists in the database, based on barcode
      sprintf( stmt, "SELECT id FROM asset.copy WHERE barcode = '%s';",
	       curr->bcode );
      sres = PQexec( conn, stmt );
      database_printerr( stderr, stmt, sres );
      
      
      if( PQntuples( sres ) )
	{
	  fprintf( stdout, "Barcode already exists: Bcode %s\tTCN %s\n",
		   curr->bcode, curr->tcn );
	  exists = TRUE;
	}
      PQclear( sres );
      
      
      // Get biblio.record_entry key for this record

      sprintf( stmt, "SELECT id FROM biblio.record_entry WHERE "
	       "tcn_value = '%s';", curr->tcn );
      sres = PQexec( conn, stmt );
      database_printerr( stderr, stmt, sres );

      if( PQntuples( sres ) && !exists )
	{
	  // Primary key of existing bibliographic record entry
	  unsigned biblioKey;

	  // Primary key of new/existing call number record
	  unsigned callKey;
	  
	  // We have a bibliographic record entry. Get the key
	  sscanf( PQgetvalue( sres, 0, 0 ), "%d", &biblioKey );
	  PQclear( sres );
	 
	  // This is worth a short discussion. The "right" way to do this
	  // is to figure out how many records we are going to insert
	  // ahead of time, and reserve space in the primary key sequence.
	  // However, there is a damned fine chance that we'll end up
	  // inserting far fewer records than are present in our list.
	  // To compensate for that, we could check all of the records
	  // ahead of time, to make sure that there is a matching bibliographic
	  // entry in the database... but then we aren't saving any queries..

	  // Okay, the record, owning_lib, label tuple must be unique in
	  // asset.call_number. This implies that asset.call_number is the
	  // "volume" that appears in the user interface. SO, we need to see
	  // if an appropriate asset.call_number record exists. If it does,
	  // we use that... if it doesn't, we create one

	  sprintf( stmt, "SELECT id FROM asset.call_number WHERE "
		   "record = %d AND owning_lib = %d AND label = '%s';",
		   biblioKey, egownlib, curr->call );
	  sres = PQexec( conn, stmt );
	  database_printerr( stderr, stmt, sres );

	  if( PQntuples( sres ) )
	    {
	      // Existing asset.call_number
	      sscanf( PQgetvalue( sres, 0, 0 ), "%d", &callKey );
	      PQclear( sres );
	    }
	  else
	    {
	      // New asset.call_number
	      sprintf( stmt, "SELECT nextval( 'asset.call_number_id_seq' );" );
	      sres = PQexec( conn, stmt );
	      database_printerr( stderr, stmt, sres );
	      sscanf( PQgetvalue( sres, 0, 0 ), "%d", &callKey );
	      PQclear( sres );

	      sprintf( stmt, "INSERT INTO asset.call_number "
		       "( id, creator, editor, record, "
		       "  owning_lib, label ) "
		       "VALUES "
		       "( %d, %d, %d, %d, "
		       "  %d, '%s' );",
		       callKey, egcreator, egeditor, biblioKey,
		       egownlib, curr->call );
	      sres = PQexec( conn, stmt );
	      database_printerr( stderr, stmt, sres );
	      PQclear( sres );
	    }
	  
	  
	  // Create the asset.copy entry
	  sprintf( stmt, "INSERT INTO asset.copy "
		   "( circ_lib, creator, call_number, "
		   "  editor, status, location, "
		   "  loan_duration, fine_level, price, "
		   "  barcode ) "
		   "VALUES "
		   "( %d, %d, %d, "
		   "  %d, %d, %d, "
		   "  %d, %d, '%s', "
		   "  %s );",
		   egcirclib, egcreator, callKey,
		   egeditor, egstatus, eglocation,
		   2, 2, curr->price,
		   curr->bcode );
	  sres = PQexec( conn, stmt );
	  database_printerr( stderr, stmt, sres );
	  PQclear( sres );
	}
      else if( !exists )
	{
	  fprintf( stdout, "TCN Not Found: %s\n", curr->tcn );
	  PQclear( sres );  
	}
      curr = curr->next;
      processed++;

      if( !(processed % STATUSINTVL ) )
	{
	  fprintf( stderr, "Processed: %d records\n", processed );
	}
      
    }
  
  
  // End Transaction
  sres = PQexec( conn, "COMMIT;" );
  database_printerr( stderr, stmt, sres );
  PQclear( sres );
  
  return;
}

void database_printerr( FILE *ofd, char *stmt, PGresult *sres )
{
  // Pointer to error message within sres
  char *err;

  err = PQresultErrorMessage( sres );

  if( strlen( err ) )
    {
      fprintf( stderr, "\n\n\n***Begin SQL Error**\n\n" );
      fprintf( stderr, "Statement: %s\n", stmt );
      fprintf( stderr, "%s\n", err );
      fprintf( stderr, "IMPORT ABORTED. TRANSACTION ROLLED BACK.\n" );
      fprintf( stderr, "DATABASE REMAINS UNCHANGED.\n" );
      exit( 1 );
    }
  return;
}


More information about the Open-ils-dev mailing list