[open-ils-commits] r1076 - in conifer/branches/rel_1_6_1/tools: . ebooks (dbs)
svn at svn.open-ils.org
svn at svn.open-ils.org
Fri Nov 19 17:33:09 EST 2010
Author: dbs
Date: 2010-11-19 17:33:03 -0500 (Fri, 19 Nov 2010)
New Revision: 1076
Added:
conifer/branches/rel_1_6_1/tools/ebooks/
conifer/branches/rel_1_6_1/tools/ebooks/prep_ebook_records.py
Log:
Close to working ebook MARC record processing script
Needs some massive refactoring for a smarter, more object-oriented approach.
But some days you just need to bash out something that mostly works.
Also needs the ebrary URL distinction and Algoma's settings.
Added: conifer/branches/rel_1_6_1/tools/ebooks/prep_ebook_records.py
===================================================================
--- conifer/branches/rel_1_6_1/tools/ebooks/prep_ebook_records.py (rev 0)
+++ conifer/branches/rel_1_6_1/tools/ebooks/prep_ebook_records.py 2010-11-19 22:33:03 UTC (rev 1076)
@@ -0,0 +1,286 @@
+#!/usr/bin/env python
+
+import codecs, os, os.path, sys, getopt, pymarc, pymarc.marc8
+
class Institution():
    """Link settings for each Conifer institution.

    Every institution is stored as an instance attribute (``algoma``,
    ``laurentian``, ``windsor``) holding a dict with the keys ``code``,
    ``ebrary_code``, ``proxy``, ``public_note`` and ``link_text``.
    """

    def __init__(self):
        """Create one settings dict per institution."""
        # Algoma's values are placeholders ("XXX") in this revision.
        self.algoma = dict(
            code="OSTMA",
            ebrary_code="XXX",
            proxy="XXX",
            public_note="XXX",
            link_text="XXX",
        )

        self.laurentian = dict(
            code="OSUL",
            ebrary_code="jndlu",
            proxy="https://librweb.laurentian.ca/login?url=",
            public_note="Available online / disponible en ligne",
            link_text="Available online / disponible en ligne",
        )

        self.windsor = dict(
            code="OWA",
            ebrary_code="oculwindsor",
            proxy="http://ezproxy.uwindsor.ca/login?url=",
            public_note="To view Windsor's electronic resource click here.",
            link_text="To view Windsor's electronic resource click here.",
        )

    def get_settings(self, lib):
        """Return the settings stored under the attribute named ``lib``.

        Raises AttributeError when no attribute of that name exists.
        """
        settings = getattr(self, lib)
        return settings
+
+
def do_help():
    """Print the usage text for the ebook record processor, then exit.

    The script name (``sys.argv[0]``) is substituted into the example
    invocation at the end of the text.

    Raises:
        SystemExit: always, with exit status 0.
    """

    usage = '''
Conifer ebook MARCXML processor

This script takes a set of MARCXML records and processes them to generate a set
of MARCXML records ready for loading into the Conifer consortial library
system. The processing consists of taking the existing 856 field and creating
one or more new 856 fields for each Conifer institution that should have access
to these resources.

The script customizes the following aspects of each record:

 * Adds one 856 per institution specified at the command line:
 * $u (URL) - prepends the institutional proxy and, for eBrary records,
 changes the institutional code
 * $y (link text) - sets preferred text of the link to the resource
 * $z (public note) - sets public note for the resource

 * Adds a 710 field to identify the publisher using the value specified
 at the command line
 * Adds a 590 internal note field using the value specified at the command
 line.

Required arguments:
 -i / --input : The name of the input MARCXML file.

 -o / --output : The name of the output MARCXML file.

 -p / --publisher : The name of the publisher to be inserted in a 710 field.

 -A / --algoma: Add an 856 for Algoma University

 -L / --laurentian: Add an 856 for Laurentian University

 -W / --windsor : Add an 856 for University of Windsor

Optional arguments:
 -n / --note : The text of the internal note to be inserted into a 590 field.

 -h / --help : Prints help message

Examples:
 %s --algoma --windsor -i crkn.xml -o /tmp/crkn_out.xml -p "eBrary Inc."
''' % sys.argv[0]

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(usage)
    sys.exit(0)
+
def consolidate_options(opts):
    """Make long arguments the standard form in command line options.

    Args:
        opts: sequence of ``(option, value)`` pairs, as returned by
            ``getopt.getopt``.

    Returns:
        dict mapping every option exactly as given to its value, plus an
        entry under the long name for each recognized short option.  When
        an option is supplied more than once (in either form), the last
        occurrence on the command line wins.
    """

    short_to_long = {
        '-i': '--input',
        '-o': '--output',
        '-p': '--publisher',
        '-n': '--note',
        '-A': '--algoma',
        '-L': '--laurentian',
        '-W': '--windsor',
        '-h': '--help',
    }

    _options = {}

    # Walk the pairs in command-line order so that a later "--input" is not
    # silently overridden by an earlier "-i" (and vice versa).
    for key, val in opts:
        _options[key] = val
        long_key = short_to_long.get(key)
        if long_key is not None:
            _options[long_key] = val

    return _options
+
def check_options(options):
    """Check the validity of the options and build the processing settings.

    Args:
        options: dict keyed by long option name (``--input``, ``--output``,
            ``--publisher``, ``--note``, ``--algoma``, ``--laurentian``,
            ``--windsor``, ``--help``).

    Returns:
        dict with the keys ``publisher``, ``libraries`` (dict of selected
        institution names), ``input``, ``output`` and ``settings`` (an
        ``Institution`` instance), plus ``note`` when one was supplied.

    Raises:
        SystemExit: through ``do_help()`` when help is requested or a
            required argument is missing; with status 1 when the input
            file cannot be read or the output directory is not writable.
    """

    if '--help' in options:
        do_help()

    _help = False

    required = (
        ('-i', '--input'),
        ('-o', '--output'),
        ('-p', '--publisher'),
    )
    for short_opt, long_opt in required:
        if long_opt not in options:
            print("* Missing %s / %s argument!" % (short_opt, long_opt))
            _help = True

    _libraries = dict(
        (name, True)
        for name in ('algoma', 'laurentian', 'windsor')
        if ('--' + name) in options
    )

    if not _libraries:
        # Say why the help text is about to be shown.
        print("* Missing library argument: specify at least one of "
              "-A / --algoma, -L / --laurentian, -W / --windsor!")
        _help = True

    if _help:
        do_help()

    # Get the input and output files
    _input = options['--input']
    _output = options['--output']

    try:
        os.stat(_input)
    except OSError:
        print("* Cannot read input file %s" % (_input))
        sys.exit(1)

    # A bare file name has an empty dirname, which means the current
    # directory.
    _output_dir = os.path.dirname(_output) or os.curdir

    # os.access() reports failure through its return value; it does not
    # raise OSError for a missing or read-only directory.
    if not os.access(_output_dir, os.W_OK):
        print("* Cannot write to output path %s" % (_output_dir))
        sys.exit(1)

    clean_opts = {
        'publisher': options['--publisher'],
        'libraries': _libraries,
        'input': _input,
        'output': _output,
    }

    if '--note' in options:
        clean_opts['note'] = options['--note']

    clean_opts['settings'] = Institution()

    return clean_opts
+
def parse_opts():
    """Get and validate the command-line arguments of the script.

    Parses ``sys.argv[1:]`` with ``getopt``, folds short options into their
    long form with ``consolidate_options()`` and validates the result with
    ``check_options()``.

    Returns:
        The cleaned options dict produced by ``check_options()``.

    Raises:
        SystemExit: through ``do_help()`` when the command line cannot be
            parsed, or from ``check_options()`` when validation fails.
    """
    _short_opts = 'i:o:p:ALWn:h'
    _long_opts = [
        'input=', 'output=', 'publisher=', 'algoma',
        'laurentian', 'windsor', 'note=', 'help',
    ]

    try:
        # Positional arguments are accepted by getopt but not used.
        opts, _args = getopt.getopt(sys.argv[1:], _short_opts, _long_opts)
    except getopt.GetoptError as ex:
        # "except ... as" and print(...) are valid on Python 2.6+ and 3.
        print("* %s" % str(ex))
        # do_help() terminates the process, so "opts" is always bound below.
        do_help()

    _options = consolidate_options(opts)
    return check_options(_options)
+
def process_records(options):
    """Convert raw ebook MARC records to Conifer-ready MARC records.

    Reads every record from ``options['input']`` and writes a converted
    copy to ``options['output']``.

    Args:
        options: dict with ``input`` and ``output`` file names,
            ``publisher`` (text for the added 710), optionally ``note``
            (text for the added 590), and the ``libraries`` / ``settings``
            entries consumed by ``process_urls()``.
    """

    # Nested "with" blocks close both files even when a record fails.
    with open(options['input'], 'rb') as infile:
        with open(options['output'], 'wb') as outfile:
            reader = pymarc.MARCReader(infile)
            writer = pymarc.MARCWriter(outfile)

            for cnt, record in enumerate(reader, 1):
                writer.write(_convert_record(record, cnt, options))


def _convert_record(record, cnt, options):
    """Build the Conifer version of one source record (number ``cnt``)."""

    new_record = pymarc.Record()
    # Carry the source leader over so the record status, type and encoding
    # flags of the original are not replaced by pymarc's blank default.
    new_record.leader = record.leader

    seen_856 = False
    has_url = False

    for field in record.get_fields():
        if field.tag != '856':
            new_record.add_field(field)
            continue

        # Only process the first 856 field, for better or worse
        if seen_856:
            continue
        seen_856 = True

        # Skip URL processing when there is no $u to build links from.
        if field.get_subfields('u'):
            has_url = True
            for new_field in process_urls(field, options) or []:
                new_record.add_field(new_field)

    if not has_url:
        print("* No 856 for record # %s" % (cnt))

    new_record.add_field(
        pymarc.Field(
            tag='710',
            indicators=['2', ' '],
            subfields=['a', options['publisher']]
        )
    )

    if 'note' in options:
        new_record.add_field(
            pymarc.Field(
                tag='590',
                indicators=[' ', ' '],
                subfields=['a', options['note']]
            )
        )

    return new_record
+
def process_urls(field, options):
    """Create the 856 fields required by Conifer from one source 856.

    Args:
        field: the source 856 field; the value of its first ``$u``
            subfield is used as the resource URL.
        options: dict with ``libraries`` (iterable of institution names)
            and ``settings`` (object whose ``get_settings(name)`` returns
            a dict with ``proxy``, ``link_text``, ``public_note`` and
            ``code``).

    Returns:
        list with one new 856 field per selected library, in sorted
        library-name order; an empty list when the source field has no
        ``$u`` subfield.
    """

    urls = field.get_subfields('u')
    if not urls:
        print("* No subfield 'u' found in this 856")
        # Return an empty list rather than None so callers can iterate.
        return []
    url = urls[0]

    settings = options['settings']
    new_fields = []

    # Sorted so the order of the generated fields is the same on every run.
    for lib in sorted(options['libraries']):
        data = settings.get_settings(lib)
        # NOTE: the URL is always prefixed with the proxy; the
        # "ebrary_code" setting is not used here.
        new_fields.append(
            pymarc.Field(
                tag='856',
                indicators=['4', '0'],
                subfields=[
                    'u', data['proxy'] + url,
                    'y', data['link_text'],
                    'z', data['public_note'],
                    '9', data['code']
                ]
            )
        )

    return new_fields
+
if __name__ == '__main__':
    # Validate the command line first, then convert the records.
    options = parse_opts()
    process_records(options)
+
+## Okay, made it through the basic invocation requirements; moving on
+#
+#For each MARC record:
+#
+#Find the 856 u ($url)
+#for each institution:
+#create a new 856 40
+#if $url =~ /\.ebrary\./, then:
+#$url =~ s/^.*?id=(\d+)\s*$/$1/
+#$url = http://site.ebrary.com/lib/ + institution.ebrary_code + "/Doc?id=" + $url
+#else:
+#$url = institution.proxy + $url
+#$u = $url
+#$y = institution.link_text
+#$z = institution.public_note
+#$9 = institution.code
More information about the open-ils-commits
mailing list