[open-ils-commits] r1106 - conifer/branches/rel_1_6_1/tools/ebooks (dbs)

svn at svn.open-ils.org svn at svn.open-ils.org
Thu Dec 16 11:42:27 EST 2010


Author: dbs
Date: 2010-12-16 11:42:24 -0500 (Thu, 16 Dec 2010)
New Revision: 1106

Modified:
   conifer/branches/rel_1_6_1/tools/ebooks/prep_ebook_records.py
Log:
Convert uniformly to Unicode output

LoC-added content appears to be encoded in ISO-8859-1, so decode it accordingly.

Then generate UTF-8-encoded Unicode MARC records on output.


Modified: conifer/branches/rel_1_6_1/tools/ebooks/prep_ebook_records.py
===================================================================
--- conifer/branches/rel_1_6_1/tools/ebooks/prep_ebook_records.py	2010-12-13 18:54:58 UTC (rev 1105)
+++ conifer/branches/rel_1_6_1/tools/ebooks/prep_ebook_records.py	2010-12-16 16:42:24 UTC (rev 1106)
@@ -220,29 +220,34 @@
     """Converts raw ebook MARC records to Conifer-ready MARC records"""
 
     sample = ''
-    reader = pymarc.MARCReader(open(options['input'], 'rb'))
-    writer = pymarc.MARCWriter(open(options['output'], 'wb'))
+    reader = pymarc.MARCReader(
+        open(options['input'], mode='rb'), to_unicode=True
+    )
+    writer = pymarc.MARCWriter(open(options['output'], mode='wb'))
     if ('sample' in options):
-        sample = pymarc.MARCWriter(open(options['sample'], 'wb'))
+        sample = pymarc.MARCWriter(open(options['sample'], mode='wb'))
 
     cnt = 0
     for record in reader:
         cnt = cnt + 1
-        if not (record['856'] and record['856']['u']):
-            print("* No 856 for record # %s in file %s"
-                    % (cnt, options['input'])
-            )
+        try:
+            if not (record['856'] and record['856']['u']):
+                print("* No 856 for record # %s in file %s"
+                        % (cnt, options['input'])
+                )
 
-        new_record = process_fields(record, options)
+            new_record = process_fields(record, options)
 
-        writer.write(new_record)
-        if (sample and ((cnt == 1) or (cnt % 100 == 0))):
-            sample.write(new_record)
+            writer.write(new_record)
+            if (sample and ((cnt == 1) or (cnt % 100 == 0))):
+                sample.write(new_record)
+        except Exception, ex:
+            print("* Error processing record %s - %s" % (cnt, ex))
 
 def process_fields(record, options):
     """Decide which fields to add, delete, and keep"""
 
-    new_record = pymarc.Record()
+    new_record = pymarc.Record(to_unicode=True, force_utf8=True)
 
     for field in record.get_fields():
         # Process all of the 856 fields
@@ -435,7 +440,7 @@
         content = content[0:lcsh]
 
     # Farewell, starting and ending whitespace
-    content = content.strip()
+    content = content.strip().decode('iso8859-1')
 
     return content
 



More information about the open-ils-commits mailing list