[open-ils-commits] r358 - conifer/trunk/tools/migration-scripts (dbs)
svn at svn.open-ils.org
svn at svn.open-ils.org
Sun Apr 19 17:21:03 EDT 2009
Author: dbs
Date: 2009-04-19 17:20:57 -0400 (Sun, 19 Apr 2009)
New Revision: 358
Modified:
conifer/trunk/tools/migration-scripts/fix_bad_marcxml.pl
Log:
Fix a few common errors produced by yaz-marcdump when converting to XML
Modified: conifer/trunk/tools/migration-scripts/fix_bad_marcxml.pl
===================================================================
--- conifer/trunk/tools/migration-scripts/fix_bad_marcxml.pl 2009-04-18 20:20:13 UTC (rev 357)
+++ conifer/trunk/tools/migration-scripts/fix_bad_marcxml.pl 2009-04-19 21:20:57 UTC (rev 358)
@@ -3,19 +3,20 @@
use warnings;
foreach my $file (@ARGV) {
- clean_empty_datafields($file);
+ process_file($file);
}
-sub clean_empty_datafields {
+sub process_file {
my $file = shift;
# Empty datafields anger MARC::File::XML
open(FH, '<', $file) or die $!;
open(CLEAN, '>', "$file.new");
- my ($trim, $lastline) = (0, '');
+ my ($trim, $lastline, $lineno) = (0, '', 1);
while (<FH>) {
if ($_ =~ m#</datafield># and $lastline =~ m#<datafield#) {
+ print STDERR "Empty datafield at line $lineno of file $file\n";
$trim = 1;
} elsif ($trim) {
$trim = 0;
@@ -23,8 +24,25 @@
print CLEAN $lastline;
$trim = 0;
}
+
+ # Given questionable input, yaz-marcdump creates invalid XML like this:
+ # <datafield tag="500" ind1=" " ind2=" ">
+ # <subfield code="a">In subtitle "sports" appears as "</subfield>
+ # <subfield code="p">ort</subfield>
+ # <subfield code=""">.</subfield>
+ # </datafield>
+ #
+ # This will at least enable MARC::File::XML to process it:
+ if ($_ =~ m#<subfield code=""">#o) {
+ print STDERR "Bad subfield code \" at line $lineno of file $file\n";
+ $_ =~ s{<subfield code=""">}{<subfield code="a">}o;
+ } elsif ($_ =~ m#<subfield code="<">#o) {
+ print STDERR "Bad subfield code < at line $lineno of file $file\n";
+ $_ =~ s{<subfield code="<">}{<subfield code="a">}o;
+ }
$lastline = $_;
+ $lineno++;
}
print CLEAN $lastline;
More information about the open-ils-commits
mailing list