[OPEN-ILS-GENERAL] ***SPAM*** Re: SQL help needed
John C. Houser
houser at hslc.org
Wed Sep 12 11:22:51 EDT 2012
Mary,
Here's a perl script I used to make some changes to 856 tags in our
system. You might be able to use it as a model for what you're trying to
do. Let me know if I can answer any questions about it.
J
***
#!/usr/bin/perl
use strict;
use DBI;
use Config::IniFiles;
use MARC::Record;
use MARC::File::XML;
use LWP::UserAgent;
my $cfg = Config::IniFiles->new( -file =>
"/usr/local/vufind/web/conf/Evergreen-production.ini" );
my $port = $cfg->val( 'Catalog', 'port' );
my $hostname = $cfg->val( 'Catalog', 'hostname' );
my $database = $cfg->val( 'Catalog', 'database' );
my $username = $cfg->val( 'Catalog', 'user' );
my $password = $cfg->val( 'Catalog', 'password' );
my $dsn = "dbi:Pg:dbname=$database;host=$hostname;port=$port";
my $dbh = DBI->connect($dsn, $username, $password, {AutoCommit => 0,
RaiseError => 0, PrintError => 0});
die("Could not connect to database!") unless $dbh;
$/ = "\035";
$| = 1;
my $total = 0;
my $updated = 0;
my $errors = 0;
my @ids = ();
my $sql = "
SELECT record_entry.id
FROM biblio.record_entry
WHERE record_entry.deleted IS FALSE
AND record_entry.active IS TRUE
";
print "$sql\n";
my $sth = $dbh->prepare($sql);
$sth->execute;
my $rv = $sth->err;
die($sth->errstr) if $rv;
while ( my $id = $sth->fetchrow ) {
push @ids, $id;
}
$sth->finish;
LOOP: while ( my $record_id = pop @ids ) {
$total++;
my $sql2 = "
SELECT record_entry.marc
FROM biblio.record_entry
WHERE record_entry.id = $record_id
";
my $sth2 = $dbh->prepare($sql2);
$sth2->execute;
my $rv2 = $sth2->err;
die($sth2->errstr) if $rv2;
my $hashref = $sth2->fetchrow_hashref;
my $marc_xml = $hashref->{'marc'};
$sth2->finish;
# Read in MARC and set values
my $record = '';
eval { $record = MARC::Record->new_from_xml($marc_xml, 'UTF-8'); };
if ( $@ ) {
print STDERR "ERROR LOADING TCN $record_id\n";
next LOOP;
}
# Fix the leader to indicate UTF-8
$record->encoding('UTF-8');
my @f856 = $record->field('856');
if ( @f856 ) {
print "\nTCN $record_id\n";
FOR: foreach my $field (@f856) {
print 'FOUND ', $field->as_formatted(), "\n";
# Check for URL in the 856 $u
my $u = $field->subfield('u');
if ( ! $u ) {
$record->delete_fields($field);
next FOR;
}
# First make the fixes
my $z = $field->subfield('z');
my $s3 = $field->subfield('3');
# Remove spaces from the URL
$u =~ s/\s//g;
# Copy $3 to $z if no $z
if ( ! $z && $s3 ) {
$field->add_subfields('z' => $s3);
$z = $s3;
}
# Fix LC links
if ( $u =~ m/www\.loc\.gov\/catdir\/bios/ ) {
$field->delete_subfield(code => 'z');
$field->add_subfields('z' => 'Contributor biographical
information');
} elsif ( $u =~ m/www\.loc\.gov\/catdir\/samples/ ) {
$field->delete_subfield(code => 'z');
$field->add_subfields('z' => 'Sample text');
} elsif ( $u =~ m/www\.loc\.gov\/catdir\/description/ ) {
$field->delete_subfield(code => 'z');
$field->add_subfields('z' => 'Publisher description');
} elsif ( $u =~ m/www\.loc\.gov\/catdir\/toc/ ) {
$field->delete_subfield(code => 'z');
$field->add_subfields('z' => 'Table of contents');
}
# Change first "www.http" to "http"
$u =~ s/www\.http/http/i;
# Change "www.loc/gov" to "www.loc.gov"
$u =~ s/www\.loc\/gov/www\.loc\.gov/i;
# Change "hhtp" to "http" at beginning of $u
$u =~ s/^hhtp/http/i;
# Add "http://" if protocol missing from start of $u
unless ( $u =~ m/^http/i || $u =~ m/^ftp/i ) {
$u = 'http://' . $u;
}
# Save changes
$field->delete_subfield(code => 'u');
$field->add_subfields('u' => $u);
# Now check the link
my $ua = LWP::UserAgent->new;
my $response = $ua->get($u);
if (! $response->is_success) {
print 'ERROR ', $response->status_line, "\n";
$errors++;
my $status = $response->status_line;
if ( $status =~ m/400 URL missing/i ||
$status =~ m/^401 Unauthorized/i ||
$status =~ m/^403 Forbidden/i ||
$status =~ m/^404 Can't chdir to/i ||
$status =~ m/^404 - File not found/i ||
$status =~ m/^404 Object Not Found/i ||
$status =~ m/^406 Not Acceptable/i ||
$status =~ m/^410 Gone/i ||
$status =~ m/^500 Can't connect to .* \(certificate
verify failed\)/i ||
$status =~ m/^500 Can't connect to .* \(No route to
host\)/i ||
$status =~ m/^500 No Host option provided/i ||
$status =~ m/^501 Protocol scheme .* is not
supported/i ||
$status =~ m/^503 Server Error/i
) {
$record->delete_fields($field);
print "DELETED 856\n";
next FOR;
} elsif ( $status =~ m/500 read timeout/i ) {
next FOR;
} else {
# Copy subfield 856 $z to 999 $z
my $f999 = $record->field('999');
if ( $f999 ) {
$f999->add_subfields('z' => $z);
} else {
my $new = MARC::Field->new('999', ' ', ' ', 'z'
=> $z);
$record->insert_fields_ordered($new);
}
# Delete subfield z from 856 (causes link not to
appear)
$field->delete_subfield(code => 'z');
# Show what we did
$f999 = $record->field('999');
print 'UPDATED ', $f999->as_formatted(), "\n";
}
}
}
my @new856 = $record->field('856');
foreach my $field ( @new856 ) {
print 'UPDATED ', $field->as_formatted(), "\n";
}
# Insert back into database
$marc_xml = $record->as_xml();
$marc_xml = $dbh->quote($marc_xml);
my $sql3 = "
UPDATE biblio.record_entry SET marc = $marc_xml, editor =
149592, edit_date = 'now' WHERE id = $record_id
";
my $sth3 = $dbh->prepare($sql3);
$sth3->execute;
my $rv3 = $sth3->err;
if ( $rv3 ) {
print $sth3->errstr, "\n";
}
$dbh->commit;
$sth3->finish;
$updated++;
}
}
$dbh->disconnect;
print "\nUpdated $updated of $total records\n";
print "\nFound $errors errors\n";
###############################################################################
###############################################################################
> Mary Llewellyn <mailto:mllewell at biblio.org>
> September 11, 2012 7:18 PM
>
> Hi all,
>
> I've just found out that one of our libraries has dropped its
> subscription to a downloadable e-resource service. I need a way to
> find and delete their URLs in several thousand records while leaving
> intact the URLs belonging to the other libraries that still subscribe.
>
> I see 2 ways to go: find and export all the bibs involved, delete the
> obsolete 856s in a third-party MARC editor, then load the bibs back in
> and replace the bibs in the database. Or, develop some backdoor way to
> remove the 856s using SQL, just for the one library. It helps that
> each library has a unique URL, such as
> http://smalltownct.oneclickdigital.com. Only trouble is I'm not sure
> how to write a query incorporating a MARC tag and a particular string.
>
> I'd appreciate some guidance.
>
> Thanks!
>
> Mary
>
> Mary Llewellyn
>
> Database Manager
>
> Bibliomation, Inc.
>
> Middlebury, CT
>
> mllewell at biblio.org
>
--
John Houser
System Architect
HSLC
215-534-6820
houser at hslc.org
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://libmail.georgialibraries.org/pipermail/open-ils-general/attachments/20120912/74b5e367/attachment-0001.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: compose-unknown-contact.jpg
Type: image/jpeg
Size: 770 bytes
Desc: not available
URL: <http://libmail.georgialibraries.org/pipermail/open-ils-general/attachments/20120912/74b5e367/attachment-0001.jpg>
More information about the Open-ils-general
mailing list