[open-ils-commits] r10567 -
branches/acq-experiment/Open-ILS/src/perlmods/OpenILS/Utils
svn at svn.open-ils.org
svn at svn.open-ils.org
Tue Sep 9 13:56:18 EDT 2008
Author: dbs
Date: 2008-09-09 13:56:15 -0400 (Tue, 09 Sep 2008)
New Revision: 10567
Added:
branches/acq-experiment/Open-ILS/src/perlmods/OpenILS/Utils/MFHDParser.pm
Log:
Commit the little bit of an MFHD parser that I had from back in May
Added: branches/acq-experiment/Open-ILS/src/perlmods/OpenILS/Utils/MFHDParser.pm
===================================================================
--- branches/acq-experiment/Open-ILS/src/perlmods/OpenILS/Utils/MFHDParser.pm (rev 0)
+++ branches/acq-experiment/Open-ILS/src/perlmods/OpenILS/Utils/MFHDParser.pm 2008-09-09 17:56:15 UTC (rev 10567)
@@ -0,0 +1,168 @@
+#!/usr/bin/perl -w
+use strict;
+use Date::Manip;
+
+# Parse MFHD patterns for http://www.loc.gov/marc/holdings/hd853855.html
+
+# Primary goal:
+# Expected input: a chunk of MFHD, a start date, and # of issuances to project
+# Expected output: a set of issuances projected forward from the start date,
+# with issue/volume/dates/captions conforming to what the MFHD actually says
+
+# The thought had been to use Date::Manip to generate the actual dates for
+# each issuance, like:
+#
+# # To find the 2nd Tuesday of every month
+# @date = ParseRecur("0:1*2:2:0:0:0",$base,$start,$stop);
+
+# Secondary goal: generate automatic summary holdings
+# (a la http://www.loc.gov/marc/holdings/hd863865.html)
+
+# Compressibility comes from first indicator
+#
+# Takes the single-character indicator value and returns its human-readable
+# description. Returns undef when no value is supplied or when the value is
+# not one of (0,1,2,3,#).
+sub parse_compressability {
+    my $c = shift;
+
+    # Test definedness rather than truth: '0' is a valid indicator value, and
+    # "shift || return undef" would reject it because '0' is false in Perl.
+    return undef unless defined $c;
+
+    my %compressability = (
+        '0' => 'Cannot compress or expand',
+        '1' => 'Can compress but cannot expand',
+        '2' => 'Can compress or expand',
+        '3' => 'Unknown',
+        '#' => 'Undefined'
+    );
+
+    if (exists $compressability{$c}) {
+        return $compressability{$c};
+    }
+
+    # Unknown compressability indicator - expected one of (0,1,2,3,#)
+    return undef;
+}
+
+# Caption evaluation comes from second indicator
+#
+# Takes the single-character indicator value and returns its human-readable
+# description. Returns undef when no value is supplied or when the value is
+# not one of (0,1,2,3,#).
+sub caption_evaluation {
+    my $ce = shift;
+
+    # Test definedness rather than truth: '0' is a valid indicator value, and
+    # "shift || return undef" would reject it because '0' is false in Perl.
+    return undef unless defined $ce;
+
+    my %caption_evaluation = (
+        '0' => 'Captions verified; all levels present',
+        '1' => 'Captions verified; all levels may not be present',
+        '2' => 'Captions unverified; all levels present',
+        '3' => 'Captions unverified; all levels may not be present',
+        '#' => 'Undefined',
+    );
+
+    if (exists $caption_evaluation{$ce}) {
+        return $caption_evaluation{$ce};
+    }
+
+    # Unknown caption evaluation indicator - expected one of (0,1,2,3,#)
+    return undef;
+}
+
+# Start with frequency ($w)
+# then overlay number of pieces of issuance ($p)
+# then regularity pattern ($y)
+#
+# Lookup table: single-letter frequency code => description.
+# Keys stay quoted on purpose; bare q, s and m are quote-like operators.
+my %frequency = (
+    'a' => 'annual',
+    'b' => 'bimonthly',
+    'c' => 'semiweekly',
+    'd' => 'daily',
+    'e' => 'biweekly',
+    'f' => 'semiannual',
+    'g' => 'biennial',
+    'h' => 'triennial',
+    'i' => 'three times a week',
+    'j' => 'three times a month',
+    'k' => 'continuously updated',
+    'm' => 'monthly',
+    'q' => 'quarterly',
+    's' => 'semimonthly',
+    't' => 'three times a year',
+    'w' => 'weekly',
+    'x' => 'completely irregular',
+);
+
+# Describe a frequency value.
+#
+# An all-digit value is reported as "<n> times a year"; a known letter code
+# is looked up in %frequency. A false value (undef, '', '0') or an
+# unrecognized code yields undef.
+sub parse_frequency {
+    my ($freq) = @_;
+
+    return undef unless $freq;
+    return "$freq times a year" if $freq =~ m/^\d+$/;
+    return $frequency{$freq} if exists $frequency{$freq};
+
+    # unrecognized frequency specification
+    return undef;
+}
+
+# $x - Point at which the highest level increments or changes
+# Interpretation of two-digit numbers in the 01-12 range depends on the publishing frequency
+# More than one change can be passed in the subfield; they are delimited by commas
+#
+# NOTE: this is still a stub. It splits the subfield on commas and walks the
+# individual change points, but nothing is interpreted yet and the result is
+# always undef.
+sub chronology_change {
+    my $chronology_change = shift || return undef;
+    my @c_changes = split /,/, $chronology_change;
+    foreach my $change (@c_changes) {
+        # Compare as a string: the entries are raw subfield text, and a
+        # numeric == emits "isn't numeric" warnings under -w for any entry
+        # that is not a plain number.
+        if ($change eq '21') {
+            # TODO: interpret this change point (presumably a season code;
+            # confirm against the MARC holdings documentation).
+        }
+    }
+    return undef;
+}
+
+# Publication code : first character in regularity pattern ($y)
+#
+# Maps the one-character code to its description. A false or unrecognized
+# code yields undef.
+sub parse_publication_code {
+    my ($code) = @_;
+    return undef unless $code;
+
+    my %description_for = (
+        'c' => 'combined',
+        'o' => 'omitted',
+        'p' => 'published',
+        '#' => 'undefined',
+    );
+
+    return exists $description_for{$code} ? $description_for{$code} : undef;
+}
+
+# Chronology code : part of regularity pattern ($y)
+#
+# Maps the one-character code to the name of the unit it stands for.
+# A false or unrecognized code yields undef.
+sub parse_chronology_code {
+    my ($code) = @_;
+    return undef if !$code;
+
+    my %unit_for = (
+        'd' => 'Day',
+        'm' => 'Month',
+        's' => 'Season',
+        'w' => 'Week',
+        'y' => 'Year',
+        'e' => 'Enumeration',
+    );
+
+    return undef unless exists $unit_for{$code};
+    return $unit_for{$code};
+}
+
+# Split a regularity pattern ($y) into its three parts and decode each one.
+#
+# The pattern is read as: one publication-code character, one chronology-code
+# character, then the chronology definition (the rest of the string).
+#
+# Returns the list (publication description, chronology description,
+# chronology definition). Any part that cannot be decoded is undef; a missing
+# or non-matching pattern gives three undefs.
+sub parse_regularity_pattern {
+    my $pattern = shift;
+
+    # Avoid an "uninitialized value" warning from the match below.
+    return (undef, undef, undef) unless defined $pattern;
+
+    # The publication code may be '#', which \w alone does not match, so it
+    # is accepted explicitly here to agree with parse_publication_code().
+    my ($pc, $cc, $cd) = $pattern =~ m{^([\w#])(\w)(.+)$};
+
+    my $pub_code = parse_publication_code($pc);
+    my $chron_code = parse_chronology_code($cc);
+    my $chron_def = parse_chronology_definition($cd);
+
+    return ($pub_code, $chron_code, $chron_def);
+}
+
+# Placeholder for decoding the chronology definition of a regularity pattern.
+#
+# For now the definition is handed back untouched; a false value (undef, '',
+# '0') yields undef.
+sub parse_chronology_definition {
+    my ($definition) = @_;
+    return undef unless $definition;
+
+    # Well, this is where it starts to get hard, doesn't it?
+    return $definition;
+}
+
+# Ad-hoc smoke test: these are top-level statements, so they run whenever
+# this file is executed or loaded. Each sample pattern is decoded and the
+# resulting list is printed on its own line.
+for my $sample ("cddd", "38dd") {
+    print parse_regularity_pattern($sample);
+    print "\n";
+}
+
+1;
+
+# :vim:noet:ts=4:sw=4:
More information about the open-ils-commits
mailing list