[open-ils-commits] r16373 - in branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils: . MFHD MFHD/test (dbwells)
svn at svn.open-ils.org
svn at svn.open-ils.org
Mon May 3 08:50:42 EDT 2010
Author: dbwells
Date: 2010-05-03 08:50:40 -0400 (Mon, 03 May 2010)
New Revision: 16373
Modified:
branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD.pm
branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/Holding.pm
branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/test/testlib.pm
Log:
This changeset deals with enhancements to the MFHD/Caption/Holding Perl modules. Along with a few smaller changes,
it:
- Forces MARC::Record to be newer than 2.0.0, as that version had an issue with classes derived from MARC::Field
- Augments MFHD to properly deal with inserting, appending, and deleting Caption and Holding objects
- Replaces the issuance-table specific version of generate_predictions() with something more general and flexible (the diff butchers this!)
- Adds new methods for getting a compressed or decompressed set of Holdings for a given Caption
- Splits increment() into increment() and extend(), with the second being meant for compressed holdings
- Adds compressed_to_first(), a companion method for compressed_to_last()
- Overloads the 'cmp' operator for Holdings to aid in sorting, compressing, and deduping of Holding objects
Modified: branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/Holding.pm
===================================================================
--- branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/Holding.pm 2010-05-03 01:03:25 UTC (rev 16372)
+++ branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/Holding.pm 2010-05-03 12:50:40 UTC (rev 16373)
@@ -39,6 +39,10 @@
my ($key, $val) = @$subfield;
if ($key =~ /[a-m]/) {
+ if (exists($self->{_mfhdh_FIELDS}->{$key})) {
+ carp("Duplicate, non-repeatable subfield '$key' found, ignoring");
+ next;
+ }
if ($self->{_mfhdh_COMPRESSED}) {
$self->{_mfhdh_FIELDS}->{$key}{HOLDINGS} = [split(/\-/, $val)];
} else {
@@ -466,29 +470,21 @@
# Replace a single holding with it's next prediction
# and return itself
#
-# If the holding is compressed, the range is expanded
-#
sub increment {
my $self = shift;
if ($self->is_open_ended) {
carp "Holding is open-ended, cannot increment";
return $self;
+ } elsif ($self->is_compressed) {
+ carp "Incrementing a compressed holding is deprecated, use extend instead";
+ return $self->extend;
}
my $next = $self->next();
- if ($self->is_compressed) { # expand range
- foreach my $key (keys %{$next}) {
- my @values = @{$self->field_values($key)};
- $values[1] = $next->{$key};
- $self->fields->{$key}{HOLDINGS} = \@values;
- $next->{$key} = join('-', @values);
- }
- } else {
- foreach my $key (keys %{$next}) {
- $self->fields->{$key}{HOLDINGS}[0] = $next->{$key};
- }
+ foreach my $key (keys %{$next}) {
+ $self->fields->{$key}{HOLDINGS}[0] = $next->{$key};
}
$self->seqno($self->seqno + 1);
@@ -497,6 +493,60 @@
}
#
+# Extends a holding (compressing if needed) to include the next
+# prediction and returns itself
+#
+sub extend {
+ my $self = shift;
+
+ if ($self->is_open_ended) {
+ carp "Holding is open-ended, cannot extend";
+ return $self;
+ }
+
+ my $next = $self->next();
+
+ if (!$self->is_compressed) {
+ $self->is_compressed(1); # add compressed state
+ }
+
+ foreach my $key (keys %{$next}) {
+ my @values = @{$self->field_values($key)};
+ $values[1] = $next->{$key};
+ $self->fields->{$key}{HOLDINGS} = \@values;
+ $next->{$key} = join('-', @values);
+ }
+
+ $self->update(%{$next}); # update underlying subfields
+ return $self;
+}
+
+#
+# Turns a compressed holding into the singular form of the first member
+# in the range
+#
+sub compressed_to_first {
+ my $self = shift;
+
+ if (!$self->is_compressed) {
+ carp "Holding not compressed, cannot convert to first member";
+ return $self;
+ }
+
+ my %changes;
+ foreach my $key (keys %{$self->fields}) {
+ my @values = @{$self->field_values($key)};
+ $self->fields->{$key}{HOLDINGS} = [$values[0]];
+ $changes{$key} = $values[0];
+ }
+
+ $self->update(%changes); # update underlying subfields
+ $self->is_compressed(0); # remove compressed state
+
+ return $self;
+}
+
+#
# Turns a compressed holding into the singular form of the last member
# in the range
#
@@ -627,4 +677,117 @@
my @parts = split('/', $combo);
return $parts[$pos];
}
+
+#
+# Overload string comparison operators
+#
+# We are not overloading '<=>' because '==' is used liberally in MARC::Record
+# to compare field identity (i.e. is this the same exact Field object?), not value
+#
+# Other string operators are auto-generated from 'cmp'
+#
+# Please note that this comparison is based on what the holding represents,
+# not whether it is strictly identical (e.g. the seqno and link may vary)
+#
+use overload ('cmp' => \&_compare,
+ 'fallback' => 1);
+sub _compare {
+ my ($holding_1, $holding_2) = @_;
+
+ # TODO: this needs some more consideration
+ # fall back to 'built-in' comparison
+ if (!UNIVERSAL::isa($holding_2, ref $holding_1)) {
+ if (defined $holding_2) {
+ carp("Use of non-holding in holding comparison operation");
+ return ( "$holding_1" cmp "$holding_2" );
+ } else {
+ carp("Use of undefined value in holding comparison operation");
+ return 1; # similar to built-in, something is "greater than" nothing
+ }
+ }
+
+ # special cases for compressed holdings
+ my ($holding_1_first, $holding_1_last, $holding_2_first, $holding_2_last, $found_compressed);
+ # 0 for no compressed, 1 for first compressed, 2 for second compressed, 3 for both compressed
+ $found_compressed = 0;
+ if ($holding_1->is_compressed) {
+ $holding_1_last = $holding_1->clone->compressed_to_last;
+ $found_compressed += 1;
+ } else {
+ $holding_1_first = $holding_1;
+ $holding_1_last = $holding_1;
+ }
+ if ($holding_2->is_compressed) {
+ $holding_2_first = $holding_2->clone->compressed_to_first;
+ $found_compressed += 2;
+ } else {
+ $holding_2_first = $holding_2;
+ $holding_2_last = $holding_2;
+ }
+
+ if ($found_compressed) {
+ my $cmp = ($holding_1_last cmp $holding_2_first); # 1 ends before 2 starts
+ if ($cmp == -1) {
+ return -1; # 1 is fully lt
+ } elsif ($cmp == 0) {
+ carp("Overlapping holdings in comparison, lt and gt based on start value only");
+ return -1;
+ } else { # check the opposite, 2 ends before 1 starts
+ # clone is expensive, wait until we need it (here)
+ if (!defined($holding_2_last)) {
+ $holding_2_last = $holding_2->clone->compressed_to_last;
+ }
+ if (!defined($holding_1_first)) {
+ $holding_1_first = $holding_1->clone->compressed_to_first;
+ }
+ $cmp = ($holding_2_last cmp $holding_1_first);
+ if ($cmp == -1) {
+ return 1; # 1 is fully gt
+ } elsif ($cmp == 0) {
+ carp("Overlapping holdings in comparison, lt and gt based on start value only");
+ return 1;
+ } else {
+ $cmp = ($holding_1_first cmp $holding_2_first);
+ if (!$cmp) { # they are not equal
+ carp("Overlapping holdings in comparison, lt and gt based on start value only");
+ return $cmp;
+ } elsif ($found_compressed == 1) {
+ carp("Compressed holding found with start equal to non-compressed holding");
+ return 1; # compressed (first holding) is 'greater than' non-compressed
+ } elsif ($found_compressed == 2) {
+ carp("Compressed holding found with start equal to non-compressed holding");
+ return -1; # compressed (second holding) is 'greater than' non-compressed
+ } else { # both holdings compressed, check for full equality
+ $cmp = ($holding_1_last cmp $holding_2_last);
+ if (!$cmp) { # they are not equal
+ carp("Compressed holdings in comparison have equal starts, lt and gt based on end value only");
+ return $cmp;
+ } else {
+ return 0; # both are compressed, both ends are equal
+ }
+ }
+ }
+ }
+ }
+
+ # start doing the actual comparison
+ my $result;
+ foreach my $key ('a'..'f') {
+ if (defined($holding_1->field_values($key))) {
+ if (!defined($holding_2->field_values($key))) {
+ return 1; # more details equals 'greater' (?)
+ } else {
+ $result = $holding_1->field_values($key)->[0] <=> $holding_2->field_values($key)->[0];
+ }
+ } elsif (defined($holding_2->field_values($key))) {
+ return -1; # more details equals 'greater' (?)
+ }
+
+ return $result if $result;
+ }
+
+ # got through, return 0 for equal
+ return 0;
+}
+
1;
Modified: branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/test/testlib.pm
===================================================================
--- branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/test/testlib.pm 2010-05-03 01:03:25 UTC (rev 16372)
+++ branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/test/testlib.pm 2010-05-03 12:50:40 UTC (rev 16373)
@@ -55,7 +55,6 @@
$field = MARC::Field->new(
$fieldno, $inds[0], $inds[1],
- a => 'scratch',
@subfields
);
Modified: branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD.pm
===================================================================
--- branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD.pm 2010-05-03 01:03:25 UTC (rev 16372)
+++ branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD.pm 2010-05-03 12:50:40 UTC (rev 16373)
@@ -6,6 +6,9 @@
use DateTime::Format::Strptime;
use Data::Dumper;
+# for inherited methods to work properly, we need to force a
+# MARC::Record version greater than 2.0.0
+use MARC::Record 2.0.1;
use base 'MARC::Record';
use OpenILS::Utils::MFHD::Caption;
@@ -84,30 +87,148 @@
return sort keys %{$self->{_mfhd_CAPTIONS}->{$field}};
}
+# optional argument to get back a 'hashref' or an 'array' (default)
sub captions {
my $self = shift;
- my $field = shift;
+ my $tag = shift;
+ my $return_type = shift;
# TODO: add support for caption types as argument? (base, index, supplement)
- my @captions;
- my @sorted_ids = $self->caption_link_ids($field);
+ my @sorted_ids = $self->caption_link_ids($tag);
- foreach my $link_id (@sorted_ids) {
- push(@captions, $self->{_mfhd_CAPTIONS}{$field}{$link_id});
+ if (defined($return_type) and $return_type eq 'hashref') {
+ my %captions;
+ foreach my $link_id (@sorted_ids) {
+ $captions{$link_id} = $self->{_mfhd_CAPTIONS}{$tag}{$link_id};
+ }
+ return \%captions;
+ } else {
+ my @captions;
+ foreach my $link_id (@sorted_ids) {
+ push(@captions, $self->{_mfhd_CAPTIONS}{$tag}{$link_id});
+ }
+ return @captions;
}
+}
- return @captions;
+sub append_fields {
+ my $self = shift;
+
+ my $field_count = $self->SUPER::append_fields(@_);
+ if ($field_count) {
+ foreach my $field (@_) {
+ $self->_avoid_link_collision($field);
+ my $field_type = ref $field;
+ if ($field_type eq 'MFHD::Holding') {
+ $self->{_mfhd_HOLDINGS}{$field->tag}{$field->caption->link_id}{$field->seqno} = $field;
+ } elsif ($field_type eq 'MFHD::Caption') {
+ $self->{_mfhd_CAPTIONS}{$field->tag}{$field->link_id} = $field;
+ }
+ }
+ return $field_count;
+ } else {
+ return;
+ }
}
+sub delete_field {
+ my $self = shift;
+ my $field = shift;
+
+ my $field_count = $self->SUPER::delete_field($field);
+ if ($field_count) {
+ my $field_type = ref($field);
+ if ($field_type eq 'MFHD::Holding') {
+ delete($self->{_mfhd_HOLDINGS}{$field->tag}{$field->caption->link_id}{$field->seqno});
+ } elsif ($field_type eq 'MFHD::Caption') {
+ delete($self->{_mfhd_CAPTIONS}{$field->tag}{$field->link_id});
+ }
+ return $field_count;
+ } else {
+ return;
+ }
+}
+
+sub insert_fields_before {
+ my $self = shift;
+ my $before = shift;
+
+ my $field_count = $self->SUPER::insert_fields_before($before, @_);
+ if ($field_count) {
+ foreach my $field (@_) {
+ $self->_avoid_link_collision($field);
+ my $field_type = ref $field;
+ if ($field_type eq 'MFHD::Holding') {
+ $self->{_mfhd_HOLDINGS}{$field->tag}{$field->caption->link_id}{$field->seqno} = $field;
+ } elsif ($field_type eq 'MFHD::Caption') {
+ $self->{_mfhd_CAPTIONS}{$field->tag}{$field->link_id} = $field;
+ }
+ }
+ return $field_count;
+ } else {
+ return;
+ }
+}
+
+sub insert_fields_after {
+ my $self = shift;
+ my $after = shift;
+
+ my $field_count = $self->SUPER::insert_fields_after($after, @_);
+ if ($field_count) {
+ foreach my $field (@_) {
+ $self->_avoid_link_collision($field);
+ my $field_type = ref $field;
+ if ($field_type eq 'MFHD::Holding') {
+ $self->{_mfhd_HOLDINGS}{$field->tag}{$field->caption->link_id}{$field->seqno} = $field;
+ } elsif ($field_type eq 'MFHD::Caption') {
+ $self->{_mfhd_CAPTIONS}{$field->tag}{$field->link_id} = $field;
+ }
+ }
+ return $field_count;
+ } else {
+ return;
+ }
+}
+
+sub _avoid_link_collision {
+ my $self = shift;
+ my $field = shift;
+
+ my $fieldref = ref($field);
+ if ($fieldref eq 'MFHD::Holding') {
+ my $seqno = $field->seqno;
+ my $changed_seqno = 0;
+ if (exists($self->{_mfhd_HOLDINGS}{$field->tag}{$field->caption->link_id}{$seqno})) {
+ $changed_seqno = 1;
+ do {
+ $seqno++;
+ } while (exists($self->{_mfhd_HOLDINGS}{$field->tag}{$field->caption->link_id}{$seqno}));
+ }
+ $field->seqno($seqno) if $changed_seqno;
+ } elsif ($fieldref eq 'MFHD::Caption') {
+ my $link_id = $field->link_id;
+ my $changed_link_id = 0;
+ if (exists($self->{_mfhd_CAPTIONS}{$field->tag}{$link_id})) {
+ $link_id++;
+ $changed_link_id = 1;
+ do {
+ $link_id++;
+ } while (exists($self->{_mfhd_CAPTIONS}{$field->tag}{$link_id}));
+ }
+ $field->link_id($link_id) if $changed_link_id;
+ }
+}
+
sub active_captions {
my $self = shift;
- my $field = shift;
+ my $tag = shift;
# TODO: add support for caption types as argument? (base, index, supplement)
my @captions;
my @active_captions;
- @captions = $self->captions($field);
+ @captions = $self->captions($tag);
# TODO: for now, we will assume the last 85X field is active
# and the rest are historical. The standard is hazy about
@@ -130,74 +251,178 @@
}
#
-# generate_predictions() is an initial attempt at a function which can be used
-# to populate an issuance table with a list of predicted issues. It accepts
-# a hash ref of options initially defined as:
-# field : the caption field to predict on (853, 854, or 855)
+# generate_predictions()
+# Accepts a hash ref of options initially defined as:
+# base_holding : reference to the holding field to predict from
# num_to_predict : the number of issues you wish to predict
-# last_rec_date : the date of the last received issue, to be used as an offset
-# for predicting future issues
+# OR
+# end_holding : holding field ref, keep predicting until you meet or exceed it
#
# The basic method is to first convert to a single holding if compressed, then
# increment the holding and save the resulting values to @predictions.
#
-# returns @preditions, an array of array refs containing (link id, formatted
-# label, formatted chronology date, formatted estimated arrival date, and an
-# array ref of holding subfields as (key, value, key, value ...)) (not a hash
-# to protect order and possible duplicate keys).
-#
+# returns @predictions, an array of holding field refs (including end_holding
+# if applicable but NOT base_holding)
+#
sub generate_predictions {
my ($self, $options) = @_;
- my $field = $options->{field};
+
+ my $base_holding = $options->{base_holding};
my $num_to_predict = $options->{num_to_predict};
- my $last_rec_date =
- $options->{last_rec_date}; # expected or actual, according to preference
+ my $end_holding = $options->{end_holding};
+ my $max_to_predict = $options->{max_to_predict} || 10000; # fail-safe
- # TODO: add support for predicting serials with no chronology by passing in
- # a last_pub_date option?
+ if (!defined($base_holding)) {
+ carp("Base holding not defined in generate_predictions, returning empty set");
+ return ();
+ }
+ if ($base_holding->is_compressed) {
+ carp("Ambiguous compressed base holding in generate_predictions, returning empty set");
+ return ();
+ }
+ my $curr_holding = $base_holding->clone; # prevent side-effects
+
+ my @predictions;
+
+ if ($num_to_predict) {
+ for (my $i = 0; $i < $num_to_predict; $i++) {
+ push(@predictions, $curr_holding->increment->clone);
+ }
+ } elsif (defined($end_holding)) {
+ $end_holding = $end_holding->clone; # prevent side-effects
+ my $next_holding = $curr_holding->increment->clone;
+ my $num_predicted = 0;
+ while ($next_holding le $end_holding) {
+ push(@predictions, $next_holding);
+ $num_predicted++;
+ if ($num_predicted >= $max_to_predict) {
+ carp("Maximum prediction count exceeded");
+ last;
+ }
+ $next_holding = $curr_holding->increment->clone;
+ }
+ }
- my $strp = new DateTime::Format::Strptime(pattern => '%F');
+ return @predictions;
+}
- my $receival_date = $strp->parse_datetime($last_rec_date);
+#
+# create an array of compressed holdings from all holdings for a given caption,
+# compressing as needed
+#
+# Optionally you can skip sorting, but the resulting compression will be compromised
+# if the current holdings are out of order
+#
+# TODO: gap marking, gap preservation
+#
+# TODO: some of this could be moved to the Caption object to allow for
+# decompression in the absense of an overarching MFHD object
+#
+sub get_compressed_holdings {
+ my $self = shift;
+ my $caption = shift;
+ my $opts = shift;
+ my $skip_sort = $opts->{'skip_sort'};
- my @active_captions = $self->active_captions($field);
+ # make sure none are compressed
+ my @decomp_holdings;
+ if ($skip_sort) {
+ @decomp_holdings = $self->get_decompressed_holdings($caption, {'skip_sort' => 1});
+ } else {
+ # sort for best algorithm
+ @decomp_holdings = $self->get_decompressed_holdings($caption, {'dedupe' => 1});
+ }
- my @predictions;
- foreach my $caption (@active_captions) {
- my $htag = $caption->tag;
- my $link_id = $caption->link_id;
- $htag =~ s/^85/86/;
- my @holdings = $self->holdings($htag, $link_id);
- my $last_holding = $holdings[-1];
-
- if ($last_holding->is_compressed) {
- $last_holding->compressed_to_last; # convert to last in range
+ my $runner = $decomp_holdings[0]->clone->increment;
+ my $curr_holding = shift(@decomp_holdings);
+ $curr_holding = $curr_holding->clone;
+ my $seqno = 1;
+ $curr_holding->seqno($seqno);
+ my @comp_holdings;
+# my $last_holding;
+ foreach my $holding (@decomp_holdings) {
+ if ($runner eq $holding) {
+ $curr_holding->extend;
+ $runner->increment;
+# } elsif ($holding eq $last_holding) {
+# carp("Found duplicate holding in compression set, skipping");
+ } elsif ($runner gt $holding) { # should not happen unless holding is not in series
+ carp("Found unexpected holding, skipping");
+ } else {
+ push(@comp_holdings, $curr_holding);
+ while ($runner le $holding) {
+ $runner->increment;
+ }
+ $curr_holding = $holding->clone;
+ $seqno++;
+ $curr_holding->seqno($seqno);
}
+# $last_holding = $holding;
+ }
+ push(@comp_holdings, $curr_holding);
- my $pub_date = $strp->parse_datetime($last_holding->chron_to_date);
- my $date_diff = $receival_date - $pub_date;
+ return @comp_holdings;
+}
- $last_holding->notes('public', []);
- # add a note marker for system use
- $last_holding->notes('private', ['AUTOGEN']);
+#
+# create an array of single holdings from all holdings for a given caption,
+# decompressing as needed
+#
+# resulting array is returned as they come in the record, unsorted
+#
+# optional argument will reorder and renumber the holdings before returning
+#
+# TODO: some of this could be moved to the Caption (and/or Holding) object to
+# allow for decompression in the absense of an overarching MFHD object
+#
+sub get_decompressed_holdings {
+ my $self = shift;
+ my $caption = shift;
+ my $opts = shift;
+ my $skip_sort = $opts->{'skip_sort'};
+ my $dedupe = $opts->{'dedupe'};
- for (my $i = 0; $i < $num_to_predict; $i++) {
- $last_holding->increment;
- $pub_date = $strp->parse_datetime($last_holding->chron_to_date);
- my $arrival_date = $pub_date + $date_diff;
- push(
- @predictions,
- [
- $link_id,
- $last_holding->format,
- $pub_date->strftime('%F'),
- $arrival_date->strftime('%F'),
- [$last_holding->subfields_list]
- ]
- );
+ if ($dedupe and $skip_sort) {
+ carp("Attempted deduplication without sorting, failure likely");
+ }
+
+ my $htag = $caption->tag;
+ my $link_id = $caption->link_id;
+ $htag =~ s/^85/86/;
+ my @holdings = $self->holdings($htag, $link_id);
+ my @decomp_holdings;
+
+ foreach my $holding (@holdings) {
+ if (!$holding->is_compressed) {
+ push(@decomp_holdings, $holding->clone);
+ } else {
+ my $base_holding = $holding->clone->compressed_to_first;
+ my @new_holdings = $self->generate_predictions(
+ {'base_holding' => $base_holding,
+ 'end_holding' => $holding->clone->compressed_to_last});
+ push(@decomp_holdings, $base_holding, @new_holdings);
}
}
- return @predictions;
+
+ unless ($skip_sort) {
+ my @temp_holdings = sort {$a cmp $b} @decomp_holdings;
+ @decomp_holdings = @temp_holdings;
+ }
+
+ my @return_holdings = (shift(@decomp_holdings));
+ $return_holdings[0]->seqno(1);
+ my $seqno = 2;
+ foreach my $holding (@decomp_holdings) { # renumber sequence
+ if ($holding eq $return_holdings[-1] and $dedupe) {
+ carp("Found duplicate holding in decompression set, discarding");
+ next;
+ }
+ $holding->seqno($seqno);
+ $seqno++;
+ push(@return_holdings, $holding);
+ }
+
+ return @return_holdings;
}
#
More information about the open-ils-commits
mailing list