[open-ils-commits] r16373 - in branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils: . MFHD MFHD/test (dbwells)

svn at svn.open-ils.org svn at svn.open-ils.org
Mon May 3 08:50:42 EDT 2010


Author: dbwells
Date: 2010-05-03 08:50:40 -0400 (Mon, 03 May 2010)
New Revision: 16373

Modified:
   branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD.pm
   branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/Holding.pm
   branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/test/testlib.pm
Log:
This changeset deals with enhancements to the MFHD/Caption/Holding Perl modules. Along with a few smaller changes, 
it: 

- Forces MARC::Record to be newer than 2.0.0, as that version had an issue with classes derived from MARC::Field

- Augments MFHD to properly deal with inserting, appending, and deleting Caption and Holding objects

- Replaces the issuance-table specific version of generate_predictions() with something more general and flexible (the diff butchers this!)

- Adds new methods for getting a compressed or decompressed set of Holdings for a given Caption

- Splits increment() into increment() and extend(), with the second being meant for compressed holdings

- Adds compressed_to_first(), a companion method for compressed_to_last()

- Overloads the 'cmp' operator for Holdings to aid in sorting, compressing, and deduping of Holding objects


Modified: branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/Holding.pm
===================================================================
--- branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/Holding.pm	2010-05-03 01:03:25 UTC (rev 16372)
+++ branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/Holding.pm	2010-05-03 12:50:40 UTC (rev 16373)
@@ -39,6 +39,10 @@
         my ($key, $val) = @$subfield;
 
         if ($key =~ /[a-m]/) {
+            if (exists($self->{_mfhdh_FIELDS}->{$key})) {
+                carp("Duplicate, non-repeatable subfield '$key' found, ignoring");
+                next;
+            }
             if ($self->{_mfhdh_COMPRESSED}) {
                 $self->{_mfhdh_FIELDS}->{$key}{HOLDINGS} = [split(/\-/, $val)];
             } else {
@@ -466,29 +470,21 @@
 # Replace a single holding with it's next prediction
 # and return itself
 #
-# If the holding is compressed, the range is expanded
-#
 sub increment {
     my $self = shift;
 
     if ($self->is_open_ended) {
         carp "Holding is open-ended, cannot increment";
         return $self;
+    } elsif ($self->is_compressed) {
+        carp "Incrementing a compressed holding is deprecated, use extend instead";
+        return $self->extend;
     }
 
     my $next = $self->next();
 
-    if ($self->is_compressed) {    # expand range
-        foreach my $key (keys %{$next}) {
-            my @values = @{$self->field_values($key)};
-            $values[1] = $next->{$key};
-            $self->fields->{$key}{HOLDINGS} = \@values;
-            $next->{$key} = join('-', @values);
-        }
-    } else {
-        foreach my $key (keys %{$next}) {
-            $self->fields->{$key}{HOLDINGS}[0] = $next->{$key};
-        }
+    foreach my $key (keys %{$next}) {
+        $self->fields->{$key}{HOLDINGS}[0] = $next->{$key};
     }
 
     $self->seqno($self->seqno + 1);
@@ -497,6 +493,60 @@
 }
 
 #
+# Extends a holding (compressing if needed) to include the next
+# prediction and returns itself
+#
+sub extend {
+    my $self = shift;
+
+    if ($self->is_open_ended) {
+        carp "Holding is open-ended, cannot extend";
+        return $self;
+    }
+
+    my $next = $self->next();
+
+    if (!$self->is_compressed) {
+        $self->is_compressed(1);  # add compressed state
+    }
+
+    foreach my $key (keys %{$next}) {
+        my @values = @{$self->field_values($key)};
+        $values[1] = $next->{$key};
+        $self->fields->{$key}{HOLDINGS} = \@values;
+        $next->{$key} = join('-', @values);
+    }
+
+    $self->update(%{$next});    # update underlying subfields
+    return $self;
+}
+
+#
+# Turns a compressed holding into the singular form of the first member
+# in the range
+#
+sub compressed_to_first {
+    my $self = shift;
+
+    if (!$self->is_compressed) {
+        carp "Holding not compressed, cannot convert to first member";
+        return $self;
+    }
+
+    my %changes;
+    foreach my $key (keys %{$self->fields}) {
+        my @values = @{$self->field_values($key)};
+        $self->fields->{$key}{HOLDINGS} = [$values[0]];
+        $changes{$key} = $values[0];
+    }
+
+    $self->update(%changes);    # update underlying subfields
+    $self->is_compressed(0);    # remove compressed state
+
+    return $self;
+}
+
+#
 # Turns a compressed holding into the singular form of the last member
 # in the range
 #
@@ -627,4 +677,117 @@
     my @parts = split('/', $combo);
     return $parts[$pos];
 }
+
+#
+# Overload string comparison operators
+#
+# We are not overloading '<=>' because '==' is used liberally in MARC::Record
+# to compare field identity (i.e. is this the same exact Field object?), not value
+#
+# Other string operators are auto-generated from 'cmp'
+#
+# Please note that this comparison is based on what the holding represents,
+# not whether it is strictly identical (e.g. the seqno and link may vary)
+#
+use overload ('cmp' => \&_compare,
+              'fallback' => 1);
+sub _compare {
+    my ($holding_1, $holding_2) = @_;
+
+    # TODO: this needs some more consideration
+    # fall back to 'built-in' comparison
+    if (!UNIVERSAL::isa($holding_2, ref $holding_1)) {
+        if (defined $holding_2) {
+            carp("Use of non-holding in holding comparison operation");
+            return ( "$holding_1" cmp "$holding_2" );
+        } else {
+            carp("Use of undefined value in holding comparison operation");
+            return 1; # similar to built-in, something is "greater than" nothing
+        }
+    }
+
+    # special cases for compressed holdings
+    my ($holding_1_first, $holding_1_last, $holding_2_first, $holding_2_last, $found_compressed);
+    # 0 for no compressed, 1 for first compressed, 2 for second compressed, 3 for both compressed
+    $found_compressed = 0; 
+    if ($holding_1->is_compressed) {
+        $holding_1_last = $holding_1->clone->compressed_to_last;
+        $found_compressed += 1;
+    } else {
+        $holding_1_first = $holding_1;
+        $holding_1_last = $holding_1;
+    }
+    if ($holding_2->is_compressed) {
+        $holding_2_first = $holding_2->clone->compressed_to_first;
+        $found_compressed += 2;
+    } else {
+        $holding_2_first = $holding_2;
+        $holding_2_last = $holding_2;
+    }
+
+    if ($found_compressed) {
+        my $cmp = ($holding_1_last cmp $holding_2_first); # 1 ends before 2 starts
+        if ($cmp == -1) {
+            return -1; # 1 is fully lt
+        } elsif ($cmp == 0) {
+            carp("Overlapping holdings in comparison, lt and gt based on start value only");
+            return -1;
+        } else { # check the opposite, 2 ends before 1 starts
+            # clone is expensive, wait until we need it (here)
+            if (!defined($holding_2_last)) {
+                $holding_2_last = $holding_2->clone->compressed_to_last;
+            }
+            if (!defined($holding_1_first)) {
+                $holding_1_first = $holding_1->clone->compressed_to_first;
+            }
+            $cmp = ($holding_2_last cmp $holding_1_first);
+            if ($cmp == -1) {
+                return 1; # 1 is fully gt
+            } elsif ($cmp == 0) {
+                carp("Overlapping holdings in comparison, lt and gt based on start value only");
+                return 1;
+            } else {
+                $cmp = ($holding_1_first cmp $holding_2_first);
+                if (!$cmp) { # they are not equal
+                    carp("Overlapping holdings in comparison, lt and gt based on start value only");
+                    return $cmp;
+                } elsif ($found_compressed == 1) {
+                    carp("Compressed holding found with start equal to non-compressed holding");
+                    return 1; # compressed (first holding) is 'greater than' non-compressed
+                } elsif ($found_compressed == 2) {
+                    carp("Compressed holding found with start equal to non-compressed holding");
+                    return -1; # compressed (second holding) is 'greater than' non-compressed
+                } else { # both holdings compressed, check for full equality
+                    $cmp = ($holding_1_last cmp $holding_2_last);
+                    if (!$cmp) { # they are not equal
+                        carp("Compressed holdings in comparison have equal starts, lt and gt based on end value only");
+                        return $cmp;
+                    } else {
+                        return 0; # both are compressed, both ends are equal
+                    }
+                }
+            }
+        }
+    }
+
+    # start doing the actual comparison
+    my $result;
+    foreach my $key ('a'..'f') {
+        if (defined($holding_1->field_values($key))) {
+            if (!defined($holding_2->field_values($key))) {
+                return 1; # more details equals 'greater' (?)
+            } else {
+                $result = $holding_1->field_values($key)->[0] <=> $holding_2->field_values($key)->[0];
+            }
+        } elsif (defined($holding_2->field_values($key))) {
+            return -1; # more details equals 'greater' (?)
+        }
+
+        return $result if $result;
+    }
+
+    # got through, return 0 for equal
+    return 0;
+}
+
 1;

Modified: branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/test/testlib.pm
===================================================================
--- branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/test/testlib.pm	2010-05-03 01:03:25 UTC (rev 16372)
+++ branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/test/testlib.pm	2010-05-03 12:50:40 UTC (rev 16373)
@@ -55,7 +55,6 @@
 
         $field = MARC::Field->new(
             $fieldno, $inds[0], $inds[1],
-            a => 'scratch',
             @subfields
         );
 

Modified: branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD.pm
===================================================================
--- branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD.pm	2010-05-03 01:03:25 UTC (rev 16372)
+++ branches/seials-integration/Open-ILS/src/perlmods/OpenILS/Utils/MFHD.pm	2010-05-03 12:50:40 UTC (rev 16373)
@@ -6,6 +6,9 @@
 use DateTime::Format::Strptime;
 use Data::Dumper;
 
+# for inherited methods to work properly, we need to force a
+# MARC::Record version greater than 2.0.0
+use MARC::Record 2.0.1;
 use base 'MARC::Record';
 
 use OpenILS::Utils::MFHD::Caption;
@@ -84,30 +87,148 @@
     return sort keys %{$self->{_mfhd_CAPTIONS}->{$field}};
 }
 
+# optional argument to get back a 'hashref' or an 'array' (default)
 sub captions {
     my $self  = shift;
-    my $field = shift;
+    my $tag = shift;
+    my $return_type = shift;
 
     # TODO: add support for caption types as argument? (base, index, supplement)
-    my @captions;
-    my @sorted_ids = $self->caption_link_ids($field);
+    my @sorted_ids = $self->caption_link_ids($tag);
 
-    foreach my $link_id (@sorted_ids) {
-        push(@captions, $self->{_mfhd_CAPTIONS}{$field}{$link_id});
+    if (defined($return_type) and $return_type eq 'hashref') {
+        my %captions;
+        foreach my $link_id (@sorted_ids) {
+            $captions{$link_id} = $self->{_mfhd_CAPTIONS}{$tag}{$link_id};
+        }
+        return \%captions;
+    } else {
+        my @captions;
+        foreach my $link_id (@sorted_ids) {
+            push(@captions, $self->{_mfhd_CAPTIONS}{$tag}{$link_id});
+        }
+        return @captions;
     }
+}
 
-    return @captions;
+sub append_fields {
+    my $self = shift;
+
+    my $field_count = $self->SUPER::append_fields(@_);
+    if ($field_count) {
+        foreach my $field (@_) {
+            $self->_avoid_link_collision($field);
+            my $field_type = ref $field;
+            if ($field_type eq 'MFHD::Holding') {
+                $self->{_mfhd_HOLDINGS}{$field->tag}{$field->caption->link_id}{$field->seqno} = $field;
+            } elsif ($field_type eq 'MFHD::Caption') {
+                $self->{_mfhd_CAPTIONS}{$field->tag}{$field->link_id} = $field;
+            }
+        }
+        return $field_count;
+    } else {
+        return;
+    }   
 }
 
+sub delete_field {
+    my $self = shift;
+    my $field = shift;
+
+    my $field_count = $self->SUPER::delete_field($field);
+    if ($field_count) {
+        my $field_type = ref($field);
+        if ($field_type eq 'MFHD::Holding') {
+            delete($self->{_mfhd_HOLDINGS}{$field->tag}{$field->caption->link_id}{$field->seqno});
+        } elsif ($field_type eq 'MFHD::Caption') {
+            delete($self->{_mfhd_CAPTIONS}{$field->tag}{$field->link_id});
+        }
+        return $field_count;
+    } else {
+        return;
+    }
+}
+
+sub insert_fields_before {
+    my $self = shift;
+    my $before = shift;
+
+    my $field_count = $self->SUPER::insert_fields_before($before, @_);
+    if ($field_count) {
+        foreach my $field (@_) {
+            $self->_avoid_link_collision($field);
+            my $field_type = ref $field;
+            if ($field_type eq 'MFHD::Holding') {
+                $self->{_mfhd_HOLDINGS}{$field->tag}{$field->caption->link_id}{$field->seqno} = $field;
+            } elsif ($field_type eq 'MFHD::Caption') {
+                $self->{_mfhd_CAPTIONS}{$field->tag}{$field->link_id} = $field;
+            }
+        }
+        return $field_count;
+    } else {
+        return;
+    }
+}
+
+sub insert_fields_after {
+    my $self = shift;
+    my $after = shift;
+
+    my $field_count = $self->SUPER::insert_fields_after($after, @_);
+    if ($field_count) {
+        foreach my $field (@_) {
+            $self->_avoid_link_collision($field);
+            my $field_type = ref $field;
+            if ($field_type eq 'MFHD::Holding') {
+                $self->{_mfhd_HOLDINGS}{$field->tag}{$field->caption->link_id}{$field->seqno} = $field;
+            } elsif ($field_type eq 'MFHD::Caption') {
+                $self->{_mfhd_CAPTIONS}{$field->tag}{$field->link_id} = $field;
+            }
+        }
+        return $field_count;
+    } else {
+        return;
+    }
+}
+
+sub _avoid_link_collision {
+    my $self = shift;
+    my $field = shift;
+
+    my $fieldref = ref($field);
+    if ($fieldref eq 'MFHD::Holding') {
+        my $seqno = $field->seqno;
+        my $changed_seqno = 0;
+        if (exists($self->{_mfhd_HOLDINGS}{$field->tag}{$field->caption->link_id}{$seqno})) {
+            $changed_seqno = 1;
+            do {
+                $seqno++;
+            } while (exists($self->{_mfhd_HOLDINGS}{$field->tag}{$field->caption->link_id}{$seqno}));
+        }
+        $field->seqno($seqno) if $changed_seqno;
+    } elsif ($fieldref eq 'MFHD::Caption') {
+        my $link_id = $field->link_id;
+        my $changed_link_id = 0;
+        if (exists($self->{_mfhd_CAPTIONS}{$field->tag}{$link_id})) {
+            $link_id++;
+            $changed_link_id = 1;
+            do {
+                $link_id++;
+            } while (exists($self->{_mfhd_CAPTIONS}{$field->tag}{$link_id}));
+        }
+        $field->link_id($link_id) if $changed_link_id;
+    }
+}
+
 sub active_captions {
     my $self  = shift;
-    my $field = shift;
+    my $tag = shift;
 
     # TODO: add support for caption types as argument? (base, index, supplement)
     my @captions;
     my @active_captions;
 
-    @captions = $self->captions($field);
+    @captions = $self->captions($tag);
 
     # TODO: for now, we will assume the last 85X field is active
     # and the rest are historical.  The standard is hazy about
@@ -130,74 +251,178 @@
 }
 
 #
-# generate_predictions() is an initial attempt at a function which can be used
-# to populate an issuance table with a list of predicted issues.  It accepts
-# a hash ref of options initially defined as:
-# field : the caption field to predict on (853, 854, or 855)
+# generate_predictions()
+# Accepts a hash ref of options initially defined as:
+# base_holding : reference to the holding field to predict from
 # num_to_predict : the number of issues you wish to predict
-# last_rec_date : the date of the last received issue, to be used as an offset
-#                 for predicting future issues
+# OR
+# end_holding : holding field ref, keep predicting until you meet or exceed it
 #
 # The basic method is to first convert to a single holding if compressed, then
 # increment the holding and save the resulting values to @predictions.
 # 
-# returns @preditions, an array of array refs containing (link id, formatted
-# label, formatted chronology date, formatted estimated arrival date, and an
-# array ref of holding subfields as (key, value, key, value ...)) (not a hash
-# to protect order and possible duplicate keys).
-#
+# returns @predictions, an array of holding field refs (including end_holding
+# if applicable but NOT base_holding)
+# 
 sub generate_predictions {
     my ($self, $options) = @_;
-    my $field          = $options->{field};
+
+    my $base_holding   = $options->{base_holding};
     my $num_to_predict = $options->{num_to_predict};
-    my $last_rec_date =
-      $options->{last_rec_date};   # expected or actual, according to preference
+    my $end_holding    = $options->{end_holding};
+    my $max_to_predict = $options->{max_to_predict} || 10000; # fail-safe
 
-    # TODO: add support for predicting serials with no chronology by passing in
-    # a last_pub_date option?
+    if (!defined($base_holding)) {
+        carp("Base holding not defined in generate_predictions, returning empty set");
+        return ();
+    }
+    if ($base_holding->is_compressed) {
+        carp("Ambiguous compressed base holding in generate_predictions, returning empty set");
+        return ();
+    }
+    my $curr_holding = $base_holding->clone; # prevent side-effects
+    
+    my @predictions;
+        
+    if ($num_to_predict) {
+        for (my $i = 0; $i < $num_to_predict; $i++) {
+            push(@predictions, $curr_holding->increment->clone);
+        }
+    } elsif (defined($end_holding)) {
+        $end_holding = $end_holding->clone; # prevent side-effects
+        my $next_holding = $curr_holding->increment->clone;
+        my $num_predicted = 0;
+        while ($next_holding le $end_holding) {
+            push(@predictions, $next_holding);
+            $num_predicted++;
+            if ($num_predicted >= $max_to_predict) {
+                carp("Maximum prediction count exceeded");
+                last;
+            }
+            $next_holding = $curr_holding->increment->clone;
+        }
+    }
 
-    my $strp = new DateTime::Format::Strptime(pattern => '%F');
+    return @predictions;
+}
 
-    my $receival_date = $strp->parse_datetime($last_rec_date);
+#
+# create an array of compressed holdings from all holdings for a given caption,
+# compressing as needed
+#
+# Optionally you can skip sorting, but the resulting compression will be compromised
+# if the current holdings are out of order
+#
+# TODO: gap marking, gap preservation
+#
+# TODO: some of this could be moved to the Caption object to allow for 
+# decompression in the absense of an overarching MFHD object
+#
+sub get_compressed_holdings {
+    my $self = shift;
+    my $caption = shift;
+    my $opts = shift;
+    my $skip_sort = $opts->{'skip_sort'};
 
-    my @active_captions = $self->active_captions($field);
+    # make sure none are compressed
+    my @decomp_holdings;
+    if ($skip_sort) {
+        @decomp_holdings = $self->get_decompressed_holdings($caption, {'skip_sort' => 1});
+    } else {
+        # sort for best algorithm
+        @decomp_holdings = $self->get_decompressed_holdings($caption, {'dedupe' => 1});
+    }
 
-    my @predictions;
-    foreach my $caption (@active_captions) {
-        my $htag    = $caption->tag;
-        my $link_id = $caption->link_id;
-        $htag =~ s/^85/86/;
-        my @holdings = $self->holdings($htag, $link_id);
-        my $last_holding = $holdings[-1];
-
-        if ($last_holding->is_compressed) {
-            $last_holding->compressed_to_last; # convert to last in range
+    my $runner = $decomp_holdings[0]->clone->increment;   
+    my $curr_holding = shift(@decomp_holdings);
+    $curr_holding = $curr_holding->clone;
+    my $seqno = 1;
+    $curr_holding->seqno($seqno);
+    my @comp_holdings;
+#    my $last_holding;
+    foreach my $holding (@decomp_holdings) {
+        if ($runner eq $holding) {
+            $curr_holding->extend;
+            $runner->increment;
+#        } elsif ($holding eq $last_holding) {
+#            carp("Found duplicate holding in compression set, skipping");
+        } elsif ($runner gt $holding) { # should not happen unless holding is not in series
+            carp("Found unexpected holding, skipping");
+        } else {
+            push(@comp_holdings, $curr_holding);
+            while ($runner le $holding) {
+                $runner->increment;
+            }
+            $curr_holding = $holding->clone;
+            $seqno++;
+            $curr_holding->seqno($seqno);
         }
+#        $last_holding = $holding;
+    }
+    push(@comp_holdings, $curr_holding);
 
-        my $pub_date  = $strp->parse_datetime($last_holding->chron_to_date);
-        my $date_diff = $receival_date - $pub_date;
+    return @comp_holdings;
+}
 
-        $last_holding->notes('public',  []);
-        # add a note marker for system use
-        $last_holding->notes('private', ['AUTOGEN']);
+#
+# create an array of single holdings from all holdings for a given caption,
+# decompressing as needed
+#
+# resulting array is returned as they come in the record, unsorted
+#
+# optional argument will reorder and renumber the holdings before returning
+# 
+# TODO: some of this could be moved to the Caption (and/or Holding) object to
+# allow for decompression in the absense of an overarching MFHD object
+#
+sub get_decompressed_holdings {
+    my $self = shift;
+    my $caption = shift;
+    my $opts = shift;
+    my $skip_sort = $opts->{'skip_sort'};
+    my $dedupe = $opts->{'dedupe'};
 
-        for (my $i = 0; $i < $num_to_predict; $i++) {
-            $last_holding->increment;
-            $pub_date = $strp->parse_datetime($last_holding->chron_to_date);
-            my $arrival_date = $pub_date + $date_diff;
-            push(
-                @predictions,
-                [
-                    $link_id,
-                    $last_holding->format,
-                    $pub_date->strftime('%F'),
-                    $arrival_date->strftime('%F'),
-                    [$last_holding->subfields_list]
-                ]
-            );
+    if ($dedupe and $skip_sort) {
+        carp("Attempted deduplication without sorting, failure likely");
+    }
+
+    my $htag    = $caption->tag;
+    my $link_id = $caption->link_id;
+    $htag =~ s/^85/86/;
+    my @holdings = $self->holdings($htag, $link_id);
+    my @decomp_holdings;
+
+    foreach my $holding (@holdings) {
+        if (!$holding->is_compressed) {
+            push(@decomp_holdings, $holding->clone);
+        } else {
+            my $base_holding = $holding->clone->compressed_to_first;
+            my @new_holdings = $self->generate_predictions(
+                {'base_holding' => $base_holding,
+                 'end_holding' => $holding->clone->compressed_to_last});
+            push(@decomp_holdings, $base_holding, @new_holdings);
         }
     }
-    return @predictions;
+
+    unless ($skip_sort) {
+        my @temp_holdings = sort {$a cmp $b} @decomp_holdings;
+        @decomp_holdings = @temp_holdings;
+    }
+
+    my @return_holdings = (shift(@decomp_holdings));
+    $return_holdings[0]->seqno(1);
+    my $seqno = 2;
+    foreach my $holding (@decomp_holdings) { # renumber sequence
+        if ($holding eq $return_holdings[-1] and $dedupe) {
+            carp("Found duplicate holding in decompression set, discarding");
+            next;
+        }
+        $holding->seqno($seqno);
+        $seqno++;
+        push(@return_holdings, $holding);
+    }
+
+    return @return_holdings;
 }
 
 #



More information about the open-ils-commits mailing list