[open-ils-commits] r986 - in conifer/branches/rel_1_6_1/src/perlmods/OpenILS: . Application Application/Storage Application/Storage/Driver Application/Storage/Driver/Pg (dbs)

svn at svn.open-ils.org svn at svn.open-ils.org
Mon Sep 13 10:28:01 EDT 2010


Author: dbs
Date: 2010-09-13 10:27:59 -0400 (Mon, 13 Sep 2010)
New Revision: 986

Added:
   conifer/branches/rel_1_6_1/src/perlmods/OpenILS/Application/
   conifer/branches/rel_1_6_1/src/perlmods/OpenILS/Application/Ingest.pm
   conifer/branches/rel_1_6_1/src/perlmods/OpenILS/Application/Storage/
   conifer/branches/rel_1_6_1/src/perlmods/OpenILS/Application/Storage/Driver/
   conifer/branches/rel_1_6_1/src/perlmods/OpenILS/Application/Storage/Driver/Pg/
   conifer/branches/rel_1_6_1/src/perlmods/OpenILS/Application/Storage/Driver/Pg/fts.pm
Log:
Add stock versions of O:A:Ingest and O:A:Storage:Driver:Pg:fts

Prepping for custom indexing patches


Added: conifer/branches/rel_1_6_1/src/perlmods/OpenILS/Application/Ingest.pm
===================================================================
--- conifer/branches/rel_1_6_1/src/perlmods/OpenILS/Application/Ingest.pm	                        (rev 0)
+++ conifer/branches/rel_1_6_1/src/perlmods/OpenILS/Application/Ingest.pm	2010-09-13 14:27:59 UTC (rev 986)
@@ -0,0 +1,1453 @@
+package OpenILS::Application::Ingest;
+use OpenILS::Application;
+use base qw/OpenILS::Application/;
+
+use Unicode::Normalize;
+use OpenSRF::EX qw/:try/;
+
+use OpenSRF::AppSession;
+use OpenSRF::Utils::SettingsClient;
+use OpenSRF::Utils::Logger qw/:level/;
+
+use OpenILS::Application::AppUtils;
+use OpenILS::Utils::ScriptRunner;
+use OpenILS::Utils::Fieldmapper;
+use OpenSRF::Utils::JSON;
+
+use OpenILS::Utils::Fieldmapper;
+
+use XML::LibXML;
+use XML::LibXSLT;
+use Time::HiRes qw(time);
+
+# Metadata formats known to this module, keyed by format name. Each entry
+# records the XML namespace URI for the format under 'ns' (empty string where
+# no namespace URI is recorded). post_init() additionally stores a compiled
+# stylesheet under the 'xslt' key for the mods, mods3, mods32 and mods33
+# entries.
+our %supported_formats = (
+    mods33    => {ns => 'http://www.loc.gov/mods/v3'},
+    mods32    => {ns => 'http://www.loc.gov/mods/v3'},
+    mods3    => {ns => 'http://www.loc.gov/mods/v3'},
+    mods    => {ns => 'http://www.loc.gov/mods/'},
+    marcxml    => {ns => 'http://www.loc.gov/MARC21/slim'},
+    srw_dc    => {ns => 'info:srw/schema/1/dc-schema'},
+    oai_dc    => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
+    rdf_dc    => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
+    atom    => {ns => 'http://www.w3.org/2005/Atom'},
+    rss091    => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
+    rss092    => {ns => ''},
+    rss093    => {ns => ''},
+    rss094    => {ns => ''},
+    rss10    => {ns => 'http://purl.org/rss/1.0/'},
+    rss11    => {ns => 'http://purl.org/net/rss1.1#'},
+    rss2    => {ns => ''},
+);
+
+
+# Logger class name; logging calls in this file are made as class methods on it.
+my $log = 'OpenSRF::Utils::Logger';
+
+# XML parser and XSLT processor shared by the subs in this file.
+my  $parser = XML::LibXML->new();
+my  $xslt = XML::LibXSLT->new();
+
+# NOTE(review): $mods_sheet and $mads_sheet are not referenced in the visible
+# part of this file -- confirm whether they are still needed.
+my  $mods_sheet;
+my  $mads_sheet;
+# Index definitions filled in by post_init():
+#   field class => field name => { xpath => ..., id => ..., format => ... }
+my  $xpathset = {};
+# Empty hooks; presumably OpenSRF application lifecycle callbacks -- TODO confirm.
+sub initialize {}
+sub child_init {}
+
+# Lazily prepares the shared ingest state.
+#
+# On the first call (that is, while $xpathset is still empty) this:
+#   * compiles the four MARC-to-MODS stylesheets found in the configured
+#     'xsl' directory and stores them in %supported_formats, and
+#   * loads the searchable metabib field definitions from open-ils.cstore
+#     into $xpathset.
+# Once $xpathset holds at least one class the sub returns immediately.
+# Callers use it for its side effects only.
+sub post_init {
+
+    return if (keys %$xpathset);
+
+    $log->debug("Running post_init", DEBUG);
+
+    my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
+
+    # format key, stylesheet file name, log message -- loaded in this order
+    my @stylesheets = (
+        [ mods   => "/MARC21slim2MODS.xsl",   "Loading MODS XSLT"     ],
+        [ mods3  => "/MARC21slim2MODS3.xsl",  "Loading MODS v3 XSLT"  ],
+        [ mods32 => "/MARC21slim2MODS32.xsl", "Loading MODS v32 XSLT" ],
+        [ mods33 => "/MARC21slim2MODS33.xsl", "Loading MODS v33 XSLT" ],
+    );
+
+    for my $entry (@stylesheets) {
+        my ($format, $file, $message) = @$entry;
+
+        # skip anything that has already been compiled
+        next if ($supported_formats{$format}{xslt});
+
+        $log->debug($message, DEBUG);
+        my $sheet_doc = $parser->parse_file( $xsldir . $file );
+        $supported_formats{$format}{xslt} = $xslt->parse_stylesheet( $sheet_doc );
+    }
+
+    # XXX testing new metabib field use for faceting
+    # (alternative query: { id => { '!=' => undef } })
+    my $fields = OpenSRF::AppSession
+        ->create('open-ils.cstore')
+        ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { search_field => 't' } )
+        ->gather(1);
+
+    return unless (ref $fields and @$fields);
+
+    for my $field (@$fields) {
+        my $def = ( $xpathset->{ $field->field_class }->{ $field->name } ||= {} );
+
+        $def->{xpath}  = $field->xpath;
+        $def->{id}     = $field->id;
+        $def->{format} = $field->format;
+
+        $log->debug("Loaded XPath from DB: ".$field->field_class." => ".$field->name." : ".$field->xpath, DEBUG);
+    }
+}
+
+# --------------------------------------------------------------------------------
+# Biblio ingest
+
+package OpenILS::Application::Ingest::Biblio;
+use base qw/OpenILS::Application::Ingest/;
+use Unicode::Normalize;
+
+# Full read/write ingest of one bibliographic record object.
+#
+# Arguments (after $self and $client):
+#   $bib - record object; its id is read, and its fingerprint and quality
+#          are overwritten from the read-only ingest result.
+#
+# Inside a single open-ils.cstore transaction this replaces, for the record:
+#   * URI call numbers and URI/call-number maps,
+#   * metabib full_rec rows and the record descriptor,
+#   * title/author/subject/keyword/series field entries,
+#   * the metarecord source map (creating, re-using or deleting metarecords
+#     and re-targeting metarecord holds as needed),
+# then updates the record itself and calls reporter.simple_rec_update.
+#
+# Returns the record id on success. Returns undef when the read-only ingest
+# produced nothing or when the commit request returned a false value.
+sub rw_biblio_ingest_single_object {
+    my $self = shift;
+    my $client = shift;
+    my $bib = shift;
+
+    my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
+    return undef unless ($blob);
+
+    $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
+    $bib->quality( $blob->{fingerprint}->{quality} );
+
+    my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
+
+    my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
+    my $tmp;
+
+    # update uri stuff ...
+
+    # gather URI call numbers for this record
+    # (the result used to be copied into an undeclared global $u as well;
+    # nothing ever read that copy, so it has been dropped)
+    my $uri_cns = $cstore->request(
+        'open-ils.cstore.direct.asset.call_number.id_list.atomic' => { record => $bib->id, label => '##URI##' }
+    )->gather(1) || [];
+
+    if (@$uri_cns) {
+        # gather the maps for those call numbers
+        my $uri_maps = $cstore->request(
+            'open-ils.cstore.direct.asset.uri_call_number_map.id_list.atomic' => { call_number => $uri_cns }
+        )->gather(1) || [];
+
+        # delete the old maps
+        $cstore->request( 'open-ils.cstore.direct.asset.uri_call_number_map.delete' => $_ )->gather(1) for (@$uri_maps);
+
+        # and delete the call numbers if there are no more URIs
+        if (!@{ $blob->{uri} }) {
+            $cstore->request( 'open-ils.cstore.direct.asset.call_number.delete' => $_ )->gather(1) for (@$uri_cns);
+        }
+    }
+
+    # now, add CNs, URIs and maps
+    my %new_cns_by_owner;
+    my %new_uris_by_owner;
+    for my $u ( @{ $blob->{uri} } ) {
+
+        my $owner = $u->{call_number}->owning_lib;
+
+        # create each new call number only once per owning library
+        if ($u->{call_number}->isnew) {
+            if ($new_cns_by_owner{$owner}) {
+                $u->{call_number} = $new_cns_by_owner{$owner};
+            } else {
+                $u->{call_number}->clear_id;
+                $u->{call_number} = $new_cns_by_owner{$owner} = $cstore->request(
+                    'open-ils.cstore.direct.asset.call_number.create' => $u->{call_number}
+                )->gather(1);
+            }
+        }
+
+        # likewise for new URIs
+        if ($u->{uri}->isnew) {
+            if ($new_uris_by_owner{$owner}) {
+                $u->{uri} = $new_uris_by_owner{$owner};
+            } else {
+                $u->{uri} = $new_uris_by_owner{$owner} = $cstore->request(
+                    'open-ils.cstore.direct.asset.uri.create' => $u->{uri}
+                )->gather(1);
+            }
+        }
+
+        # Check for an existing CN-URI map
+        $tmp = $cstore->request(
+            'open-ils.cstore.direct.asset.uri_call_number_map.id_list',
+            { call_number => $u->{call_number}->id, uri => $u->{uri}->id }
+        )->gather(1);
+
+        next if ($tmp);
+
+        my $umap = Fieldmapper::asset::uri_call_number_map->new;
+        $umap->uri($u->{uri}->id);
+        $umap->call_number($u->{call_number}->id);
+
+        $cstore->request( 'open-ils.cstore.direct.asset.uri_call_number_map.create' => $umap )->gather(1);
+    }
+
+    # update full_rec stuff ...
+    $tmp = $cstore->request(
+        'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
+        { record => $bib->id }
+    )->gather(1) || [];
+
+    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
+    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });
+
+    # update rec_descriptor stuff ...
+    $tmp = $cstore->request(
+        'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
+        { record => $bib->id }
+    )->gather(1) || [];
+
+    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
+    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);
+
+    # deal with classed fields...
+    for my $class ( qw/title author subject keyword series/ ) {
+        $tmp = $cstore->request(
+            "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
+            { source => $bib->id }
+        )->gather(1) || [];
+
+        $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
+    }
+    for my $obj ( @{ $blob->{field_entries} } ) {
+        # turn Fieldmapper::a::b into the "a.b" part of the cstore method name
+        my $class = $obj->class_name;
+        $class =~ s/^Fieldmapper:://o;
+        $class =~ s/::/./go;
+        $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
+    }
+
+    # update MR map ...
+
+    $tmp = $cstore->request(
+        'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
+        { source => $bib->id }
+    )->gather(1) || [];
+
+    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp);
+
+    # get the old MRs
+    # (declared unconditionally: "my $x = ... if COND" has undefined behaviour)
+    my $old_mrs = [];
+    if (@$tmp) {
+        $old_mrs = $cstore->request(
+            'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] }
+        )->gather(1);
+    }
+
+    $old_mrs = [] if (!ref($old_mrs));
+
+    # keep the old MR whose fingerprint still matches; flag the others as
+    # deleted, actually deleting those that no other record maps to
+    my $mr;
+    for my $m (@$old_mrs) {
+        if ($m->fingerprint eq $bib->fingerprint) {
+            $mr = $m;
+        } else {
+            my $others = $cstore->request(
+                'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id }
+            )->gather(1) || [];
+
+            if (!@$others) {
+                $cstore->request(
+                    'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
+                )->gather(1);
+            }
+
+            $m->isdeleted(1);
+        }
+    }
+
+    my $holds = [];
+    if (!$mr) {
+        # Get the matching MR, if any.
+        $mr = $cstore->request(
+            'open-ils.cstore.direct.metabib.metarecord.search',
+            { fingerprint => $bib->fingerprint }
+        )->gather(1);
+
+        # metarecord holds that pointed at the MRs flagged as deleted above
+        if (@$old_mrs) {
+            $holds = $cstore->request(
+                'open-ils.cstore.direct.action.hold_request.search.atomic',
+                { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
+            )->gather(1) || [];
+        }
+
+        if ($mr) {
+            for my $h (@$holds) {
+                $h->target($mr);
+                $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
+                $h->ischanged(1);
+            }
+        }
+    }
+
+    if (!$mr) {
+        # no usable metarecord anywhere: create one led by this record
+        $mr = Fieldmapper::metabib::metarecord->new;
+        $mr->fingerprint( $bib->fingerprint );
+        $mr->master_record( $bib->id );
+        $mr->id(
+            $cstore->request(
+                "open-ils.cstore.direct.metabib.metarecord.create",
+                $mr => { quiet => 'true' }
+            )->gather(1)
+        );
+
+        for my $h (grep { !$_->ischanged } @$holds) {
+            $h->target($mr);
+            $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
+        }
+    } else {
+        my $mrm = $cstore->request(
+            'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
+            { metarecord => $mr->id }
+        )->gather(1) || [];
+
+        if (@$mrm) {
+            # highest-quality record among those already mapped to this MR
+            my $best = $cstore->request(
+                "open-ils.cstore.direct.biblio.record_entry.search",
+                { id => [ map { $_->source } @$mrm ] },
+                { 'select'    => { bre => [ qw/id quality/ ] },
+                  order_by    => { bre => "quality desc" },
+                  limit        => 1,
+                }
+            )->gather(1);
+
+            # fall back to this record when the lookup returned nothing
+            if ($best and $best->quality > $bib->quality) {
+                $mr->master_record($best->id);
+            } else {
+                $mr->master_record($bib->id);
+            }
+        } else {
+            $mr->master_record($bib->id);
+        }
+
+        $mr->clear_mods;
+
+        $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
+    }
+
+    my $mrm = Fieldmapper::metabib::metarecord_source_map->new;
+    $mrm->source($bib->id);
+    $mrm->metarecord($mr->id);
+
+    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
+    $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);
+
+    $cstore->request( 'open-ils.cstore.json_query.atomic' => { from => [ 'reporter.simple_rec_update', $bib->id ] } )->gather(1);
+
+    # always release the connected session, even when the commit failed
+    my $committed = $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1);
+    $cstore->disconnect;
+
+    return undef unless ($committed);
+    return $bib->id;
+}
+__PACKAGE__->register_method(
+    api_name    => "open-ils.ingest.full.biblio.object",
+    method        => "rw_biblio_ingest_single_object",
+    api_level    => 1,
+    argc        => 1,
+);
+
+# Read/write ingest of one bibliographic record, looked up by id.
+#
+# Arguments (after $self and $client):
+#   $rec - value passed to the cstore record_entry retrieve call
+#
+# The record is fetched inside a transaction that is rolled back again, then
+# handed to open-ils.ingest.full.biblio.object. Returns that method's first
+# result, or undef when no record was retrieved.
+sub rw_biblio_ingest_single_record {
+    my ($self, $client, $rec) = @_;
+
+    OpenILS::Application::Ingest->post_init();
+
+    my $session = OpenSRF::AppSession->connect( 'open-ils.cstore' );
+    $session->request('open-ils.cstore.transaction.begin')->gather(1);
+
+    my $bib = $session
+        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
+        ->gather(1);
+
+    $session->request('open-ils.cstore.transaction.rollback')->gather(1);
+    $session->disconnect;
+
+    if (!$bib or !@$bib) {
+        return undef;
+    }
+
+    my $ingest = $self->method_lookup("open-ils.ingest.full.biblio.object");
+    return ( $ingest->run($bib) )[0];
+}
+__PACKAGE__->register_method(
+    api_name  => "open-ils.ingest.full.biblio.record",
+    method    => "rw_biblio_ingest_single_record",
+    api_level => 1,
+    argc      => 1,
+);
+
+# Read/write ingest of several bibliographic records.
+#
+# Arguments (after $self and $client): either one array reference of record
+# ids or a flat list of ids.
+#
+# Returns the number of records for which
+# open-ils.ingest.full.biblio.object returned a true first result, or undef
+# when the search found no records.
+sub rw_biblio_ingest_record_list {
+    my $self = shift;
+    my $client = shift;
+
+    my @ids;
+    if (ref($_[0])) {
+        @ids = @{ $_[0] };
+    } else {
+        @ids = @_;
+    }
+
+    OpenILS::Application::Ingest->post_init();
+
+    # fetch the records inside a transaction that is rolled back afterwards
+    my $session = OpenSRF::AppSession->connect( 'open-ils.cstore' );
+    $session->request('open-ils.cstore.transaction.begin')->gather(1);
+
+    my $bibs = $session
+        ->request( 'open-ils.cstore.direct.biblio.record_entry.search.atomic' => { id => \@ids } )
+        ->gather(1);
+
+    $session->request('open-ils.cstore.transaction.rollback')->gather(1);
+    $session->disconnect;
+
+    return undef if (!$bibs or !@$bibs);
+
+    # count the records whose ingest reported success
+    my $ingested = grep {
+        ( $self->method_lookup("open-ils.ingest.full.biblio.object")->run($_) )[0]
+    } @$bibs;
+
+    return $ingested;
+}
+__PACKAGE__->register_method(
+    api_name  => "open-ils.ingest.full.biblio.record_list",
+    method    => "rw_biblio_ingest_record_list",
+    api_level => 1,
+    argc      => 1,
+);
+
+# Read-only ingest of one bibliographic record object.
+#
+# Arguments (after $self and $client):
+#   $bib     - record object; its marc and id are read
+#   $max_cn  - optional; passed on to the 856 URI extractor. When false it
+#              defaults to the highest asset.call_number id plus 1000.
+#   $max_uri - optional; passed on to the 856 URI extractor. When false it
+#              defaults to the highest asset.uri id plus 1000.
+#
+# Returns a hash reference with the keys full_rec, field_entries,
+# fingerprint, descriptor and uri. Nothing is written to the database.
+sub ro_biblio_ingest_single_object {
+    my $self = shift;
+    my $client = shift;
+    my $bib = shift;
+    my $xml = OpenILS::Application::AppUtils->entityize($bib->marc);
+    my $max_cn = shift;
+    my $max_uri = shift;
+
+    my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
+
+    if (!$max_cn) {
+        my $cn = $cstore->request( 'open-ils.cstore.direct.asset.call_number.search' => { id => { '!=' => undef } }, { limit => 1, order_by => { acn => 'id desc' } } )->gather(1);
+        # an empty table yields no row; start from 0 instead of dying
+        $max_cn = ($cn ? int($cn->id) : 0) + 1000;
+    }
+
+    if (!$max_uri) {
+        # this used to query asset.call_number again (copy/paste); the URI
+        # ceiling has to come from asset.uri
+        my $uri = $cstore->request( 'open-ils.cstore.direct.asset.uri.search' => { id => { '!=' => undef } }, { limit => 1, order_by => { auri => 'id desc' } } )->gather(1);
+        $max_uri = ($uri ? int($uri->id) : 0) + 1000;
+    }
+
+    $cstore->disconnect;
+
+    my $document = $parser->parse_string($xml);
+
+    my @uris = $self->method_lookup("open-ils.ingest.856_uri.object")->run($bib, $max_cn, $max_uri);
+    my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
+    my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
+    my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
+    my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
+
+    # tie every extracted object back to the record it came from
+    $_->source($bib->id) for (@mXfe);
+    $_->record($bib->id) for (@mfr);
+    $rd->record($bib->id) if ($rd);
+
+    return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd, uri => \@uris };
+}
+__PACKAGE__->register_method(
+    api_name    => "open-ils.ingest.full.biblio.object.readonly",
+    method        => "ro_biblio_ingest_single_object",
+    api_level    => 1,
+    argc        => 1,
+);
+
+# Read-only ingest of a bibliographic record supplied as an XML string.
+#
+# Arguments (after $self and $client): the XML text, which is run through
+# entityize() before parsing.
+#
+# Returns a hash reference with the keys full_rec, field_entries,
+# fingerprint and descriptor.
+sub ro_biblio_ingest_single_xml {
+    my ($self, $client, $raw_xml) = @_;
+
+    my $xml = OpenILS::Application::AppUtils->entityize($raw_xml);
+    my $document = $parser->parse_string($xml);
+
+    # the first two extractors take the parsed document, the last two the text
+    my @full_rec = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
+    my @entries = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
+    my ($fingerprint) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
+    my ($descriptor) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
+
+    my %result = (
+        full_rec      => \@full_rec,
+        field_entries => \@entries,
+        fingerprint   => $fingerprint,
+        descriptor    => $descriptor,
+    );
+    return \%result;
+}
+__PACKAGE__->register_method(
+    api_name  => "open-ils.ingest.full.biblio.xml.readonly",
+    method    => "ro_biblio_ingest_single_xml",
+    api_level => 1,
+    argc      => 1,
+);
+
+# Read-only ingest of one bibliographic record, looked up by id.
+#
+# Arguments (after $self and $client):
+#   $rec - value passed to the cstore record_entry retrieve call; also
+#          stamped onto every returned field entry, full_rec row and the
+#          descriptor.
+#
+# Returns the hash reference produced by
+# open-ils.ingest.full.biblio.xml.readonly, or undef when no record was
+# retrieved.
+sub ro_biblio_ingest_single_record {
+    my $self = shift;
+    my $client = shift;
+    my $rec = shift;
+
+    OpenILS::Application::Ingest->post_init();
+    my $r = OpenSRF::AppSession
+            ->create('open-ils.cstore')
+            ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
+            ->gather(1);
+
+    return undef unless ($r and @$r);
+
+    my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
+
+    $_->source($rec) for (@{$res->{field_entries}});
+    $_->record($rec) for (@{$res->{full_rec}});
+    # guarded like the object variant: the descriptor may be missing
+    $res->{descriptor}->record($rec) if ($res->{descriptor});
+
+    return $res;
+}
+__PACKAGE__->register_method(
+    api_name    => "open-ils.ingest.full.biblio.record.readonly",
+    method        => "ro_biblio_ingest_single_record",
+    api_level    => 1,
+    argc        => 1,
+);
+
+# Streaming read-only ingest of bibliographic records by id.
+#
+# Reads one message at a time from $client (5 second timeout), runs
+# open-ils.ingest.full.biblio.record.readonly on its content and responds
+# with the result. Stops when recv() yields nothing or a message has
+# undefined content. Always returns undef.
+sub ro_biblio_ingest_stream_record {
+    my ($self, $client) = @_;
+
+    OpenILS::Application::Ingest->post_init();
+
+    # created but not used further in this sub
+    my $ses = OpenSRF::AppSession->create('open-ils.cstore');
+
+    while (my ($message) = $client->recv( count => 1, timeout => 5 )) {
+
+        my $record_id = $message->content;
+        defined($record_id) or last;
+
+        $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
+        my ($ingested) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($record_id);
+
+        for my $entry (@{ $ingested->{field_entries} }) {
+            $entry->source($record_id);
+        }
+        for my $row (@{ $ingested->{full_rec} }) {
+            $row->record($record_id);
+        }
+
+        $client->respond( $ingested );
+    }
+
+    return undef;
+}
+__PACKAGE__->register_method(
+    api_name  => "open-ils.ingest.full.biblio.record_stream.readonly",
+    method    => "ro_biblio_ingest_stream_record",
+    api_level => 1,
+    stream    => 1,
+);
+
+# Streaming read-only ingest of bibliographic records supplied as XML.
+#
+# Reads one message at a time from $client (5 second timeout), runs
+# open-ils.ingest.full.biblio.xml.readonly on its content and responds with
+# the result. Stops when recv() yields nothing or a message has undefined
+# content. Always returns undef.
+sub ro_biblio_ingest_stream_xml {
+    my ($self, $client) = @_;
+
+    OpenILS::Application::Ingest->post_init();
+
+    # created but not used further in this sub
+    my $ses = OpenSRF::AppSession->create('open-ils.cstore');
+
+    while (my ($message) = $client->recv( count => 1, timeout => 5 )) {
+
+        my $marcxml = $message->content;
+        defined($marcxml) or last;
+
+        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
+        my ($ingested) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($marcxml);
+
+        $client->respond( $ingested );
+    }
+
+    return undef;
+}
+__PACKAGE__->register_method(
+    api_name  => "open-ils.ingest.full.biblio.xml_stream.readonly",
+    method    => "ro_biblio_ingest_stream_xml",
+    api_level => 1,
+    stream    => 1,
+);
+
+# Streaming ingest of bibliographic record objects for import.
+#
+# Reads one record object at a time from $client (5 second timeout), runs
+# open-ils.ingest.full.biblio.xml.readonly on its marc, stamps the record id
+# onto the resulting field entries and full_rec rows, and responds with the
+# result. Stops when recv() yields nothing or a message has undefined
+# content. Always returns undef.
+sub rw_biblio_ingest_stream_import {
+    my ($self, $client) = @_;
+
+    OpenILS::Application::Ingest->post_init();
+
+    # created but not used further in this sub
+    my $ses = OpenSRF::AppSession->create('open-ils.cstore');
+
+    while (my ($message) = $client->recv( count => 1, timeout => 5 )) {
+
+        my $record = $message->content;
+        defined($record) or last;
+
+        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
+        my ($ingested) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($record->marc);
+
+        for my $entry (@{ $ingested->{field_entries} }) {
+            $entry->source($record->id);
+        }
+        for my $row (@{ $ingested->{full_rec} }) {
+            $row->record($record->id);
+        }
+
+        $client->respond( $ingested );
+    }
+
+    return undef;
+}
+__PACKAGE__->register_method(
+    api_name  => "open-ils.ingest.full.biblio.bib_stream.import",
+    method    => "rw_biblio_ingest_stream_import",
+    api_level => 1,
+    stream    => 1,
+);
+
+
+# --------------------------------------------------------------------------------
+# Authority ingest
+
+package OpenILS::Application::Ingest::Authority;
+use base qw/OpenILS::Application::Ingest/;
+use Unicode::Normalize;
+
+# Full read/write ingest of one authority record object.
+#
+# Arguments (after $self and $client):
+#   $auth - record object; its id is read
+#
+# Inside a single open-ils.cstore transaction this replaces the record's
+# authority full_rec rows with those from the read-only ingest.
+#
+# Returns the record id on success. Returns undef when the read-only ingest
+# produced nothing or when the commit request returned a false value.
+sub rw_authority_ingest_single_object {
+    my $self = shift;
+    my $client = shift;
+    my $auth = shift;
+
+    my ($blob) = $self->method_lookup("open-ils.ingest.full.authority.object.readonly")->run($auth);
+    return undef unless ($blob);
+
+    my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
+
+    my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
+    my $tmp;
+
+    # update full_rec stuff ...
+    $tmp = $cstore->request(
+        'open-ils.cstore.direct.authority.full_rec.id_list.atomic',
+        { record => $auth->id }
+    )->gather(1) || [];
+
+    $cstore->request( 'open-ils.cstore.direct.authority.full_rec.delete' => $_ )->gather(1) for (@$tmp);
+    $cstore->request( 'open-ils.cstore.direct.authority.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });
+
+    # XXX when we start extracting authority descriptors and adding sources ...
+    #
+    # update rec_descriptor stuff ...
+    #$tmp = $cstore->request(
+    #    'open-ils.cstore.direct.authority.record_descriptor.id_list.atomic',
+    #    { record => $auth->id }
+    #)->gather(1);
+    #
+    #$cstore->request( 'open-ils.cstore.direct.authority.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
+    #$cstore->request( 'open-ils.cstore.direct.authority.record_descriptor.create' => $blob->{descriptor} )->gather(1);
+    #$cstore->request( 'open-ils.cstore.direct.authority.record_entry.update' => $auth )->gather(1);
+
+    # always release the connected session, even when the commit failed
+    my $committed = $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1);
+    $cstore->disconnect;
+
+    return undef unless ($committed);
+    return $auth->id;
+}
+__PACKAGE__->register_method(
+    api_name    => "open-ils.ingest.full.authority.object",
+    method        => "rw_authority_ingest_single_object",
+    api_level    => 1,
+    argc        => 1,
+);
+
+# Read/write ingest of one authority record, looked up by id.
+#
+# Arguments (after $self and $client):
+#   $rec - value passed to the cstore authority record_entry retrieve call
+#
+# The record is fetched inside a transaction that is rolled back again, then
+# handed to open-ils.ingest.full.authority.object. Returns that method's
+# first result, or undef when no record was retrieved.
+sub rw_authority_ingest_single_record {
+    my ($self, $client, $rec) = @_;
+
+    OpenILS::Application::Ingest->post_init();
+
+    my $session = OpenSRF::AppSession->connect( 'open-ils.cstore' );
+    $session->request('open-ils.cstore.transaction.begin')->gather(1);
+
+    my $authority = $session
+        ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
+        ->gather(1);
+
+    $session->request('open-ils.cstore.transaction.rollback')->gather(1);
+    $session->disconnect;
+
+    if (!$authority or !@$authority) {
+        return undef;
+    }
+
+    my $ingest = $self->method_lookup("open-ils.ingest.full.authority.object");
+    return ( $ingest->run($authority) )[0];
+}
+__PACKAGE__->register_method(
+    api_name  => "open-ils.ingest.full.authority.record",
+    method    => "rw_authority_ingest_single_record",
+    api_level => 1,
+    argc      => 1,
+);
+
+# Read-only ingest of one authority record object.
+#
+# Arguments (after $self and $client): a record object whose marc and id
+# are read.
+#
+# Returns a hash reference with the single key full_rec, holding the flat
+# MARC rows with the record id stamped on each.
+sub ro_authority_ingest_single_object {
+    my ($self, $client, $bib) = @_;
+
+    my $xml = OpenILS::Application::AppUtils->entityize($bib->marc);
+    my $document = $parser->parse_string($xml);
+
+    my @rows = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
+
+    for my $row (@rows) {
+        $row->record($bib->id);
+    }
+
+    return { full_rec => \@rows };
+}
+__PACKAGE__->register_method(
+    api_name  => "open-ils.ingest.full.authority.object.readonly",
+    method    => "ro_authority_ingest_single_object",
+    api_level => 1,
+    argc      => 1,
+);
+
+# Read-only ingest of an authority record supplied as an XML string.
+#
+# Arguments (after $self and $client): the XML text, which is run through
+# entityize() before parsing.
+#
+# Returns a hash reference with the single key full_rec.
+sub ro_authority_ingest_single_xml {
+    my ($self, $client, $raw_xml) = @_;
+
+    my $xml = OpenILS::Application::AppUtils->entityize($raw_xml);
+    my $document = $parser->parse_string($xml);
+
+    my @rows = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
+
+    my %result = ( full_rec => \@rows );
+    return \%result;
+}
+__PACKAGE__->register_method(
+    api_name  => "open-ils.ingest.full.authority.xml.readonly",
+    method    => "ro_authority_ingest_single_xml",
+    api_level => 1,
+    argc      => 1,
+);
+
+# Read-only ingest of one authority record, looked up by id.
+#
+# Arguments (after $self and $client):
+#   $rec - value passed to the cstore authority record_entry retrieve call;
+#          also stamped onto every returned full_rec row.
+#
+# Returns the hash reference produced by
+# open-ils.ingest.full.authority.xml.readonly, or undef when no record was
+# retrieved.
+sub ro_authority_ingest_single_record {
+    my $self = shift;
+    my $client = shift;
+    my $rec = shift;
+
+    OpenILS::Application::Ingest->post_init();
+    my $r = OpenSRF::AppSession
+            ->create('open-ils.cstore')
+            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
+            ->gather(1);
+
+    return undef unless ($r and @$r);
+
+    my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
+
+    $_->record($rec) for (@{$res->{full_rec}});
+
+    # The authority xml ingest returns only full_rec, so an unconditional
+    # call on the descriptor would die on an undefined value.
+    $res->{descriptor}->record($rec) if ($res->{descriptor});
+
+    return $res;
+}
+__PACKAGE__->register_method(
+    api_name    => "open-ils.ingest.full.authority.record.readonly",
+    method        => "ro_authority_ingest_single_record",
+    api_level    => 1,
+    argc        => 1,
+);
+
+# Streaming read-only ingest of authority records by id.
+#
+# Reads one message at a time from $client (5 second timeout), runs
+# open-ils.ingest.full.authority.record.readonly on its content and responds
+# with the result. Stops when recv() yields nothing or a message has
+# undefined content. Always returns undef.
+sub ro_authority_ingest_stream_record {
+    my ($self, $client) = @_;
+
+    OpenILS::Application::Ingest->post_init();
+
+    # created but not used further in this sub
+    my $ses = OpenSRF::AppSession->create('open-ils.cstore');
+
+    while (my ($message) = $client->recv( count => 1, timeout => 5 )) {
+
+        my $record_id = $message->content;
+        defined($record_id) or last;
+
+        $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
+        my ($ingested) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($record_id);
+
+        for my $row (@{ $ingested->{full_rec} }) {
+            $row->record($record_id);
+        }
+
+        $client->respond( $ingested );
+    }
+
+    return undef;
+}
+__PACKAGE__->register_method(
+    api_name  => "open-ils.ingest.full.authority.record_stream.readonly",
+    method    => "ro_authority_ingest_stream_record",
+    api_level => 1,
+    stream    => 1,
+);
+
+# Streaming read-only ingest of authority records supplied as XML.
+#
+# Reads one message at a time from $client (5 second timeout), runs
+# open-ils.ingest.full.authority.xml.readonly on its content and responds
+# with the result. Stops when recv() yields nothing or a message has
+# undefined content. Always returns undef.
+sub ro_authority_ingest_stream_xml {
+    my ($self, $client) = @_;
+
+    OpenILS::Application::Ingest->post_init();
+
+    # created but not used further in this sub
+    my $ses = OpenSRF::AppSession->create('open-ils.cstore');
+
+    while (my ($message) = $client->recv( count => 1, timeout => 5 )) {
+
+        my $marcxml = $message->content;
+        defined($marcxml) or last;
+
+        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
+        my ($ingested) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($marcxml);
+
+        $client->respond( $ingested );
+    }
+
+    return undef;
+}
+__PACKAGE__->register_method(
+    api_name  => "open-ils.ingest.full.authority.xml_stream.readonly",
+    method    => "ro_authority_ingest_stream_xml",
+    api_level => 1,
+    stream    => 1,
+);
+
+# Streaming ingest of authority record objects for import.
+#
+# Reads one record object at a time from $client (5 second timeout), runs
+# open-ils.ingest.full.authority.xml.readonly on its marc, stamps the record
+# id onto the resulting full_rec rows, and responds with the result. Stops
+# when recv() yields nothing or a message has undefined content. Always
+# returns undef.
+sub rw_authority_ingest_stream_import {
+    my ($self, $client) = @_;
+
+    OpenILS::Application::Ingest->post_init();
+
+    # created but not used further in this sub
+    my $ses = OpenSRF::AppSession->create('open-ils.cstore');
+
+    while (my ($message) = $client->recv( count => 1, timeout => 5 )) {
+
+        my $record = $message->content;
+        defined($record) or last;
+
+        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
+        my ($ingested) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($record->marc);
+
+        for my $row (@{ $ingested->{full_rec} }) {
+            $row->record($record->id);
+        }
+
+        $client->respond( $ingested );
+    }
+
+    return undef;
+}
+__PACKAGE__->register_method(
+    api_name  => "open-ils.ingest.full.authority.bib_stream.import",
+    method    => "rw_authority_ingest_stream_import",
+    api_level => 1,
+    stream    => 1,
+);
+
+# --------------------------------------------------------------------------------
+# MARC index extraction
+
+package OpenILS::Application::Ingest::XPATH;
+use base qw/OpenILS::Application::Ingest/;
+use Unicode::Normalize;
+
+# Collects the text matched by an XPath expression into one string.
+#
+# Arguments (plain function, not a method):
+#   $xml       - XML element to search (callers pass a documentElement)
+#   $xpath     - XPath expression to evaluate against $xml
+#   $ns_uri    - optional namespace URI
+#   $ns_prefix - optional namespace prefix; the namespace is set on $xml
+#                only when both URI and prefix are true
+#   $unique    - when true, a child's text is skipped if it already occurs
+#                anywhere in the string built so far
+#
+# For each matching node the text of every child node is appended, followed
+# by a space; a node without children contributes its own text. Slashes
+# between words and hyphens in NNNN-NNN[N][x] patterns are then replaced by
+# spaces. Returns the NFD-normalised result.
+sub xpath_to_string {
+    my $xml = shift;
+    my $xpath = shift;
+    my $ns_uri = shift;
+    my $ns_prefix = shift;
+    my $unique = shift;
+
+    $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
+
+    my $string = "";
+
+    # grab the set of matching nodes
+    my @nodes = $xml->findnodes( $xpath );
+    for my $value (@nodes) {
+
+        # grab all children of the node
+        my @children = $value->childNodes();
+        for my $child (@children) {
+
+            # add the child's content to the growing buffer
+            my $content = $child->textContent;
+
+            # Uniquify with a literal substring test. A regex built from
+            # empty text would be the empty pattern, which re-runs whatever
+            # regex last matched and so gave unpredictable results.
+            next if ($unique && index($string, $content) >= 0);
+            $string .= $content . " ";
+        }
+        if( ! @children ) {
+            $string .= $value->textContent . " ";
+        }
+    }
+
+    $string =~ s/(\w+)\/(\w+)/$1 $2/sgo;
+    # Split date ranges and ISSNs on the hyphen
+    $string =~ s/(\d{4})-(\d{3,4}x?)/ $1 $2 /goi;
+
+    return NFD($string);
+}
+
+# Extracts normalised index strings for the given field classes from a
+# MARCXML record and streams them back as field entry objects.
+#
+# Arguments (after $self and $client):
+#   $xml     - XML text, or an already parsed document (any reference is
+#              used as-is)
+#   @classes - field class names, e.g. those used as top-level keys of
+#              $xpathset
+#
+# For every field definition of each class the record is transformed with
+# the definition's stylesheet when one is loaded (one transform per format
+# per call), the XPath is evaluated, and the resulting text is normalised.
+# Definitions that yield no text are skipped. Each value is sent with
+# $client->respond as a Fieldmapper::metabib::<class>_field_entry carrying
+# the value and the field id. Always returns undef.
+sub class_index_string_xml {
+    my $self = shift;
+    my $client = shift;
+    my $xml = shift;
+    my @classes = @_;
+
+    OpenILS::Application::Ingest->post_init();
+    $xml = $parser->parse_string(OpenILS::Application::AppUtils->entityize($xml)) unless (ref $xml);
+
+    my %transform_cache;
+
+    for my $class (@classes) {
+        my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
+        for my $type ( keys %{ $xpathset->{$class} } ) {
+
+            my $def = $xpathset->{$class}->{$type};
+            my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
+
+            my $document = $xml;
+
+            # formats with a stylesheet are searched in their transformed form
+            if ($sf->{xslt}) {
+                $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
+                $transform_cache{$def->{format}} = $document;
+            }
+
+            my $value =  xpath_to_string(
+                    $document->documentElement    => $def->{xpath},
+                    $sf->{ns}            => $def->{format},
+                    1
+            );
+
+            next unless $value;
+
+            # strip combining marks, control characters and trailing non-word
+            # characters
+            $value = NFD($value);
+            $value =~ s/\pM+//sgo;
+            $value =~ s/\pC+//sgo;
+            $value =~ s/\W+$//sgo;
+
+            # hack to normalize ratio-like strings
+            # (this used to test and rewrite an undeclared $term, so it never
+            # touched the value being indexed)
+            while ($value =~ /\b\d{1}:[, ]?\d+(?:[ ,]\d+[^:])+/o) {
+                $value = $` . join ('', split(/[, ]/, $&)) . $';
+            }
+
+            $value =~ s/\b\.+\b//sgo;
+            $value = lc($value);
+
+            my $fm = $class_constructor->new;
+            $fm->value( $value );
+            $fm->field( $xpathset->{$class}->{$type}->{id} );
+            $client->respond($fm);
+        }
+    }
+    return undef;
+}
+__PACKAGE__->register_method(
+    api_name    => "open-ils.ingest.field_entry.class.xml",
+    method        => "class_index_string_xml",
+    api_level    => 1,
+    argc        => 2,
+    stream        => 1,
+);
+
+sub class_index_string_record {
+    my $self = shift;
+    my $client = shift;     # each field entry is sent through respond()
+    my $rec = shift;        # id of the record whose MARC is indexed
+    my @classes = @_;       # every remaining argument is an index class, as in class_index_string_xml
+    # NOTE(review): this reads authority.record_entry while all_index_string_record reads biblio.record_entry -- confirm which is intended.
+    OpenILS::Application::Ingest->post_init();
+    my $r = OpenSRF::AppSession
+            ->create('open-ils.cstore')
+            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
+            ->gather(1);
+
+    return undef unless ($r and @$r);
+
+    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
+        $fm->source($rec);  # stamp each entry with the record id
+        $client->respond($fm);
+    }
+    return undef;
+}
+__PACKAGE__->register_method(  
+    api_name    => "open-ils.ingest.field_entry.class.record",
+    method        => "class_index_string_record",
+    api_level    => 1,
+    argc        => 2,
+    stream        => 1,
+);                      
+
+sub all_index_string_xml {
+    my $self = shift;
+    my $client = shift;     # each field entry is relayed through respond()
+    my $xml = shift;        # MARCXML to index under every class found in $xpathset
+    OpenILS::Application::Ingest->post_init();  # presumably what fills $xpathset (confirm); run before keys() as the *.record sibling does
+    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
+        $client->respond($fm);
+    }
+    return undef;
+}
+__PACKAGE__->register_method(  
+    api_name    => "open-ils.ingest.extract.field_entry.all.xml",
+    method        => "all_index_string_xml",
+    api_level    => 1,
+    argc        => 1,
+    stream        => 1,
+);                      
+
+sub all_index_string_record {
+    my ($self, $client, $rec) = @_;
+    # Stream the field entries of every index class for biblio record $rec.
+    OpenILS::Application::Ingest->post_init();
+    my $r = OpenSRF::AppSession
+            ->create('open-ils.cstore')
+            ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
+            ->gather(1);
+
+    # stop quietly when the lookup came back false or empty
+    unless ($r and @$r) {
+        return undef;
+    }
+    for my $entry ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
+        $entry->source($rec);   # stamp each entry with the record id
+        $client->respond($entry);
+    }
+    return undef;
+}
+__PACKAGE__->register_method(  
+    api_name    => "open-ils.ingest.extract.field_entry.all.record",
+    method        => "all_index_string_record",
+    api_level    => 1,
+    argc        => 1,
+    stream        => 1,
+);                      
+
+# --------------------------------------------------------------------------------
+# Flat MARC
+
+package OpenILS::Application::Ingest::FlatMARC;
+use base qw/OpenILS::Application::Ingest/;
+use Unicode::Normalize;
+
+
+sub _marcxml_to_full_rows {
+    # Flatten a parsed MARCXML document into full_rec rows: leader, control fields, then one row per data subfield.
+    my $marcxml = shift;                    # parsed XML document; its first "record" element is used
+    my $xmltype = shift || 'metabib';       # Fieldmapper namespace of the row class
+
+    my $type = "Fieldmapper::${xmltype}::full_rec";
+
+    my @ns_list;
+    
+    my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
+
+    for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
+        next unless $tagline;
+        _special_tag_to_full_rows($type, $tagline, \@ns_list, 'LDR');
+    }
+
+    for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
+        next unless $tagline;
+        _special_tag_to_full_rows($type, $tagline, \@ns_list, $tagline->getAttribute( "tag" ));
+    }
+    # data fields; a metabib 245 is emitted a second time under the pseudo-tag 'tnf'
+    for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
+        next unless $tagline;
+        my $tag = $tagline->getAttribute( "tag" );
+        _data_tag_to_full_rows($type, $tagline, \@ns_list, $tag);
+        if ($xmltype eq 'metabib' and $tag eq '245') {
+            _data_tag_to_full_rows($type, $tagline, \@ns_list, 'tnf');
+        }
+    }
+
+    $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
+    return @ns_list;
+}
+
+=head2 _special_tag_to_full_rows
+
+Converts a leader or control field to a set of normalized values
+
+=cut
+
+sub _special_tag_to_full_rows {
+    my ($type, $tagline, $ns_list, $tagname) = @_;
+
+    # decompose, then drop combining marks (\pM) and \pC characters
+    my $text = NFD($tagline->textContent);
+    for my $junk (qr/\pM+/, qr/\pC+/) {
+        $text =~ s/$junk//sg;
+    }
+    # trim trailing non-word characters
+    $text =~ s/\W+$//sg;
+
+    # build the row object for this leader/control field
+    my $row = $type->new;
+    $row->tag( $tagname );
+    $row->value( $text );
+    # hand it back through the caller's accumulator
+    push @$ns_list, $row;
+}
+
+=head2 _data_tag_to_full_rows
+
+Converts a data field to a set of normalized values
+
+=cut
+
+sub _data_tag_to_full_rows {
+    my ($type, $tagline, $ns_list, $tag) = @_;
+
+    # the indicators are shared by every subfield row built from this field
+    my $ind1 = $tagline->getAttribute( "ind1" );
+    my $ind2 = $tagline->getAttribute( "ind2" );
+
+    # one row per subfield, pushed onto the caller's list
+    SUBFIELD:
+    for my $sf ( @{$tagline->getChildrenByTagName('subfield')} ) {
+        next SUBFIELD unless $sf;
+
+        my $row = $type->new;
+        $row->tag( $tag );
+        $row->ind1( $ind1 );
+        $row->ind2( $ind2 );
+        $row->subfield( $sf->getAttribute( "code" ) );
+
+        # normalize: decompose, strip \pM and \pC characters, trim trailing non-word characters
+        my $text = NFD($sf->textContent);
+        $text =~ s/\pM+//sgo;
+        $text =~ s/\pC+//sgo;
+        $text =~ s/\W+$//sgo;
+        # Split date ranges and ISSNs on the hyphen
+        $text =~ s/(\d{4})-(\d{3,4}x?)/ $1 $2 /goi;
+        # break slash-joined words apart; the stored value is lowercased
+        $text =~ s/(\w+)\/(\w+)/$1 $2/sgo;
+        $row->value( lc($text) );
+        push @$ns_list, $row;
+    }
+}
+
+sub flat_marc_xml {
+    my ($self, $client, $xml) = @_;
+
+    # Stream the rows that _marcxml_to_full_rows builds from the given MARCXML.
+    $log->debug("processing [$xml]");
+
+    # a plain string is entityized and parsed; a reference is used as the document as-is
+    $xml = $parser->parse_string(OpenILS::Application::AppUtils->entityize($xml)) unless (ref $xml);
+
+    # the API name this method was invoked under picks the row namespace
+    my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';
+
+    OpenILS::Application::Ingest->post_init();
+
+    for my $row (_marcxml_to_full_rows($xml, $type)) { $client->respond($row); }
+    return undef;
+}
+__PACKAGE__->register_method(  
+    api_name    => "open-ils.ingest.flat_marc.authority.xml",
+    method        => "flat_marc_xml",
+    api_level    => 1,
+    argc        => 1,
+    stream        => 1,
+);                      
+__PACKAGE__->register_method(  
+    api_name    => "open-ils.ingest.flat_marc.biblio.xml",
+    method        => "flat_marc_xml",
+    api_level    => 1,
+    argc        => 1,
+    stream        => 1,
+);                      
+
+sub flat_marc_record {
+    my ($self, $client, $rec) = @_;
+
+    # biblio unless this was invoked through an API name containing "authority"
+    my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';
+
+    OpenILS::Application::Ingest->post_init();
+    my $r = OpenSRF::AppSession
+            ->create('open-ils.cstore')
+            ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
+            ->gather(1);
+
+    # no record, or a record without MARC: nothing to flatten
+    unless ($r and $r->marc) {
+        return undef;
+    }
+
+    # delegate to the matching *.xml method, then relay and log each row
+    for my $row ($self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc)) {
+        $client->respond($row);
+        $log->debug(OpenSRF::Utils::JSON->perl2JSON($row), DEBUG);
+    }
+    return undef;
+}
+__PACKAGE__->register_method(  
+    api_name    => "open-ils.ingest.flat_marc.biblio.record_entry",
+    method        => "flat_marc_record",
+    api_level    => 1,
+    argc        => 1,
+    stream        => 1,
+);                      
+__PACKAGE__->register_method(  
+    api_name    => "open-ils.ingest.flat_marc.authority.record_entry",
+    method        => "flat_marc_record",
+    api_level    => 1,
+    argc        => 1,
+    stream        => 1,
+);                      
+
+# --------------------------------------------------------------------------------
+# URI extraction
+
+package OpenILS::Application::Ingest::Biblio::URI;
+use base qw/OpenILS::Application::Ingest/;
+use Unicode::Normalize;
+use OpenSRF::EX qw/:try/;
+
+
+sub _extract_856_uris {
+    # Build { uri, call_number } pairs from a record's 856 fields (ind1 4 or 1, ind2 0 or 1).
+    my $rec   = shift;      # record object; id, marc, creator and editor are read from it
+    my $max_cn = shift;     # scalar ref: id given to a new call number, then incremented (get_uris_record passes none)
+    my $max_uri = shift;    # scalar ref: id given to a new URI, then incremented (get_uris_record passes none)
+    my @objects;
+    
+    my $recid = $rec->id;
+    my $marcxml = $rec->marc;
+
+    my $document = $parser->parse_string($marcxml);
+    my @nodes = $document->findnodes('//*[local-name()="datafield" and @tag="856" and (@ind1="4" or @ind1="1") and (@ind2="0" or @ind2="1")]');
+
+    my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
+
+    my %cn_cache;   # call number last used for each owning org id
+
+    for my $node (@nodes) {
+        # first, is there a URI?
+        my $href = $node->findvalue('*[local-name()="subfield" and @code="u"]/text()');
+        next unless ($href);
+
+        # now, find the best possible label
+        my $label = $node->findvalue('*[local-name()="subfield" and @code="y"]/text()');
+        $label ||= $node->findvalue('*[local-name()="subfield" and @code="3"]/text()');
+        $label ||= $href;
+
+        # look for use info
+        my $use = $node->findvalue('*[local-name()="subfield" and @code="z"]/text()');
+        $use ||= $node->findvalue('*[local-name()="subfield" and @code="2"]/text()');
+        $use ||= $node->findvalue('*[local-name()="subfield" and @code="n"]/text()');
+
+        # moving on to the URI owner
+        my $owner = $node->findvalue('*[local-name()="subfield" and @code="9"]/text()'); # Evergreen special sauce
+        $owner ||= $node->findvalue('*[local-name()="subfield" and @code="w"]/text()');
+        $owner ||= $node->findvalue('*[local-name()="subfield" and @code="n"]/text()');
+
+        $owner =~ s/^.*?\((\w+)\).*$/$1/o; # unwrap first paren-enclosed string and then ...
+
+        # no owner? skip it :(
+        next unless ($owner);
+        # resolve the owner to an org unit by shortname
+        my $org = $cstore
+            ->request( 'open-ils.cstore.direct.actor.org_unit.search' => { shortname => $owner} )
+            ->gather(1);
+        # no such org unit? skip it too
+        next unless ($org);
+
+        # now we can construct the uri object
+        my $uri = $cstore
+            ->request( 'open-ils.cstore.direct.asset.uri.search' => { label => $label, href => $href, use_restriction => $use, active => 't' } )
+            ->gather(1);
+        # no active match came back from the search: describe a new one
+        if (!$uri) {
+            $uri = Fieldmapper::asset::uri->new;
+            $uri->isnew( 1 );
+            $uri->id( $$max_uri++ );
+            $uri->label($label);
+            $uri->href($href);
+            $uri->active('t');
+            $uri->use_restriction($use);
+        }
+
+        # see if we need to create a call number
+        my $cn = $cn_cache{$org->id};
+        $cn = $cn->clone if ($cn);      # a cached call number is reused as a clone ...
+        $cn->clear_isnew if ($cn);      # ... with its isnew flag cleared
+        # nothing cached: look for an existing '##URI##' call number for this org and record
+        $cn ||= $cstore
+            ->request( 'open-ils.cstore.direct.asset.call_number.search' => { owning_lib => $org->id, record => $recid, label => '##URI##' } )
+            ->gather(1);
+        # still nothing: describe a new one
+        if (!$cn) {
+            $cn = Fieldmapper::asset::call_number->new;
+            $cn->isnew( 1 );
+            $cn->deleted('f');
+            $cn->id( $$max_cn++ );
+            $cn->owning_lib( $org->id );
+            $cn->record( $recid );
+            $cn->create_date( 'now' );
+            $cn->creator( $rec->creator );
+            $cn->editor( $rec->editor );
+            $cn->edit_date( 'now' );
+            $cn->label( '##URI##' );
+        }
+        # remember it for later 856 fields owned by the same org
+        $cn_cache{$org->id} = $cn;
+
+        push @objects, { uri => $uri, call_number => $cn };
+    }
+
+    $log->debug("Returning ".scalar(@objects)." URI nodes for record $recid");
+    $cstore->disconnect;
+    return @objects;
+}
+
+sub get_uris_record {
+    my ($self, $client, $rec) = @_;
+    # Stream the { uri, call_number } pairs extracted from the 856 fields of biblio record $rec.
+    OpenILS::Application::Ingest->post_init();
+
+    my $r = OpenSRF::AppSession
+            ->create('open-ils.cstore')
+            ->request( "open-ils.cstore.direct.biblio.record_entry.retrieve" => $rec )
+            ->gather(1);
+
+    # nothing to extract without a record that carries MARC
+    return undef unless ($r and $r->marc);
+
+    for my $pair (_extract_856_uris($r)) { $client->respond($pair); }
+    return undef;
+}
+__PACKAGE__->register_method(  
+    api_name    => "open-ils.ingest.856_uri.record",
+    method        => "get_uris_record",
+    api_level    => 1,
+    argc        => 1,
+    stream        => 1,
+);                      
+
+sub get_uris_object {
+    my ($self, $client, $obj, $max_cn, $max_uri) = @_;
+
+    # Stream the { uri, call_number } pairs extracted from a record object supplied by the caller.
+    # nothing to extract without an object that carries MARC
+    return undef unless ($obj and $obj->marc);
+
+    # the two counters are passed by reference; _extract_856_uris takes ids for
+    # new call numbers and URIs from them and increments them
+    for my $pair (_extract_856_uris($obj, \$max_cn, \$max_uri)) { $client->respond($pair); }
+    return undef;
+}
+__PACKAGE__->register_method(  
+    api_name    => "open-ils.ingest.856_uri.object",
+    method        => "get_uris_object",
+    api_level    => 1,
+    argc        => 1,
+    stream        => 1,
+);                      
+
+
+
+# --------------------------------------------------------------------------------
+# Fingerprinting
+
+package OpenILS::Application::Ingest::Biblio::Fingerprint;
+use base qw/OpenILS::Application::Ingest/;
+use Unicode::Normalize;
+use OpenSRF::EX qw/:try/;
+
+sub biblio_fingerprint_record {
+    my ($self, $client, $rec) = @_;
+
+    OpenILS::Application::Ingest->post_init();
+
+    my $r = OpenSRF::AppSession
+            ->create('open-ils.cstore')
+            ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
+            ->gather(1);
+
+    # no record, or no MARC on it: no fingerprint
+    return undef unless ($r and $r->marc);
+    # only the first value handed back by the *.xml fingerprint method is used
+    my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
+    $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
+    # NOTE(review): if $fp is undef this autovivifies a hash holding only quality => 0 -- confirm callers expect that
+    $fp->{quality} = int($fp->{quality});
+    return $fp;
+}
+__PACKAGE__->register_method(  
+    api_name    => "open-ils.ingest.fingerprint.record",
+    method        => "biblio_fingerprint_record",
+    api_level    => 1,
+    argc        => 1,
+);                      
+
+our $fp_script;
+sub biblio_fingerprint {
+    my ($self, $client, $raw) = @_;
+    my $xml = OpenILS::Application::AppUtils->entityize($raw);
+
+    $log->internal("Got MARC [$xml]");
+
+    # build the script runner on first use and keep it in $fp_script for later calls
+    unless ($fp_script) {
+        my $conf = OpenSRF::Utils::SettingsClient->new;
+        my @pfx = ( "apps", "open-ils.ingest","app_settings" );
+
+        my $libs        = $conf->config_value(@pfx, 'script_path');
+        my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
+        $libs = [$libs] unless ref($libs);  # a lone path is wrapped into a list
+
+        $log->debug("Loading script $script_file for biblio fingerprinting...");
+
+        $fp_script = OpenILS::Utils::ScriptRunner->new(
+            file        => $script_file,
+            paths       => $libs,
+            reset_count => 100 );
+    }
+
+    # insert the entityized MARC under 'environment' as { marc => ... }
+    $fp_script->insert('environment' => {marc => $xml} => 1);
+    # the "&& return" only fires when $log->error returns a true value
+    my $res = $fp_script->run || ($log->error( "Fingerprint script died!  $@" ) && return undef);
+    $log->debug("Script for biblio fingerprinting completed successfully...");
+    return $res;
+}
+__PACKAGE__->register_method(  
+    api_name    => "open-ils.ingest.fingerprint.xml",
+    method        => "biblio_fingerprint",
+    api_level    => 1,
+    argc        => 1,
+);                      
+
+our $rd_script;
+sub biblio_descriptor {
+    my ($self, $client, $raw) = @_;
+    my $xml = OpenILS::Application::AppUtils->entityize($raw);
+
+    $log->internal("Got MARC [$xml]");
+
+    # build the script runner on first use and keep it in $rd_script for later calls
+    unless ($rd_script) {
+        my $conf = OpenSRF::Utils::SettingsClient->new;
+        my @pfx = ( "apps", "open-ils.ingest","app_settings" );
+
+        my $libs        = $conf->config_value(@pfx, 'script_path');
+        my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
+        $libs = [$libs] unless ref($libs);  # a lone path is wrapped into a list
+
+        $log->debug("Loading script $script_file for biblio descriptor extraction...");
+
+        $rd_script = OpenILS::Utils::ScriptRunner->new(
+            file        => $script_file,
+            paths       => $libs,
+            reset_count => 100 );
+    }
+
+    # insert the entityized MARC under the 'environment.marc' key
+    $log->debug("Setting up environment for descriptor extraction script...");
+    $rd_script->insert('environment.marc' => $xml => 1);
+    $log->debug("Environment building complete...");
+
+    # the "&& return" only fires when $log->error returns a true value
+    my $res = $rd_script->run || ($log->error( "Descriptor script died!  $@" ) && return undef);
+    $log->debug("Script for biblio descriptor extraction completed successfully");
+
+    # 'u' and 'x' characters become 0 in date1 and 9 in date2;
+    # a false or four-space value is left untouched.
+    for my $slot ( [ date1 => '0' ], [ date2 => '9' ] ) {
+        my ($field, $pad) = @$slot;
+        my $date = $res->$field;
+        next unless ($date && $date ne '    ');
+        $date =~ s/[ux]/$pad/g;
+        $res->$field( $date );
+    }
+
+    return $res;
+}
+__PACKAGE__->register_method(  
+    api_name    => "open-ils.ingest.descriptor.xml",
+    method        => "biblio_descriptor",
+    api_level    => 1,
+    argc        => 1,
+);                      
+
+
+1;
+
+# vim:et:ts=4:sw=4:

Added: conifer/branches/rel_1_6_1/src/perlmods/OpenILS/Application/Storage/Driver/Pg/fts.pm
===================================================================
--- conifer/branches/rel_1_6_1/src/perlmods/OpenILS/Application/Storage/Driver/Pg/fts.pm	                        (rev 0)
+++ conifer/branches/rel_1_6_1/src/perlmods/OpenILS/Application/Storage/Driver/Pg/fts.pm	2010-09-13 14:27:59 UTC (rev 986)
@@ -0,0 +1,89 @@
+{ # Every driver needs to provide a 'compile()' method to OpenILS::Application::Storage::FTS.
+  # If that driver wants to support FTI, that is...
+	#-------------------------------------------------------------------------------
+	package OpenILS::Application::Storage::FTS;
+	use OpenSRF::Utils::Logger qw/:level/;
+	use Unicode::Normalize;
+	my $log = 'OpenSRF::Utils::Logger';
+
+	sub compile {
+		my $self = shift;
+		my $class = shift;	# passed to to_tsquery() as its first argument
+		my $term = NFD(shift());	# raw search term, decomposed up front
+		# Compile a raw search term into a to_tsquery() expression and return it in a new object of the invocant's class.
+		$log->debug("Raw term: $term",DEBUG);
+		$log->debug("Search class: $class",DEBUG);
+		# '&' and '|' are removed from the term; they are used below to join the compiled words and NOTs
+		$term =~ s/\&//go;
+		$term =~ s/\|//go;
+		# always work on a fresh object, whether called on a class name or an instance
+		$self = ref($self) || $self;
+		$self = bless {} => $self;
+		$self->{class} = $class;
+		# strip combining marks, and any single period that sits between two word characters
+		$term =~ s/(\pM+)//gos;
+		$term =~ s/(\b\.\b)//gos;
+
+		# hack to normalize ratio-like strings
+		while ($term =~ /\b\d{1}:[, ]?\d+(?:[ ,]\d+[^:])+/o) {
+			$term = $` . join ('', split(/[, ]/, $&)) . $';
+		}
+		# decompose() is defined outside this file; words and nots are read back from $self afterwards
+		$self->decompose($term);
+
+		my $newterm = '';
+		$newterm = join('&', $self->words) if ($self->words);
+		# NOT terms are ORed together and negated as a group
+		if (@{$self->nots}) {
+			$newterm = '('.$newterm.')&' if ($newterm);
+			$newterm .= '!('. join('|', $self->nots) . ')';
+		}
+
+		$log->debug("Compiled term is [$newterm]", DEBUG);
+		$newterm = OpenILS::Application::Storage::Driver::Pg->quote($newterm);
+		$log->debug("Quoted term is [$newterm]", DEBUG);
+
+		$self->{fts_query} = ["to_tsquery('$$self{class}',$newterm)"];
+		$self->{fts_query_nots} = [];
+		$self->{fts_op} = '@@';
+		$self->{text_col} = shift;	# next remaining argument
+		$self->{fts_col} = shift;	# the argument after that
+
+		return $self;
+	}
+
+	sub sql_where_clause {
+		my ($self) = @_;
+		my $column = $self->fts_col;
+		my $op = $self->{fts_op};
+		# one "<fts column> <operator> <query>" test and one rank() expression per compiled query
+		my (@tests, @ranks);
+		for my $query ( $self->fts_query ) {
+			push @tests, join(' ', $self->fts_col, $op, $query);
+			push @ranks, "rank($column, $query)";
+		}
+		# the rank expressions are stored on the object under fts_rank
+		$self->{fts_rank} = \@ranks;
+		# tests are ANDed together; the phrase clause is appended exactly as sql_exact_phrase_match returns it
+		return join(' AND ', @tests) . $self->sql_exact_phrase_match();
+	}
+
+	sub sql_exact_phrase_match {
+		my ($self) = @_;
+		my $column = $self->text_col;
+		my @clauses;
+		# each phrase becomes a case-insensitive (~*) regex test against the text column
+		for my $re ( $self->phrases ) {
+			# backslash-escape '*', '.' and single quotes; any whitespace run becomes \s+
+			$re =~ s/([*.'])/\\$1/g;
+			$re =~ s/\s+/\\s+/go;
+			$log->debug("Adding phrase [$re] to the match list", DEBUG);
+			push @clauses, " AND $column ~* \$\$(^|\\W+)$re(\\W+|\$)\$\$";
+		}
+		$log->debug("Phrase list is [" . join('', @clauses) . "]", DEBUG);
+		return join('', @clauses);
+	}
+
+}
+
+1;



More information about the open-ils-commits mailing list