[open-ils-commits] r258 - in servres/trunk/conifer: custom libsystems/evergreen libsystems/z3950 syrup templates/phys (gfawcett)
svn at svn.open-ils.org
svn at svn.open-ils.org
Fri Apr 3 22:42:26 EDT 2009
Author: gfawcett
Date: 2009-04-03 22:42:24 -0400 (Fri, 03 Apr 2009)
New Revision: 258
Added:
servres/trunk/conifer/libsystems/z3950/marcxml.py
servres/trunk/conifer/syrup/fuzzy_match.py
servres/trunk/conifer/templates/phys/mark_arrived.xhtml
servres/trunk/conifer/templates/phys/mark_arrived_choose.xhtml
Modified:
servres/trunk/conifer/custom/lib_integration.py
servres/trunk/conifer/libsystems/evergreen/item_status.py
servres/trunk/conifer/libsystems/z3950/marctools.py
servres/trunk/conifer/libsystems/z3950/yaz_search.py
servres/trunk/conifer/syrup/models.py
servres/trunk/conifer/syrup/urls.py
servres/trunk/conifer/syrup/views.py
Log:
working on Mark Physical Items As Arrived: fuzzy match is working.
The fuzzy-match is in place; not tuned yet, but it's there. It's a
relevance-engine based on Levenshtein-distance comparison of the
title, author (and to a lesser degree the publisher and
pubdate). Ideas for improvements are most welcome.
Note that this version of the code takes a full snapshot of the MARC
record when a Physical Item is requested from the catalogue. So there
are more opportunities for item comparison.
The code is a horrible mess. Much cleanup to do.
Modified: servres/trunk/conifer/custom/lib_integration.py
===================================================================
--- servres/trunk/conifer/custom/lib_integration.py 2009-04-04 02:42:19 UTC (rev 257)
+++ servres/trunk/conifer/custom/lib_integration.py 2009-04-04 02:42:24 UTC (rev 258)
@@ -12,9 +12,30 @@
# SIP for patron and item_info, and for item checkout and checkin,
# OpenSRF for extended item info.
+
+# define a @caching decorator to exploit the Django cache. Fixme, move
+# this somewhere else.
+from django.core.cache import cache
def caching(prefix, timeout=60):
    """Build a decorator that memoizes a function in the Django cache.

    prefix  -- value that keeps this function's entries apart from those
               of other cached functions.
    timeout -- passed as the third argument to cache.set() for every
               stored result (default 60).

    The cache key is the tuple (prefix, args), so the wrapped function
    can only be called with positional arguments.  Falsy results (None,
    '', 0, ...) are never stored, so a lookup that found nothing is
    retried on the next call instead of being remembered.
    """
    # Imported locally so the decorator does not rely on the module's
    # import block.
    import functools

    def g(func):
        # wraps() keeps func's __name__ and __doc__ on the wrapper, so
        # decorated functions stay recognisable in tracebacks and logs.
        @functools.wraps(func)
        def f(*args):
            # NOTE(review): a tuple key only works if the configured cache
            # backend accepts non-string keys -- confirm.
            key = (prefix, args)
            v = cache.get(key)
            if v:
                return v
            v = func(*args)
            if v:
                cache.set(key, v, timeout)
            return v
        return f
    return g
+
+
from django.conf import settings
#LIBINT = settings.LIBRARY_INTEGRATION # more on this later.
+
+from conifer.libsystems.evergreen import item_status as I
from conifer.libsystems.sip.sipclient import SIP
@@ -34,6 +55,15 @@
def checkin(conn, item_barcode):
return conn.checkin(item_barcode, institution='', location='')
-
@caching('bcbi', timeout=3600)
def barcode_to_bib_id(barcode):
    """Return whatever item_status.barcode_to_bib_id gives for barcode.

    Results are cached under the 'bcbi' prefix with a timeout of 3600.
    """
    return I.barcode_to_bib_id(barcode)
@caching('bccp', timeout=3600)
def barcode_to_copy(barcode):
    """Return whatever item_status.barcode_to_copy gives for barcode.

    Results are cached under the 'bccp' prefix with a timeout of 3600.
    """
    # NOTE(review): item_status, as shown in this revision, does not
    # define barcode_to_copy -- confirm it exists before relying on this.
    return I.barcode_to_copy(barcode)
+
@caching('bimx', timeout=3600)
def bib_id_to_marcxml(bib_id):
    """Return whatever item_status.bib_id_to_marcxml gives for bib_id.

    Results are cached under the 'bimx' prefix with a timeout of 3600.
    """
    return I.bib_id_to_marcxml(bib_id)
Modified: servres/trunk/conifer/libsystems/evergreen/item_status.py
===================================================================
--- servres/trunk/conifer/libsystems/evergreen/item_status.py 2009-04-04 02:42:19 UTC (rev 257)
+++ servres/trunk/conifer/libsystems/evergreen/item_status.py 2009-04-04 02:42:24 UTC (rev 258)
@@ -1,29 +1,15 @@
-import warnings
from support import ER, E1
-from pprint import pprint
-# Proposing this as an interface method. Given a bib ID, return a dict
-# giving the item's bibid, barcode, availability (boolean),
-# holdability (boolean), and location (a string description). If the
-# bib ID is invalid, return None.
-
-def lookup_availability(bib_id):
- rec = E1('open-ils.search.asset.copy.fleshed2.retrieve', bib_id)
- if 'stacktrace' in rec:
- warnings.warn(repr(('no such bib id', bib_id, repr(rec))))
def barcode_to_bib_id(barcode):
    """Look up the bib id for an item barcode.

    Calls 'open-ils.search.bib_id.by_barcode' and returns the result
    when it is a string; any other result yields None.
    """
    result = E1('open-ils.search.bib_id.by_barcode', barcode)
    # An unknown barcode comes back as a dict rather than a string id.
    if not isinstance(result, basestring):
        return None
    return result
- resp = {
- 'bibid': bib_id,
- 'barcode': rec['barcode'],
- 'available': rec['status']['name'] == 'Available',
- 'holdable': rec['status']['holdable'] == 't',
- 'location': rec['location']['name']}
- return resp
def bib_id_to_marcxml(bib_id):
    """Return the result of the supercat MARCXML retrieve call for bib_id."""
    method = 'open-ils.supercat.record.marcxml.retrieve'
    return E1(method, bib_id)
if __name__ == '__main__':
- DYLAN = 1321798
- #print lookup_availability(DYLAN)
-
- MISCHIEF = 2063351
- pprint(E1('open-ils.search.biblio.record.mods_slim.retrieve', MISCHIEF))
+ from pprint import pprint
+ print bib_id_to_marcxml(barcode_to_bib_id(31862016799294))
Modified: servres/trunk/conifer/libsystems/z3950/marctools.py
===================================================================
--- servres/trunk/conifer/libsystems/z3950/marctools.py 2009-04-04 02:42:19 UTC (rev 257)
+++ servres/trunk/conifer/libsystems/z3950/marctools.py 2009-04-04 02:42:24 UTC (rev 258)
@@ -91,6 +91,8 @@
def replace(self, str):
"Given string str, returns unicode string with correct character replcements"
+ if isinstance(str, unicode): # added by Graham
+ return str
searchchars = []
# build subset of search/replace pairs to use based on if first char of search appears in str
prev = range(0,3)
Added: servres/trunk/conifer/libsystems/z3950/marcxml.py
===================================================================
--- servres/trunk/conifer/libsystems/z3950/marcxml.py (rev 0)
+++ servres/trunk/conifer/libsystems/z3950/marcxml.py 2009-04-04 02:42:24 UTC (rev 258)
@@ -0,0 +1,32 @@
+from xml.etree import ElementTree
+import marctools
+
+loc_to_unicode = marctools.locToUTF8().replace
+
def marcxml_to_dictionary(rec):
    """Flatten a MARCXML record into a dict keyed by tag + subfield code.

    rec -- the MARCXML document as a string.

    Returns a dict such as {'245a': ..., '100a': ...}.  Each value is the
    subfield text passed through loc_to_unicode; an empty subfield is
    stored as an empty string.  When a tag/code pair occurs more than
    once the last occurrence wins.  Only datafield elements directly
    under the root element are visited.
    """
    ns = '{http://www.loc.gov/MARC21/slim}'
    tree = ElementTree.fromstring(rec)
    dct = {}
    for df in tree.findall(ns + 'datafield'):
        t = df.attrib['tag']
        for sf in df.findall(ns + 'subfield'):
            c = sf.attrib['code']
            v = sf.text
            if v is None:
                # ElementTree reports the text of an empty element as
                # None; keep the key but do not hand None to the
                # character converter.
                dct[t + c] = u''
            else:
                dct[t + c] = loc_to_unicode(v)
    return dct
+
def marcxml_dictionary_to_dc(dct):
    """Take a dictionary generated by marcxml_to_dictionary, and
    extract some Dublin Core elements from it. Fixme, I'm sure this
    could be way improved.

    dct -- mapping of MARC tag + subfield code (e.g. '245a') to text.

    Returns a new dict with any of the keys 'dc:title', 'dc:creator',
    'dc:publisher', 'dc:date' and 'dc:contributor' whose MARC source
    is present and non-empty.  When the record also has a 245b
    subfield, it is appended to 'dc:title' after a single space.
    """
    meta = [('245a', 'dc:title'), ('100a', 'dc:creator'), ('260b', 'dc:publisher'),
            ('260c', 'dc:date'), ('700a', 'dc:contributor')]
    out = {}
    for marc, dc in meta:
        value = dct.get(marc)
        if value:
            out[dc] = value
    # The subtitle has to be read from the record itself; the mapping
    # table above holds (marc, dc) pairs and never contains '245b'.
    subtitle = dct.get('245b')
    if subtitle and 'dc:title' in out:
        out['dc:title'] += (' %s' % subtitle)
    return out
+
+
Modified: servres/trunk/conifer/libsystems/z3950/yaz_search.py
===================================================================
--- servres/trunk/conifer/libsystems/z3950/yaz_search.py 2009-04-04 02:42:19 UTC (rev 257)
+++ servres/trunk/conifer/libsystems/z3950/yaz_search.py 2009-04-04 02:42:24 UTC (rev 258)
@@ -6,19 +6,15 @@
import warnings
import re
-from xml.etree import ElementTree
import pexpect
-import marctools
import sys
+from marcxml import marcxml_to_dictionary
-loc_to_unicode = marctools.locToUTF8().replace
-
LOG = sys.stderr #None # for pexpect debugging, try LOG = sys.stderr
YAZ_CLIENT = 'yaz-client'
GENERAL_TIMEOUT = 10
PRESENT_TIMEOUT = 30
-
def search(host, database, query, start=1, limit=None):
server = pexpect.spawn('yaz-client', timeout=GENERAL_TIMEOUT, logfile=LOG)
@@ -60,26 +56,18 @@
parsed = []
for rec in raw_records:
- dct = {}
- parsed.append(dct)
try:
- tree = ElementTree.fromstring(rec)
+ dct = marcxml_to_dictionary(rec)
except:
raise rec
- for df in tree.findall('{http://www.loc.gov/MARC21/slim}datafield'):
- t = df.attrib['tag']
- for sf in df.findall('{http://www.loc.gov/MARC21/slim}subfield'):
- c = sf.attrib['code']
- v = sf.text
- dct[t+c] = loc_to_unicode(v)
-
+ parsed.append(dct)
return parsed
+
#------------------------------------------------------------
# some tests
if __name__ == '__main__':
- print loc_to_unicode('A\\XCC\\X81n')
tests = [
('dwarf.cs.uoguelph.ca:2210', 'conifer', '@and "Musson" "Evil"'),
('dwarf.cs.uoguelph.ca:2210', 'conifer', '@and "Denis" "Gravel"'),
Added: servres/trunk/conifer/syrup/fuzzy_match.py
===================================================================
--- servres/trunk/conifer/syrup/fuzzy_match.py (rev 0)
+++ servres/trunk/conifer/syrup/fuzzy_match.py 2009-04-04 02:42:24 UTC (rev 258)
@@ -0,0 +1,46 @@
+from conifer.syrup import models
+
+#http://www.poromenos.org/node/87. Credit to Poromenos. It's under BSD.
def levenshtein_distance(first, second):
    """Find the Levenshtein distance between two strings.

    first, second -- the sequences to compare (str or unicode).

    Returns the minimum number of single-character insertions,
    deletions and substitutions needed to turn one into the other.
    """
    # The distance is symmetric, so make `first` the shorter string;
    # that keeps each row of the table as short as possible.
    if len(first) > len(second):
        first, second = second, first
    if not first:
        # Turning an empty string into `second` takes len(second) insertions.
        return len(second)
    first_length = len(first) + 1
    second_length = len(second) + 1
    # previous[i] is the distance between first[:i] and the prefix of
    # `second` handled so far.  For the empty prefix that is i deletions.
    previous = list(range(first_length))
    for j in range(1, second_length):
        # Column 0: building second[:j] from nothing takes j insertions.
        current = [j]
        for i in range(1, first_length):
            deletion = previous[i] + 1
            insertion = current[i - 1] + 1
            substitution = previous[i - 1]
            if first[i - 1] != second[j - 1]:
                substitution += 1
            current.append(min(insertion, deletion, substitution))
        previous = current
    return previous[-1]
+
def rank_pending_items(dct):
    """Rank physical items by how closely their metadata matches dct.

    dct -- mapping that may contain 'dc:title', 'dc:creator',
           'dc:publisher' and 'dc:date' values for the item in hand.

    For every Item with item_type 'PHYS', the Levenshtein distance
    between each of those values and the item's metadata value of the
    same name is multiplied by the field's weight and summed.  Missing
    values on either side are compared as empty strings.

    Returns a list of (score, item) tuples sorted by ascending score;
    a lower score means a closer match.
    """
    all_pending_items = models.Item.objects.filter(item_type='PHYS') # not right... also, prefetch metadata
    # not sure I like these weights, but let's play a bit.
    # The date is looked up as 'dc:date', the name under which
    # marcxml_dictionary_to_dc emits it.
    METRICS = (('dc:title', 1), ('dc:creator', 1), ('dc:publisher', 0.5), ('dc:date', 0.25))
    results = []
    for item in all_pending_items:
        score = 0
        for heading, weight in METRICS:
            try:
                ival = item.metadata_set.get(name=heading).value or ''
            except Exception:
                # Best effort: an item with no usable metadata row for
                # this heading is compared as an empty string.
                ival = ''
            dist = levenshtein_distance(dct.get(heading) or '', ival)
            # Multiply, so that a smaller weight makes a field count
            # for less of the total distance.
            score += dist * weight
        results.append((score, item))
    # Sort on the score alone, so that tied scores never fall back to
    # comparing model instances.
    results.sort(key=lambda pair: pair[0])
    return results
Modified: servres/trunk/conifer/syrup/models.py
===================================================================
--- servres/trunk/conifer/syrup/models.py 2009-04-04 02:42:19 UTC (rev 257)
+++ servres/trunk/conifer/syrup/models.py 2009-04-04 02:42:24 UTC (rev 258)
@@ -281,6 +281,10 @@
def get_students(self):
return User.objects.filter(member__course__exact=self, member__role__exact='STUDT') \
.order_by('last_name', 'first_name')
+
+ def get_instructors(self):
+ return User.objects.filter(member__course__exact=self, member__role__exact='INSTR') \
+ .order_by('last_name', 'first_name')
def _merge_sections(secs):
delim = course_sections.sections_tuple_delimiter
@@ -544,3 +548,4 @@
completed = m.DateTimeField(default=None, null=True)
outcome = m.CharField(max_length=100, null=True)
+
Modified: servres/trunk/conifer/syrup/urls.py
===================================================================
--- servres/trunk/conifer/syrup/urls.py 2009-04-04 02:42:19 UTC (rev 257)
+++ servres/trunk/conifer/syrup/urls.py 2009-04-04 02:42:24 UTC (rev 258)
@@ -48,6 +48,7 @@
(r'^phys/$', 'phys_index'),
(r'^phys/checkout/$', 'phys_checkout'),
+ (r'^phys/mark_arrived/$', 'phys_mark_arrived'),
(r'^course/(?P<course_id>\d+)/reseq$', 'course_reseq'),
(ITEM_PREFIX + r'reseq', 'item_heading_reseq'),
Modified: servres/trunk/conifer/syrup/views.py
===================================================================
--- servres/trunk/conifer/syrup/views.py 2009-04-04 02:42:19 UTC (rev 257)
+++ servres/trunk/conifer/syrup/views.py 2009-04-04 02:42:24 UTC (rev 258)
@@ -35,6 +35,8 @@
import sys
from django.forms.models import modelformset_factory
from conifer.custom import lib_integration
+from conifer.libsystems.z3950.marcxml import marcxml_to_dictionary, marcxml_dictionary_to_dc
+from fuzzy_match import rank_pending_items
#-----------------------------------------------------------------------------
# Z39.50 Support
@@ -268,6 +270,7 @@
return g.render('z3950_test.xhtml', res_str=res_str)
def graham_z3950_test(request):
+ raise NotImplementedError # delete this function, its template, etc.
query = request.GET.get('query', '@and "Denis" "Gravel"')
from conifer.libsystems.z3950 import yaz_search
from conifer.libsystems.evergreen.item_status import lookup_availability
@@ -743,18 +746,18 @@
return _access_denied(_('You are not an editor.'))
pickitem = eval(_pickitem) # fixme, dangerous. cache result server-side instead, or encrypt it.
+ dublin = marcxml_dictionary_to_dc(pickitem)
+
item = course.item_set.create(parent_heading=parent_item,
title=pickitem.get('245a', 'Untitled'),
item_type='PHYS')
item.save()
- # these are a temporary hack, must replace
- meta = [('245a', 'dc:title'), ('100a', 'dc:creator'), ('260b', 'dc:publisher'),
- ('260c', 'dc:date'), ('700a', 'dc:contributor')]
- for marc, dc in meta:
- value = pickitem.get(marc)
- if value:
- md = item.metadata_set.create(item=item, name=dc, value=value)
- item.metadata_set.create(item=item, name='syrup:marc', value=simplejson.dumps(pickitem))
+
+ for dc, value in dublin.items():
+ md = item.metadata_set.create(item=item, name=dc, value=value)
+ # store the whole darn MARC-dict as well.
+ json = simplejson.dumps(pickitem)
+ item.metadata_set.create(item=item, name='syrup:marc', value=json)
item.save()
return HttpResponseRedirect('../../../%d/' % item.id)
@@ -1264,4 +1267,24 @@
return g.render('phys/checkout.xhtml', step=2,
patron=patron,
patron_descrip=post('patron_descrip'))
+
def phys_mark_arrived(request):
    """Handle the 'mark arrived' page.

    Any request other than a POST renders the barcode entry form.  A
    POST reads the 'item' field as a barcode, fetches the matching
    MARCXML record through lib_integration, ranks the physical items
    against it and renders the match-selection page.
    """
    if request.method != 'POST':
        return g.render('phys/mark_arrived.xhtml')

    barcode = request.POST.get('item', '').strip()
    bib_id = lib_integration.barcode_to_bib_id(barcode)
    record = marcxml_to_dictionary(lib_integration.bib_id_to_marcxml(bib_id))
    # Fold the Dublin Core view into the raw MARC dictionary so that both
    # sets of keys are available for ranking and for the template.
    record.update(marcxml_dictionary_to_dc(record))
    matches = rank_pending_items(record)
    return g.render('phys/mark_arrived_choose.xhtml',
                    barcode=barcode,
                    bib_id=bib_id,
                    ranked=matches,
                    metadata=record)
+
+
Added: servres/trunk/conifer/templates/phys/mark_arrived.xhtml
===================================================================
--- servres/trunk/conifer/templates/phys/mark_arrived.xhtml (rev 0)
+++ servres/trunk/conifer/templates/phys/mark_arrived.xhtml 2009-04-04 02:42:24 UTC (rev 258)
@@ -0,0 +1,38 @@
+<?python
+sample_item = '31862016799294' # fixme, just for testing.
+title = _('Mark Items as Arrived')
+?>
+<html xmlns="http://www.w3.org/1999/xhtml"
+ xmlns:xi="http://www.w3.org/2001/XInclude"
+ xmlns:py="http://genshi.edgewall.org/">
+<xi:include href="../master.xhtml"/>
+<head>
+ <title>${title}</title>
+ <script>
+ $(function() { $('form:last input:visible:first').focus(); });
+ </script>
+ <style>
+ .success { background-color: #dfd; }
+ .failure { background-color: #fdd; }
+ </style>
+</head>
+<body>
+ <h1>${title}</h1>
+ <form action="." method="POST">
+ <div>
+ <table class="metadata_table">
+ <tr>
+ <th>Item Barcode</th>
+ <td>
+ <input type="text" id="item" name="item" style="width: 400;" value="${sample_item}"/>
+ </td>
+ </tr>
+ <tr>
+ <th/>
+ <td><input type="submit" value="${False and _('Check out another item') or _('Continue')}"/></td>
+ </tr>
+ </table>
+ </div>
+ </form>
+</body>
+</html>
Added: servres/trunk/conifer/templates/phys/mark_arrived_choose.xhtml
===================================================================
--- servres/trunk/conifer/templates/phys/mark_arrived_choose.xhtml (rev 0)
+++ servres/trunk/conifer/templates/phys/mark_arrived_choose.xhtml 2009-04-04 02:42:24 UTC (rev 258)
@@ -0,0 +1,58 @@
+<?python
+import re
+sample_item = '31862016799294' # fixme, just for testing.
+title = _('Mark Items as Arrived: Choose Match')
+?>
+<html xmlns="http://www.w3.org/1999/xhtml"
+ xmlns:xi="http://www.w3.org/2001/XInclude"
+ xmlns:py="http://genshi.edgewall.org/">
+<xi:include href="../master.xhtml"/>
+<head>
+ <title>${title}</title>
+ <script>
+ $(function() { $('form:last input:visible:first').focus(); });
+ </script>
+ <style>
+ .success { background-color: #dfd; }
+ .failure { background-color: #fdd; }
+ .likely { background-color: #dfd; }
+ .doubtful { font-size: 75%; }
+ </style>
+</head>
+<body>
+ <h1>${title}</h1>
+ <form action="." method="POST">
+ <div>
+ <table class="metadata_table">
+ <tr><th>Item Barcode</th><td>${barcode}</td></tr>
+ <tr py:for="k in ['dc:title', 'dc:creator', 'dc:publisher', 'dc:date']" py:if="k in metadata">
+ <th>${k}</th><td>${metadata[k]}</td>
+ </tr>
+ <tr><th/><td><a href="javascript:$('#more_detail').toggle(); void(0);">Detail</a></td></tr>
+ </table>
+ <table id="more_detail" style="display: none;" class="metadata_table">
+ <tr py:for="k in metadata">
+ <th>${k}</th><td>${metadata[k]}</td>
+ </tr>
+ </table>
+ <h2>Matches</h2>
+ <p><button>Associate with matches selected below</button></p>
+ <table class="metadata_table">
+ <thead style="font-size: 70%;">
+ <tr><th py:for="v in 'Select Title Author Course Instructor Score'.split(' ')">${v}</th></tr>
+ </thead>
+ <tbody>
+ <tr py:for="score, item in ranked" class="${score < 5 and 'likely' or score < 50 and 'maybe' or 'doubtful'}">
+ <td><input type="checkbox" name="choose_${item.id}" id="choose_${item.id}"/></td>
+ <td><label for="choose_${item.id}">${item}</label></td>
+ <td>${item.author()}</td>
+ <td>${item.course.title}</td>
+ <td>${','.join(n.last_name for n in item.course.get_instructors())}</td>
+ <td>${repr(score)}</td>
+ </tr>
+ </tbody>
+ </table>
+ </div>
+ </form>
+</body>
+</html>
More information about the open-ils-commits
mailing list