[Opensrf-commits] r1323 - in trunk: include/opensrf src/libopensrf

Fri May 16 08:45:13 EDT 2008

Author: erickson
Date: 2008-05-16 08:45:11 -0400 (Fri, 16 May 2008)
New Revision: 1323

Modified:
   trunk/include/opensrf/osrf_hash.h
   trunk/src/libopensrf/osrf_hash.c
Log:
Patch from Scott McKellar:

These patches provide a new and more efficient implementation of
osrfHash, using a hash table for random lookups and a doubly linked
list for iterations.

It is more efficient for two main reasons.  First, iteration no longer
maintains a separate copy of the keys, each of which then had to go
through the hashing algorithm again to look up its item.  Second, when
updating an existing item, it updates it in place rather than deleting
it and creating a new one.



Modified: trunk/include/opensrf/osrf_hash.h
===================================================================

--- trunk/include/opensrf/osrf_hash.h	2008-05-15 20:55:02 UTC (rev 1322)
+++ trunk/include/opensrf/osrf_hash.h	2008-05-16 12:45:11 UTC (rev 1323)
@@ -5,21 +5,10 @@
 #include <opensrf/string_array.h>
 #include <opensrf/osrf_list.h>
 
-struct _osrfHashStruct {
-	osrfList* hash; /* this hash */
-	void (*freeItem) (char* key, void* item);	/* callback for freeing stored items */
-	unsigned int size;
-	osrfStringArray* keys;
-};
+struct _osrfHashStruct;
 typedef struct _osrfHashStruct osrfHash;
 
-struct _osrfHashIteratorStruct {
-	char* current;
-	size_t currsize;  // length of "current" buffer
-	int currentIdx;
-	osrfHash* hash;
-	osrfStringArray* keys;
-};
+struct _osrfHashIteratorStruct;
 typedef struct _osrfHashIteratorStruct osrfHashIterator;
 
 /**
@@ -28,7 +17,7 @@
 osrfHash* osrfNewHash();
 
 /** Installs a callback function for freeing stored items
-    */
+ */
 void osrfHashSetCallback( osrfHash* hash, void (*callback) (char* key, void* item) );
 
 /**
@@ -59,8 +48,6 @@
   */
 osrfStringArray* osrfHashKeys( osrfHash* hash );
 
-osrfStringArray* osrfHashKeysInc( osrfHash* hash );
-
 /**
   Frees a hash
   */
@@ -71,9 +58,6 @@
   */
 unsigned long osrfHashGetCount( osrfHash* hash );
 
-
-
-
 /**
   Creates a new list iterator with the given list
   */
@@ -89,7 +73,7 @@
 
 /**
   Returns a pointer to the key of the current hash item
- */
+  */
 const char* osrfHashIteratorKey( const osrfHashIterator* itr );
 
 /**

Modified: trunk/src/libopensrf/osrf_hash.c
===================================================================
--- trunk/src/libopensrf/osrf_hash.c	2008-05-15 20:55:02 UTC (rev 1322)
+++ trunk/src/libopensrf/osrf_hash.c	2008-05-16 12:45:11 UTC (rev 1323)
@@ -1,3 +1,32 @@
+/*
+Copyright (C) 2007, 2008  Georgia Public Library Service
+Bill Erickson <erickson at esilibrary.com>
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+-----------
+
+An osrfHash is a hybrid between a hash table and a doubly linked
+list.  The hash table supports random lookups by key.  The list
+supports iterative traversals.  The sequence of entries in the
+list reflects the sequence in which the entries were added.
+
+osrfHashIterators are somewhat unusual in that, if an iterator
+is positioned on a given entry, deletion of that entry does
+not invalidate the iterator.  The entry to which it points is
+logically but not physically deleted.  You can still advance
+the iterator to the next entry in the list.
+
+*/
+
 #include <opensrf/osrf_hash.h>
 
 struct _osrfHashNodeStruct {
@@ -3,32 +32,46 @@
 	char* key;
 	void* item;
+	struct _osrfHashNodeStruct* prev;
+	struct _osrfHashNodeStruct* next;
 };
 typedef struct _osrfHashNodeStruct osrfHashNode;
 
+struct _osrfHashStruct {
+	osrfList* hash; /* this hash */
+	void (*freeItem) (char* key, void* item);	/* callback for freeing stored items */
+	unsigned int size;
+	osrfHashNode* first_key;
+	osrfHashNode* last_key;
+};
+
+struct _osrfHashIteratorStruct {
+	osrfHash* hash;
+	osrfHashNode* curr_node;
+};
+
 /* 0x100 is a good size for small hashes */
 //#define OSRF_HASH_LIST_SIZE 0x100  /* size of the main hash list */
 #define OSRF_HASH_LIST_SIZE 0x10  /* size of the main hash list */
 
+
 /* used internally */
 #define OSRF_HASH_NODE_FREE(h, n) \
 	if(h && n) { \
-		if(h->freeItem) h->freeItem(n->key, n->item);\
+		if(h->freeItem && n->key) h->freeItem(n->key, n->item);\
 		free(n->key); free(n); \
 }
 
-static osrfHashNode* osrfNewHashNode(char* key, void* item);
-static void* osrfHashNodeFree(osrfHash*, osrfHashNode*);
-
 osrfHash* osrfNewHash() {
 	osrfHash* hash;
 	OSRF_MALLOC(hash, sizeof(osrfHash));
 	hash->hash		= osrfNewList();
-	hash->freeItem  = NULL;
-	hash->size      = 0;
-	hash->keys		= osrfNewStringArray(64);
+	hash->first_key = NULL;
+	hash->last_key  = NULL;
 	return hash;
 }
 
-/* algorithm proposed by Donald E. Knuth
+static osrfHashNode* osrfNewHashNode(char* key, void* item);
+
+/* algorithm proposed by Donald E. Knuth 
  * in The Art Of Computer Programming Volume 3 (more or less..)*/
 /*
@@ -44,53 +87,63 @@
 }
 */
 
-
 /* macro version of the above function */
 #define OSRF_HASH_MAKE_KEY(str,num) \
    do {\
-      char* __k = str;\
-      unsigned int __len = strlen(__k); \
-      unsigned int __h = __len;\
-      unsigned int __i = 0;\
-      for(__i = 0; __i < __len; __k++, __i++)\
-         __h = ((__h << 5) ^ (__h >> 27)) ^ (*__k);\
-      num = (__h & (OSRF_HASH_LIST_SIZE-1));\
+      const char* k__ = str;\
+      unsigned int len__ = strlen(k__); \
+      unsigned int h__ = len__;\
+      unsigned int i__ = 0;\
+      for(i__ = 0; i__ < len__; k__++, i__++)\
+         h__ = ((h__ << 5) ^ (h__ >> 27)) ^ (*k__);\
+      num = (h__ & (OSRF_HASH_LIST_SIZE-1));\
    } while(0)
 
-/** Installs a callback function for freeing stored items
-    */
+/* Installs a callback function for freeing stored items */
 void osrfHashSetCallback( osrfHash* hash, void (*callback) (char* key, void* item) )
 {
 	if( hash ) hash->freeItem = callback;
 }
 
-/* returns the index of the item and points l to the sublist the item
- * lives in if the item and points n to the hashnode the item 
- * lives in if the item is found.  Otherwise -1 is returned */
-static unsigned int osrfHashFindItem( osrfHash* hash, char* key, osrfList** l, osrfHashNode** n ) {
-	if(!(hash && key)) return -1;
+/* Returns a pointer to the item's node if found; otherwise returns NULL. */
+static osrfHashNode* find_item( const osrfHash* hash,
+		const char* key, unsigned int* bucketkey ) {
 
+	// Find the sub-list in the hash table
 
+	if( hash->size < 6 && !bucketkey )
+	{
+		// For only a few entries, when we don't need to identify
+		// the hash bucket, it's probably faster to search the
+		// linked list instead of hashing
+
+		osrfHashNode* currnode = hash->first_key;
+		while( currnode && strcmp( currnode->key, key ) )
+			 currnode = currnode->next;
+
+		return currnode;
+	}
+				
 	unsigned int i = 0;
 	OSRF_HASH_MAKE_KEY(key,i);
 
+	// If asked, report which slot the key hashes to
+	if( bucketkey ) *bucketkey = i;
+
 	osrfList* list = OSRF_LIST_GET_INDEX( hash->hash, i );
-	if( !list ) { return -1; }
+	if( !list ) { return NULL; }
 
+	// Search the sub-list
+	
 	int k;
 	osrfHashNode* node = NULL;
 	for( k = 0; k < list->size; k++ ) {
 		node = OSRF_LIST_GET_INDEX(list, k);
 		if( node && node->key && !strcmp(node->key, key) )
-			break;
-		node = NULL;
+			return node;
 	}
 
-	if(!node) return -1;
-
-	if(l) *l = list;
-	if(n) *n = node;
-	return k;
+	return NULL;
 }
 
 static osrfHashNode* osrfNewHashNode(char* key, void* item) {
@@ -99,28 +152,36 @@
 	OSRF_MALLOC(n, sizeof(osrfHashNode));
 	n->key = strdup(key);
 	n->item = item;
+	n->prev = NULL;
+	n->next = NULL;	/* list traversals stop on a NULL next, so set it explicitly */
 	return n;
 }
 
-static void* osrfHashNodeFree(osrfHash* hash, osrfHashNode* node) {
-	if(!(node && hash)) return NULL;
-	void* item = NULL;
-	if( hash->freeItem )
-		hash->freeItem( node->key, node->item );
-	else item = node->item;
-	free(node->key);
-	free(node);
-	return item;
-}
-
+/* If an entry exists for a given key, update it; otherwise create it.
+   If an entry exists, and there is no callback function to destroy it,
+   return a pointer to it so that the calling code has the option of
+   destroying it.  Otherwise return NULL.
+*/
 void* osrfHashSet( osrfHash* hash, void* item, const char* key, ... ) {
 	if(!(hash && item && key )) return NULL;
 
+	void* olditem = NULL;
+	unsigned int bucketkey;
+	
 	VA_LIST_TO_STRING(key);
-	void* olditem = osrfHashRemove( hash, VA_BUF );
+	osrfHashNode* node = find_item( hash, VA_BUF, &bucketkey );
+	if( node ) {
 
-	unsigned int bucketkey = 0;
-	OSRF_HASH_MAKE_KEY(VA_BUF,bucketkey);
+		// We already have an item for this key.  Update it in place.
+		if( hash->freeItem ) {
+			hash->freeItem( node->key, node->item );
+		}
+		else
+			olditem = node->item;
+
+		node->item = item;
+		return olditem;
+	}
 	
 	osrfList* bucket;
 	if( !(bucket = OSRF_LIST_GET_INDEX(hash->hash, bucketkey)) ) {
@@ -128,31 +189,65 @@
 		osrfListSet( hash->hash, bucket, bucketkey );
 	}
 
-	osrfHashNode* node = osrfNewHashNode(VA_BUF, item);
+	node = osrfNewHashNode(VA_BUF, item);
 	osrfListPushFirst( bucket, node );
 
-	if(!osrfStringArrayContains(hash->keys, VA_BUF))
-		osrfStringArrayAdd( hash->keys, VA_BUF );
+	hash->size++;
 
-	hash->size++;
+	// Add the new hash node to the end of the linked list
+
+	if( NULL == hash->first_key )
+		hash->first_key = hash->last_key = node;
+	else {
+		node->prev = hash->last_key;
+		hash->last_key->next = node;
+		hash->last_key = node;
+	}
+	
 	return olditem;
 }
 
+/* Delete the entry for a specified key.  If the entry exists,
+   and there is no callback function to destroy the associated
+   item, return a pointer to the formerly associated item.
+   Otherwise return NULL.
+*/
 void* osrfHashRemove( osrfHash* hash, const char* key, ... ) {
 	if(!(hash && key )) return NULL;
 
 	VA_LIST_TO_STRING(key);
 
-	osrfList* list = NULL;
-	osrfHashNode* node;
-	int index = osrfHashFindItem( hash, (char*) VA_BUF, &list, &node );
-	if( index == -1 ) return NULL;
+	osrfHashNode* node = find_item( hash, VA_BUF, NULL );
+	if( !node ) return NULL;
 
-	osrfListRemove( list, index );
 	hash->size--;
 
-	void* item = osrfHashNodeFree(hash, node);
-	osrfStringArrayRemove(hash->keys, VA_BUF);
+	void* item = NULL;  // to be returned
+	if( hash->freeItem )
+		hash->freeItem( node->key, node->item );
+	else
+		item = node->item;
+
+	// Mark the node as logically deleted
+	
+	free(node->key);
+	node->key = NULL;
+	node->item = NULL;
+
+	// Make the node unreachable from the rest of the linked list.
+	// We leave the next and prev pointers in place so that an
+	// iterator parked here can find its way to an adjacent node.
+
+	if( node->prev )
+		node->prev->next = node->next;
+	else
+		hash->first_key = node->next;
+
+	if( node->next )
+		node->next->prev = node->prev;
+	else
+		hash->last_key = node->prev;
+	
 	return item;
 }
 
@@ -161,36 +256,26 @@
 	if(!(hash && key )) return NULL;
 	VA_LIST_TO_STRING(key);
 
-	osrfHashNode* node = NULL;
-	int index = osrfHashFindItem( hash, (char*) VA_BUF, NULL, &node );
-	if( index == -1 ) return NULL;
+	osrfHashNode* node = find_item( hash, (char*) VA_BUF, NULL );
+	if( !node ) return NULL;
 	return node->item;
 }
 
-
-osrfStringArray* osrfHashKeysInc( osrfHash* hash ) {
-	if(!hash) return NULL;
-	return hash->keys;
-}
-
 osrfStringArray* osrfHashKeys( osrfHash* hash ) {
 	if(!hash) return NULL;
 	
-	int i, k;
-	osrfList* list;
 	osrfHashNode* node;
-	osrfStringArray* strings = osrfNewStringArray(8);
+	osrfStringArray* strings = osrfNewStringArray( hash->size );
 
-	for( i = 0; i != hash->hash->size; i++ ) {
-		list = OSRF_LIST_GET_INDEX( hash->hash, i );
-		if(list) {
-			for( k = 0; k != list->size; k++ ) {
-				node = OSRF_LIST_GET_INDEX( list, k );	
-				if( node ) osrfStringArrayAdd( strings, node->key );
-			}
-		}
+	// Add every key on the linked list
+	
+	node = hash->first_key;
+	while( node ) {
+		if( node->key )  // should always be true
+			osrfStringArrayAdd( strings, node->key );
+		node = node->next;
 	}
-
+	
 	return strings;
 }
 
@@ -219,68 +304,57 @@
 	}
 
 	osrfListFree(hash->hash);
-    OSRF_STRING_ARRAY_FREE(hash->keys);
 	free(hash);
 }
 
-
-
 osrfHashIterator* osrfNewHashIterator( osrfHash* hash ) {
 	if(!hash) return NULL;
 	osrfHashIterator* itr;
 	OSRF_MALLOC(itr, sizeof(osrfHashIterator));
 	itr->hash = hash;
-	itr->currentIdx = 0;
-	itr->current = NULL;
-	itr->currsize = 0;
-	itr->keys = osrfHashKeysInc(hash);
+	itr->curr_node = NULL;
 	return itr;
 }
 
 void* osrfHashIteratorNext( osrfHashIterator* itr ) {
 	if(!(itr && itr->hash)) return NULL;
-	if( itr->currentIdx >= itr->keys->size ) return NULL;
 
-	// Copy the string to iter->current
-	const char * curr = osrfStringArrayGetString(itr->keys, itr->currentIdx++);
-	size_t new_len = strlen(curr);
-	if( new_len >= itr->currsize ) {
-		// We need a bigger buffer
+	// Advance to the next node in the linked list
+	
+	if( NULL == itr->curr_node )
+		itr->curr_node = itr->hash->first_key;
+	else
+		itr->curr_node = itr->curr_node->next;
 
-		if(0 == itr->currsize) itr->currsize = 64; //default size
-		do {
-			itr->currsize *= 2;
-		} while( new_len >= itr->currsize );
+	if( itr->curr_node )
+		return itr->curr_node->item;
+	else
+		return NULL;
+}
 
-		if(itr->current)
-			free(itr->current);
-		itr->current = safe_malloc(itr->currsize);
-	}
-	strcpy(itr->current, curr);
-	
-	char* val = osrfHashGet( itr->hash, itr->current );
-	return val;
+const char* osrfHashIteratorKey( const osrfHashIterator* itr ) {
+	if( itr && itr->curr_node )
+		return itr->curr_node->key;
+	else
+		return NULL;
 }
 
 void osrfHashIteratorFree( osrfHashIterator* itr ) {
-	if(!itr) return;
-	free(itr->current);
-	free(itr);
+	if(itr)
+		free(itr);
 }
 
-const char* osrfHashIteratorKey( const osrfHashIterator* itr ) {
-    if( ! itr ) return NULL;
-    return itr->current;
-}
-
 void osrfHashIteratorReset( osrfHashIterator* itr ) {
 	if(!itr) return;
-    if(itr->current) itr->current[0] = '\0';
-	itr->keys = osrfHashKeysInc(itr->hash);
-	itr->currentIdx = 0;
+	itr->curr_node = NULL;
 }
 
 
 int osrfHashIteratorHasNext( osrfHashIterator* itr ) {
-	return ( itr->currentIdx < itr->keys->size ) ? 1 : 0;
+	if( !itr )
+		return 0;
+	else if( itr->curr_node )
+		return itr->curr_node->next ? 1 : 0;
+	else
+		return itr->hash->first_key ? 1 : 0;
 }