[Opensrf-commits] r1775 - in trunk: include/opensrf src/libopensrf (scottmk)

svn at svn.open-ils.org svn at svn.open-ils.org
Fri Sep 11 15:38:53 EDT 2009


Author: scottmk
Date: 2009-09-11 15:38:48 -0400 (Fri, 11 Sep 2009)
New Revision: 1775

Modified:
   trunk/include/opensrf/osrf_hash.h
   trunk/src/libopensrf/osrf_hash.c
Log:
In osrfNewHash(): specify a size for the osrfList used as a hash table,
so as to avoid wasting memory.

In osrfHashSet(): rearranged the logic a bit for clarity; no change in
behavior.

In osrfHashIteratorNext(): added a bit of protection against a corrupted
iterator.

Throughout: added Doxygen-style comments for documentation.  Removed
some comments from the header so that they wouldn't override more
complete comments from the implementation file.


Modified: trunk/include/opensrf/osrf_hash.h
===================================================================
--- trunk/include/opensrf/osrf_hash.h	2009-09-10 22:25:26 UTC (rev 1774)
+++ trunk/include/opensrf/osrf_hash.h	2009-09-11 19:38:48 UTC (rev 1775)
@@ -1,6 +1,18 @@
 #ifndef OSRF_HASH_H
 #define OSRF_HASH_H
 
+/**
+	@file osrf_hash.h
+	@brief A hybrid between a hash table and a doubly linked list.
+
+	The hash table supports random lookups by key.  The list supports iterative traversals.
+	The sequence of entries in the list reflects the sequence in which the keys were added.
+
+	osrfHashIterators are somewhat unusual in that, if an iterator is positioned on a given
+	entry, deletion of that entry does not invalidate the iterator.  The entry to which it
+	points is logically but not physically deleted.  You can still advance the iterator to the
+	next entry in the list.
+*/
 #include <opensrf/utils.h>
 #include <opensrf/string_array.h>
 #include <opensrf/osrf_list.h>
@@ -15,75 +27,32 @@
 struct _osrfHashIteratorStruct;
 typedef struct _osrfHashIteratorStruct osrfHashIterator;
 
-/**
-  Allocates a new hash object
-  */
 osrfHash* osrfNewHash();
 
-/** Installs a callback function for freeing stored items
- */
 void osrfHashSetCallback( osrfHash* hash, void (*callback) (char* key, void* item) );
 
-/**
-  Sets the given key with the given item
-  if "freeItem" is defined and an item already exists at the given location, 
-  then old item is freed and the new item is put into place.
-  if "freeItem" is not defined and an item already exists, the old item
-  is returned.
-  @return The old item if exists and there is no 'freeItem', returns NULL
-  otherwise
-  */
 void* osrfHashSet( osrfHash* hash, void* item, const char* key, ... );
 
-/**
-  Removes an item from the hash.
-  if 'freeItem' is defined it is used and NULL is returned,
-  else the freed item is returned
-  */
 void* osrfHashRemove( osrfHash* hash, const char* key, ... );
 
 void* osrfHashGet( osrfHash* hash, const char* key );
 
 void* osrfHashGetFmt( osrfHash* hash, const char* key, ... );
 
-/**
-  @return A list of strings representing the keys of the hash. 
-  caller is responsible for freeing the returned string array 
-  with osrfStringArrayFree();
-  */
 osrfStringArray* osrfHashKeys( osrfHash* hash );
 
-/**
-  Frees a hash
-  */
 void osrfHashFree( osrfHash* hash );
 
-/**
-  @return The number of items in the hash
-  */
 unsigned long osrfHashGetCount( osrfHash* hash );
 
-/**
-  Creates a new list iterator with the given list
-  */
 osrfHashIterator* osrfNewHashIterator( osrfHash* hash );
 
 int osrfHashIteratorHasNext( osrfHashIterator* itr );
 
-/**
-  Returns the next non-NULL item in the list, return NULL when
-  the end of the list has been reached
-  */
 void* osrfHashIteratorNext( osrfHashIterator* itr );
 
-/**
-  Returns a pointer to the key of the current hash item
-  */
 const char* osrfHashIteratorKey( const osrfHashIterator* itr );
 
-/**
-  Deallocates the given list
-  */
 void osrfHashIteratorFree( osrfHashIterator* itr );
 
 void osrfHashIteratorReset( osrfHashIterator* itr );

Modified: trunk/src/libopensrf/osrf_hash.c
===================================================================
--- trunk/src/libopensrf/osrf_hash.c	2009-09-10 22:25:26 UTC (rev 1774)
+++ trunk/src/libopensrf/osrf_hash.c	2009-09-11 19:38:48 UTC (rev 1775)
@@ -1,3 +1,8 @@
+/**
+	@file osrf_hash.c
+	@brief A hybrid between a hash table and a doubly linked list.
+*/
+
 /*
 Copyright (C) 2007, 2008  Georgia Public Library Service
 Bill Erickson <erickson at esilibrary.com>
@@ -11,61 +16,103 @@
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
-
------------
-
-An osrfHash is a hybrid between a hash table and a doubly linked
-list.  The hash table supports random lookups by key.  The list
-supports iterative traversals.  The sequence of entries in the
-list reflects the sequence in which the entries were added.
-
-osrfHashIterators are somewhat unusual in that, if an iterator
-is positioned on a given entry, deletion of that entry does
-not invalidate the iterator.  The entry to which it points is
-logically but not physically deleted.  You can still advance
-the iterator to the next entry in the list.
-
 */
 
 #include <opensrf/osrf_hash.h>
 
+/**
+	@brief A node storing a single item within an osrfHash.
+*/
 struct _osrfHashNodeStruct {
+	/** @brief String containing the key for the item */
 	char* key;
+	/** @brief Pointer to the stored item data */
 	void* item;
+	/** @brief Pointer to the next node in a doubly linked list */
 	struct _osrfHashNodeStruct* prev;
+	/** @brief Pointer to the previous node in a doubly linked list */
 	struct _osrfHashNodeStruct* next;
 };
 typedef struct _osrfHashNodeStruct osrfHashNode;
 
+/**
+	@brief osrfHash structure
+
+	An osrfHash is partly a key/value store based on a hash table, and partly a linear
+	structure implemented as a linked list.
+
+	The hash table is an array of pointers, managed by an osrfList.  Each pointer in that array
+	points to another layer of osrfList, which stores pointers to osrfHashNodes for keys that
+	hash to the same value.
+
+	Besides residing in this hash table structure, each osrfHashNode resides in a doubly
+	linked list that includes all the osrfHashNodes in the osrfHash (except for any nodes
+	that have been logically deleted).  New nodes are added to the end of the list.
+
+	The hash table supports lookups based on a key.
+
+	The linked list supports sequential traversal, reflecting the sequence in which the nodes
+	were added.
+*/
 struct _osrfHashStruct {
-	osrfList* hash; /* this hash */
-	void (*freeItem) (char* key, void* item);	/* callback for freeing stored items */
+	/** @brief Pointer to the osrfList used as a hash table */
+	osrfList* hash;
+	/** @brief Callback function for freeing stored items */
+	void (*freeItem) (char* key, void* item);
+	/** @brief How many items are in the osrfHash */
 	unsigned int size;
+	/** @brief Pointer to the first node in the linked list */
 	osrfHashNode* first_key;
+	/** @brief Pointer to the last node in the linked list */
 	osrfHashNode* last_key;
 };
 
+/**
+	@brief Maintains a position in an osrfHash, for traversing the linked list
+*/
 struct _osrfHashIteratorStruct {
+	/** @brief Pointer to the associated osrfHash */
 	osrfHash* hash;
+	/** @brief Pointer to the current osrfHashNode (the one previously returned, if any) */
 	osrfHashNode* curr_node;
 };
 
+/**
+	@brief How many slots in the hash table.
+
+	Must be a power of 2, or the hashing algorithm won't work properly.
+*/
 /* 0x100 is a good size for small hashes */
 //#define OSRF_HASH_LIST_SIZE 0x100  /* size of the main hash list */
 #define OSRF_HASH_LIST_SIZE 0x10  /* size of the main hash list */
 
 
 /* used internally */
+/**
+	@brief Free an osrfHashNode, and its cargo, from within a given osrfHash.
+	@param h Pointer to the osrfHash
+	@param n Pointer to the osrfHashNode
+
+	If there is a callback function for freeing the item, call it.
+
+	We use this macro only when freeing an entire osrfHash.
+*/
 #define OSRF_HASH_NODE_FREE(h, n) \
 	if(h && n) { \
 		if(h->freeItem && n->key) h->freeItem(n->key, n->item);\
 		free(n->key); free(n); \
 }
 
+/**
+	@brief Create and initialize a new (and empty) osrfHash.
+	@return Pointer to the newly created osrfHash.
+
+	The calling code is responsible for freeing the osrfHash.
+*/
 osrfHash* osrfNewHash() {
 	osrfHash* hash;
 	OSRF_MALLOC(hash, sizeof(osrfHash));
-	hash->hash		= osrfNewList();
+	hash->hash		= osrfNewListSize( OSRF_HASH_LIST_SIZE );
 	hash->first_key = NULL;
 	hash->last_key  = NULL;
 	return hash;
@@ -73,8 +120,6 @@
 
 static osrfHashNode* osrfNewHashNode(char* key, void* item);
 
-/* algorithm proposed by Donald E. Knuth 
- * in The Art Of Computer Programming Volume 3 (more or less..)*/
 /*
 static unsigned int osrfHashMakeKey(char* str) {
 	if(!str) return 0;
@@ -88,6 +133,14 @@
 */
 
 /* macro version of the above function */
+/**
+	@brief Hashing algorithm: derive a mangled number from a string.
+	@param str Pointer to the string to be hashed.
+	@param num A numeric variable to receive the resulting value.
+
+	This macro implements an algorithm proposed by Donald E. Knuth
+	in The Art of Computer Programming Volume 3 (more or less..)
+*/
 #define OSRF_HASH_MAKE_KEY(str,num) \
    do {\
       const char* k__ = str;\
@@ -99,13 +152,32 @@
       num = (h__ & (OSRF_HASH_LIST_SIZE-1));\
    } while(0)
 
-/* Installs a callback function for freeing stored items */
+/**
+	@brief Install a callback function for freeing a stored item.
+	@param hash The osrfHash in which the callback function is to be installed.
+	@param callback A function pointer.
+
+	The callback function must accept a key value and a void pointer, and return void.  The
+	key value enables the callback function to treat different kinds of items differently,
+	provided that it can distinguish them by key.
+*/
 void osrfHashSetCallback( osrfHash* hash, void (*callback) (char* key, void* item) )
 {
 	if( hash ) hash->freeItem = callback;
 }
 
 /* Returns a pointer to the item's node if found; otherwise returns NULL. */
+/**
+	@brief Search for a given key in an osrfHash.
+	@param hash Pointer to the osrfHash.
+	@param key The key to be sought.
+	@param bucketkey Pointer through which we can report which slot the item belongs in.
+	@return A pointer to the osrfHashNode where the item resides; or NULL, if it isn't there.
+
+	If the bucketkey parameter is not NULL, we update the index of the hash bucket where the
+	item belongs (whether or not we actually found it).  In some cases this feedback enables the
+	calling function to avoid hashing the same key twice.
+*/
 static osrfHashNode* find_item( const osrfHash* hash,
 		const char* key, unsigned int* bucketkey ) {
 
@@ -123,7 +195,7 @@
 
 		return currnode;
 	}
-				
+
 	unsigned int i = 0;
 	OSRF_HASH_MAKE_KEY(key,i);
 
@@ -134,7 +206,7 @@
 	if( !list ) { return NULL; }
 
 	// Search the sub-list
-	
+
 	int k;
 	osrfHashNode* node = NULL;
 	for( k = 0; k < list->size; k++ ) {
@@ -146,6 +218,12 @@
 	return NULL;
 }
 
+/**
+	@brief Create and populate a new osrfHashNode.
+	@param key The key string.
+	@param item A pointer to the item associated with the key.
+	@return A pointer to the newly created node.
+*/
 static osrfHashNode* osrfNewHashNode(char* key, void* item) {
 	if(!(key && item)) return NULL;
 	osrfHashNode* n;
@@ -157,22 +235,36 @@
 	return n;
 }
 
-/* If an entry exists for a given key, update it; otherwise create it.
-   If an entry exists, and there is no callback function to destroy it,
-   return a pointer to it so that the calling code has the option of
-   destroying it.  Otherwise return NULL.
+/**
+	@brief Store an item for a given key in an osrfHash.
+	@param hash Pointer to the osrfHash in which the item is to be stored.
+	@param item Pointer to the item to be stored.
+	@param key A printf-style format string to be expanded into the key for the item.  Subsequent
+		parameters, if any, will be formatted and inserted into the expanded key.
+	@return Pointer to an item previously stored for the same key, if any (see discussion).
+
+	If no item is already stored for the same key, osrfHashSet creates a new entry for it,
+	and returns NULL.
+
+	If an item is already stored for the same key, osrfHashSet updates the entry in place.  The
+	fate of the previously stored item varies.  If there is a callback defined for freeing the
+	item, osrfHashSet calls it, and returns NULL.  Otherwise it returns a pointer to the
+	previously stored item, so that the calling function can dispose of it.
+
+	osrfHashSet returns NULL if any of its first three parameters is NULL.
 */
 void* osrfHashSet( osrfHash* hash, void* item, const char* key, ... ) {
 	if(!(hash && item && key )) return NULL;
 
-	void* olditem = NULL;
 	unsigned int bucketkey;
-	
+
 	VA_LIST_TO_STRING(key);
 	osrfHashNode* node = find_item( hash, VA_BUF, &bucketkey );
 	if( node ) {
 
 		// We already have an item for this key.  Update it in place.
+		void* olditem = NULL;
+
 		if( hash->freeItem ) {
 			hash->freeItem( node->key, node->item );
 		}
@@ -182,7 +274,8 @@
 		node->item = item;
 		return olditem;
 	}
-	
+
+	// There is no entry for this key.  Create a new one.
 	osrfList* bucket;
 	if( !(bucket = OSRF_LIST_GET_INDEX(hash->hash, bucketkey)) ) {
 		bucket = osrfNewList();
@@ -203,8 +296,8 @@
 		hash->last_key->next = node;
 		hash->last_key = node;
 	}
-	
-	return olditem;
+
+	return NULL;
 }
 
 /* Delete the entry for a specified key.  If the entry exists,
@@ -212,6 +305,29 @@
    item, return a pointer to the formerly associated item.
    Otherwise return NULL.
 */
+/**
+	@brief Remove the item for a specified key from an osrfHash.
+	@param hash Pointer to the osrfHash from which the item is to be removed.
+	@param key A printf-style format string to be expanded into the key for the item.  Subsequent
+		parameters, if any, will be formatted and inserted into the expanded key.
+	@return Pointer to the removed item, if any (see discussion).
+
+	If no entry is present for the specified key, osrfHashRemove returns NULL.
+
+	If there is such an entry, osrfHashRemove removes it.  The fate of the associated item
+	varies.  If there is a callback function for freeing items, osrfHashRemove calls it, and
+	returns NULL.  Otherwise it returns a pointer to the removed item, so that the calling
+	code can dispose of it.
+
+	osrfHashRemove returns NULL if either of its first two parameters is NULL.
+
+	Note: the osrfHashNode for the removed item is logically deleted so that subsequent searches
+	and traversals will ignore it.  However it is physically left in place so that an
+	osrfHashIterator pointing to it can advance to the next node.  The downside of this
+	design is that, if a lot of items are removed, the accumulation of logically deleted nodes
+	will slow down searches.  In addition, the memory used by a logically deleted osrfHashNode
+	remains allocated until the entire osrfHashNode is freed.
+*/
 void* osrfHashRemove( osrfHash* hash, const char* key, ... ) {
 	if(!(hash && key )) return NULL;
 
@@ -229,7 +345,7 @@
 		item = node->item;
 
 	// Mark the node as logically deleted
-	
+
 	free(node->key);
 	node->key = NULL;
 	node->item = NULL;
@@ -247,18 +363,32 @@
 		node->next->prev = node->prev;
 	else
 		hash->last_key = node->prev;
-	
+
 	return item;
 }
 
 
+/**
+	@brief Fetch the item stored in an osrfHash for a given key.
+	@param hash Pointer to the osrfHash from which to fetch the item.
+	@param key The key for the item sought.
+	@return A pointer to the item, if it exists; otherwise NULL.
+*/
 void* osrfHashGet( osrfHash* hash, const char* key ) {
 	if(!(hash && key )) return NULL;
+
 	osrfHashNode* node = find_item( hash, key, NULL );
 	if( !node ) return NULL;
 	return node->item;
 }
 
+/**
+	@brief Fetch the item stored in an osrfHash for a given key.
+	@param hash Pointer to the osrfHash from which to fetch the item.
+	@param key A printf-style format string to be expanded into the key for the item.  Subsequent
+		parameters, if any, will be formatted and inserted into the expanded key.
+	@return A pointer to the item, if it exists; otherwise NULL.
+ */
 void* osrfHashGetFmt( osrfHash* hash, const char* key, ... ) {
 	if(!(hash && key )) return NULL;
 	VA_LIST_TO_STRING(key);
@@ -268,30 +398,57 @@
 	return node->item;
 }
 
+/**
+	@brief Create an osrfStringArray containing all the keys in an osrfHash.
+	@param hash Pointer to the osrfHash whose keys are to be extracted.
+	@return A pointer to the newly created osrfStringArray.
+
+	The strings in the osrfStringArray are arranged in the sequence in which they were added to
+	the osrfHash.
+
+	The calling code is responsible for freeing the osrfStringArray by calling
+	osrfStringArrayFree().
+
+	osrfHashKeys returns NULL if its parameter is NULL.
+*/
 osrfStringArray* osrfHashKeys( osrfHash* hash ) {
 	if(!hash) return NULL;
-	
+
 	osrfHashNode* node;
 	osrfStringArray* strings = osrfNewStringArray( hash->size );
 
 	// Add every key on the linked list
-	
+
 	node = hash->first_key;
 	while( node ) {
 		if( node->key )  // should always be true
 			osrfStringArrayAdd( strings, node->key );
 		node = node->next;
 	}
-	
+
 	return strings;
 }
 
 
+/**
+	@brief Count the items an osrfHash.
+	@param hash Pointer to the osrfHash whose items are to be counted.
+	@return The number of items in the osrfHash.
+
+	If the parameter is NULL, osrfHashGetCount returns -1.
+*/
 unsigned long osrfHashGetCount( osrfHash* hash ) {
 	if(!hash) return -1;
 	return hash->size;
 }
 
+/**
+	@brief Free an osrfHash and all of its contents.
+	@param hash Pointer to the osrfHash to be freed.
+
+	If a callback function has been defined for freeing items, osrfHashFree calls it for
+		each stored item.
+*/
 void osrfHashFree( osrfHash* hash ) {
 	if(!hash) return;
 
@@ -314,6 +471,16 @@
 	free(hash);
 }
 
+/**
+	@brief Create and initialize an osrfHashIterator for a given osrfHash.
+	@param hash Pointer to the osrfHash with which the iterator will be associated.
+	@return Pointer to the newly created osrfHashIterator.
+
+	An osrfHashIterator may be used to traverse the items stored in an osrfHash.
+
+	The calling code is responsible for freeing the osrfHashIterator by calling
+	osrfHashIteratorFree().
+*/
 osrfHashIterator* osrfNewHashIterator( osrfHash* hash ) {
 	if(!hash) return NULL;
 	osrfHashIterator* itr;
@@ -323,11 +490,18 @@
 	return itr;
 }
 
+/**
+	@brief Advance an osrfHashIterator to the next item in an osrfHash.
+	@param itr Pointer to the osrfHashIterator to be advanced.
+	@return A Pointer to the next stored item, if there is one; otherwise NULL.
+
+	The items are returned in the sequence in which their keys were added to the osrfHash.
+*/
 void* osrfHashIteratorNext( osrfHashIterator* itr ) {
 	if(!(itr && itr->hash)) return NULL;
 
 	// Advance to the next node in the linked list
-	
+
 	if( NULL == itr->curr_node )
 		itr->curr_node = itr->hash->first_key;
 	else
@@ -339,6 +513,15 @@
 		return NULL;
 }
 
+/**
+	@brief Fetch the key where an osrfHashIterator is currently positioned.
+	@param itr Pointer to the osrfHashIterator.
+	@return A const pointer to the key, if the iterator is currently positioned on an item;
+		otherwise NULL.
+
+	The key returned is the one associated with the previous call to osrfHashIteratorNext(),
+	unless there has been a call to osrfHashIteratorReset() in the meanwhile.
+*/
 const char* osrfHashIteratorKey( const osrfHashIterator* itr ) {
 	if( itr && itr->curr_node )
 		return itr->curr_node->key;
@@ -346,19 +529,37 @@
 		return NULL;
 }
 
+/**
+	@brief Free an osrfHashIterator.
+	@param itr Pointer to the osrfHashIterator to be freed.
+*/
 void osrfHashIteratorFree( osrfHashIterator* itr ) {
 	if(itr)
 		free(itr);
 }
 
+/**
+	@brief Restore an osrfHashIterator to its original pristine state.
+	@param itr Pointer to the osrfHashIterator to be reset.
+
+	After resetting the iterator, the next call to osrfHashIteratorNext() will return a pointer
+	to the first item in the list.
+*/
 void osrfHashIteratorReset( osrfHashIterator* itr ) {
 	if(!itr) return;
 	itr->curr_node = NULL;
 }
 
 
+/**
+	@brief Determine whether there is another entry in an osrfHash beyond the current
+		position of an osrfHashIterator.
+	@param itr Pointer to the osrfHashIterator in question.
+	@return A boolean: 1 if there is another entry beyond the current position, or 0 if the
+		iterator is already at the last entry (or at no entry).
+*/
 int osrfHashIteratorHasNext( osrfHashIterator* itr ) {
-	if( !itr )
+	if( !itr || !itr->hash )
 		return 0;
 	else if( itr->curr_node )
 		return itr->curr_node->next ? 1 : 0;



More information about the opensrf-commits mailing list