[Opensrf-commits] r1775 - in trunk: include/opensrf src/libopensrf (scottmk)
svn at svn.open-ils.org
svn at svn.open-ils.org
Fri Sep 11 15:38:53 EDT 2009
Author: scottmk
Date: 2009-09-11 15:38:48 -0400 (Fri, 11 Sep 2009)
New Revision: 1775
Modified:
trunk/include/opensrf/osrf_hash.h
trunk/src/libopensrf/osrf_hash.c
Log:
In osrfNewHash(): specify a size for the osrfList used as a hash table,
so as to avoid wasting memory.
In osrfHashSet(): rearranged the logic a bit for clarity; no change in
behavior.
In osrfHashIteratorNext(): added a bit of protection against a corrupted
iterator.
Throughout: added Doxygen-style comments for documentation. Removed
some comments from the header so that they wouldn't override more
complete comments from the implementation file.
Modified: trunk/include/opensrf/osrf_hash.h
===================================================================
--- trunk/include/opensrf/osrf_hash.h 2009-09-10 22:25:26 UTC (rev 1774)
+++ trunk/include/opensrf/osrf_hash.h 2009-09-11 19:38:48 UTC (rev 1775)
@@ -1,6 +1,18 @@
#ifndef OSRF_HASH_H
#define OSRF_HASH_H
+/**
+ @file osrf_hash.h
+ @brief A hybrid between a hash table and a doubly linked list.
+
+ The hash table supports random lookups by key. The list supports iterative traversals.
+ The sequence of entries in the list reflects the sequence in which the keys were added.
+
+ osrfHashIterators are somewhat unusual in that, if an iterator is positioned on a given
+ entry, deletion of that entry does not invalidate the iterator. The entry to which it
+ points is logically but not physically deleted. You can still advance the iterator to the
+ next entry in the list.
+*/
#include <opensrf/utils.h>
#include <opensrf/string_array.h>
#include <opensrf/osrf_list.h>
@@ -15,75 +27,32 @@
struct _osrfHashIteratorStruct;
typedef struct _osrfHashIteratorStruct osrfHashIterator;
-/**
- Allocates a new hash object
- */
osrfHash* osrfNewHash();
-/** Installs a callback function for freeing stored items
- */
void osrfHashSetCallback( osrfHash* hash, void (*callback) (char* key, void* item) );
-/**
- Sets the given key with the given item
- if "freeItem" is defined and an item already exists at the given location,
- then old item is freed and the new item is put into place.
- if "freeItem" is not defined and an item already exists, the old item
- is returned.
- @return The old item if exists and there is no 'freeItem', returns NULL
- otherwise
- */
void* osrfHashSet( osrfHash* hash, void* item, const char* key, ... );
-/**
- Removes an item from the hash.
- if 'freeItem' is defined it is used and NULL is returned,
- else the freed item is returned
- */
void* osrfHashRemove( osrfHash* hash, const char* key, ... );
void* osrfHashGet( osrfHash* hash, const char* key );
void* osrfHashGetFmt( osrfHash* hash, const char* key, ... );
-/**
- @return A list of strings representing the keys of the hash.
- caller is responsible for freeing the returned string array
- with osrfStringArrayFree();
- */
osrfStringArray* osrfHashKeys( osrfHash* hash );
-/**
- Frees a hash
- */
void osrfHashFree( osrfHash* hash );
-/**
- @return The number of items in the hash
- */
unsigned long osrfHashGetCount( osrfHash* hash );
-/**
- Creates a new list iterator with the given list
- */
osrfHashIterator* osrfNewHashIterator( osrfHash* hash );
int osrfHashIteratorHasNext( osrfHashIterator* itr );
-/**
- Returns the next non-NULL item in the list, return NULL when
- the end of the list has been reached
- */
void* osrfHashIteratorNext( osrfHashIterator* itr );
-/**
- Returns a pointer to the key of the current hash item
- */
const char* osrfHashIteratorKey( const osrfHashIterator* itr );
-/**
- Deallocates the given list
- */
void osrfHashIteratorFree( osrfHashIterator* itr );
void osrfHashIteratorReset( osrfHashIterator* itr );
Modified: trunk/src/libopensrf/osrf_hash.c
===================================================================
--- trunk/src/libopensrf/osrf_hash.c 2009-09-10 22:25:26 UTC (rev 1774)
+++ trunk/src/libopensrf/osrf_hash.c 2009-09-11 19:38:48 UTC (rev 1775)
@@ -1,3 +1,8 @@
+/**
+ @file osrf_hash.c
+ @brief A hybrid between a hash table and a doubly linked list.
+*/
+
/*
Copyright (C) 2007, 2008 Georgia Public Library Service
Bill Erickson <erickson at esilibrary.com>
@@ -11,61 +16,103 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
------------
-
-An osrfHash is a hybrid between a hash table and a doubly linked
-list. The hash table supports random lookups by key. The list
-supports iterative traversals. The sequence of entries in the
-list reflects the sequence in which the entries were added.
-
-osrfHashIterators are somewhat unusual in that, if an iterator
-is positioned on a given entry, deletion of that entry does
-not invalidate the iterator. The entry to which it points is
-logically but not physically deleted. You can still advance
-the iterator to the next entry in the list.
-
*/
#include <opensrf/osrf_hash.h>
+/**
+ @brief A node storing a single item within an osrfHash.
+*/
struct _osrfHashNodeStruct {
+ /** @brief String containing the key for the item */
char* key;
+ /** @brief Pointer to the stored item data */
void* item;
+ /** @brief Pointer to the next node in a doubly linked list */
struct _osrfHashNodeStruct* prev;
+ /** @brief Pointer to the previous node in a doubly linked list */
struct _osrfHashNodeStruct* next;
};
typedef struct _osrfHashNodeStruct osrfHashNode;
+/**
+ @brief osrfHash structure
+
+ An osrfHash is partly a key/value store based on a hash table, and partly a linear
+ structure implemented as a linked list.
+
+ The hash table is an array of pointers, managed by an osrfList. Each pointer in that array
+ points to another layer of osrfList, which stores pointers to osrfHashNodes for keys that
+ hash to the same value.
+
+ Besides residing in this hash table structure, each osrfHashNode resides in a doubly
+ linked list that includes all the osrfHashNodes in the osrfHash (except for any nodes
+ that have been logically deleted). New nodes are added to the end of the list.
+
+ The hash table supports lookups based on a key.
+
+ The linked list supports sequential traversal, reflecting the sequence in which the nodes
+ were added.
+*/
struct _osrfHashStruct {
- osrfList* hash; /* this hash */
- void (*freeItem) (char* key, void* item); /* callback for freeing stored items */
+ /** @brief Pointer to the osrfList used as a hash table */
+ osrfList* hash;
+ /** @brief Callback function for freeing stored items */
+ void (*freeItem) (char* key, void* item);
+ /** @brief How many items are in the osrfHash */
unsigned int size;
+ /** @brief Pointer to the first node in the linked list */
osrfHashNode* first_key;
+ /** @brief Pointer to the last node in the linked list */
osrfHashNode* last_key;
};
+/**
+ @brief Maintains a position in an osrfHash, for traversing the linked list
+*/
struct _osrfHashIteratorStruct {
+ /** @brief Pointer to the associated osrfHash */
osrfHash* hash;
+ /** @brief Pointer to the current osrfHashNode (the one previously returned, if any) */
osrfHashNode* curr_node;
};
+/**
+ @brief How many slots in the hash table.
+
+ Must be a power of 2, or the hashing algorithm won't work properly.
+*/
/* 0x100 is a good size for small hashes */
//#define OSRF_HASH_LIST_SIZE 0x100 /* size of the main hash list */
#define OSRF_HASH_LIST_SIZE 0x10 /* size of the main hash list */
/* used internally */
+/**
+ @brief Free an osrfHashNode, and its cargo, from within a given osrfHash.
+ @param h Pointer to the osrfHash
+ @param n Pointer to the osrfHashNode
+
+ If there is a callback function for freeing the item, call it.
+
+ We use this macro only when freeing an entire osrfHash.
+*/
#define OSRF_HASH_NODE_FREE(h, n) \
if(h && n) { \
if(h->freeItem && n->key) h->freeItem(n->key, n->item);\
free(n->key); free(n); \
}
+/**
+ @brief Create and initialize a new (and empty) osrfHash.
+ @return Pointer to the newly created osrfHash.
+
+ The calling code is responsible for freeing the osrfHash.
+*/
osrfHash* osrfNewHash() {
osrfHash* hash;
OSRF_MALLOC(hash, sizeof(osrfHash));
- hash->hash = osrfNewList();
+ hash->hash = osrfNewListSize( OSRF_HASH_LIST_SIZE );
hash->first_key = NULL;
hash->last_key = NULL;
return hash;
@@ -73,8 +120,6 @@
static osrfHashNode* osrfNewHashNode(char* key, void* item);
-/* algorithm proposed by Donald E. Knuth
- * in The Art Of Computer Programming Volume 3 (more or less..)*/
/*
static unsigned int osrfHashMakeKey(char* str) {
if(!str) return 0;
@@ -88,6 +133,14 @@
*/
/* macro version of the above function */
+/**
+ @brief Hashing algorithm: derive a mangled number from a string.
+ @param str Pointer to the string to be hashed.
+ @param num A numeric variable to receive the resulting value.
+
+ This macro implements an algorithm proposed by Donald E. Knuth
+ in The Art of Computer Programming Volume 3 (more or less..)
+*/
#define OSRF_HASH_MAKE_KEY(str,num) \
do {\
const char* k__ = str;\
@@ -99,13 +152,32 @@
num = (h__ & (OSRF_HASH_LIST_SIZE-1));\
} while(0)
-/* Installs a callback function for freeing stored items */
+/**
+ @brief Install a callback function for freeing a stored item.
+ @param hash The osrfHash in which the callback function is to be installed.
+ @param callback A function pointer.
+
+ The callback function must accept a key value and a void pointer, and return void. The
+ key value enables the callback function to treat different kinds of items differently,
+ provided that it can distinguish them by key.
+*/
void osrfHashSetCallback( osrfHash* hash, void (*callback) (char* key, void* item) )
{
if( hash ) hash->freeItem = callback;
}
/* Returns a pointer to the item's node if found; otherwise returns NULL. */
+/**
+ @brief Search for a given key in an osrfHash.
+ @param hash Pointer to the osrfHash.
+ @param key The key to be sought.
+ @param bucketkey Pointer through which we can report which slot the item belongs in.
+ @return A pointer to the osrfHashNode where the item resides; or NULL, if it isn't there.
+
+ If the bucketkey parameter is not NULL, we update the index of the hash bucket where the
+ item belongs (whether or not we actually found it). In some cases this feedback enables the
+ calling function to avoid hashing the same key twice.
+*/
static osrfHashNode* find_item( const osrfHash* hash,
const char* key, unsigned int* bucketkey ) {
@@ -123,7 +195,7 @@
return currnode;
}
-
+
unsigned int i = 0;
OSRF_HASH_MAKE_KEY(key,i);
@@ -134,7 +206,7 @@
if( !list ) { return NULL; }
// Search the sub-list
-
+
int k;
osrfHashNode* node = NULL;
for( k = 0; k < list->size; k++ ) {
@@ -146,6 +218,12 @@
return NULL;
}
+/**
+ @brief Create and populate a new osrfHashNode.
+ @param key The key string.
+ @param item A pointer to the item associated with the key.
+ @return A pointer to the newly created node.
+*/
static osrfHashNode* osrfNewHashNode(char* key, void* item) {
if(!(key && item)) return NULL;
osrfHashNode* n;
@@ -157,22 +235,36 @@
return n;
}
-/* If an entry exists for a given key, update it; otherwise create it.
- If an entry exists, and there is no callback function to destroy it,
- return a pointer to it so that the calling code has the option of
- destroying it. Otherwise return NULL.
+/**
+ @brief Store an item for a given key in an osrfHash.
+ @param hash Pointer to the osrfHash in which the item is to be stored.
+ @param item Pointer to the item to be stored.
+ @param key A printf-style format string to be expanded into the key for the item. Subsequent
+ parameters, if any, will be formatted and inserted into the expanded key.
+ @return Pointer to an item previously stored for the same key, if any (see discussion).
+
+ If no item is already stored for the same key, osrfHashSet creates a new entry for it,
+ and returns NULL.
+
+ If an item is already stored for the same key, osrfHashSet updates the entry in place. The
+ fate of the previously stored item varies. If there is a callback defined for freeing the
+ item, osrfHashSet calls it, and returns NULL. Otherwise it returns a pointer to the
+ previously stored item, so that the calling function can dispose of it.
+
+ osrfHashSet returns NULL if any of its first three parameters is NULL.
*/
void* osrfHashSet( osrfHash* hash, void* item, const char* key, ... ) {
if(!(hash && item && key )) return NULL;
- void* olditem = NULL;
unsigned int bucketkey;
-
+
VA_LIST_TO_STRING(key);
osrfHashNode* node = find_item( hash, VA_BUF, &bucketkey );
if( node ) {
// We already have an item for this key. Update it in place.
+ void* olditem = NULL;
+
if( hash->freeItem ) {
hash->freeItem( node->key, node->item );
}
@@ -182,7 +274,8 @@
node->item = item;
return olditem;
}
-
+
+ // There is no entry for this key. Create a new one.
osrfList* bucket;
if( !(bucket = OSRF_LIST_GET_INDEX(hash->hash, bucketkey)) ) {
bucket = osrfNewList();
@@ -203,8 +296,8 @@
hash->last_key->next = node;
hash->last_key = node;
}
-
- return olditem;
+
+ return NULL;
}
/* Delete the entry for a specified key. If the entry exists,
@@ -212,6 +305,29 @@
item, return a pointer to the formerly associated item.
Otherwise return NULL.
*/
+/**
+ @brief Remove the item for a specified key from an osrfHash.
+ @param hash Pointer to the osrfHash from which the item is to be removed.
+ @param key A printf-style format string to be expanded into the key for the item. Subsequent
+ parameters, if any, will be formatted and inserted into the expanded key.
+ @return Pointer to the removed item, if any (see discussion).
+
+ If no entry is present for the specified key, osrfHashRemove returns NULL.
+
+ If there is such an entry, osrfHashRemove removes it. The fate of the associated item
+ varies. If there is a callback function for freeing items, osrfHashRemove calls it, and
+ returns NULL. Otherwise it returns a pointer to the removed item, so that the calling
+ code can dispose of it.
+
+ osrfHashRemove returns NULL if either of its first two parameters is NULL.
+
+ Note: the osrfHashNode for the removed item is logically deleted so that subsequent searches
+ and traversals will ignore it. However it is physically left in place so that an
+ osrfHashIterator pointing to it can advance to the next node. The downside of this
+ design is that, if a lot of items are removed, the accumulation of logically deleted nodes
+ will slow down searches. In addition, the memory used by a logically deleted osrfHashNode
+ remains allocated until the entire osrfHashNode is freed.
+*/
void* osrfHashRemove( osrfHash* hash, const char* key, ... ) {
if(!(hash && key )) return NULL;
@@ -229,7 +345,7 @@
item = node->item;
// Mark the node as logically deleted
-
+
free(node->key);
node->key = NULL;
node->item = NULL;
@@ -247,18 +363,32 @@
node->next->prev = node->prev;
else
hash->last_key = node->prev;
-
+
return item;
}
+/**
+ @brief Fetch the item stored in an osrfHash for a given key.
+ @param hash Pointer to the osrfHash from which to fetch the item.
+ @param key The key for the item sought.
+ @return A pointer to the item, if it exists; otherwise NULL.
+*/
void* osrfHashGet( osrfHash* hash, const char* key ) {
if(!(hash && key )) return NULL;
+
osrfHashNode* node = find_item( hash, key, NULL );
if( !node ) return NULL;
return node->item;
}
+/**
+ @brief Fetch the item stored in an osrfHash for a given key.
+ @param hash Pointer to the osrfHash from which to fetch the item.
+ @param key A printf-style format string to be expanded into the key for the item. Subsequent
+ parameters, if any, will be formatted and inserted into the expanded key.
+ @return A pointer to the item, if it exists; otherwise NULL.
+ */
void* osrfHashGetFmt( osrfHash* hash, const char* key, ... ) {
if(!(hash && key )) return NULL;
VA_LIST_TO_STRING(key);
@@ -268,30 +398,57 @@
return node->item;
}
+/**
+ @brief Create an osrfStringArray containing all the keys in an osrfHash.
+ @param hash Pointer to the osrfHash whose keys are to be extracted.
+ @return A pointer to the newly created osrfStringArray.
+
+ The strings in the osrfStringArray are arranged in the sequence in which they were added to
+ the osrfHash.
+
+ The calling code is responsible for freeing the osrfStringArray by calling
+ osrfStringArrayFree().
+
+ osrfHashKeys returns NULL if its parameter is NULL.
+*/
osrfStringArray* osrfHashKeys( osrfHash* hash ) {
if(!hash) return NULL;
-
+
osrfHashNode* node;
osrfStringArray* strings = osrfNewStringArray( hash->size );
// Add every key on the linked list
-
+
node = hash->first_key;
while( node ) {
if( node->key ) // should always be true
osrfStringArrayAdd( strings, node->key );
node = node->next;
}
-
+
return strings;
}
+/**
+ @brief Count the items an osrfHash.
+ @param hash Pointer to the osrfHash whose items are to be counted.
+ @return The number of items in the osrfHash.
+
+ If the parameter is NULL, osrfHashGetCount returns -1.
+*/
unsigned long osrfHashGetCount( osrfHash* hash ) {
if(!hash) return -1;
return hash->size;
}
+/**
+ @brief Free an osrfHash and all of its contents.
+ @param hash Pointer to the osrfHash to be freed.
+
+ If a callback function has been defined for freeing items, osrfHashFree calls it for
+ each stored item.
+*/
void osrfHashFree( osrfHash* hash ) {
if(!hash) return;
@@ -314,6 +471,16 @@
free(hash);
}
+/**
+ @brief Create and initialize an osrfHashIterator for a given osrfHash.
+ @param hash Pointer to the osrfHash with which the iterator will be associated.
+ @return Pointer to the newly created osrfHashIterator.
+
+ An osrfHashIterator may be used to traverse the items stored in an osrfHash.
+
+ The calling code is responsible for freeing the osrfHashIterator by calling
+ osrfHashIteratorFree().
+*/
osrfHashIterator* osrfNewHashIterator( osrfHash* hash ) {
if(!hash) return NULL;
osrfHashIterator* itr;
@@ -323,11 +490,18 @@
return itr;
}
+/**
+ @brief Advance an osrfHashIterator to the next item in an osrfHash.
+ @param itr Pointer to the osrfHashIterator to be advanced.
+ @return A Pointer to the next stored item, if there is one; otherwise NULL.
+
+ The items are returned in the sequence in which their keys were added to the osrfHash.
+*/
void* osrfHashIteratorNext( osrfHashIterator* itr ) {
if(!(itr && itr->hash)) return NULL;
// Advance to the next node in the linked list
-
+
if( NULL == itr->curr_node )
itr->curr_node = itr->hash->first_key;
else
@@ -339,6 +513,15 @@
return NULL;
}
+/**
+ @brief Fetch the key where an osrfHashIterator is currently positioned.
+ @param itr Pointer to the osrfHashIterator.
+ @return A const pointer to the key, if the iterator is currently positioned on an item;
+ otherwise NULL.
+
+ The key returned is the one associated with the previous call to osrfHashIteratorNext(),
+ unless there has been a call to osrfHashIteratorReset() in the meanwhile.
+*/
const char* osrfHashIteratorKey( const osrfHashIterator* itr ) {
if( itr && itr->curr_node )
return itr->curr_node->key;
@@ -346,19 +529,37 @@
return NULL;
}
+/**
+ @brief Free an osrfHashIterator.
+ @param itr Pointer to the osrfHashIterator to be freed.
+*/
void osrfHashIteratorFree( osrfHashIterator* itr ) {
if(itr)
free(itr);
}
+/**
+ @brief Restore an osrfHashIterator to its original pristine state.
+ @param itr Pointer to the osrfHashIterator to be reset.
+
+ After resetting the iterator, the next call to osrfHashIteratorNext() will return a pointer
+ to the first item in the list.
+*/
void osrfHashIteratorReset( osrfHashIterator* itr ) {
if(!itr) return;
itr->curr_node = NULL;
}
+/**
+ @brief Determine whether there is another entry in an osrfHash beyond the current
+ position of an osrfHashIterator.
+ @param itr Pointer to the osrfHashIterator in question.
+ @return A boolean: 1 if there is another entry beyond the current position, or 0 if the
+ iterator is already at the last entry (or at no entry).
+*/
int osrfHashIteratorHasNext( osrfHashIterator* itr ) {
- if( !itr )
+ if( !itr || !itr->hash )
return 0;
else if( itr->curr_node )
return itr->curr_node->next ? 1 : 0;
More information about the opensrf-commits
mailing list