[Opensrf-commits] r1802 - in trunk: include/opensrf src/libopensrf (scottmk)

Fri Oct 2 12:34:16 EDT 2009

Author: scottmk
Date: 2009-10-02 12:34:12 -0400 (Fri, 02 Oct 2009)
New Revision: 1802

Modified:
   trunk/include/opensrf/osrf_json.h
   trunk/src/libopensrf/osrf_parse_json.c
Log:
1. Add doxygen-style comments to document functions in the 
recursive-descent JSON parser.  Tidy up or correct existing
comments here and there.

2. In report_error(): add the const qualifier to the
third parameter.

M    include/opensrf/osrf_json.h
M    src/libopensrf/osrf_parse_json.c


Modified: trunk/include/opensrf/osrf_json.h
===================================================================

--- trunk/include/opensrf/osrf_json.h	2009-09-29 05:13:38 UTC (rev 1801)
+++ trunk/include/opensrf/osrf_json.h	2009-10-02 16:34:12 UTC (rev 1802)
@@ -275,9 +275,9 @@
 
 	...and a newer series:
 
-	- jsonParseString();
-	- jsonParseStringRaw();
-	- jsonParseStringFmt();
+	- jsonParse();
+	- jsonParseRaw();
+	- jsonParseFmt();
 
 	The first series is based on a finite state machine.  Its innards are accessible, in
 	theory, through the jsonParserContext structure and through callback functions.  In
@@ -290,22 +290,47 @@
 */
 /*@{*/
 /**
-	@brief Parse a JSON string;
+	@brief Parse a JSON string, with translation to classname hints.
 	@param str Pointer to the JSON string to parse.
-	@return The resulting JSON object, or NULL on error.
+	@return A pointer to the resulting JSON object, or NULL on error.
+
+	If any node in the jsonObject tree is of type JSON_HASH, with a tag of JSON_CLASS_KEY
+	and another tag of JSON_DATA_KEY, the parser will collapse a level.  The subobject
+	tagged with JSON_DATA_KEY will replace the JSON_HASH, and the string tagged as
+	JSON_CLASS_KEY will be stored as its classname.
+
+	The calling code is responsible for freeing the resulting jsonObject.
 */
 jsonObject* jsonParseString( const char* str );
+
+/**
+	@brief Parse a JSON string, with no translation to classname hints.
+	@param str Pointer to the JSON string to parse.
+	@return A pointer to the resulting JSON object, or NULL on error.
+
+	This function is similar to jsonParseString(), except that it does not give any special
+	treatment to a JSON_HASH with tags JSON_CLASS_KEY or JSON_DATA_KEY.
+
+	The calling code is responsible for freeing the resulting jsonObject.
+*/
 jsonObject* jsonParseStringRaw( const char* str );
+
+/**
+	@brief Parse a JSON string received as a printf-style format string.
+	@param str A printf-style format string.  Subsequent arguments, if any, are formatted
+		and inserted into the JSON string before parsing.
+	@return A pointer to the resulting JSON object, or NULL on error.
+
+	Unlike jsonParseString(), this function does not give any special treatment to a
+	JSON_HASH with tags JSON_CLASS_KEY or JSON_DATA_KEY.
+
+	The calling code is responsible for freeing the resulting jsonObject.
+*/
 jsonObject* jsonParseStringFmt( const char* str, ... );
 
-/**
-	@brief Parse a JSON string;
-	@param s Pointer to the JSON string to parse.
-	@return The resulting JSON object, or NULL on error.
- */
-jsonObject* jsonParse( const char* s );
-jsonObject* jsonParseRaw( const char* s );
-jsonObject* jsonParseFmt( const char* str, ... );
+jsonObject* jsonParse( const char* str );
+jsonObject* jsonParseRaw( const char* str );
+jsonObject* jsonParseFmt( const char* s, ... );
 /*@}*/
 
 /**
@@ -313,7 +338,7 @@
 	@param errorHandler A function pointer to an error-handling function.
 	@param str The string to parse.
 	@return The resulting JSON object, or NULL on error.
- */
+*/
 jsonObject* jsonParseStringHandleError( void (*errorHandler) (const char*), char* str, ... );
 
 jsonObject* jsonNewObject(const char* data);

Modified: trunk/src/libopensrf/osrf_parse_json.c
===================================================================
--- trunk/src/libopensrf/osrf_parse_json.c	2009-09-29 05:13:38 UTC (rev 1801)
+++ trunk/src/libopensrf/osrf_parse_json.c	2009-10-02 16:34:12 UTC (rev 1802)
@@ -13,6 +13,11 @@
 GNU General Public License for more details.
 */
 
+/**
+	@file osrf_parse_json.c
+	@brief  Recursive descent parser for JSON.
+*/
+
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
@@ -20,14 +25,27 @@
 #include <opensrf/osrf_json.h>
 #include <opensrf/osrf_json_utils.h>
 
+/**
+	@brief A collection of things the parser uses to keep track of what it's doing.
+*/
 typedef struct {
-	growing_buffer* str_buf;  // for building strings
-	size_t index;             // index into buffer
-	const char* buff;         // client's buffer holding current chunk of input
+	growing_buffer* str_buf;  /**< for building strings */
+	size_t index;             /**< index into input buffer */
+	const char* buff;         /**< client's buffer holding current chunk of input */
 } Parser;
 
-// For building Unicode byte sequences
+/**
+	@brief A small buffer for building Unicode byte sequences.
+
+	Because we pass a Unibuff* instead of a bare char*, the receiving function doesn't
+	have to worry about the size of the supplied buffer.  The size is known.
+*/
 typedef struct {
+	/** @brief A small working buffer.
+
+		We fill the buffer with four hex characters, and then transform them into a byte
+		sequence up to three bytes long (plus terminal nul) encoding a UTF-8 character.
+	*/
 	unsigned char buff[ 4 ];
 } Unibuff;
 
@@ -46,12 +64,22 @@
 static char skip_white_space( Parser* parser );
 static inline void parser_ungetc( Parser* parser );
 static inline char parser_nextc( Parser* parser );
-static void report_error( Parser* parser, char badchar, char* err );
+static void report_error( Parser* parser, char badchar, const char* err );
 
 /* ------------------------------------- */
 
-// Parse a JSON string; expand classes; construct a jsonObject.
-// Return NULL if the JSON string is invalid.
+/**
+	@brief Parse a JSON string, with translation to classname hints.
+	@param str Pointer to the JSON string to parse.
+	@return A pointer to the resulting JSON object, or NULL on error.
+
+	If any node in the jsonObject tree is of type JSON_HASH, with a tag of JSON_CLASS_KEY
+	and another tag of JSON_DATA_KEY, the parser will collapse a level.  The subobject
+	tagged with JSON_DATA_KEY will replace the JSON_HASH, and the string tagged as
+	JSON_CLASS_KEY will be stored as its classname.
+
+	The calling code is responsible for freeing the resulting jsonObject.
+*/
 jsonObject* jsonParse( const char* str ) {
 	if(!str)
 		return NULL;
@@ -67,8 +95,17 @@
 	return obj2;
 }
 
-// Parse a JSON string with variable arguments; construct a jsonObject.
-// Return NULL if the resulting JSON string is invalid.
+/**
+	@brief Parse a JSON string received as a printf-style format string.
+	@param str A printf-style format string.  Subsequent arguments, if any, are formatted
+		and inserted into the JSON string before parsing.
+	@return A pointer to the resulting JSON object, or NULL on error.
+
+	Unlike jsonParse(), this function does not give any special treatment to a JSON_HASH
+	with tags JSON_CLASS_KEY or JSON_DATA_KEY.
+
+	The calling code is responsible for freeing the resulting jsonObject.
+*/
 jsonObject* jsonParseFmt( const char* str, ... ) {
 	if( !str )
 		return NULL;
@@ -76,8 +113,16 @@
 	return jsonParseRaw( VA_BUF );
 }
 
-// Parse a JSON string; construct a jsonObject.
-// Return NULL if the JSON string is invalid.
+/**
+	@brief Parse a JSON string, with no translation to classname hints.
+	@param s Pointer to the JSON string to parse.
+	@return A pointer to the resulting JSON object, or NULL on error.
+
+	This function is similar to jsonParse(), except that it does not give any special
+	treatment to a JSON_HASH with tags JSON_CLASS_KEY or JSON_DATA_KEY.
+
+	The calling code is responsible for freeing the resulting jsonObject.
+*/
 jsonObject* jsonParseRaw( const char* s ) {
 
 	if( !s || !*s )
@@ -95,7 +140,18 @@
 	return obj;
 }
 
-// Parse a text string into a jsonObject.
+/**
+	@brief Parse a JSON string into a jsonObject.
+	@param parser Pointer to a Parser.
+	@return Pointer to the newly created jsonObject.
+
+	Call get_json_thing() to do the real work, then make sure that there's nothing but
+	white space at the end.
+
+	Currently we call this function only from jsonParseRaw(), and its code could have been
+	incorporated there in-line.  Having it in a separate function is intended to make
+	certain future developments easier.
+*/
 static jsonObject* parse( Parser* parser ) {
 
 	if( ! parser->buff ) {
@@ -115,8 +171,18 @@
 	return obj;
 }
 
-// Get the next JSON node -- be it string, number, hash, or whatever.
-// Return a pointer to it if successful, or NULL if not.
+/**
+	@brief Get the next JSON node -- be it string, number, hash, or whatever.
+	@param parser Pointer to a Parser.
+	@param firstc The first character in the part that we're parsing.
+	@return Pointer to the next JSON node, or NULL upon error.
+
+	The first character tells us what kind of thing we're parsing next: a string, an array,
+	a hash, a number, a boolean, or a null.  Branch accordingly.
+
+	In the case of an array or a hash, this function indirectly calls itself in order to
+	parse subordinate nodes.
+*/
 static jsonObject* get_json_thing( Parser* parser, char firstc ) {
 
 	jsonObject* obj = NULL;
@@ -154,9 +220,17 @@
 	return obj;
 }
 
-// Collect characters from the input stream into a character
-// string, terminated by '"'.  Return a char* if successful,
-// or NULL if not.
+/**
+	@brief Collect characters into a character string.
+	@param parser Pointer to a Parser.
+	@return Pointer to parser->str_buf if successful, or NULL upon error.
+
+	Translate the usual escape sequences.  In particular, "\u" escapes a sequence of four
+	hex characters; turn the hex into the corresponding UTF-8 byte sequence.
+
+	Return the string we have built, without the enclosing quotation marks, in
+	parser->str_buf.  In case of error, log an error message.
+*/
 static const char* get_string( Parser* parser ) {
 
 	if( parser->str_buf )
@@ -167,8 +241,6 @@
 	growing_buffer* gb = parser->str_buf;
 
 	// Collect the characters.
-	// This is a naive implementation so far.
-	// We need to worry about UTF-8.
 	for( ;; ) {
 		char c = parser_nextc( parser );
 		if( '"' == c )
@@ -180,9 +252,9 @@
 		} else if( '\\' == c ) {
 			c = parser_nextc( parser );
 			switch( c ) {
-				case '"'  : OSRF_BUFFER_ADD_CHAR( gb, '"'  );  break;
+				case '"'  : OSRF_BUFFER_ADD_CHAR( gb, '"'  ); break;
 				case '\\' : OSRF_BUFFER_ADD_CHAR( gb, '\\' ); break;
-				case '/'  : OSRF_BUFFER_ADD_CHAR( gb, '/'  );  break;
+				case '/'  : OSRF_BUFFER_ADD_CHAR( gb, '/'  ); break;
 				case 'b'  : OSRF_BUFFER_ADD_CHAR( gb, '\b' ); break;
 				case 'f'  : OSRF_BUFFER_ADD_CHAR( gb, '\f' ); break;
 				case 'n'  : OSRF_BUFFER_ADD_CHAR( gb, '\n' ); break;
@@ -210,10 +282,20 @@
 	return OSRF_BUFFER_C_STR( gb );
 }
 
-// We found what looks like the first character of a number.
-// Collect all the eligible characters, and verify that they
-// are numeric (possibly after some scrubbing).  Return a
-// pointer to a JSON_NUMBER if successful, or NULL if not.
+/**
+	@brief Collect characters into a number, and create a JSON_NUMBER for it.
+	@param parser Pointer to a parser.
+	@param firstc The first character in the number.
+	@return Pointer to a newly created jsonObject of type JSON_NUMBER, or NULL upon error.
+
+	Collect digits, signs, decimal points, and 'E' or 'e' (for scientific notation) into
+	a buffer.  Make sure that the result is numeric.  If it's not numeric by strict JSON
+	rules, try to make it numeric by some judicious massaging (we aren't quite as strict
+	as the official JSON rules).
+
+	If successful, construct a jsonObject of type JSON_NUMBER containing the resulting
+	numeric string.  Otherwise log an error message and return NULL.
+*/
 static jsonObject* get_number( Parser* parser, char firstc ) {
 
 	growing_buffer* gb = buffer_init( 32 );
@@ -256,7 +338,17 @@
 	return obj;
 }
 
-// We found a '['.  Create a JSON_ARRAY with all its subordinates.
+/**
+	@brief Parse an array, and create a JSON_ARRAY for it.
+	@param parser Pointer to a Parser.
+	@return Pointer to a newly created jsonObject of type JSON_ARRAY, or NULL upon error.
+
+	Look for a series of JSON nodes, separated by commas and terminated by a right square
+	bracket.  Parse each node recursively, collect them all into a newly created jsonObject
+	of type JSON_ARRAY, and return a pointer to the result.
+
+	Upon error, log an error message and return NULL.
+*/
 static jsonObject* get_array( Parser* parser ) {
 
 	jsonObject* array = jsonNewObjectType( JSON_ARRAY );
@@ -290,7 +382,20 @@
 	return array;
 }
 
-// We found '{' Get a JSON_HASH, with all its subordinates.
+/**
+	@brief Parse a hash (JSON object), and create a JSON_HASH for it.
+	@param parser Pointer to a Parser.
+	@return Pointer to a newly created jsonObject of type JSON_HASH, or NULL upon error.
+
+	Look for a series of name/value pairs, separated by commas and terminated by a right
+	curly brace.  Each name/value pair consists of a quoted string, followed by a colon,
+	followed by a JSON node of any sort.  Parse the value recursively.
+
+	Collect the name/value pairs into a newly created jsonObject of type JSON_HASH, and
+	return a pointer to it.
+
+	Upon error, log an error message and return NULL.
+*/
 static jsonObject* get_hash( Parser* parser ) {
 	jsonObject* hash = jsonNewObjectType( JSON_HASH );
 
@@ -359,8 +464,17 @@
 	return hash;
 }
 
-// We found an 'n'.  Verify that the next four characters are "ull",
-// and that there are no further characters in the token.
+/**
+	@brief Parse the JSON keyword "null", and create a JSON_NULL for it.
+	@param parser Pointer to a Parser.
+	@return Pointer to a newly created jsonObject of type JSON_NULL, or NULL upon error.
+
+	We already saw an 'n', or we wouldn't be here.  Make sure that the next three characters
+	are 'u', 'l', and 'l', and that the character after that is not a letter or a digit.
+
+	If all goes well, create a jsonObject of type JSON_NULL, and return a pointer to it.
+	Otherwise log an error message and return NULL.
+*/
 static jsonObject* get_null( Parser* parser ) {
 
 	if( parser_nextc( parser ) != 'u' ||
@@ -371,24 +485,31 @@
 		return NULL;
 	}
 
-	// Sneak a peek at the next character
-	// to make sure that it's kosher
+	// Peek at the next character to make sure that it's kosher
 	char c = parser_nextc( parser );
 	if( ! isspace( (unsigned char) c ) )
 		parser_ungetc( parser );
 
 	if( isalnum( (unsigned char) c ) ) {
-		report_error( parser, c,
-				"Found letter or number after \"null\"" );
+		report_error( parser, c, "Found letter or number after \"null\"" );
 		return NULL;
 	}
 
-	// Everythings okay.  Return a JSON_BOOL.
+	// Everything's okay.  Return a JSON_NULL.
 	return jsonNewObject( NULL );
 }
 
-// We found a 't'.  Verify that the next four characters are "rue",
-// and that there are no further characters in the token.
+/**
+	@brief Parse the JSON keyword "true", and create a JSON_BOOL for it.
+	@param parser Pointer to a Parser.
+	@return Pointer to a newly created jsonObject of type JSON_BOOL, or NULL upon error.
+
+	We already saw a 't', or we wouldn't be here.  Make sure that the next three characters
+	are 'r', 'u', and 'e', and that the character after that is not a letter or a digit.
+
+	If all goes well, create a jsonObject of type JSON_BOOL, and return a pointer to it.
+	Otherwise log an error message and return NULL.
+*/
 static jsonObject* get_true( Parser* parser ) {
 
 	if( parser_nextc( parser ) != 'r' ||
@@ -399,24 +520,31 @@
 		return NULL;
 	}
 
-	// Sneak a peek at the next character
-	// to make sure that it's kosher
+	// Peek at the next character to make sure that it's kosher
 	char c = parser_nextc( parser );
 	if( ! isspace( (unsigned char) c ) )
 		parser_ungetc( parser );
 
 	if( isalnum( (unsigned char) c ) ) {
-		report_error( parser, c,
-				"Found letter or number after \"true\"" );
+		report_error( parser, c, "Found letter or number after \"true\"" );
 		return NULL;
 	}
 
-	// Everythings okay.  Return a JSON_NULL.
+	// Everything's okay.  Return a JSON_BOOL.
 	return jsonNewBoolObject( 1 );
 }
 
-// We found an 'f'.  Verify that the next four characters are "alse",
-// and that there are no further characters in the token.
+/**
+	@brief Parse the JSON keyword "false", and create a JSON_BOOL for it.
+	@param parser Pointer to a Parser.
+	@return Pointer to a newly created jsonObject of type JSON_BOOL, or NULL upon error.
+
+	We already saw an 'f', or we wouldn't be here.  Make sure that the next four characters
+	are 'a', 'l', 's', and 'e', and that the character after that is not a letter or a digit.
+
+	If all goes well, create a jsonObject of type JSON_BOOL, and return a pointer to it.
+	Otherwise log an error message and return NULL.
+*/
 static jsonObject* get_false( Parser* parser ) {
 
 	if( parser_nextc( parser ) != 'a' ||
@@ -428,24 +556,51 @@
 		return NULL;
 	}
 
-	// Sneak a peek at the next character
-	// to make sure that it's kosher
+	// Peek at the next character to make sure that it's kosher
 	char c = parser_nextc( parser );
 	if( ! isspace( (unsigned char) c ) )
 		parser_ungetc( parser );
 
 	if( isalnum( (unsigned char) c ) ) {
-		report_error( parser, c,
-				"Found letter or number after \"false\"" );
+		report_error( parser, c, "Found letter or number after \"false\"" );
 		return NULL;
 	}
 
-	// Everythings okay.  Return a JSON_BOOL.
+	// Everything's okay.  Return a JSON_BOOL.
 	return jsonNewBoolObject( 0 );
 }
 
-// We found \u.  Grab the next 4 characters, confirm that they are hex,
-// and convert them to Unicode.
+/**
+	@brief Convert a hex digit to the corresponding numeric value.
+	@param x A hex digit
+	@return The corresponding numeric value.
+
+	Warning #1: The calling code must ensure that the character to be converted is, in fact,
+	a hex character.  Otherwise the results will be strange.
+
+	Warning #2: This macro evaluates its argument three times.  Beware of side effects.
+	(It might make sense to convert this macro to a static inline function.)
+
+	Warning #3: This code assumes that the characters [a-f] and [A-F] are contiguous in the
+	execution character set, and that the lower 4 bits for 'a' and 'A' are 0001.  Those
+	assumptions are true for ASCII and EBCDIC, but there may be some character sets for
+	which they are not true.
+*/
+#define hexdigit(x) ( ((x) <= '9') ? (x) - '0' : ((x) & 7) + 9)
+
+/**
+	@brief Translate the next four characters into a UTF-8 character.
+	@param parser Pointer to a Parser.
+	@param unibuff Pointer to a small buffer in which to return the results.
+	@return 0 if successful, or 1 if not.
+
+	Collect the next four characters into @a unibuff, and make sure that they're all hex.
+	Translate them into a nul-terminated UTF-8 byte sequence, and return the result via
+	@a unibuff.
+
+	(Note that a UTF-8 byte sequence is guaranteed not to contain a nul byte.  Hence using
+	a nul as a terminator creates no ambiguity.)
+*/
 static int get_utf8( Parser* parser, Unibuff* unibuff ) {
 	char ubuff[ 5 ];
 	int i = 0;
@@ -470,9 +625,8 @@
 	/* The following code is adapted with permission from
 	 * json-c http://oss.metaparadigm.com/json-c/
 	 */
-	#define hexdigit(x) ( ((x) <= '9') ? (x) - '0' : ((x) & 7) + 9)
 
-	// Convert the hex sequence into a single integer
+	// Convert the hex sequence to a single integer
 	unsigned int ucs_char =
 			(hexdigit(ubuff[ 0 ]) << 12) +
 			(hexdigit(ubuff[ 1 ]) <<  8) +
@@ -500,7 +654,11 @@
 	return 0;
 }
 
-// Return the next non-whitespace character in the input stream.
+/**
+	@brief Skip over white space.
+	@param parser Pointer to a Parser.
+	@return The next non-whitespace character.
+*/
 static char skip_white_space( Parser* parser ) {
 	char c;
 	do {
@@ -510,22 +668,41 @@
 	return c;
 }
 
-// Put a character back into the input stream.
-// It is the responsibility of the caller not to back up
-// past the beginning of the input string.
+/**
+	@brief Back up by one character.
+	@param parser Pointer to a Parser.
+
+	Decrement an index into the input string.  We don't guard against a negative index, so
+	the calling code should make sure that it doesn't do anything stupid.
+*/
 static inline void parser_ungetc( Parser* parser ) {
 	--parser->index;
 }
 
-// Get the next character.  It is the responsibility of
-//the caller not to read past the end of the input string.
+/**
+	@brief Get the next character
+	@param parser Pointer to a Parser.
+	@return The next character.
+
+	Increment an index into the input string and return the corresponding character.
+	The calling code should make sure that it doesn't try to read past the terminal nul.
+*/
 static inline char parser_nextc( Parser* parser ) {
 	return parser->buff[ parser->index++ ];
 }
 
-// Report a syntax error to standard error.
-static void report_error( Parser* parser, char badchar, char* err ) {
+/**
+	@brief Report a syntax error to the log.
+	@param parser Pointer to a Parser.
+	@param badchar The character at the position where the error was detected.
+	@param err Pointer to a descriptive error message.
 
+	Format and log an error message.  Identify the location of the error and
+	the character at that location.  Show the neighborhood of the error within
+	the input string.
+*/
+static void report_error( Parser* parser, char badchar, const char* err ) {
+
 	// Determine the beginning and ending points of a JSON
 	// fragment to display, from the vicinity of the error
 
@@ -545,7 +722,6 @@
 	}
 
 	// Copy the fragment into a buffer
-
 	int len = post - pre + 1;  // length of fragment
 	char buf[len + 1];
 	memcpy( buf, parser->buff + pre, len );