[Opensrf-commits] r1199 - in trunk: include/opensrf src/libopensrf

svn at svn.open-ils.org svn at svn.open-ils.org
Sat Jan 5 14:31:09 EST 2008


Author: miker
Date: 2008-01-05 14:07:02 -0500 (Sat, 05 Jan 2008)
New Revision: 1199

Modified:
   trunk/include/opensrf/osrf_json.h
   trunk/include/opensrf/osrf_json_utils.h
   trunk/src/libopensrf/osrf_json_object.c
   trunk/src/libopensrf/osrf_json_parser.c
Log:
Patch from Scott McKellar:

These patches are the culmination of several postings on this subject.
The overall effect is to store numbers in jsonObjects as strings,
rather than as doubles, in order to avoid needless loss of precision
in translating back and forth between text and floating point
representations.

I shall not repeat the details outlined in previous posts, but rather
focus on what's new:

1. A new extern function jsonNewNumberStringObject constructs a
JSON_NUMBER from a character string.  If the string is not numeric
according to JSON rules, the function returns NULL.

2. A new extern function jsonScrubNumber accepts a character string
and reformats it, if possible, into a numeric string that is valid
according to JSON rules.  For example, it transforms "  +00.42"
into "0.42".  The transformed string is returned as a char* that
the caller is responsible for freeing.

jsonScrubNumber performs this transformation by manipulating text,
not by passing the value through a double.  Therefore it can handle
numbers that would be too long, too large, or too small for strtod()
and its kindred to handle.

It accepts leading white space and scientific notation, but not
trailing white space, hex, or octal.

If the input string is not numeric, jsonScrubNumber returns NULL.

3. The doubleToString function now translates the incoming double
to a character string with up to 30 significant decimal digits
(it formats with "%.30g").  That should be enough to minimize the
impact on existing code, depending of course on how faithfully
snprintf() does the formatting.

4. In osrf_json.h: I changed the signature of the next-to-last
function pointer in a jsonParserHandler (handleNumber), so that it
accepts a const character pointer instead of a double.  Likewise for
the corresponding declaration of _jsonHandleNumber in osrf_json_utils.h.

5. In osrf_json_parser.c: I construct a JSON_NUMBER from the input
character string without passing it through a double.  If the input
character string is not valid according to JSON rules, I try to use
the new jsonScrubNumber() to normalize the formatting so that JSON
can accept it.



Modified: trunk/include/opensrf/osrf_json.h
===================================================================
--- trunk/include/opensrf/osrf_json.h	2008-01-03 22:28:52 UTC (rev 1198)
+++ trunk/include/opensrf/osrf_json.h	2008-01-05 19:07:02 UTC (rev 1199)
@@ -94,7 +94,7 @@
 	void (*handleNull)			(void* userData);
 	void (*handleString)			(void* userData, char* string);
 	void (*handleBool)			(void* userData, int boolval);
-	void (*handleNumber)			(void* userData, double num);
+	void (*handleNumber)		(void* userData, const char* numstr);
 	void (*handleError)			(void* userData, char* err, ...);
 };
 typedef struct jsonParserHandlerStruct jsonParserHandler;
@@ -185,10 +185,14 @@
 jsonObject* jsonNewObjectType(int type);
 
 /**
- * Creates a new number object
+ * Creates a new number object from a double
  */
 jsonObject* jsonNewNumberObject( double num );
 
+/**
+ * Creates a new number object from a numeric string
+ */
+jsonObject* jsonNewNumberStringObject( const char* numstr );
 
 /**
  * Creates a new json bool
@@ -297,6 +301,7 @@
 
 /* sets the number value for the object */
 void jsonObjectSetNumber(jsonObject* dest, double num);
+int jsonObjectSetNumberString(jsonObject* dest, const char* string);
 
 /* sets the class hint for this object */
 void jsonObjectSetClass(jsonObject* dest, const char* classname );
@@ -319,8 +324,27 @@
 	*/
 char* jsonObjectToSimpleString( const jsonObject* o );
 
+/**
+ Allocate a buffer and format a specified numeric value into it,
+ with up to 30 decimal digits of precision.   Caller is responsible
+ for freeing the buffer.
+ **/
+char* doubleToString( double num );
 
+/**
+ Return 1 if the string is numeric, otherwise return 0.
+ This validation follows the rules defined by the grammar at:
+ http://www.json.org/
+ **/
+int jsonIsNumeric( const char* s );
 
+/**
+ Allocate and reformat a numeric string into one that is valid
+ by JSON rules.  If the string is not numeric, return NULL.
+ Caller is responsible for freeing the buffer.
+ **/
+char* jsonScrubNumber( const char* s );
+
 /* provides an XPATH style search interface (e.g. /some/node/here) and 
 	return the object at that location if one exists.  Naturally,  
 	every element in the path must be a proper object ("hash" / {}).

Modified: trunk/include/opensrf/osrf_json_utils.h
===================================================================
--- trunk/include/opensrf/osrf_json_utils.h	2008-01-03 22:28:52 UTC (rev 1198)
+++ trunk/include/opensrf/osrf_json_utils.h	2008-01-05 19:07:02 UTC (rev 1199)
@@ -46,7 +46,7 @@
 void _jsonHandleNull(void*);
 void _jsonHandleString(void*, char* string);
 void _jsonHandleBool(void*, int boolval);
-void _jsonHandleNumber(void*, double num);
+void _jsonHandleNumber(void*, const char* numstr);
 void _jsonHandleError(void*, char* str, ...);
 
 struct jsonInternalParserStruct {

Modified: trunk/src/libopensrf/osrf_json_object.c
===================================================================
--- trunk/src/libopensrf/osrf_json_object.c	2008-01-03 22:28:52 UTC (rev 1198)
+++ trunk/src/libopensrf/osrf_json_object.c	2008-01-05 19:07:02 UTC (rev 1199)
@@ -13,6 +13,9 @@
 GNU General Public License for more details.
 */
 
+#include <stdlib.h>
+#include <ctype.h>
+#include <errno.h>
 #include <limits.h>
 #include <opensrf/log.h>
 #include <opensrf/osrf_json.h>
@@ -27,7 +30,7 @@
 } else if( _obj_->type == JSON_ARRAY && newtype != JSON_ARRAY ) {	\
 		osrfListFree(_obj_->value.l);			\
 		_obj_->value.l = NULL;					\
-} else if( _obj_->type == JSON_STRING && newtype != JSON_STRING ) { \
+} else if( _obj_->type == JSON_STRING || _obj_->type == JSON_NUMBER ) { \
 		free(_obj_->value.s);						\
 		_obj_->value.s = NULL;					\
 } \
@@ -137,10 +140,25 @@
 jsonObject* jsonNewNumberObject( double num ) {
 	jsonObject* o = jsonNewObject(NULL);
 	o->type = JSON_NUMBER;
-	o->value.n = num;
+	o->value.s = doubleToString( num );
 	return o;
 }
 
+/**
+ * Creates a new number object from a numeric string
+ */
+jsonObject* jsonNewNumberStringObject( const char* numstr ) {
+	if( !numstr )
+		numstr = "0";
+	else if( !jsonIsNumeric( numstr ) )
+		return NULL;
+
+	jsonObject* o = jsonNewObject(NULL);
+	o->type = JSON_NUMBER;
+	o->value.s = strdup( numstr );
+	return o;
+}
+
 jsonObject* jsonNewBoolObject(int val) {
     jsonObject* o = jsonNewObject(NULL);
     o->type = JSON_BOOL;
@@ -163,6 +181,7 @@
 		case JSON_HASH		: osrfHashFree(o->value.h); break;
 		case JSON_ARRAY	: osrfListFree(o->value.l); break;
 		case JSON_STRING	: free(o->value.s); break;
+		case JSON_NUMBER	: free(o->value.s); break;
 	}
 
 	// Stick the old jsonObject onto a free list
@@ -261,15 +280,8 @@
 			break;
 
 		case JSON_NUMBER: {
-			double x = obj->value.n;
-			if( x <= INT_MAX && x >= INT_MIN && x == (int) x ) {
-				INT_TO_STRING((int)x);
-				OSRF_BUFFER_ADD(buf, INTSTR);
-
-			} else {
-				DOUBLE_TO_STRING(x);
-				OSRF_BUFFER_ADD(buf, DOUBLESTR);
-			}
+			if(obj->value.s) OSRF_BUFFER_ADD( buf, obj->value.s );
+			else OSRF_BUFFER_ADD_CHAR( buf, '0' );
 			break;
 		}
 
@@ -389,25 +401,74 @@
 	return -1;
 }
 
+/**
+ Allocate a buffer and format a specified numeric value into it.
+ Caller is responsible for freeing the buffer.
+**/
+char* doubleToString( double num ) {
+	
+	char buf[ 64 ];
+	size_t len = snprintf(buf, sizeof( buf ), "%.30g", num) + 1;
+	if( len < sizeof( buf ) )
+		return strdup( buf );
+	else
+	{
+		// Need a bigger buffer (should never be necessary)
+		
+		char* bigger_buff = safe_malloc( len + 1 );
+		(void) snprintf(bigger_buff, len + 1, "%.30g", num);
+		return bigger_buff;
+	}
+}
+
 char* jsonObjectGetString(const jsonObject* obj) {
-	return (obj && obj->type == JSON_STRING) ? obj->value.s : NULL;
+	if(obj)
+	{
+		if( obj->type == JSON_STRING )
+			return obj->value.s;
+		else if( obj->type == JSON_NUMBER )
+			return obj->value.s ? obj->value.s : "0";
+		else
+			return NULL;
+	}
+	else
+		return NULL;
 }
 
 double jsonObjectGetNumber( const jsonObject* obj ) {
-	return (obj && obj->type == JSON_NUMBER) ? obj->value.n : 0;
+	return (obj && obj->type == JSON_NUMBER && obj->value.s)
+			? strtod( obj->value.s, NULL ) : 0;
 }
 
 void jsonObjectSetString(jsonObject* dest, const char* string) {
 	if(!(dest && string)) return;
 	JSON_INIT_CLEAR(dest, JSON_STRING);
-	free(dest->value.s);
 	dest->value.s = strdup(string);
 }
 
+/**
+ Turn a jsonObject into a JSON_NUMBER (if it isn't already one) and store
+ a specified numeric string in it.  If the string is not numeric,
+ store the equivalent of zero, and return an error status.
+**/
+int jsonObjectSetNumberString(jsonObject* dest, const char* string) {
+	if(!(dest && string)) return -1;
+	JSON_INIT_CLEAR(dest, JSON_NUMBER);
+
+	if( jsonIsNumeric( string ) ) {
+		dest->value.s = strdup(string);
+		return 0;
+	}
+	else {
+		dest->value.s = NULL;  // equivalent to zero
+		return -1;
+	}
+}
+
 void jsonObjectSetNumber(jsonObject* dest, double num) {
 	if(!dest) return;
 	JSON_INIT_CLEAR(dest, JSON_NUMBER);
-	dest->value.n = num;
+	dest->value.s = doubleToString( num );
 }
 
 void jsonObjectSetClass(jsonObject* dest, const char* classname ) {
@@ -438,7 +499,8 @@
             result = jsonNewObject(jsonObjectGetString(o));
             break;
         case JSON_NUMBER:
-            result = jsonNewNumberObject(jsonObjectGetNumber(o));
+			result = jsonNewObject( o->value.s );
+			result->type = JSON_NUMBER;
             break;
         case JSON_BOOL:
             result = jsonNewBoolObject(jsonBoolIsTrue((jsonObject*) o));
@@ -479,25 +541,230 @@
 
 	switch( o->type ) {
 
-		case JSON_NUMBER: {
+		case JSON_NUMBER:
+			value = strdup( o->value.s ? o->value.s : "0" );
+			break;
 
-			if( o->value.n == (int) o->value.n ) {
-				INT_TO_STRING((int) o->value.n);	
-				value = strdup(INTSTR);
+		case JSON_STRING:
+			value = strdup(o->value.s);
+	}
 
-			} else {
-				DOUBLE_TO_STRING(o->value.n);
-				value = strdup(DOUBLESTR);
+	return value;
+}
+
+/**
+ Return 1 if the string is numeric, otherwise return 0.
+ This validation follows the rules defined by the grammar at:
+ http://www.json.org/
+ **/
+int jsonIsNumeric( const char* s ) {
+
+	if( !s || !*s ) return 0;
+
+	const char* p = s;
+
+	// skip leading minus sign, if present (leading plus sign not allowed)
+
+	if( '-' == *p )
+		++p;
+
+	// There must be at least one digit to the left of the decimal
+
+	if( isdigit( (unsigned char) *p ) ) {
+		if( '0' == *p++ ) {
+
+			// If the first digit is zero, it must be the
+			// only digit to the left of the decimal
+
+			if( isdigit( (unsigned char) *p ) )
+				return 0;
+		}
+		else {
+
+			// Skip over the following digits
+
+			while( isdigit( (unsigned char) *p ) ) ++p;
+		}
+	}
+	else
+		return 0;
+
+	if( !*p )
+		return 1;             // integer
+
+	if( '.' == *p ) {
+
+		++p;
+
+		// If there is a decimal point, there must be
+		// at least one digit to the right of it
+
+		if( isdigit( (unsigned char) *p ) )
+			++p;
+		else
+			return 0;
+
+		// skip over contiguous digits
+
+		while( isdigit( (unsigned char) *p ) ) ++p;
+	}
+
+	if( ! *p )
+		return 1;  // decimal fraction, no exponent
+	else if( *p != 'e' && *p != 'E' )
+		return 0;  // extra junk, no exponent
+	else
+		++p;
+
+	// If we get this far, we have the beginnings of an exponent.
+	// Skip over optional sign of exponent.
+
+	if( '-' == *p || '+' == *p )
+		++p;
+
+	// There must be at least one digit in the exponent
+	
+	if( isdigit( (unsigned char) *p ) )
+		++p;
+	else
+		return 0;
+
+	// skip over contiguous digits
+
+	while( isdigit( (unsigned char) *p ) ) ++p;
+
+	if( *p )
+		return 0;  // extra junk
+	else
+		return 1;  // number with exponent
+}
+
+/**
+ Allocate and reformat a numeric string into one that is valid
+ by JSON rules.  If the string is not numeric, return NULL.
+ Caller is responsible for freeing the buffer.
+ **/
+char* jsonScrubNumber( const char* s ) {
+	if( !s || !*s ) return NULL;
+
+	growing_buffer* buf = buffer_init( 64 );
+
+	// Skip leading white space, if present
+
+	while( isspace( (unsigned char) *s ) ) ++s;
+
+	// Skip leading plus sign, if present, but keep a minus
+
+	if( '-' == *s )
+	{
+		buffer_add_char( buf, '-' );
+		++s;
+	}
+	else if( '+' == *s )
+		++s;
+
+	if( '\0' == *s ) {
+		// No digits found
+
+		buffer_free( buf );
+		return NULL;
+	}
+	// Skip any leading zeros
+
+	while( '0' == *s ) ++s;
+
+	// Capture digits to the left of the decimal,
+	// and note whether there are any.
+
+	int left_digit = 0;  // boolean
+
+	if( isdigit( (unsigned char) *s ) ) {
+		buffer_add_char( buf, *s++ );
+		left_digit = 1;
+	}
+	
+	while( isdigit( (unsigned char) *s  ) )
+		buffer_add_char( buf, *s++ );
+
+	// Now we expect to see a decimal point,
+	// an exponent, or end-of-string.
+
+	switch( *s )
+	{
+		case '\0' :
+			break;
+		case '.' :
+		{
+			// Add a single leading zero, if we need to
+
+			if( ! left_digit )
+				buffer_add_char( buf, '0' );
+			buffer_add_char( buf, '.' );
+			++s;
+
+			if( ! left_digit && ! isdigit( (unsigned char) *s ) )
+			{
+				// No digits on either side of decimal
+
+				buffer_free( buf );
+				return NULL;
 			}
 
+			// Collect digits to right of decimal
+
+			while( isdigit( (unsigned char) *s ) )
+				buffer_add_char( buf, *s++ );
+
 			break;
 		}
+		case 'e' :
+		case 'E' :
 
-		case JSON_STRING:
-			value = strdup(o->value.s);
+			// Exponent; we'll deal with it later, but
+			// meanwhile make sure we have something
+			// to its left
+
+			if( ! left_digit )
+				buffer_add_char( buf, '1' );
+			break;
+		default :
+
+			// Unexpected character; bail out
+
+			buffer_free( buf );
+			return NULL;
 	}
 
-	return value;
-}
+	if( '\0' == *s )    // Are we done yet?
+		return buffer_release( buf );
 
+	if( 'e' != *s && 'E' != *s ) {
 
+		// Unexpected character: bail out
+
+		buffer_free( buf );
+		return NULL;
+	}
+
+	// We have an exponent.  Load the e or E,
+	// and the sign if there is one.
+
+	buffer_add_char( buf, *s++ );
+
+	if( '+' == *s || '-' == *s )
+		buffer_add_char( buf, *s++ );
+
+	// Collect digits of the exponent
+
+	while( isdigit( (unsigned char) *s ) )
+		buffer_add_char( buf, *s++ );
+
+	// There better not be anything left
+
+	if( *s ) {
+		buffer_free( buf );
+		return NULL;
+	}
+
+	return buffer_release( buf );
+}

Modified: trunk/src/libopensrf/osrf_json_parser.c
===================================================================
--- trunk/src/libopensrf/osrf_json_parser.c	2008-01-03 22:28:52 UTC (rev 1198)
+++ trunk/src/libopensrf/osrf_json_parser.c	2008-01-05 19:07:02 UTC (rev 1199)
@@ -349,22 +349,30 @@
 		return 1;
 	}
 
-	/* make me more strict */
-	char* err = NULL;
-	double d = strtod(ctx->buffer->buf, &err);
-	if(err && err[0] != '\0') 
-		return _jsonParserError(ctx, "Invalid number sequence");
+	if(ctx->handler->handleNumber)
+	{
+		if( jsonIsNumeric( ctx->buffer->buf ) )
+			ctx->handler->handleNumber( ctx->userData, ctx->buffer->buf );
+		else {
+			// The number string is not numeric according to JSON rules.
+			// Scrub it into an acceptable format.
+
+			char* scrubbed = jsonScrubNumber( ctx->buffer->buf );
+			if( !scrubbed )
+				return _jsonParserError(ctx, "Invalid number sequence");
+			else {
+				ctx->handler->handleNumber( ctx->userData, scrubbed );
+				free( scrubbed );
+			}
+		}
+	}
+	
+	ctx->index--; /* scooch back to the first non-digit number */
 	JSON_STATE_REMOVE(ctx, JSON_STATE_IN_NUMBER);
 	OSRF_BUFFER_RESET(ctx->buffer);
-	if(ctx->handler->handleNumber)
-		ctx->handler->handleNumber( ctx->userData, d );
-	ctx->index--; /* scooch back to the first non-digit number */
 	return 0;
 }
 
-
-
-
 int jsonParseChunk( jsonParserContext* ctx, const char* data, int datalen, int flags ) {
 
 	if( !( ctx && ctx->handler && data && datalen > 0 )) return -1;
@@ -638,9 +646,10 @@
 	_jsonInsertParserItem(p, obj);
 }
 
-void _jsonHandleNumber(void* ctx, double num) {
+void _jsonHandleNumber(void* ctx, const char* numstr) {
+	jsonObject* obj = jsonNewNumberStringObject(numstr);
 	jsonInternalParser* p = (jsonInternalParser*) ctx;
-	_jsonInsertParserItem(p, jsonNewNumberObject(num));
+	_jsonInsertParserItem(p, obj);
 }
 
 void _jsonHandleError(void* ctx, char* str, ...) {



More information about the opensrf-commits mailing list