[OpenSRF-GIT] OpenSRF branch master updated. be2b2645f7da0907366eb0c99b5d6c231c366b30

Evergreen Git git at git.evergreen-ils.org
Mon Sep 11 09:38:39 EDT 2017


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "OpenSRF".

The branch, master, has been updated
       via  be2b2645f7da0907366eb0c99b5d6c231c366b30 (commit)
       via  3d91aeb066c52213f2c82f04bdfc84b87ec8d1d0 (commit)
       via  cb4587be75bf9294d65c7173ff41c275a14e0f04 (commit)
      from  af4ab2355707083a22a2828202a541bf0e718bc0 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit be2b2645f7da0907366eb0c99b5d6c231c366b30
Author: Bill Erickson <berickxx at gmail.com>
Date:   Fri Sep 8 17:53:52 2017 -0400

    LP#1709710 Count Perl chunk/bundle sizes in bytes
    
    For the purposes of bundling/chunking, count the number of bytes in each
    affected string instead of the number of characters.
    
    See also https://perldoc.perl.org/bytes.html and 'perldoc -f length'
    
    Signed-off-by: Bill Erickson <berickxx at gmail.com>
    Signed-off-by: Jason Stephenson <jason at sigio.com>
    Signed-off-by: Galen Charlton <gmc at equinoxinitiative.org>

diff --git a/src/perl/lib/OpenSRF/AppSession.pm b/src/perl/lib/OpenSRF/AppSession.pm
index bb99787..f7b3edf 100644
--- a/src/perl/lib/OpenSRF/AppSession.pm
+++ b/src/perl/lib/OpenSRF/AppSession.pm
@@ -10,6 +10,7 @@ use OpenSRF::Utils::Config;
 use OpenSRF::EX;
 use OpenSRF;
 use Exporter;
+use Encode;
 use base qw/Exporter OpenSRF/;
 use Time::HiRes qw( time usleep );
 use warnings;
@@ -1057,7 +1058,7 @@ sub respond {
             # Example: If escaping doubles the length of the string then $ratio
             # will be 0.5 and we'll cut the chunk size for this message in half.
 
-            my $raw_length = length($str);
+            my $raw_length = length(Encode::encode_utf8($str)); # count bytes
             my $escaped_length = $raw_length;
             $escaped_length += 11 * (() = ( $str =~ /"/g)); # 7 \s and &quot;
             $escaped_length += 4 * (() = ( $str =~ /&/g)); # &amp;
@@ -1070,7 +1071,8 @@ sub respond {
             }
 
             if ($raw_length > $chunk_size) { # send partials ("chunking")
-                for (my $i = 0; $i < length($str); $i += $chunk_size) {
+                my $num_bytes = length(Encode::encode_utf8($str));
+                for (my $i = 0; $i < $num_bytes; $i += $chunk_size) {
                     $response = new OpenSRF::DomainObject::oilsResult::Partial;
                     $response->content( substr($str, $i, $chunk_size) );
                     $self->session->send($type, $response, $self->threadTrace);
@@ -1088,7 +1090,8 @@ sub respond {
 
     if ($self->{max_bundle_count} > 0 or $self->{max_bundle_size} > 0) { # we are bundling, and we need to test the size or count
 
-        $self->{current_bundle_size} += length(OpenSRF::Utils::JSON->perl2JSON($response));
+        $self->{current_bundle_size} += length(
+            Encode::encode_utf8(OpenSRF::Utils::JSON->perl2JSON($response)));
         push @{$self->{current_bundle}}, $type, $response;  
         $self->{current_bundle_count}++;
 

commit 3d91aeb066c52213f2c82f04bdfc84b87ec8d1d0
Author: Galen Charlton <gmc at equinoxinitiative.org>
Date:   Fri Sep 8 15:34:29 2017 -0400

    LP#1709710: write unit tests for osrfXmlEscapingLength()
    
    Signed-off-by: Galen Charlton <gmc at equinoxinitiative.org>
    Signed-off-by: Bill Erickson <berickxx at gmail.com>
    Signed-off-by: Jason Stephenson <jason at sigio.com>
    Signed-off-by: Galen Charlton <gmc at equinoxinitiative.org>

diff --git a/tests/Makefile.am b/tests/Makefile.am
index 107872d..4e6b3ad 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -3,9 +3,9 @@ OSRF_INC = $(top_srcdir)/include/opensrf
 AM_LDFLAGS = $(DEF_LDFLAGS) -R $(libdir)
 
 TESTS = check_osrf_message check_osrf_json_object check_osrf_list check_osrf_stack check_transport_client \
-		check_transport_message
+		check_transport_message check_osrf_utils
 check_PROGRAMS = check_osrf_message check_osrf_json_object check_osrf_list check_osrf_stack check_transport_client \
-				 check_transport_message
+				 check_transport_message check_osrf_utils
 
 check_osrf_message_SOURCES = $(COMMON) $(OSRF_INC)/osrf_message.h check_osrf_message.c
 check_osrf_message_CFLAGS = @CHECK_CFLAGS@ $(DEF_CFLAGS)
@@ -30,3 +30,7 @@ check_transport_client_LDADD = @CHECK_LIBS@ $(top_builddir)/src/libopensrf/libop
 check_transport_message_SOURCES = $(COMMON) $(OSRF_INC)/transport_message.h check_transport_message.c
 check_transport_message_CFLAGS = @CHECK_CFLAGS@ $(DEF_CFLAGS)
 check_transport_message_LDADD = @CHECK_LIBS@ $(top_builddir)/src/libopensrf/libopensrf.la
+
+check_osrf_utils_SOURCES = $(COMMON) $(OSRF_INC)/utils.h check_osrf_utils.c
+check_osrf_utils_CFLAGS = @CHECK_CFLAGS@ $(DEF_CFLAGS)
+check_osrf_utils_LDADD = @CHECK_LIBS@ $(top_builddir)/src/libopensrf/libopensrf.la
diff --git a/tests/check_osrf_utils.c b/tests/check_osrf_utils.c
new file mode 100644
index 0000000..b0a3a6d
--- /dev/null
+++ b/tests/check_osrf_utils.c
@@ -0,0 +1,43 @@
+#include <check.h>
+#include "opensrf/utils.h"
+
+
+
+//Set up the test fixture
+void setup(void){
+}
+
+//Clean up the test fixture
+void teardown(void){
+}
+
+// BEGIN TESTS
+
+START_TEST(test_osrfXmlEscapingLength)
+  const char* ordinary = "12345";
+  fail_unless(osrfXmlEscapingLength(ordinary) == 0,
+      "osrfXmlEscapingLength should return 0 if string has no special characters");
+  const char* special = "<tag attr=\"attribute value\">a & b</tag>";
+  ck_assert_int_eq(osrfXmlEscapingLength(special), 38);
+END_TEST
+
+//END TESTS
+
+Suite *osrf_utils_suite(void) {
+  //Create test suite, test case, initialize fixture
+  Suite *s = suite_create("osrf_utils");
+  TCase *tc_core = tcase_create("Core");
+  tcase_add_checked_fixture(tc_core, setup, teardown);
+
+  //Add tests to test case
+  tcase_add_test(tc_core, test_osrfXmlEscapingLength);
+
+  //Add test case to test suite
+  suite_add_tcase(s, tc_core);
+
+  return s;
+}
+
+void run_tests(SRunner *sr) {
+  srunner_add_suite(sr, osrf_utils_suite());
+}

commit cb4587be75bf9294d65c7173ff41c275a14e0f04
Author: Mike Rylander <miker at esilibrary.com>
Date:   Fri Aug 18 11:43:31 2017 -0400

    LP#1709710: Make chunk sizing smart about XML quoting
    
    XML inside JSON as a quoted string that's itself inside XML causes quite the
    pile up of nested escaping of certain characters in OpenSRF PARTIAL_RESPONSE
    messages.  Here we check for the worst offenders (<, >, &, and ") and account
    for the cost of escaping them in chunked response stanzas.
    
    Signed-off-by: Mike Rylander <mrylander at gmail.com>
    Signed-off-by: Galen Charlton <gmc at equinoxinitiative.org>
    Signed-off-by: Bill Erickson <berickxx at gmail.com>
    Signed-off-by: Jason Stephenson <jason at sigio.com>
    Signed-off-by: Galen Charlton <gmc at equinoxinitiative.org>

diff --git a/include/opensrf/utils.h b/include/opensrf/utils.h
index 2276dd6..34e0ba6 100644
--- a/include/opensrf/utils.h
+++ b/include/opensrf/utils.h
@@ -377,6 +377,12 @@ char* md5sum( const char* text, ... );
 */
 int osrfUtilsCheckFileDescriptor( int fd );
 
+/*
+	Returns the approximate additional length of
+	a string after XML escaping <, >, &, and ".
+*/
+size_t osrfXmlEscapingLength ( const char* str );
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/libopensrf/osrf_app_session.c b/src/libopensrf/osrf_app_session.c
index 5633e1b..28242e7 100644
--- a/src/libopensrf/osrf_app_session.c
+++ b/src/libopensrf/osrf_app_session.c
@@ -1363,13 +1363,20 @@ int osrfAppRequestRespondComplete(
 			OSRF_STATUS_COMPLETE );
 
 	if (data) {
-
 		char* json = jsonObjectToJSON(data);
-		size_t data_size = strlen(json);
+		size_t raw_size = strlen(json);
+		size_t extra_size = osrfXmlEscapingLength(json);
+		size_t data_size = raw_size + extra_size;
 		size_t chunk_size = OSRF_MSG_CHUNK_SIZE;
-		if (chunk_size > 0 && chunk_size < data_size) {
 
-			osrfSendChunkedResult(ses, requestId, json, data_size, chunk_size);
+		if (data_size > chunk_size) // calculate an escape-scaled chunk size
+			chunk_size = ((double)raw_size / (double)data_size) * (double)chunk_size;
+
+		if (chunk_size > 0 && chunk_size < raw_size) {
+			// chunking -- response message exceeds max message size.
+			// break it up into chunks for partial delivery
+
+			osrfSendChunkedResult(ses, requestId, json, raw_size, chunk_size);
 			osrfAppSessionSendBatch( ses, &status, 1 );
 
 		} else {
diff --git a/src/libopensrf/osrf_application.c b/src/libopensrf/osrf_application.c
index ca6c219..4a5f53e 100644
--- a/src/libopensrf/osrf_application.c
+++ b/src/libopensrf/osrf_application.c
@@ -734,15 +734,20 @@ static int _osrfAppRespond( osrfMethodContext* ctx, const jsonObject* data, int
 
 		if( data ) {
             char* data_str = jsonObjectToJSON(data); // free me (below)
-            size_t data_size = strlen(data_str);
+            size_t raw_size = strlen(data_str);
+            size_t extra_size = osrfXmlEscapingLength(data_str);
+            size_t data_size = raw_size + extra_size;
             size_t chunk_size = ctx->method->max_chunk_size;
 
-            if (chunk_size > 0 && chunk_size < data_size) {
+            if (data_size > chunk_size) // calculate an escape-scaled chunk size
+                chunk_size = ((double)raw_size / (double)data_size) * (double)chunk_size;
+
+            if (chunk_size > 0 && chunk_size < raw_size) {
                 // chunking -- response message exceeds max message size.
                 // break it up into chunks for partial delivery
 
 				osrfSendChunkedResult(ctx->session, ctx->request,
-									  data_str, data_size, chunk_size);
+									  data_str, raw_size, chunk_size);
 
             } else {
 
diff --git a/src/libopensrf/utils.c b/src/libopensrf/utils.c
index 6628c8c..1c049c0 100644
--- a/src/libopensrf/utils.c
+++ b/src/libopensrf/utils.c
@@ -781,3 +781,26 @@ int osrfUtilsCheckFileDescriptor( int fd ) {
 	return 0;
 }
 
+size_t osrfXmlEscapingLength ( const char* str ) {
+	int extra = 0;
+	const char* s;
+	for (s = str; *s; ++s) {
+		switch (*s) {
+			case '>':
+			case '<':
+				extra += 3;
+				break;
+			case '&':
+				extra += 4;
+				break;
+			case '"':
+				extra += 11;
+				break;
+			default:
+				break;
+		}
+	}
+
+	return extra;
+}
+
diff --git a/src/perl/lib/OpenSRF/AppSession.pm b/src/perl/lib/OpenSRF/AppSession.pm
index 36d56b0..bb99787 100644
--- a/src/perl/lib/OpenSRF/AppSession.pm
+++ b/src/perl/lib/OpenSRF/AppSession.pm
@@ -1051,10 +1051,28 @@ sub respond {
 
         if ($self->max_chunk_size > 0) { # we might need to chunk
             my $str = OpenSRF::Utils::JSON->perl2JSON($msg);
-            if (length($str) > $self->max_chunk_size) { # send partials ("chunking")
-                for (my $i = 0; $i < length($str); $i += $self->max_chunk_size) {
+
+            # XML can add a lot of length to a chunk due to escaping, so we
+            # calculate chunk size based on an XML-escaped version of the message.
+            # Example: If escaping doubles the length of the string then $ratio
+            # will be 0.5 and we'll cut the chunk size for this message in half.
+
+            my $raw_length = length($str);
+            my $escaped_length = $raw_length;
+            $escaped_length += 11 * (() = ( $str =~ /"/g)); # 7 \s and &quot;
+            $escaped_length += 4 * (() = ( $str =~ /&/g)); # &amp;
+            $escaped_length += 3 * (() = ( $str =~ /[<>]/g)); # &lt; / &gt;
+
+            my $chunk_size = $self->max_chunk_size;
+
+            if ($escaped_length > $self->max_chunk_size) {
+                $chunk_size = ($raw_length / $escaped_length) * $self->max_chunk_size;
+            }
+
+            if ($raw_length > $chunk_size) { # send partials ("chunking")
+                for (my $i = 0; $i < length($str); $i += $chunk_size) {
                     $response = new OpenSRF::DomainObject::oilsResult::Partial;
-                    $response->content( substr($str, $i, $self->max_chunk_size) );
+                    $response->content( substr($str, $i, $chunk_size) );
                     $self->session->send($type, $response, $self->threadTrace);
                 }
                 # This triggers reconstruction on the remote end

-----------------------------------------------------------------------

Summary of changes:
 include/opensrf/utils.h            |    6 +++++
 src/libopensrf/osrf_app_session.c  |   15 +++++++++---
 src/libopensrf/osrf_application.c  |   11 ++++++--
 src/libopensrf/utils.c             |   23 +++++++++++++++++++
 src/perl/lib/OpenSRF/AppSession.pm |   29 +++++++++++++++++++++---
 tests/Makefile.am                  |    8 +++++-
 tests/check_osrf_utils.c           |   43 ++++++++++++++++++++++++++++++++++++
 7 files changed, 122 insertions(+), 13 deletions(-)
 create mode 100644 tests/check_osrf_utils.c


hooks/post-receive
-- 
OpenSRF


More information about the opensrf-commits mailing list