[Opensrf-commits] r1878 - trunk/src/router (scottmk)

svn at svn.open-ils.org svn at svn.open-ils.org
Sun Dec 20 01:37:06 EST 2009


Author: scottmk
Date: 2009-12-20 01:37:02 -0500 (Sun, 20 Dec 2009)
New Revision: 1878

Modified:
   trunk/src/router/osrf_router.c
   trunk/src/router/osrf_router_main.c
Log:
1. In the parent router process: wait for all of the immediate
child processes to terminate before exiting.

This change eliminates the need for the shell script invoking
the router to sleep before running a ps to identify the effective
router processes (which are grandchildren of the parent). By the
time the parent exits, the children will have launched the
grandchildren and exited.

2. If any of the immediate child processes terminates abnormally
(either a non-zero return code or termination by a signal), issue
a warning message to that effect. This message goes to standard
error, since the parent process never opens a log file.

3. Apply the volatile qualifier to a couple of variables that
are updated asynchronously by a signal handler.

M    src/router/osrf_router.c
M    src/router/osrf_router_main.c


Modified: trunk/src/router/osrf_router.c
===================================================================
--- trunk/src/router/osrf_router.c	2009-12-16 16:57:39 UTC (rev 1877)
+++ trunk/src/router/osrf_router.c	2009-12-20 06:37:02 UTC (rev 1878)
@@ -40,7 +40,7 @@
 	char* resource;       /**< Router's resource name for the Jabber logon. */
 	char* password;       /**< Router's password for the Jabber logon. */
 	int port;             /**< Jabber's port number. */
-	sig_atomic_t stop;    /**< To be set by signal handler to interrupt main loop. */
+	volatile sig_atomic_t stop; /**< To be set by signal handler to interrupt main loop. */
 
 	/** Array of client domains that we allow to send requests through us. */
 	osrfStringArray* trustedClients;

Modified: trunk/src/router/osrf_router_main.c
===================================================================
--- trunk/src/router/osrf_router_main.c	2009-12-16 16:57:39 UTC (rev 1877)
+++ trunk/src/router/osrf_router_main.c	2009-12-20 06:37:02 UTC (rev 1878)
@@ -16,6 +16,9 @@
 */
 
 #include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <errno.h>
 #include "opensrf/utils.h"
 #include "opensrf/log.h"
 #include "opensrf/osrf_list.h"
@@ -25,7 +28,7 @@
 
 static osrfRouter* router = NULL;
 
-static sig_atomic_t stop_signal = 0;
+static volatile sig_atomic_t stop_signal = 0;
 
 static void setupRouter(jsonObject* configChunk);
 
@@ -86,6 +89,8 @@
 
 	/* Spawn child process(es) */
 
+	int rc = EXIT_SUCCESS;
+	int parent = 1;    // boolean
 	int i;
 	for(i = 0; i < configInfo->size; i++) {
 		jsonObject* configChunk = jsonObjectGetIndex(configInfo, i);
@@ -104,18 +109,59 @@
 		}
 		if(fork() == 0) { /* create a new child to run this router instance */
 			setupRouter(configChunk);
+			parent = 0;
 			break;  /* We're a child; don't spawn any more children here */
 		}
 	}
 
+	if( parent ) {
+		// Wait for all child processes to terminate.
+		// If any ended abnormally, report it.
+		while( 1 ) {  // Loop until all children terminate
+			int status;
+			errno = 0;
+			pid_t child_pid = wait( &status );
+			if( -1 == child_pid ) {
+				// ECHILD means no children are left.  Anything else we ignore.
+				if( ECHILD == errno )
+					break;
+			} else if( WIFEXITED( status ) ) {
+				// Relatively normal exit, i.e. via calling exit()
+				// or _exit(), or by returning from main()
+				int child_rc = WEXITSTATUS( status );
+				if( child_rc ) {
+					osrfLogWarning( OSRF_LOG_MARK,
+						"Child router process %ld exited with return status %d",
+						(long) child_pid, child_rc );
+					rc = EXIT_FAILURE;
+				} else {
+					;    // Terminated successfully; silently ignore
+				}
+			} else if( WIFSIGNALED( status ) ) {
+				// Killed by a signal
+				int signo = WTERMSIG( status );
+				const char* extra = "";
+#ifdef WCOREDUMP
+				if( WCOREDUMP( status ) )
+					extra = "with core dump ";
+#endif
+				osrfLogWarning( OSRF_LOG_MARK, "Child router process %ld killed %sby signal %d",
+					(long) child_pid, extra, signo );
+
+				rc = EXIT_FAILURE;
+			}
+		}
+	}
+
 	if( stop_signal ) {
-		// Interrupted by a signal?  Re raise so the parent can see it.
+		// Interrupted by a signal?  Re-raise so the parent can see it.
 		osrfLogWarning( OSRF_LOG_MARK, "Interrupted by signal %d; re-raising",
 				(int) stop_signal );
+		signal( stop_signal, SIG_DFL );
 		raise( stop_signal );
 	}
 
-	return EXIT_SUCCESS;
+	return rc;
 }
 
 /**
@@ -144,7 +190,7 @@
 
 	if(!log_file)
 	{
-		fprintf(stderr, "Log file name not specified for router\n");
+		osrfLogError( OSRF_LOG_MARK, "Log file name not specified for router" );
 		return;
 	}
 
@@ -196,7 +242,6 @@
 		osrfStringArrayAdd(tclients, clientDomain);
 	}
 
-
 	if( tclients->size == 0 || tservers->size == 0 ) {
 		osrfLogError( OSRF_LOG_MARK,
 				"We need trusted servers and trusted client to run the router...");
@@ -213,7 +258,8 @@
 	signal(SIGTERM,routerSignalHandler);
 
 	if( (osrfRouterConnect(router)) != 0 ) {
-		fprintf(stderr, "Unable to connect router to jabber server %s... exiting\n", server );
+		osrfLogError( OSRF_LOG_MARK, "Unable to connect router to jabber server %s... exiting",
+			server );
 		osrfRouterFree(router);
 		return;
 	}



More information about the opensrf-commits mailing list