[open-ils-commits] r884 - ESI-Examples/sys/lib/nagios-plugins (dmcmorris)

svn at svn.open-ils.org svn at svn.open-ils.org
Thu May 6 15:19:55 EDT 2010


Author: dmcmorris
Date: 2010-05-06 15:19:50 -0400 (Thu, 06 May 2010)
New Revision: 884

Modified:
   ESI-Examples/sys/lib/nagios-plugins/check_null
Log:
Updating check_null script with changes from Thomas Berezansky - now more efficient. Thanks, Thomas!

Modified: ESI-Examples/sys/lib/nagios-plugins/check_null
===================================================================
--- ESI-Examples/sys/lib/nagios-plugins/check_null	2010-05-06 01:24:37 UTC (rev 883)
+++ ESI-Examples/sys/lib/nagios-plugins/check_null	2010-05-06 19:19:50 UTC (rev 884)
@@ -2,6 +2,7 @@
 #
 # Copyright (C) 2008-2009  Equinox Software, Inc.
 # Written by Don McMorris <dmcmorris at esilibrary.com>
+# Partially Re-written by Thomas Berezansky <tsbere at mvlc.org>
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
@@ -15,78 +16,59 @@
 #
 #
 
-
+typeset -i PERIOD
+typeset -i WARNLIMIT
+typeset -i CRITLIMIT
+typeset -i NULLTOT
+PERIOD=$1
 WARNLIMIT=$2
 CRITLIMIT=$3
-PERIOD=$1
-# Note: These should really be checked to ensure they are defined and within range...
+if [ ! $PERIOD -gt 0 -o ! $PERIOD -le 60 ]; then
+	echo "WARN: Period must be defined and greater than 0 but less than or equal to 60"
+	exit 1
+elif [ ! $WARNLIMIT -gt 0 ]; then
+	echo "WARN: Warn limit must be defined and greater than 0"
+	exit 1
+elif [ ! $CRITLIMIT -gt $WARNLIMIT ]; then
+	echo "WARN: Crit limit must be defined and greater than warn limit"
+	exit 1
+fi
 
-PREVTOT=0
-LOGFILE="/var/log/remote/prod/$(date +%Y/%m/%d)/gateway.$(date +%H).log"
+NULLTOT=0
+CURLOGFILE="/var/log/remote/prod/$(date +%Y/%m/%d/gateway.%H).log"
+PREVLOGFILE="/var/log/remote/prod/$(date --date="$PERIOD minutes ago" +%Y/%m/%d/gateway.%H).log"
+STARTMIN=$(date --date="$PERIOD minutes ago" +%M)
 
-if [ $(date +%H | cut -b1) = 0 ]; then
-	CURRHOUR=$(date +%H | cut -b2)
-else
-	CURRHOUR=$(date +%H)
+if [ ! -f $CURLOGFILE ]; then
+	echo "WARN: Current log file missing"
+	exit 1
 fi
 
-if [ $(date +%M | cut -b1) = 0 ]; then
-        CURRMIN=$(date +%M | cut -b2 )
-else
-        CURRMIN=$(date +%M)
+if [ $CURLOGFILE != $PREVLOGFILE ]; then
+	[ -f $PREVLOGFILE ] && NULLTOT=$(grep "Returning NULL" $PREVLOGFILE | cut -d":" -f2 | grep -c "^$(seq -s"\|" -f"%02.0f" $STARTMIN 59)$")
+	STARTMIN=0
 fi
 
-if [ $CURRMIN -lt $PERIOD ]; then
-	# How many minutes of the last hour do we need to check?
-        TMPDIFFM2=$((60 - $(($PERIOD - $CURRMIN))))
+NULLTOT=$(($NULLTOT + $(grep "Returning NULL" $CURLOGFILE | cut -d":" -f2 | grep -c "^$(seq -s"\|" -f"%02.0f" $STARTMIN $(date +%M))$")))
 
-	# This logic will mean that "Returning NULL"'s logged at the late 2300 hour will not be counted during the early Midnight hour check.
-	# This is acceptable for now.
-        if [ $CURRHOUR -gt 0 ]; then
-		# define LOGFILE2 (last hours' log)
-		if [ $CURRHOUR -gt 11 ]; then
-			LOGFILE2="/var/log/remote/prod/$(date +%Y/%m/%d)/gateway.$(($CURRHOUR - 1)).log"
-		else
-			LOGFILE2="/var/log/remote/prod/$(date +%Y/%m/%d)/gateway.0$(($CURRHOUR - 1)).log"
-		fi
+TOPSERVER=$(grep "Returning NULL" $CURLOGFILE | cut -d" " -f3 | sort | uniq -c | sort -nr | head -1)
 
-		while [ $TMPDIFFM2 -lt 60 ]; do
-			PREVTOT=$(($PREVTOT + $(grep "Returning NULL" $LOGFILE2 | cut -d":" -f2 | grep -c $TMPDIFFM2)))
-			TMPDIFFM2=$(($TMPDIFFM2 + 1))
-		done
-        fi
-	while [ $TMPDIFF1 -le $CURRMIN ]; do
-		PREVTOT=$(($PREVTOT + $(grep "Returning NULL" $LOGFILE | cut -d":" -f2 | grep -c $TMPDIFF1)))
-	        TMPDIFF1=$(($TMPDIFF1 + 1))
-	done
-else
-	TMPDIFF1=$(($CURRMIN-$PERIOD))
-	while [ $TMPDIFF1 -le $CURRMIN ]; do
-		PREVTOT=$(($PREVTOT + $(grep "Returning NULL" $LOGFILE | cut -d":" -f2 | grep -c $TMPDIFF1)))
-	        TMPDIFF1=$(($TMPDIFF1 + 1))
-	done
-
-fi
-
-
-TOPSERVER=$(grep "Returning NULL" $LOGFILE | cut -d" " -f3 | sort | uniq -c | sort -nr | head -1)
-
 if [ "$TOPSERVER" != null ]; then
 	SVRMSG=" (Top server this hour: $TOPSERVER)"
 else
 	SVRMSG="."
 fi
 
-if [ $PREVTOT -ge $CRITLIMIT ]; then
-        echo "CRIT: $PREVTOT NULLs returned in past $PERIOD minutes$SVRMSG"
+if [ $NULLTOT -ge $CRITLIMIT ]; then
+        echo "CRIT: $NULLTOT NULLs returned in past $PERIOD minutes$SVRMSG"
         exit 2
-elif [ $PREVTOT -ge $WARNLIMIT ]; then
-        echo "WARN: $PREVTOT NULLs returned in the past $PERIOD minutes$SVRMSG"
+elif [ $NULLTOT -ge $WARNLIMIT ]; then
+        echo "WARN: $NULLTOT NULLs returned in the past $PERIOD minutes$SVRMSG"
         exit 1
-elif [ $PREVTOT -lt $WARNLIMIT ]; then
-        echo "OK: $PREVTOT NULLs returned in the past $PERIOD minutes$SVRMSG"
+elif [ $NULLTOT -lt $WARNLIMIT ]; then
+        echo "OK: $NULLTOT NULLs returned in the past $PERIOD minutes$SVRMSG"
         exit 0
 else
-        echo "WARN: An error has occurred $PREVTOT $PERIOD"
+        echo "WARN: An error has occurred $NULLTOT $PERIOD"
         exit 1
 fi



More information about the open-ils-commits mailing list