[open-ils-commits] r640 - in ESI-Examples/sys: . lib lib/nagios-plugins (dmcmorris)
svn at svn.open-ils.org
svn at svn.open-ils.org
Wed Sep 2 15:39:54 EDT 2009
Author: dmcmorris
Date: 2009-09-02 15:39:50 -0400 (Wed, 02 Sep 2009)
New Revision: 640
Added:
ESI-Examples/sys/lib/
ESI-Examples/sys/lib/nagios-plugins/
ESI-Examples/sys/lib/nagios-plugins/check_lock
ESI-Examples/sys/lib/nagios-plugins/check_null
ESI-Examples/sys/lib/nagios-plugins/check_null-README
Log:
nagios plugins
Added: ESI-Examples/sys/lib/nagios-plugins/check_lock
===================================================================
--- ESI-Examples/sys/lib/nagios-plugins/check_lock (rev 0)
+++ ESI-Examples/sys/lib/nagios-plugins/check_lock 2009-09-02 19:39:50 UTC (rev 640)
@@ -0,0 +1,38 @@
+#!/bin/sh
+
+# Author       : Dave Brown, Sys Admin, ESI
+# Last Updated : July 7, 2009
+# Purpose      : Nagios plugin. When the lock file exists, verify that the
+#                process expected to hold it is running.
+# Usage        : check_lock <filename> <process>
+# Exit status  : 0 (OK)       file absent, or file present and process running
+#                2 (CRITICAL) file present but process not running
+#                3 (UNKNOWN)  wrong number of arguments
+
+if [ "$#" -ne 2 ]; then
+  echo "UNKNOWN: usage: $0 <filename> <process>"
+  exit 3
+fi
+
+scriptname=$0
+filetocheck=$1
+proctocheck=$2
+
+if [ -f "$filetocheck" ]; then
+  # Count process-table lines that mention the process. The grep commands and
+  # this plugin's own command line (which carries the process name as an
+  # argument) are filtered out so they cannot satisfy the check by themselves.
+  matches=$(ps aux | grep -i -e "$proctocheck" | grep -v grep \
+    | grep -v -F -c -e "$scriptname")
+  if [ "$matches" -gt 0 ]; then
+    echo "OK: $filetocheck exists and $proctocheck running"
+    exit 0
+  else
+    echo "CRIT: $filetocheck exists but $proctocheck not running"
+    exit 2
+  fi
+else
+  echo "OK: $filetocheck not found"
+  exit 0
+fi
+
Added: ESI-Examples/sys/lib/nagios-plugins/check_null
===================================================================
--- ESI-Examples/sys/lib/nagios-plugins/check_null (rev 0)
+++ ESI-Examples/sys/lib/nagios-plugins/check_null 2009-09-02 19:39:50 UTC (rev 640)
@@ -0,0 +1,100 @@
+#!/bin/bash
+# Written by Don McMorris <dmcmorris at esilibrary.com>
+# for Equinox Software. (c) ESI 2008
+#
+# Nagios/NRPE plugin: counts the "Returning NULL" lines written to the hourly
+# gateway logs during the last PERIOD minutes and compares the total with the
+# warning and critical limits.
+#
+# Usage       : check_null <period-minutes> <warn-limit> <crit-limit>
+# Exit status : 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN (bad arguments)
+
+PERIOD=$1
+WARNLIMIT=$2
+CRITLIMIT=$3
+
+# Every argument ends up in an integer comparison, so refuse anything that is
+# missing or is not a plain non-negative integer.
+for arg in "$PERIOD" "$WARNLIMIT" "$CRITLIMIT"; do
+  if [[ ! "$arg" =~ ^[0-9]+$ ]]; then
+    echo "UNKNOWN: usage: $0 <period-minutes> <warn-limit> <crit-limit>"
+    exit 3
+  fi
+done
+
+# Force base 10 so that zero-padded input such as "08" is not read as octal.
+PERIOD=$((10#$PERIOD))
+WARNLIMIT=$((10#$WARNLIMIT))
+CRITLIMIT=$((10#$CRITLIMIT))
+
+LOGROOT="/var/log/remote/prod"
+
+# Read the clock once so the date, hour and minute all describe the same
+# instant, even when the check runs right on an hour boundary.
+# HH and MM stay zero-padded for file names; 10# strips the padding for maths.
+read -r TODAY HH MM <<<"$(date '+%Y/%m/%d %H %M')"
+CURRHOUR=$((10#$HH))
+CURRMIN=$((10#$MM))
+
+LOGFILE="$LOGROOT/$TODAY/gateway.$HH.log"
+
+# Print the number of "Returning NULL" lines in log file $1 whose minute lies
+# between $2 and $3 inclusive. The minute is read from the second
+# ':'-separated field and compared numerically; this presumes each log
+# line carries an HH:MM:SS timestamp before any other colon.
+# A missing or unreadable log counts as zero.
+count_nulls() {
+  local file=$1 first=$2 last=$3
+  if [[ ! -r "$file" ]]; then
+    echo 0
+    return
+  fi
+  awk -F: -v first="$first" -v last="$last" '
+    /Returning NULL/ {
+      minute = $2 + 0
+      if (minute >= first && minute <= last) hits++
+    }
+    END { print hits + 0 }
+  ' "$file"
+}
+
+PREVTOT=0
+if [ "$CURRMIN" -lt "$PERIOD" ]; then
+  # The window reaches back into the previous hour's log.
+  # During the 00:xx hour that log sits in yesterday's directory and is not
+  # consulted, so late-23:xx NULLs are not counted. This is acceptable for now.
+  if [ "$CURRHOUR" -gt 0 ]; then
+    # %02d keeps the hour two digits wide for every value from 00 to 22.
+    LOGFILE2=$(printf '%s/%s/gateway.%02d.log' "$LOGROOT" "$TODAY" "$((CURRHOUR - 1))")
+    PREVTOT=$(count_nulls "$LOGFILE2" "$((60 - (PERIOD - CURRMIN)))" 59)
+  fi
+  # Then the current hour, from minute 0 up to the current minute.
+  PREVTOT=$((PREVTOT + $(count_nulls "$LOGFILE" 0 "$CURRMIN")))
+else
+  # The whole window lies inside the current hour's log.
+  PREVTOT=$(count_nulls "$LOGFILE" "$((CURRMIN - PERIOD))" "$CURRMIN")
+fi
+
+# Busiest server this hour: the most frequent third space-separated field
+# among the "Returning NULL" lines, preceded by its count from uniq -c.
+TOPSERVER=""
+if [[ -r "$LOGFILE" ]]; then
+  TOPSERVER=$(grep "Returning NULL" "$LOGFILE" | cut -d" " -f3 | sort | uniq -c | sort -nr | head -1)
+fi
+
+if [[ -n "$TOPSERVER" ]]; then
+  SVRMSG=" (Top server this hour: $TOPSERVER)"
+else
+  SVRMSG="."
+fi
+
+if [ "$PREVTOT" -ge "$CRITLIMIT" ]; then
+  echo "CRIT: $PREVTOT NULLs returned in past $PERIOD minutes$SVRMSG"
+  exit 2
+elif [ "$PREVTOT" -ge "$WARNLIMIT" ]; then
+  echo "WARN: $PREVTOT NULLs returned in the past $PERIOD minutes$SVRMSG"
+  exit 1
+else
+  echo "OK: $PREVTOT NULLs returned in the past $PERIOD minutes$SVRMSG"
+  exit 0
+fi
Added: ESI-Examples/sys/lib/nagios-plugins/check_null-README
===================================================================
--- ESI-Examples/sys/lib/nagios-plugins/check_null-README (rev 0)
+++ ESI-Examples/sys/lib/nagios-plugins/check_null-README 2009-09-02 19:39:50 UTC (rev 640)
@@ -0,0 +1,24 @@
+check_null is intended to be an NRPE plugin on the primary logger server... It's pretty quick and dirty, but works.
+
+The string "Returning NULL" in the gateway.log is indicative of a timeout. Numerous timeouts (greater than half a
+dozen or so) can indicate a larger issue and/or problems that will be visible to the end user.
+
+This script is currently considered PROPRIETARY of Equinox. Please contact <operations at esilibrary.com> requesting
+permission to use it. Equinox Software will generally allow no-cost use of the script, and /may/ re-vamp/release
+to the community under an open-source license. For now, however, it is proprietary and without support.
+
+You may want to start with a warn/crit combo of 5/20 or so for a period of 15 (minutes). Obviously, this will need to
+be adjusted to your specific environment.
+
+Most service timeouts come from poor database responsiveness (either communication issues between apps and db, or
+overloaded database server pool).
+
+And as usual, I will disclaim that I am NOT a programmer. There are most certainly better ways to do what this
+script is intended to do, but the limited knowledge I had at the time resulted in the "hack until it works"
+release.
+
+If you would like to modify the script, please e-mail <operations at esilibrary.com>. We will very likely GPL
+the script and add to the ILS-Contrib repo if there is interest in modifications.
+
+Again, the current license is PROPRIETARY, contact <operations at esilibrary.com> for permission to use, and
+return any modifications to <operations at esilibrary.com>.
More information about the open-ils-commits
mailing list