[OpenSRF-GIT] OpenSRF branch rel_2_1 updated. osrf_rel_2_1_0-rc1-20-g3dd57f2

Evergreen Git git at git.evergreen-ils.org
Fri May 25 23:37:50 EDT 2012


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "OpenSRF".

The branch, rel_2_1 has been updated
       via  3dd57f268b8b5d39f85139e4c4e5246bd9e2cda5 (commit)
      from  e7431d6fa114c35d3dc5b430fbb4bdae99edaa88 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 3dd57f268b8b5d39f85139e4c4e5246bd9e2cda5
Author: Thomas Berezansky <tsbere at mvlc.org>
Date:   Wed May 23 10:13:41 2012 -0400

    Nagios Example Plugin for monitoring services
    
    Does not check that all drones in a brick are fully up, just that the
    entire brick will respond to each service, even if only one drone happens
    to be running the listener properly (and said listener is responding).
    
    Signed-off-by: Thomas Berezansky <tsbere at mvlc.org>
    Signed-off-by: Dan Scott <dan at coffeecode.net>

diff --git a/examples/nagios/check_osrf_services b/examples/nagios/check_osrf_services
new file mode 100755
index 0000000..eb63c6e
--- /dev/null
+++ b/examples/nagios/check_osrf_services
@@ -0,0 +1,219 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use Getopt::Long;
+use OpenSRF::System;
+use OpenSRF::AppSession;
+use OpenSRF::EX qw(:try);
+
+# Sane-ish default path to the OpenSRF bootstrap config; override with --osrf-config
+my $opt_osrf_config = '/openils/conf/opensrf_core.xml';
+
+# Active service names and the routers to check, filled in by the prep_* subs below
+my @services;
+my @routers;
+
+# A bad command line is reported as Nagios UNKNOWN (3) rather than ignored
+unless (GetOptions('osrf-config=s' => \$opt_osrf_config)) {
+    print "Usage: $0 [--osrf-config /path/to/opensrf_core.xml]\n";
+    exit 3;
+}
+
+# If we can't bootstrap then something is horribly wrong (probably "ejabberd isn't running")
+try {
+    OpenSRF::System->bootstrap_client(config_file => $opt_osrf_config);
+} otherwise {
+    print "Bootstrap failed\n";
+    exit 2;
+};
+
+# Fills @services with the de-duplicated list of supposedly active services;
+# prints a status line and exits 2 (Nagios CRITICAL) if it cannot get one
+sub prep_service_list {
+    # Using settings directly, as I don't know how to ask with pre-existing classes
+    my $session = OpenSRF::AppSession->create('opensrf.settings');
+    try {
+        $session->connect;
+    } otherwise {
+        print "Settings Connect Failed\n";
+        exit 2;
+    };
+    # XPath: every appname node under an activeapps node, anywhere, i.e. every
+    # app configured on any drone (apps not run on real drones will show as errors)
+    my $req = $session->request('opensrf.settings.xpath.get', '//activeapps/appname');
+    my $list = $req->recv;
+
+    # recv may hand back undef (presumably on timeout), so guard before ->content
+    if(!defined($list) || UNIVERSAL::isa($list,"Error")) {
+        print "Active Apps List Failed\n";
+        exit 2;
+    }
+
+    $req->finish;
+    # Quick and dirty de-dupe, saved for later
+    my %u_list = map { ($_ => 1) } @{$list->content};
+    @services = keys(%u_list);
+
+    $session->finish;
+    $session->disconnect;
+}
+
+# Builds @routers: one { name, domain, services } hashref per router in the
+# bootstrap config, so this relies on the bootstrap being accurate
+sub prep_routers_list {
+    # First, we grab our (hopefully) cached config
+    my $config = OpenSRF::Utils::Config->current;
+    foreach my $entry (@{$config->bootstrap->routers}) {
+        # No services list means all active ones (aka, private router);
+        # otherwise only what it is supposed to run (aka, public router)
+        my $listed = $entry->{services} ? $entry->{services}->{service} : \@services;
+        # A lone <service> seems to parse to a plain string, which would die
+        # under strict refs when check_routers dereferences it - wrap it
+        $listed = [$listed] if defined($listed) && !ref($listed);
+        push @routers, {
+            name     => $entry->{name},
+            domain   => $entry->{domain},
+            services => $listed,
+        };
+    }
+}
+
+# This does the actual checking of routers/services. Each entry of @routers
+# gets {online} set, plus {checked}, {pass} and {failed} when it is online
+sub check_routers {
+    my $conf = OpenSRF::Utils::Config->current;
+    # Lookup table of the active services, built once instead of a grep per service
+    my %active = map { ($_ => 1) } @services;
+    foreach my $router (@routers) {
+        # HACK WARNING - This changes the router we will be querying by editing the cached
+        # bootstrap config (not guaranteed to keep working). It does NOT change the domain we query from
+        $conf->bootstrap->router_name($router->{name});
+        $conf->bootstrap->domain($router->{domain});
+        # Assume things failed unless they didn't.
+        my $failed = 1;
+        # First, check the router to see what it claims to have active services-wise
+        my $session = OpenSRF::AppSession->create('router');
+        try {
+            $failed = 0 if $session->connect;
+        } otherwise {
+            $failed = 1;
+        };
+        if($session->state != $session->CONNECTED || $failed) {
+            $router->{online} = 0;
+            next;
+        }
+        # Yay router commands! This should give us all services with at least one listener
+        my $req = $session->request('opensrf.router.info.class.list');
+        my $class_list = $req->recv;
+        $req->finish;
+        # No reply at all (undef) means offline too, not only an Error object
+        if(!defined($class_list) || UNIVERSAL::isa($class_list,"Error")) {
+            $session->finish;
+            $session->disconnect;
+            $router->{online} = 0;
+            next;
+        }
+
+        # If we got an answer then this router is online!
+        $router->{online} = 1;
+        # Counters and storage for services checks
+        $router->{checked} = 0;
+        $router->{pass} = 0;
+        $router->{failed} = [];
+        # Quick reference of what the router told us it has
+        my %online_services = map { ($_ => 1) } @{$class_list->content};
+        foreach my $service (@{$router->{services}}) {
+            # This skips services not in the active list. Mainly for routers with explicit lists (aka, public routers) that not all may be configured to run.
+            next unless $active{$service};
+            # Assume we did not pass until proven otherwise
+            my $passed = 0;
+            $router->{checked} += 1;
+            if($online_services{$service}) {
+                # Check the service (a registered listener may still be dead); the empty otherwise keeps a connect exception from aborting the whole check
+                my $session2 = OpenSRF::AppSession->create($service);
+                try {
+                    $session2->connect;
+                } otherwise { };
+                if($session2->state == $session2->CONNECTED) {
+                    # To my knowledge, EVERY service should have atomic echo available
+                    my $req2 = $session2->request('opensrf.system.echo.atomic','Test');
+                    my $testresult = $req2->recv;
+                    # If we got back what we passed in the service is working! Ish. Not a flawless test.
+                    my $echo = (defined($testresult) && !UNIVERSAL::isa($testresult,"Error")) ? $testresult->content : undef;
+                    $passed = 1 if ref($echo) eq 'ARRAY' && defined($echo->[0]) && $echo->[0] eq 'Test';
+                    $req2->finish;
+                    $session2->finish;
+                    $session2->disconnect;
+                }
+            }
+            if($passed) {
+                # Looks like it works, make note!
+                $router->{pass} += 1;
+            } else {
+                # Doesn't work! Save for later reporting.
+                push @{$router->{failed}}, $service;
+            }
+        }
+        $session->finish;
+        $session->disconnect;
+    }
+}
+
+# Prints the Nagios status line, then one "domain:service" line per failed
+# service, and exits with 0 (all good) or 2 (a router or service is down)
+sub output_result {
+    # Routers that never came online had no services checked, so they only
+    # contribute their domain to the offline report
+    my @offline = map { $_->{domain} } grep { !$_->{online} } @routers;
+
+    # Totals and failures across the routers that did answer
+    my ($total_checked, $total_passed) = (0, 0);
+    my @unresponsive;
+    for my $r (grep { $_->{online} } @routers) {
+        $total_checked += $r->{checked};
+        $total_passed  += $r->{pass};
+        for my $svc (@{$r->{failed}}) {
+            push @unresponsive, $r->{domain} . ':' . $svc;
+        }
+    }
+
+    # Pick the headline and exit code; an offline router outranks a dead service
+    my $exit_code = 2;
+    my $headline;
+    if(@offline) {
+        # Usually only one router (public) is listed, but join with commas anyway
+        $headline = "Router(s) Offline: " . join(', ', @offline) . "\n";
+    } elsif($total_checked != $total_passed) {
+        $headline = "Service(s) not responding\n";
+    } else {
+        # Nothing offline and every checked service passed
+        $headline = "Routers/Services OK\n";
+        $exit_code = 0;
+    }
+    print $headline;
+
+    # Failed services follow as additional information, one per line
+    for my $line (@unresponsive) {
+        print "$line\n";
+    }
+    # And return our response code
+    exit $exit_code;
+}
+
+# CHEAT - We need SettingsClient to have cached stuff, so fetch a throwaway value first
+try {
+    my $settings = OpenSRF::Utils::SettingsClient->new();
+    $settings->config_value('none');
+} otherwise {
+    print "Settings Fetch Failed\n";
+    exit 2;
+};
+# Run the stages in order: gather services, gather routers, check them, report
+foreach my $stage (\&prep_service_list, \&prep_routers_list, \&check_routers, \&output_result) {
+    $stage->();
+}
+
+# This code should NEVER run, as the only way out of output_result is an exit statement
+print "What? I shouldn't have reached here.";
+exit 3;

-----------------------------------------------------------------------

Summary of changes:
 examples/nagios/check_osrf_services |  219 +++++++++++++++++++++++++++++++++++
 1 files changed, 219 insertions(+), 0 deletions(-)
 create mode 100755 examples/nagios/check_osrf_services


hooks/post-receive
-- 
OpenSRF


More information about the opensrf-commits mailing list