home *** CD-ROM | disk | FTP | other *** search
- From: tchrist@convex.com (Tom Christiansen)
- Newsgroups: comp.lang.perl,alt.sources
- Subject: statmon: monitor up/down and time status of hosts
- Message-ID: <100508@convex.convex.com>
- Date: 10 Mar 90 14:48:06 GMT
-
- Here's a little program I hacked up the other night to watch when hosts
- went up and down, as well as how their clocks strayed. To find out
- how this thing works, you can do these things:
-
- * read the following description
- * call statmon w/o any args for a usage message
- * type `h' while in the program
- * read the source
-
- I suggest doing all of these, in that order. If nothing else, the source
- is a decent example of playing with cbreak and echo mode, using
- UDP sockets, using select to multiplex i/o and timeouts, and
- using the dump operator to greatly speed up startup time.
- It probably won't work very well, if at all, for non-BSD(ish) sites.
-
- Here's what it does: given a list of hosts, which can be read in from
- a file (a simplified ghosts-type file) it tries to talk to the time/udp
- service of their inetd's, and if they go too long without any answer
- after repeated attempts, it considers them down and tells you so. When
- they come back up again, you get a message that this has happened.
- This is better than mere pings, as it requires a coherent inetd to
- answer you and is pretty cheap. The program will also tell you which
- hosts have times that are far astray from your own. The retry,
- timeout, clock tolerance, and sleep interval between sends are all
- command-line configurable. This is all done asynchronously with
- select()s, including your keyboard inputs, which are in cbreak mode.
-
- Porting notes: you'll need the following include files, probably in
- the perl library directory, which you should have generated from the
- corresponding C include files using the makelib program in the perl
- source directory:
-
- sys/errno.h
- sys/socket.h
- sys/ioctl.h
-
- The last one needs a %sizeof array to work right. I put mine
- in sizeof.h in the perl library. Mine happens to look like this.
- Yours, unless you're on a Convex, will almost surely vary.
-
- $sizeof{'char'} = 1;
- $sizeof{'int'} = 4;
- $sizeof{'long'} = 4;
- $sizeof{'float'} = 4;
- $sizeof{'double'} = 8;
- $sizeof{'long long'} = 8;
- $sizeof{'struct arpreq'} = 36;
- $sizeof{'struct ifconf'} = 8;
- $sizeof{'struct ifreq'} = 32;
- $sizeof{'struct ltchars'} = 6;
- $sizeof{'struct pcntl'} = 116;
- $sizeof{'struct rtentry'} = 52;
- $sizeof{'struct sgttyb'} = 6;
- $sizeof{'struct tchars'} = 6;
- $sizeof{'struct ttychars'} = 14;
- $sizeof{'struct winsize'} = 8;
- $sizeof{'struct system_information'} = 12;
- 1;
-
- It also wants getopts.pl and ctime.pl.
-
- If you find yourself with copious quantities of unwanted disk
- space, you can spare yourself the costs of initialization at
- each startup by calling 'statmon -u' to dump the state of the
- program. This will skip all the include files and static init
- code when restarted. I suggest you make sure that the program
- actually runs first, though, before you bother to dump it. Also,
- those are big include files, so your dump will be pretty huge.
-
- --tom
-
- #! /bin/sh
- # This is a shell archive, meaning:
- # 1. Remove everything above the #! /bin/sh line.
- # 2. Save the resulting text in a file.
- # 3. Execute the file with /bin/sh (not csh) to create:
- # statmon
- # This archive created: Sat Mar 10 08:23:23 1990
- export PATH; PATH=/bin:/usr/bin:$PATH
- echo shar: "extracting 'statmon'" '(9588 characters)'
- if test -f 'statmon'
- then
- echo shar: "will not over-write existing file 'statmon'"
- else
- sed 's/^ X//' << \SHAR_EOF > 'statmon'
- X#!/usr/bin/perl
- X#
- X# statmon - check for hosts going up and down, or with bad clocks
- X# tom christiansen <tchrist@convex.com> on 3/8/90
- X#
- X
- X# Entry label named by the 'dump RESTART' issued when -u is given.
- XRESTART: # shouldn't really need this...
- X
- X# $program is $0 with any leading directory components removed
- X($program = $0) =~ s%.*/%%;
- X$version = 0.3;
- X
- X# unbuffer the currently selected output handle
- X$| = 1;
- X
- X&bad_usage unless $#ARGV >= 0;
- X
- Xprintf "%s v%3.1g; ", $program, $version;
- X
- X# $compiled is set just before the program state is dumped (-u), so a
- X# restarted dump takes the first branch and skips the one-time setup below.
- Xif ($compiled) {
- X print "quick start.... ";
- X} else {
- X print "initializing... ";
- X
- X # some useful constants
- X # pack/unpack templates used by &sockaddr, &inetaddr and &set_cbreak
- X $sockaddr_t = 'S n a4 x8';
- X $inetaddr_t = 'C4';
- X $sgttyb_t = 'C4 S';
- X
- X # subtracted from each reply in &readsock before it is compared with time
- X $SINCE_1970 = 2208988800;
- X
- X $def_timeout = 5; # how long we give a host to answer us
- X $def_timewarp = 10; # how far time may vary until we complain
- X $def_retries = 5; # he gets this many tries to answer us
- X $def_sleep = 5; # between send loops
- X
- X # working values; the -r/-t/-c/-s switches may override them later
- X $retries = $def_retries;
- X $timeout = $def_timeout;
- X $timewarp = $def_timewarp;
- X $sleep = $def_sleep;
- X
- X # suffix appended to several fatal error messages
- X $OOPS = ", can't continue";
- X
- X $dashes = ('-' x 75) . "\n";
- X
- X # keystroke => name of the subroutine that &readtty calls for it
- X %cmds = (
- X 'q', 'quit',
- X 'x', 'quit',
- X 'h', 'help',
- X '?', 'help',
- X 't', 'timers',
- X 'd', 'downers',
- X 'u', 'uppers' ,
- X 'm', 'missing',
- X 'U', 'usage'
- X );
- X
- X # load the header translations and library files; &source dies on failure
- X &source('sys/errno.h');
- X &source('sys/socket.h');
- X &source('sizeof.h');
- X &source('sys/ioctl.h');
- X &source('ctime.pl');
- X &source('getopts.pl');
- X}
- X
- X
- X# Parse the command-line switches; -t, -r, -c and -s take a value.
- X&Getopts('udmt:r:c:s:') || &bad_usage;
- X
- X$debug = $opt_d;
- X
- X
- X# switches given on the command line override the defaults
- X$retries = $opt_r if defined $opt_r;
- X$timeout = $opt_t if defined $opt_t;
- X$timewarp = $opt_c if defined $opt_c;
- X$sleep = $opt_s if defined $opt_s;
- X
- X
- X# -u: mark the state as initialized and dump it, naming RESTART as the
- X# label to resume from.
- Xif ($opt_u) { # dump this puppy
- X $compiled = 1;
- X print "dumping\n";
- X reset 'o'; # so the opt_* vars (especially $opt_u!) go away
- X dump RESTART;
- X # not reached
- X}
- X
- X# these signals all run &quit, which restores the terminal before exiting
- X@SIG{'INT','HUP','TERM','QUIT'} = ('quit','quit','quit','quit');
- X
- X# SIGCONT runs &continue, which puts the terminal back into cbreak mode
- X$SIG{'CONT'} = 'continue';
- X
- X# if they say -m, then they want to take stuff from /usr/adm/MACHINES
- X#
- X# which is of the general form:
- X#
- X# NAME features
- X#
- X# spool vax bsd
- X# coyote sunos4 diskserver
- X# pokey sunos4 diskless slow
- X# gort convex bsd
- X#
- X# Build @hosts: with -m, from a MACHINES-style file filtered by the remaining
- X# command-line arguments (each one a criterion every kept line must match);
- X# otherwise straight from the command-line arguments.
- Xif ($opt_m) {
- X    # try very hard to find a machines file: $GHOSTS, then $MACHINES,
- X    # then ~/.ghosts, then ~/.machines, then /usr/adm/MACHINES
- X    $MACHINES = $ENV{'GHOSTS'};
- X    $MACHINES = $ENV{'MACHINES'} unless $MACHINES;
- X    $MACHINES = $ENV{'HOME'} . '/.ghosts' unless $MACHINES;
- X    $MACHINES = $ENV{'HOME'} . '/.machines' unless -f $MACHINES;
- X    $MACHINES = '/usr/adm/MACHINES' unless -f $MACHINES;
- X
- X    die "Can't find any MACHINES file" unless -f $MACHINES;
- X
- X    # Parenthesized on purpose: written as "open MACHINES || die ...", the
- X    # || binds to open's operand and a failed open is never noticed.
- X    open(MACHINES, $MACHINES) || die "can't open $MACHINES: $!";
- X
- X    print "opened $MACHINES\n" if $debug;
- X    @hosts = <MACHINES>;
- X    close MACHINES;
- X
- X    # keep only lines of the form "name features..."
- X    @hosts = grep(/^\w+\s/, @hosts);
- X
- X    # every remaining argument narrows the list to lines mentioning it
- X    while ($criterion = shift) {
- X        @hosts = grep(/\b$criterion\b/, @hosts);
- X    }
- X
- X    # reduce each surviving line to its leading host name
- X    for (@hosts) {
- X        chop;
- X        s/^(\w+).*/$1/;
- X    }
- X} else {
- X    @hosts = @ARGV;
- X}
- X
- Xif ($#hosts < 0) {
- X    print "No hosts\n";
- X    &bad_usage;
- X}
- X
- Xprint "hosts are @hosts\n" if $debug;
- X
- X#
- X# ok, now create our socket we want everyone to talk to us at
- X#
- X
- X# the local host name comes from the hostname command, minus its newline
- Xchop ($localhost = `hostname`);
- X
- X# $thisaddr: first address returned for the local host
- X(($name, $aliases, $type, $len, $thisaddr) = gethostbyname($localhost))
- X || die "no localhost \"$localhost\"$OOPS";
- X
- X# $port: port of the time/udp service, used later for every send
- X(($name, $aliases, $port, $proto) = getservbyname('time', 'udp'))
- X || die "no udp service for \"time\"$OOPS";
- X
- Xprint "service is $name, port is $port\n"
- X if $debug;
- X
- X
- X# $proto is overwritten here with the protocol number for udp
- X(($name, $aliases, $proto) = getprotobyname('udp'))
- X || die "can't get udp proto$OOPS" ;
- X
- X
- Xsocket(SOCKET, &AF_INET, &SOCK_DGRAM, $proto)
- X || die "can't get socket$OOPS";
- X
- X# scalar context, so &sockaddr packs: local address with port 0
- X$this = &sockaddr(&AF_INET, 0, $thisaddr);
- X
- Xbind(SOCKET, $this)
- X || die "can't bind socket: $!$OOPS";
- X
- X#
- X# now go find all of our hosts' addresses, storing
- X# these in %hosts keyed on $name
- X#
- X
- X
- Xprint "fetching addrs... ";
- X
- X# Resolve every requested host once, up front; an unresolvable host is fatal.
- X# %hosts maps the name returned by the resolver (minus the local domain)
- X# to the first address returned for it.
- Xfor $host (@hosts) {
- X    (($name, $aliases, $type, $len, @addrs) = gethostbyname($host))
- X        || die "no remote \"$host\"\n";
- X
- X    # site-specific: drop the local domain so names stay short in reports
- X    $name =~ s/\.convex\.com$//;
- X
- X    $hosts{$name} = $addrs[0];
- X}
- X
- Xprint "done.\nType 'h' for help.\n";
- X
- X# select() bit masks: $rin watches the socket and the keyboard, $ttyin the
- X# keyboard alone; the write and exception masks stay empty.
- X$rin = $win = $ein = '';
- Xvec($rin,fileno(SOCKET),1) = 1;
- Xvec($ttyin,fileno(STDIN),1) = 1;
- X$rin |= $ttyin;
- X
- X
- X
- X# now keep interrogating forever
- X# Each pass: send one datagram to every host, collect replies until all have
- X# answered or the timeout budget is spent, count the non-responders, then
- X# sleep (interruptibly) before the next pass.
- Xfor (;;) {
- X %sent = (); # haven't sent anybody anything yet
- X $sent = 0;
- X
- X &cbreak;
- X
- X print $dashes, "entering send loop\n" if $debug;
- X
- X while (($name, $addr) = each %hosts) {
- X $that = &sockaddr(&AF_INET, $port, $addr);
- X
- X # a host we could not even send to is not counted as outstanding
- X if (!send(SOCKET,0,0,$that)) {
- X printf STDERR "couldn't send to %-12s %-16s\n", $name, &fmtaddr($addr);
- X next;
- X }
- X
- X $sent{$name}++;
- X $sent++;
- X
- X #printf "sent to %-12s %s\n", $name, &fmtaddr($addr) if $debug;
- X }
- X
- X print $dashes, "entering recv loop\n" if $debug;
- X
- X # $ntimeout is the time budget left for this round of replies
- X $ntimeout = $timeout;
- X
- X # &readsock decrements $sent for every datagram it receives
- X while ($sent > 0) {
- X $then = time;
- X # a return of 0 means the budget ran out: stop waiting
- X last unless $nfound = select($rout=$rin, $wout=$win, $eout=$ein, $ntimeout);
- X if ($nfound < 0) {
- X # interrupted selects are retried silently, other errors with a warning
- X warn "select failed: $!\n" unless $! == &EINTR;
- X redo;
- X }
- X # charge the whole seconds this wait took against the budget
- X $took = (time - $then);
- X $ntimeout -= $took;
- X
- X &readsock if vec($rout,fileno(SOCKET),1);
- X &readtty if vec($rout,fileno(STDIN),1);
- X }
- X
- X # whatever is still in %sent did not answer this round (&readsock deletes
- X # the entry of each host it hears from)
- X for $name (sort keys %sent) {
- X $missed{$name}++;
- X printf "%-12s missed %d times\n", $name, $missed{$name} if $debug;
- X if (! $down{$name}) {
- X # only declare a host down once it has missed more than $retries rounds
- X next unless $missed{$name} > $retries;
- X next if $down{$name};
- X $down{$name} = time;
- X printf "%-12s %-16s down at %s",
- X $name, &fmtaddr($hosts{$name}), &ctime($down{$name});
- X }
- X }
- X
- X print "sleeping $sleep -- hit any key to interrupt\n" if $debug;
- X # the sleep is a select on the keyboard alone, so a keystroke ends it early
- X select($ttyout = $ttyin, $wout=$win, $eout = $ein, $sleep);
- X &readtty if vec($ttyout,fileno(STDIN),1);
- X}
- X
- X# sockaddr - convert between a packed socket address and its fields, using
- X# the $sockaddr_t template.
- X#   list context:  unpack $_[0] and return the fields
- X#   otherwise:     pack the first three arguments and return the string
- Xsub sockaddr {
- X    return unpack($sockaddr_t, $_[0]) if wantarray;
- X    pack($sockaddr_t, $_[0], $_[1], $_[2]);
- X}
- X
- X# inetaddr - convert between a packed internet address and its four octets,
- X# using the $inetaddr_t template.
- X#   list context:  unpack $_[0] and return the octets
- X#   otherwise:     pack the first four arguments and return the string
- Xsub inetaddr {
- X    return unpack($inetaddr_t, $_[0]) if wantarray;
- X
- X    # the template holds four octets, so all four arguments are packed
- X    # (packing only three left the last octet zero)
- X    pack($inetaddr_t, $_[0], $_[1], $_[2], $_[3]);
- X}
- X
- X# source - load and run a Perl file with 'do', dying with a specific message
- X# if it cannot be compiled, cannot be read, or does not return a true value.
- X#   $file  name of the file to load
- Xsub source {
- X    local($file) = @_;
- X    local($return) = 0;
- X
- X    $return = do $file;
- X
- X    # $@ is tested first: after a compile error 'do' also returns undef, and
- X    # $! would then hold an unrelated error
- X    die "couldn't parse \"$file\": $@" if $@;
- X    die "couldn't do \"$file\": $!" unless defined $return;
- X    die "couldn't run \"$file\"" unless $return;
- X}
- X
- X# usage - print the usage message, including the current default values of
- X# the tunable switches, on STDERR.  Does not exit.
- Xsub usage {
- X print STDERR <<EOM;
- Xusage: $program [switches] host ...
- X or: $program [switches] -m [criterion ...]
- X
- Xswitches are:
- X -m look in MACHINES file for hosts matching criteria
- X
- X -t timeout for responses (default $def_timeout)
- X -r retries until timed-out host considered down (default $def_retries)
- X -c clock drift tolerance (default $def_timewarp)
- X -s sleep interval between send loops (default $def_sleep)
- X
- X -d print out debugging information
- X -u dump state to disk for faster init
- XEOM
- X}
- X
- X# bad_usage - print the usage message and exit with status 1.
- Xsub bad_usage {
- X &usage;
- X exit(1);
- X}
- X
- X# fmtaddr - return the packed internet address in $_[0] formatted as
- X# "[a.b.c.d]".
- Xsub fmtaddr {
- X    # list assignment, so &inetaddr unpacks the address into its octets
- X    local(@octets) = &inetaddr($_[0]);
- X    sprintf("[%d.%d.%d.%d]", @octets);
- X}
- X
- X
- X# readsock - read one reply datagram from SOCKET and update the bookkeeping.
- X# Decrements $sent, records the sender's clock offset in %delta, clears its
- X# %missed count, reports it as back if it was in %down, and removes it from
- X# %sent.  Returns early if the recv fails or the sender cannot be resolved.
- Xsub readsock {
- X    # the reply's source port is kept out of the global $port, which the
- X    # send loop uses as the destination port
- X    local($hisport);
- X
- X    # written as a block: in "warn ..., return" the return is evaluated as
- X    # one of warn's arguments, so the warning is never printed
- X    unless ($hisaddr = recv(SOCKET,$histime='',4,0)) {
- X        warn "couldn't recv: $!$OOPS";
- X        return;
- X    }
- X
- X    $sent--;
- X
- X    ($addrtype, $hisport, $iaddr) = &sockaddr($hisaddr);
- X
- X    # the time service sends its 32-bit value in network byte order
- X    $histime = unpack('N',$histime);
- X    $histime -= $SINCE_1970;
- X
- X    unless (($name,$aliases,$addrtype,$length,@addrs) =
- X        gethostbyaddr($iaddr,$addrtype))
- X    {
- X        printf STDERR "received reply from unknown address %s\n",
- X            &fmtaddr($iaddr);
- X        # leave the subroutine properly rather than via 'next'
- X        return;
- X    }
- X    # site-specific: drop the local domain, as is done when %hosts is built
- X    $name =~ s/\.convex\.com$//;
- X
- X    printf "%-12s %-16s thinks it's %s",
- X        $name, &fmtaddr($iaddr), &ctime($histime) if $debug;
- X
- X    # absolute difference between the sender's clock and ours, in seconds
- X    $delta = ($histime - time);
- X    $delta = -$delta if $delta < 0;
- X    $delta{$name} = $delta;
- X
- X    delete $missed{$name};
- X
- X    if ($down{$name}) {
- X        printf "%-12s %-16s back at %s",
- X            $name, &fmtaddr($iaddr), &ctime(time);
- X        delete $down{$name};
- X    }
- X
- X    # print, not printf: the host name is data, not part of a format
- X    print "funny, i didn't send $name anything\n" unless $hosts{$name};
- X    delete $sent{$name};
- X}
- X
- X# readtty - read one keystroke from the keyboard and run the command bound
- X# to it in %cmds, or complain if the key is not bound.  End-of-file on the
- X# keyboard is treated as a request to quit.
- Xsub readtty {
- X    local($cmd) = getc;
- X    local($routine) = '';
- X
- X    # Nothing to read means the keyboard is gone; without this check the
- X    # empty result is reported as an unknown command on every wakeup.
- X    &quit unless defined($cmd) && length($cmd);
- X
- X    # strip the high bit
- X    $cmd = sprintf ("%c", ord($cmd) & 0x7f);
- X
- X    if (defined $cmds{$cmd}) {
- X        $routine = $cmds{$cmd};
- X        print "\n",$dashes unless $routine eq 'quit';
- X        &$routine;
- X        print $dashes;
- X    } else {
- X        printf " -- unknown command: `%s' (0x%02x)\n", $cmd, ord($cmd);
- X    }
- X}
- X
- X# quit - put the terminal back into cooked mode and exit with status 0.
- X# Runs for the 'q' and 'x' keys and as the INT/HUP/TERM/QUIT signal handler.
- Xsub quit {
- X # SIGTTOU is ignored before &cooked issues its tty ioctls
- X # (presumably so a backgrounded process is not stopped by them -- confirm)
- X $SIG{'TTOU'} = "IGNORE";
- X &cooked;
- X exit 0;
- X}
- X
- X# help - list every key in %cmds, sorted, with the command it runs.
- Xsub help {
- X    local($key);
- X
- X    print "Key\tCommand\n";
- X    for $key (sort keys %cmds) {
- X        print "$key\t$cmds{$key}\n";
- X    }
- X}
- X
- X# timers - report every host whose recorded clock offset in %delta exceeds
- X# $timewarp seconds, in sorted order.
- Xsub timers {
- X    local($who);
- X
- X    print "Bad Clocks exceeding $timewarp seconds\n";
- X    for $who (sort keys %delta) {
- X        if ($delta{$who} > $timewarp) {
- X            printf "%-12s %-16s has a clock that's %4d seconds off\n",
- X                $who, &fmtaddr($hosts{$who}), $delta{$who};
- X        }
- X    }
- X}
- X
- X
- X# missing - report every host in %missed, sorted, with the number of
- X# timeouts it has missed and the timeout length.
- Xsub missing {
- X    local($who, $count, $plural);
- X
- X    print "Missing Hosts\n";
- X    for $who (sort keys %missed) {
- X        $count = $missed{$who};
- X        # a single miss gets a blank in place of the plural "s"
- X        $plural = ($count == 1) ? " " : "s";
- X        printf "%-12s %-16s has missed %d timeout%s of %d seconds\n",
- X            $who, &fmtaddr($hosts{$who}), $count, $plural, $timeout;
- X    }
- X}
- X
- X# downers - report every host in %down, sorted, with the time recorded
- X# when it was declared down.
- Xsub downers {
- X    local($who, $where);
- X
- X    print "Down Hosts\n";
- X    for $who (sort keys %down) {
- X        $where = &fmtaddr($hosts{$who});
- X        printf "%-12s %-16s down since %s",
- X            $who, $where, &ctime($down{$who});
- X    }
- X}
- X
- X# uppers - list, in sorted order, every monitored host that is not
- X# currently recorded in %down.
- Xsub uppers {
- X    local($who);
- X
- X    print "Up Hosts\n";
- X
- X    for $who (sort keys %hosts) {
- X        printf "%-12s up\n", $who unless $down{$who};
- X    }
- X}
- X
- X# continue - handler installed for SIGCONT: announce the resumption and put
- X# the terminal back into cbreak mode.
- Xsub continue {
- X print "continuing...\n";
- X &cbreak;
- X}
- X
- X# cbreak - turn cbreak mode on (and echo off) for the keyboard.
- Xsub cbreak {
- X &set_cbreak(1);
- X}
- X
- X# cooked - turn cbreak mode off (and echo back on) for the keyboard.
- Xsub cooked {
- X &set_cbreak(0);
- X}
- X
- X# set_cbreak - switch the terminal on STDIN between cbreak and cooked mode.
- X#   $on  true:  set CBREAK and clear ECHO
- X#        false: clear CBREAK and set ECHO
- X# Dies if either tty ioctl fails.
- Xsub set_cbreak {
- X    local($on) = @_;
- X
- X    ioctl(STDIN,&TIOCGETP,$sgttyb) || die "Can't ioctl TIOCGETP: $!";
- X
- X    # the mode bits live in the fifth field unpacked by $sgttyb_t
- X    @ary = unpack($sgttyb_t,$sgttyb);
- X    $ary[4] = $on ? (($ary[4] | &CBREAK) & ~&ECHO)
- X                  : (($ary[4] & ~&CBREAK) | &ECHO);
- X
- X    $sgttyb = pack($sgttyb_t,@ary);
- X    ioctl(STDIN,&TIOCSETP,$sgttyb) || die "Can't ioctl TIOCSETP: $!";
- X}
- SHAR_EOF
- if test 9588 -ne "`wc -c < 'statmon'`"
- then
- echo shar: "error transmitting 'statmon'" '(should have been 9588 characters)'
- fi
- chmod 775 'statmon'
- fi
- exit 0
- # End of shell archive
- --
-
- Tom Christiansen {uunet,uiucdcs,sun}!convex!tchrist
- Convex Computer Corporation tchrist@convex.COM
- "EMACS belongs in <sys/errno.h>: Editor too big!"
-