#!/bin/bash
#
# $Id: ntpgrabber,v 1.4 2006/03/14 08:49:35 geoff Exp $
#
# Extract NTP information and format it for display by various gkrellm
# plugins.
#
# USAGE:
#
# Help text.  It is echoed to standard error, followed by exit status 2,
# whenever the command line is rejected.
USAGE='Usage: ntpgrabber [options]
Options:
    -b base	Change base path for output files
    -h host	Specify host to be monitored
    -i interval	Interval between NTP probes
    -m max	Max hosts to list in MON file (default no limit)
    -o warn alert
		Give peer offsets (in seconds) above which warnings/alerts
		are activated in MON file
    -r warn alert
		Give peer reachability counts below which warnings/alerts
		are activated in MON file
    -s method	Select method for choosing primary, secondary, and tertiary
		hosts in chart (default ntporder):
	alpha	Alphabetical sort by host name
	ralpha	Reverse alphabetical sort by host name
	ntporder Choose first three valid lines in ntpq output
	synch	Synchronization host is primary; three names alpha-sorted
	offset	Lowest absolute offset is primary
	roffset	Highest absolute offset is primary
	h,h,h	Use three specified hosts
    -x c,c...	Specify tally codes to exclude (default ux-):
	u	Unreachable or outrageous (<space> also accepted)
	x	Falseticker
	.	Excess
	-	Outlier
	+	Candidate
	#	Survivor not in first 6
	*	Selected system peer'
#
# Every "interval" seconds (default 4), the script queries "host"
# (default localhost) for its ntp status.  The status is then written
# to several files, formatted for gkrellm use, as given below.  The
# files are named by appending various suffixes to a base name, which
# by default is "/tmp/ntplog".
#
# OUTPUT FILES:
#
# The script produces several output files, named as follows:
#
#	$BASE.raw	The raw output of "ntpq -p $HOST"
#	$BASE.mon	NTP output formatted for the FMonitor plugin
#	$BASE.chart	NTP output formatted for the FChart plugin
#	$BASE.tchart	Temporary used for creating $BASE.chart
#
# THE MON FILE:
#
# The "mon" file contains a pair of lines per NTP peer.  Each line is in
# FMonitor format (i.e., three fields separated by colons).  The first
# line gives the peer name and the offset in milliseconds, seconds,
# minutes, or hours.  The second line gives the octal reachability
# code for the last 8 packets, and time (usually in seconds) since the
# peer has been contacted.  A WARNING is set if the peer hasn't been
# contacted for a very long time, if the offset from the peer is at
# least 1 second (by default), or if the peer was reachable for 4 or
# fewer (by default) of the last 8 packets.  An ALERT is set if the
# primary peer is local (indicating no NTP connection to the outside
# world), if the offset is at least 5 seconds (by default), or if the
# peer was reachable for 2 or fewer (by default) of the last 8 packets.
#
# THE CHART FILE:
#
# If there is no NTPD, the chart file contains three zeros for
# charting, an alert, and a label string reading "NO NTPD".
# Otherwise, the script selects a primary, secondary, and tertiary
# peer to report, and charts the absolute offsets (in microseconds)
# to those peers.  For the 10 strings supported by FChart, it chooses
# values from the primary peer, as given below.  Finally, regardless
# of whether there is a peer, the tooltip is set equal to the raw
# output of "ntpq -p".  (Unfortunately, the tooltip is hard to read
# unless you select a fixed-width font for gkrellm.)
#
# The 10 strings from the primary peer that are placed in the chart file are:
#
#	$0	The tally code ("*", "+", "-", "x", ".", "#", or blank)
#	$1	The name of the peer
#	$2	The stratum at which the peer operates
#	$3	The type of the peer ("l" for local, "u" for remote)
#	$4	When the peer was last contacted
#	$5	The polling interval for the peer, in seconds
#	$6	The reachability byte (in octal) for the peer
#	$7	The measured delay to the peer, in ms
#	$8	The measured offset to the peer, in ms
#	$9	The measured jitter in the delay to the peer, in ms
#
# The chart file also contains some debugging information following
# the !!EOF!! indicator.
#
# BUGS, LIMITATIONS, AND RESTRICTIONS
#
# This script requires gawk or another awk that supports tolower,
# strtonum, asort, and asorti.
#
# $Log: ntpgrabber,v $
# Revision 1.4  2006/03/14 08:49:35  geoff
# Reject non-synchronized hosts based on .INIT. as well as .STEP.
#
# Revision 1.3  2006/03/07 08:02:19  geoff
# Add ID keywords.  Add the -m, -s, and -x switches.  Add a BUGS section.
# Reject non-synchronized hosts based on the .STEP. field rather than based
# on a jitter of 4000.
#

# Probe target, probe period in seconds, and the stem of every output file.
NTPHOST='localhost'
INTERVAL='4'
BASE='/tmp/ntplog'

# MON-file thresholds.  Offsets are given in seconds; reach counts are the
# number of successful polls among the last 8.
WARNING_OFFSET='1'
WARNING_REACHCOUNT='4'
ALERT_OFFSET='5'
ALERT_REACHCOUNT='2'

# Peer selection: 0 means no cap on the hosts listed in the MON file.
TALLY_REJECT='ux-'
SORT_METHOD='ntporder'
MAX_HOSTS='0'

# usage: print the help text on standard error and terminate with status 2.
usage()
{
    echo "$USAGE" 1>&2
    exit 2
}

# parse_args ARG...
#
# Walk the command-line switches and store their values in the global
# settings (BASE, NTPHOST, INTERVAL, MAX_HOSTS, WARNING_OFFSET,
# ALERT_OFFSET, WARNING_REACHCOUNT, ALERT_REACHCOUNT, SORT_METHOD,
# TALLY_REJECT).  Parsing stops at "--" or at the first word that is not a
# switch.  An unknown switch, a switch whose value(s) are missing, or any
# leftover positional word ends the script through usage().
parse_args()
{
    while [ $# -gt 0 ]
    do
	case "$1" in
	    -b|-h|-i|-m|-s|-x)
		# One value required; without this check a trailing switch
		# would silently store an empty setting.
		[ $# -ge 2 ] || usage
		case "$1" in
		    -b) BASE="$2" ;;
		    -h) NTPHOST="$2" ;;
		    -i) INTERVAL="$2" ;;
		    # Must be MAX_HOSTS: that is the name the defaults and
		    # the MON-file pass use.
		    -m) MAX_HOSTS="$2" ;;
		    -s) SORT_METHOD="$2" ;;
		    -x) TALLY_REJECT="$2" ;;
		esac
		shift 2
		;;
	    -o|-r)
		# Two values required: the warning level, then the alert level.
		[ $# -ge 3 ] || usage
		if [ "$1" = "-o" ]
		then
		    WARNING_OFFSET="$2"
		    ALERT_OFFSET="$3"
		else
		    WARNING_REACHCOUNT="$2"
		    ALERT_REACHCOUNT="$3"
		fi
		shift 3
		;;
	    --)
		shift
		break
		;;
	    -*)
		usage
		;;
	    *)
		break
		;;
	esac
    done

    # The script takes no positional arguments.
    [ $# -eq 0 ] || usage
}

parse_args "$@"


trap "" 1
while sleep "$INTERVAL"
do
    #
    # Output format for use with FMonitor
    #
    # Capture the peer table once, then turn it into a pair of FMonitor
    # lines per accepted peer.  Shell settings are handed to awk with -v
    # instead of being spliced into the program text, so an empty or odd
    # value cannot break the awk syntax.
    #
    ntpq -p "$NTPHOST" 2>/dev/null > "$BASE".raw
    awk -v maxHosts="$MAX_HOSTS" \
      -v tallyReject="$TALLY_REJECT" \
      -v warnOffset="$WARNING_OFFSET" \
      -v alertOffset="$ALERT_OFFSET" \
      -v warnReach="$WARNING_REACHCOUNT" \
      -v alertReach="$ALERT_REACHCOUNT" \
      '
        BEGIN {
            # The settings arrive as strings; force them to numbers.  The
            # offset limits are given in seconds, while the offset column
            # is compared in milliseconds.
            maxHosts += 0
            warnOffsetMs = warnOffset * 1000
            alertOffsetMs = alertOffset * 1000
            warnReach += 0
            alertReach += 0
            nSaved = 0
        }

        # Lines 1 and 2 are treated as headings; every later line is a peer.
        NR > 2 {
            if ($2 == ".STEP."  ||  $2 == ".INIT.")
                next                    # Non-live connection

            # The tally code is the first character of the line.  Unless
            # it is a blank it is glued to the front of the peer name.
            tallyCode = substr($0, 1, 1)
            syncHost = $1
            if (tallyCode != " ")
                syncHost = substr(syncHost, 2)

            # NOTE(review): the reject list is matched against the literal
            # tally character, so a "u" entry never matches the blank
            # tally; only a literal space in the list does.  Confirm
            # whether "u" was meant to be translated.
            if (index(tallyReject, tallyCode))
                next

            if (syncHost == "LOCAL(0)"  &&  tallyCode == " ")
                next                    # Ignore local when synced elsewhere

            when = $5
            reach = strtonum("0" $7)    # Leading 0 makes it parse as octal
            offset = $9
            absOffset = (offset < 0) ? -offset : offset

            # Severity: an ALERT always wins; a WARNING is recorded only
            # while nothing has been flagged yet.
            warnType = ""
            if (syncHost == "LOCAL(0)"  &&  tallyCode == "*")
                warnType = "ALERT"
            if (when ~ /[a-z]$/  &&  warnType == "")
                warnType = "WARNING"    # "when" ends in a unit letter
            if (absOffset >= alertOffsetMs)
                warnType = "ALERT"
            else if (absOffset >= warnOffsetMs  &&  warnType == "")
                warnType = "WARNING"

            # Count how many of the 8 reachability bits are set.
            reachCount = 0
            for (bit = 1;  bit <= 128;  bit *= 2)
                if (int(reach / bit) % 2 == 1)
                    reachCount++
            if (reachCount <= alertReach)
                warnType = "ALERT"
            else if (warnType == ""  &&  reachCount <= warnReach)
                warnType = "WARNING"

            # Scale the offset to a readable unit.
            if (absOffset < 0.1)
                offsetString = sprintf("%.1f", offset * 1000) "us"
            else if (absOffset < 99.5)
                offsetString = sprintf("%.1f", offset) "ms"
            else if (absOffset < 180000)
                offsetString = sprintf("%.1f", offset / 1000) "s"
            else if (absOffset < 3600000)
                offsetString = sprintf("%.1f", offset / 60000) "m"
            else
                offsetString = sprintf("%.1f", offset / 3600000) "h"

            # Both lines of a peer share one index, so nSaved counts peers
            # and the -m limit applies to whole peers.
            ++nSaved
            line1[nSaved] = sprintf("%s:%s:%s", $1, offsetString, warnType)
            line2[nSaved] = sprintf(" %3o:%s:", reach, when)
        }

        END {
            if (NR <= 2)
                print "NO NTPD:!!!!:ALERT"
            else if (nSaved == 0)
                print "NO PEERS:!!!!:ALERT"
            else {
                if (maxHosts != 0  &&  maxHosts < nSaved)
                    nSaved = maxHosts
                for (i = 1;  i <= nSaved;  i++)
                    printf "%s\n%s\n", line1[i], line2[i]
            }
        }
      ' \
      "$BASE".raw \
      > "$BASE".mon
    #
    # Output format for use with FChart
    #
    # The chart is assembled in the .tchart scratch file and copied over
    # the .chart file once awk has finished.  Shell settings are handed to
    # awk with -v instead of being spliced into the program text.
    #
    awk -v sortMethod="$SORT_METHOD" \
      -v tallyReject="$TALLY_REJECT" \
      '
        # Queue one line of debugging text.  The queue is printed after
        # the !!EOF!! marker.
        function note(msg) {
            debug[++debugLine] = msg
        }

        # Take primary, secondary, and tertiary from the first three
        # entries of order[1..n] and log each choice.  withOffset selects
        # the log format that also shows the peer offset.
        function pickTopThree(order, n, withOffset,    rank, who, label) {
            split("Primary Secondary Tertiary", label, " ")
            for (rank = 1;  rank <= 3  &&  rank <= n;  rank++) {
                who = order[rank]
                if (rank == 1)
                    primary = who
                else if (rank == 2)
                    secondary = who
                else
                    tertiary = who
                if (withOffset)
                    note(sprintf("%s %d = %s, offset %s\n", label[rank], \
                      who, reportSyncHosts[who], reportOffsets[who]))
                else
                    note(sprintf("%s %d = %s\n", label[rank], who, \
                      reportSyncHosts[who]))
            }
        }

        # "alpha" and "ralpha": order the peers by lower-cased host name,
        # ascending or (reversed) descending.
        function chooseByName(reversed,    i, k, n, byName, names, order) {
            if (reversed)
                note("Choosing reverse-alpha-sorted hosts\n")
            else
                note("Choosing alpha-sorted hosts\n")
            if (plotIndex < 1)
                return
            for (i = 1;  i <= plotIndex;  i++)
                byName[tolower(reportSyncHosts[i])] = i
            # n can be smaller than plotIndex when two peers share a name.
            n = asorti(byName, names)
            for (i = 1;  i <= n;  i++) {
                k = reversed ? n - i + 1 : i
                order[i] = byName[names[k]]
            }
            pickTopThree(order, n, 0)
        }

        # "offset" and "roffset": order the peers by absolute offset.
        # asort only sorts values, so each value is a fixed-width key with
        # the peer index appended.  The key is zero-padded wide enough
        # that string order equals numeric order for any plausible offset.
        function chooseByOffset(reversed,    i, k, n, keyed, order) {
            if (reversed)
                note("Choosing reverse-offset-sorted hosts\n")
            else
                note("Choosing offset-sorted hosts\n")
            if (plotIndex < 1)
                return
            for (i = 1;  i <= plotIndex;  i++)
                keyed[i] = sprintf("%020.6f:%d", absOffsets[i], i)
            n = asort(keyed)
            for (i = 1;  i <= n;  i++) {
                k = keyed[reversed ? n - i + 1 : i]
                order[i] = substr(k, index(k, ":") + 1) + 0
            }
            pickTopThree(order, n, 1)
        }

        # "synch": build the set around the synchronization peer, then
        # hand the three out in host-name order.
        function chooseSynch(    i, priority, secondaryPriority, \
          tallyPriority, v1, v2, v3, t) {
            #
            # For the primary, first search for a tally code of "*".
            # If none is found, then choose the first tally code
            # without a "+".
            #
            primary = 0
            for (i = 1;  i <= plotIndex;  i++) {
                note(sprintf("Choosing primary: i = %d, saw %s\n", \
                  i, reportTallyCodes[i]))
                if (reportTallyCodes[i] == "*") {
                    note(sprintf("Setting primary to %d\n", i))
                    primary = i
                    break
                }
                if (primary == 0  &&  reportTallyCodes[i] != "+")
                    primary = i
            }

            #
            # Picking a secondary is trickier.  We want something that
            # is different from the primary, but "nice".  Priority, best
            # first: +, then #, blank, -, ., x, other.  The position in
            # tallyPriority is the priority; codes not listed score 0.
            # The first candidate is always accepted (secondary == 0),
            # so a peer with an unlisted code can still be chosen.
            #
            secondary = 0
            secondaryPriority = 0
            tallyPriority = "x.- #+"
            for (i = 1;  i <= plotIndex;  i++) {
                if (i == primary)
                    continue
                note(sprintf("Choosing secondary: i = %d, saw %s\n", \
                  i, reportTallyCodes[i]))
                priority = index(tallyPriority, reportTallyCodes[i])
                if (secondary == 0  ||  priority > secondaryPriority) {
                    secondary = i
                    secondaryPriority = priority
                    note(sprintf("Setting secondary to %d, priority %d\n", \
                      i, priority))
                    continue
                }
                # Kept from the original: a later "+" that does not beat
                # the current choice still replaces it and ends the scan.
                if (reportTallyCodes[i] == "+") {
                    secondary = i
                    break
                }
            }

            #
            # Finally, choose a tertiary by taking the first
            # unassigned peer.
            #
            for (i = 1;  i <= plotIndex;  i++) {
                if (i != primary  &&  i != secondary) {
                    note(sprintf("Choosing tertiary: i = %d\n", i))
                    tertiary = i
                    break
                }
            }

            #
            # The plot makes more sense if the various colors remain
            # stable no matter who is the synchronization host.  To
            # help this happen (though not ensure it), hand out the
            # three slots in alphabetical order by host name.  Empty
            # slots (index 0) are pushed to the end.
            #
            v1 = primary
            v2 = secondary
            v3 = tertiary
            note(sprintf("Unsorted names: %s %s %s\n", \
              reportSyncHosts[v1], reportSyncHosts[v2], \
              reportSyncHosts[v3]))
            if (v1 == 0 \
              ||  (v2 != 0  &&  reportSyncHosts[v1] > reportSyncHosts[v2])) {
                t = v1
                v1 = v2
                v2 = t
            }
            if (v1 == 0 \
              ||  (v3 != 0  &&  reportSyncHosts[v1] > reportSyncHosts[v3])) {
                t = v1
                v1 = v3
                v3 = t
            }
            if (v2 == 0 \
              ||  (v3 != 0  &&  reportSyncHosts[v2] > reportSyncHosts[v3])) {
                t = v2
                v2 = v3
                v3 = t
            }
            note(sprintf("v1 = %d, v2 = %d, v3 = %d\n", v1, v2, v3))
            note(sprintf("Sorted names: %s %s %s\n", \
              reportSyncHosts[v1], reportSyncHosts[v2], \
              reportSyncHosts[v3]))

            primary = v1
            secondary = v2
            tertiary = v3
        }

        # Anything else is taken as a comma-separated list of up to three
        # host names, matched case-insensitively.  Names are cut to 15
        # characters before matching; presumably that is the width of the
        # name column in the ntpq output (confirm).
        function chooseListed(    i, nSortHosts, sortHosts, host) {
            nSortHosts = split(sortMethod, sortHosts, ",")
            note(sprintf("Choosing %d specific host(s)\n", nSortHosts))
            for (i = 1;  i <= nSortHosts;  i++)
                sortHosts[i] = tolower(substr(sortHosts[i], 1, 15))
            for (i = nSortHosts + 1;  i <= 3;  i++)
                sortHosts[i] = ""
            for (i = 1;  i <= plotIndex;  i++) {
                host = tolower(reportSyncHosts[i])
                if (host == sortHosts[1])
                    primary = i
                else if (host == sortHosts[2])
                    secondary = i
                else if (host == sortHosts[3])
                    tertiary = i
            }
        }

        BEGIN {
            # Stays ALERT unless an accepted "*" or "+" peer is seen.
            warnType = "ALERT"
        }

        # ALL LINES: keep the raw text for the tooltip section.
        {
            rawText[NR] = $0
        }

        # Lines 1 and 2 are treated as headings; every later line is a peer.
        NR >= 3 {
            if ($0 ~ /LOCAL\([0-9]*\)/)
                next
            if ($2 == ".STEP."  ||  $2 == ".INIT.")
                next                    # Non-live connection

            # The tally code is the first character of the line.  Unless
            # it is a blank it is glued to the front of the peer name.
            tallyCode = substr($0, 1, 1)
            syncHost = $1
            if (tallyCode != " ")
                syncHost = substr(syncHost, 2)

            # NOTE(review): the reject list is matched against the literal
            # tally character, so a "u" entry never matches the blank
            # tally; only a literal space in the list does.
            if (index(tallyReject, tallyCode)) {
                note(sprintf("Rejecting %s due to tally code %s\n", \
                  syncHost, tallyCode))
                next
            }

            if (tallyCode == "*")
                warnType = "NORMAL"
            else if (tallyCode == "+"  &&  warnType == "ALERT")
                warnType = "WARNING"

            offset = $9
            absOffset = (offset < 0) ? -offset : offset

            ++plotIndex

            # The offset column is in milliseconds, so this plots whole
            # microseconds.  NOTE(review): earlier documentation spoke of
            # 100-microsecond units (a factor of 10); the factor actually
            # used is 1000.  Confirm which the chart setup expects.
            plotValues[plotIndex] = int(absOffset * 1000 + 0.5)
            absOffsets[plotIndex] = absOffset

            reportTallyCodes[plotIndex] = tallyCode
            reportSyncHosts[plotIndex] = syncHost
            reportStratums[plotIndex] = $3
            reportTypes[plotIndex] = $4
            reportWhens[plotIndex] = $5
            reportPolls[plotIndex] = $6
            reportReaches[plotIndex] = $7
            reportDelays[plotIndex] = $8
            reportOffsets[plotIndex] = offset
            reportJitters[plotIndex] = $10
        }

        END {
            if (NR <= 2) {
                print "0 0 0"
                print "ALERT: NO NTPD"
                #
                # Make one warning string
                #
                print "NO NTPD"
            } else {
                #
                # Pick primary, etc. according to user-chosen method.
                # Index 0 means "no such peer"; its entries are unset, so
                # it plots as 0 and prints as empty strings.
                #
                primary = secondary = tertiary = 0
                if (sortMethod == "alpha")
                    chooseByName(0)
                else if (sortMethod == "ralpha")
                    chooseByName(1)
                else if (sortMethod == "ntporder") {
                    note("Choosing in NTP order\n")
                    if (plotIndex >= 1)
                        primary = 1
                    if (plotIndex >= 2)
                        secondary = 2
                    if (plotIndex >= 3)
                        tertiary = 3
                } else if (sortMethod == "synch")
                    chooseSynch()
                else if (sortMethod == "offset")
                    chooseByOffset(0)
                else if (sortMethod == "roffset")
                    chooseByOffset(1)
                else
                    chooseListed()

                printf "%d %d %d\n", plotValues[primary], \
                  plotValues[secondary], plotValues[tertiary]

                print warnType

                print reportTallyCodes[primary]         #$0
                print reportSyncHosts[primary]          #$1
                print reportStratums[primary]           #$2
                print reportTypes[primary]              #$3
                print reportWhens[primary]              #$4
                print reportPolls[primary]              #$5
                print reportReaches[primary]            #$6
                print reportDelays[primary]             #$7
                print reportOffsets[primary]            #$8
                print reportJitters[primary]            #$9
            }
            print "!!TOOLTIP!!"
            for (i = 1;  i <= NR;  i++)
                print rawText[i]
            print "!!EOF!!"
            #
            # After this point, we can safely produce debugging output.
            #
            for (i = 1;  i <= debugLine;  i++)
                printf "%s", debug[i]
            printf "Primary %d, secondary %d, tertiary %d, max %d\n", \
              primary, secondary, tertiary, plotIndex
            for (i = 1;  i <= plotIndex;  i++)
                printf "%d: %s %s %s %s %s %s %s %s %s %s\n", \
                  i, reportTallyCodes[i], reportSyncHosts[i], \
                  reportStratums[i], reportTypes[i], reportWhens[i], \
                  reportPolls[i], reportReaches[i], reportDelays[i], \
                  reportOffsets[i], reportJitters[i]
            print "DONE"
        }
      ' \
      "$BASE".raw \
      > "$BASE".tchart
    cp "$BASE".tchart "$BASE".chart
done
