#!/usr/bin/perl
#NAME
#  showbadsites - Show sites that took too long to find nothing
#
#SYNOPSIS
#  showbadsites [pattern [time [file]]]
#
#DESCRIPTION
# Look through the scandata file to find sites that turned up no files, tunes
# or titles and used more than a critical amount of time. The default limit is
# ten minutes (600 sec); pass a different value as the second argument.
#
# Here's a typical scandata line:
#   20070608 12:25  19669 sec ( 5:27:49)  0 files  0 tunes  0 titles at www.foo.bar.com
#
#DEFAULTS
	$pat     = shift(@ARGV) || '^\d+';	# Regex a line must match to be examined
	$limit   = shift(@ARGV) || 600;		# Elapsed seconds above which a scan is "too long"
	$summary = shift(@ARGV) || 'scandata';	# Path of the scan summary file to read
#
#AUTHOR
#  John Chambers <jc@trillian.mit.edu>
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#
# This replaces the original shell command:
#   grep -v ' sec ( 0:' scandata | grep ' 0 tunes' 

use strict;
use warnings;

# The positional-argument values are assigned as package globals in the
# DEFAULTS section of the header; declare them so the code below can run
# under strict.
our ($pat, $limit, $summary);

$| = 1;		# Flush stdout after every print so hits appear as they are found

# Open the scan summary read-only.  The three-argument form keeps a file name
# taken from the command line from being interpreted as an open mode or a
# pipe (e.g. ">scandata" or "somecmd |").  A name of "-" is still accepted as
# standard input, as it was with the two-argument open used previously.
my $in;
if ($summary eq '-') {
	$in = \*STDIN;
} elsif (!open($in, '<', $summary)) {
	print STDERR "$0: Can't read $summary [$!]\n";
	exit 1;
}

# Coerce the limit to a number once.  A non-numeric limit is silently treated
# the way Perl's numeric comparison would treat it, without a warning being
# repeated for every input line.
my $threshold = do { no warnings 'numeric'; $limit + 0 };

# Shape of a scandata line:
#   date  time-of-day  SECONDS sec (h:mm:ss)  FILES files  TUNES tunes  TITLES titles at host
# Only the elapsed seconds and the three counts are captured; the other
# fields are matched but not needed for the test below.
my $scan_re = qr/^\d+\s+[:\d]+\s+(\d+)\s+sec\s+\(\s*[:\d]+\)\s+(\d+) files\s+(\d+) tunes\s+(\d+) titles at /;

while (my $line = <$in>) {
	$line =~ s/[\r\n]+$//;		# Strip LF or CRLF line endings
	next unless $line =~ $pat;	# Is the line acceptable?
	next unless $line =~ $scan_re;	# Skip anything that isn't a scan record
	my ($elapsed, $files, $tunes, $titles) = ($1, $2, $3, $4);

	# A site is "bad" only if the scan found nothing at all.  Compare
	# numerically so a zero-padded count such as "00" still counts as zero.
	next if $files != 0 || $tunes != 0 || $titles != 0;

	# ... and took longer than the limit to find it.
	print "$line\n" if $elapsed > $threshold;
}
close($in);
exit 0;
