#!/usr/bin/perl
#
#SYNOPSIS
#  FindWaste [-s<size>] [-a<time>] [-b<byteyears>] [dirs]
#
#DESCRIPTION
#  This program searches through the  directories  ("."  by  default)
#  looking  for  large  files  that  haven't been referenced for some
#  time. The output is a listing of the files, showing their size and
#  how  long  since  they  were last used.  The output also shows the
#  measure called "byteyears", which is the space-time product of the
#  file's  size  (in bytes) and how long since its last reference (in
#  years).  This turns out to be a useful  measure  of  space  usage.
#  Byteyears in the range of 10K or more are probably suspicious, and
#  this is our default threshold.
#
#BUGS
#  There is no way to distinguish a "real" use of a file from  things
#  like  grep and tar accesses.  Some backup programs don't bother to
#  reset the access times, so they also qualify as a reference.
#
#AUTHOR
#  John Chambers <jc@trillian.mit.edu>

@ARGV = ('.') if !@ARGV;
$spy = 60 * 60 * 24 * 365.24;	# Seconds in a year.
$bym = 10000;		# Default byteyears limit.
$now = time;

while ($ARGV[0] =~ /^[-+](.*)/) {
	$opt = $1;
	if ($opt =~ /^s(\d+)$/) {
		$siz = $1;
	} elsif ($opt =~ /^a(\d+)$/) {
		$tim = $1;
	} elsif ($opt =~ /^b(\d+)$/) {
		$bym = $1;
	} else {
		print STDERR "$0: Unknown option \"$ARGV[0]\" ignored.\n";
	}
	shift @ARGV;
}

dir: foreach $d (@ARGV) {
	&scandir($d);
}
exit 0;

sub scandir {
	local($d) = @_;
	local(@files,$p,$x);
	if (opendir(D,$d)) {
		@files = grep(!/^\.\.?$/,readdir(D));
		foreach $f (@files) {
			$p = "$d/$f";
			$x = 0;
			if (-d "$p") {
				&scandir($p);
			} else {
				($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
					$atime,$mtime,$ctime,$blksize,$blocks)
						 = stat($p);
				$atim = $t = $now - $atime;	# Time since last use.
				$byrs = $size * ($atim / $spy);	# Byteyears.
				++ $x if ($tim && $atim > $tim);
				++ $x if ($siz && $size > $siz);
				++ $x if ($bym && $byrs > $bym);
				if ($x) {
					$s = $t % 60; $t = int($t / 60);
					$m = $t % 60; $t = int($t / 60);
					$h = $t % 24; $d = int($t / 24);
					$l = sprintf("%dd%dh%dm%ds",$d, $h, $m, $s);
					print "#byteyears       size howlong      file\n"
						if (!($files % 20));
					printf "%10d %10d %-12s %s\n",
						$byrs, $size, $l, $p;
					++ $files;
				}
			}
		}
	} else {
		printf STDERR "$0: Can't read $d [$!]\n";
	}
#	print "$0: Done with $d\n";
}
