#!/usr/bin/perl
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#NAME
#  ntlTree - Do things in NTL cust/acct trees
#
#SYNOPSIS
#  ntlTree [dir] [<parse.log]
#
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#EXAMPLES
#
#  ntlTree /vol1/cust
#
#  This runs through the /vol1/cust tree looking for directories,  and  doing
#  things in each directory.
#
#  setenv NTL_BaseDir /vol1
#  cat log/Fran_1_*.log | ntlTree
#
#  This will extract cust/acct numbers from the logs and  do  things  in  the
#  directory  for  each  cust  and acct number.  NTL_BaseDir is needed in the
#  environment to tell it where the cust/acct trees are.  It will assume that
#  $NTL_BaseDir/cust  is  the base of the customer tree and $NTL_BaseDir/acct
#  is the base of the account tree.
#
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#DESCRIPTION
#  This  hunts  through  directories under dir looking for certain things and
#  doing some polishing up.  The tasks done so far are:
#
#  XML files:
#
#    If there are *_summary.xml files, they  are  combined  into  a   single
#    summary.xml file.
#
#  HTML files:
#
#    Files named *_bill.html are converted to *_bill.pdf, unless there  is  a
#    newer *_bill.pdf file already present.
#
#    Files named *.html are gzipped. We have two versions of this command, to
#    delete the original *.html file or to leave it in place.  One of them is
#    commented out.
#
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#INPUT
#  If directories are given on the command line,  ntlTree  searches  for  the
#  bottom-level  directories, and processes all the XML and HTML files in any
#  of them.
#
#  With no command-line args, ntlTree reads from standard input, which should
#  be the output logs from a parse run, and looks for lines of the form:
#     ...  cust 0123456789 acct 0123456789 ...
#  It  calculates  the  customer and account directories from these, and does
#  its job on any such directory that exists.  Thus, by feeding  ntlTree  the
#  output  of  a  parse  run, you can process all the directories affected by
#  that run.
#
#ENVIRONMENT
#  If there is an NTL_BaseDir variable, its value will be  prepended  to  all
#  the directories calculated from the standard input.  This is used to point
#  ntlTree at a global directory at the base of the customer/account trees.
#  
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#OPTIONS
#
#  -hgz  Don't compress .html files (default).
#  +hgz  Compress .html files.
#
#  -pdf  Don't convert _bill.html files to  _bill.pdf files.
#  +pdf  Convert _bill.html files to .pdf files (default).
#
#  -xml  Don't look for summary.xml files.
#  +xml  Process summary.xml files (default).
#
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#FILES
#
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#BUGS
#
#  We might want a runtime flag to control whether *.html files are  kept  or
#  deleted after we gzip them.
#
#  The "cust .* acct .*" lines in the parser's log  have  been  at  different
#  verbose levels at different times.  The code that looks for them will only
#  find them if they exist in the log, of course.   This  message  should  be
#  moved to level 1.
#
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#SEE ALSO
#  parse.d (NTL bill parser)
#
#AUTHOR
#  John Chambers <jc@trillian.mit.edu>                                        #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

$| = 1;					# Flush stdout after every print
$exitstat = 0;			# Status handed to exit at the end
($P = $0) =~ s".*/"";	# Program name with any leading path removed
$relinking = 0;			# Run relink on cust/acct directory pairs
$V = $ENV{"V_$P"} || $ENV{"D_$P"} || 1; # Verbose level.

# Task switches, toggled by the +name / -name options (see OPTIONS above).
$dohgz = 0;				# gzip *.html files
$dopdf = 1;				# Convert *_bill.html files to *_bill.pdf
$doxml = 1;				# Combine *_summary.xml files into summary.xml

# Option name (upper case) => the switch it controls.
my %switch = (HGZ => \$dohgz, PDF => \$dopdf, XML => \$doxml);

# Arguments are handled strictly left to right, so an option only affects the
# directories named after it on the command line.
for my $arg (@ARGV) {
	if (my ($sign,$name) = ($arg =~ /^([-+])(.*)$/)) {
		if (my $var = $switch{uc($name)}) {
			$$var = ($sign eq '+');
		} else {
			# print, not printf: $arg must not be taken as a format string.
			print STDERR "$0: Unknown option \"$arg\" ignored.\n";
		}
	} else {
		++$dirs;
		&onedir($arg);
	}
}

# If no dirs on command line, read input for cust/acct numbers
unless ($dirs) {
	$basedir = $ENV{NTL_BaseDir} || '.';
	while (my $line = <STDIN>) {
		my %tree = (cust => '', acct => '');
		for my $kind ('cust', 'acct') {
			next unless $line =~ / $kind (\d+) /;
			# Split the number into two-digit path components: 0123 -> 01/23/
			(my $sub = $1) =~ s"(\d\d)"$1/"g;
			$tree{$kind} = "$basedir/$kind/$sub";
			&onedir($tree{$kind});
		}
		if ($relinking && $tree{cust} && $tree{acct}) {
			# List form: the paths are passed as-is, with no shell in between.
			system('relink', '+v', $tree{cust}, $tree{acct});
		}
	}
}

print "$P: Exit with status $exitstat.\n" if $V>3;
exit $exitstat;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# onedir($dir)
#
# Scan one directory for interesting files.  This routine is recursive: when
# it finds a subdirectory it calls itself on that directory before doing any
# work in the current one.  All working variables are therefore lexical (my),
# so that one level of the recursion cannot disturb another.
#
# In each directory it then:
#   - converts *_bill.html to *_bill.pdf with html2pdf, unless a newer  .pdf
#     already exists ($dopdf);
#   - gzips every *.html file, unless a newer .gz already exists ($dohgz);
#   - combines the *_summary.xml files into summary.xml, unless  summary.xml
#     is already newer than all of them ($doxml).
#
# Reads the globals $P, $V, $dopdf, $dohgz and $doxml.  The return value  is
# not meaningful.  A directory that can't be opened is skipped.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub onedir {
	my ($dir) = @_;
	our ($P, $V, $dopdf, $dohgz, $doxml);	# Settings owned by the main program
	my (@htmlfiles, @xmlfiles, @sum, $cust);
	my $sumt = 0;			# Newest mtime among the *_summary.xml files here
	$dir =~ s"/+$"";
	print "$P: $dir/\n" if $V>1;
	my $dh;
	unless (opendir($dh,$dir)) {
		# print, not printf: the path must not be taken as a format string.
		print STDERR "$0: Can't scan \"$dir\" ($!)\n" if $V>1;
		return;
	}
	my @files = readdir($dh);
	closedir($dh);
#
# Run through the files and note what kinds exist:
	for my $fil (@files) {
		next if $fil =~ /^\./;
		my $pth = "$dir/$fil";
		if (-d $pth) {
			print "$P: Scan \"$pth\"\n" if $V>3;
			onedir($pth);
		} elsif (($dopdf || $dohgz) && ($pth =~ /\.html$/)) {
			# Either HTML task needs the list; each is gated separately below.
			print "$P: HTML \"$pth\"\n" if $V>3;
			push @htmlfiles, $pth;
		} elsif ($doxml && ($pth =~ /_summary\.xml$/)) {
			print "$P: XML: \"$pth\"\n" if $V>3;
			push @xmlfiles, $pth;
			my @st = stat($pth);
			$sumt = $st[9] if @st && $sumt < $st[9];
		}
	}
#
	print "Do HTML files ...\n" if $V>4;
	for my $pth (@htmlfiles) {
		print "html \"$pth\"\n" if $V>3;
		(my $base = $pth) =~ s/\.html$//;
		my @hst = stat($pth);
		if ($dopdf) {
			my $pdf = "$base.pdf";
			my @pst = stat($pdf);
			if (@pst && ($hst[9] < $pst[9])) {	# Compare mtime values
				print "File \"$pdf\" is newer.\n" if $V>4;
			} elsif ($pth =~ /_bill\.html$/) {	# Convert *_bill.html to *_bill.pdf
				print "Make file \"$pdf\" from \"$pth\"\n" if $V>4;
				onedir_run('html2pdf', $base);
			}
		}
		if ($dohgz) {
			my $hgz = "$pth.gz";
			my @gst = stat($hgz);
			if (@gst && ($hst[9] < $gst[9])) {	# Compare mtime values
				print "File \"$hgz\" is newer.\n" if $V>4;
			} else {
				print "Make file \"$hgz\" from \"$pth\"\n" if $V>4;
				# gzip -f replaces the .html file with the .gz file.
				if (onedir_run('gzip', '-f', $pth)) {
					print "$P: $hgz\n" if $V>1;
				}
			}
		}
	}
#
	print "Do XML files ...\n" if $V>4;
	my $sumf = "$dir/summary.xml";
	my @sumst = stat($sumf);
	print "$P: xml: st1[9]=$sumst[9] sumt=$sumt.\n" if $V>4;
	if (@sumst && ($sumst[9] > $sumt)) {
		print "$P: xml: \"$sumf\" is newer than *_summary.xml files.\n" if $V>3;
		print "$P: $dir\n" if $V>2;
		return;
	}
	print "$P: xml: \"$sumf\" to be created ...\n" if $V>4;
	for my $xml (@xmlfiles) {
		my $pth = $xml;		# Work on a copy; don't edit the list in place
		print "$P: xml: \"$pth\"\n" if $V>3;
		# NOTE(review): the scan above only collects names ending in
		# "_summary.xml", so this gzipped-summary branch is never taken at
		# present.  Confirm whether "*_summary.xml.gz" should be collected.
		if ($pth =~ /\.gz$/) {	# Is the file gzipped?
			my $zip = $pth;
			$pth =~ s/\.gz$//;	# Unzipped path
			next if -f $pth;	# An unzipped version exists; ignore the zipped one
			print "$P: Unzip \"$zip\"\n" if $V>4;
			system('gunzip', '-f', $zip);
		}
		my $fh;
		unless (open($fh, '<', $pth)) {
			print STDERR "$0: Can't read \"$pth\" ($!)\n";
			next;
		}
		my $lines = 0;
		while (my $line = <$fh>) {
			next if ($line =~ m"^\s*<[?/]*xml\b");	# Drop the XML declaration
			$line =~ s/[\s\r]+$//;
			if ($line =~ m"<cust>(\d+)</cust>") {
				$cust = $1;		# Remembered for the file> rewrite below
			} elsif ($line =~ m"^\s*</*NTL>$") {
				# Each file's own <NTL> wrapper is dropped; one is added on output.
				print "Ignore \"$line\"\n" if $V>3;
				next;
			} elsif ($line =~ m"^(.*)(file>)(\d+_\d+_bill\..*)$") {
				# Prefix the bill file name with the last customer number seen.
				print STDERR "Found \"$line\" in $pth\n";
				$line = $1 . $2 . $cust . '_' . $3;
				print STDERR "Write \"$line\" instead\n";
			}
			push @sum, "$line\n";
			++$lines;
		}
		close($fh);
		print "Read $lines in $pth\n" if $V>4;
	}
	return unless @sum;
	my $ofh;
	unless (open($ofh, '>', $sumf)) {
		print STDERR "$0: Can't make \"$sumf\" ($!)\n";
		return;
	}
	print "$0: Write $dir/summary.xml\n" if $V>3;
	print $ofh "<NTL>\n", @sum, "</NTL>\n";
	unless (close($ofh)) {		# Buffered write errors show up at close
		print STDERR "$0: Can't finish writing \"$sumf\" ($!)\n";
		return;
	}
	print "$P: $dir/summary.xml\n" if $V>1;
	return;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# onedir_run($program, @args)
#
# Run an external command for onedir without going through the shell, so file
# names containing spaces or shell metacharacters are passed intact.   Always
# call it with at least one argument after the program name; a single-element
# list would be handed to the shell.
#
# As before, anything the command writes to stdout counts as a failure;  now
# a non-zero wait status or a command that can't be started counts too.  The
# failure report is printed only when $V>2.
#
# Returns 1 on success, 0 on failure.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub onedir_run {
	my (@cmd) = @_;
	our ($V);
	my ($out, $stat) = ('', -1);
	print "cmd=\"@cmd\"\n" if $V>4;
	if (open(my $ph, '-|', @cmd)) {
		local $/;				# Slurp the command's whole stdout
		$out = <$ph>;
		$out = '' unless defined $out;
		close($ph);				# Reaps the child and sets $?
		$stat = $?;
	} else {
		$out = "Can't run \"$cmd[0]\" ($!)";
	}
	return 1 if $stat == 0 && $out eq '';
	if ($V>2) {
		print "### Failure (wait status $stat)\n";
		print "CMD @cmd\n";
		print "$out\n";
	}
	return 0;
}