#!/usr/bin/perl
#!/space/imail/perl/bin/perl
#
# NAME
#   H_fmt - HTML formatter
#   H_txt - HTML to plain text conversion.
#
# SYNOPSIS
#   H_fmt [option]..  [file]..
#   H_txt [option]..  [file]..
#
# DESCRIPTION
#   Read in a chunk of HTML, and reformat it.  This program  functions  as  a
#   conventional  Unix-style filter program, reading from stdin or from files
#   named on the command line, and writing to stdout.  Depending on  name  or
#   the  H  option,  we  produce either "pretty-printed" HTML output or plain
#   ASCII text.
#
#   The main use of this program is in making HTML readable to a human.   The
#   growing population of HTML tools has the  unfortunate  characteristic  of
#   producing "machine readable" HTML. You don't have to tolerate it.  Run it
#   through this program.
#
#   The main formatting we do is to produce indenting (using tabs by default,
#   though  you  can specify the indent string).  We have some tables of HTML
#   tags saying which of them are to start a new line, which are to  end  the
#   line, and whether to indent the contents. We also do a bit of cleaning up
#   of whitespace.
#
#   There are some special transformations done by this program that were put
#   here for the benefit of specific projects that I've worked on. I've tried
#   to  delete  each  project's goodies as I move on to the next one, but you
#   may find some relics.  Use them if you wish.  Here are some of the things
#   in the current version:
#
# OPTIONS
#   Options on the command line may start with '-' or '+'.  For some options,
#   '-' means "disable" or "no", while '+' means "enable" or "yes". For other
#   options, there is no such distinction, and either may be used.  Options
#   with no arg may be combined into one; if an option letter is encountered
#   that takes an arg, the rest of the arg string will be used.
#
# -C
#   Delete comments.
# +C
#   Keep comments.  Default.
#
# -LNK
#   Disable the special LNK-project processing.
# +LNK
# +LNK=<dir>
# +LNK.<suf>=<dir>
#   Special  processing  for  hyperlinks.   We  look  for  several  sorts  of
#   hyperlinks,  and rewrite them so they will work when the pages are in the
#   <dir> directory. The .<suf>=<dir> means that files ending with .<suf> are
#   assumed  to  be found in the directory named.  If no suffix is given, the
#   directory will be used for any file without a recognized suffix.  Several
#   LNK options may be used to specify directories for different suffixes.
#
# -I'str'
#   Indent string, used once for each level of indentation.  The default is a
#   single  tab.   For  debugging  purposes, -I'| ' is very useful, though it
#   gives invalid HTML.
#
# -H
#   Suppress HTML markup.  The result is the "content" text formatted as
#   usual, but without any HTML tags.  This is useful for converting HTML to
#   plain text.  You may want to feed the result to further text formatters.
#
# -LM<n>
# -RM<n>
#   (Not fully implemented yet)
#   Define margins.  -LM sets the left margin to column <n>; -RM sets the
#   right margin to <n>.  The defaults are 0 and 70.  Note that we treat
#   the tabs used for indenting as just a single character for this purpose.
#
# -Vn
#   Verbose level n.  The default is -V1, which gives only error messages.
#
# FILES
#   This program functions like a "standard Unix filter".  If you give it one
#   or  more file names on its command line, it'll read them in sequence, and
#   effectively catenate them into one long file.  If there are no file names
#   on the command line, we read from standard input.
#
#   Output is always to standard output, which you can redirect as you wish.
#
# AUTHOR
#  John Chambers <jc@trillian.mit.edu>
#
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

$| = 1;						# Unbuffered standard output.
$me = $0;
$me =~ s{.*/}{};			# Our name, minus any leading directory.
#$V = $ENV{"V_$me"} || $ENV{"D_$me"} || 1;
#$ENV{'PATH'} = '/sbin:/usr/sbin:/usr/bin:/sh:/usr/local/bin';
$exitstat = 0;				# Anyone can leave behind an exit status.
&Vinit($ENV{"V_$me"} || $ENV{"D_$me"} || '1');	# Set up verbose output.
&EntTables();				# Initialize the entity tables.
&TagTables();				# Initialize the tag tables.

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Assorted global settings.  A program name ending in "txt" or "text" selects
# plain-text output; any other name selects HTML output.
$html = 1;					# Produce HTML tags.
$html = 0 if ($me =~ /te*xt$/);
($ind, $txtmax) = ("\t", 60);	# Indent string; length limit for output text lines.
($wantNL, $NI)  = (1, 0);		# Just wrote a newline?  Just wrote indentation?
($comments, $stag) = (1, 1);	# Produce comments?  Special handling for %Stag tags?

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Level-related lists, each seeded with its level-0 entry:
#   @IT  indent level for each tag level
#   @I   indent strings (zero or more copies of $ind)
#   @T   tags at the various levels
($IT[0], $I[0], $T[0]) = (0, "", "");
($Ilvl, $Tlvl) = (0, 0);	# Current indent level and current tag level.

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Here we run thru the command-line args, looking for options.  Anything that
# doesn't look like an option gets stored in @files for later use.

# Each argument starting with '-' or '+' is a bundle of option letters, taken
# one at a time.  A letter that takes a value (I, LNK) swallows the rest of
# the argument.  Everything else is remembered in @files as an input file.
for my $arg (@ARGV) {
	VC("$me: Arg \"$arg\"") if $V>5;
	if (($flg,$opts) = ($arg =~ /^([-+])(.*)/)) {		# + and - indicate options.
		VC("$me: flg=\"$flg\" opts=\"$opts\"") if $V>5;
		while ($opts =~ /^(.)(.*)/) {
			my $raw = $1;			# Option letter exactly as typed, for messages.
			$opt  = uc($raw);
			$opts = $2;
			VC("$me: opt=\"$opt\" opts=\"$opts\"") if $V>5;
			if ($opt eq 'C') {
				$comments = ($flg eq '-' ? 0 : 1);	# Whether to produce comments.
			} elsif ($opt eq 'I') {	# Indentation.
				$ind = $opts;		# The rest of the arg is the indent string.
				$opts = '';
			} elsif ($opt eq 'H') {	# Whether to produce HTML tags.
				$html = ($flg eq '-' ? 0 : 1);
			} elsif (($opt eq 'L' or $opt eq 'R') and ($opts =~ s"^M(\d*)"")) {
				# Margins.  Test the length of the number rather than its
				# truth, so that an explicit 0 is honoured; the fallback
				# values apply only when no number was given at all.
				my $col = $1;
				$lmargin = (length($col) ? $col :  3) if ($opt eq 'L');
				$rmargin = (length($col) ? $col : 72) if ($opt eq 'R');
			} elsif ($opt eq 'L') {
				if ($opts =~ /^NK\b(.*)/i) {	# Rewriting links.
					$LNK = 1;
					$LNKopt = $1;
					VC("$me: LNKopt=\"$LNKopt\"") if $V>2;
					if (($suf,$dir) = ($LNKopt =~ /^\.(\w+)=(.*)$/)) {
						VC("$me: LNK option is suf \"$suf\" = dir \"$dir\"") if $V>2;
						$LNKdir{$suf} = $dir;		# Directory for one suffix.
					} elsif (($dir) = ($LNKopt =~ /^=(.*)$/)) {
						VC("$me: LNK option is dir \"$dir\"") if $V>2;
						$LNKdir{'='} = $dir;		# Default directory.
					} else {
						VC("$me: LNK option \"$LNKopt\".") if $V>2;
					}
					$opts = '';
				} else {
					# Neither -LM nor -LNK.  Report the text we could not
					# interpret; the remaining letters are still scanned
					# as ordinary options, as before.
					print STDERR "$me: Unknown L option \"$raw$opts\" ignored.\n" if $V;
				}
			} elsif ($opt eq 'V') {	# Verbose level.
				if ($opts =~ s/^(\d)//) {$V = $1} else {++$V}
			} else {
				print STDERR "$me: Unknown option \"$raw\" ignored.\n" if $V;
			}
		}
	} else {
		push @files, $arg;
	}
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Here we read the input data, accumulating it in $txt, and  calling  oneline
# to process as much as it can. Note that oneline may not process all of $txt
# if there is an incomplete tag. When oneline returns, we simply read another
# line and append it to $txt, and then ask oneline "Can you handle it now?"

# Feed the input to oneline() a line at a time.  Lines are read lazily, so a
# large file is never held in memory as a list.
if (@files) {
	for my $f (@files) {
		# Three-argument open: the name is always treated as a plain file
		# to read, never as a mode prefix or a command pipe.
		if (open(F, '<', $f)) {
			while (defined(my $l = <F>)) {
				$txt .= $l;
				&oneline();
			}
			close(F);
		} else {
			print STDERR "$me: Can't read \"$f\" [$!]\n" if $V;
			$exitstat = 1;	# Let the caller see that some input was skipped.
		}
	}
} else {
	while (defined(my $l = <STDIN>)) {
		$txt .= $l;
		&oneline();
	}
}
# Test the length, not the truth, of the leftover text: a lone "0" is false
# in Perl but is still data that must be written.
if (defined($txt) and length($txt)) {	# Any relic text left over?
	print "$txt";	# It's probably junk, but print it anyway.
	$wantNL = (substr($txt,-1,1) eq "\n") ? 1 : 0;
	$NI = 0;
}
done:
	print "\n" if !$wantNL;		# Finish the last output line.
	exit $exitstat;

# That's the main program; here are the subroutines:
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Process as much of the global $txt as we can, removing each piece as it is
# handled.  We return with data still in $txt only when it begins with an
# HTML tag or comment whose end has not been read yet; the main loop then
# appends another line and calls us again.
#
# Each pass of the loop takes one item off the front of $txt:
#   <!-- ... -->   passed to comment(), with any whitespace that follows it
#   </...>         passed to closetag(), without the "</" and ">"
#   <...>          passed to opentag(), without the "<" and ">"
#   whitespace     copied verbatim inside PRE; otherwise reduced to a single
#                  space unless we are at the start of a line or after indent
#   other text     one run of non-blank, non-"<" characters goes to wordout()
#
# The loop tests the length of $txt rather than its truth, so that text
# consisting of the single character "0" is processed like any other word.

sub oneline {
	while (defined($txt) and length($txt)) {
		if ($txt =~ /^</) {
			if ($txt =~ /^<!--/) {
				if ($txt =~ s"^(<!--.*?-->\s*)""s) {
					&comment($1);
				} else {
					return;		# Return with incomplete comment.
				}
			} elsif ($txt =~ s"^</([^>]*)>""s) {
				&closetag($1);
			} elsif ($txt =~ s"^<([^>]*)>""s) {
				&opentag($1);
			} else {
				VL("$me: > not found.") if $V>6;
				return;		# Return with incomplete tag.
			}
		} elsif ($txt =~ s"^(\s+)"") {
			if ($PRE) {
				print $1;
			} else {
				unless ($wantNL or $NI) {
					print ' ';
					++$txtlen;
					$wantNL = $NI = 0;
				}
			}
		} elsif ($txt =~ s"^([^<\s]+)"") {
			&wordout($1);
		} else {
			print "\n<!-- CAN'T PARSE -->"if $V;
			print $txt;
			$NI = $wantNL = 0;
			$txt = '';
		}
	}
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Convert a named HTML entity to a single character.  wordout() calls this
# only for text output; with HTML output, entities are passed on unchanged.
#
# Param:   the entity name without the surrounding "&" and ";".
# Returns: the one-byte character whose code %HTMLentval gives for the name,
#          or the original "&name;" text if the name is not in the table.
#
# The table holds codes up to 255, so we pack with 'C' (unsigned char).  The
# signed 'c' template only covers -128..127.
#
sub e2c {
	local($ent) = @_;
	local($val) = $HTMLentval{$ent};
	if (defined $val) {
		return pack('C',$val);
	}
	if ($ent eq 'quot') {return '"'}	# Fallback in case the table lacks it.
	return "&$ent;"
}
# Convert a decimal character reference to a single character.
#
# Param:   the digits between "&#" and ";".
# Returns: the one-byte character with that code.  A code above 255 does not
#          fit in one byte, so the reference is handed back as "&#NNN;"
#          rather than being wrapped around into an unrelated character.
sub d2c {
	local($ent) = @_;
	local($val) = int($ent);
	return "&#$ent;" if $val > 255;
	return pack('C',$val);
}
# Convert a hexadecimal character reference to a single character.
#
# Param:   the text between "&#" and ";", including its leading "x"
#          (hex() accepts that prefix).
# Returns: the one-byte character with that code.  A code above 255 does not
#          fit in one byte, so the reference is handed back as "&#xHHH;"
#          rather than being wrapped around into an unrelated character.
sub x2c {
	local($ent) = @_;
	local($val) = hex($ent);
	return "&#$ent;" if $val > 255;
	return pack('C',$val);
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Write one word (a run of non-blank text) to the output.
#
# For text output the character references in the word are decoded first:
# hexadecimal, then decimal, then named entities.  Outside of PRE we start a
# fresh, indented line before the word when the current line has grown past
# $txtmax, or when a newline is pending and the current level has an indent
# string that has not been written yet.  Finally the word is printed and
# $txtlen, $wantNL and $NI are updated.
sub wordout {
	my($word) = @_;
	unless ($html) {
		$word =~ s/\&#(x[0-9A-Fa-f]+);/&x2c($1)/ge;
		$word =~ s/\&#(\d+);/&d2c($1)/ge;
		$word =~ s/\&(\w+);/&e2c($1)/ge;
	}
	if (!$PRE) {
		my $newline = 0;		# Do we need newline + indent first?
		if ($txtlen > $txtmax) {
			VL("$me: wantNL+I because txtlen=$txtlen > txtmax=$txtmax.") if $V>2;
			$newline = 1;
		} elsif ($wantNL and !$NI and $I[$Ilvl]) {
			VL("$me: I because wantNL=$wantNL NI=$NI I[$Ilvl]=\"$I[$Ilvl]\"") if $V>2;
			$newline = 1;
		}
		if ($newline) {
			&NLI();
			($txtlen, $wantNL, $NI) = (0, 0, 1);
		}
	}
	print $word;
	$NI = 0;
	$wantNL = 0;
	$txtlen += length($word);
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Write out one HTML comment.  The parameter is the complete comment text,
# "<!--" through "-->", plus any whitespace that followed it.  The comment is
# printed when comments are enabled or when we are inside a SCRIPT element;
# otherwise it is dropped.
sub comment {
	my($cmt) = @_;
#	if ($cmt =~ /^<!--webbot\s/i) {
#		print "<!--webbot ... -->\n";
#		$wantNL = 1;
#		return;
#	}
	if ($comments || $inscript) {
		print $cmt;
	}
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Handle a close tag.  The parameter is the tag text without the "</" and ">"
# delimiters.
#
# Inside PRE the tag is just copied (HTML output only), and </PRE> ends PRE
# mode.  Otherwise, for a nesting (%Ntag) or exclusive (%Xtag) tag, we pop the
# @T stack back to the matching open tag.  Nesting tags still open above it
# are closed by calling ourselves recursively, so their close tags are
# written too; other entries are simply dropped.  The indent level is then
# restored from @IT, and the %Ftag and %Ltag tables decide whether the tag
# starts and/or ends its own output line.
#
# NOTE(review): a close tag that has no matching open tag on the stack
# unwinds the whole stack; confirm that this is intended.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub closetag {
	local($Tag) = @_;		# Tag in random case.
	local($TAG) = uc($Tag);	# Tag in upper case.
	local($CTAG) = "/$TAG";	# Tag with initial '/'.
	if ($PRE) {
		print "</$Tag>" if $html;
		if ($TAG eq 'PRE') {
			VL("$me/closetag: /PRE") if $V>1;
			$PRE = 0;
		}
		return;
	}
	if ($stag and $Stag{$TAG}) {	# Optional kludge to discard some attributes.
		$Tag = &specialtag($TAG,$Tag,'/');
	}
	if ($Ntag{$TAG} or $Xtag{$TAG}) {	# Is tag nesting type?
		while (($Tlvl > 0) and ($TAG ne $T[$Tlvl])) {
#			VC("<$T[$Tlvl]> closed by </$TAG>");
			if ($Ntag{$T[$Tlvl]}) {
				closetag($T[$Tlvl]);	# Close the inner tag first.
			} else {
				if (--$Tlvl < 0) {$Tlvl = 0}
			}
		}
		if (--$Tlvl < 0) {$Tlvl = 0}	# Pop the matching open tag.
#		$Ilvl = $IT[$Tlvl];	# Restore this tag's indent level.
	} else {
		if ($Itag{$TAG} > 0) {if (--$Ilvl < 0) {$Ilvl = 0}}
	}
	if ($html) {			# Do we need newline + indent before this tag?
		if ($Ftag{$CTAG}) {	# Is tag first on new line?
			VL("$me/closetag: NLI because Ftag{$CTAG}=$Ftag{$CTAG} and wantNL=$wantNL") if $V>2;
			&NLI();			# Newline + previous indent.
		}
		print "</$Tag>";
		$wantNL = $NI = 0;
	}
	$Ilvl = $IT[$Tlvl];		# Restore previous tag's indent level.
	if ($html and $Ltag{$CTAG}) {
		VL("$me/closetag: NLI because Ltag{$CTAG}=$Ltag{$CTAG} and wantNL=$wantNL") if $V>2;
		print "\n"; $wantNL = 1
	} elsif ($Ttag{$TAG} && $Ltag{$TAG}) {	# Text mode.
		VL("$me/closetag: NLI after $TAG because Ttag{$TAG}=$Ttag{$TAG} Ltag{$TAG}=$Ltag{$TAG}") if $V>2;
		&NLI();
	}
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Finish the current output line.  Nothing happens when the $wantNL flag is
# already set; otherwise a newline is written and the flag is set.
sub wantNL {
	return if $wantNL;
	VL("$me/wantNL: Produce wantNL with wantNL=$wantNL.") if $V>2;
	print "\n";
	$wantNL = 1;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Start a new, indented output line.  A newline is written unless $wantNL is
# already set.  Then, if the current level has an indent string, we write it
# (HTML output) or a single tab (text output), and record that indentation,
# not a newline, was the last thing written.
sub NLI {
	unless ($wantNL) {
		VL("$me/NLI: Produce wantNL with wantNL=$wantNL.") if $V>2;
		print "\n";
		$wantNL = 1;
	}
	my $indent = $I[$Ilvl];
	return unless $indent;		# Nothing to indent with at this level.
	VL("$me/NLI: Indent I[$Ilvl]=\"$I[$Ilvl]\"") if $V>2;
	print($html ? "$indent" : "\t");
	$NI = 1;
	$wantNL = 0;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Process an open tag.  The parameter holds the tag and its attributes but
# not the <> delimiters.
#
# Inside PRE the tag is copied through as an open tag (HTML output only).
# <PRE> itself starts a new line and turns PRE mode on; its markup is written
# only for HTML output.  For any other tag we:
#   - split off the tag name, with the rest of the text as the attributes
#     (the attributes may run over several lines);
#   - for an exclusive (%Xtag) tag, pop a previous instance of the same tag,
#     or of the tag %Xtag names for it, from the top of the @T stack;
#   - push nesting (%Ntag) and exclusive tags onto @T;
#   - start a new indented line if %Ftag (HTML) or %Ttag (text) asks for one;
#   - apply the special-tag and hyperlink rewrites when they are enabled;
#   - write the tag (HTML output only), then end the line if %Ltag says so:
#     a positive value always does, a negative value only when the tag is
#     followed by the end of the input line;
#   - step the indent level up for indenting (%Itag > 0) tags and record the
#     level in @IT for the current tag level.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

sub opentag {
	local($Tag) = @_;
	if ($PRE) {
		print "<$Tag>" if $html;	# An open tag, so no '/'.
		return;
	}
	local($TAG,$tag,$att);
	# Split into tag and attributes.  The /s lets the attribute part span
	# newlines, so a tag written over several lines is still recognized.
	unless (($tag,$att) = ($Tag =~ m"^([^\s]+)\s+(.*)$"s)) {
		$tag = $Tag;
		$att = '';
	}
	$TAG = uc($tag);		# Canonicalize tags to upper case.
	if ($TAG eq 'PRE') {	# Special kludge for PRE tag.
		VL("$me/opentag: PRE") if $V>1;
		$PRE = 1;
		print "\n";
		print "<$Tag>" if $html;	# No markup in text output.
		return;
	}
	if ($Xtag{$TAG}) {		# Exclusive tag?
		if ($T[$Tlvl] eq $TAG or $T[$Tlvl] eq $Xtag{$TAG}) {
			if (--$Tlvl < 0) {$Tlvl = 0}	# Close previous instance of this tag.
			$Ilvl = $IT[$Tlvl];				# Restore this tag's indent level.
		}
	}
	if ($Ntag{$TAG} or $Xtag{$TAG}) {	# Nesting tag?
		$T[++$Tlvl] = $TAG;	# Add to stack of nesting tags.
	}
	if ($html) {
		if ($Ftag{$TAG}) {
			VL("$me/opentag: NLI because html=$html Ftag{$TAG}=\"$Ftag{$TAG}\"") if $V>2;
			&NLI();
		}
	} elsif ($Ttag{$TAG}) {	# Newline during text mode for these tags.
		VL("$me/opentag: NLI because html=$html Ttag{$TAG}=\"$Ttag{$TAG}\"") if $V>2;
		&NLI();
	}
	if ($stag and $Stag{$TAG}) {	# Optional kludge to discard some attributes.
		$Tag = &specialtag($TAG,$Tag);
	}
	if ($LNK and $Htag{$TAG}) {
		$Tag = &Htag($TAG,$tag,$att);
	}
	print "<$Tag>" if $html;
	$wantNL = $NI = 0;
	if ($wantNL = $Ltag{$TAG}) {	# Does this tag end a line?
		if ($wantNL > 0) {			# Positive means it does.
			print "\n"; $wantNL = 1;
			$txtlen = 0;
		} elsif ($wantNL < 0) {		# Negative means yes if at end of line.
			if ($txt =~ s"^\s*\n\s*"") {
				print "\n"; $wantNL = 1;
				$txtlen = 0;
			} else {			# Text follows on same line.
				$wantNL = 1;		# Lie to suppress newline.
				$txtlen = length($Tag) + 2;
			}
		}
	}
	if ($Itag{$TAG} > 0) {	# Is this an indenting tag?
		$Ilvl++;
		$I[$Ilvl] = $ind x $Ilvl;
	}
	$IT[$Tlvl] = $Ilvl;		# Note indent level for current tag.
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Hyper-link tags need some extra handling.  We rebuild the tag one attribute
# at a time.  The value of a double-quoted HREF or SRC attribute is passed
# through LNKurl(); every other attribute is copied as it stands.  Attributes
# come out separated by single spaces.
#
# Params:  the upper-case tag name, the tag name as written, and the
#          attribute text.
# Returns: the new tag text, without the <> delimiters.
#
# The loop tests the length of what is left rather than its truth, so that a
# final attribute consisting of "0" is not dropped.  Debug messages are
# written only at verbose level 3 and above, like the other LNK messages.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub Htag {
	local($TAG,$tag,$att) = @_;
	local($new,$a,$v);
	$new = '';
	while (defined($att) and length($att)) {
		VC("Htag att=\"$att\"") if $V>2;
		if ($att =~ s/^(\w+)="([^"]*)"\s*//) {		# name="value"
			my($name,$val) = ($1,$2);
			VC("Htag 1=$name 2=\"$val\"") if $V>2;
			$a = lc($name);
			if ($a eq 'href' or $a eq 'src') {
				$new .= " $name=\"" . &LNKurl($TAG,$val) . "\"";
			} else {
				$new .= " $name=\"$val\"";
			}
		} elsif ($att =~ s/^(\w+)=([^\s]*)\s*//) {	# name=value
			$new .= " $1=$2";
			VC("Htag 1=$1 2=$2") if $V>2;
		} else {									# Anything else.
			$att =~ s/^([^\s]*)\s*//;
			VC("Htag 1=$1") if $V>2;
			$new .= " $1";
		}
	}
	return "$tag$new";
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Rewrite a file name found inside a piece of JavaScript.  We look for a
# single-quoted name ending in .htm/.html, .gif or .js.  Because the leading
# part of the pattern is greedy, the last such name in the text is the one
# that is found.  The directory configured for its suffix, or failing that
# the default directory, is put in front of it.
#
# Param:   the JavaScript text (the part of the URL after "javascript:").
# Returns: the text with the file name rewritten.  The text comes back
#          unchanged when no such file name is present, or when no directory
#          is configured for it: prefixing an empty directory would turn the
#          relative name into a root-relative one.
sub LNKjs {
	local($x) = @_;
	my($init,$base,$suff,$rest) = ($x =~ /^(.*)'([-.\w]+)\.(html*|gif|js)'(.*)/);
	return $x unless defined $suff;		# No quoted file name.
	my $dir = $LNKdir{lc($suff)} || $LNKdir{'='};
	return $x unless $dir;				# Nowhere to point it.
	return $init . "'" . "$dir/$base.$suff" . "'" . $rest;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Return the numerically smallest of the arguments.
sub Min {
	my $least = shift;
	foreach my $n (@_) {
		$least = $n if $n < $least;
	}
	return $least;
}
# Return the numerically largest of the arguments.
sub Max {
	my $most = shift;
	foreach my $n (@_) {
		$most = $n if $n > $most;
	}
	return $most;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Tidy a URL by cancelling each "dir/../" pair, so that "a/foo/../c" becomes
# "a/c".  The whole directory name is removed, not just its last character.
# Leading "../" steps, which have nothing to cancel against, are left alone,
# and a "scheme://host" prefix is set aside first so that the host name is
# never taken for a directory.
#
# Param:   the URL.
# Returns: the tidied URL.
sub URL {
	local($url) = @_;
	my $head = '';
	if ($url =~ s{^(\w+://[^/]*)}{}) {
		$head = $1;			# Scheme and host; not part of the path.
	}
	# A segment that is neither "." nor "..", followed by "/../", cancels.
	1 while ($url =~ s{(^|/)(?!\.\.?/)[^/]+/\.\./}{$1});
	return $head . $url;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Here we get a tag and its URL.  We return the URL, possibly modified.
#
# Params:  the upper-case tag name (not used here) and the URL.
# Returns: the URL to write in the output.
#
# The cases, in order:
#   - "scheme://..." and "/..." URLs are already absolute: only tidied.
#   - "#fragment" points into the current page: returned untouched.
#   - "javascript:..." has the file name inside it rewritten by LNKjs().
#   - any other "scheme:..." URL (mailto: and so on) is not a file in one of
#     our directories: only tidied, never given a directory prefix.
#   - a relative URL gets the directory configured for its suffix, or else
#     the default directory, put in front of it; with no directory that
#     applies it is only tidied.
# Debug messages are written only at verbose level 3 and above, like the
# other LNK messages.
sub LNKurl {
	local($TAG,$url) = @_;
	local($p,$uri,$v);
	local($id) = "$me/LNKurl";
	my $dbg = ($V > 2);
	my $dir;
	VC("$id: ../ in URL") if ($dbg and $url =~ /\.\.\//);
	return URL($url) if ($url =~ m"^\w+://");
	return URL($url) if ($url =~ m"^/");
	return $url      if ($url =~ m"^#");
	if (($p,$uri) = ($url =~ /^(\w+:)(.*)$/)) {
		VC("$id protocol \"$p\" \"$uri\"") if $dbg;
		if (uc($p) eq 'JAVASCRIPT:') {
			return &URL($p . &LNKjs($uri));
		}
		return &URL($url);		# Some other scheme; not ours to move.
	}
	if (%LNKdir) {
		VC("$id LNKdir defined.") if $dbg;
		if ($url =~ /\.(\w+)$/) {
			my $suf = $1;
			VC("$id $suf suffix found.") if $dbg;
			if ($dir = $LNKdir{$suf}) {
				VC("$id $suf suffix is \"$dir\"") if $dbg;
				return &URL("$dir/$url");
			}
		} else {
			VC("$id no suffix on \"$url\"") if $dbg;
		}
		if ($dir = $LNKdir{'='}) {
			VC("$id dir is \"$dir\"") if $dbg;
			return &URL("$dir/$url");
		} else {
			VC("$id no default directory") if $dbg;
		}
	} else {
		VC("$id no directories at all") if $dbg;
	}
	return &URL($url);
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# This is used to implement special-case kludges for some tags. It's entirely
# ad hoc, depending on the needs of the current project.
#
# Params:  the upper-case tag name, the tag text as written (with any
#          attributes), and '/' when this is a close tag.
# Returns: the tag text to write, which at present is always the text we
#          were given.
#
# The only live kludge keeps $inscript, a count of the SCRIPT elements we are
# inside, so that comment() can keep comments found within scripts.  The count
# goes up on <SCRIPT> and down on </SCRIPT>, and is never allowed to go
# negative.
#
sub specialtag {
	local($T,$t,$f) = @_;
	if ($T eq 'SCRIPT') {
		if ($f eq '/') {
			if (--$inscript < 0) {$inscript = 0}	# Stray </SCRIPT>.
		} else {
			++$inscript;
		}
		return $t;
	}
#	if ($T eq 'IMG' or $T eq 'TD' or $T eq 'TABLE') {
#		$t =~ s/\s*\bwidth="*\d+%*"*//i;
#		$t =~ s/\s*\bheight="*\d+%*"*//i;
#	}
#	if ($T eq 'AREA') {
#		$t =~ s/\bhref="telnet:(.*?)"/href="<#1:TelnetLink(___)#>"/i;
#	}
	$t;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Debug kludge: write the @T stack entry at index $t, together with the
# entries just before and just after it, to the verbose output as an HTML
# comment.
#
sub PT {
	my($t) = @_;
	my($before,$here,$after) = ($T[$t-1], $T[$t], $T[$t+1]);
	print V "<!-- T[$t]=[$before,$here,$after] -->";
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Verbose-message output routines.  All four write to the filehandle V, which
# Vinit() opens.  If the messages are ever to go somewhere else, this is the
# place to change.
#
#   VC  the arguments, joined by blanks, wrapped as an HTML comment + newline
#   VM  the arguments followed by "<br>" and a newline
#   VL  the arguments followed by a newline
#   VS  the arguments and nothing else
#
sub VC {
	print V '<!--' . join($", @_) . "-->\n";
}
sub VM {
	my @out = (@_, "<br>\n");
	print V @out;
}
sub VL {
	my @out = (@_, "\n");
	print V @out;
}
sub VS {
	my @out = @_;
	print V @out;
}

# Verbose-mode initialization.
#
# Unless $V is already defined, it is set to the smaller of the limit for this
# program (%Vprog, default 1) and the limit for the remote host (%Vhost,
# default 1).  When $V is positive the V filehandle is opened on "$me.log";
# if that fails we complain on standard output and send verbose messages
# there instead.  V is then made unbuffered.
#
# NOTE(review): the caller passes a level taken from the environment, but
# nothing here reads the argument list -- confirm whether that is intended.
sub Vinit {
	%Vhost = ('localhost' => 1);				# Max verbose level for remote hosts.
	%Vprog = ('Session' => 3, 'Verify' => 1);	# Max verbose level for CGI programs.
	unless (defined($V)) {
		my $proglimit = $Vprog{$me} || 1;
		my $hostlimit = $Vhost{$ENV{REMOTE_HOST}} || 1;
		$V = &Min($proglimit, $hostlimit);
	}
	$Vfile = "$me.log";			# File for verbose output.
	if ($V > 0) {				# Do we need a verbose-output file?
		if (!$Vfile) {			# No file name: use STDOUT.
			open(V,">-");
		} elsif (!open(V,">$Vfile")) {	# Can't open it: say so, use STDOUT.
			print "<br><b>Can't write \"$Vfile\" [$!]</b><br>\n" if $V;
			open(V,">-");
		}
	}
	select(V);					# Make V unbuffered ...
	$| = 1;
	select(STDOUT);				# ... and go back to normal output.
	return unless $V > 3;
	# Explain the value of $V ...
	VM("$me: Vprog{$me}=\"$Vprog{$me}\"") if $Vprog{$me};
	VM("$me: Vhost{$ENV{REMOTE_HOST}}=\"$Vhost{$ENV{REMOTE_HOST}}\"")
		if defined $Vhost{$ENV{REMOTE_HOST}};
	VM("$me: V=\"$V\" initially.") if $V>2;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Here's the table of HTML3.2 entity encodings.  EntTables() fills the global
# %HTMLentval, which maps an entity name (without the "&" and ";") to its
# character code.  e2c() uses it to turn named entities into characters for
# text output.  Names are case-sensitive: 'Eacute' and 'eacute' differ.
sub EntTables {
	%HTMLentval = (
		'quot'   =>  34,	# quotation mark = APL quote, U+0022 ISOnum
		'amp'    =>  38,	# ampersand, U+0026 ISOnum
		'lt'     =>  60,	# less-than sign, U+003C ISOnum
		'gt'     =>  62,	# greater-than sign, U+003E ISOnum
		'nbsp'   => 160,	# no-break space = non-breaking space, U+00A0 ISOnum
		'iexcl'  => 161,	# inverted exclamation mark, U+00A1 ISOnum
		'cent'   => 162,	# cent sign, U+00A2 ISOnum
		'pound'  => 163,	# pound sign, U+00A3 ISOnum
		'curren' => 164,	# currency sign, U+00A4 ISOnum
		'yen'    => 165,	# yen sign = yuan sign, U+00A5 ISOnum
		'brvbar' => 166,	# broken bar = broken vertical bar, U+00A6 ISOnum
		'sect'   => 167,	# section sign, U+00A7 ISOnum
		'uml'    => 168,	# diaeresis = spacing diaeresis, U+00A8 ISOdia
		'copy'   => 169,	# copyright sign, U+00A9 ISOnum
		'ordf'   => 170,	# feminine ordinal indicator, U+00AA ISOnum
		'laquo'  => 171,	# left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
		'not'    => 172,	# not sign, U+00AC ISOnum
		'shy'    => 173,	# soft hyphen = discretionary hyphen, U+00AD ISOnum
		'reg'    => 174,	# registered sign = registered trade mark sign, U+00AE ISOnum
		'macr'   => 175,	# macron = spacing macron = overline = APL overbar, U+00AF ISOdia
		'deg'    => 176,	# degree sign, U+00B0 ISOnum
		'plusmn' => 177,	# plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
		'sup2'   => 178,	# superscript two = superscript digit two = squared, U+00B2 ISOnum
		'sup3'   => 179,	# superscript three = superscript digit three = cubed, U+00B3 ISOnum
		'acute'  => 180,	# acute accent = spacing acute, U+00B4 ISOdia
		'micro'  => 181,	# micro sign, U+00B5 ISOnum
		'para'   => 182,	# pilcrow sign = paragraph sign, U+00B6 ISOnum
		'middot' => 183,	# middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
		'cedil'  => 184,	# cedilla = spacing cedilla, U+00B8 ISOdia
		'sup1'   => 185,	# superscript one = superscript digit one, U+00B9 ISOnum
		'ordm'   => 186,	# masculine ordinal indicator, U+00BA ISOnum
		'raquo'  => 187,	# right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
		'frac14' => 188,	# vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
		'frac12' => 189,	# vulgar fraction one half = fraction one half, U+00BD ISOnum
		'frac34' => 190,	# vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
		'iquest' => 191,	# inverted question mark = turned question mark, U+00BF ISOnum
		'Agrave' => 192,	# latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
		'Aacute' => 193,	# latin capital letter A with acute, U+00C1 ISOlat1
		'Acirc'  => 194,	# latin capital letter A with circumflex, U+00C2 ISOlat1
		'Atilde' => 195,	# latin capital letter A with tilde, U+00C3 ISOlat1
		'Auml'   => 196,	# latin capital letter A with diaeresis, U+00C4 ISOlat1
		'Aring'  => 197,	# latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
		'AElig'  => 198,	# latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
		'Ccedil' => 199,	# latin capital letter C with cedilla, U+00C7 ISOlat1
		'Egrave' => 200,	# latin capital letter E with grave, U+00C8 ISOlat1
		'Eacute' => 201,	# latin capital letter E with acute, U+00C9 ISOlat1
		'Ecirc'  => 202,	# latin capital letter E with circumflex, U+00CA ISOlat1
		'Euml'   => 203,	# latin capital letter E with diaeresis, U+00CB ISOlat1
		'Igrave' => 204,	# latin capital letter I with grave, U+00CC ISOlat1
		'Iacute' => 205,	# latin capital letter I with acute, U+00CD ISOlat1
		'Icirc'  => 206,	# latin capital letter I with circumflex, U+00CE ISOlat1
		'Iuml'   => 207,	# latin capital letter I with diaeresis, U+00CF ISOlat1
		'ETH'    => 208,	# latin capital letter ETH, U+00D0 ISOlat1
		'Ntilde' => 209,	# latin capital letter N with tilde, U+00D1 ISOlat1
		'Ograve' => 210,	# latin capital letter O with grave, U+00D2 ISOlat1
		'Oacute' => 211,	# latin capital letter O with acute, U+00D3 ISOlat1
		'Ocirc'  => 212,	# latin capital letter O with circumflex, U+00D4 ISOlat1
		'Otilde' => 213,	# latin capital letter O with tilde, U+00D5 ISOlat1
		'Ouml'   => 214,	# latin capital letter O with diaeresis, U+00D6 ISOlat1
		'times'  => 215,	# multiplication sign, U+00D7 ISOnum
		'Oslash' => 216,	# latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
		'Ugrave' => 217,	# latin capital letter U with grave, U+00D9 ISOlat1
		'Uacute' => 218,	# latin capital letter U with acute, U+00DA ISOlat1
		'Ucirc'  => 219,	# latin capital letter U with circumflex, U+00DB ISOlat1
		'Uuml'   => 220,	# latin capital letter U with diaeresis, U+00DC ISOlat1
		'Yacute' => 221,	# latin capital letter Y with acute, U+00DD ISOlat1
		'THORN'  => 222,	# latin capital letter THORN, U+00DE ISOlat1
		'szlig'  => 223,	# latin small letter sharp s = ess-zed, U+00DF ISOlat1
		'agrave' => 224,	# latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
		'aacute' => 225,	# latin small letter a with acute, U+00E1 ISOlat1
		'acirc'  => 226,	# latin small letter a with circumflex, U+00E2 ISOlat1
		'atilde' => 227,	# latin small letter a with tilde, U+00E3 ISOlat1
		'auml'   => 228,	# latin small letter a with diaeresis, U+00E4 ISOlat1
		'aring'  => 229,	# latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
		'aelig'  => 230,	# latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
		'ccedil' => 231,	# latin small letter c with cedilla, U+00E7 ISOlat1
		'egrave' => 232,	# latin small letter e with grave, U+00E8 ISOlat1
		'eacute' => 233,	# latin small letter e with acute, U+00E9 ISOlat1
		'ecirc'  => 234,	# latin small letter e with circumflex, U+00EA ISOlat1
		'euml'   => 235,	# latin small letter e with diaeresis, U+00EB ISOlat1
		'igrave' => 236,	# latin small letter i with grave, U+00EC ISOlat1
		'iacute' => 237,	# latin small letter i with acute, U+00ED ISOlat1
		'icirc'  => 238,	# latin small letter i with circumflex, U+00EE ISOlat1
		'iuml'   => 239,	# latin small letter i with diaeresis, U+00EF ISOlat1
		'eth'    => 240,	# latin small letter eth, U+00F0 ISOlat1
		'ntilde' => 241,	# latin small letter n with tilde, U+00F1 ISOlat1
		'ograve' => 242,	# latin small letter o with grave, U+00F2 ISOlat1
		'oacute' => 243,	# latin small letter o with acute, U+00F3 ISOlat1
		'ocirc'  => 244,	# latin small letter o with circumflex, U+00F4 ISOlat1
		'otilde' => 245,	# latin small letter o with tilde, U+00F5 ISOlat1
		'ouml'   => 246,	# latin small letter o with diaeresis, U+00F6 ISOlat1
		'divide' => 247,	# division sign, U+00F7 ISOnum
		'oslash' => 248,	# latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
		'ugrave' => 249,	# latin small letter u with grave, U+00F9 ISOlat1
		'uacute' => 250,	# latin small letter u with acute, U+00FA ISOlat1
		'ucirc'  => 251,	# latin small letter u with circumflex, U+00FB ISOlat1
		'uuml'   => 252,	# latin small letter u with diaeresis, U+00FC ISOlat1
		'yacute' => 253,	# latin small letter y with acute, U+00FD ISOlat1
		'thorn'  => 254,	# latin small letter thorn, U+00FE ISOlat1
		'yuml'   => 255,	# latin small letter y with diaeresis, U+00FF ISOlat1
	);
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Here's a routine to initialize our tag tables.  This task was moved into  a #
# subroutine  so that it can be positioned out of the way, and not clutter up #
# the start of the program.                                                   #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub TagTables {

	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	# Here is a table that controls indentation.  The usual value  is  1, #
	# meaning to indent the contents by 1 level. A few have a value of 0, #
	# meaning to not indent the contents. Two special kludges: A value of #
	# '' means to outdent the contents to the left edge; this may be used #
	# for such tags as <PRE> and <TEXTAREA>.  And -1 means to indent  the #
	# data if it is on a new line.                                        #
	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	%Itag = (
		'A' => 1,
		'ADDRESS' => 1,
		'APPLET' => 1,
		'AREA' => 0,
		'B' => 0,
		'BASE' => 1,
		'BGSOUND' => 1,
		'BIG' => 1,
		'BL' => 1,
		'BLINK' => 1,
		'BLOCKQUOTE' => 1,
		'BODY' => 0,
		'BR' => 0,
		'CAPTION' => 1,
		'CENTER' => -1,
		'CITE' => 1,
		'CODE' => 1,
		'COMMENT' => 1,
		'DD' => 1,
		'DFN' => 1,
		'DIR' => -1,
		'DIV' => 1,
		'DL' => 0,
		'DT' => 1,
		'EM' => 1,
		'FONT' => 1,
		'FORM' => 1,
		'FRAME' => 0,
		'FRAMESET' => 1,
		'H1' => 1,
		'H2' => 1,
		'H3' => 1,
		'H4' => 1,
		'H5' => 1,
		'H6' => 1,
		'HEAD' => 1,
		'HN' => 1,
		'HR' => 0,
		'HTML' => 0,
		'I' => 1,
		'IMG' => 0,
		'INPUT' => 0,
		'ISINDEX' => 1,
		'KBD' => 1,
		'LI' => 1,
		'LINK' => 1,
		'LISTING' => '',
		'MAP' => 1,
		'MARQUEE' => 1,
		'META' => 1,
		'NETXID' => 1,
		'NOBR' => 0,
		'NOFRAMES' => 1,
		'OL' => 1,
		'OPTION' => 1,
		'P' => '',
		'PLAINTEXT' => '',
		'PRE' => '',
		'SAMP' => 0,
		'SCRIPT' => 0,
		'SELECT' => 1,
		'SMALL' => 1,
		'STRIKE' => 1,
		'STRONG' => 1,
		'SUB' => 1,
		'SUP' => 1,
		'TABLE' => 1,
		'TCL' => 1,
		'TD' => 1,
		'TEXTAREA' => '',
		'TH' => 1,
		'TITLE' => 1,
		'TR' => 1,
		'TT' => 1,
		'UL' => 1,
		'VAR' => 1,
		'WBR' => 1,
		'XMP' => 1,
	);

	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	# Here is a table that says which tags are to start their  own  line. #
	# If  in  this  table,  a  tag  will  be preceded by a newline and an #
	# indent.  If not in this table, a tag will be put out  as-is,  right #
	# after  any  preceding  text or tag.  Note that close tags may be in #
	# this table, too.                                                    #
	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	%Ftag = (
		'A' => 1,
		'ADDRESS' => 1,
		'APPLET' => 1,
		'AREA' => 1,
		'B' => 0,
		'BASE' => 1,
		'BGSOUND' => 1,
		'BIG' => 1,
		'/BIG' => 1,
		'BL' => 1,
		'BLINK' => 1,
		'BLOCKQUOTE' => 1,
		'/BLOCKQUOTE' => 1,
		'BODY' => 1,
		'/BODY' => 1,
		'BR' => 0,
		'CAPTION' => 1,
		'CENTER' => 1,
		'CITE' => 1,
		'CODE' => 1,
		'COMMENT' => 1,
		'DD' => 1,
		'DFN' => 1,
		'DIR' => 1,
		'DIV' => 1,
		'/DIV' => 1,
		'DL' => 1,
		'/DL' => 1,
		'DT' => 1,
		'EM' => 1,
		'/EM' => 1,
		'FONT' => 1,
		'/FONT' => 0,
		'FORM' => 1,
		'/FORM' => 1,
		'FRAME' => 1,
		'/FRAME' => 1,
		'FRAMESET' => 1,
		'/FRAMESET' => 1,
		'HEAD' => 1,
		'HN' => 1,
		'HR' => 1,
		'HTML' => 1,
		'I' => 1,
		'/I' => 1,
		'IMG' => 1,
		'INPUT' => 1,
		'ISINDEX' => 1,
		'KBD' => 1,
		'LI' => 1,
		'LINK' => 1,
		'LISTING' => '',
		'MAP' => 1,
		'/MAP' => 1,
		'MARQUEE' => 1,
		'META' => 1,
		'NETXID' => 1,
		'NOBR' => 0,
		'NOFRAMES' => 1,
		'/NOFRAMES' => 1,
		'OL' => 1,
		'OPTION' => 1,
		'P' => 1,
		'/P' => 1,
		'PLAINTEXT' => '',
		'PRE ' => 1,
		'SAMP' => 0,
		'SCRIPT' => 1,
		'/SCRIPT' => 1,
		'SELECT' => 1,
		'/SELECT' => 1,
		'SMALL' => 1,
		'STRIKE' => 1,
		'STRONG' => 1,
		'/STRONG' => 1,
		'SUB' => 1,
		'SUP' => 1,
		'TABLE' => 1,
		'/TABLE' => 1,
		'TCL' => 1,
		'TD' => 1,
		'/TD' => 1,
		'TEXTAREA' => '',
		'TH' => 1,
		'/TH' => 1,
		'TR' => 1,
		'/TR' => 1,
		'TT' => 1,
		'UL' => 1,
		'/UL' => 1,
		'VAR' => 1,
		'WBR' => 1,
		'XMP' => 1,
	);

	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	# Here is a table that says which tags are to end their line.   If  a #
	# tag's value here is true, the tag will be followed by a newline. If #
	# false in this table,  a  tag  will  be  put  out  as-is,  with  any #
	# following  text or tags on the same line.  Note that close tags may #
	# be in this table, too.                                              #
	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	%Ltag = (
		# Entries whose value is 1 (true): the tag is followed by a newline.
		(map { ($_ => 1) } qw(
			/A ADDRESS APPLET AREA
			BASE BGSOUND BIG /BIG BL BLINK BLOCKQUOTE /BLOCKQUOTE BODY /BODY
			CAPTION CITE CODE COMMENT
			DD DFN DIR DIV /DIV DL /DL DT
			EM /EM
			FONT FORM /FORM FRAME /FRAME FRAMESET /FRAMESET
			HEAD HN HR HTML
			I /I ISINDEX
			KBD
			LI LINK
			MAP /MAP MARQUEE META
			NETXID
			OL
			P /P
			SCRIPT /SCRIPT SELECT /SELECT SMALL STRIKE STRONG /STRONG SUB SUP
			TABLE /TABLE TCL TD /TD TH /TH TR /TR TT
			UL /UL
			VAR
			WBR
			XMP
		)),
		# NOTE(review): this key ends in a blank, so a lookup on 'PRE'
		# will not find it.  Confirm whether that is intended.
		'PRE ' => 1,
		# The only entry with a negative value.
		'OPTION' => -1,
		# Entries whose value is 0 (false): following text stays on the line.
		(map { ($_ => 0) } qw(A B BR CENTER /FONT IMG INPUT NOBR SAMP)),
		# Entries whose value is the empty string (also false).
		(map { ($_ => '') } qw(LISTING PLAINTEXT TEXTAREA)),
	);

	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	# Nesting tags that need to be closed.  For the optional cases, don't #
	# put  an  entry here, and we will pass close tags through as-is.  We #
	# keep track of the tags listed here in the $T[$Tlvl] stack.          #
	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	%Ntag = (
		# Entries whose value is 1 (true).
		(map { ($_ => 1) } qw(
			A B BIG BL BLOCKQUOTE BODY
			CENTER
			DIV DL
			EM
			FORM FONT
			H1 H2 H3 H4 H5 H6 HEAD HTML
			I
			OL
			PRE
			SCRIPT SELECT STRONG
			TABLE TD TH TR TCL TITLE
			UL
		)),
		# Present in the table, but with a false value.
		'FRAME' => 0,
		'P' => 0,
	);

	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	# Tags that need special processing for hyperlinks.                   #
	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	# Every listed tag maps to 1.
	%Htag = map { ($_ => 1) } qw(A AREA IMG SCRIPT);

	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	# Tags that are noticed during "text" mode, when $html is false.  The #
	# main reason for this table is to generate newlines and indents when #
	# these tags are encountered.                                         #
	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	# Every listed tag maps to 1.
	%Ttag = map { ($_ => 1) } qw(
		CAPTION
		DL DT
		H1 H2 H3 H4 H5 H6
		LI
		P
		TD TH TR
	);

	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	# Tags that need to be trimmed or somehow kludged. These tags are all #
	# passed to specialtag() for processing.                              #
	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	# Every listed tag maps to 1.
	%Stag = map { ($_ => 1) } qw(AREA IMG SCRIPT TABLE TD);

	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	# An "exclusive" tag is one that can't  be  used  recursively.   This #
	# isn't  necessarily  an error, of course; it just means that the tag #
	# terminates any earlier instance of itself.  Closing tags are  often #
	# omitted  for  these,  except for <A>...</A> which is a very special #
	# case. Note also the kludge for <DT> and <DD>: Each has the other as #
	# its  'value'.  This causes each to end the other as well as itself. #
	# (If we find a case of more than two  such  paired  exclusive  tags, #
	# this scheme won't work.)                                            #
	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	%Xtag = (
		# Tags whose value is simply 1.
		(map { ($_ => 1) } qw(A AREA IMG INPUT LI OPTION)),
		# The paired tags: each one's value is the name of the other.
		'DD' => 'DT',
		'DT' => 'DD',
	);
}