#!/usr/bin/perl -T
#
# NAME
#   m2h - Convert Unix man page to HTML.
#
# SYNOPSIS
#   <a href="/cgi/m2h?-2+errno&T=errno">errno</a>(2),
#   <a href="/cgi/m2h?-1+csh&T=csh">csh</a>(1),
#   <a href="/cgi/m2h?S=1&T=csh">csh</a>(1),
#
# DESCRIPTION
#   Here's a CGI script that implements the Unix "man" command via the web.  To
#   use it, put it in your cgi-bin directory as "m2h", and put anchors like the
#   above in your html documents.  Then when the user clicks on the link,  your
#   system's  "man"  command  will  be  run, and the output will be sent to the
#   client with minimal HTML wrappings.
#
#   This script can also be invoked in forms.  It uses the CGI.pm module to
#   parse the HTML form arguments. There should be a Man.html file next to this
#   script that shows you how you might do this to make your Unix-style "man"
#   pages available to web browsers.
#
# REQUIRES
#
	use CGI;
	use CGI::Carp qw(fatalsToBrowser);
	$ENV{MANPATH} = '/usr/man:/usr/share/man:/usr/local/man:/usr/local/lib/perl5/man:/usr/eecs/man';
	# This script runs in taint mode (-T).  Perl then refuses to start any
	# external command while PATH, IFS, CDPATH, ENV or BASH_ENV still hold
	# data taken from the environment, so the latter four are removed and
	# PATH is built only from literals and a validated HOME.
	delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
	# HOME is tainted like every other environment value.  It is accepted
	# only if it is a plain absolute path; otherwise $home stays undefined
	# and the per-user directory is simply left out of PATH.
	($home) = (defined $ENV{HOME} ? $ENV{HOME} : '') =~ m{\A(/[\w./-]*)\z};
	$ENV{PATH} = "/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin:/bin:/sbin";
	$ENV{PATH} .= ":$home/p/cgi" if defined $home;
#
# SETTINGS
#
	# URL prefix under which this script is reachable.  HTMLencode() puts
	# it in front of "/m2h?..." when it turns references to other manual
	# pages into links, so it must match where the script is installed.
	$cgidir = '/~jc/cgi';
#
# AUTHOR
#   <a href="mailto:jc@trillian.mit.edu">John Chambers</a>

# Main program.  Emits the HTTP header, collects options and page names from
# the CGI parameters and from @ARGV, then runs man(1) once per page name and
# sends its output to the client inside <pre>...</pre>.
#
# Parameters:
#   V        verbosity (digits only); values above 1 enable debug output
#   S        manual section, handled like a "-<section>" argument
#   T        whitespace-separated page names
#   section  shown in the page title only
#   P        replacement MANPATH (colon-separated absolute directories)
#
# Everything that comes from the request is HTML-escaped before it is echoed,
# and only text captured by a validating regex is placed in the command line.
# NOTE(review): V>1 dumps the whole environment to the client; consider
# restricting it if this script is publicly reachable.

$| = 1;
($me = $0) =~ s".*/"";
# Default verbosity is 1; V_<scriptname> in the environment overrides it.
$V = $ENV{"V_$me"} || 1;

print "Content-type: text/html\n\n";
print "<html>\n";
print "<!-- $0 -->\n" if $V;

# commands:
$man = '/usr/bin/man';

$query = CGI->new;
# Escapes HTML metacharacters; undef is treated as the empty string.
my $esc = sub { $query->escapeHTML(defined $_[0] ? $_[0] : '') };

if (($v = $query->param('V')) && $v =~ /^(\d+)$/) {$V = $1}
@names = $query->param;
if ($V>1) {
	print "<br>Names:<br>\n";
	for my $n (@names) {
		my $val = $query->param($n);
		print 'name ', $esc->($n), '="', $esc->($val), "\"<br>";
	}
	# Iterate over the keys only; walking %ENV itself would also visit
	# every value as if it were a variable name.
	for my $e (sort keys %ENV) {
		print 'ENV ', $esc->($e), '="', $esc->($ENV{$e}), "\"<br>";
	}
}
if (my $sec = $query->param('S')) {
	# Validated together with the other arguments in the loop below.
	splice @ARGV, 0, 0, "-$sec";
	print "<br>Section ", $esc->($sec), "\n" if $V>1;
}
if (my $title = $query->param('T')) {
	push @ARGV, split(/\s+/,$title);
	if (my $sec = $query->param('section')) {$title .= "($sec)"}
	print "<title>Unix Manual Page for ", $esc->($title), "</title>\n";
} else {
	print "<title>Unix Manual Page</title>\n";
}
if (my $path = $query->param('P')) {
	# Accept only a colon-separated list of absolute directories made of
	# harmless characters, without ".." components.
	my($clean) = $path =~ m{^(/[\w./+-]*(?::/[\w./+-]*)*)$};
	if (defined $clean && $clean !~ /\.\./) {
		$ENV{MANPATH} = $clean;
	} else {
		print "Rejected MANPATH \"", $esc->($path), "\" (bad syntax)\n";
	}
}
($host = `uname -n`) =~ s/\s*$//;
($stype = `uname -s`) =~ s/\s*$//;

$S = '' unless defined $S;
for my $arg (@ARGV) {
	print "Arg: \"", $esc->($arg), "\"\n" if $V>1;
	if ($arg =~ /^-/) {
		if ($arg =~ /^-([fk])/) {
			$S = "-$1";
			print "<br>Option: \"$S\"\n" if $V>1;
		} elsif ($arg =~ /^-(\d\w*)$/) {
			# A section such as -2 or -3pm.  It is handed to man as
			# a leading positional argument ("man 2 errno").
			$S = $1;
			print "<br>Section: \"$S\"\n" if $V>1;
		} else {
			print "<br>Unknown option: \"", $esc->($arg), "\"\n" if $V>0;
		}
		next;
	}
	next unless $arg =~ /\S/;	# Nothing to look up.
	# Page names may contain "-" (ssh-keygen, apt-get) but cannot start
	# with one; this is the same character set HTMLencode() links to.
	if ($arg =~ /^\s*([\w:.][-\w:.]*)\s*$/) {
		$A = $1;
	} else {
		print "Rejected \"", $esc->($arg), "\" (bad syntax)\n";
		next;
	}
	# $S and $A were both captured by the patterns above, so the command
	# line contains no shell metacharacters apart from our own 2>&1.
	$cmd = "$man $S $A 2>&1";
	print "Unix manual page for $A.$S (host=$host system=$stype)\n";
	print "<hr>\n";
	if ($V>1) {
		print "Command: $cmd\n";
		print "<br>Hostname: ", `hostname`;
		print "<br>System: ", `uname -a`;
		print "<br>MANPATH: ", $esc->($ENV{MANPATH});
		print "<br>uname: ", `which uname`;
		print "<br>man: ", `which man`;
		print "<hr>\n";
	}
	open(my $pipe, '-|', $cmd) or die "### Can't run \"$cmd\" [$!]\n";
	print "<pre>\n";
	my $wasblank = 1;
	# Test for end of input with defined(), not with chomp's return value:
	# chomp returns 0 for a final line that lacks a newline, which would
	# silently drop that line.
	while (defined(my $line = <$pipe>)) {
		chomp $line;
		$line = HTMLencode($line);
		if ($line =~ /^[A-Z][A-Za-z\s]+$/) {	# Manual section marker.
			$line = '<strong>' . $line . '</strong>';
		}
		print "Line: \"$line\"\n" if $V>4;
		# Squeeze runs of blank lines.  Compare against '' so that a
		# line consisting of "0" is not mistaken for a blank one.
		my $blank = ($line eq '');
		print "$line\n" unless $wasblank && $blank;
		$wasblank = $blank;
	}
	# man exits non-zero when a page is missing; its message has already
	# been shown through 2>&1, so the close status is not treated as fatal.
	close($pipe);
	print "</pre>\n";
}
print "</html>\n";
exit 0;

# HTMLencode($line) - convert one line of man(1) output to HTML.
#
# Takes a single line of text (without its newline) and returns it with:
#   0. trailing white space trimmed,
#   1. HTML metachars replaced with escape sequences,
#   2. nroff overstrikes (c BS c) and underscores (_ BS c) replaced with
#      <b>...</b>, and any other backspaces and formfeeds removed,
#   3. references to other manual pages, such as ls(1), turned into links
#      to "$cgidir/m2h?S=<section>&T=<page>".
#
# Reads the global $cgidir; no other global is read or modified.
sub HTMLencode {
	my($line) = @_;
	# Trim whitespace.
	$line =~ s/\s+$//;
	# Reduce overstruck metachars to one plain character.  This has to
	# happen before escaping, because afterwards they are multi-character
	# entities that the overstrike patterns below cannot recognize.
	$line =~ s/([<>"&])(?:\x08\1)+/$1/g;
	# Replace HTML metachars.
	$line =~ s/&/&amp;/g;
	$line =~ s/"/&quot;/g;
	$line =~ s/</&lt;/g;
	$line =~ s/>/&gt;/g;
	# Replace overstrikes and underscores.  A character may be overstruck
	# any number of times, and may be underlined as well; both cases are
	# consumed in one match so that no tag is ever nested inside another.
	$line =~ s/(?:_\x08)*([^\x08])(?:\x08\1)+/<b>$1<\/b>/g;
	# "&" is excluded so that an underlined entity is left intact.
	$line =~ s/(?:_\x08)+([^&<\x08])/<b>$1<\/b>/g;
	# Eliminate remaining overstrikes and formfeeds.  An entity followed
	# by a backspace is removed as a whole, and the ">" that closes one of
	# our own tags is never removed.
	$line =~ s/(?:&\w+;|[^\x08>])\x08//g;
	$line =~ tr/\x08\f//d;
	# Reduce adjacent bolds.  White space between two bold runs is kept as
	# it is, because the result is displayed inside <pre>.
	$line =~ s"</b><b>""g;
	$line =~ s"</b>(\s+)<b>"$1"g;
	# Look for pointers to other man pages.  This isn't quite perfect.
	# It can be fooled by things that look like references but aren't,
	# and some man pages use rather strange ways of pointing to other pages.
	# But it's close enough that there's not much motive to improve it.
	#
	# Every pattern captures (text before, page, section, text after).  The
	# parentheses of a finished link are written as &#40; and &#41;, so a
	# link can never match again and each loop terminates.
	my @refs = (
		# "name (1) - description", as found in NAME sections.
		qr/^(.*?)([-\w.]+)\s*\((\d+)\)(\s*-\s.*)$/,
		# Bold name followed by a bold section: <b>name</b>(<b>1</b>)
		qr/^(.*)<b>([-\w.:]+)<\/b>\s*\(<b>(\d\w*)<\/b>\)(.*)$/,
		# The whole reference in bold: <b>name(1)</b>
		qr/^(.*)<b>([-\w:.]+)\s*\((\d\w*)\)<\/b>(.*)$/,
		# Plain text: name(1)
		qr/^(.*?)\b([-\w:.]+)\s*\((\d\w*)\)(.*)$/,
	);
	for my $re (@refs) {
		while (my($pre,$page,$sec,$post) = ($line =~ $re)) {
			# "&" is written as "&amp;" inside the attribute.  A raw
			# "&times" or "&copy" would be read as a character entity.
			$line = "$pre<A HREF=\"$cgidir/m2h?S=$sec&amp;T=$page\">$page&#40;$sec&#41;</A>$post";
		}
	}
	return $line;
}
