#!/usr/bin/perl
#
# NAME
#   w3ld - (down)load the contents of a web directory.
#
# SYNOPSIS
#   w3ld [options].. URL..
#
# DESCRIPTION
#   This program expects one or more URLs on the command line.   Each
#   URL  is  treated  as a directory and its contents are copied into
#   the current directory.  If the -r ("recursive") option  is  used,
#   subdirectories will be identified and copied recursively.
#
#   The URLs on the command line should all end with '/', but if  one
#   doesn't, we add '/'. Thus, this program currently can't grab just
#   a single file. (That's done by w3cat, which should be in the same
#   place you found this program.)
#
#   By "treated as a directory", we mean that the URL's contents are
#   scanned for hyperlinks.  Those hyperlinks are in turn downloaded,
#   and written to local files that have the same names.
#
# OPTIONS
#   Options start with '-' or '+'. For some of the options, '+' means
#   "enable"  and  '-'  means  "disable"; for others, there's no such
#   concept and you may use either flag char.  The options that don't
#   have  args may be combined with the next option in the usual way,
#   so that "-mnr2" is the same as "-m -n -r2".
#
#   Default options are set up to whatever I mostly  find  useful  at
#   the moment, so they may change from time to time. Check below for
#   what the true defaults are in whatever version you have.
#
# # The following option isn't yet fully implemented; don't use it.
# # -b
# #   Don't do any backup.  This is usually the default.
# # +b<c><l>
# #   Back up existing files. Several backup styles are supported. if
# #   <c>  is  '-'  or '~', then backups are made by appending <c> to
# #   the name.  Such backups are recursive, i.e., if a  backup  file
# #   exists,  it  will  in  turn be backed up, resulting in multiple
# #   flag chars on the end of its name. If <l> is present, it is the
# #   limit  to the number of backups.  The default in some versions is
# #   which will produce the file and up to 7 backup copies.  If  <c>
# #   is  '.',  then the backup will be done by appending .<l> to the
# #   name, and only one backup is possible.  Thus -b.bak will rename
# #   foo.c as foo.c.bak, as is conventional of DOS systems.
#
#   -c
#     Don't do a copyright check (described below).
#   +c
#     Do a copyright check (default).
#
#   -m
#     Don't copy modification times.  Copied files will have mtimes
#     as of when they are copied.
#   +m
#     Set mtimes (modification time) from the  "Last-modified"  times
#     for URLs that are copied.
#
#   -n
#     Copy all files, ignoring modification times.
#   +n
#     If a file already exists, copy only the newer files. If the web
#     server doesn't give a coherent date, we will assume the file is
#     newer and copy it. At verbose level -v3, URLs not copied due to
#     age are flagged with '<'.
#
#   -p<proxy>
#     Sends requests via a proxy HTTP server.  The <proxy> should be
#     the "host.name:port" portion of a URL.   
#
#   -r<N>
#     Recurse to <N> levels.  The default, -r1, means to  treat  only
#     the top-level URLs as directories.  The -r2 option means to try
#     to determine which of the second-level  URLs  are  directories.
#     When  such  a  subdirectory  is found, a local directory by the
#     same name is created, and filled with the URL's  contents.   If
#     <N> is omitted, -r2 is assumed.
#
#   -v<N>[file]
#     Verbose option.  Sets the verbose level to <N>, and if the file
#     name is present, writes to that file.  The default is currently
#     -v2, which will get you a list of URLs copied, flagged by  '>',
#     and  written  to STDOUT.  Use -v3 if you'd like to see the URLs
#     that are not copied, flagged by '<' or a reason.
#
#   -x<pat>
#     Exceptions. Any URLs that match the pattern will be ignored and
#     not fetched.
#
# EXAMPLES
#
#   mkdir RFC; cd RFC
#   w3ld http://ds.internic.net/rfc/
#     Download all the RFCs from the InterNIC.
#
#   mkdir Scot; cd Scot
#   w3ld -r2 http://trillian.mit.edu:8739/~jc/abc/Scotland/
#     This downloads John Chambers' Scottish Country Dance music into
#     the local directory 'Scot'.  The -r2 is needed because the tune
#     files are in subdirectories 'jig', 'reel', and so on.
#
#   mkdir Scot; cd Scot
#   w3ld -mr2 http://trillian.mit.edu:8739/~jc/abc/Scotland/
#     The same, but preserve the "mtime" modification times.
#
#   mkdir Celtic; cd Celtic
#   w3ld -r2 -mnv3 http://trillian.mit.edu:8739/~jc/abc/Scotland/ \
#                  http://trillian.mit.edu:8739/~jc/abc/Ireland/
#     Similar, but copy only files that are newer than those  already
#     in  the  local directory.  The -v3 causes the output to include
#     all file names; files not copied are marked "(not copied)". The
#     two  remote  directories  will  be merged into one local Celtic
#     directory.  (If there are two different files by the same name,
#     the one under Scotland/ will be lost.)
#
#   mkdir celtic; cd celtic
#   w3ld http://celtic.stanford.edu/pub/tunes/abc.tunes/
#     Download the abc tune collections from the Ceolas archive.   No
#     -r option here so we don't go into the abc.tunes.ps/ directory.
#
#   mkdir hn; cd hn
#   w3ld http://home1.swipnet.se/~w-11382/abc/
#     Download Henrik Norbeck's abc files.  No -r option, so we don't
#     go into his subdirectories.  This gets a lot of big abc files.
#
# LIMITATIONS
#   So far only the http:// protocol is implemented; ftp://,  file://
#   and  others  may  appear  if  I need them.  If someone feels like
#   adding FTP code, you might send me a copy.
#
# DEBUGGING
#    You can use "perl -dw", of course. Or you can do the following:
#     setenv V_w3ld 5/tmp/w3ld.out	# csh or tcsh users.
#     export V_w3ld=5/tmp/w3ld.out	# ksh or bash users.
#   This will turn on the "print V" lines and write to /tmp/w3ld.out.
#   Or use the -v option on the command line.
#
# REQUIRES
#   If the following lines are commented out,  then  I've  read  them
#   into this script, and nothing is required. If these are live perl
#   commands, then you'll have to make sure that these  .pmfiles  are
#   found on your system:
#
#   $H = $ENV{'HOME'} || '.';
#   push(@INC, "$H/sh", "$H/pl");
#   require "HTTPcon.pm";	# To make HTTP connections.
#   require "URLopen.pm";	# To open a URL and get a file handle.
#   require "URLhref.pm";	# URL + HREF -> new URL.
#   require "URLtrim.pm";	# Remove /../ and trailing junk from URL.
#
# BUGS
#   Despite  many attempts to detect failure, we still don't optimally
#   handle all the myriad things that can go wrong.
#
#   It isn't technically possible to determine whether a URL names  a
#   directory  or  a file.  Our scheme is to treat a final '/' as the
#   sign of a directory.  Most web servers use this  convention  when
#   they  produce directory listings.  But there are web pages around
#   that lie, putting '/' on non-directory URLs and leaving  '/'  off
#   of directory URLs, and we have no defense against this.
#
#   Many servers have a low limit to the number of  links  that  they
#   can have open simultaneously.  This program's "recursive" copy is
#   truly recursive, and a URL is kept  open  while  lower  URLs  are
#   read.   So  if  a  server limits links to, say, 4, then -r5 won't
#   work, because it requires keeping 5 links open.
#
#   It's impossible to detect multiply-linked files via HTTP, and  we
#   don't  try.   The  result  will be duplicate files with different
#   names.  If you want to fix this, look for my "relink" program.
#
# COPY BLOCKING
#   This program implements a simple kind of  protection  to  prevent
#   downloading  files,  such as those that contain copyrighted data.
#   If a file foo.bar is a link to the  file  .Copyright.bar  in  the
#   same directory, we will not download foo.bar from another system.
#   For files without a suffix, the file .Copyright is checked.   For
#   this  to  be  effective, the .Copyright* file must be in the same
#   directory and must have the same suffix.
#
#   For example, if you know that the  "MissRowanDavies.abc"  is  the
#   name  of  a file that contains copyrighted data, you can create a
#   file .Copyright.abc, link it to MissRowanDavies.abc, and  a  file
#   by  that name will not be downloaded into your directory, even if
#   newer.
#
# FUTURE
#   Lots of options are possible.  One useful option might be to write
#   the files to STDOUT, as one big file.
#
# AUTHOR
#   <a href="mailto:jc@trillian.mit.edu">John Chambers</a>
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

$| = 1;					# Unbuffer STDOUT.
$exitstat = 0;			# Our exit status.
($me = $0) =~ s"^.*/"";	# Our name, minus directory.
$Vdfl = 2;				# Default verbose level.
&Voption($ENV{"V_$me"} || $ENV{"T_$me"} || $ENV{"D_$me"} || $Vdfl);
print V "$me: Started ", `date` if $V>1;

### Here are some defaults that you may wish to twiddle:
$copyright = 1;	# Check for copyright files.
$mtimes    = 1;	# Copy modification times.
$newer     = 0;	# Copy only newer files.
$rlevel    = 0;	# Current recursion level.
$rlimit    = 2;	# Maximum recursion depth allowed.

### Some globals that you probably don't want to change:
%ML = ('Jan',0,'Feb',1,'Mar',2,'Apr',3,'May',4,'Jun',5,'Jul',6,'Aug',7,'Sep',8,'Oct',9,'Nov',10,'Dec',11);
@ML = (31,28,31,30,31,30,31,31,30,31,30,31);
$LMpat = '^(Last-modified):\s*(\w+),\s*(\d+)\s*(\w+)\s*(\d+)\s*(\d+):(\d+):(\d+)\s*(\w+)';
$spm = 60; $mph = 60; $hpd = 24; $dpy = 365;
$sph = $spm * $mph; $spd = $sph * $hpd; $spy = $spd * $dpy;

### Run thru the command-line args.
for $a (@ARGV) {
	if (($flg,$opt) = ($a =~ /^([-+])(.*)/)) {
		while ($opt) {			# Options may be bundled, e.g. "-mnr2".
			if ($opt =~ s/^b([-~]\d*)//i) {		# Backup.
				if ($flg eq '-') {$bplim = 0} else {&bpinit($1)}
			} elsif ($opt =~ s/^b(\..*)//i) {	# Backup.
				if ($flg eq '-') {$bplim = 0} else {&bpinit($1)}
			} elsif ($opt =~ s/^c//i) {			# Check for copyright files.
				$copyright = ($flg eq '+');
			} elsif ($opt =~ s/^m//i) {			# Copy mtime values.
				$mtimes = ($flg eq '+');
			} elsif ($opt =~ s/^n//i) {			# Copy newer files.
				$newer = ($flg eq '+');
			} elsif ($opt =~ s/^p(.*)//i) {		# Proxy host.
				$HTTPpxy = $1;
			} elsif ($opt =~ s/^r(\d*)//i) {	# Recursion limit.
				$rlimit = $1 || 2;
			} elsif ($opt =~ s/^v(\d+)(.*)//i) {	# Verbose.
				&Voption("$1$2");
			} elsif ($opt =~ s/^x(.*)//i) {		# Exception pattern.  (This
				push(@Xpats, $1);				# must precede the catch-all
												# below, which shadowed it.)
			} elsif ($opt =~ s/(.)//) {			# Anything else is unknown.
				print STDERR "$me: Unknown option \"$1\" ignored.\n" if $V>0;
			}
		}
		next;
	}
	# If it's not an option, treat it as a URL:
	$a =~ s"/*$"/";		# Make sure there's exactly one final '/'
	print V "$me: URL \"$a\"\n" if $V>3;
	$baseurl = $a;			# &ldir uses these globals to decide which
	$baselen = length($a);	# URLs are "under" the one requested.
	&ldir($a,'');
}

print V "$me: Finished ", `date` if $V>1;
exit $exitstat;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Parse a backup-style option string and load the $bpchr, $bpsuf and
# $bplim globals.  "-N" or "~N" means recursive backups made by appending
# the flag char, up to N of them (7 if N is omitted).  ".suf" means a
# single backup made by appending the suffix.  Anything else disables
# backups entirely.
sub bpinit {
	local($bpopt) = @_;
	local($c,$n);
	if (($c,$n) = ($bpopt =~ /^([-\~])(\d*)/)) {
		$bpchr = $bpsuf = $c;
		$bplim = int($n) || 7;		# No count given: keep 7 backups.
		print V "$me: Backup chr=suf=\"$bpchr\" lim=$bplim.\n" if $V>2;
	} elsif ($bpopt =~ /\./) {		# ".suf" style: one backup by rename.
		$bpchr = '.';
		$bpsuf = $bpopt;
		$bplim = 1;
		print V "$me: Backup chr='$bpchr' suf=\"$bpsuf\" lim=$bplim.\n" if $V>2;
	} else {						# Unrecognized: turn backups off.
		print V "$me: Backup option \"$bpopt\" not recognized.\n" if $V>0;
		$bpchr = $bpsuf = '';
		$bplim = 0;
	}
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Extract a directory listing from a server. We make some assumptions #
# here  about  how web servers return directory listings; if they get #
# too weird, we won't recognize the contents.  The first arg is a URL #
# that should be a directory (or perhaps an index.html file that acts #
# like a directory).  The second argument is the local directory that #
# files  are  to  be copied into; we use it to keep track of where we #
# are in recursive loads.                                             #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub ldir {
	local($url,$dir) = @_;
	local($buf,$href,$htag,$init,$line,$sdir,$tmp,*U,$URL);
	++$rlevel;
	$URLerr = "Don't know why";	# Default reason if the open fails oddly.
	if ($dir) {
		$URL = "$url$dir";		# Subdirectory: append to the base URL.
	} else {
		$URL = $url;
	}
	# (Was "return 0 if !&chkurl($u)": $u is never set in this sub, so the
	# check was a no-op, and the early return leaked the $rlevel bump.)
	if (!&chkurl($URL)) {
		--$rlevel;
		return 0;
	}
	if (!&URLopen(U,$URL)) {
		print V "ldir: Can't open \"$URL\" ($URLerr)\n" if $V>0;
		$exitstat = 1;
		close U;	# Paranoia.
		--$rlevel;
		return 0;
	}
	# Accumulate the listing in $line and scan it tag by tag, fetching
	# each <A HREF=...> we like.  defined() so a lone "0" line is data.
	while (defined($buf = <U>)) {
		print V "ldir: Got \"$buf\"\n" if $V>5;
		$buf =~ s/\s*$/ /;			# Fold the newline into one space.
		$line .= $buf;
		while ($line) {
			print V "ldir: \"$line\"\n" if $V>5;
			$line =~ s/^[^<]+//;			# Delete junk before first '<'.
			if ($line =~ s/^<([^>]*)>//) {	# Scan for a tag.
				$htag = $1;
				print V "ldir: htag=\"$htag\"\n" if $V>4;
				if ($htag =~ /^a\b/i) {		# Only anchors interest us.
					print V "ldir: htag=\"$htag\" is anchor.\n" if $V>3;
					if ($htag =~ /href="([^"]+)"/i) {
						$href = $1;
						print V "ldir: href=\"$href\"\n" if $V>3;
						next if ($href =~ /^#/);		# Skip fragment refs.
						next if ($href =~ /\/\.\.\//);	# Skip upward links.
						if ($line =~ /^\s*Parent dir/i) {
							print V "ldir: href=\"$href\" rejected (parent dir)\n" if $V>3;
						} else {
							print V "ldir: href=\"$href\" looks good ...\n" if $V>3;
							if ($href =~ m"/$") {	# Final '/'; treat as directory.
								$U = &URLhref($URL,$href);
								print V "ldir: New URL: \"$U\"\n" if $V>3;
								$init = substr($U,0,$baselen);
								$sdir = substr($U,length($init));
								if ($rlevel < $rlimit) {
									if ($init eq $baseurl) {	# Only descend under the base.
										print V "ldir: sdir=\"$sdir\"\n" if $V>4;
										if (&chkurl($U)) {
											print V "ldir: \"$U\" is new URL.\n" if $V>4;
											&ldir($init,$sdir);
										}
									} else {
										print V "--- Ignoring \"$U\"\n" if $V>1;
										print V "    Notunder \"$baseurl\"\n" if $V>1;
									}
								} else {
									print V "$U ignored (past recursion limit $rlimit)\n" if $V>0;
								}
							} else {		# No final '/'; treat as file.
								print V "ldir: File u=\"$URL\" h=\"$href\"\n" if $V>3;
								&Copy($URL,$dir,$href);
							}
						}
					} else {
						print V "ldir: htag=\"$htag\" has no href.\n" if $V>3;
					}
				} else {
					print V "ldir: htag=\"$htag\" ignored.\n" if $V>6;
				}
			} else {
				print V "ldir: \"$line\" contains no tag.\n" if $V>5;
				$line = '';
			}
		}
	}
	close U;
	--$rlevel;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Examine a URL and decide whether we want to read it.  We return 1
# if it looks interesting; 0 if we should ignore it.  If the second
# arg is present, it is a URL that we "came from"; we might want to
# use that as part of the decision.. 
sub chkurl {
	local($u,$f) = @_;
	local($x);
	# Only HTTP is implemented; reject other protocols outright.
	if ($u =~ /^(ftp|file|mailto):/i) {
		print V "$u\t--- Can't handle $1 protocol yet.\n" if $V>2;
		return 0;
	}
	# Skip URLs we've already seen.  (Was keyed on the global $U, so the
	# check looked at the wrong variable; use the $u argument.)
	if ($Ufrom{$u}) {
		print V "$u\tdone already (from:$Ufrom{$u})\n" if $V>2;
		$Ufrom{$u} .= ' ' . $f if $f;	# Remember another referrer.
		return 0;
	}
	# Skip anything matching a -x exception pattern.
	for $x (@Xpats) {
		if ($u =~ $x) {
			print V "$u\tskipped (/$x/)\n" if $V>1;
			return 0;
		}
	}
	return 1;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Given a local file pathname, we check to see whether it is  a  link #
# to  a  local  file  whose name starts with ".Copyright" and has the #
# same suffix.  If so, we return 1, otherwise we return 0.            #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub copyrightcheck {
	local($pth) = @_;
	local($dir,$fil,$bas,$suf,$cpf);
	local($dv1,$in1,$dv2,$in2);		# stat() results; now localized.
	print V "copyrightcheck: pth=\"$pth\"\n" if $V>3;
	# Split the path into directory and file name parts.
	if (($dir,$fil) = ($pth =~ m"^(.*/)(.*)")) {
	} else {
		$dir = ''; $fil = $pth;
	}
	print V "copyrightcheck: dir=\"$dir\" fil=\"$fil\"\n" if $V>3;
	# The guard file is .Copyright with the same suffix (if any) in the
	# same directory.
	if (($bas,$suf) = ($fil =~ m"^(.*)(\..*)$")) {
		$cpf = $dir . '.Copyright' . $suf;
	} else {
		$cpf = $dir . '.Copyright';
	}
	print V "copyrightcheck: bas=\"$bas\" suf=\"$suf\"\n" if $V>3;
	print V "copyrightcheck: cpf=\"$cpf\"\n" if $V>3;
	# The file is "copyrighted" iff it is a hard link to the guard file:
	# same device and same inode number.
	if (($dv1,$in1) = stat($cpf)) {
		print V "copyrightcheck: dv1=$dv1 in1=$in1 cpf=\"$cpf\"\n" if $V>3;
		if (($dv2,$in2) = stat($pth)) {
			print V "copyrightcheck: dv2=$dv2 in2=$in2 pth=\"$pth\"\n" if $V>3;
			if ($dv1 == $dv2 && $in1 == $in2) {
				# (Was a bare print to STDOUT; all other diagnostics go to V.)
				print V "$pth\tis a copyright file.\n" if $V>2;
				return 1;
			}
		}

	}
	return 0;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Copy a single remote file into a local file. We are passed the base #
# URL, the directory, and the HREF.  We combine the directory and the #
# href to get the local pathname.  We might  have  to  use  mkdir  to #
# create  the  directory; we trust that only the final directory will #
# be needed, due to the recursive nature of our work here.            #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub Copy {
	local($u,$d,$h) = @_;
	local($buf,$dir,$fil,$hdr,$n,$path,$url,*F);
	local($pro,$err,$msg);
	local($UWD,$UD0,$UD1,$UM0,$UM1,$UMo,$UYY,$Uhh,$Umm,$Uss,$Utz,$Usi,$Uts);
	local($FWD,$FD0,$FD1,$FM0,$FM1,$FYY,$Fhh,$Fmm,$Fss,$Ftz,$Fsi,$Fts);
	local($FX1,$FX2,$FX3,$FX4,$FX5,$FX6,$FX7,$FX8,$FX9);	# stat() filler.
	print V "Copy: u=\"$u\"\n" if $V>3;
	print V "Copy: d=\"$d\"\n" if $V>3;
	print V "Copy: h=\"$h\"\n" if $V>3;
	return if !&chkurl($u);
	$url = &URLhref($u,$h);		# Full URL of the file to fetch.
	print V "Copy: url=\"$url\"\n" if $V>3;
	if (!&URLopen(*F,$url)) {
		print V "Copy: Can't open \"$url\" ($!)\n" if $V>4;
		close F; return;
	}
	print V "Copy: url=\"$url\" opened.\n" if $V>3;
	if ($d) {			# Local path is subdirectory + href name.
		$path = "$d$h";
		print V "Copy: path=\"$path\"\n" if $V>3;
	} else {
		$path = $h;
		print V "Copy: path=\"$path\"\n" if $V>3;
	}
	# &URLopen set $URLhdr if an HTTP response header is coming.  Read and
	# discard it, watching for an error status and a Last-modified date.
	while ($URLhdr) {
		if (!defined($buf = <F>)) {	# EOF inside the header; the server
			$URLhdr = 0;			# closed early.  Treat as header end.
			last;
		}
		$buf =~ s/\s+$//;
		print V "Copy: hdr \"$buf\"\n" if $V>3;
		if ($buf =~ /^\s*$/) {
			$URLhdr = 0;	# Blank line: done with headers.
		} elsif (($pro,$err,$msg) = ($buf =~ m"(HTTP/[\d.]+)\s*([4-9]\d+)\s+(.*)$")) {
			print V "$url\t$pro error $err \"$msg\"\n" if $V>0;
			close F; return;
		} elsif (!$Usi) {	# Only notice one Last-modified line.
			if (($hdr,$UWD,$UD1,$UMo,$UYY,$Uhh,$Umm,$Uss,$Utz) = ($buf =~ /$LMpat/i)) {
				print V "Copy: $buf\n" if $V>3;
				$UM0 = $ML{$UMo}; $UM1 = $UM0 + 1; $UD0 = $UD1 - 1;
				# Sortable YYYYMMDDhhmmss strings let us compare with 'le'.
				$Usi = sprintf("%04d%02d%02d%02d%02d%02d",$UYY,$UM1,$UD1,$Uhh,$Umm,$Uss);
				print V "Copy: $Usi $Utz ($url)\n" if $V>3;
				($FX1,$FX2,$FX3,$FX4,$FX5,$FX6,$FX7,$FX8,$FX9,$Fts) = stat($path);
				($Fss,$Fmm,$Fhh,$FD1,$FM0,$FYY) = gmtime($Fts);
				$FYY += 1900; $FM1 = $FM0 + 1; $FD0 = $FD1 - 1;
				$Fsi = sprintf("%04d%02d%02d%02d%02d%02d",$FYY,$FM1,$FD1,$Fhh,$Fmm,$Fss);
				print V "Copy: $Fsi GMT ($path)\n" if $V>3;
				# NOTE(review): this age test always applies; the -n/$newer
				# flag is set by the options but never consulted here --
				# confirm whether it should gate this block.
				if ($Usi le $Fsi) {
					print V "$url\t< $path\t(not copied)\n" if $V>2;
					close F;
					return;
				}
				print V "Copy: $url newer than $path\n" if $V>3;
			}
		}
	}
	if (-f $path) {
		print V "Copy: $url exists.\n" if $V>3;
		if ($copyright && &copyrightcheck($path)) {	# +c: honor guard files.
			print V "$url\tskipped (copyright file)\n" if $V>1;
			close F; return;
		}
		if ($bplim > 0) {
			# Backups aren't implemented yet; meanwhile just remove it.
			print V "Copy: No backup yet; unlink(\"$path\")\n" if $V>2;
			unlink($path);
		} else {
			print V "Copy: No backups; unlink(\"$path\")\n" if $V>3;
			unlink($path);
		}
	}
	if (!open(O,">$path")) {
		print V "Copy: Can't write \"$path\" ($!)\n" if $V>4;
		if (($dir,$fil) = ($path =~ m"^(.*)/(.+)$")) {	# Missing directory?
			print V "Copy: dir=\"$dir\" fil=\"$fil\"\n" if $V>3;
			if (!mkdir($dir,0777)) {		# Try to create the directory.
				print V "Copy: Can't mkdir \"$dir\" ($!)\n" if $V>0;
				close F; return;
			}
			print V "DIR\t\"$dir\" created.\n" if $V>1;
			if (!open(O,">$path")) {	# Retry the open.
				print V "Copy: Can't write \"$path\" ($!)\n" if $V>0;
				close F; return;
			}
		} else {
			close F; return;
		}
	}
	print V "$url\t> $path\t(..." if $V>1;
	while (defined($buf = <F>)) {	# defined(): a lone "0" line is data too.
		print V "Copy: Got \"$buf\"\n" if $V>5;
		print O $buf;
	}
	print V " copied)\n" if $V>1;
	close F;
	close O;
	if ($Usi && $mtimes) {	# +m: set the local mtime from Last-modified.
		print V "Copy: Set mtime to $Usi ...\n" if $V>4;
		$Uts = &ts($UYY,$UM0,$UD1,$Uhh,$Umm,$Uss);
		utime(time,$Uts,$path);
	}
}
# UWD UD0 UD1 UM1 UM0 UMo UM0 UYY Uhh Umm Uss Utz Usi Uts
# FWD FD0 FD1 FM1 FM0 FMo FM0 FYY Fhh Fmm Fss Ftz Fsi Fts

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Convert a 6-number time list to a timestamp.   This  probably  only
# works on Unix/POSIX systems.  It'll be off by a day after 21000301.
sub ts {
	local($YY,$M0,$D1,$hh,$mm,$ss) = @_;
	local($ly,$ts,$m,$yy,@X);
	$yy = ($YY > 1900) ? ($YY - 1970) : $YY;	# Years since the 1970 epoch.
	--$D1;							# Adjust for no zero days.
	$ts = ($yy * $spy);				# Secs to start of this year.
							print "ts: ts=$ts to start of year $yy.\n" if $V>3;
	# Count leap days for *completed* years only.  1972 (yy=2) is the
	# first leap year, so its day is added starting at yy=3.  (The old
	# int(($yy+2)/4) also counted the current leap year, which made every
	# timestamp in 1972, 1976, ... a day late.)
	$ly = int(($yy + 1) / 4);
	$ts += $ly * $spd;				# Add a day per completed leap year.
							print "ts: ts=$ts for $ly leap years.\n" if $V>3;
	for ($m=0; $m<$M0; $m++) {		# Add months this year.
		$ts += $ML[$m] * $spd;
	}
							print "ts: ts=$ts for $m months this year.\n" if $V>3;
	# This year's Feb 29: leap years are yy = 2, 6, 10, ... (1972+4n),
	# and the extra day counts once we're into March (M0 >= 2).  (The old
	# test, $M0 > 2 && $yy % 4 == 0, picked the wrong years and skipped
	# March.)  Good until the non-leap century year 2100, per the comment
	# above.
	if (($M0 > 1) && (($yy % 4) == 2)) {
		$ts += $spd;
							print "ts: ts=$ts for this leap year.\n" if $V>3;
	}
	$ts += ($D1 * $spd);			# Add days this month.
	$ts += ($hh * $sph);			# Add hours in day.
	$ts += ($mm * $spm);			# Add minutes in hour.
	$ts += $ss;
							print "ts: $YY/$M0/$D1 $hh:$mm:$ss => $ts.\n" if $V>3;
	@X = gmtime($ts);				# Round-trip check, debug only.
							print "ts: $X[5]/$X[4]/$X[3] $X[2]:$X[1]:$X[0] <= $ts.\n" if $V>3;
	return $ts;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Parse a debug option, and set up the global $V verbose level and  V #
# verbose  file.  Note that we also leave the entire option string in #
# the global $Vopt variable, not in a local variable.                 #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub Voption {
	($Vopt) = @_;		# Deliberately global; see the comment above.
	if ($Vopt =~ /^(\d)(.+)/) {		# A level digit followed by a file name.
		$V = $1;
		$Vfil = $2;
		open(V,">$Vfil")
			|| die "$0: Can't write \"$Vfil\" ($!)\n";
	} else {
		# A bare level, or nothing usable (fall back to the default);
		# either way the V stream goes to STDOUT.
		$V = ($Vopt =~ /^(\d+)/) ? $1 : $Vdfl;
		open(V,">&STDOUT");
	}
	select V; $| = 1; select STDOUT;	# Unbuffer the V stream.
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# NAME                                                                #
#   HTTPcon - make HTTP connection.                                  #
#                                                                     #
# SYNOPSIS                                                            #
#   $stat = &HTTPcon(*F,'fubar.com:1234');                           #
#                                                                     #
# DESCRIPTION                                                         #
#   This  accepts a URL's host:port portion, and attempts to make the #
#   connection.  If successful, we return 1 with F open  to  the  TCP #
#   socket.   If  we fail, we return 0, and F may or may not be open. #
#   (Maybe we should close it.)                                       #
#                                                                     #
# AUTHOR                                                              #
#   <a href="mailto:jc@trillian.mit.edu">John Chambers</a>               #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub HTTPcon {
	local(*F,$hp) = @_;
	local($a,$b,$c,$d,$host,$port,$this,$that,$This,$That);
	# Split "host:port"; a bare host name defaults to the HTTP port 80.
	if (($host,$port) = ($hp =~ m"^(.*):(\d+)$")) {
		print V "HTTPcon: host=\"$host\" port=\"$port\"\n" if $V>4;
	} else {
		$host = $hp;
		$port = 80;
		print V "HTTPcon: host=\"$host\" port=$port.\n" if $V>4;
	}
	# Hand-rolled socket constants and sockaddr_in template, predating
	# Socket.pm.  NOTE(review): AF_INET=2 is universal, but SOCK_STREAM
	# is not 1 on every platform -- confirm, or switch to Socket.pm.
	$AF_INET = 2;
	$SOCK_STREAM = 1;
	$sockaddr = 'S n a4 x8';
	($name,$aliases,$proto) = getprotobyname('tcp');
	# A non-numeric port (a service name) gets looked up here.
	($name,$aliases,$port) = getservbyname($port,'tcp')
		unless $port =~ /^\d+$/;
	$thisaddr = "\0\0\0\0";			# Simpler way.
	# NOTE(review): gethostbyname failure is not checked; a bad host name
	# leaves $thataddr undef and the connect below fails less clearly.
	($name,$aliases,$type,$len,$thataddr) = gethostbyname($host);
	$this = pack($sockaddr, $AF_INET, 0, $thisaddr);
	$that = pack($sockaddr, $AF_INET, $port, $thataddr);
	# Dotted-quad forms of both ends, used only in the verbose messages.
	($a,$b,$c,$d) = unpack('C4',$thisaddr); $This = "$a.$b.$c.$d:0";
	($a,$b,$c,$d) = unpack('C4',$thataddr); $That = "$a.$b.$c.$d:$port";
	if (socket(F,$AF_INET,$SOCK_STREAM,$proto)) {
		print V "HTTPcon: Got socket.\n" if $V>3;
	} else {
		print V "HTTPcon: Can't get socket ($!)\n" if $V>1;
		$exitstat = $!;
		return 0;
	}
	# Bind to INADDR_ANY, port 0 (any local port).
	if (bind(F,$this)) {
		print V "HTTPcon: Bind to \"$This\" succeeded.\n" if $V>4;
	} else {
		print V "HTTPcon: Bind to \"$This\" failed ($!)\n" if $V>1;
		$exitstat = $!;
		return 0;
	}
	if (connect(F,$that)) {
		print V "HTTPcon: Connect to \"$That\" succeeded.\n" if $V>4;
	} else {
		print V "HTTPcon: Connect to \"$That\" failed ($!)\n" if $V>1;
		$exitstat = $!;
		return 0;
	}
	select(F); $| = 1; select(STDOUT);	# Unbuffer the socket for the GET.
	return 1;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Here, we take a URL that is a path to something, plus an href  that #
# is  believed  to  be a relative to the URL, and combine them into a #
# URL.  The main point of this routine is to try to  correctly  strip #
# off the last field from the path.                                   #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub URLhref {
	local($base,$ref) = @_;
	local($out,$prot,$host,$rest);
	# An href with its own protocol prefix is already absolute.
	if ($ref =~ m"^\w*:") {
		print V "URLhref: URL \"$ref\"\n" if $V>3;
		return $ref;
	}
	if ($ref =~ s"^/+"") {		# Host-relative: keep only scheme://host.
		print V "URLhref: /href \"$ref\"\n" if $V>3;
		if (($prot,$host,$rest) = ($base =~ m"^([^/:]+)://([^/]+)(/.*)$")) {
			print V "URLhref: \"$base\" trimmed \"$rest\"\n" if $V>3;
			$base = "$prot://$host";
		}
	} elsif ($base =~ s"/([^/]+)$"") {	# Relative: drop the final field.
		print V "URLhref: \"$base\" removed \"$1\"\n" if $V>3;
	}
	$base =~ s"/+$"";			# No doubled slash at the join point.
	$out = &URLtrim("$base/$ref");
	print V "URLhref: \"$base\" + \"$ref\" =\n\t\"$out\"\n" if $V>3;
	$out;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# SYNOPSIS
#   URLopen(*FD,$URL) || die "...";
#
# DESCRIPTION
#   This routine accepts a URL and attempts to open it  for  reading.
#   If  successful,  the  return  value is 1, and FD will be the open
#   file (a socket, actually).  The caller can read the data from it.
#   If you're not going to exit after the EOF, it's a good idea to
#   close it when you're done, to prevent the connection from hanging
#   around.
#
# REQUIRES
#   We require the HTTPcon.pm module.
#
# AUTHOR
#   <a href="mailto:jc@trillian.mit.edu">John Chambers</a>
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub URLopen {
	local(*F,$url) = @_;
	local($p,$P,$h,$f);
	print V "URLopen: \"$url\"\n" if $V>4;
	return 0 if !&chkurl($url);		# Unwanted URL: don't even try.
	if ($HTTPpxy) {		# -p: send every request through the proxy.
		print V "URLopen: pxy=\"$HTTPpxy\" url=\"$url\"\n" if $V>2;
		if (&HTTPcon(*F,$HTTPpxy)) {
			print V "URLopen: Connected to \"$HTTPpxy\"\n" if $V>3;
			print V "URLopen: Send \"GET $url HTTP/1.0\\n\\n\"\n" if $V>4;
			# Proxies get the full URL on the GET line.
			# NOTE(review): bare "\n\n" rather than the CRLF pairs HTTP
			# specifies; servers of this era accept it -- confirm before
			# pointing this at stricter servers.
			print F "GET $url HTTP/1.0\n\n";
			# $URLhdr tells the caller (&Copy) that response headers follow.
			$URLhdr = 1;    # Note we're in the header.
			return 1;
		}
		print V "URLopen: Can't connect to proxy \"$HTTPpxy\"\n" if $V>0;
		return 0;
	} elsif (($p,$h,$f) = ($url =~ m'^(\w+)://([-_.:\w]+)(/.*)')) {
		print V "URLopen: \"$url\" p=\"$p\" h=\"$h\" f=\"$f\"\n" if $V>3;
		($P = $p) =~ tr/a-z/A-Z/;	# Uppercase the protocol name.
		if ($P eq 'HTTP') {
			print V "URLopen: \"$url\" HTTP protocol\n" if $V>3;
			if (&HTTPcon(*F,$h)) {
				print V "URLopen: Connected to \"$h\"\n" if $V>3;
				print V "URLopen: Send \"GET $f HTTP/1.0\\n\\n\"\n" if $V>4;
				# Direct connection: send only the path part.
				print F "GET $f HTTP/1.0\n\n";
				$URLhdr = 1;	# Note we're in the header.
				return 1;
			}
			print V "URLopen: Can't connect to \"$h\"\n" if $V>0;
			return 0;
		}
		print V "URLopen: \"$url\" can't do protocol \"$p\"\n" if $V>0;
		$exitstat = 254;
		return 0;
	}
	# No protocol prefix: treat the "URL" as a local path.  A directory
	# is listed (once only -- %Dirs remembers it) via a forked &HTMLdir
	# child that fakes a server's HTML index.
	if (-d $url && !$Dirs{$url}) {
		print V "URLopen: Directory \"$url\" ...\n" if $V>4;
		++$Dirs{$url};
		return &HTMLdir(*F,$url);
	}
	if (open(F,$url)) {
		print V "URLopen: Local file \"$url\" opened.\n" if $V>4;
		return 1;
	}
	print V "URLopen: Can't read \"$url\"\n" if $V>0;
	return 0;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# This routine reads a  directory  and  produces  a  simplified  html #
# listing. But you might want to reformat it in some other way that's #
# more useful for your application.                                   #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub HTMLdir {
	local(*F,$d) = @_;
	local($child,$f,$p);	# Now localized; these leaked globally before.
	if ($child = open(F,'-|')) {	# Parent: F now reads the child's STDOUT.
		print V "HTMLdir: Child started to list \"$d\"\n" if $V>1;
		return 1;	# Parent returns the open file.
	} else {		# Child reads the directory.
		if (opendir(DIR,$d)) {
			# (Was "Index of $u"; $u is never set here -- use the arg.)
			print "<HEAD><TITLE>Index of $d</TITLE></HEAD><BODY>\n";
			print "<PRE>\n";
			print "<IMG SRC=\"/icons/blank.xbm\" ALT=\"     \"> Name\n";
			# defined() so a file named "0" doesn't end the listing early.
			while (defined($f = readdir(DIR))) {
				if (-d "$d/$f") {
					if ($f eq '.') {		# Ignore self reference.
					} elsif ($f eq '..') {	# Parent reference.
						($p = $d) =~ s"/[^/]+/+$"/";
						print "<!-- p=\"$p\" -->\n";
						print "<IMG SRC=\"/icons/back.gif\" ALT=\"[DIR]\"> <A NAME=\"$f\" HREF=\"$p\">Parent directory</A>\n";
					} else {				# Subdirectory: trailing '/'.
						print "<IMG SRC=\"/icons/menu.gif\" ALT=\"[DIR]\"> <A NAME=\"$f\" HREF=\"$f/\">$f/</A>\n";
					}
				} else {					# Plain file.
					print "<IMG SRC=\"/icons/text.gif\" ALT=\"[TXT]\"> <A NAME=\"$f\" HREF=\"$f\">$f</A>\n";
				}
			}
			closedir(DIR);
			exit 0;
		} else {
			print V "HTMLdir: Can't open directory \"$d\" ($!)\n" if $V>0;
			exit $!;
		}
	}
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Shorten a URL in various ways.
sub URLtrim {
	local($url) = @_;
	local($org);
	$org = $url if $V>3;			# Keep the original for the debug note.
	$url =~ s"/*\#.*"";				# Strip off name references.
	$url =~ s"/*\?.*"";				# Strip off qualifiers.
	$url =~ s"/*\%.*"";				# What are these?
	1 while $url =~ s"/\./"/";		# Strip out /./ self-references.
	1 while $url =~ s"/[^/]+/\.\./"/";	# Strip out /foo/../ up-references.
	print V "URLtrim <= \"$org\"\n   => \"$url\"\n" if (($V>3) && ($url ne $org));
	$url;
}
