#!/usr/bin/perl -Tw
#
# NAME
#   w3post
#
# SYNOPSIS
#   w3post [options] URL < data
#
# DESCRIPTION
#   This routine implements the HTTP POST operation on a URL. We make
#   a  connection  to  the  web  server  for the URL, and send a POST
#   request for the file.  We then pipe our stdin to the  connection,
#   ending  with  a  double  newline.  We then start reading from the
#   connection and copy the data to our stdout.
#
#   If you want to learn how to do this stuff,  you  can  study  this
#   program.   It  is  useful  as  a starting point for writing other
#   simple web clients.  It's not nearly as difficult as people would
#   like  you  to  believe.  But the socket stuff uses a lot of magic
#   incantations that "you just have to know"; see the required  perl
#   module files for this socket magic.
#
#   This is a webified perl  version  of  the  standard  Unix  cat(1)
#   program.
#
# OPTIONS
#
#	-E
#     Don't encode the data [default].
#
#	+E
#     Encode the data.  We do the application/x-www-form-urlencoded
#     encoding that is the standard for HTML forms.
#
#	+H
#     Include the header info in the output.
#
#   -H
#     Don't include the header info in the output [default].
#
#   +P<proxy>
#     Proxy gateway.  If you are hidden behind a proxy, put the proxy
#     hostname (and optionally :port) in a +P option, and we will try
#     to indirect through the proxy server.
#
#   -P<noproxy>
#     Proxy exceptions. The <noproxy> string should be a pattern.  If
#     a URL matches this pattern, the proxy gateway isn't used.
#
#   +T
#     Enable WWW tracing. This sets a global flag that causes various
#     routines to produce lines of the form:
#       <!--W3trace/name:  ...  some message ...  -->
#     These look like both HTTP header lines and HTML comments.  Some
#     WWW tools (such as the "H" html viewer) can show these to  tell
#     you which stage of a GET operation we have reached.
#
#   -T
#     Disable WWW tracing [default].
#
# ENVIRONMENT
#   We use the following from the environment:
#
#   W3PROXY
#     The name (or  address)  and  an  optional  :port  for  a  proxy
#     gateway.   URLs  that don't match the W3NOPROXY will be fetched
#     indirectly via the proxy's web server.
#
#   W3NOPROXY
#     A pattern which is applied to URLs, and if they match, no proxy
#     is used.  That is, any URL that matches W3NOPROXY is considered
#     local, and we will access it directly.
#
# LIMITATIONS
#   So far only the http:// protocol is implemented; ftp://,  file://
#   and  others  may  appear  if  I need them.  If someone feels like
#   adding FTP code, you might send me a copy.
#
# REQUIRES
#   perl4 or perl5 and the following modules, which should be found in
#   the same directory where you found this program.
#
	push @INC,"$ENV{HOME}/sh",'sh';
	require "URLopen.pm";
	require "HTMLdir.pm";
	require "HTTPcon.pm";
#
# DEBUGGING
#    You can use "perl -dw", of course. Or you can do the following:
#     setenv V_w3post 5/tmp/w3post.out	# csh or tcsh users.
#     export V_w3post=5/tmp/w3post.out	# ksh or bash users.
#   This will turn on the "print V" lines and write to /tmp/w3post.out.
#
# BUGS
#   Despite  many attempts to detect failure, we still don't optimally
#   handle all the myriad things that can go wrong.
#
# AUTHOR
#   <a href="mailto:jc@trillian.mit.edu">John Chambers</a>
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Verbosity setup.  $me is our program name with any directory part
# removed.  The environment variable V_$me (or D_$me) holds a one-digit
# verbosity level, optionally followed directly by the name of a file
# to receive the "print V" diagnostics; without a file name they go to
# stderr.  The level defaults to 1.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
$| = 1;
($me = $0) =~ s"^.*/"";
$Vopt = $ENV{"V_$me"} || $ENV{"D_$me"} || 1;	# Verbosity.
if (($V,$Vfil) = ($Vopt =~ /^(\d)(.+)/)) {		# Verbose output file.
	# Three-argument open: the file name comes from the environment, so
	# it must never be able to alter the open mode.
	open(V,'>',$Vfil) || die "$0: Can't write \"$Vfil\" ($!)\n";
} else {										# Defaults to stderr.
	$V = $Vopt;
	open(V,">&STDERR") || die "$0: Can't dup stderr ($!)\n";
}
select V; $| = 1; select STDOUT;
# Use the builtin clock rather than running date(1): a subprocess is
# refused under -T while $ENV{PATH} is still tainted, which would kill
# us as soon as debugging output was turned on.
print V "$me: Started ", scalar(localtime), "\n" if $V>1;

# Run-time defaults; the command-line options parsed below may override
# $encode (+E/-E) and $W3hdrs (+H/-H).
$exitstat = 0;			# Becomes non-zero to report failure on exit.
$W3hdrs   = 0;			# True: copy the reply's header lines to the output.
$encode   = 0;			# True: pass each input line through URLencode.
$bufsiz   = 10_000;		# Bytes per read(); try a tiny value (10) when testing.

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Main loop over the command line.  Arguments starting with '+' or '-'
# are options and only set globals; the first remaining argument that
# URLopen accepts is POSTed to, its reply is copied out, and we exit.
# Options therefore only take effect if they precede the URL.
#
# Besides the options in the header comment, +O<file> (or -O<file>)
# sends the reply to <file> instead of standard output.
#
# $W3proxy, $W3nopxy and $W3trace are not read in this file; presumably
# the required modules consume them -- TODO confirm.  Likewise $URLerr
# and $URLhdr are presumably maintained by URLopen -- TODO confirm.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
for $u (@ARGV) {
	$URLerr = "Don't know why";		# Set by URLopen when failures.
	if ($u  =~ /^([-+])[Pp](.*)/) {	# +P<proxy> or -P<noproxy>
		if ($1 eq '-') {$W3nopxy = $2} else {$W3proxy = $2}
	} elsif ($u  =~ /^([-+])[Ee]/) {	# +E or -E
		$encode = ($1 eq '+') ? 1 : 0;
	} elsif ($u  =~ /^([-+])[Hh]/) {	# +H or -H
		$W3hdrs = ($1 eq '+') ? 1 : 0;
	} elsif ($u =~ /^([-+])[Oo](.*)/) {	# +O or -O (output file)
		$outfile = $2;
		print V "$me: Output to \"$outfile\"\n" if $V>1;
	} elsif ($u =~ /^([-+])[Tt]/) {		# +T or -T (WWW tracing)
		$W3trace = ($1 eq '+') ? 1 : 0;
		print V ($W3trace ? "Do" : "Don't"), " produce WWW tracing.\n" if $V>1;
	} elsif (&URLopen(*U,$u,'POST')) {
		print V "$me: Opened U=\"$u\"\n" if $V>1;
		select U; $| = 1; select STDOUT;
		# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
		# Collect the POST body from stdin.  Each input line, minus its
		# trailing white space, becomes one "line&" piece; input stops
		# at end of file or at the first blank line.  The pieces are
		# buffered because Content-Length must be sent before them.
		# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
		@hdr = ();
		$length = $h = 0;
		while ($x = <STDIN>) {
			print V "$me: hdr: \"$x\"\n" if $V>4;
			($l = $x) =~ s/\s+$//;		# Trim away any trailing white stuff.
			# Compare against the empty string: a plain truth test would
			# also stop on a line consisting of "0".
			last if $l eq '';			# Stop sending at first blank line.
			$l = URLencode($l) if $encode;
			$y = "$l&";
			$hdr[$h++] = $y;
			$length += length($y);
			print V "$me: HDR: length=$length \"$y\"\n" if $V>4;
		}
		print V "$me: Found $h header lines.\n" if $V>2;
		print V "Content-Length: $length\n" if $V>1;
		print U "Content-Length: $length\n";
		print V "$me: SEND \"\\n\"\n" if $V>1;
		print U "\n";					# Blank line ends the request header.
		print V "$me: SENT \"\\n\"\n" if $V>1;
		for $l (@hdr) {
			print V "$me: Send \"$l\"\n" if $V>1;
			print U $l;
			print V "$me: Sent \"$l\"\n" if $V>1;
		}
		if ($outfile && !$outopen) {	# Do we need to open the output?
			print V "$me: Open \"$outfile\"\n" if $V>1;
			# Three-argument open, so that nothing in the user-supplied
			# name can be taken as part of the open mode.
			if (open(O,'>',$outfile)) {	# Try to open it for writing.
				print V "$me: Writing \"$outfile\" [$!]\n" if $V>1;
			} else {
				print V "$me: Can't write \"$outfile\" [$!]\n" if $V>0;
				$outfile = '';			# Fall back to standard output.
			}
			$outopen = 1;
		}
		$out = $outfile ? \*O : \*STDOUT;	# Where the reply is written.
		# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
		# Here's where we read the data from one URL and write it  to #
		# the output.  If you want to do something else with the data #
		# you should rewrite this loop:                               #
		# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
		$hold = '';		# Tail of header text kept from the previous chunk.
		while ($n = read(U,$b,$bufsiz)) {
			print V "$me: Got $n bytes: \"$b\"\n" if $V>6;
			if (!$W3hdrs && $URLhdr) {	# Suppressing header lines.
				print V "$me: HDR check in \"$b\"\n" if $V>6;
				# Put back the last few bytes of the previous chunk so a
				# separator split across two reads is still recognized.
				$b = $hold . $b;
				# Non-greedy match: strip only up to the FIRST blank line.
				# A greedy .* would also swallow any body text that
				# precedes a later blank line in the same chunk.
				if ($b =~ s/^.*?\r\n\r\n//s) {
					$URLhdr = 0;		# Found the header/body separator.
					$hold = '';
				} else {
					# Still inside the header: discard it, but remember
					# enough of its tail to complete a split separator.
					$hold = (length($b) > 3) ? substr($b,-3) : $b;
					next;
				}
			}
			if ($W3hdrs) {
				# NOTE(review): this writes a newline before EVERY chunk,
				# not just the first, which looks unintended for replies
				# longer than $bufsiz.  Behavior kept as is -- confirm.
				print V "$me: Writing NL\n" if $V>1;
				print $out "\n";
			}
			print $out $b;
		}
		if (!defined $n) {
			print V "$me: Can't read \"$u\" ($URLerr)\n" if $V>0;
			# $? is the status of the last child process, not of read(),
			# and is normally 0 here; report the failure explicitly.
			$exitstat = 1;
		}
		# Buffered write errors only show up at close time.
		if ($outfile && !close(O)) {
			print V "$me: Can't close \"$outfile\" [$!]\n" if $V>0;
			$exitstat = 1;
		}
		exit $exitstat;					# Do only one URL.
	} else {
		print V "$me: Can't open \"$u\" ($URLerr)\n" if $V>0;
		$exitstat = 1;
	}
}

exit $exitstat;
