#!/usr/bin/perl -Tw
#
# NAME
#   w3post
#
# SYNOPSIS
#   w3post [options] URL < data
#
# DESCRIPTION
#   This routine implements the HTTP POST operation on a URL.  We make
#   a connection to the web server for the URL, and send a POST
#   request for the file.  We then pipe our stdin to the connection,
#   ending with a double newline.  We then start reading from the
#   connection and copy the data to our stdout.
#
#   If you want to learn how to do this stuff, you can study this
#   program.  It is useful as a starting point for writing other
#   simple web clients.  It's not nearly as difficult as people would
#   like you to believe.  But the socket stuff uses a lot of magic
#   incantations that "you just have to know"; see the required perl
#   module files for this socket magic.
#
#   This is a webified perl version of the standard Unix cat(1)
#   program.
#
# OPTIONS
#
#   -E
#     Don't encode the data [default].
#
#   +E
#     Encode the data.  We do the application/x-www-form-urlencoded
#     encoding that is the standard for HTML forms.  (Earlier text
#     called this the default, but the program starts with encoding
#     turned off, so it happens only when +E is given.)
#
#   +H
#     Include the header info in the output.
#
#   -H
#     Don't include the header info in the output [default].
#
#   +P
#     Proxy gateway.  If you are hidden behind a proxy, put the proxy
#     hostname (and optionally :port) in a +P option, directly after
#     the "+P", and we will try to indirect through the proxy server.
#
#   -P
#     Proxy exceptions.  The string following "-P" should be a
#     pattern.  If a URL matches this pattern, the proxy gateway
#     isn't used.
#
#   +T
#     Enable WWW tracing.  This sets a global flag that causes various
#     routines to produce trace lines.  (The example line that used to
#     be shown here is missing from this copy of the file.)
#
#     These look like both HTTP header lines and HTML comments.  Some
#     WWW tools (such as the "H" html viewer) can show these to tell
#     you which stage of a GET operation we have reached.
#
#   -T
#     Disable WWW tracing [default].
#
# ENVIRONMENT
#   We use the following from the environment:
#
#   W3PROXY
#     The name (or address) and an optional :port for a proxy
#     gateway.  URLs that don't match the W3NOPROXY will be fetched
#     indirectly via the proxy's web server.
#
#   W3NOPROXY
#     A pattern which is applied to URLs, and if they match, no proxy
#     is used.  That is, any URL that matches W3NOPROXY is considered
#     local, and we will access it directly.
#
# LIMITATIONS
#   So far only the http:// protocol is implemented; ftp://, file://
#   and others may appear if I need them.  If someone feels like
#   adding FTP code, you might send me a copy.
#
# REQUIRES
#   perl5 (5.6 or later, for three-argument open) and the following
#   modules, which should be found in the same directory where you
#   found this program.
#
push @INC,"$ENV{HOME}/sh",'sh';
require "URLopen.pm";
require "HTMLdir.pm";
require "HTTPcon.pm";
#
# DEBUGGING
#   You can use "perl -dw", of course.  Or you can do the following:
#     setenv V_w3post 5/tmp/w3post.out      # csh or tcsh users.
#     export V_w3post=5/tmp/w3post.out      # ksh or bash users.
#   This will turn on the "print V" lines and write to /tmp/w3post.out.
#
# BUGS
#   Despite many attempts to detect failure, we still don't optimally
#   handle all the myriad things that can go wrong.
#
# AUTHOR
#   John Chambers
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# NOTE: the option and status variables below are deliberately left as
# package globals (no "my", no "use strict").  The header comment
# describes the tracing flag as a global that other routines consult,
# and $URLerr/$URLhdr are only ever read here, so they must be set by
# the required modules.  Presumably $V and the V filehandle are shared
# the same way -- confirm against URLopen.pm before localizing any of
# them.

$| = 1;
($me = $0) =~ s"^.*/"";                       # Program name without directory.
$Vopt = $ENV{"V_$me"} || $ENV{"D_$me"} || 1;  # Verbosity.
if (($V,$Vfil) = ($Vopt =~ /^(\d)(.+)/)) {    # Digit + file name: verbose output file.
    # Three-argument open: the name comes from the environment and must
    # never be interpreted as a mode or a pipe.
    open(V, '>', $Vfil) || die "$0: Can't write \"$Vfil\" ($!)\n";
} else {
    $V = $Vopt;
    open(V, ">&STDERR");                      # Defaults to stderr.
}
select V; $| = 1; select STDOUT;
# Use localtime rather than `date`: under -T the backticks die with an
# "Insecure $ENV{PATH}" error as soon as the verbosity is raised.
print V "$me: Started ", scalar(localtime), "\n" if $V>1;

#$bufsiz  = 10;         # Small for testing.
$bufsiz   = 10000;      # Large for routine use.
$encode   = 0;          # 1 = URL-encode each input line (+E); off unless asked for.
$exitstat = 0;          # Set this to get a failure exit status.
$W3hdrs   = 0;          # Whether to output header lines.

# Walk the command line.  Arguments starting with + or - followed by a
# known letter are options; the first argument that URLopen manages to
# open is POSTed to, and the program exits after that one URL.
for $u (@ARGV) {
    $URLerr = "Don't know why";             # Set by URLopen when failures.
    if ($u =~ /^([-+])[Pp](.*)/) {          # +Pproxy or -Ppattern
        if ($1 eq '-') {$W3nopxy = $2} else {$W3proxy = $2}
    } elsif ($u =~ /^([-+])[Ee]/) {         # +E or -E
        $encode = ($1 eq '+') ? 1 : 0;
    } elsif ($u =~ /^([-+])[Hh]/) {         # +H or -H
        $W3hdrs = ($1 eq '+') ? 1 : 0;
    } elsif ($u =~ /^([-+])[Oo](.*)/) {     # +Ofile or -Ofile (output file)
        # Either sign works; the rest of the argument is the name of a
        # file that receives the response instead of stdout.
        $outfile = $2;
        print V "$me: Output to \"$outfile\"\n" if $V>1;
    } elsif ($u =~ /^([-+])[Tt]/) {         # +T or -T (WWW tracing)
        $W3trace = ($1 eq '+') ? 1 : 0;
        print V ($W3trace ? "Do" : "Don't"), " produce WWW tracing.\n" if $V>1;
    } elsif (&URLopen(*U,$u,'POST')) {
        print V "$me: Opened U=\"$u\"\n" if $V>1;
        select U; $| = 1; select STDOUT;
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
        # We first read from stdin and send it down the line.  Each
        # input line becomes one "field&" piece of the request body;
        # the pieces are collected first because Content-Length has
        # to be sent before any of them.
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
        my @pieces = ();
        my $length = 0;
        while (defined(my $line = <STDIN>)) {   # Send our stdin to the socket.
            print V "$me: hdr: \"$line\"\n" if $V>4;
            (my $field = $line) =~ s/\s+$//;    # Trim away any trailing white stuff.
            # Stop sending at first blank line.  Compare against the
            # empty string so that a line containing just "0" is data.
            last if $field eq '';
            $field = URLencode($field) if $encode;
            my $piece = "$field&";
            push @pieces, $piece;
            $length += length($piece);
            print V "$me: HDR: length=$length \"$piece\"\n" if $V>4;
        }
        my $count = @pieces;
        print V "$me: Found $count header lines.\n" if $V>2;
        print V "Content-Length: $length\n" if $V>1;
        print U "Content-Length: $length\n";
        print V "$me: SEND \"\\n\"\n" if $V>1;
        print U "\n";                           # Blank line ends the request header.
        print V "$me: SENT \"\\n\"\n" if $V>1;
        for my $piece (@pieces) {
            print V "$me: Send \"$piece\"\n" if $V>1;
            print U $piece;
            print V "$me: Sent \"$piece\"\n" if $V>1;
        }
        if ($outfile && !$outopen) {            # Do we need to open the output?
            print V "$me: Open \"$outfile\"\n" if $V>1;
            if (open(O, '>', $outfile)) {       # Try to open it for writing.
                print V "$me: Writing \"$outfile\"\n" if $V>1;
            } else {
                print V "$me: Can't write \"$outfile\" [$!]\n" if $V>0;
                $outfile = '';                  # Fall back to stdout.
            }
            $outopen = 1;
        }
        my $out = $outfile ? \*O : \*STDOUT;    # Where the response goes.
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
        # Here's where we read the data from one URL and write it to  #
        # standard output.  If you want to do something else with the #
        # data, you should rewrite this loop:                         #
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
        # With +H a newline is written ahead of the response.  It is
        # emitted only before the first buffer, so that responses
        # larger than $bufsiz don't get newlines injected into them.
        my $lead_nl = $W3hdrs;
        my ($n, $buf);
        while ($n = read(U,$buf,$bufsiz)) {
            print V "$me: Got $n bytes: \"$buf\"\n" if $V>6;
            # $URLhdr is not set in this file; presumably URLopen sets
            # it when the response starts with header lines -- confirm.
            if (!$W3hdrs && $URLhdr) {          # Suppressing header lines.
                print V "$me: HDR check in \"$buf\"\n" if $V>6;
                # Non-greedy, so only the text up to the FIRST blank
                # line is removed and blank lines in the body survive.
                # A separator split across two reads is not detected.
                if ($buf =~ s/^.*?\r\n\r\n//s) {
                    $URLhdr = 0;                # Found \r\n\r\n separator.
                } else {
                    next;                       # No separator, discard it all.
                }
            }
            if ($lead_nl) {
                print V "$me: Writing NL\n" if $V>1;
                print $out "\n";
                $lead_nl = 0;
            }
            print $out $buf;
        }
        my $readerr = $!;                       # Save before anything can clobber it.
        if (!defined $n) {
            # read() returns undef on error and sets $!; $? (the last
            # child's status) says nothing about the socket.
            print V "$me: Can't read \"$u\" ($readerr)\n" if $V>0;
            $exitstat = 1;
        }
        if ($outfile && !close(O)) {            # Buffered write errors show up here.
            print V "$me: Can't finish writing \"$outfile\" [$!]\n" if $V>0;
            $exitstat = 1;
        }
        exit $exitstat;                         # Do only one URL.
    } else {
        print V "$me: Can't open \"$u\" ($URLerr)\n" if $V>0;
        $exitstat = 1;
    }
}
exit $exitstat;