# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#
# NAME
#   URLopen - Open HTTP connection and request a URL
#
# SYNOPSIS
#   &URLopen(*HANDLE,$URL,$OP)
#
# DESCRIPTION
#   This routine accepts a URL and attempts  to  open  it  for  reading.   If
#   successful,  the  return  value  is  1,  and  FD will be the open file (a
#   socket, actually).  The caller can read the data from it.  If you're  not
#   going  to  exit  after  the  EOF, t's a good idea to close it when you're
#   done, to prevent the connection from hanging around.
#
#   The third parameter, the "operation", defaults to GET.  You may also pass
#   'POST' as the third arg, and we will send it, and not send the extra \n.
#
#   We send a "$OP $URL HTTP/$HTTPversion\n\n" request, and the  server  will
#   send back first the HEAD information for the URL, then \r\n\r\n, then the
#   data.  Note that \r\n\r\n is specified in the HTTP specs, and so far  not
#   even Microsoft has seen fit to violate this.
#
#   If the attempt to parse the URL fails, we will try to open it as a  local
#   file,  and  if  this succeeds, we will return success.  So any local file
#   whose name doesn't look like a URL can be used as a "remote" file.
#
# ENVIRONMENT
#   We use several global variables:
#
#   $W3proxy  is host name (or IP address) and port for HTTP proxy.
#   $W3nopxy  is perl pattern for local (non-proxied) hosts.
#   $URLhdr   is set to 1 indicating that we're in the header 
#   $URLerr   is set to an error message if we fail.
#
#   We also set $W3proxy and $W3nopxy from the environment variables  W3PROXY
#   and  W3NOPROXY,  if  the latter are defined and the former aren't, so the
#   caller should probably not worry about this in general.
#
# BUGS
#   At present, we only do the HTTP:// protocol.  Maybe eventually ...
#
# AUTHOR
#   John Chambers <jc@trillian.mit.edu>
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
$HTTPopens    = 0     unless defined HTTPopens;
$HTTPopentime = 0     unless defined HTTPopentime;
$HTTPversion  = '1.1' unless defined $HTTPversion;

sub URLopen { my $F='URLopen';
	local(*F,$url,$OP) = @_;
	local($p,$P,$h,$f,$t);
	local($savsig);
	$OP = 'GET' if !$OP;
	print V "$0/$F: \"$url\"\n" if $V>4;
	print "<!--$F: $url -->\n" if $W3trace;
	$W3proxy = $ENV{W3PROXY}   if !$W3proxy;
	$W3nopxy = $ENV{W3NOPROXY} if !$W3nopxy;
	if ($HTTPtimeout > 0) {
		alarm $HTTPtimeout;
		$savsig = $SIG{ALRM};
		$SIG{ALRM} = 'URLalarm';
		print V "$0/$F: Set alarm after $HTTPtimeout sec.\n" if $V>0;
	}
	++$HTTPopens;
	$HTTPopentime = time;
	if (($p,$h,$f) = ($url =~ m'^(\w+)://+([-_.:\w]+)(/.*)')) {
		;
	} elsif (($p,$h) = ($url =~ m'^(\w+)://+([-_.:\w]+)$')) {
		$f = '/';
	} else {
		print V "$0/$F: Can't parse \"$url\"\n" if $V>0;
		return 0;
	}
	if ($url = "$p://$h$f") {
		if ($W3proxy) {
			if ($W3nopxy && ($h =~ $W3nopxy)) {
				print V "$0/$F: host \"$h\" matches \"$W3nopxy\"\n" if $V>2;
			} else {
				print V "$0/$F: pxy=\"$W3proxy\" url=\"$url\"\n" if $V>2;
				print "<!--$F: Connecting to proxy \"$W3proxy\" -->\n" if $W3trace;
				if (&HTTPcon(*F,$W3proxy)) {
					&HTTPsend("GET $url HTTP/$HTTPversion\r\n");
					if ($HTTPversion eq '1.1') {
						&HTTPsend("Host: $h\r\n");
						&HTTPsend("Connection: Close\r\n");
					}
					print V "$0/$F: Sent \"$OP $url HTTP/$HTTPversion\\r\\n\"\n" if $V>5;
					if ($W3agentid) {
						print F "User-agent: $W3agentid\n";
						&HTTPsend("User-agent: $W3agentid\r\n");
					}
					if ($OP eq 'GET') {
						&HTTPsend("\r\n");
					}
					$URLhdr = 1;    # Note we're in the header.
					&URLalarmoff() if ($HTTPtimeout > 0);
					return 1;
				}
				$t = time - $HTTPopentime;
				$URLerr = "$F: Can't connect to proxy \"$W3proxy\"";
				print "<!--$F: $URLerr in $t sec ($!) -->\n" if $W3trace;
				print V "$0/$F: $URLerr\n" if $V>0;
				&URLalarmoff() if ($HTTPtimeout > 0);
				return 0;
			}
		}
		print V "$0/$F: \"$url\" p=\"$p\" h=\"$h\" f=\"$f\"\n" if $V>2;
		($P = $p) =~ tr/a-z/A-Z/;
		if ($P eq 'HTTP') {
			print V "$p/$F: \"$url\" HTTP protocol\n" if $V>2;
			print "<!--$F: Connecting to server \"$h\" -->\n" if $W3trace;
			if (&HTTPcon(*F,$h)) {
				print V "$0/$F: Connected to \"$h\"\n" if $V>2;
				&HTTPsend("$OP $f HTTP/$HTTPversion\r\n");
				if ($HTTPversion eq '1.1') {
					&HTTPsend("Host: $h\r\n");
					&HTTPsend("Connection: Close\r\n");
				}
				if ($W3agentid) {
					print F "User-agent: $W3agentid\n";
					&HTTPsend("User-agent: $W3agentid\r\n");
				}
				if ($OP eq 'GET') {
					&HTTPsend("\r\n");
				}
				$URLhdr = 1;    # Note we're in the header.
				&URLalarmoff() if ($HTTPtimeout > 0);
				return 1;
			}
			$URLerr = "Can't connect to \"$h\" ($!)";
			print "<!--$F: $URLerr -->\n" if $W3trace;
			print V "$P/$F: $URLerr\n" if $V>0;
			&URLalarmoff() if ($HTTPtimeout > 0);
			return 0;
		}
		$URLerr = "can't do protocol \"$p\"";
		print "<!--$F: Can't open \"$url\" $URLerr -->\n" if $W3trace;
		print V "$P/$F: Can't open \"$url\" $URLerr\n" if $V>0;
		$exitstat = 254;
		&URLalarmoff() if ($HTTPtimeout > 0);
		return 0;
	}
	if (-d $url) {
		print V "$0/$F: Directory \"$url\" ...\n" if $V>4;
		&URLalarmoff() if ($HTTPtimeout > 0);
		return &HTMLdir(*F,$url);
	}
	if (open(F,$url)) {
		print V "$0/$F: Local file \"$url\" opened.\n" if $V>4;
		$URLhdr = 0;    # No headers for local files (yet).
		&URLalarmoff() if ($HTTPtimeout > 0);
		return 1;
	}
	$URLerr = "$!";
	print "<!--$F: Can't read \"$url\" ($URLerr) -->\n" if $V>0;
	print V "$0/$F: Can't read \"$url\" ($URLerr)\n" if $V>0;
	$exitstat = int($!);
	&URLalarmoff() if ($HTTPtimeout > 0);
	return 0;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub HTTPsend { my $F='HTTPsend';
	local($str,$dsc);
	for $str (@_) {
		$dsc = $str;
		$dsc =~ s"\r"\\r"g;
		$dsc =~ s"\n"\\n"g;
		$dsc =~ s"\t"\\t"g;
		print "<!--$F: Send server \"$dsc\" -->\n" if $W3trace;
		print V "$0/$F: Send \"$dsc\"\n" if $V>4;
		print F $str;
		print V "$0/$F: Sent \"$dsc\"\n" if $V>5;
	}
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub URLalarm { my $F='URLalarm';
	my $t = time - $HTTPopentime;
	print "<!--$F: ALARM after $t sec -->\n" if $W3trace;
	exit -1;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub URLalarmoff { my $F='URLalarmoff';
	alarm 0;
	$SIG{ALRM} = $savsig;
	print V "$0/$F: Set alarm 0.\n" if $V>2;
}

1;
