# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # # NAME # URLopen - Open HTTP connection and request a URL # # SYNOPSIS # &URLopen(*HANDLE,$URL,$OP) # # DESCRIPTION # This routine accepts a URL and attempts to open it for reading. If # successful, the return value is 1, and FD will be the open file (a # socket, actually). The caller can read the data from it. If you're not # going to exit after the EOF, t's a good idea to close it when you're # done, to prevent the connection from hanging around. # # The third parameter, the "operation", defaults to GET. You may also pass # 'POST' as the third arg, and we will send it, and not send the extra \n. # # We send a "$OP $URL HTTP/$HTTPversion\n\n" request, and the server will # send back first the HEAD information for the URL, then \r\n\r\n, then the # data. Note that \r\n\r\n is specified in the HTTP specs, and so far not # even Microsoft has seen fit to violate this. # # If the attempt to parse the URL fails, we will try to open it as a local # file, and if this succeeds, we will return success. So any local file # whose name doesn't look like a URL can be used as a "remote" file. # # ENVIRONMENT # We use several global variables: # # $W3proxy is host name (or IP address) and port for HTTP proxy. # $W3nopxy is perl pattern for local (non-proxied) hosts. # $URLhdr is set to 1 indicating that we're in the header # $URLerr is set to an error message if we fail. # # We also set $W3proxy and $W3nopxy from the environment variables W3PROXY # and W3NOPROXY, if the latter are defined and the former aren't, so the # caller should probably not worry about this in general. # # BUGS # At present, we only do the HTTP:// protocol. Maybe eventually ... # # AUTHOR # John Chambers # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # $HTTPopens = 0; $HTTPopentime = 0; $HTTPversion = '1.0'; sub URLopen { my $F='URLopen'; local(*F,$url,$OP) = @_; local($p,$P,$h,$f,$t); local($savsig); $OP = 'GET' if !$OP; print V "$0/$F: \"$url\"\n" if $V>4; print "\n" if $W3trace; $W3proxy = $ENV{W3PROXY} if !$W3proxy; $W3nopxy = $ENV{W3NOPROXY} if !$W3nopxy; if ($HTTPtimeout > 0) { alarm $HTTPtimeout; $savsig = $SIG{ALRM}; $SIG{ALRM} = 'URLalarm'; print V "$0/$F: Set alarm after $HTTPtimeout sec.\n" if $V>2; } ++$HTTPopens; $HTTPopentime = time; if (($p,$h,$f) = ($url =~ m'^(\w+)://+([-_.:\w]+)(/.*)')) { ; } elsif (($p,$h) = ($url =~ m'^(\w+)://+([-_.:\w]+)$')) { $f = '/'; } else { print V "$0/$F: Can't parse \"$url\"\n" if $V>0; return 0; } if ($url = "$p://$h$f") { if ($W3proxy) { if ($W3nopxy && ($h =~ $W3nopxy)) { print V "$0/$F: host \"$h\" matches \"$W3nopxy\"\n" if $V>2; } else { print V "$0/$F: pxy=\"$W3proxy\" url=\"$url\"\n" if $V>2; print "\n" if $W3trace; if (&HTTPcon(*F,$W3proxy)) { &HTTPsend("GET $url HTTP/$HTTPversion\r\n"); if ($HTTPversion eq '1.1') { &HTTPsend("Host: $h\r\n"); &HTTPsend("Connection: Close\r\n"); } print V "$0/$F: Sent \"$OP $url HTTP/$HTTPversion\\r\\n\"\n" if $V>5; if ($W3agentid) { print F "User-agent: $W3agentid\n"; &HTTPsend("User-agent: $W3agentid\r\n"); } if ($OP eq 'GET') { &HTTPsend("\r\n"); } $URLhdr = 1; # Note we're in the header. &URLalarmoff() if ($HTTPtimeout > 0); return 1; } $t = time - $HTTPopentime; $URLerr = "$F: Can't connect to proxy \"$W3proxy\""; print "\n" if $W3trace; print V "$0/$F: $URLerr\n" if $V>0; &URLalarmoff() if ($HTTPtimeout > 0); return 0; } } print V "$0/$F: \"$url\" p=\"$p\" h=\"$h\" f=\"$f\"\n" if $V>2; ($P = $p) =~ tr/a-z/A-Z/; if ($P eq 'HTTP') { print V "$p/$F: \"$url\" HTTP protocol\n" if $V>2; print "\n" if $W3trace; if (&HTTPcon(*F,$h)) { print V "$0/$F: Connected to \"$h\"\n" if $V>2; &HTTPsend("$OP $f HTTP/$HTTPversion\r\n"); if ($HTTPversion eq '1.1') { &HTTPsend("Host: $h\r\n"); &HTTPsend("Connection: Close\r\n"); } if ($W3agentid) { print F "User-agent: $W3agentid\n"; &HTTPsend("User-agent: $W3agentid\r\n"); } if ($OP eq 'GET') { &HTTPsend("\r\n"); } $URLhdr = 1; # Note we're in the header. &URLalarmoff() if ($HTTPtimeout > 0); return 1; } $URLerr = "Can't connect to \"$h\" ($!)"; print "\n" if $W3trace; print V "$P/$F: $URLerr\n" if $V>0; &URLalarmoff() if ($HTTPtimeout > 0); return 0; } $URLerr = "can't do protocol \"$p\""; print "\n" if $W3trace; print V "$P/$F: Can't open \"$url\" $URLerr\n" if $V>0; $exitstat = 254; &URLalarmoff() if ($HTTPtimeout > 0); return 0; } if (-d $url) { print V "$0/$F: Directory \"$url\" ...\n" if $V>4; &URLalarmoff() if ($HTTPtimeout > 0); return &HTMLdir(*F,$url); } if (open(F,$url)) { print V "$0/$F: Local file \"$url\" opened.\n" if $V>4; $URLhdr = 0; # No headers for local files (yet). &URLalarmoff() if ($HTTPtimeout > 0); return 1; } $URLerr = "$!"; print "\n" if $V>0; print V "$0/$F: Can't read \"$url\" ($URLerr)\n" if $V>0; $exitstat = int($!); &URLalarmoff() if ($HTTPtimeout > 0); return 0; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # sub HTTPsend { my $F='HTTPsend'; local($str,$dsc); for $str (@_) { $dsc = $str; $dsc =~ s"\r"\\r"g; $dsc =~ s"\n"\\n"g; $dsc =~ s"\t"\\t"g; print "\n" if $W3trace; print V "$0/$F: Send \"$dsc\"\n" if $V>4; print F $str; print V "$0/$F: Sent \"$dsc\"\n" if $V>5; } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # sub URLalarm { my $F='URLalarm'; my $t = time - $HTTPopentime; print "\n" if $W3trace; exit -1; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # sub URLalarmoff { my $F='URLalarmoff'; alarm 0; $SIG{ALRM} = $savsig; print V "$0/$F: Set alarm 0.\n" if $V>2; } 1;