#!/usr/bin/perl -w
#
# This is a perl5 program: a command-line HTTP/1.0 client.
#
# Usage: fetch [-s] [-v] [-vv] URL [REFERER]
#
#   -s   ==> strict: do not follow redirections
#   -v   ==> verbose: print the response headers to STDERR
#   -vv  ==> very verbose: also print connection progress to STDERR
#
# The response body is written to STDOUT.  Only plain http:// URLs are
# supported (no TLS).

use strict;
use warnings;
use Socket;

our $verbose       = 0;     # print http response headers to STDERR
our $veryverbose   = 0;     # print progress to STDERR
our $strict        = 0;     # don't do redirects
our $MAX_REDIRECTS = 10;    # default redirect budget for GetPage

# Split an http URL into its connection parts.
#
#   $url - "http://host[:port][/path][?query]"; the "http://" prefix is
#          optional.  A "#fragment" is dropped because fragments are never
#          sent to the server.
#
# Returns (host, port, document) where port defaults to 80 and document
# always starts with "/".  Returns the empty list if the URL cannot be
# parsed.
sub _parse_url {
    my ($url) = @_;

    return
        unless defined $url
        && $url =~ m{^(?:http://)?([^/:?\#\s]+)(?::(\d+))?(?=[/?\#]|\z)([^\#\s]*)}i;

    my ($host, $port, $document) = ($1, $2, $3);
    $port = 80 unless defined $port;

    # "http://host" and "http://host?q=1" both need a leading "/" in the
    # request line.
    $document = "/$document" unless $document =~ m{^/};

    return ($host, $port, $document);
}

# Collapse "." and ".." segments in an absolute path (one that starts
# with "/"), e.g. "/a/b/../c" becomes "/a/c".  Empty segments ("//") are
# kept as they are.
sub _remove_dot_segments {
    my ($path) = @_;

    my @segments = split m{/}, $path, -1;
    shift @segments;    # empty field in front of the leading "/"

    my @kept;
    my $ends_in_dir = 0;    # true when the last segment was "." or ".."
    for my $segment (@segments) {
        $ends_in_dir = 0;
        if ($segment eq '.') {
            $ends_in_dir = 1;
        }
        elsif ($segment eq '..') {
            pop @kept;
            $ends_in_dir = 1;
        }
        else {
            push @kept, $segment;
        }
    }
    push @kept, '' if $ends_in_dir;    # keep the trailing "/"

    return '/' . join('/', @kept);
}

# Fetch a URL over HTTP/1.0 and copy the response body to STDOUT.
#
#   $url            - the http:// URL to fetch ("http://" is assumed when
#                     the URL carries no scheme at all)
#   $url_referer    - optional; sent as the Referer header when true
#   $redirects_left - optional; how many more redirects may be followed
#                     (defaults to $MAX_REDIRECTS)
#
# Unless $strict is set, 301/302/303/307/308 responses are followed.  The
# new location is taken from the Location header, or failing that from
# the first <a href=...> in the response body.
#
# Returns an empty list: the page is streamed to STDOUT, not collected.
# Dies on an unusable URL, a host that cannot be resolved, socket errors,
# a redirect without a usable target, or too many redirects.
sub GetPage {
    my ($url, $url_referer, $redirects_left) = @_;
    $redirects_left = $MAX_REDIRECTS unless defined $redirects_left;
    my @page_loaded = ();

    die "GetPage: no URL given" unless defined $url && length $url;

    # Accept "host/path" as shorthand, but refuse schemes we cannot speak.
    $url = "http://$url" unless $url =~ m{^[a-z][a-z0-9+.\-]*://}i;
    die "GetPage: only http:// URLs are supported: $url"
        unless $url =~ m{^http://}i;

    if ($veryverbose) { print STDERR "fetching $url \n"; }

    my ($url_host, $url_port, $url_document) = _parse_url($url);
    die "GetPage: cannot parse URL: $url" unless defined $url_host;

    if ($veryverbose) {
        print STDERR "Opening socket to host $url_host, port $url_port",
            "\n to fetch $url_document\n";
    }

    my $url_ip = inet_aton($url_host)
        or die "GetPage: cannot resolve host $url_host";
    if ($veryverbose) {
        my @ip_string = unpack('C4', $url_ip);
        print STDERR "Destination host's ip = @ip_string\n";
    }

    my $proto = getprotobyname('tcp');
    if ($veryverbose) { print STDERR "Protocol used is : $proto\n"; }

    # No bind() is needed for a client socket: connect() picks an
    # ephemeral local port by itself.
    socket(my $sock, PF_INET, SOCK_STREAM, $proto) or die "socket: $!";
    connect($sock, sockaddr_in($url_port, $url_ip)) or die "connect: $!";
    binmode($sock);

    if ($veryverbose) {
        print STDERR "socket connection made, sending request\n";
    }

    # Unbuffer the socket without disturbing the caller's selected handle.
    my $previous_handle = select($sock);
    $| = 1;
    select($previous_handle);

    # HTTP header lines end in CRLF, and a non-default port belongs in
    # the Host header.
    my $host_header = $url_port == 80 ? $url_host : "$url_host:$url_port";
    my @request = (
        "GET $url_document HTTP/1.0",
        "Host: $host_header",
        "User-Agent: Fetch -- a little perl script by mjf",
    );
    push @request, "Referer: $url_referer" if $url_referer;
    print {$sock} join("\r\n", @request), "\r\n\r\n" or die "send: $!";

    # Read the response headers up to the blank separator line, noting
    # the status code and any Location header on the way.
    my ($retn_code, $location);
    while (defined(my $line = <$sock>)) {
        if ($verbose) { print STDERR $line; }
        last if $line =~ /^\r?\n?\z/;

        if (!defined $retn_code && $line =~ m{^HTTP/\S+\s+(\d{3})\b}) {
            $retn_code = $1;
        }
        elsif (!defined $location
            && $line =~ /^location:\s*"?([^"\r\n]*)/i)
        {
            $location = $1;
            $location =~ s/\s+\z//;
        }
    }

    my $is_redirect = defined $retn_code
        && $retn_code =~ /^(?:301|302|303|307|308)\z/;

    if (!$strict && $is_redirect) {
        my $newurl = defined $location ? $location : '';

        # No Location header: fall back to the first link in the body.
        # Reading stops at end of input, so a body without a link cannot
        # hang the client.
        if ($newurl eq '') {
            while (defined(my $line = <$sock>)) {
                if ($line =~ /<a\s+href\s*=\s*["']?([^"'>\s]+)/i) {
                    $newurl = $1;
                    last;
                }
            }
        }
        close($sock);

        die "GetPage: redirect from $url without a target"
            if $newurl eq '';

        if ($newurl !~ m{^http://}i) {
            die "GetPage: cannot follow redirect to $newurl"
                if $newurl =~ m{^[a-z][a-z0-9+.\-]*:}i;
            $newurl = MakeAbsoluteURL($newurl, $url);
            die "GetPage: cannot resolve redirect target against $url"
                if $newurl eq '';
        }

        die "GetPage: too many redirects, last one to $newurl"
            if $redirects_left <= 0;

        if ($verbose) {
            print STDERR "Redirected to $newurl ... fetching\n";
        }
        return GetPage($newurl, $url, $redirects_left - 1);
    }

    # Copy the body through in fixed-size chunks so that binary content
    # and very long lines are handled without line-based buffering.
    binmode(STDOUT);
    {
        local $/ = \8192;
        while (defined(my $chunk = <$sock>)) {
            print STDOUT $chunk;
        }
    }
    close($sock);

    return @page_loaded;
}

# Resolve a possibly relative URL against the URL of the referring page.
#
#   $rel - the URL found in the document or Location header
#   $ref - the absolute http:// URL it was found in
#
# Returns the absolute URL, or "" when $ref is not an http:// URL and
# $rel is not already absolute.
sub MakeAbsoluteURL {
    my ($rel, $ref) = @_;
    $rel = '' unless defined $rel;
    $ref = '' unless defined $ref;

    return $rel if $rel =~ m{^http://}i;

    my ($root, $path, $query) =
        $ref =~ m{^(http://[^/?\#]*)([^?\#]*)(\?[^\#]*)?}i
        or return '';
    $path  = '/' if $path eq '';
    $query = ''  unless defined $query;

    return "http:$rel"             if $rel =~ m{^//};    # same scheme
    return $root . $rel            if $rel =~ m{^/};     # same host
    return $root . $path . $rel    if $rel =~ m{^\?};    # same document
    return $root . $path . $query . $rel
        if $rel eq '' || $rel =~ m{^\#};

    # Path-relative: replace everything after the last "/" of the
    # referrer's path, then tidy up "." and ".." segments.  Any query or
    # fragment carried by $rel is left untouched.
    (my $directory = $path) =~ s{[^/]*\z}{};
    my ($merged_path, $suffix) = "$directory$rel" =~ m{^([^?\#]*)(.*)\z}s;

    return $root . _remove_dot_segments($merged_path) . $suffix;
}

# Parse the command line and fetch the requested page.
#
#   @args - the command-line arguments (switches, URL, optional referer)
#
# Returns the process exit status: 0 on success, 1 on a usage error.
sub main {
    my @args = @_;

    while (@args && $args[0] =~ /^-(.*)/) {
        for my $switch (split //, $1) {
            if    ($switch eq 'v') { $verbose++; }
            elsif ($switch eq 's') { $strict++; }
            else  { print STDERR "Unknown switch -$switch\n"; }
        }
        shift @args;
    }
    if ($verbose > 1) { $veryverbose = 1; }

    unless (@args) {
        print STDERR "usage: $0 [-s] [-v] [-vv] URL [REFERER]\n";
        return 1;
    }

    GetPage($args[0], $args[1]);
    return 0;
}

# Run only when executed as a script, so the file can also be loaded
# with require/do (e.g. by tests) without fetching anything.
exit main(@ARGV) unless caller;

1;