home *** CD-ROM | disk | FTP | other *** search
- #!perl -w
-
- # $Id: lwp-request.PL,v 1.38 1999/06/01 10:40:51 gisle Exp $
- #
- # Simple user agent using LWP library.
-
- =head1 NAME
-
- lwp-request, GET, HEAD, POST - Simple WWW user agent
-
- =head1 SYNOPSIS
-
- lwp-request [-aeEdvhx] [-m method] [-b <base URL>] [-t <timeout>]
- [-i <if-modified-since>] [-c <content-type>] [-C <credentials>]
- [-p <proxy-url>] [-o <format>] <url>...
-
- =head1 DESCRIPTION
-
- This program can be used to send requests to WWW servers and your
- local file system. The request content for POST and PUT
- methods is read from stdin. The content of the response is printed on
- stdout. Error messages are printed on stderr. The program returns a
- status value indicating the number of URLs that failed.
-
- The options are:
-
- =over 4
-
- =item -m <method>
-
- Set which method to use for the request. If this option is not used,
- then the method is derived from the name of the program.
-
- =item -f
-
- Force request through, even if the program believes that the method is
- illegal. The server might reject the request eventually.
-
- =item -b <uri>
-
- This URI will be used as the base URI for resolving all relative URIs
- given as argument.
-
- =item -t <timeout>
-
- Set the timeout value for the requests. The timeout is the amount of
- time that the program will wait for a response from the remote server
- before it fails. The default unit for the timeout value is seconds.
- You might append "m" or "h" to the timeout value to make it minutes or
- hours, respectively. The default timeout is '3m', i.e. 3 minutes.
-
- =item -i <time>
-
- Set the If-Modified-Since header in the request. If I<time> it the
- name of a file, use the modification timestamp for this file. If
- I<time> is not a file, it is parsed as a literal date. Take a look at
- L<HTTP::Date> for recogniced formats.
-
- =item -c <content-type>
-
- Set the Content-Type for the request. This option is only allowed for
- requests that take a content, i.e. POST and PUT. You can
- force methods to take content by using the C<-f> option together with
- C<-c>. The default Content-Type for POST is
- C<application/x-www-form-urlencoded>. The default Content-type for
- the others is C<text/plain>.
-
- =item -p <proxy-url>
-
- Set the proxy to be used for the requests. The program also loads
- proxy settings from the environment. You can disable this with the
- C<-P> option.
-
- =item -H <header>
-
- Send this HTTP header with each request. You can specify several, e.g.:
-
- lwp-request \
- -H 'Referer: http://other.url/' \
- -H 'Host: somehost' \
- http://this.url/
-
- =item -C <username>:<password>
-
- Provide credentials for documents that are protected by Basic
- Authentication. If the document is protected and you did not specify
- the username and password with this option, then you will be prompted
- to provide these values.
-
- =back
-
- The following options controls what is displayed by the program:
-
- =over 4
-
- =item -u
-
- Print request method and absolute URL as requests are made.
-
- =item -U
-
- Print request headers in addition to request method and absolute URL.
-
- =item -s
-
- Print response status code. This option is always on for HEAD requests.
-
- =item -S
-
- Print response status chain. This shows redirect and autorization
- requests that are handled by the library.
-
- =item -e
-
- Print response headers. This option is always on for HEAD requests.
-
- =item -d
-
- Do B<not> print the content of the response.
-
- =item -o <format>
-
- Process HTML content in various ways before printing it. If the
- content type of the response is not HTML, then this option has no
- effect. The legal format values are; I<text>, I<ps>, I<links>,
- I<html> and I<dump>.
-
- If you specify the I<text> format then the HTML will be formatted as
- plain latin1 text. If you specify the I<ps> format then it will be
- formatted as Postscript.
-
- The I<links> format will output all links found in the HTML document.
- Relative links will be expanded to absolute ones.
-
- The I<html> format will reformat the HTML code and the I<dump> format
- will just dump the HTML syntax tree.
-
- =item -v
-
- Print the version number of the program and quit.
-
- =item -h
-
- Print usage message and quit.
-
- =item -x
-
- Extra debugging output.
-
- =item -a
-
- Set text(ascii) mode for content input and output. If this option is not
- used, content input and output is done in binary mode.
-
- =back
-
- Because this program is implemented using the LWP library, it will
- only support the protocols that LWP supports.
-
- =head1 SEE ALSO
-
- L<lwp-mirror>, L<LWP>
-
- =head1 COPYRIGHT
-
- Copyright 1995-1999 Gisle Aas.
-
- This library is free software; you can redistribute it and/or
- modify it under the same terms as Perl itself.
-
- =head1 AUTHOR
-
- Gisle Aas <gisle@aas.no>
-
- =cut
-
- $progname = $0;
- $progname =~ s,.*/,,; # use basename only
- $progname =~ s/\.\w*$//; # strip extension, if any
-
- $VERSION = sprintf("%d.%02d", q$Revision: 1.38 $ =~ /(\d+)\.(\d+)/);
-
-
- require LWP;
- require LWP::Debug;
-
- use URI;
- use URI::Heuristic qw(uf_uri);
-
- use HTTP::Status qw(status_message);
- use HTTP::Date qw(time2str str2time);
-
-
- # This table lists the methods that are allowed. It should really be
- # a superset for all methods supported for every scheme that may be
- # supported by the library. Currently it might be a bit too HTTP
- # specific. You might use the -f option to force a method through.
- #
- # "" = No content in request, "C" = Needs content in request
- #
- %allowed_methods = (
- GET => "",
- HEAD => "",
- POST => "C",
- PUT => "C",
- DELETE => "",
- TRACE => "",
- OPTIONS => "",
- );
-
-
- # We make our own specialization of LWP::UserAgent that asks for
- # user/password if document is protected.
- {
- package RequestAgent;
- @ISA = qw(LWP::UserAgent);
-
- sub new
- {
- my $self = LWP::UserAgent::new(@_);
- $self->agent("lwp-request/$main::VERSION");
- $self;
- }
-
- sub get_basic_credentials
- {
- my($self, $realm, $uri) = @_;
- if ($main::options{'C'}) {
- return split(':', $main::options{'C'}, 2);
- } elsif (-t) {
- my $netloc = $uri->host_port;
- print "Enter username for $realm at $netloc: ";
- my $user = <STDIN>;
- chomp($user);
- return (undef, undef) unless length $user;
- print "Password: ";
- system("stty -echo");
- my $password = <STDIN>;
- system("stty echo");
- print "\n"; # because we disabled echo
- chomp($password);
- return ($user, $password);
- } else {
- return (undef, undef)
- }
- }
- }
-
- $method = uc(lc($progname) eq "lwp-request" ? "GET" : $progname);
-
- # Parse command line
- use Getopt::Long;
-
- my @getopt_args = (
- 'a', # content i/o in text(ascii) mode
- 'm=s', # set method
- 'f', # make request even if method is not in %allowed_methods
- 'b=s', # base url
- 't=s', # timeout
- 'i=s', # if-modified-since
- 'c=s', # content type for POST
- 'C=s', # credentials for basic authorization
- 'H=s@', # extra headers, form "Header: value string"
- #
- 'u', # display method, URL and headers of request
- 'U', # display request headers also
- 's', # display status code
- 'S', # display whole chain of status codes
- 'e', # display response headers (default for HEAD)
- 'd', # don't display content
- #
- 'h', # print usage
- 'v', # print version
- #
- 'x', # extra debugging info
- 'p=s', # proxy URL
- 'P', # don't load proxy setting from environment
- #
- 'o=s', # output format
- );
-
- Getopt::Long::config("noignorecase", "bundling");
- unless (GetOptions(\%options, @getopt_args)) {
- usage();
- }
- if ($options{'v'}) {
- require LWP;
- my $DISTNAME = 'libwww-perl-' . LWP::Version();
- die <<"EOT";
- This is lwp-request version $VERSION ($DISTNAME)
-
- Copyright 1995-1999, Gisle Aas.
-
- This program is free software; you can redistribute it and/or
- modify it under the same terms as Perl itself.
- EOT
- }
-
- usage() if $options{'h'} || !@ARGV;
-
- LWP::Debug::level('+') if $options{'x'};
-
- # Create the user agent object
- $ua = RequestAgent->new;
-
- # Load proxy settings from *_proxy environment variables.
- $ua->env_proxy unless $options{'P'};
-
- $method = uc($options{'m'}) if defined $options{'m'};
-
- if ($options{'f'}) {
- if ($options{'c'}) {
- $allowed_methods{$method} = "C"; # force content
- } else {
- $allowed_methods{$method} = "";
- }
- } elsif (!defined $allowed_methods{$method}) {
- die "$progname: $method is not an allowed method\n";
- }
-
- if ($method eq "HEAD") {
- $options{'s'} = 1;
- $options{'e'} = 1 unless $options{'d'};
- $options{'d'} = 1;
- }
-
- if (defined $options{'t'}) {
- $options{'t'} =~ /^(\d+)([smh])?/;
- die "$progname: Illegal timeout value!\n" unless defined $1;
- $timeout = $1;
- $timeout *= 60 if ($2 eq "m");
- $timeout *= 3600 if ($2 eq "h");
- $ua->timeout($timeout);
- }
-
- if (defined $options{'i'}) {
- if (-e $options{'i'}) {
- $time = (stat _)[9];
- } else {
- $time = str2time($options{'i'});
- die "$progname: Illegal time syntax for -i option\n"
- unless defined $time;
- }
- $options{'i'} = time2str($time);
- }
-
- $content = undef;
- if ($allowed_methods{$method} eq "C") {
- # This request needs some content
- unless (defined $options{'c'}) {
- # set default content type
- $options{'c'} = ($method eq "POST") ?
- "application/x-www-form-urlencoded"
- : "text/plain";
- } else {
- die "$progname: Illegal Content-type format\n"
- unless $options{'c'} =~ m,^[\w\-]+/[\w\-]+(?:\s*;.*)?$,
- }
- print "Please enter content ($options{'c'}) to be ${method}ed:\n"
- if -t;
- binmode STDIN unless -t or $options{'a'};
- $content = join("", <STDIN>);
- } else {
- die "$progname: Can't set Content-type for $method requests\n"
- if defined $options{'c'};
- }
-
- # Set up a request. We will use the same request object for all URLs.
- $request = HTTP::Request->new($method);
- $request->header('If-Modified-Since', $options{'i'}) if defined $options{'i'};
- for my $user_header (@{ $options{'H'} || [] }) {
- my ($header_name, $header_value) = split /:\s*/, $user_header, 2;
- $request->header($header_name, $header_value);
- $ua->agent($header_value) if lc($header_name) eq "user-agent"; # Ugh!
- }
- #$request->header('Accept', '*/*');
- if ($options{'c'}) { # will always be set for request that wants content
- $request->header('Content-Type', $options{'c'});
- $request->header('Content-Length', length $content); # Not really needed
- $request->content($content);
- }
-
- $errors = 0;
-
- # Ok, now we perform the requests, one URL at a time
- while ($url = shift) {
- # Create the URL object, but protect us against bad URLs
- eval {
- if ($url =~ /^\w+:/ || $options{'b'}) { # is there any scheme specification
- $url = URI->new($url, $options{'b'});
- $url = $url->abs($options{'b'}) if $options{'b'};
- } else {
- $url = uf_uri($url);
- }
- };
- if ($@) {
- $@ =~ s/at\s+\S+\s+line\s\d+//;
- print STDERR $@;
- $errors++;
- next;
- }
-
- $ua->proxy($url->scheme, $options{'p'}) if $options{'p'};
-
- # Send the request and get a response back from the server
- $request->url($url);
- $response = $ua->request($request);
-
- if ($options{'u'} || $options{'U'}) {
- my $url = $response->request->url->as_string;
- print "$method $url\n";
- print $response->request->headers_as_string, "\n" if $options{'U'};
- }
-
- if ($options{'S'}) {
- printResponseChain($response);
- } elsif ($options{'s'}) {
- print $response->status_line, "\n";
- }
-
- if ($options{'e'}) {
- # Display headers
- print $response->headers_as_string;
- print "\n"; # separate headers and content
- }
-
- if ($response->is_success) {
- unless ($options{'d'}) {
- if ($options{'o'} &&
- $response->content_type eq 'text/html') {
- require HTML::Parse;
- my $html = HTML::Parse::parse_html($response->content);
- {
- $options{'o'} eq 'ps' && do {
- require HTML::FormatPS;
- my $f = HTML::FormatPS->new;
- print $f->format($html);
- last;
- };
- $options{'o'} eq 'text' && do {
- require HTML::FormatText;
- my $f = HTML::FormatText->new;
- print $f->format($html);
- last;
- };
- $options{'o'} eq 'html' && do {
- print $html->as_HTML;
- last;
- };
- $options{'o'} eq 'links' && do {
- my $base = $response->base;
- for ( @{ $html->extract_links } ) {
- my($link, $elem) = @$_;
- my $tag = uc $elem->tag;
- $link = URI->new($link)->abs($base)->as_string;
- print "$tag\t$link\n";
- }
- last;
- };
- $options{'o'} eq 'dump' && do {
- $html->dump;
- last;
- };
- # It is bad to not notice this before now :-(
- die "Illegal -o option value ($options{'o'})\n";
- }
- } else {
- binmode STDOUT unless $options{'a'};
- print $response->content;
- }
- }
- } else {
- print STDERR $response->error_as_HTML unless $options{'d'};
- $errors++;
- }
- }
-
- exit $errors;
-
-
- sub printResponseChain
- {
- my($response) = @_;
- return unless defined $response;
- printResponseChain($response->previous);
- my $method = $response->request->method;
- my $url = $response->request->url->as_string;
- my $code = $response->code;
- print "$method $url --> ", $response->status_line, "\n";
- }
-
-
- sub usage
- {
- die <<"EOT";
- Usage: $progname [-options] <url>...
- -m <method> use method for the request (default is '$method')
- -f make request even if $progname believes method is illegal
- -b <base> Use the specified URL as base
- -t <timeout> Set timeout value
- -i <time> Set the If-Modified-Since header on the request
- -c <conttype> use this content-type for POST, PUT, CHECKIN
- -a Use text mode for content I/O
- -p <proxyurl> use this as a proxy
- -P don't load proxy settings from environment
- -H <header> send this HTTP header (you can specify several)
-
- -u Display method and URL before any response
- -U Display request headers (implies -u)
- -s Display response status code
- -S Display response status chain
- -e Display response headers
- -d Do not display content
- -o <format> Process HTML content in various ways
-
- -v Show program version
- -h Print this message
-
- -x Extra debugging output
- EOT
- }
-