Add built local::lib
[catagits/Gitalist.git] / local-lib5 / bin / lwp-request
CommitLineData
3fea05b9 1#!/usr/bin/perl -w
2
3eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
4 if 0; # not running under some shell
5
6# Simple user agent using LWP library.
7
8=head1 NAME
9
10lwp-request, GET, POST, HEAD - Simple command line user agent
11
12=head1 SYNOPSIS
13
14B<lwp-request> [B<-afPuUsSedvhx>] [B<-m> I<method>] [B<-b> I<base URL>] [B<-t> I<timeout>]
15 [B<-i> I<if-modified-since>] [B<-c> I<content-type>]
16 [B<-C> I<credentials>] [B<-p> I<proxy-url>] [B<-o> I<format>] I<url>...
17
18=head1 DESCRIPTION
19
20This program can be used to send requests to WWW servers and your
21local file system. The request content for POST and PUT
22methods is read from stdin. The content of the response is printed on
23stdout. Error messages are printed on stderr. The program returns a
24status value indicating the number of URLs that failed.
25
26The options are:
27
28=over 4
29
30=item -m <method>
31
32Set which method to use for the request. If this option is not used,
33then the method is derived from the name of the program.
34
35=item -f
36
37Force request through, even if the program believes that the method is
38illegal. The server might reject the request eventually.
39
40=item -b <uri>
41
42This URI will be used as the base URI for resolving all relative URIs
43given as argument.
44
45=item -t <timeout>
46
47Set the timeout value for the requests. The timeout is the amount of
48time that the program will wait for a response from the remote server
49before it fails. The default unit for the timeout value is seconds.
50You might append "m" or "h" to the timeout value to make it minutes or
51hours, respectively. The default timeout is '3m', i.e. 3 minutes.
52
53=item -i <time>
54
55Set the If-Modified-Since header in the request. If I<time> is the
56name of a file, use the modification timestamp for this file. If
57I<time> is not a file, it is parsed as a literal date. Take a look at
58L<HTTP::Date> for recognized formats.
59
60=item -c <content-type>
61
62Set the Content-Type for the request. This option is only allowed for
63requests that take a content, i.e. POST and PUT. You can
64force methods to take content by using the C<-f> option together with
65C<-c>. The default Content-Type for POST is
66C<application/x-www-form-urlencoded>. The default Content-type for
67the others is C<text/plain>.
68
69=item -p <proxy-url>
70
71Set the proxy to be used for the requests. The program also loads
72proxy settings from the environment. You can disable this with the
73C<-P> option.
74
75=item -P
76
77Don't load proxy settings from environment.
78
79=item -H <header>
80
81Send this HTTP header with each request. You can specify several, e.g.:
82
83 lwp-request \
84 -H 'Referer: http://other.url/' \
85 -H 'Host: somehost' \
86 http://this.url/
87
88=item -C <username>:<password>
89
90Provide credentials for documents that are protected by Basic
91Authentication. If the document is protected and you did not specify
92the username and password with this option, then you will be prompted
93to provide these values.
94
95=back
96
97The following options controls what is displayed by the program:
98
99=over 4
100
101=item -u
102
103Print request method and absolute URL as requests are made.
104
105=item -U
106
107Print request headers in addition to request method and absolute URL.
108
109=item -s
110
111Print response status code. This option is always on for HEAD requests.
112
113=item -S
114
115Print response status chain. This shows redirect and authorization
116requests that are handled by the library.
117
118=item -e
119
120Print response headers. This option is always on for HEAD requests.
121
122=item -d
123
124Do B<not> print the content of the response.
125
126=item -o <format>
127
128Process HTML content in various ways before printing it. If the
129content type of the response is not HTML, then this option has no
130effect. The legal format values are; I<text>, I<ps>, I<links>,
131I<html> and I<dump>.
132
133If you specify the I<text> format then the HTML will be formatted as
134plain latin1 text. If you specify the I<ps> format then it will be
135formatted as Postscript.
136
137The I<links> format will output all links found in the HTML document.
138Relative links will be expanded to absolute ones.
139
140The I<html> format will reformat the HTML code and the I<dump> format
141will just dump the HTML syntax tree.
142
143Note that the C<HTML-Tree> distribution needs to be installed for this
144option to work. In addition the C<HTML-Format> distribution needs to
145be installed for I<-o text> or I<-o ps> to work.
146
147=item -v
148
149Print the version number of the program and quit.
150
151=item -h
152
153Print usage message and quit.
154
155=item -a
156
157Set text(ascii) mode for content input and output. If this option is not
158used, content input and output is done in binary mode.
159
160=back
161
162Because this program is implemented using the LWP library, it will
163only support the protocols that LWP supports.
164
165=head1 SEE ALSO
166
167L<lwp-mirror>, L<LWP>
168
169=head1 COPYRIGHT
170
171Copyright 1995-1999 Gisle Aas.
172
173This library is free software; you can redistribute it and/or
174modify it under the same terms as Perl itself.
175
176=head1 AUTHOR
177
178Gisle Aas <gisle@aas.no>
179
180=cut
181
182$progname = $0;
183$progname =~ s,.*[\\/],,; # use basename only
184$progname =~ s/\.\w*$//; # strip extension, if any
185
186$VERSION = "5.834";
187
188
189require LWP;
190
191use URI;
192use URI::Heuristic qw(uf_uri);
193
194use HTTP::Status qw(status_message);
195use HTTP::Date qw(time2str str2time);
196
197
198# This table lists the methods that are allowed. It should really be
199# a superset for all methods supported for every scheme that may be
200# supported by the library. Currently it might be a bit too HTTP
201# specific. You might use the -f option to force a method through.
202#
203# "" = No content in request, "C" = Needs content in request
204#
205%allowed_methods = (
206 GET => "",
207 HEAD => "",
208 POST => "C",
209 PUT => "C",
210 DELETE => "",
211 TRACE => "",
212 OPTIONS => "",
213);
214
215
216# We make our own specialization of LWP::UserAgent that asks for
217# user/password if document is protected.
218{
219 package RequestAgent;
220 @ISA = qw(LWP::UserAgent);
221
222 sub new
223 {
224 my $self = LWP::UserAgent::new(@_);
225 $self->agent("lwp-request/$main::VERSION ");
226 $self;
227 }
228
229 sub get_basic_credentials
230 {
231 my($self, $realm, $uri) = @_;
232 if ($main::options{'C'}) {
233 return split(':', $main::options{'C'}, 2);
234 }
235 elsif (-t) {
236 my $netloc = $uri->host_port;
237 print STDERR "Enter username for $realm at $netloc: ";
238 my $user = <STDIN>;
239 chomp($user);
240 return (undef, undef) unless length $user;
241 print STDERR "Password: ";
242 system("stty -echo");
243 my $password = <STDIN>;
244 system("stty echo");
245 print STDERR "\n"; # because we disabled echo
246 chomp($password);
247 return ($user, $password);
248 }
249 else {
250 return (undef, undef)
251 }
252 }
253}
254
255$method = uc(lc($progname) eq "lwp-request" ? "GET" : $progname);
256
257# Parse command line
258use Getopt::Long;
259
260my @getopt_args = (
261 'a', # content i/o in text(ascii) mode
262 'm=s', # set method
263 'f', # make request even if method is not in %allowed_methods
264 'b=s', # base url
265 't=s', # timeout
266 'i=s', # if-modified-since
267 'c=s', # content type for POST
268 'C=s', # credentials for basic authorization
269 'H=s@', # extra headers, form "Header: value string"
270 #
271 'u', # display method, URL and headers of request
272 'U', # display request headers also
273 's', # display status code
274 'S', # display whole chain of status codes
275 'e', # display response headers (default for HEAD)
276 'd', # don't display content
277 #
278 'h', # print usage
279 'v', # print version
280 #
281 'p=s', # proxy URL
282 'P', # don't load proxy setting from environment
283 #
284 'o=s', # output format
285);
286
287Getopt::Long::config("noignorecase", "bundling");
288unless (GetOptions(\%options, @getopt_args)) {
289 usage();
290}
291if ($options{'v'}) {
292 require LWP;
293 my $DISTNAME = 'libwww-perl-' . LWP::Version();
294 die <<"EOT";
295This is lwp-request version $VERSION ($DISTNAME)
296
297Copyright 1995-1999, Gisle Aas.
298
299This program is free software; you can redistribute it and/or
300modify it under the same terms as Perl itself.
301EOT
302}
303
304usage() if $options{'h'} || !@ARGV;
305
306# Create the user agent object
307$ua = RequestAgent->new;
308
309# Load proxy settings from *_proxy environment variables.
310$ua->env_proxy unless $options{'P'};
311
312$method = uc($options{'m'}) if defined $options{'m'};
313
314if ($options{'f'}) {
315 if ($options{'c'}) {
316 $allowed_methods{$method} = "C"; # force content
317 }
318 else {
319 $allowed_methods{$method} = "";
320 }
321}
322elsif (!defined $allowed_methods{$method}) {
323 die "$progname: $method is not an allowed method\n";
324}
325
326if ($method eq "HEAD") {
327 $options{'s'} = 1;
328 $options{'e'} = 1 unless $options{'d'};
329 $options{'d'} = 1;
330}
331
332if (defined $options{'t'}) {
333 $options{'t'} =~ /^(\d+)([smh])?/;
334 die "$progname: Illegal timeout value!\n" unless defined $1;
335 $timeout = $1;
336 if (defined $2) {
337 $timeout *= 60 if $2 eq "m";
338 $timeout *= 3600 if $2 eq "h";
339 }
340 $ua->timeout($timeout);
341}
342
343if (defined $options{'i'}) {
344 if (-e $options{'i'}) {
345 $time = (stat _)[9];
346 }
347 else {
348 $time = str2time($options{'i'});
349 die "$progname: Illegal time syntax for -i option\n"
350 unless defined $time;
351 }
352 $options{'i'} = time2str($time);
353}
354
355$content = undef;
356if ($allowed_methods{$method} eq "C") {
357 # This request needs some content
358 unless (defined $options{'c'}) {
359 # set default content type
360 $options{'c'} = ($method eq "POST") ?
361 "application/x-www-form-urlencoded"
362 : "text/plain";
363 }
364 else {
365 die "$progname: Illegal Content-type format\n"
366 unless $options{'c'} =~ m,^[\w\-]+/[\w\-.+]+(?:\s*;.*)?$,
367 }
368 print STDERR "Please enter content ($options{'c'}) to be ${method}ed:\n"
369 if -t;
370 binmode STDIN unless -t or $options{'a'};
371 $content = join("", <STDIN>);
372}
373else {
374 die "$progname: Can't set Content-type for $method requests\n"
375 if defined $options{'c'};
376}
377
378# Set up a request. We will use the same request object for all URLs.
379$request = HTTP::Request->new($method);
380$request->header('If-Modified-Since', $options{'i'}) if defined $options{'i'};
381for my $user_header (@{ $options{'H'} || [] }) {
382 my ($header_name, $header_value) = split /:\s*/, $user_header, 2;
383 $request->header($header_name, $header_value);
384 $ua->agent($header_value) if lc($header_name) eq "user-agent"; # Ugh!
385}
386#$request->header('Accept', '*/*');
387if ($options{'c'}) { # will always be set for request that wants content
388 $request->header('Content-Type', $options{'c'});
389 $request->header('Content-Length', length $content); # Not really needed
390 $request->content($content);
391}
392
393$errors = 0;
394
395# Ok, now we perform the requests, one URL at a time
396while ($url = shift) {
397 # Create the URL object, but protect us against bad URLs
398 eval {
399 if ($url =~ /^\w+:/ || $options{'b'}) { # is there any scheme specification
400 $url = URI->new($url, $options{'b'});
401 $url = $url->abs($options{'b'}) if $options{'b'};
402 }
403 else {
404 $url = uf_uri($url);
405 }
406 };
407 if ($@) {
408 $@ =~ s/ at .* line \d+.*//;
409 print STDERR $@;
410 $errors++;
411 next;
412 }
413
414 $ua->proxy($url->scheme, $options{'p'}) if $options{'p'};
415
416 # Send the request and get a response back from the server
417 $request->uri($url);
418 $response = $ua->request($request);
419
420 if ($options{'u'} || $options{'U'}) {
421 my $url = $response->request->uri->as_string;
422 print "$method $url\n";
423 print $response->request->headers_as_string, "\n" if $options{'U'};
424 }
425
426 if ($options{'S'}) {
427 for my $r ($response->redirects, $response) {
428 my $method = $r->request->method;
429 my $url = $r->request->uri->as_string;
430 print "$method $url --> ", $r->status_line, "\n";
431 }
432 }
433 elsif ($options{'s'}) {
434 print $response->status_line, "\n";
435 }
436
437 if ($options{'e'}) {
438 # Display headers
439 print $response->headers_as_string;
440 print "\n"; # separate headers and content
441 }
442
443 unless ($options{'d'}) {
444 if ($options{'o'} &&
445 $response->content_type eq 'text/html') {
446 eval {
447 require HTML::Parse;
448 };
449 if ($@) {
450 if ($@ =~ m,^Can't locate HTML/Parse.pm in \@INC,) {
451 die "The HTML-Tree distribution need to be installed for the -o option to be used.\n";
452 }
453 else {
454 die $@;
455 }
456 }
457 my $html = HTML::Parse::parse_html($response->content);
458 {
459 $options{'o'} eq 'ps' && do {
460 require HTML::FormatPS;
461 my $f = HTML::FormatPS->new;
462 print $f->format($html);
463 last;
464 };
465 $options{'o'} eq 'text' && do {
466 require HTML::FormatText;
467 my $f = HTML::FormatText->new;
468 print $f->format($html);
469 last;
470 };
471 $options{'o'} eq 'html' && do {
472 print $html->as_HTML;
473 last;
474 };
475 $options{'o'} eq 'links' && do {
476 my $base = $response->base;
477 $base = $options{'b'} if $options{'b'};
478 for ( @{ $html->extract_links } ) {
479 my($link, $elem) = @$_;
480 my $tag = uc $elem->tag;
481 $link = URI->new($link)->abs($base)->as_string;
482 print "$tag\t$link\n";
483 }
484 last;
485 };
486 $options{'o'} eq 'dump' && do {
487 $html->dump;
488 last;
489 };
490 # It is bad to not notice this before now :-(
491 die "Illegal -o option value ($options{'o'})\n";
492 }
493 }
494 else {
495 binmode STDOUT unless $options{'a'};
496 print $response->content;
497 }
498 }
499
500 $errors++ unless $response->is_success;
501}
502
503exit $errors;
504
505
506sub usage
507{
508 die <<"EOT";
509Usage: $progname [-options] <url>...
510 -m <method> use method for the request (default is '$method')
511 -f make request even if $progname believes method is illegal
512 -b <base> Use the specified URL as base
513 -t <timeout> Set timeout value
514 -i <time> Set the If-Modified-Since header on the request
515 -c <conttype> use this content-type for POST, PUT, CHECKIN
516 -a Use text mode for content I/O
517 -p <proxyurl> use this as a proxy
518 -P don't load proxy settings from environment
519 -H <header> send this HTTP header (you can specify several)
520 -C <username>:<password>
521 provide credentials for basic authentication
522
523 -u Display method and URL before any response
524 -U Display request headers (implies -u)
525 -s Display response status code
526 -S Display response status chain
527 -e Display response headers
528 -d Do not display content
529 -o <format> Process HTML content in various ways
530
531 -v Show program version
532 -h Print this message
533EOT
534}