Commit | Line | Data |
3fea05b9 |
1 | #!/usr/bin/perl -w |
2 | |
3 | eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}' |
4 | if 0; # not running under some shell |
5 | |
6 | # Simple user agent using LWP library. |
7 | |
8 | =head1 NAME |
9 | |
10 | lwp-request, GET, POST, HEAD - Simple command line user agent |
11 | |
12 | =head1 SYNOPSIS |
13 | |
14 | B<lwp-request> [B<-afPuUsSedvhx>] [B<-m> I<method>] [B<-b> I<base URL>] [B<-t> I<timeout>] |
15 | [B<-i> I<if-modified-since>] [B<-c> I<content-type>] |
16 | [B<-C> I<credentials>] [B<-p> I<proxy-url>] [B<-o> I<format>] I<url>... |
17 | |
18 | =head1 DESCRIPTION |
19 | |
20 | This program can be used to send requests to WWW servers and your |
21 | local file system. The request content for POST and PUT |
22 | methods is read from stdin. The content of the response is printed on |
23 | stdout. Error messages are printed on stderr. The program returns a |
24 | status value indicating the number of URLs that failed. |
25 | |
26 | The options are: |
27 | |
28 | =over 4 |
29 | |
30 | =item -m <method> |
31 | |
32 | Set which method to use for the request. If this option is not used, |
33 | then the method is derived from the name of the program. |
34 | |
35 | =item -f |
36 | |
37 | Force request through, even if the program believes that the method is |
38 | illegal. The server might reject the request eventually. |
39 | |
40 | =item -b <uri> |
41 | |
42 | This URI will be used as the base URI for resolving all relative URIs |
43 | given as argument. |
44 | |
45 | =item -t <timeout> |
46 | |
47 | Set the timeout value for the requests. The timeout is the amount of |
48 | time that the program will wait for a response from the remote server |
49 | before it fails. The default unit for the timeout value is seconds. |
50 | You might append "m" or "h" to the timeout value to make it minutes or |
51 | hours, respectively. The default timeout is '3m', i.e. 3 minutes. |
52 | |
53 | =item -i <time> |
54 | |
55 | Set the If-Modified-Since header in the request. If I<time> is the |
56 | name of a file, use the modification timestamp for this file. If |
57 | I<time> is not a file, it is parsed as a literal date. Take a look at |
58 | L<HTTP::Date> for recognized formats. |
59 | |
60 | =item -c <content-type> |
61 | |
62 | Set the Content-Type for the request. This option is only allowed for |
63 | requests that take a content, i.e. POST and PUT. You can |
64 | force methods to take content by using the C<-f> option together with |
65 | C<-c>. The default Content-Type for POST is |
66 | C<application/x-www-form-urlencoded>. The default Content-type for |
67 | the others is C<text/plain>. |
68 | |
69 | =item -p <proxy-url> |
70 | |
71 | Set the proxy to be used for the requests. The program also loads |
72 | proxy settings from the environment. You can disable this with the |
73 | C<-P> option. |
74 | |
75 | =item -P |
76 | |
77 | Don't load proxy settings from environment. |
78 | |
79 | =item -H <header> |
80 | |
81 | Send this HTTP header with each request. You can specify several, e.g.: |
82 | |
83 | lwp-request \ |
84 | -H 'Referer: http://other.url/' \ |
85 | -H 'Host: somehost' \ |
86 | http://this.url/ |
87 | |
88 | =item -C <username>:<password> |
89 | |
90 | Provide credentials for documents that are protected by Basic |
91 | Authentication. If the document is protected and you did not specify |
92 | the username and password with this option, then you will be prompted |
93 | to provide these values. |
94 | |
95 | =back |
96 | |
97 | The following options controls what is displayed by the program: |
98 | |
99 | =over 4 |
100 | |
101 | =item -u |
102 | |
103 | Print request method and absolute URL as requests are made. |
104 | |
105 | =item -U |
106 | |
107 | Print request headers in addition to request method and absolute URL. |
108 | |
109 | =item -s |
110 | |
111 | Print response status code. This option is always on for HEAD requests. |
112 | |
113 | =item -S |
114 | |
115 | Print response status chain. This shows redirect and authorization |
116 | requests that are handled by the library. |
117 | |
118 | =item -e |
119 | |
120 | Print response headers. This option is always on for HEAD requests. |
121 | |
122 | =item -d |
123 | |
124 | Do B<not> print the content of the response. |
125 | |
126 | =item -o <format> |
127 | |
128 | Process HTML content in various ways before printing it. If the |
129 | content type of the response is not HTML, then this option has no |
130 | effect. The legal format values are; I<text>, I<ps>, I<links>, |
131 | I<html> and I<dump>. |
132 | |
133 | If you specify the I<text> format then the HTML will be formatted as |
134 | plain latin1 text. If you specify the I<ps> format then it will be |
135 | formatted as Postscript. |
136 | |
137 | The I<links> format will output all links found in the HTML document. |
138 | Relative links will be expanded to absolute ones. |
139 | |
140 | The I<html> format will reformat the HTML code and the I<dump> format |
141 | will just dump the HTML syntax tree. |
142 | |
143 | Note that the C<HTML-Tree> distribution needs to be installed for this |
144 | option to work. In addition the C<HTML-Format> distribution needs to |
145 | be installed for I<-o text> or I<-o ps> to work. |
146 | |
147 | =item -v |
148 | |
149 | Print the version number of the program and quit. |
150 | |
151 | =item -h |
152 | |
153 | Print usage message and quit. |
154 | |
155 | =item -a |
156 | |
157 | Set text(ascii) mode for content input and output. If this option is not |
158 | used, content input and output is done in binary mode. |
159 | |
160 | =back |
161 | |
162 | Because this program is implemented using the LWP library, it will |
163 | only support the protocols that LWP supports. |
164 | |
165 | =head1 SEE ALSO |
166 | |
167 | L<lwp-mirror>, L<LWP> |
168 | |
169 | =head1 COPYRIGHT |
170 | |
171 | Copyright 1995-1999 Gisle Aas. |
172 | |
173 | This library is free software; you can redistribute it and/or |
174 | modify it under the same terms as Perl itself. |
175 | |
176 | =head1 AUTHOR |
177 | |
178 | Gisle Aas <gisle@aas.no> |
179 | |
180 | =cut |
181 | |
182 | $progname = $0; |
183 | $progname =~ s,.*[\\/],,; # use basename only |
184 | $progname =~ s/\.\w*$//; # strip extension, if any |
185 | |
186 | $VERSION = "5.834"; |
187 | |
188 | |
189 | require LWP; |
190 | |
191 | use URI; |
192 | use URI::Heuristic qw(uf_uri); |
193 | |
194 | use HTTP::Status qw(status_message); |
195 | use HTTP::Date qw(time2str str2time); |
196 | |
197 | |
198 | # This table lists the methods that are allowed. It should really be |
199 | # a superset for all methods supported for every scheme that may be |
200 | # supported by the library. Currently it might be a bit too HTTP |
201 | # specific. You might use the -f option to force a method through. |
202 | # |
203 | # "" = No content in request, "C" = Needs content in request |
204 | # |
205 | %allowed_methods = ( |
206 | GET => "", |
207 | HEAD => "", |
208 | POST => "C", |
209 | PUT => "C", |
210 | DELETE => "", |
211 | TRACE => "", |
212 | OPTIONS => "", |
213 | ); |
214 | |
215 | |
216 | # We make our own specialization of LWP::UserAgent that asks for |
217 | # user/password if document is protected. |
218 | { |
219 | package RequestAgent; |
220 | @ISA = qw(LWP::UserAgent); |
221 | |
222 | sub new |
223 | { |
224 | my $self = LWP::UserAgent::new(@_); |
225 | $self->agent("lwp-request/$main::VERSION "); |
226 | $self; |
227 | } |
228 | |
229 | sub get_basic_credentials |
230 | { |
231 | my($self, $realm, $uri) = @_; |
232 | if ($main::options{'C'}) { |
233 | return split(':', $main::options{'C'}, 2); |
234 | } |
235 | elsif (-t) { |
236 | my $netloc = $uri->host_port; |
237 | print STDERR "Enter username for $realm at $netloc: "; |
238 | my $user = <STDIN>; |
239 | chomp($user); |
240 | return (undef, undef) unless length $user; |
241 | print STDERR "Password: "; |
242 | system("stty -echo"); |
243 | my $password = <STDIN>; |
244 | system("stty echo"); |
245 | print STDERR "\n"; # because we disabled echo |
246 | chomp($password); |
247 | return ($user, $password); |
248 | } |
249 | else { |
250 | return (undef, undef) |
251 | } |
252 | } |
253 | } |
254 | |
255 | $method = uc(lc($progname) eq "lwp-request" ? "GET" : $progname); |
256 | |
257 | # Parse command line |
258 | use Getopt::Long; |
259 | |
260 | my @getopt_args = ( |
261 | 'a', # content i/o in text(ascii) mode |
262 | 'm=s', # set method |
263 | 'f', # make request even if method is not in %allowed_methods |
264 | 'b=s', # base url |
265 | 't=s', # timeout |
266 | 'i=s', # if-modified-since |
267 | 'c=s', # content type for POST |
268 | 'C=s', # credentials for basic authorization |
269 | 'H=s@', # extra headers, form "Header: value string" |
270 | # |
271 | 'u', # display method, URL and headers of request |
272 | 'U', # display request headers also |
273 | 's', # display status code |
274 | 'S', # display whole chain of status codes |
275 | 'e', # display response headers (default for HEAD) |
276 | 'd', # don't display content |
277 | # |
278 | 'h', # print usage |
279 | 'v', # print version |
280 | # |
281 | 'p=s', # proxy URL |
282 | 'P', # don't load proxy setting from environment |
283 | # |
284 | 'o=s', # output format |
285 | ); |
286 | |
287 | Getopt::Long::config("noignorecase", "bundling"); |
288 | unless (GetOptions(\%options, @getopt_args)) { |
289 | usage(); |
290 | } |
291 | if ($options{'v'}) { |
292 | require LWP; |
293 | my $DISTNAME = 'libwww-perl-' . LWP::Version(); |
294 | die <<"EOT"; |
295 | This is lwp-request version $VERSION ($DISTNAME) |
296 | |
297 | Copyright 1995-1999, Gisle Aas. |
298 | |
299 | This program is free software; you can redistribute it and/or |
300 | modify it under the same terms as Perl itself. |
301 | EOT |
302 | } |
303 | |
304 | usage() if $options{'h'} || !@ARGV; |
305 | |
306 | # Create the user agent object |
307 | $ua = RequestAgent->new; |
308 | |
309 | # Load proxy settings from *_proxy environment variables. |
310 | $ua->env_proxy unless $options{'P'}; |
311 | |
312 | $method = uc($options{'m'}) if defined $options{'m'}; |
313 | |
314 | if ($options{'f'}) { |
315 | if ($options{'c'}) { |
316 | $allowed_methods{$method} = "C"; # force content |
317 | } |
318 | else { |
319 | $allowed_methods{$method} = ""; |
320 | } |
321 | } |
322 | elsif (!defined $allowed_methods{$method}) { |
323 | die "$progname: $method is not an allowed method\n"; |
324 | } |
325 | |
326 | if ($method eq "HEAD") { |
327 | $options{'s'} = 1; |
328 | $options{'e'} = 1 unless $options{'d'}; |
329 | $options{'d'} = 1; |
330 | } |
331 | |
332 | if (defined $options{'t'}) { |
333 | $options{'t'} =~ /^(\d+)([smh])?/; |
334 | die "$progname: Illegal timeout value!\n" unless defined $1; |
335 | $timeout = $1; |
336 | if (defined $2) { |
337 | $timeout *= 60 if $2 eq "m"; |
338 | $timeout *= 3600 if $2 eq "h"; |
339 | } |
340 | $ua->timeout($timeout); |
341 | } |
342 | |
343 | if (defined $options{'i'}) { |
344 | if (-e $options{'i'}) { |
345 | $time = (stat _)[9]; |
346 | } |
347 | else { |
348 | $time = str2time($options{'i'}); |
349 | die "$progname: Illegal time syntax for -i option\n" |
350 | unless defined $time; |
351 | } |
352 | $options{'i'} = time2str($time); |
353 | } |
354 | |
355 | $content = undef; |
356 | if ($allowed_methods{$method} eq "C") { |
357 | # This request needs some content |
358 | unless (defined $options{'c'}) { |
359 | # set default content type |
360 | $options{'c'} = ($method eq "POST") ? |
361 | "application/x-www-form-urlencoded" |
362 | : "text/plain"; |
363 | } |
364 | else { |
365 | die "$progname: Illegal Content-type format\n" |
366 | unless $options{'c'} =~ m,^[\w\-]+/[\w\-.+]+(?:\s*;.*)?$, |
367 | } |
368 | print STDERR "Please enter content ($options{'c'}) to be ${method}ed:\n" |
369 | if -t; |
370 | binmode STDIN unless -t or $options{'a'}; |
371 | $content = join("", <STDIN>); |
372 | } |
373 | else { |
374 | die "$progname: Can't set Content-type for $method requests\n" |
375 | if defined $options{'c'}; |
376 | } |
377 | |
378 | # Set up a request. We will use the same request object for all URLs. |
379 | $request = HTTP::Request->new($method); |
380 | $request->header('If-Modified-Since', $options{'i'}) if defined $options{'i'}; |
381 | for my $user_header (@{ $options{'H'} || [] }) { |
382 | my ($header_name, $header_value) = split /:\s*/, $user_header, 2; |
383 | $request->header($header_name, $header_value); |
384 | $ua->agent($header_value) if lc($header_name) eq "user-agent"; # Ugh! |
385 | } |
386 | #$request->header('Accept', '*/*'); |
387 | if ($options{'c'}) { # will always be set for request that wants content |
388 | $request->header('Content-Type', $options{'c'}); |
389 | $request->header('Content-Length', length $content); # Not really needed |
390 | $request->content($content); |
391 | } |
392 | |
393 | $errors = 0; |
394 | |
395 | # Ok, now we perform the requests, one URL at a time |
396 | while ($url = shift) { |
397 | # Create the URL object, but protect us against bad URLs |
398 | eval { |
399 | if ($url =~ /^\w+:/ || $options{'b'}) { # is there any scheme specification |
400 | $url = URI->new($url, $options{'b'}); |
401 | $url = $url->abs($options{'b'}) if $options{'b'}; |
402 | } |
403 | else { |
404 | $url = uf_uri($url); |
405 | } |
406 | }; |
407 | if ($@) { |
408 | $@ =~ s/ at .* line \d+.*//; |
409 | print STDERR $@; |
410 | $errors++; |
411 | next; |
412 | } |
413 | |
414 | $ua->proxy($url->scheme, $options{'p'}) if $options{'p'}; |
415 | |
416 | # Send the request and get a response back from the server |
417 | $request->uri($url); |
418 | $response = $ua->request($request); |
419 | |
420 | if ($options{'u'} || $options{'U'}) { |
421 | my $url = $response->request->uri->as_string; |
422 | print "$method $url\n"; |
423 | print $response->request->headers_as_string, "\n" if $options{'U'}; |
424 | } |
425 | |
426 | if ($options{'S'}) { |
427 | for my $r ($response->redirects, $response) { |
428 | my $method = $r->request->method; |
429 | my $url = $r->request->uri->as_string; |
430 | print "$method $url --> ", $r->status_line, "\n"; |
431 | } |
432 | } |
433 | elsif ($options{'s'}) { |
434 | print $response->status_line, "\n"; |
435 | } |
436 | |
437 | if ($options{'e'}) { |
438 | # Display headers |
439 | print $response->headers_as_string; |
440 | print "\n"; # separate headers and content |
441 | } |
442 | |
443 | unless ($options{'d'}) { |
444 | if ($options{'o'} && |
445 | $response->content_type eq 'text/html') { |
446 | eval { |
447 | require HTML::Parse; |
448 | }; |
449 | if ($@) { |
450 | if ($@ =~ m,^Can't locate HTML/Parse.pm in \@INC,) { |
451 | die "The HTML-Tree distribution need to be installed for the -o option to be used.\n"; |
452 | } |
453 | else { |
454 | die $@; |
455 | } |
456 | } |
457 | my $html = HTML::Parse::parse_html($response->content); |
458 | { |
459 | $options{'o'} eq 'ps' && do { |
460 | require HTML::FormatPS; |
461 | my $f = HTML::FormatPS->new; |
462 | print $f->format($html); |
463 | last; |
464 | }; |
465 | $options{'o'} eq 'text' && do { |
466 | require HTML::FormatText; |
467 | my $f = HTML::FormatText->new; |
468 | print $f->format($html); |
469 | last; |
470 | }; |
471 | $options{'o'} eq 'html' && do { |
472 | print $html->as_HTML; |
473 | last; |
474 | }; |
475 | $options{'o'} eq 'links' && do { |
476 | my $base = $response->base; |
477 | $base = $options{'b'} if $options{'b'}; |
478 | for ( @{ $html->extract_links } ) { |
479 | my($link, $elem) = @$_; |
480 | my $tag = uc $elem->tag; |
481 | $link = URI->new($link)->abs($base)->as_string; |
482 | print "$tag\t$link\n"; |
483 | } |
484 | last; |
485 | }; |
486 | $options{'o'} eq 'dump' && do { |
487 | $html->dump; |
488 | last; |
489 | }; |
490 | # It is bad to not notice this before now :-( |
491 | die "Illegal -o option value ($options{'o'})\n"; |
492 | } |
493 | } |
494 | else { |
495 | binmode STDOUT unless $options{'a'}; |
496 | print $response->content; |
497 | } |
498 | } |
499 | |
500 | $errors++ unless $response->is_success; |
501 | } |
502 | |
503 | exit $errors; |
504 | |
505 | |
506 | sub usage |
507 | { |
508 | die <<"EOT"; |
509 | Usage: $progname [-options] <url>... |
510 | -m <method> use method for the request (default is '$method') |
511 | -f make request even if $progname believes method is illegal |
512 | -b <base> Use the specified URL as base |
513 | -t <timeout> Set timeout value |
514 | -i <time> Set the If-Modified-Since header on the request |
515 | -c <conttype> use this content-type for POST, PUT, CHECKIN |
516 | -a Use text mode for content I/O |
517 | -p <proxyurl> use this as a proxy |
518 | -P don't load proxy settings from environment |
519 | -H <header> send this HTTP header (you can specify several) |
520 | -C <username>:<password> |
521 | provide credentials for basic authentication |
522 | |
523 | -u Display method and URL before any response |
524 | -U Display request headers (implies -u) |
525 | -s Display response status code |
526 | -S Display response status chain |
527 | -e Display response headers |
528 | -d Do not display content |
529 | -o <format> Process HTML content in various ways |
530 | |
531 | -v Show program version |
532 | -h Print this message |
533 | EOT |
534 | } |