[win32] tweaks to win32 makefiles. This version builds and passes all
[p5sagit/p5-mst-13.2.git] / win32 / bin / webget.pl
CommitLineData
d444a431 1#!/usr/local/bin/perl -w
2
3#-
4#!/usr/local/bin/perl -w
5$version = "951121.18";
6$comments = 'jfriedl@omron.co.jp';
7
8##
9## This is "webget"
10##
11## Jeffrey Friedl (jfriedl@omron.co.jp), July 1994.
12## Copyright 19.... ah hell, just take it.
13## Should work with either perl4 or perl5
14##
15## BLURB:
16## Given a URL on the command line (HTTP and FTP supported at the moment),
17## webget fetches the named object (HTML text, images, audio, whatever the
18## object happens to be). Will automatically use a proxy if one is defined
19## in the environment, follow "this URL has moved" responses, and retry
20## "can't find host" responses from a proxy in case host lookup was slow).
21## Supports users & passwords (FTP), Basic Authorization (HTTP), update-if-
22## modified (HTTP), and much more. Works with perl4 or perl5.
23
24##
25## More-detailed instructions in the comment block below the history list.
26##
27
28##
29## To-do:
30## Add gopher support.
31## Fix up how error messages are passed among this and the libraries.
32##
33
34## 951219.19
35## Lost ftp connections now die with a bit more grace.
36##
37## 951121.18
38## Add -nnab.
39## Brought the "usage" string in line with reality.
40##
41## 951114.17
42## Added -head.
43## Added -update/-refresh/-IfNewerThan. If any URL was not pulled
44## because it was not out of date, an exit value of 2 is returned.
45##
46## 951031.16
47## Added -timeout. Cleaned up (a bit) the exit value. Now exits
48## with 1 if all URLs had some error (timeout exits immediately with
49## code 3, though. This is subject to change). Exits with 0 if any
50## URL was brought over safely.
51##
52## 951017.15
53## Neat -pf, -postfile idea from Lorrie Cranor
54## (http://www.ccrc.wustl.edu/~lorracks/)
55##
56## 950912.14
57## Sigh, fixed a typo.
58##
59## 950911.13
60## Added Basic Authorization support for http. See "PASSWORDS AND STUFF"
61## in the documentation.
62##
63## 950911.12
64## Implemented a most-excellent suggestion by Anthony D'Atri
65## (aad@nwnet.net), to be able to automatically grab to a local file of
66## the same name as the URL. See the '-nab' flag.
67##
68## 950706.11
69## Quelled small -w warning (thanks: Lars Rasmussen <gnort@daimi.aau.dk>)
70##
71## 950630.10
72## Steve Campbell to the rescue again. FTP now works when supplied
73## with a userid & password (eg ftp://user:pass@foo.bar.com/index.txt).
74##
75## 950623.9
76## Incorporated changes from Steve Campbell (steven_campbell@uk.ibm.com)
77## so that the ftp will work when no password is required of a user.
78##
79## 950530.8
80## Minor changes:
81## Eliminate read-size warning message when size unknown.
82## Pseudo-debug/warning messages at the end of debug_read now go to
83## stderr. Some better error handling when trying to contact systems
84## that aren't really set up for ftp. Fixed a bug concerning FTP access
85## to a root directory. Added proxy documentation at head of file.
86##
87## 950426.6,7
88## Complete Overhaul:
89## Renamed from httpget. Added ftp support (very sketchy at the moment).
90## Redid to work with new 'www.pl' library; chucked 'Www.pl' library.
91## More or less new and/or improved in many ways, but probably introduced
92## a few bugs along the way.
93##
94## 941227.5
95## Added follow stuff (with -nofollow, etc.)
96## Added -updateme. Cool!
97## Some general tidying up.
98##
99## 941107.4
100## Allowed for ^M ending a header line... PCs give those kind of headers.
101##
102## 940820.3
103## First sorta'clean net release.
104##
105##
106
107##
108##>
109##
110## Fetch http and/or ftp URL(s) given on the command line and spit to
111## STDOUT.
112##
113## Options include:
114## -V, -version
115## Print version information; exit.
116##
117## -p, -post
118## If the URL looks like a reply to a form (i.e. has a '?' in it),
119## the request is POST'ed instead of GET'ed.
120##
121## -head
122## Gets the header only (for HTTP). This might include such useful
123## things as 'Last-modified' and 'Content-length' fields
124## (a lack of a 'Last-modified' might be a good indication that it's
125## a CGI).
126##
127## The "-head" option implies "-nostrip", but does *not* imply,
128## for example "-nofollow".
129##
130##
131## -pf, -postfile
132## The item after the '?' is taken as a local filename, and the contents
133## are POST'ed as with -post
134##
135## -nab, -f, -file
136## Rather than spit the URL(s) to standard output, unconditionally
137## dump to a file (or files) whose name is that as used in the URL,
138## sans path. I like '-nab', but supply '-file' as well since that's
139## what was originally suggested. Also see '-update' below for the
140## only-if-changed version.
141##
142## -nnab
143## Like -nab, but in addtion to dumping to a file, dump to stdout as well.
144## Sort of like the 'tee' command.
145##
146## -update, -refresh
147## Do the same thing as -nab, etc., but does not bother pulling the
148## URL if it older than the localfile. Only applies to HTTP.
149## Uses the HTTP "If-Modified-Since" field. If the URL was not modified
150## (and hence not changed), the return value is '2'.
151##
152## -IfNewerThan FILE
153## -int FILE
154## Only pulls URLs if they are newer than the date the local FILE was
155## last written.
156##
157## -q, -quiet
158## Suppresses all non-essential informational messages.
159##
160## -nf, -nofollow
161## Normally, a "this URL has moved" HTTP response is automatically
162## followed. Not done with -nofollow.
163##
164## -nr, -noretry
165## Normally, an HTTP proxy response of "can't find host" is retried
166## up to three times, to give the remote hostname lookup time to
167## come back with an answer. This suppresses the retries. This is the
168## same as '-retry 0'.
169##
170## -r#, -retry#, -r #, -retry #
171## Sets the number of times to retry. Default 3.
172##
173## -ns, -nostrip
174## For HTTP items (including other items going through an HTTP proxy),
175## the HTTP response header is printed rather than stripped as default.
176##
177## -np, -noproxy
178## A proxy is not used, even if defined for the protocol.
179##
180## -h, -help
181## Show a usage message and exit.
182##
183## -d, -debug
184## Show some debugging messages.
185##
186## -updateme
187## The special and rather cool flag "-updateme" will see if webget has
188## been updated since you got your version, and prepare a local
189## version of the new version for you to use. Keep updated! (although
190## you can always ask to be put on the ping list to be notified when
191## there's a new version -- see the author's perl web page).
192##
193## -timeout TIMESPAN
194## -to TIMESPAN
195## Time out if a connection can not be made within the specified time
196## period. TIMESPAN is normally in seconds, although a 'm' or 'h' may
197## be appended to indicate minutes and hours. "-to 1.5m" would timeout
198## after 90 seconds.
199##
200## (At least for now), a timeout causes immediate program death (with
201## exit value 3). For some reason, the alarm doesn't always cause a
202## waiting read or connect to abort, so I just die immediately.. /-:
203##
204## I might consider adding an "entire fetch" timeout, if someone
205## wants it.
206##
207## PASSWORDS AND SUCH
208##
209## You can use webget to do FTP fetches from non-Anonymous systems and
210## accounts. Just put the required username and password into the URL,
211## as with
212## webget 'ftp:/user:password@ftp.somesite.com/pub/pix/babe.gif
213## ^^^^^^^^^^^^^
214## Note the user:password is separated from the hostname by a '@'.
215##
216## You can use the same kind of thing with HTTP, and if so it will provide
217## what's know as Basic Authorization. This is >weak< authorization. It
218## also provides >zero< security -- I wouldn't be sending any credit-card
219## numbers this way (unless you send them 'round my way :-). It seems to
220## be used most by providers of free stuff where they want to make some
221## attempt to limit access to "known users".
222##
223## PROXY STUFF
224##
225## If you need to go through a gateway to get out to the whole internet,
226## you can use a proxy if one's been set up on the gateway. This is done
227## by setting the "http_proxy" environmental variable to point to the
228## proxy server. Other variables are used for other target protocols....
229## "gopher_proxy", "ftp_proxy", "wais_proxy", etc.
230##
231## For example, I have the following in my ".login" file (for use with csh):
232##
233## setenv http_proxy http://local.gateway.machine:8080/
234##
235## This is to indicate that any http URL should go to local.gateway.machine
236## (port 8080) via HTTP. Additionally, I have
237##
238## setenv gopher_proxy "$http_proxy"
239## setenv wais_proxy "$http_proxy"
240## setenv ftp_proxy "$http_proxy"
241##
242## This means that any gopher, wais, or ftp URL should also go to the
243## same place, also via HTTP. This allows webget to get, for example,
244## GOPHER URLs even though it doesn't support GOPHER itself. It uses HTTP
245## to talk to the proxy, which then uses GOPHER to talk to the destination.
246##
247## Finally, if there are sites inside your gateway that you would like to
248## connect to, you can list them in the "no_proxy" variable. This will allow
249## you to connect to them directly and skip going through the proxy:
250##
251## setenv no_proxy "www.this,www.that,www.other"
252##
253## I (jfriedl@omron.co.jp) have little personal experience with proxies
254## except what I deal with here at Omron, so if this is not representative
255## of your situation, please let me know.
256##
257## RETURN VALUE
258## The value returned to the system by webget is rather screwed up because
259## I didn't think about dealing with it until things were already
260## complicated. Since there can be more than one URL on the command line,
261## it's hard to decide what to return when one times out, another is fetched,
262## another doesn't need to be fetched, and a fourth isn't found.
263##
264## So, here's the current status:
265##
266## Upon any timeout (via the -timeout arg), webget immediately
267## returns 3. End of story. Otherwise....
268##
269## If any URL was fetched with a date limit (i.e. via
270## '-update/-refresh/-IfNewerThan' and was found to not have changed,
271## 2 is returned. Otherwise....
272##
273## If any URL was successfully fetched, 0 is returned. Otherwise...
274##
275## If there were any errors, 1 is returned. Otherwise...
276##
277## Must have been an info-only or do-nothing instance. 0 is returned.
278##
279## Phew. Hopefully useful to someone.
280##<
281##
282
283## Where latest version should be.
284$WEB_normal = 'http://www.wg.omron.co.jp/~jfriedl/perl/webget';
285$WEB_inlined = 'http://www.wg.omron.co.jp/~jfriedl/perl/inlined/webget';
286
287
288require 'network.pl'; ## inline if possible (directive to a tool of mine)
289require 'www.pl'; ## inline if possible (directive to a tool of mine)
290$inlined=0; ## this might be changed by a the inline thing.
291
292##
293## Exit values. All screwed up.
294##
295$EXIT_ok = 0;
296$EXIT_error = 1;
297$EXIT_notmodified = 2;
298$EXIT_timeout = 3;
299
300##
301##
302
303warn qq/WARNING:\n$0: need a newer version of "network.pl"\n/ if
304 !defined($network'version) || $network'version < "950311.5";
305warn qq/WARNING:\n$0: need a newer version of "www.pl"\n/ if
306 !defined($www'version) || $www'version < "951114.8";
307
308$WEB = $inlined ? $WEB_inlined : $WEB_normal;
309
310$debug = 0;
311$strip = 1; ## default is to strip
312$quiet = 0; ## also normally off.
313$follow = 1; ## normally, we follow "Found (302)" links
314$retry = 3; ## normally, retry proxy hostname lookups up to 3 times.
315$nab = 0; ## If true, grab to a local file of the same name.
316$refresh = 0; ## If true, use 'If-Modified-Since' with -nab get.
317$postfile = 0; ## If true, filename is given after the '?'
318$defaultdelta2print = 2048;
319$TimeoutSpan = 0; ## seconds after which we should time out.
320
321while (@ARGV && $ARGV[0] =~ m/^-/)
322{
323 $arg = shift(@ARGV);
324
325 $nab = 1, next if $arg =~ m/^-f(ile)?$/;
326 $nab = 1, next if $arg =~ m/^-nab$/;
327 $nab = 2, next if $arg =~ m/^-nnab$/;
328 $post = 1, next if $arg =~ m/^-p(ost)?$/i;
329 $post = $postfile = 1, next if $arg =~ m/^-p(ost)?f(ile)?$/i;
330 $quiet=1, next if $arg =~ m/^-q(uiet)?$/;
331 $follow = 0, next if $arg =~ m/^-no?f(ollow)?$/;
332 $strip = 0, next if $arg =~ m/^-no?s(trip)?$/;
333 $debug=1, next if $arg =~ m/^-d(ebug)?$/;
334 $noproxy=1, next if $arg =~ m/^-no?p(roxy)?$/;
335 $retry=0, next if $arg =~ m/^-no?r(etry)?$/;
336 $retry=$2, next if $arg =~ m/^-r(etry)?(\d+)$/;
337 &updateme if $arg eq '-updateme';
338 $strip = 0, $head = 1, next if $arg =~ m/^-head(er)?/;
339 $nab = $refresh = 1, next if $arg =~ m/^-(refresh|update)/;
340
341 &usage($EXIT_ok) if $arg =~ m/^-h(elp)?$/;
342 &show_version, exit($EXIT_ok) if $arg eq '-version' || $arg eq '-V';
343
344 if ($arg =~ m/^-t(ime)?o(ut)?$/i) {
345 local($num) = shift(@ARGV);
346 &usage($EXIT_error, "expecting timespan argument to $arg\n") unless
347 $num =~ m/^\d+(\d*)?[hms]?$/;
348 &timeout_arg($num);
349 next;
350 }
351
352 if ($arg =~ m/^-if?n(ewer)?t(han)?$/i) {
353 $reference_file = shift(@ARGV);
354 &usage($EXIT_error, "expecting filename arg to $arg")
355 if !defined $reference_file;
356 if (!-f $reference_file) {
357 warn qq/$0: ${arg}'s "$reference_file" not found.\n/;
358 exit($EXIT_error);
359 }
360 next;
361 }
362
363 if ($arg eq '-r' || $arg eq '-retry') {
364 local($num) = shift(@ARGV);
365 &usage($EXIT_error, "expecting numerical arg to $arg\n") unless
366 defined($num) && $num =~ m/^\d+$/;
367 $retry = $num;
368 next;
369 }
370 &usage($EXIT_error, qq/$0: unknown option "$arg"\n/);
371}
372
373if ($head && $post) {
374 warn "$0: combining -head and -post makes no sense, ignoring -post.\n";
375 $post = 0;
376 undef $postfile;
377}
378
379if ($refresh && defined($reference_file)) {
380 warn "$0: combining -update and -IfNewerThan make no sense, ignoring -IfNewerThan.\n";
381 undef $reference_file;
382}
383
384if (@ARGV == 0) {
385 warn "$0: nothing to do. Use -help for info.\n";
386 exit($EXIT_ok);
387}
388
389
390##
391## Now run through the remaining arguments (mostly URLs) and do a quick
392## check to see if they look well-formed. We won't *do* anything -- just
393## want to catch quick errors before really starting the work.
394##
395@tmp = @ARGV;
396$errors = 0;
397while (@tmp) {
398 $arg = shift(@tmp);
399 if ($arg =~ m/^-t(ime)?o(ut)?$/) {
400 local($num) = shift(@tmp);
401 if ($num !~ m/^\d+(\d*)?[hms]?$/) {
402 &warn("expecting timespan argument to $arg\n");
403 $errors++;
404 }
405 } else {
406 local($protocol) = &www'grok_URL($arg, $noproxy);
407
408 if (!defined $protocol) {
409 warn qq/can't grok "$arg"/;
410 $errors++;
411 } elsif (!$quiet && ($protocol eq 'ftp')) {
412 warn qq/warning: -head ignored for ftp URLs\n/ if $head;
413 warn qq/warning: -refresh ignored for ftp URLs\n/if $refresh;
414 warn qq/warning: -IfNewerThan ignored for ftp URLs\n/if defined($reference_file);
415
416 }
417 }
418}
419
420exit($EXIT_error) if $errors;
421
422
423$SuccessfulCount = 0;
424$NotModifiedCount = 0;
425
426##
427## Now do the real thing.
428##
429while (@ARGV) {
430 $arg = shift(@ARGV);
431 if ($arg =~ m/^-t(ime)?o(ut)?$/) {
432 &timeout_arg(shift(@ARGV));
433 } else {
434 &fetch_url($arg);
435 }
436}
437
438if ($NotModifiedCount) {
439 exit($EXIT_notmodified);
440} elsif ($SuccessfulCount) {
441 exit($EXIT_ok);
442} else {
443 exit($EXIT_error);
444}
445
446###########################################################################
447###########################################################################
448
449sub timeout_arg
450{
451 ($TimeoutSpan) = @_;
452 $TimeoutSpan =~ s/s//;
453 $TimeoutSpan *= 60 if $TimeoutSpan =~ m/m/;
454 $TimeoutSpan *= 3600 if $TimeoutSpan =~ m/h/;
455
456}
457
458##
459## As a byproduct, returns the basename of $0.
460##
461sub show_version
462{
463 local($base) = $0;
464 $base =~ s,.*/,,;
465 print STDERR "This is $base version $version\n";
466 $base;
467}
468
469##
470## &usage(exitval, message);
471##
472## Prints a usage message to STDERR.
473## If MESSAGE is defined, prints that first.
474## If exitval is defined, exits with that value. Otherwise, returns.
475##
476sub usage
477{
478 local($exit, $message) = @_;
479
480 print STDERR $message if defined $message;
481 local($base) = &show_version;
482 print STDERR <<INLINE_LITERAL_TEXT;
483usage: $0 [options] URL ...
484 Fetches and displays the named URL(s). Supports http and ftp.
485 (if no protocol is given, a leading "http://" is normally used).
486
487Options are from among:
488 -V, -version Print version information; exit.
489 -p, -post If URL looks like a form reply, does POST instead of GET.
490 -pf, -postfile Like -post, but takes everything after ? to be a filename.
491 -q, -quiet All non-essential informational messages are suppressed.
492 -nf, -nofollow Don't follow "this document has moved" replies.
493 -nr, -noretry Doesn't retry a failed hostname lookup (same as -retry 0)
494 -r #, -retry # Sets failed-hostname-lookup-retry to # (default $retry)
495 -np, -noproxy Uses no proxy, even if one defined for the protocol.
496 -ns, -nostrip The HTTP header, normally elided, is printed.
497 -head gets item header only (implies -ns)
498 -nab, -file Dumps output to file whose name taken from URL, minus path
499 -nnab Like -nab, but *also* dumps to stdout.
500 -update HTTP only. Like -nab, but only if the page has been modified.
501 -h, -help Prints this message.
502 -IfNewerThan F HTTP only. Only brings page if it is newer than named file.
503 -timeout T Fail if a connection can't be made in the specified time.
504
505 -updateme Pull the latest version of $base from
506 $WEB
507 and reports if it is newer than your current version.
508
509Comments to $comments.
510INLINE_LITERAL_TEXT
511
512 exit($exit) if defined $exit;
513}
514
515##
516## Pull the latest version of this program to a local file.
517## Clip the first couple lines from this executing file so that we
518## preserve the local invocation style.
519##
520sub updateme
521{
522 ##
523 ## Open a temp file to hold the new version,
524 ## redirecting STDOUT to it.
525 ##
526 open(STDOUT, '>'.($tempFile="/tmp/webget.new")) ||
527 open(STDOUT, '>'.($tempFile="/usr/tmp/webget.new")) ||
528 open(STDOUT, '>'.($tempFile="/webget.new")) ||
529 open(STDOUT, '>'.($tempFile="webget.new")) ||
530 die "$0: can't open a temp file.\n";
531
532 ##
533 ## See if we can figure out how we were called.
534 ## The seek will rewind not to the start of the data, but to the
535 ## start of the whole program script.
536 ##
537 ## Keep the first line if it begins with #!, and the next two if they
538 ## look like the trick mentioned in the perl man page for getting
539 ## around the lack of #!-support.
540 ##
541 if (seek(DATA, 0, 0)) { ##
542 $_ = <DATA>; if (m/^#!/) { print STDOUT;
543 $_ = <DATA>; if (m/^\s*eval/) { print STDOUT;
544 $_ = <DATA>; if (m/^\s*if/) { print STDOUT; }
545 }
546 }
547 print STDOUT "\n#-\n";
548 }
549
550 ## Go get the latest one...
551 local(@options);
552 push(@options, 'head') if $head;
553 push(@options, 'nofollow') unless $follow;
554 push(@options, ('retry') x $retry) if $retry;
555 push(@options, 'quiet') if $quiet;
556 push(@options, 'debug') if $debug;
557 local($status, $memo, %info) = &www'open_http_url(*IN, $WEB, @options);
558 die "fetching $WEB:\n $memo\n" unless $status eq 'ok';
559
560 $size = $info{'content-length'};
561 while (<IN>)
562 {
563 $size -= length;
564 print STDOUT;
565 if (!defined $fetched_version && m/version\s*=\s*"([^"]+)"/) {
566 $fetched_version = $1;
567 &general_read(*IN, $size);
568 last;
569 }
570 }
571
572 $fetched_version = "<unknown>" unless defined $fetched_version;
573
574 ##
575 ## Try to update the mode of the temp file with the mode of this file.
576 ## Don't worry if it fails.
577 ##
578 chmod($mode, $tempFile) if $mode = (stat($0))[2];
579
580 $as_well = '';
581 if ($fetched_version eq $version)
582 {
583 print STDERR "You already have the most-recent version ($version).\n",
584 qq/FWIW, the newly fetched one has been left in "$tempFile".\n/;
585 }
586 elsif ($fetched_version <= $version)
587 {
588 print STDERR
589 "Mmm, your current version seems newer (?!):\n",
590 qq/ your version: "$version"\n/,
591 qq/ new version: "$fetched_version"\n/,
592 qq/FWIW, fetched one left in "$tempFile".\n/;
593 }
594 else
595 {
596 print STDERR
597 "Indeed, your current version was old:\n",
598 qq/ your version: "$version"\n/,
599 qq/ new version: "$fetched_version"\n/,
600 qq/The file "$tempFile" is ready to replace the old one.\n/;
601 print STDERR qq/Just do:\n % mv $tempFile $0\n/ if -f $0;
602 $as_well = ' as well';
603 }
604 print STDERR "Note that the libraries it uses may (or may not) need updating$as_well.\n"
605 unless $inlined;
606 exit($EXIT_ok);
607}
608
609##
610## Given a list of URLs, fetch'em.
611## Parses the URL and calls the routine for the appropriate protocol
612##
613sub fetch_url
614{
615 local(@todo) = @_;
616 local(%circref, %hold_circref);
617
618 URL_LOOP: while (@todo)
619 {
620 $URL = shift(@todo);
621 %hold_circref = %circref; undef %circref;
622
623 local($protocol, @args) = &www'grok_URL($URL, $noproxy);
624
625 if (!defined $protocol) {
626 &www'message(1, qq/can't grok "$URL"/);
627 next URL_LOOP;
628 }
629
630 ## call protocol-specific handler
631 $func = "fetch_via_" . $protocol;
632 $error = &$func(@args, $TimeoutSpan);
633 if (defined $error) {
634 &www'message(1, "$URL: $error");
635 } else {
636 $SuccessfulCount++;
637 }
638 }
639}
640
641sub filedate
642{
643 local($filename) = @_;
644 local($filetime) = (stat($filename))[9];
645 return 0 if !defined $filetime;
646 local($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($filetime);
647 return 0 if !defined $wday;
648 sprintf(qq/"%s, %02d-%s-%02d %02d:%02d:%02d GMT"/,
649 ("Sunday", "Monday", "Tuesdsy", "Wednesday",
650 "Thursday", "Friday", "Saturday")[$wday],
651 $mday,
652 ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
653 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[$mon],
654 $year,
655 $hour,
656 $min,
657 $sec);
658}
659
660sub local_filename
661{
662 local($filename) = @_;
663 $filename =~ s,/+$,,; ## remove any trailing slashes
664 $filename =~ s,.*/,,; ## remove any leading path
665 if ($filename eq '') {
666 ## empty -- pick a random name
667 $filename = "file0000";
668 ## look for a free random name.
669 $filename++ while -f $filename;
670 }
671 $filename;
672}
673
674sub set_output_file
675{
676 local($filename) = @_;
677 if (!open(OUT, ">$filename")) {
678 &www'message(1, "$0: can't open [$filename] for output");
679 } else {
680 open(SAVEOUT, ">>&STDOUT") || die "$!";;
681 open(STDOUT, ">>&OUT");
682 }
683}
684
685sub close_output_file
686{
687 local($filename) = @_;
688 unless ($quiet)
689 {
690 local($note) = qq/"$filename" written/;
691 if (defined $error) {
692 $note .= " (possibly corrupt due to error above)";
693 }
694 &www'message(1, "$note.");
695 }
696 close(STDOUT);
697 open(STDOUT, ">&SAVEOUT");
698}
699
700sub http_alarm
701{
702 &www'message(1, "ERROR: $AlarmNote.");
703 exit($EXIT_timeout); ## the alarm doesn't seem to cause a waiting syscall to break?
704# $HaveAlarm = 1;
705}
706
707##
708## Given the host, port, and path, and (for info only) real target,
709## fetch via HTTP.
710##
711## If there is a user and/or password, use that for Basic Authorization.
712##
713## If $timeout is nonzero, time out after that many seconds.
714##
715sub fetch_via_http
716{
717 local($host, $port, $path, $target, $user, $password, $timeout) = @_;
718 local(@options);
719 local($local_filename);
720
721 ##
722 ## If we're posting, but -postfile was given, we need to interpret
723 ## the item in $path after '?' as a filename, and replace it with
724 ## the contents of the file.
725 ##
726 if ($postfile && $path =~ s/\?([\d\D]*)//) {
727 local($filename) = $1;
728 return("can't open [$filename] to POST") if !open(IN, "<$filename");
729 local($/) = ''; ## want to suck up the whole file.
730 $path .= '?' . <IN>;
731 close(IN);
732 }
733
734 $local_filename = &local_filename($path)
735 if $refresh || $nab || defined($reference_file);
736 $refresh = &filedate($local_filename) if $refresh;
737 $refresh = &filedate($reference_file) if defined($reference_file);
738
739 push(@options, 'head') if $head;
740 push(@options, 'post') if $post;
741 push(@options, 'nofollow') unless $follow;
742 push(@options, ('retry') x 3);
743 push(@options, 'quiet') if $quiet;
744 push(@options, 'debug') if $debug;
745 push(@options, "ifmodifiedsince=$refresh") if $refresh;
746
747 if (defined $password || defined $user) {
748 local($auth) = join(':', ($user || ''), ($password || ''));
749 push(@options, "authorization=$auth");
750 }
751
752 local($old_alarm);
753 if ($timeout) {
754 $old_alarm = $SIG{'ALRM'} || 'DEFAULT';
755 $SIG{'ALRM'} = "main'http_alarm";
756# $HaveAlarm = 0;
757 $AlarmNote = "host $host";
758 $AlarmNote .= ":$port" if $port != $www'default_port{'http'};
759 $AlarmNote .= " timed out after $timeout second";
760 $AlarmNote .= 's' if $timeout > 1;
761 alarm($timeout);
762 }
763 local($result, $memo, %info) =
764 &www'open_http_connection(*HTTP, $host,$port,$path,$target,@options);
765
766 if ($timeout) {
767 alarm(0);
768 $SIG{'ALRM'} = $old_alarm;
769 }
770
771# if ($HaveAlarm) {
772# close(HTTP);
773# $error = "timeout after $timeout second";
774# $error .= "s" if $timeout > 1;
775# return $error;
776# }
777
778 if ($follow && ($result eq 'follow')) {
779 %circref = %hold_circref;
780 $circref{$memo} = 1;
781 unshift(@todo, $memo);
782 return undef;
783 }
784
785
786 return $memo if $result eq 'error';
787 if (!$quiet && $result eq 'status' && ! -t STDOUT) {
788 #&www'message(1, "Warning: $memo");
789 $error = "Warning: $memo";
790 }
791
792 if ($info{'CODE'} == 304) { ## 304 is magic for "Not Modified"
793 close(HTTP);
794 &www'message(1, "$URL: Not Modified") unless $quiet;
795 $NotModifiedCount++;
796 return undef; ## no error
797 }
798
799
800 &set_output_file($local_filename) if $nab;
801
802 unless($strip) {
803 print $info{'STATUS'}, "\n", $info{'HEADER'}, "\n";
804
805 print SAVEOUT $info{'STATUS'}, "\n", $info{'HEADER'}, "\n" if $nab==2;
806 }
807
808 if (defined $info{'BODY'}) {
809 print $info{'BODY'};
810 print SAVEOUT $info{'BODY'} if $nab==2;
811 }
812
813 if (!$head) {
814 &general_read(*HTTP, $info{'content-length'});
815 }
816 close(HTTP);
817 &close_output_file($local_filename) if $nab;
818
819 $error; ## will be 'undef' if no error;
820}
821
822sub fetch_via_ftp
823{
824 local($host, $port, $path, $target, $user, $password, $timeout) = @_;
825 local($local_filename) = &local_filename($path);
826 local($ftp_debug) = $debug;
827 local(@password) = ($password);
828 $path =~ s,^/,,; ## remove a leading / from the path.
829 $path = '.' if $path eq ''; ## make sure we have something
830
831 if (!defined $user) {
832 $user = 'anonymous';
833 $password = $ENV{'USER'} || 'WWWuser';
834 @password = ($password.'@'. &network'addr_to_ascii(&network'my_addr),
835 $password.'@');
836 } elsif (!defined $password) {
837 @password = ("");
838 }
839
840 local($_last_ftp_reply, $_passive_host, $_passive_port);
841 local($size);
842
843 sub _ftp_get_reply
844 {
845 local($text) = scalar(<FTP_CONTROL>);
846 die "lost connection to $host\n" if !defined $text;
847 local($_, $tmp);
848 print STDERR "READ: $text" if $ftp_debug;
849 die "internal error: expected reply code in response from ".
850 "ftp server [$text]" unless $text =~ s/^(\d+)([- ])//;
851 local($code) = $1;
852 if ($2 eq '-') {
853 while (<FTP_CONTROL>) {
854 ($tmp = $_) =~ s/^\d+[- ]//;
855 $text .= $tmp;
856 last if m/^$code /;
857 }
858 }
859 $text =~ s/^\d+ ?/<foo>/g;
860 ($code, $text);
861 }
862
863 sub _ftp_expect
864 {
865 local($code, $text) = &_ftp_get_reply;
866 $_last_ftp_reply = $text;
867 foreach $expect (@_) {
868 return ($code, $text) if $code == $expect;
869 }
870 die "internal error: expected return code ".
871 join('|',@_).", got [$text]";
872 }
873
874 sub _ftp_send
875 {
876 print STDERR "SEND: ", @_ if $ftp_debug;
877 print FTP_CONTROL @_;
878 }
879
880 sub _ftp_do_passive
881 {
882 local(@commands) = @_;
883
884 &_ftp_send("PASV\r\n");
885 local($code) = &_ftp_expect(227, 125);
886
887 if ($code == 227)
888 {
889 die "internal error: can't grok passive reply [$_last_ftp_reply]"
890 unless $_last_ftp_reply =~ m/\(([\d,]+)\)/;
891 local($a,$b,$c,$d, $p1, $p2) = split(/,/, $1);
892 ($_passive_host, $_passive_port) =
893 ("$a.$b.$c.$d", $p1*256 + $p2);
894 }
895
896 foreach(@commands) {
897 &_ftp_send($_);
898 }
899
900 local($error)=
901 &network'connect_to(*PASSIVE, $_passive_host, $_passive_port);
902 die "internal error: passive ftp connect [$error]" if $error;
903 }
904
905 ## make the connection to the host
906 &www'message($debug, "connecting to $host...") unless $quiet;
907
908 local($old_alarm);
909 if ($timeout) {
910 $old_alarm = $SIG{'ALRM'} || 'DEFAULT';
911 $SIG{'ALRM'} = "main'http_alarm"; ## can use this for now
912# $HaveAlarm = 0;
913 $AlarmNote = "host $host";
914 $AlarmNote .= ":$port" if $port != $www'default_port{'ftp'};
915 $AlarmNote .= " timed out after $timeout second";
916 $AlarmNote .= 's' if $timeout > 1;
917 alarm($timeout);
918 }
919
920 local($error) = &network'connect_to(*FTP_CONTROL, $host, $port);
921
922 if ($timeout) {
923 alarm(0);
924 $SIG{'ALRM'} = $old_alarm;
925 }
926
927 return $error if $error;
928
929 local ($code, $text) = &_ftp_get_reply(*FTP_CONTROL);
930 close(FTP_CONTROL), return "internal ftp error: [$text]" unless $code==220;
931
932 ## log in
933 &www'message($debug, "logging in as $user...") unless $quiet;
934 foreach $password (@password)
935 {
936 &_ftp_send("USER $user\r\n");
937 ($code, $text) = &_ftp_expect(230,331,530);
938 close(FTP_CONTROL), return $text if ($code == 530);
939 last if $code == 230; ## hey, already logged in, cool.
940
941 &_ftp_send("PASS $password\r\n");
942 ($code, $text) = &_ftp_expect(220,230,530,550,332);
943 last if $code != 550;
944 last if $text =~ m/can't change directory/;
945 }
946
947 if ($code == 550)
948 {
949 $text =~ s/\n+$//;
950 &www'message(1, "Can't log in $host: $text") unless $quiet;
951 exit($EXIT_error);
952 }
953
954 if ($code == 332)
955 {
956 &_ftp_send("ACCT noaccount\r\n");
957 ($code, $text) = &_ftp_expect(230, 202, 530, 500,501,503, 421)
958 }
959 close(FTP_CONTROL), return $text if $code >= 300;
960
961 &_ftp_send("TYPE I\r\n");
962 &_ftp_expect(200);
963
964 unless ($quiet) {
965 local($name) = $path;
966 $name =~ s,.*/([^/]),$1,;
967 &www'message($debug, "requesting $name...");
968 }
969 ## get file
970 &_ftp_do_passive("RETR $path\r\n");
971 ($code,$text) = &_ftp_expect(125, 150, 550, 530);
972 close(FTP_CONTROL), return $text if $code == 530;
973
974 if ($code == 550)
975 {
976 close(PASSIVE);
977 if ($text =~ /directory/i) {
978 ## probably from "no such file or directory", so just return now.
979 close(FTP_CONTROL);
980 return $text;
981 }
982
983 ## do like Mosaic and try getting a directory listing.
984 &_ftp_send("CWD $path\r\n");
985 ($code) = &_ftp_expect(250,550);
986 if ($code == 550) {
987 close(FTP_CONTROL);
988 return $text;
989 }
990 &_ftp_do_passive("LIST\r\n");
991 &_ftp_expect(125, 150);
992 }
993
994 $size = $1 if $text =~ m/(\d+)\s+bytes/;
995 binmode(PASSIVE); ## just in case.
996 &www'message($debug, "waiting for data...") unless $quiet;
997 &set_output_file($local_filename) if $nab;
998 &general_read(*PASSIVE, $size);
999 &close_output_file($local_filename) if $nab;
1000
1001 close(PASSIVE);
1002 close(FTP_CONTROL);
1003 undef;
1004}
1005
1006sub general_read
1007{
1008 local(*INPUT, $size) = @_;
1009 local($lastcount, $bytes) = (0,0);
1010 local($need_to_clear) = 0;
1011 local($start_time) = time;
1012 local($last_time, $time) = $start_time;
1013 ## Figure out how often to print the "bytes read" message
1014 local($delta2print) =
1015 (defined $size) ? int($size/50) : $defaultdelta2print;
1016
1017 &www'message(0, "read 0 bytes") unless $quiet;
1018
1019 ## so $! below is set only if a real error happens from now
1020 eval 'local($^W) = 0; undef $!';
1021
1022
1023 while (defined($_ = <INPUT>))
1024 {
1025 ## shove it out.
1026 &www'clear_message if $need_to_clear;
1027 print;
1028 print SAVEOUT if $nab==2;
1029
1030 ## if we know the content-size, keep track of what we're reading.
1031 $bytes += length;
1032
1033 last if eof || (defined $size && $bytes >= $size);
1034
1035 if (!$quiet && $bytes > ($lastcount + $delta2print))
1036 {
1037 if ($time = time, $last_time == $time) {
1038 $delta2print *= 1.5;
1039 } else {
1040 $last_time = $time;
1041 $lastcount = $bytes;
1042 local($time_delta) = $time - $start_time;
1043 local($text);
1044
1045 $delta2print /= $time_delta;
1046 if (defined $size) {
1047 $text = sprintf("read $bytes bytes (%.0f%%)",
1048 $bytes*100/$size);
1049 } else {
1050 $text = "read $bytes bytes";
1051 }
1052
1053 if ($time_delta > 5 || ($time_delta && $bytes > 10240))
1054 {
1055 local($rate) = int($bytes / $time_delta);
1056 if ($rate < 5000) {
1057 $text .= " ($rate bytes/sec)";
1058 } elsif ($rate < 1024 * 10) {
1059 $text .= sprintf(" (%.1f k/sec)", $rate/1024);
1060 } else {
1061 $text .= sprintf(" (%.0f k/sec)", $rate/1024);
1062 }
1063 }
1064 &www'message(0, "$text...");
1065 $need_to_clear = -t STDOUT;
1066 }
1067 }
1068 }
1069
1070 if (!$quiet)
1071 {
1072 if ($size && ($size != $bytes)) {
1073 &www'message("WARNING: Expected $size bytes, read $bytes bytes.\n");
1074 }
1075# if ($!) {
1076# print STDERR "\$! is [$!]\n";
1077# }
1078# if ($@) {
1079# print STDERR "\$\@ is [$@]\n";
1080# }
1081 }
1082 &www'clear_message($text) unless $quiet;
1083}
1084
1085sub dummy {
1086 1 || &dummy || &fetch_via_ftp || &fetch_via_http || &http_alarm;
1087 1 || close(OUT);
1088 1 || close(SAVEOUT);
1089}
1090
1091__END__