1@rem = '--*-Perl-*--';
2@rem = '
3@echo off
4perl -S %0.bat %1 %2 %3 %4 %5 %6 %7 %8 %9
5goto endofperl
6@rem ';
7#!/usr/local/bin/perl -w
8
9#-
10#!/usr/local/bin/perl -w
11$version = "951121.18";
12$comments = 'jfriedl@omron.co.jp';
13
14##
15## This is "webget"
16##
17## Jeffrey Friedl (jfriedl@omron.co.jp), July 1994.
18## Copyright 19.... ah hell, just take it.
19## Should work with either perl4 or perl5
20##
21## BLURB:
22## Given a URL on the command line (HTTP and FTP supported at the moment),
23## webget fetches the named object (HTML text, images, audio, whatever the
24## object happens to be). Will automatically use a proxy if one is defined
25## in the environment, follow "this URL has moved" responses, and retry
26## "can't find host" responses from a proxy in case host lookup was slow).
27## Supports users & passwords (FTP), Basic Authorization (HTTP), update-if-
28## modified (HTTP), and much more. Works with perl4 or perl5.
29
30##
31## More-detailed instructions in the comment block below the history list.
32##
33
34##
35## To-do:
36## Add gopher support.
37## Fix up how error messages are passed among this and the libraries.
38##
39
40## 951219.19
41## Lost ftp connections now die with a bit more grace.
42##
43## 951121.18
44## Add -nnab.
45## Brought the "usage" string in line with reality.
46##
47## 951114.17
48## Added -head.
49## Added -update/-refresh/-IfNewerThan. If any URL was not pulled
50## because it was not out of date, an exit value of 2 is returned.
51##
52## 951031.16
53## Added -timeout. Cleaned up (a bit) the exit value. Now exits
54## with 1 if all URLs had some error (timeout exits immediately with
55## code 3, though. This is subject to change). Exits with 0 if any
56## URL was brought over safely.
57##
58## 951017.15
59## Neat -pf, -postfile idea from Lorrie Cranor
60## (http://www.ccrc.wustl.edu/~lorracks/)
61##
62## 950912.14
63## Sigh, fixed a typo.
64##
65## 950911.13
66## Added Basic Authorization support for http. See "PASSWORDS AND STUFF"
67## in the documentation.
68##
69## 950911.12
70## Implemented a most-excellent suggestion by Anthony D'Atri
71## (aad@nwnet.net), to be able to automatically grab to a local file of
72## the same name as the URL. See the '-nab' flag.
73##
74## 950706.11
75## Quelled small -w warning (thanks: Lars Rasmussen <gnort@daimi.aau.dk>)
76##
77## 950630.10
78## Steve Campbell to the rescue again. FTP now works when supplied
79## with a userid & password (eg ftp://user:pass@foo.bar.com/index.txt).
80##
81## 950623.9
82## Incorporated changes from Steve Campbell (steven_campbell@uk.ibm.com)
83## so that the ftp will work when no password is required of a user.
84##
85## 950530.8
86## Minor changes:
87## Eliminate read-size warning message when size unknown.
88## Pseudo-debug/warning messages at the end of debug_read now go to
89## stderr. Some better error handling when trying to contact systems
90## that aren't really set up for ftp. Fixed a bug concerning FTP access
91## to a root directory. Added proxy documentation at head of file.
92##
93## 950426.6,7
94## Complete Overhaul:
95## Renamed from httpget. Added ftp support (very sketchy at the moment).
96## Redid to work with new 'www.pl' library; chucked 'Www.pl' library.
97## More or less new and/or improved in many ways, but probably introduced
98## a few bugs along the way.
99##
100## 941227.5
101## Added follow stuff (with -nofollow, etc.)
102## Added -updateme. Cool!
103## Some general tidying up.
104##
105## 941107.4
106## Allowed for ^M ending a header line... PCs give those kinds of headers.
107##
108## 940820.3
109## First sorta-clean net release.
110##
111##
112
113##
114##>
115##
116## Fetch http and/or ftp URL(s) given on the command line and spit to
117## STDOUT.
118##
119## Options include:
120## -V, -version
121## Print version information; exit.
122##
123## -p, -post
124## If the URL looks like a reply to a form (i.e. has a '?' in it),
125## the request is POST'ed instead of GET'ed.
126##
127## -head
128## Gets the header only (for HTTP). This might include such useful
129## things as 'Last-modified' and 'Content-length' fields
130## (a lack of a 'Last-modified' might be a good indication that it's
131## a CGI).
132##
133## The "-head" option implies "-nostrip", but does *not* imply,
134## for example "-nofollow".
135##
136##
137## -pf, -postfile
138## The item after the '?' is taken as a local filename, and the contents
139## are POST'ed as with -post
140##
141## -nab, -f, -file
142## Rather than spit the URL(s) to standard output, unconditionally
143## dump to a file (or files) whose name is the one used in the URL,
144## sans path. I like '-nab', but supply '-file' as well since that's
145## what was originally suggested. Also see '-update' below for the
146## only-if-changed version.
147##
148## -nnab
149## Like -nab, but in addition to dumping to a file, dump to stdout as well.
150## Sort of like the 'tee' command.
151##
152## -update, -refresh
153## Does the same thing as -nab, etc., but does not bother pulling the
154## URL if it is older than the local file. Only applies to HTTP.
155## Uses the HTTP "If-Modified-Since" field. If the URL was not modified
156## (and hence not fetched), the return value is '2'.
157##
158## -IfNewerThan FILE
159## -int FILE
160## Only pulls URLs if they are newer than the date the local FILE was
161## last written.
162##
163## -q, -quiet
164## Suppresses all non-essential informational messages.
165##
166## -nf, -nofollow
167## Normally, a "this URL has moved" HTTP response is automatically
168## followed. Not done with -nofollow.
169##
170## -nr, -noretry
171## Normally, an HTTP proxy response of "can't find host" is retried
172## up to three times, to give the remote hostname lookup time to
173## come back with an answer. This suppresses the retries. This is the
174## same as '-retry 0'.
175##
176## -r#, -retry#, -r #, -retry #
177## Sets the number of times to retry. Default 3.
178##
179## -ns, -nostrip
180## For HTTP items (including other items going through an HTTP proxy),
181## the HTTP response header is printed rather than stripped (the default).
182##
183## -np, -noproxy
184## A proxy is not used, even if defined for the protocol.
185##
186## -h, -help
187## Show a usage message and exit.
188##
189## -d, -debug
190## Show some debugging messages.
191##
192## -updateme
193## The special and rather cool flag "-updateme" will see if webget has
194## been updated since you got your version, and prepare a local
195## version of the new version for you to use. Keep updated! (although
196## you can always ask to be put on the ping list to be notified when
197## there's a new version -- see the author's perl web page).
198##
199## -timeout TIMESPAN
200## -to TIMESPAN
201## Time out if a connection cannot be made within the specified time
202## period. TIMESPAN is normally in seconds, although an 'm' or 'h' may
203## be appended to indicate minutes or hours. "-to 1.5m" would time out
204## after 90 seconds.
205##
206## (At least for now), a timeout causes immediate program death (with
207## exit value 3). For some reason, the alarm doesn't always cause a
208## waiting read or connect to abort, so I just die immediately.. /-:
209##
210## I might consider adding an "entire fetch" timeout, if someone
211## wants it.
212##
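## For illustration, a few possible invocations combining the options
## above (the "some.host" hostnames and paths here are made up):
##
##     webget http://www.wg.omron.co.jp/~jfriedl/perl/webget
##     webget -nab -quiet ftp://ftp.somesite.com/pub/pix/babe.gif
##     webget -update -timeout 1.5m http://some.host/some/page.html
##     webget -head http://some.host/cgi-bin/some-script
##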
213## PASSWORDS AND SUCH
214##
215## You can use webget to do FTP fetches from non-Anonymous systems and
216## accounts. Just put the required username and password into the URL,
217## as with
218## webget 'ftp://user:password@ftp.somesite.com/pub/pix/babe.gif'
219##               ^^^^^^^^^^^^^
220## Note the user:password is separated from the hostname by a '@'.
221##
222## You can use the same kind of thing with HTTP, and if so it will provide
223## what's known as Basic Authorization. This is >weak< authorization. It
224## also provides >zero< security -- I wouldn't be sending any credit-card
225## numbers this way (unless you send them 'round my way :-). It seems to
226## be used most by providers of free stuff where they want to make some
227## attempt to limit access to "known users".
228##
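## As an illustration (the user, password, and host below are made up),
## a Basic Authorization fetch might look like:
##
##     webget 'http://someuser:somepass@www.somesite.com/members/page.html'
##
## The user:password pair is sent with the request as HTTP Basic
## Authorization, much as an FTP user:password is used to log in.
##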
229## PROXY STUFF
230##
231## If you need to go through a gateway to get out to the whole internet,
232## you can use a proxy if one's been set up on the gateway. This is done
233## by setting the "http_proxy" environmental variable to point to the
234## proxy server. Other variables are used for other target protocols....
235## "gopher_proxy", "ftp_proxy", "wais_proxy", etc.
236##
237## For example, I have the following in my ".login" file (for use with csh):
238##
239## setenv http_proxy http://local.gateway.machine:8080/
240##
241## This is to indicate that any http URL should go to local.gateway.machine
242## (port 8080) via HTTP. Additionally, I have
243##
244## setenv gopher_proxy "$http_proxy"
245## setenv wais_proxy "$http_proxy"
246## setenv ftp_proxy "$http_proxy"
247##
248## This means that any gopher, wais, or ftp URL should also go to the
249## same place, also via HTTP. This allows webget to get, for example,
250## GOPHER URLs even though it doesn't support GOPHER itself. It uses HTTP
251## to talk to the proxy, which then uses GOPHER to talk to the destination.
252##
253## Finally, if there are sites inside your gateway that you would like to
254## connect to, you can list them in the "no_proxy" variable. This will allow
255## you to connect to them directly and skip going through the proxy:
256##
257## setenv no_proxy "www.this,www.that,www.other"
258##
259## I (jfriedl@omron.co.jp) have little personal experience with proxies
260## except what I deal with here at Omron, so if this is not representative
261## of your situation, please let me know.
262##
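## For Bourne-style shells, the equivalent settings would presumably be
## something like this (same example gateway as above):
##
##     http_proxy=http://local.gateway.machine:8080/ ; export http_proxy
##     no_proxy="www.this,www.that,www.other" ; export no_proxy
##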
263## RETURN VALUE
264## The value returned to the system by webget is rather screwed up because
265## I didn't think about dealing with it until things were already
266## complicated. Since there can be more than one URL on the command line,
267## it's hard to decide what to return when one times out, another is fetched,
268## another doesn't need to be fetched, and a fourth isn't found.
269##
270## So, here's the current status:
271##
272## Upon any timeout (via the -timeout arg), webget immediately
273## returns 3. End of story. Otherwise....
274##
275## If any URL was fetched with a date limit (i.e. via
276## '-update/-refresh/-IfNewerThan') and was found to not have changed,
277## 2 is returned. Otherwise....
278##
279## If any URL was successfully fetched, 0 is returned. Otherwise...
280##
281## If there were any errors, 1 is returned. Otherwise...
282##
283## Must have been an info-only or do-nothing instance. 0 is returned.
284##
285## Phew. Hopefully useful to someone.
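##
## As a sketch only, a Bourne-shell wrapper might test the exit value
## along these lines (URL made up):
##
##     webget -update http://some.host/some/page.html
##     case $? in
##         0) echo "fetched (or nothing to do)" ;;
##         1) echo "all URLs failed" ;;
##         2) echo "already up to date" ;;
##         3) echo "timed out" ;;
##     esac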
286##<
287##
288
289## Where latest version should be.
290$WEB_normal = 'http://www.wg.omron.co.jp/~jfriedl/perl/webget';
291$WEB_inlined = 'http://www.wg.omron.co.jp/~jfriedl/perl/inlined/webget';
292
293
294require 'network.pl'; ## inline if possible (directive to a tool of mine)
295require 'www.pl'; ## inline if possible (directive to a tool of mine)
296$inlined=0; ## this might be changed by the inline thing.
297
298##
299## Exit values. All screwed up.
300##
301$EXIT_ok = 0;
302$EXIT_error = 1;
303$EXIT_notmodified = 2;
304$EXIT_timeout = 3;
305
306##
307##
308
309warn qq/WARNING:\n$0: need a newer version of "network.pl"\n/ if
310 !defined($network'version) || $network'version < "950311.5";
311warn qq/WARNING:\n$0: need a newer version of "www.pl"\n/ if
312 !defined($www'version) || $www'version < "951114.8";
313
314$WEB = $inlined ? $WEB_inlined : $WEB_normal;
315
316$debug = 0;
317$strip = 1; ## default is to strip
318$quiet = 0; ## also normally off.
319$follow = 1; ## normally, we follow "Found (302)" links
320$retry = 3; ## normally, retry proxy hostname lookups up to 3 times.
321$nab = 0; ## If true, grab to a local file of the same name.
322$refresh = 0; ## If true, use 'If-Modified-Since' with -nab get.
323$postfile = 0; ## If true, filename is given after the '?'
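## Bytes between "read N bytes" progress messages when the size is unknown.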
324$defaultdelta2print = 2048;
325$TimeoutSpan = 0; ## seconds after which we should time out.
326
327while (@ARGV && $ARGV[0] =~ m/^-/)
328{
329 $arg = shift(@ARGV);
330
331 $nab = 1, next if $arg =~ m/^-f(ile)?$/;
332 $nab = 1, next if $arg =~ m/^-nab$/;
333 $nab = 2, next if $arg =~ m/^-nnab$/;
334 $post = 1, next if $arg =~ m/^-p(ost)?$/i;
335 $post = $postfile = 1, next if $arg =~ m/^-p(ost)?f(ile)?$/i;
336 $quiet=1, next if $arg =~ m/^-q(uiet)?$/;
337 $follow = 0, next if $arg =~ m/^-no?f(ollow)?$/;
338 $strip = 0, next if $arg =~ m/^-no?s(trip)?$/;
339 $debug=1, next if $arg =~ m/^-d(ebug)?$/;
340 $noproxy=1, next if $arg =~ m/^-no?p(roxy)?$/;
341 $retry=0, next if $arg =~ m/^-no?r(etry)?$/;
342 $retry=$2, next if $arg =~ m/^-r(etry)?(\d+)$/;
343 &updateme if $arg eq '-updateme';
344 $strip = 0, $head = 1, next if $arg =~ m/^-head(er)?/;
345 $nab = $refresh = 1, next if $arg =~ m/^-(refresh|update)/;
346
347 &usage($EXIT_ok) if $arg =~ m/^-h(elp)?$/;
348 &show_version, exit($EXIT_ok) if $arg eq '-version' || $arg eq '-V';
349
350 if ($arg =~ m/^-t(ime)?o(ut)?$/i) {
351 local($num) = shift(@ARGV);
352 &usage($EXIT_error, "expecting timespan argument to $arg\n") unless
353 $num =~ m/^\d+(\.\d*)?[hms]?$/;
354 &timeout_arg($num);
355 next;
356 }
357
358 if ($arg =~ m/^-if?n(ewer)?t(han)?$/i) {
359 $reference_file = shift(@ARGV);
360 &usage($EXIT_error, "expecting filename arg to $arg")
361 if !defined $reference_file;
362 if (!-f $reference_file) {
363 warn qq/$0: ${arg}'s "$reference_file" not found.\n/;
364 exit($EXIT_error);
365 }
366 next;
367 }
368
369 if ($arg eq '-r' || $arg eq '-retry') {
370 local($num) = shift(@ARGV);
371 &usage($EXIT_error, "expecting numerical arg to $arg\n") unless
372 defined($num) && $num =~ m/^\d+$/;
373 $retry = $num;
374 next;
375 }
376 &usage($EXIT_error, qq/$0: unknown option "$arg"\n/);
377}
378
379if ($head && $post) {
380 warn "$0: combining -head and -post makes no sense, ignoring -post.\n";
381 $post = 0;
382 undef $postfile;
383}
384
385if ($refresh && defined($reference_file)) {
386 warn "$0: combining -update and -IfNewerThan makes no sense, ignoring -IfNewerThan.\n";
387 undef $reference_file;
388}
389
390if (@ARGV == 0) {
391 warn "$0: nothing to do. Use -help for info.\n";
392 exit($EXIT_ok);
393}
394
395
396##
397## Now run through the remaining arguments (mostly URLs) and do a quick
398## check to see if they look well-formed. We won't *do* anything -- just
399## want to catch quick errors before really starting the work.
400##
401@tmp = @ARGV;
402$errors = 0;
403while (@tmp) {
404 $arg = shift(@tmp);
405 if ($arg =~ m/^-t(ime)?o(ut)?$/) {
406 local($num) = shift(@tmp);
407 if ($num !~ m/^\d+(\.\d*)?[hms]?$/) {
408 warn("expecting timespan argument to $arg\n");
409 $errors++;
410 }
411 } else {
412 local($protocol) = &www'grok_URL($arg, $noproxy);
413
414 if (!defined $protocol) {
415 warn qq/can't grok "$arg"/;
416 $errors++;
417 } elsif (!$quiet && ($protocol eq 'ftp')) {
418 warn qq/warning: -head ignored for ftp URLs\n/ if $head;
419 warn qq/warning: -refresh ignored for ftp URLs\n/ if $refresh;
420 warn qq/warning: -IfNewerThan ignored for ftp URLs\n/ if defined($reference_file);
421
422 }
423 }
424}
425
426exit($EXIT_error) if $errors;
427
428
429$SuccessfulCount = 0;
430$NotModifiedCount = 0;
431
432##
433## Now do the real thing.
434##
435while (@ARGV) {
436 $arg = shift(@ARGV);
437 if ($arg =~ m/^-t(ime)?o(ut)?$/) {
438 &timeout_arg(shift(@ARGV));
439 } else {
440 &fetch_url($arg);
441 }
442}
443
444if ($NotModifiedCount) {
445 exit($EXIT_notmodified);
446} elsif ($SuccessfulCount) {
447 exit($EXIT_ok);
448} else {
449 exit($EXIT_error);
450}
451
452###########################################################################
453###########################################################################
454
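##
## Convert a -timeout argument such as "30", "90s", "1.5m", or "2h"
## into seconds (stored in the global $TimeoutSpan).
##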
455sub timeout_arg
456{
457 ($TimeoutSpan) = @_;
458 $TimeoutSpan =~ s/s//;
459 $TimeoutSpan *= 60 if $TimeoutSpan =~ m/m/;
460 $TimeoutSpan *= 3600 if $TimeoutSpan =~ m/h/;
461
462}
463
464##
465## As a byproduct, returns the basename of $0.
466##
467sub show_version
468{
469 local($base) = $0;
470 $base =~ s,.*/,,;
471 print STDERR "This is $base version $version\n";
472 $base;
473}
474
475##
476## &usage(exitval, message);
477##
478## Prints a usage message to STDERR.
479## If MESSAGE is defined, prints that first.
480## If exitval is defined, exits with that value. Otherwise, returns.
481##
482sub usage
483{
484 local($exit, $message) = @_;
485
486 print STDERR $message if defined $message;
487 local($base) = &show_version;
488 print STDERR <<INLINE_LITERAL_TEXT;
489usage: $0 [options] URL ...
490 Fetches and displays the named URL(s). Supports http and ftp.
491 (if no protocol is given, a leading "http://" is normally used).
492
493Options are from among:
494 -V, -version Print version information; exit.
495 -p, -post If URL looks like a form reply, does POST instead of GET.
496 -pf, -postfile Like -post, but takes everything after ? to be a filename.
497 -q, -quiet All non-essential informational messages are suppressed.
498 -nf, -nofollow Don't follow "this document has moved" replies.
499 -nr, -noretry Doesn't retry a failed hostname lookup (same as -retry 0)
500 -r #, -retry # Sets failed-hostname-lookup-retry to # (default $retry)
501 -np, -noproxy Uses no proxy, even if one defined for the protocol.
502 -ns, -nostrip The HTTP header, normally elided, is printed.
503 -head gets item header only (implies -ns)
504 -nab, -file Dumps output to file whose name taken from URL, minus path
505 -nnab Like -nab, but *also* dumps to stdout.
506 -update HTTP only. Like -nab, but only if the page has been modified.
507 -h, -help Prints this message.
508 -IfNewerThan F HTTP only. Only brings page if it is newer than named file.
509 -timeout T Fail if a connection can't be made in the specified time.
510
511 -updateme Pull the latest version of $base from
512 $WEB
513 and reports if it is newer than your current version.
514
515Comments to $comments.
516INLINE_LITERAL_TEXT
517
518 exit($exit) if defined $exit;
519}
520
521##
522## Pull the latest version of this program to a local file.
523## Clip the first couple lines from this executing file so that we
524## preserve the local invocation style.
525##
526sub updateme
527{
528 ##
529 ## Open a temp file to hold the new version,
530 ## redirecting STDOUT to it.
531 ##
532 open(STDOUT, '>'.($tempFile="/tmp/webget.new")) ||
533 open(STDOUT, '>'.($tempFile="/usr/tmp/webget.new")) ||
534 open(STDOUT, '>'.($tempFile="/webget.new")) ||
535 open(STDOUT, '>'.($tempFile="webget.new")) ||
536 die "$0: can't open a temp file.\n";
537
538 ##
539 ## See if we can figure out how we were called.
540 ## The seek will rewind not to the start of the data, but to the
541 ## start of the whole program script.
542 ##
543 ## Keep the first line if it begins with #!, and the next two if they
544 ## look like the trick mentioned in the perl man page for getting
545 ## around the lack of #!-support.
546 ##
547 if (seek(DATA, 0, 0)) { ##
548 $_ = <DATA>; if (m/^#!/) { print STDOUT;
549 $_ = <DATA>; if (m/^\s*eval/) { print STDOUT;
550 $_ = <DATA>; if (m/^\s*if/) { print STDOUT; }
551 }
552 }
553 print STDOUT "\n#-\n";
554 }
555
556 ## Go get the latest one...
557 local(@options);
558 push(@options, 'head') if $head;
559 push(@options, 'nofollow') unless $follow;
560 push(@options, ('retry') x $retry) if $retry;
561 push(@options, 'quiet') if $quiet;
562 push(@options, 'debug') if $debug;
563 local($status, $memo, %info) = &www'open_http_url(*IN, $WEB, @options);
564 die "fetching $WEB:\n $memo\n" unless $status eq 'ok';
565
566 $size = $info{'content-length'};
567 while (<IN>)
568 {
569 $size -= length;
570 print STDOUT;
571 if (!defined $fetched_version && m/version\s*=\s*"([^"]+)"/) {
572 $fetched_version = $1;
573 &general_read(*IN, $size);
574 last;
575 }
576 }
577
578 $fetched_version = "<unknown>" unless defined $fetched_version;
579
580 ##
581 ## Try to update the mode of the temp file with the mode of this file.
582 ## Don't worry if it fails.
583 ##
584 chmod($mode, $tempFile) if $mode = (stat($0))[2];
585
586 $as_well = '';
587 if ($fetched_version eq $version)
588 {
589 print STDERR "You already have the most-recent version ($version).\n",
590 qq/FWIW, the newly fetched one has been left in "$tempFile".\n/;
591 }
592 elsif ($fetched_version <= $version)
593 {
594 print STDERR
595 "Mmm, your current version seems newer (?!):\n",
596 qq/ your version: "$version"\n/,
597 qq/ new version: "$fetched_version"\n/,
598 qq/FWIW, fetched one left in "$tempFile".\n/;
599 }
600 else
601 {
602 print STDERR
603 "Indeed, your current version was old:\n",
604 qq/ your version: "$version"\n/,
605 qq/ new version: "$fetched_version"\n/,
606 qq/The file "$tempFile" is ready to replace the old one.\n/;
607 print STDERR qq/Just do:\n % mv $tempFile $0\n/ if -f $0;
608 $as_well = ' as well';
609 }
610 print STDERR "Note that the libraries it uses may (or may not) need updating$as_well.\n"
611 unless $inlined;
612 exit($EXIT_ok);
613}
614
615##
616## Given a list of URLs, fetch'em.
617## Parses the URL and calls the routine for the appropriate protocol
618##
619sub fetch_url
620{
621 local(@todo) = @_;
622 local(%circref, %hold_circref);
623
624 URL_LOOP: while (@todo)
625 {
626 $URL = shift(@todo);
627 %hold_circref = %circref; undef %circref;
628
629 local($protocol, @args) = &www'grok_URL($URL, $noproxy);
630
631 if (!defined $protocol) {
632 &www'message(1, qq/can't grok "$URL"/);
633 next URL_LOOP;
634 }
635
636 ## call protocol-specific handler
637 $func = "fetch_via_" . $protocol;
638 $error = &$func(@args, $TimeoutSpan);
639 if (defined $error) {
640 &www'message(1, "$URL: $error");
641 } else {
642 $SuccessfulCount++;
643 }
644 }
645}
646
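##
## Return the last-modified time of the named file as a quoted,
## HTTP-style GMT date string, or 0 if the file can't be examined.
##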
647sub filedate
648{
649 local($filename) = @_;
650 local($filetime) = (stat($filename))[9];
651 return 0 if !defined $filetime;
652 local($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($filetime);
653 return 0 if !defined $wday;
654 sprintf(qq/"%s, %02d-%s-%02d %02d:%02d:%02d GMT"/,
655 ("Sunday", "Monday", "Tuesday", "Wednesday",
656 "Thursday", "Friday", "Saturday")[$wday],
657 $mday,
658 ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
659 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[$mon],
660 $year,
661 $hour,
662 $min,
663 $sec);
664}
665
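##
## Derive a local filename from a URL path: strip any trailing slashes
## and any leading directory part; if nothing remains, pick an unused
## "file0000"-style name.
##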
666sub local_filename
667{
668 local($filename) = @_;
669 $filename =~ s,/+$,,; ## remove any trailing slashes
670 $filename =~ s,.*/,,; ## remove any leading path
671 if ($filename eq '') {
672 ## empty -- pick a random name
673 $filename = "file0000";
674 ## look for a free random name.
675 $filename++ while -f $filename;
676 }
677 $filename;
678}
679
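##
## Redirect STDOUT to the named local file, keeping the original STDOUT
## open as SAVEOUT (used by -nnab to copy output there as well).
##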
680sub set_output_file
681{
682 local($filename) = @_;
683 if (!open(OUT, ">$filename")) {
684 &www'message(1, "$0: can't open [$filename] for output");
685 } else {
686 open(SAVEOUT, ">>&STDOUT") || die "$!";
687 open(STDOUT, ">>&OUT");
688 }
689}
690
691sub close_output_file
692{
693 local($filename) = @_;
694 unless ($quiet)
695 {
696 local($note) = qq/"$filename" written/;
697 if (defined $error) {
698 $note .= " (possibly corrupt due to error above)";
699 }
700 &www'message(1, "$note.");
701 }
702 close(STDOUT);
703 open(STDOUT, ">&SAVEOUT");
704}
705
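##
## SIGALRM handler for -timeout: report the timeout described in
## $AlarmNote and exit immediately with $EXIT_timeout.
##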
706sub http_alarm
707{
708 &www'message(1, "ERROR: $AlarmNote.");
709 exit($EXIT_timeout); ## the alarm doesn't seem to cause a waiting syscall to break?
710# $HaveAlarm = 1;
711}
712
713##
714## Given the host, port, and path, and (for info only) real target,
715## fetch via HTTP.
716##
717## If there is a user and/or password, use that for Basic Authorization.
718##
719## If $timeout is nonzero, time out after that many seconds.
720##
721sub fetch_via_http
722{
723 local($host, $port, $path, $target, $user, $password, $timeout) = @_;
724 local(@options);
725 local($local_filename);
726
727 ##
728 ## If we're posting, but -postfile was given, we need to interpret
729 ## the item in $path after '?' as a filename, and replace it with
730 ## the contents of the file.
731 ##
732 if ($postfile && $path =~ s/\?([\d\D]*)//) {
733 local($filename) = $1;
734 return("can't open [$filename] to POST") if !open(IN, "<$filename");
735 local($/) = undef; ## undef the input record separator so <IN> slurps the whole file.
736 $path .= '?' . <IN>;
737 close(IN);
738 }
739
740 $local_filename = &local_filename($path)
741 if $refresh || $nab || defined($reference_file);
742 $refresh = &filedate($local_filename) if $refresh;
743 $refresh = &filedate($reference_file) if defined($reference_file);
744
745 push(@options, 'head') if $head;
746 push(@options, 'post') if $post;
747 push(@options, 'nofollow') unless $follow;
748 push(@options, ('retry') x $retry) if $retry;
749 push(@options, 'quiet') if $quiet;
750 push(@options, 'debug') if $debug;
751 push(@options, "ifmodifiedsince=$refresh") if $refresh;
752
753 if (defined $password || defined $user) {
754 local($auth) = join(':', ($user || ''), ($password || ''));
755 push(@options, "authorization=$auth");
756 }
757
758 local($old_alarm);
759 if ($timeout) {
760 $old_alarm = $SIG{'ALRM'} || 'DEFAULT';
761 $SIG{'ALRM'} = "main'http_alarm";
762# $HaveAlarm = 0;
763 $AlarmNote = "host $host";
764 $AlarmNote .= ":$port" if $port != $www'default_port{'http'};
765 $AlarmNote .= " timed out after $timeout second";
766 $AlarmNote .= 's' if $timeout > 1;
767 alarm($timeout);
768 }
769 local($result, $memo, %info) =
770 &www'open_http_connection(*HTTP, $host,$port,$path,$target,@options);
771
772 if ($timeout) {
773 alarm(0);
774 $SIG{'ALRM'} = $old_alarm;
775 }
776
777# if ($HaveAlarm) {
778# close(HTTP);
779# $error = "timeout after $timeout second";
780# $error .= "s" if $timeout > 1;
781# return $error;
782# }
783
784 if ($follow && ($result eq 'follow')) {
785 %circref = %hold_circref;
786 $circref{$memo} = 1;
787 unshift(@todo, $memo);
788 return undef;
789 }
790
791
792 return $memo if $result eq 'error';
793 if (!$quiet && $result eq 'status' && ! -t STDOUT) {
794 #&www'message(1, "Warning: $memo");
795 $error = "Warning: $memo";
796 }
797
798 if ($info{'CODE'} == 304) { ## 304 is magic for "Not Modified"
799 close(HTTP);
800 &www'message(1, "$URL: Not Modified") unless $quiet;
801 $NotModifiedCount++;
802 return undef; ## no error
803 }
804
805
806 &set_output_file($local_filename) if $nab;
807
808 unless($strip) {
809 print $info{'STATUS'}, "\n", $info{'HEADER'}, "\n";
810
811 print SAVEOUT $info{'STATUS'}, "\n", $info{'HEADER'}, "\n" if $nab==2;
812 }
813
814 if (defined $info{'BODY'}) {
815 print $info{'BODY'};
816 print SAVEOUT $info{'BODY'} if $nab==2;
817 }
818
819 if (!$head) {
820 &general_read(*HTTP, $info{'content-length'});
821 }
822 close(HTTP);
823 &close_output_file($local_filename) if $nab;
824
825 $error; ## will be 'undef' if no error;
826}
827
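##
## Fetch an item via FTP (or fall back to a directory listing if the
## path turns out to be a directory), using passive-mode data
## connections. Anonymous login is used unless the URL supplied a
## user and/or password.
##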
828sub fetch_via_ftp
829{
830 local($host, $port, $path, $target, $user, $password, $timeout) = @_;
831 local($local_filename) = &local_filename($path);
832 local($ftp_debug) = $debug;
833 local(@password) = ($password);
834 $path =~ s,^/,,; ## remove a leading / from the path.
835 $path = '.' if $path eq ''; ## make sure we have something
836
837 if (!defined $user) {
838 $user = 'anonymous';
839 $password = $ENV{'USER'} || 'WWWuser';
840 @password = ($password.'@'. &network'addr_to_ascii(&network'my_addr),
841 $password.'@');
842 } elsif (!defined $password) {
843 @password = ("");
844 }
845
846 local($_last_ftp_reply, $_passive_host, $_passive_port);
847 local($size);
848
849 sub _ftp_get_reply
850 {
851 local($text) = scalar(<FTP_CONTROL>);
852 die "lost connection to $host\n" if !defined $text;
853 local($_, $tmp);
854 print STDERR "READ: $text" if $ftp_debug;
855 die "internal error: expected reply code in response from ".
856 "ftp server [$text]" unless $text =~ s/^(\d+)([- ])//;
857 local($code) = $1;
858 if ($2 eq '-') {
859 while (<FTP_CONTROL>) {
860 ($tmp = $_) =~ s/^\d+[- ]//;
861 $text .= $tmp;
862 last if m/^$code /;
863 }
864 }
865 $text =~ s/^\d+ ?/<foo>/g;
866 ($code, $text);
867 }
868
869 sub _ftp_expect
870 {
871 local($code, $text) = &_ftp_get_reply;
872 $_last_ftp_reply = $text;
873 foreach $expect (@_) {
874 return ($code, $text) if $code == $expect;
875 }
876 die "internal error: expected return code ".
877 join('|',@_).", got [$text]";
878 }
879
880 sub _ftp_send
881 {
882 print STDERR "SEND: ", @_ if $ftp_debug;
883 print FTP_CONTROL @_;
884 }
885
886 sub _ftp_do_passive
887 {
888 local(@commands) = @_;
889
890 &_ftp_send("PASV\r\n");
891 local($code) = &_ftp_expect(227, 125);
892
893 if ($code == 227)
894 {
895 die "internal error: can't grok passive reply [$_last_ftp_reply]"
896 unless $_last_ftp_reply =~ m/\(([\d,]+)\)/;
897 local($a,$b,$c,$d, $p1, $p2) = split(/,/, $1);
898 ($_passive_host, $_passive_port) =
899 ("$a.$b.$c.$d", $p1*256 + $p2);
900 }
901
902 foreach(@commands) {
903 &_ftp_send($_);
904 }
905
906 local($error)=
907 &network'connect_to(*PASSIVE, $_passive_host, $_passive_port);
908 die "internal error: passive ftp connect [$error]" if $error;
909 }
910
911 ## make the connection to the host
912 &www'message($debug, "connecting to $host...") unless $quiet;
913
914 local($old_alarm);
915 if ($timeout) {
916 $old_alarm = $SIG{'ALRM'} || 'DEFAULT';
917 $SIG{'ALRM'} = "main'http_alarm"; ## can use this for now
918# $HaveAlarm = 0;
919 $AlarmNote = "host $host";
920 $AlarmNote .= ":$port" if $port != $www'default_port{'ftp'};
921 $AlarmNote .= " timed out after $timeout second";
922 $AlarmNote .= 's' if $timeout > 1;
923 alarm($timeout);
924 }
925
926 local($error) = &network'connect_to(*FTP_CONTROL, $host, $port);
927
928 if ($timeout) {
929 alarm(0);
930 $SIG{'ALRM'} = $old_alarm;
931 }
932
933 return $error if $error;
934
935 local ($code, $text) = &_ftp_get_reply(*FTP_CONTROL);
936 close(FTP_CONTROL), return "internal ftp error: [$text]" unless $code==220;
937
938 ## log in
939 &www'message($debug, "logging in as $user...") unless $quiet;
940 foreach $password (@password)
941 {
942 &_ftp_send("USER $user\r\n");
943 ($code, $text) = &_ftp_expect(230,331,530);
944 close(FTP_CONTROL), return $text if ($code == 530);
945 last if $code == 230; ## hey, already logged in, cool.
946
947 &_ftp_send("PASS $password\r\n");
948 ($code, $text) = &_ftp_expect(220,230,530,550,332);
949 last if $code != 550;
950 last if $text =~ m/can't change directory/;
951 }
952
953 if ($code == 550)
954 {
955 $text =~ s/\n+$//;
956 &www'message(1, "Can't log in $host: $text") unless $quiet;
957 exit($EXIT_error);
958 }
959
960 if ($code == 332)
961 {
962 &_ftp_send("ACCT noaccount\r\n");
963 ($code, $text) = &_ftp_expect(230, 202, 530, 500, 501, 503, 421);
964 }
965 close(FTP_CONTROL), return $text if $code >= 300;
966
967 &_ftp_send("TYPE I\r\n");
968 &_ftp_expect(200);
969
970 unless ($quiet) {
971 local($name) = $path;
972 $name =~ s,.*/([^/]),$1,;
973 &www'message($debug, "requesting $name...");
974 }
975 ## get file
976 &_ftp_do_passive("RETR $path\r\n");
977 ($code,$text) = &_ftp_expect(125, 150, 550, 530);
978 close(FTP_CONTROL), return $text if $code == 530;
979
980 if ($code == 550)
981 {
982 close(PASSIVE);
983 if ($text =~ /directory/i) {
984 ## probably from "no such file or directory", so just return now.
985 close(FTP_CONTROL);
986 return $text;
987 }
988
989 ## do like Mosaic and try getting a directory listing.
990 &_ftp_send("CWD $path\r\n");
991 ($code) = &_ftp_expect(250,550);
992 if ($code == 550) {
993 close(FTP_CONTROL);
994 return $text;
995 }
996 &_ftp_do_passive("LIST\r\n");
997 &_ftp_expect(125, 150);
998 }
999
1000 $size = $1 if $text =~ m/(\d+)\s+bytes/;
1001 binmode(PASSIVE); ## just in case.
1002 &www'message($debug, "waiting for data...") unless $quiet;
1003 &set_output_file($local_filename) if $nab;
1004 &general_read(*PASSIVE, $size);
1005 &close_output_file($local_filename) if $nab;
1006
1007 close(PASSIVE);
1008 close(FTP_CONTROL);
1009 undef;
1010}
1011
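##
## Copy everything from the given filehandle to the current output,
## printing occasional progress messages (including a percentage when
## the expected size is known).
##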
1012sub general_read
1013{
1014 local(*INPUT, $size) = @_;
1015 local($lastcount, $bytes) = (0,0);
1016 local($need_to_clear) = 0;
1017 local($start_time) = time;
1018 local($last_time, $time) = $start_time;
1019 ## Figure out how often to print the "bytes read" message
1020 local($delta2print) =
1021 (defined $size) ? int($size/50) : $defaultdelta2print;
1022
1023 &www'message(0, "read 0 bytes") unless $quiet;
1024
1025 ## so $! below is set only if a real error happens from now
1026 eval 'local($^W) = 0; undef $!';
1027
1028
1029 while (defined($_ = <INPUT>))
1030 {
1031 ## shove it out.
1032 &www'clear_message if $need_to_clear;
1033 print;
1034 print SAVEOUT if $nab==2;
1035
1036 ## if we know the content-size, keep track of what we're reading.
1037 $bytes += length;
1038
1039 last if eof || (defined $size && $bytes >= $size);
1040
1041 if (!$quiet && $bytes > ($lastcount + $delta2print))
1042 {
1043 if ($time = time, $last_time == $time) {
1044 $delta2print *= 1.5;
1045 } else {
1046 $last_time = $time;
1047 $lastcount = $bytes;
1048 local($time_delta) = $time - $start_time;
1049 local($text);
1050
1051 $delta2print /= $time_delta;
1052 if (defined $size) {
1053 $text = sprintf("read $bytes bytes (%.0f%%)",
1054 $bytes*100/$size);
1055 } else {
1056 $text = "read $bytes bytes";
1057 }
1058
1059 if ($time_delta > 5 || ($time_delta && $bytes > 10240))
1060 {
1061 local($rate) = int($bytes / $time_delta);
1062 if ($rate < 5000) {
1063 $text .= " ($rate bytes/sec)";
1064 } elsif ($rate < 1024 * 10) {
1065 $text .= sprintf(" (%.1f k/sec)", $rate/1024);
1066 } else {
1067 $text .= sprintf(" (%.0f k/sec)", $rate/1024);
1068 }
1069 }
1070 &www'message(0, "$text...");
1071 $need_to_clear = -t STDOUT;
1072 }
1073 }
1074 }
1075
1076 if (!$quiet)
1077 {
1078 if ($size && ($size != $bytes)) {
1079 &www'message(1, "WARNING: Expected $size bytes, read $bytes bytes.\n");
1080 }
1081# if ($!) {
1082# print STDERR "\$! is [$!]\n";
1083# }
1084# if ($@) {
1085# print STDERR "\$\@ is [$@]\n";
1086# }
1087 }
1088 &www'clear_message($text) unless $quiet;
1089}
1090
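## Dummy references to quiet "used only once" warnings under -w.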
1091sub dummy {
1092 1 || &dummy || &fetch_via_ftp || &fetch_via_http || &http_alarm;
1093 1 || close(OUT);
1094 1 || close(SAVEOUT);
1095}
1096
1097__END__
1098__END__
1099:endofperl