Sync up with Digest-MD5-2.38 from CPAN
[p5sagit/p5-mst-13.2.git] / lib / File / Fetch.pm
CommitLineData
79fd8837 1package File::Fetch;
2
3use strict;
4use FileHandle;
6e654618 5use File::Temp;
79fd8837 6use File::Copy;
7use File::Spec;
8use File::Spec::Unix;
79fd8837 9use File::Basename qw[dirname];
10
11use Cwd qw[cwd];
12use Carp qw[carp];
13use IPC::Cmd qw[can_run run];
14use File::Path qw[mkpath];
15use Params::Check qw[check];
16use Module::Load::Conditional qw[can_load];
17use Locale::Maketext::Simple Style => 'gettext';
18
19use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
20 $BLACKLIST $METHOD_FAIL $VERSION $METHODS
21 $FTP_PASSIVE $TIMEOUT $DEBUG $WARN
22 ];
23
d4b3706f 24use constant QUOTE => do { $^O eq 'MSWin32' ? q["] : q['] };
25
26
6e654618 27$VERSION = '0.16';
fe98d82b 28$VERSION = eval $VERSION; # avoid warnings with development releases
29$PREFER_BIN = 0; # XXX TODO implement
79fd8837 30$FROM_EMAIL = 'File-Fetch@example.com';
31$USER_AGENT = 'File::Fetch/$VERSION';
32$BLACKLIST = [qw|ftp|];
33$METHOD_FAIL = { };
34$FTP_PASSIVE = 1;
35$TIMEOUT = 0;
36$DEBUG = 0;
37$WARN = 1;
38
39### methods available to fetch the file depending on the scheme
40$METHODS = {
6e654618 41 http => [ qw|lwp wget curl lftp lynx| ],
42 ftp => [ qw|lwp netftp wget curl lftp ncftp ftp| ],
43 file => [ qw|lwp lftp file| ],
79fd8837 44 rsync => [ qw|rsync| ]
45};
46
47### silly warnings ###
48local $Params::Check::VERBOSE = 1;
49local $Params::Check::VERBOSE = 1;
50local $Module::Load::Conditional::VERBOSE = 0;
51local $Module::Load::Conditional::VERBOSE = 0;
52
53### see what OS we are on, important for file:// uris ###
9e5ea595 54use constant ON_WIN => ($^O eq 'MSWin32');
55use constant ON_VMS => ($^O eq 'VMS');
1f80753b 56use constant ON_UNIX => (!ON_WIN);
57use constant HAS_VOL => (ON_WIN);
fe98d82b 58use constant HAS_SHARE => (ON_WIN);
79fd8837 59=pod
60
61=head1 NAME
62
63File::Fetch - A generic file fetching mechanism
64
65=head1 SYNOPSIS
66
67 use File::Fetch;
68
69 ### build a File::Fetch object ###
70 my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
71
72 ### fetch the uri to cwd() ###
73 my $where = $ff->fetch() or die $ff->error;
74
75 ### fetch the uri to /tmp ###
76 my $where = $ff->fetch( to => '/tmp' );
77
78 ### parsed bits from the uri ###
79 $ff->uri;
80 $ff->scheme;
81 $ff->host;
82 $ff->path;
83 $ff->file;
84
85=head1 DESCRIPTION
86
87File::Fetch is a generic file fetching mechanism.
88
89It allows you to fetch any file pointed to by a C<ftp>, C<http>,
90C<file>, or C<rsync> uri by a number of different means.
91
92See the C<HOW IT WORKS> section further down for details.
93
d4b3706f 94=head1 ACCESSORS
95
96A C<File::Fetch> object has the following accessors
97
98=over 4
99
100=item $ff->uri
101
102The uri you passed to the constructor
103
104=item $ff->scheme
105
106The scheme from the uri (like 'file', 'http', etc)
107
108=item $ff->host
109
fe98d82b 110The hostname in the uri. Will be empty if host was originally
111'localhost' for a 'file://' url.
112
113=item $ff->vol
114
115On operating systems with the concept of a volume the second element
116of a file:// is considered to the be volume specification for the file.
1f80753b 117Thus on Win32 this routine returns the volume, on other operating
118systems this returns nothing.
fe98d82b 119
120On Windows this value may be empty if the uri is to a network share, in
121which case the 'share' property will be defined. Additionally, volume
122specifications that use '|' as ':' will be converted on read to use ':'.
123
1f80753b 124On VMS, which has a volume concept, this field will be empty because VMS
125file specifications are converted to absolute UNIX format and the volume
126information is transparently included.
127
fe98d82b 128=item $ff->share
129
130On systems with the concept of a network share (currently only Windows) returns
131the sharename from a file://// url. On other operating systems returns empty.
d4b3706f 132
133=item $ff->path
134
135The path from the uri, will be at least a single '/'.
136
137=item $ff->file
138
139The name of the remote file. For the local file name, the
140result of $ff->output_file will be used.
141
142=cut
143
144
145##########################
146### Object & Accessors ###
147##########################
148
149{
150 ### template for new() and autogenerated accessors ###
151 my $Tmpl = {
152 scheme => { default => 'http' },
153 host => { default => 'localhost' },
154 path => { default => '/' },
155 file => { required => 1 },
156 uri => { required => 1 },
1f80753b 157 vol => { default => '' }, # windows for file:// uris
fe98d82b 158 share => { default => '' }, # windows for file:// uris
d4b3706f 159 _error_msg => { no_override => 1 },
160 _error_msg_long => { no_override => 1 },
161 };
162
163 for my $method ( keys %$Tmpl ) {
164 no strict 'refs';
165 *$method = sub {
166 my $self = shift;
167 $self->{$method} = $_[0] if @_;
168 return $self->{$method};
169 }
170 }
171
172 sub _create {
173 my $class = shift;
174 my %hash = @_;
175
176 my $args = check( $Tmpl, \%hash ) or return;
177
178 bless $args, $class;
179
180 if( lc($args->scheme) ne 'file' and not $args->host ) {
181 return File::Fetch->_error(loc(
182 "Hostname required when fetching from '%1'",$args->scheme));
183 }
184
185 for (qw[path file]) {
9e5ea595 186 unless( $args->$_() ) { # 5.5.x needs the ()
d4b3706f 187 return File::Fetch->_error(loc("No '%1' specified",$_));
188 }
189 }
190
191 return $args;
192 }
193}
194
195=item $ff->output_file
196
197The name of the output file. This is the same as $ff->file,
198but any query parameters are stripped off. For example:
199
200 http://example.com/index.html?x=y
201
202would make the output file be C<index.html> rather than
203C<index.html?x=y>.
204
205=back
206
207=cut
208
209sub output_file {
210 my $self = shift;
211 my $file = $self->file;
212
213 $file =~ s/\?.*$//g;
214
215 return $file;
216}
217
218### XXX do this or just point to URI::Escape?
219# =head2 $esc_uri = $ff->escaped_uri
220#
221# =cut
222#
223# ### most of this is stolen straight from URI::escape
224# { ### Build a char->hex map
225# my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
226#
227# sub escaped_uri {
228# my $self = shift;
229# my $uri = $self->uri;
230#
231# ### Default unsafe characters. RFC 2732 ^(uric - reserved)
232# $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
233# $escapes{$1} || $self->_fail_hi($1)/ge;
234#
235# return $uri;
236# }
237#
238# sub _fail_hi {
239# my $self = shift;
240# my $char = shift;
241#
242# $self->_error(loc(
243# "Can't escape '%1', try using the '%2' module instead",
244# sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
245# ));
246# }
247#
248# sub output_file {
249#
250# }
251#
252#
253# }
254
79fd8837 255=head1 METHODS
256
257=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
258
259Parses the uri and creates a corresponding File::Fetch::Item object,
260that is ready to be C<fetch>ed and returns it.
261
262Returns false on failure.
263
264=cut
265
266sub new {
267 my $class = shift;
268 my %hash = @_;
269
270 my ($uri);
271 my $tmpl = {
272 uri => { required => 1, store => \$uri },
273 };
274
275 check( $tmpl, \%hash ) or return;
276
277 ### parse the uri to usable parts ###
278 my $href = __PACKAGE__->_parse_uri( $uri ) or return;
279
280 ### make it into a FFI object ###
d4b3706f 281 my $ff = File::Fetch->_create( %$href ) or return;
79fd8837 282
283
284 ### return the object ###
d4b3706f 285 return $ff;
79fd8837 286}
287
288### parses an uri to a hash structure:
289###
290### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
291###
292### becomes:
293###
294### $href = {
295### scheme => 'ftp',
296### host => 'ftp.cpan.org',
297### path => '/pub/mirror',
298### file => 'index.html'
299### };
300###
9e5ea595 301### In the case of file:// urls there maybe be additional fields
302###
5e6d05d2 303### For systems with volume specifications such as Win32 there will be
fe98d82b 304### a volume specifier provided in the 'vol' field.
305###
306### 'vol' => 'volumename'
307###
9e5ea595 308### For windows file shares there may be a 'share' key specified
309###
310### 'share' => 'sharename'
311###
fe98d82b 312### Note that the rules of what a file:// url means vary by the operating system
313### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
314### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
315### not '/foo/bar.txt'
9e5ea595 316###
fe98d82b 317### Similarly if the host interpreting the url is VMS then
318### file:///disk$user/my/notes/note12345.txt' means
1f80753b 319### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
320### if it is unix where it means /disk$user/my/notes/note12345.txt'.
321### Except for some cases in the File::Spec methods, Perl on VMS will generally
322### handle UNIX format file specifications.
fe98d82b 323###
324### This means it is impossible to serve certain file:// urls on certain systems.
325###
326### Thus are the problems with a protocol-less specification. :-(
9e5ea595 327###
328
79fd8837 329sub _parse_uri {
330 my $self = shift;
331 my $uri = shift or return;
332
333 my $href = { uri => $uri };
334
335 ### find the scheme ###
336 $uri =~ s|^(\w+)://||;
337 $href->{scheme} = $1;
338
9e5ea595 339 ### See rfc 1738 section 3.10
340 ### http://www.faqs.org/rfcs/rfc1738.html
341 ### And wikipedia for more on windows file:// urls
342 ### http://en.wikipedia.org/wiki/File://
79fd8837 343 if( $href->{scheme} eq 'file' ) {
9e5ea595 344
345 my @parts = split '/',$uri;
346
347 ### file://hostname/...
348 ### file://hostname/...
fe98d82b 349 ### normalize file://localhost with file:///
9e5ea595 350 $href->{host} = $parts[0] || '';
351
352 ### index in @parts where the path components begin;
353 my $index = 1;
9e5ea595 354
355 ### file:////hostname/sharename/blah.txt
fe98d82b 356 if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
357
9e5ea595 358 $href->{host} = $parts[2] || ''; # avoid warnings
359 $href->{share} = $parts[3] || ''; # avoid warnings
360
361 $index = 4 # index after the share
9e5ea595 362
fe98d82b 363 ### file:///D|/blah.txt
364 ### file:///D:/blah.txt
fe98d82b 365 } elsif (HAS_VOL) {
366
367 ### this code comes from dmq's patch, but:
368 ### XXX if volume is empty, wouldn't that be an error? --kane
369 ### if so, our file://localhost test needs to be fixed as wel
370 $href->{vol} = $parts[1] || '';
371
372 ### correct D| style colume descriptors
373 $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
374
375 $index = 2; # index after the volume
376 }
377
378 ### rebuild the path from the leftover parts;
9e5ea595 379 $href->{path} = join '/', '', splice( @parts, $index, $#parts );
79fd8837 380
381 } else {
9e5ea595 382 ### using anything but qw() in hash slices may produce warnings
383 ### in older perls :-(
384 @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s;
79fd8837 385 }
386
387 ### split the path into file + dir ###
388 { my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
389 $href->{path} = $parts[1];
390 $href->{file} = $parts[2];
391 }
392
fe98d82b 393 ### host will be empty if the target was 'localhost' and the
394 ### scheme was 'file'
395 $href->{host} = '' if ($href->{host} eq 'localhost') and
396 ($href->{scheme} eq 'file');
79fd8837 397
398 return $href;
399}
400
401=head2 $ff->fetch( [to => /my/output/dir/] )
402
403Fetches the file you requested. By default it writes to C<cwd()>,
404but you can override that by specifying the C<to> argument.
405
406Returns the full path to the downloaded file on success, and false
407on failure.
408
409=cut
410
411sub fetch {
412 my $self = shift or return;
413 my %hash = @_;
414
415 my $to;
416 my $tmpl = {
417 to => { default => cwd(), store => \$to },
418 };
419
420 check( $tmpl, \%hash ) or return;
421
5e6d05d2 422 ### On VMS force to VMS format so File::Spec will work.
1f80753b 423 $to = VMS::Filespec::vmspath($to) if ON_VMS;
424
79fd8837 425 ### create the path if it doesn't exist yet ###
426 unless( -d $to ) {
427 eval { mkpath( $to ) };
428
429 return $self->_error(loc("Could not create path '%1'",$to)) if $@;
430 }
431
432 ### set passive ftp if required ###
433 local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
434
fe98d82b 435 ### we dont use catfile on win32 because if we are using a cygwin tool
436 ### under cmd.exe they wont understand windows style separators.
437 my $out_to = ON_WIN ? $to.'/'.$self->output_file
438 : File::Spec->catfile( $to, $self->output_file );
439
79fd8837 440 for my $method ( @{ $METHODS->{$self->scheme} } ) {
441 my $sub = '_'.$method.'_fetch';
442
443 unless( __PACKAGE__->can($sub) ) {
444 $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
445 $method));
446 next;
447 }
448
449 ### method is blacklisted ###
450 next if grep { lc $_ eq $method } @$BLACKLIST;
451
452 ### method is known to fail ###
453 next if $METHOD_FAIL->{$method};
454
d4b3706f 455 ### there's serious issues with IPC::Run and quoting of command
456 ### line arguments. using quotes in the wrong place breaks things,
457 ### and in the case of say,
458 ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
459 ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
460 ### it doesn't matter how you quote, it always fails.
461 local $IPC::Cmd::USE_IPC_RUN = 0;
462
463 if( my $file = $self->$sub(
a0ad4830 464 to => $out_to
d4b3706f 465 )){
79fd8837 466
467 unless( -e $file && -s _ ) {
468 $self->_error(loc("'%1' said it fetched '%2', ".
469 "but it was not created",$method,$file));
470
471 ### mark the failure ###
472 $METHOD_FAIL->{$method} = 1;
473
474 next;
475
476 } else {
477
478 my $abs = File::Spec->rel2abs( $file );
479 return $abs;
480 }
481 }
482 }
483
484
485 ### if we got here, we looped over all methods, but we weren't able
486 ### to fetch it.
487 return;
488}
489
79fd8837 490########################
491### _*_fetch methods ###
492########################
493
494### LWP fetching ###
495sub _lwp_fetch {
496 my $self = shift;
497 my %hash = @_;
498
499 my ($to);
500 my $tmpl = {
501 to => { required => 1, store => \$to }
502 };
503 check( $tmpl, \%hash ) or return;
504
505 ### modules required to download with lwp ###
506 my $use_list = {
507 LWP => '0.0',
508 'LWP::UserAgent' => '0.0',
509 'HTTP::Request' => '0.0',
510 'HTTP::Status' => '0.0',
511 URI => '0.0',
512
513 };
514
515 if( can_load(modules => $use_list) ) {
516
517 ### setup the uri object
518 my $uri = URI->new( File::Spec::Unix->catfile(
519 $self->path, $self->file
520 ) );
521
522 ### special rules apply for file:// uris ###
523 $uri->scheme( $self->scheme );
524 $uri->host( $self->scheme eq 'file' ? '' : $self->host );
525 $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';
526
527 ### set up the useragent object
528 my $ua = LWP::UserAgent->new();
529 $ua->timeout( $TIMEOUT ) if $TIMEOUT;
530 $ua->agent( $USER_AGENT );
531 $ua->from( $FROM_EMAIL );
532 $ua->env_proxy;
533
534 my $res = $ua->mirror($uri, $to) or return;
535
536 ### uptodate or fetched ok ###
537 if ( $res->code == 304 or $res->code == 200 ) {
538 return $to;
539
540 } else {
541 return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
542 $res->code, HTTP::Status::status_message($res->code),
543 $res->status_line));
544 }
545
546 } else {
547 $METHOD_FAIL->{'lwp'} = 1;
548 return;
549 }
550}
551
552### Net::FTP fetching
553sub _netftp_fetch {
554 my $self = shift;
555 my %hash = @_;
556
557 my ($to);
558 my $tmpl = {
559 to => { required => 1, store => \$to }
560 };
561 check( $tmpl, \%hash ) or return;
562
563 ### required modules ###
564 my $use_list = { 'Net::FTP' => 0 };
565
566 if( can_load( modules => $use_list ) ) {
567
568 ### make connection ###
569 my $ftp;
570 my @options = ($self->host);
571 push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
572 unless( $ftp = Net::FTP->new( @options ) ) {
573 return $self->_error(loc("Ftp creation failed: %1",$@));
574 }
575
576 ### login ###
577 unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
578 return $self->_error(loc("Could not login to '%1'",$self->host));
579 }
580
581 ### set binary mode, just in case ###
582 $ftp->binary;
583
584 ### create the remote path
585 ### remember remote paths are unix paths! [#11483]
586 my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
587
588 ### fetch the file ###
589 my $target;
590 unless( $target = $ftp->get( $remote, $to ) ) {
591 return $self->_error(loc("Could not fetch '%1' from '%2'",
592 $remote, $self->host));
593 }
594
595 ### log out ###
596 $ftp->quit;
597
598 return $target;
599
600 } else {
601 $METHOD_FAIL->{'netftp'} = 1;
602 return;
603 }
604}
605
606### /bin/wget fetch ###
607sub _wget_fetch {
608 my $self = shift;
609 my %hash = @_;
610
611 my ($to);
612 my $tmpl = {
613 to => { required => 1, store => \$to }
614 };
615 check( $tmpl, \%hash ) or return;
616
617 ### see if we have a wget binary ###
618 if( my $wget = can_run('wget') ) {
619
620 ### no verboseness, thanks ###
621 my $cmd = [ $wget, '--quiet' ];
622
623 ### if a timeout is set, add it ###
624 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
625
626 ### run passive if specified ###
627 push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
628
629 ### set the output document, add the uri ###
6e654618 630 push @$cmd, '--output-document', $to, $self->uri;
631
632 ### with IPC::Cmd > 0.41, this is fixed in teh library,
633 ### and there's no need for special casing any more.
634 ### DO NOT quote things for IPC::Run, it breaks stuff.
635 # $IPC::Cmd::USE_IPC_RUN
636 # ? ($to, $self->uri)
637 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
79fd8837 638
639 ### shell out ###
640 my $captured;
d4b3706f 641 unless(run( command => $cmd,
642 buffer => \$captured,
643 verbose => $DEBUG
644 )) {
79fd8837 645 ### wget creates the output document always, even if the fetch
646 ### fails.. so unlink it in that case
647 1 while unlink $to;
648
649 return $self->_error(loc( "Command failed: %1", $captured || '' ));
650 }
651
652 return $to;
653
654 } else {
655 $METHOD_FAIL->{'wget'} = 1;
656 return;
657 }
658}
659
6e654618 660### /bin/lftp fetch ###
661sub _lftp_fetch {
662 my $self = shift;
663 my %hash = @_;
664
665 my ($to);
666 my $tmpl = {
667 to => { required => 1, store => \$to }
668 };
669 check( $tmpl, \%hash ) or return;
670
671 ### see if we have a wget binary ###
672 if( my $lftp = can_run('lftp') ) {
673
674 ### no verboseness, thanks ###
675 my $cmd = [ $lftp, '-f' ];
676
677 my $fh = File::Temp->new;
678
679 my $str;
680
681 ### if a timeout is set, add it ###
682 $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT;
683
684 ### run passive if specified ###
685 $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE;
686
687 ### set the output document, add the uri ###
688 ### quote the URI, because lftp supports certain shell
689 ### expansions, most notably & for backgrounding.
690 ### ' quote does nto work, must be "
691 $str .= q[get ']. $self->uri .q[' -o ]. $to . $/;
692
693 if( $DEBUG ) {
694 my $pp_str = join ' ', split $/, $str;
695 print "# lftp command: $pp_str\n";
696 }
697
698 ### write straight to the file.
699 $fh->autoflush(1);
700 print $fh $str;
701
702 ### the command needs to be 1 string to be executed
703 push @$cmd, $fh->filename;
704
705 ### with IPC::Cmd > 0.41, this is fixed in teh library,
706 ### and there's no need for special casing any more.
707 ### DO NOT quote things for IPC::Run, it breaks stuff.
708 # $IPC::Cmd::USE_IPC_RUN
709 # ? ($to, $self->uri)
710 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
711
712
713 ### shell out ###
714 my $captured;
715 unless(run( command => $cmd,
716 buffer => \$captured,
717 verbose => $DEBUG
718 )) {
719 ### wget creates the output document always, even if the fetch
720 ### fails.. so unlink it in that case
721 1 while unlink $to;
722
723 return $self->_error(loc( "Command failed: %1", $captured || '' ));
724 }
725
726 return $to;
727
728 } else {
729 $METHOD_FAIL->{'lftp'} = 1;
730 return;
731 }
732}
733
734
79fd8837 735
736### /bin/ftp fetch ###
737sub _ftp_fetch {
738 my $self = shift;
739 my %hash = @_;
740
741 my ($to);
742 my $tmpl = {
743 to => { required => 1, store => \$to }
744 };
745 check( $tmpl, \%hash ) or return;
746
d4b3706f 747 ### see if we have a ftp binary ###
79fd8837 748 if( my $ftp = can_run('ftp') ) {
749
750 my $fh = FileHandle->new;
751
752 local $SIG{CHLD} = 'IGNORE';
753
754 unless ($fh->open("|$ftp -n")) {
755 return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
756 }
757
758 my @dialog = (
759 "lcd " . dirname($to),
760 "open " . $self->host,
761 "user anonymous $FROM_EMAIL",
762 "cd /",
763 "cd " . $self->path,
764 "binary",
d4b3706f 765 "get " . $self->file . " " . $self->output_file,
79fd8837 766 "quit",
767 );
768
769 foreach (@dialog) { $fh->print($_, "\n") }
770 $fh->close or return;
771
772 return $to;
773 }
774}
775
776### lynx is stupid - it decompresses any .gz file it finds to be text
777### use /bin/lynx to fetch files
778sub _lynx_fetch {
779 my $self = shift;
780 my %hash = @_;
781
782 my ($to);
783 my $tmpl = {
784 to => { required => 1, store => \$to }
785 };
786 check( $tmpl, \%hash ) or return;
787
d4b3706f 788 ### see if we have a lynx binary ###
79fd8837 789 if( my $lynx = can_run('lynx') ) {
790
d4b3706f 791 unless( IPC::Cmd->can_capture_buffer ) {
792 $METHOD_FAIL->{'lynx'} = 1;
793
794 return $self->_error(loc(
795 "Can not capture buffers. Can not use '%1' to fetch files",
796 'lynx' ));
797 }
79fd8837 798
6e654618 799 ### check if the HTTP resource exists ###
800 if ($self->uri =~ /^https?:\/\//i) {
801 my $cmd = [
802 $lynx,
803 '-head',
804 '-source',
805 "-auth=anonymous:$FROM_EMAIL",
806 ];
807
808 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
809
810 push @$cmd, $self->uri;
811
812 ### shell out ###
813 my $head;
814 unless(run( command => $cmd,
815 buffer => \$head,
816 verbose => $DEBUG )
817 ) {
818 return $self->_error(loc("Command failed: %1", $head || ''));
819 }
820
821 unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) {
822 return $self->_error(loc("Command failed: %1", $head || ''));
823 }
824 }
825
79fd8837 826 ### write to the output file ourselves, since lynx ass_u_mes to much
827 my $local = FileHandle->new(">$to")
828 or return $self->_error(loc(
829 "Could not open '%1' for writing: %2",$to,$!));
830
831 ### dump to stdout ###
832 my $cmd = [
833 $lynx,
834 '-source',
835 "-auth=anonymous:$FROM_EMAIL",
836 ];
837
838 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
839
d4b3706f 840 ### DO NOT quote things for IPC::Run, it breaks stuff.
6e654618 841 push @$cmd, $self->uri;
842
843 ### with IPC::Cmd > 0.41, this is fixed in teh library,
844 ### and there's no need for special casing any more.
845 ### DO NOT quote things for IPC::Run, it breaks stuff.
846 # $IPC::Cmd::USE_IPC_RUN
847 # ? $self->uri
848 # : QUOTE. $self->uri .QUOTE;
d4b3706f 849
79fd8837 850
851 ### shell out ###
852 my $captured;
853 unless(run( command => $cmd,
854 buffer => \$captured,
855 verbose => $DEBUG )
856 ) {
857 return $self->_error(loc("Command failed: %1", $captured || ''));
858 }
859
860 ### print to local file ###
861 ### XXX on a 404 with a special error page, $captured will actually
862 ### hold the contents of that page, and make it *appear* like the
863 ### request was a success, when really it wasn't :(
864 ### there doesn't seem to be an option for lynx to change the exit
865 ### code based on a 4XX status or so.
866 ### the closest we can come is using --error_file and parsing that,
867 ### which is very unreliable ;(
868 $local->print( $captured );
869 $local->close or return;
870
871 return $to;
872
873 } else {
874 $METHOD_FAIL->{'lynx'} = 1;
875 return;
876 }
877}
878
879### use /bin/ncftp to fetch files
880sub _ncftp_fetch {
881 my $self = shift;
882 my %hash = @_;
883
884 my ($to);
885 my $tmpl = {
886 to => { required => 1, store => \$to }
887 };
888 check( $tmpl, \%hash ) or return;
889
890 ### we can only set passive mode in interactive sesssions, so bail out
891 ### if $FTP_PASSIVE is set
892 return if $FTP_PASSIVE;
893
d4b3706f 894 ### see if we have a ncftp binary ###
79fd8837 895 if( my $ncftp = can_run('ncftp') ) {
896
897 my $cmd = [
898 $ncftp,
899 '-V', # do not be verbose
900 '-p', $FROM_EMAIL, # email as password
901 $self->host, # hostname
902 dirname($to), # local dir for the file
903 # remote path to the file
d4b3706f 904 ### DO NOT quote things for IPC::Run, it breaks stuff.
905 $IPC::Cmd::USE_IPC_RUN
906 ? File::Spec::Unix->catdir( $self->path, $self->file )
907 : QUOTE. File::Spec::Unix->catdir(
908 $self->path, $self->file ) .QUOTE
909
79fd8837 910 ];
911
912 ### shell out ###
913 my $captured;
914 unless(run( command => $cmd,
915 buffer => \$captured,
916 verbose => $DEBUG )
917 ) {
918 return $self->_error(loc("Command failed: %1", $captured || ''));
919 }
920
921 return $to;
922
923 } else {
924 $METHOD_FAIL->{'ncftp'} = 1;
925 return;
926 }
927}
928
929### use /bin/curl to fetch files
930sub _curl_fetch {
931 my $self = shift;
932 my %hash = @_;
933
934 my ($to);
935 my $tmpl = {
936 to => { required => 1, store => \$to }
937 };
938 check( $tmpl, \%hash ) or return;
939
940 if (my $curl = can_run('curl')) {
941
942 ### these long opts are self explanatory - I like that -jmb
6e654618 943 my $cmd = [ $curl, '-q' ];
79fd8837 944
945 push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
946
947 push(@$cmd, '--silent') unless $DEBUG;
948
949 ### curl does the right thing with passive, regardless ###
950 if ($self->scheme eq 'ftp') {
951 push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
952 }
953
954 ### curl doesn't follow 302 (temporarily moved) etc automatically
955 ### so we add --location to enable that.
6e654618 956 push @$cmd, '--fail', '--location', '--output', $to, $self->uri;
957
958 ### with IPC::Cmd > 0.41, this is fixed in teh library,
959 ### and there's no need for special casing any more.
960 ### DO NOT quote things for IPC::Run, it breaks stuff.
961 # $IPC::Cmd::USE_IPC_RUN
962 # ? ($to, $self->uri)
963 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
964
79fd8837 965
966 my $captured;
967 unless(run( command => $cmd,
968 buffer => \$captured,
969 verbose => $DEBUG )
970 ) {
971
972 return $self->_error(loc("Command failed: %1", $captured || ''));
973 }
974
975 return $to;
976
977 } else {
978 $METHOD_FAIL->{'curl'} = 1;
979 return;
980 }
981}
982
983
984### use File::Copy for fetching file:// urls ###
9e5ea595 985###
986### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html)
987### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
fe98d82b 988###
9e5ea595 989
79fd8837 990sub _file_fetch {
991 my $self = shift;
992 my %hash = @_;
993
994 my ($to);
995 my $tmpl = {
996 to => { required => 1, store => \$to }
997 };
998 check( $tmpl, \%hash ) or return;
999
9e5ea595 1000
1001
79fd8837 1002 ### prefix a / on unix systems with a file uri, since it would
1003 ### look somewhat like this:
9e5ea595 1004 ### file:///home/kane/file
1005 ### wheras windows file uris for 'c:\some\dir\file' might look like:
1006 ### file:///C:/some/dir/file
1007 ### file:///C|/some/dir/file
1008 ### or for a network share '\\host\share\some\dir\file':
1009 ### file:////host/share/some/dir/file
1010 ###
1011 ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
1012 ### file://vms.host.edu/disk$user/my/notes/note12345.txt
1013 ###
1014
1015 my $path = $self->path;
1016 my $vol = $self->vol;
1017 my $share = $self->share;
1018
1019 my $remote;
1020 if (!$share and $self->host) {
1021 return $self->_error(loc(
1022 "Currently %1 cannot handle hosts in %2 urls",
1023 'File::Fetch', 'file://'
1024 ));
1025 }
1026
1027 if( $vol ) {
1028 $path = File::Spec->catdir( split /\//, $path );
1029 $remote = File::Spec->catpath( $vol, $path, $self->file);
79fd8837 1030
9e5ea595 1031 } elsif( $share ) {
1032 ### win32 specific, and a share name, so we wont bother with File::Spec
1033 $path =~ s|/+|\\|g;
1034 $remote = "\\\\".$self->host."\\$share\\$path";
1035
1036 } else {
5e6d05d2 1037 ### File::Spec on VMS can not currently handle UNIX syntax.
1038 my $file_class = ON_VMS
1039 ? 'File::Spec::Unix'
1040 : 'File::Spec';
1041
1042 $remote = $file_class->catfile( $path, $self->file );
9e5ea595 1043 }
79fd8837 1044
1045 ### File::Copy is littered with 'die' statements :( ###
1046 my $rv = eval { File::Copy::copy( $remote, $to ) };
1047
1048 ### something went wrong ###
1049 if( !$rv or $@ ) {
1050 return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
1051 $remote, $to, $!, $@));
1052 }
1053
1054 return $to;
1055}
1056
1057### use /usr/bin/rsync to fetch files
1058sub _rsync_fetch {
1059 my $self = shift;
1060 my %hash = @_;
1061
1062 my ($to);
1063 my $tmpl = {
1064 to => { required => 1, store => \$to }
1065 };
1066 check( $tmpl, \%hash ) or return;
1067
1068 if (my $rsync = can_run('rsync')) {
1069
1070 my $cmd = [ $rsync ];
1071
1072 ### XXX: rsync has no I/O timeouts at all, by default
1073 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1074
1075 push(@$cmd, '--quiet') unless $DEBUG;
1076
d4b3706f 1077 ### DO NOT quote things for IPC::Run, it breaks stuff.
6e654618 1078 push @$cmd, $self->uri, $to;
1079
1080 ### with IPC::Cmd > 0.41, this is fixed in teh library,
1081 ### and there's no need for special casing any more.
1082 ### DO NOT quote things for IPC::Run, it breaks stuff.
1083 # $IPC::Cmd::USE_IPC_RUN
1084 # ? ($to, $self->uri)
1085 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
79fd8837 1086
1087 my $captured;
1088 unless(run( command => $cmd,
1089 buffer => \$captured,
1090 verbose => $DEBUG )
1091 ) {
1092
fe98d82b 1093 return $self->_error(loc("Command %1 failed: %2",
1094 "@$cmd" || '', $captured || ''));
79fd8837 1095 }
1096
1097 return $to;
1098
1099 } else {
1100 $METHOD_FAIL->{'rsync'} = 1;
1101 return;
1102 }
1103}
1104
1105#################################
1106#
1107# Error code
1108#
1109#################################
1110
1111=pod
1112
1113=head2 $ff->error([BOOL])
1114
1115Returns the last encountered error as string.
1116Pass it a true value to get the C<Carp::longmess()> output instead.
1117
1118=cut
1119
d4b3706f 1120### error handling the way Archive::Extract does it
1121sub _error {
1122 my $self = shift;
1123 my $error = shift;
1124
1125 $self->_error_msg( $error );
1126 $self->_error_msg_long( Carp::longmess($error) );
1127
1128 if( $WARN ) {
1129 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
79fd8837 1130 }
1131
d4b3706f 1132 return;
79fd8837 1133}
1134
d4b3706f 1135sub error {
1136 my $self = shift;
1137 return shift() ? $self->_error_msg_long : $self->_error_msg;
1138}
79fd8837 1139
1140
11411;
1142
1143=pod
1144
1145=head1 HOW IT WORKS
1146
1147File::Fetch is able to fetch a variety of uris, by using several
1148external programs and modules.
1149
1150Below is a mapping of what utilities will be used in what order
1151for what schemes, if available:
1152
6e654618 1153 file => LWP, lftp, file
1154 http => LWP, wget, curl, lftp, lynx
1155 ftp => LWP, Net::FTP, wget, curl, lftp, ncftp, ftp
79fd8837 1156 rsync => rsync
1157
1158If you'd like to disable the use of one or more of these utilities
1159and/or modules, see the C<$BLACKLIST> variable further down.
1160
1161If a utility or module isn't available, it will be marked in a cache
1162(see the C<$METHOD_FAIL> variable further down), so it will not be
1163tried again. The C<fetch> method will only fail when all options are
1164exhausted, and it was not able to retrieve the file.
1165
1166A special note about fetching files from an ftp uri:
1167
1168By default, all ftp connections are done in passive mode. To change
1169that, see the C<$FTP_PASSIVE> variable further down.
1170
1171Furthermore, ftp uris only support anonymous connections, so no
1172named user/password pair can be passed along.
1173
1174C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
1175further down.
1176
1177=head1 GLOBAL VARIABLES
1178
1179The behaviour of File::Fetch can be altered by changing the following
1180global variables:
1181
1182=head2 $File::Fetch::FROM_EMAIL
1183
1184This is the email address that will be sent as your anonymous ftp
1185password.
1186
1187Default is C<File-Fetch@example.com>.
1188
1189=head2 $File::Fetch::USER_AGENT
1190
1191This is the useragent as C<LWP> will report it.
1192
1193Default is C<File::Fetch/$VERSION>.
1194
1195=head2 $File::Fetch::FTP_PASSIVE
1196
1197This variable controls whether the environment variable C<FTP_PASSIVE>
1198and any passive switches to commandline tools will be set to true.
1199
1200Default value is 1.
1201
1202Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
1203files, since passive mode can only be set interactively for this binary
1204
1205=head2 $File::Fetch::TIMEOUT
1206
1207When set, controls the network timeout (counted in seconds).
1208
1209Default value is 0.
1210
1211=head2 $File::Fetch::WARN
1212
1213This variable controls whether errors encountered internally by
1214C<File::Fetch> should be C<carp>'d or not.
1215
1216Set to false to silence warnings. Inspect the output of the C<error()>
1217method manually to see what went wrong.
1218
1219Defaults to C<true>.
1220
1221=head2 $File::Fetch::DEBUG
1222
1223This enables debugging output when calling commandline utilities to
1224fetch files.
1225This also enables C<Carp::longmess> errors, instead of the regular
1226C<carp> errors.
1227
1228Good for tracking down why things don't work with your particular
1229setup.
1230
1231Default is 0.
1232
1233=head2 $File::Fetch::BLACKLIST
1234
1235This is an array ref holding blacklisted modules/utilities for fetching
1236files with.
1237
1238To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
1239set $File::Fetch::BLACKLIST to:
1240
1241 $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1242
1243The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.
1244
1245See the note on C<MAPPING> below.
1246
1247=head2 $File::Fetch::METHOD_FAIL
1248
1249This is a hashref registering what modules/utilities were known to fail
1250for fetching files (mostly because they weren't installed).
1251
1252You can reset this cache by assigning an empty hashref to it, or
1253individually remove keys.
1254
1255See the note on C<MAPPING> below.
1256
1257=head1 MAPPING
1258
1259
1260Here's a quick mapping for the utilities/modules, and their names for
1261the $BLACKLIST, $METHOD_FAIL and other internal functions.
1262
1263 LWP => lwp
1264 Net::FTP => netftp
1265 wget => wget
1266 lynx => lynx
1267 ncftp => ncftp
1268 ftp => ftp
1269 curl => curl
1270 rsync => rsync
6e654618 1271 lftp => lftp
79fd8837 1272
1273=head1 FREQUENTLY ASKED QUESTIONS
1274
1275=head2 So how do I use a proxy with File::Fetch?
1276
1277C<File::Fetch> currently only supports proxies with LWP::UserAgent.
1278You will need to set your environment variables accordingly. For
1279example, to use an ftp proxy:
1280
1281 $ENV{ftp_proxy} = 'foo.com';
1282
1283Refer to the LWP::UserAgent manpage for more details.
1284
1285=head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1286
1287C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
1288which we in turn capture. If that content is a 'custom' error file
1289(like, say, a C<404 handler>), you will get that contents instead.
1290
1291Sadly, C<lynx> doesn't support any options to return a different exit
1292code on non-C<200 OK> status, giving us no way to tell the difference
1293between a 'successfull' fetch and a custom error page.
1294
1295Therefor, we recommend to only use C<lynx> as a last resort. This is
1296why it is at the back of our list of methods to try as well.
1297
d4b3706f 1298=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1299
1300C<File::Fetch> is relatively smart about things. When trying to write
1301a file to disk, it removes the C<query parameters> (see the
1302C<output_file> method for details) from the file name before creating
1303it. In most cases this suffices.
1304
1305If you have any other characters you need to escape, please install
1306the C<URI::Escape> module from CPAN, and pre-encode your URI before
1307passing it to C<File::Fetch>. You can read about the details of URIs
1308and URI encoding here:
1309
1310 http://www.faqs.org/rfcs/rfc2396.html
1311
79fd8837 1312=head1 TODO
1313
1314=over 4
1315
1316=item Implement $PREFER_BIN
1317
1318To indicate to rather use commandline tools than modules
1319
a0ad4830 1320=back
1321
1322=head1 BUG REPORTS
1323
1324Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>.
1325
1326=head1 AUTHOR
79fd8837 1327
d4b3706f 1328This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
79fd8837 1329
1330=head1 COPYRIGHT
1331
a0ad4830 1332This library is free software; you may redistribute and/or modify it
1333under the same terms as Perl itself.
79fd8837 1334
79fd8837 1335
1336=cut
1337
1338# Local variables:
1339# c-indentation-style: bsd
1340# c-basic-offset: 4
1341# indent-tabs-mode: nil
1342# End:
1343# vim: expandtab shiftwidth=4:
1344
1345
1346
1347