-e is better than -f (in case of symbolic links)
[p5sagit/p5-mst-13.2.git] / lib / File / Fetch.pm
CommitLineData
79fd8837 1package File::Fetch;
2
3use strict;
4use FileHandle;
5use File::Copy;
6use File::Spec;
7use File::Spec::Unix;
79fd8837 8use File::Basename qw[dirname];
9
10use Cwd qw[cwd];
11use Carp qw[carp];
12use IPC::Cmd qw[can_run run];
13use File::Path qw[mkpath];
14use Params::Check qw[check];
15use Module::Load::Conditional qw[can_load];
16use Locale::Maketext::Simple Style => 'gettext';
17
18use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
19 $BLACKLIST $METHOD_FAIL $VERSION $METHODS
20 $FTP_PASSIVE $TIMEOUT $DEBUG $WARN
21 ];
22
d4b3706f 23use constant QUOTE => do { $^O eq 'MSWin32' ? q["] : q['] };
24
25
1f80753b 26$VERSION = '0.13_04';
fe98d82b 27$VERSION = eval $VERSION; # avoid warnings with development releases
28$PREFER_BIN = 0; # XXX TODO implement
79fd8837 29$FROM_EMAIL = 'File-Fetch@example.com';
30$USER_AGENT = 'File::Fetch/$VERSION';
31$BLACKLIST = [qw|ftp|];
32$METHOD_FAIL = { };
33$FTP_PASSIVE = 1;
34$TIMEOUT = 0;
35$DEBUG = 0;
36$WARN = 1;
37
38### methods available to fetch the file depending on the scheme
39$METHODS = {
40 http => [ qw|lwp wget curl lynx| ],
41 ftp => [ qw|lwp netftp wget curl ncftp ftp| ],
42 file => [ qw|lwp file| ],
43 rsync => [ qw|rsync| ]
44};
45
46### silly warnings ###
47local $Params::Check::VERBOSE = 1;
48local $Params::Check::VERBOSE = 1;
49local $Module::Load::Conditional::VERBOSE = 0;
50local $Module::Load::Conditional::VERBOSE = 0;
51
52### see what OS we are on, important for file:// uris ###
9e5ea595 53use constant ON_WIN => ($^O eq 'MSWin32');
54use constant ON_VMS => ($^O eq 'VMS');
1f80753b 55use constant ON_UNIX => (!ON_WIN);
56use constant HAS_VOL => (ON_WIN);
fe98d82b 57use constant HAS_SHARE => (ON_WIN);
79fd8837 58=pod
59
60=head1 NAME
61
62File::Fetch - A generic file fetching mechanism
63
64=head1 SYNOPSIS
65
66 use File::Fetch;
67
68 ### build a File::Fetch object ###
69 my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
70
71 ### fetch the uri to cwd() ###
72 my $where = $ff->fetch() or die $ff->error;
73
74 ### fetch the uri to /tmp ###
75 my $where = $ff->fetch( to => '/tmp' );
76
77 ### parsed bits from the uri ###
78 $ff->uri;
79 $ff->scheme;
80 $ff->host;
81 $ff->path;
82 $ff->file;
83
84=head1 DESCRIPTION
85
86File::Fetch is a generic file fetching mechanism.
87
88It allows you to fetch any file pointed to by a C<ftp>, C<http>,
89C<file>, or C<rsync> uri by a number of different means.
90
91See the C<HOW IT WORKS> section further down for details.
92
d4b3706f 93=head1 ACCESSORS
94
95A C<File::Fetch> object has the following accessors
96
97=over 4
98
99=item $ff->uri
100
101The uri you passed to the constructor
102
103=item $ff->scheme
104
105The scheme from the uri (like 'file', 'http', etc)
106
107=item $ff->host
108
fe98d82b 109The hostname in the uri. Will be empty if host was originally
110'localhost' for a 'file://' url.
111
112=item $ff->vol
113
114On operating systems with the concept of a volume the second element
115of a file:// is considered to the be volume specification for the file.
1f80753b 116Thus on Win32 this routine returns the volume, on other operating
117systems this returns nothing.
fe98d82b 118
119On Windows this value may be empty if the uri is to a network share, in
120which case the 'share' property will be defined. Additionally, volume
121specifications that use '|' as ':' will be converted on read to use ':'.
122
1f80753b 123On VMS, which has a volume concept, this field will be empty because VMS
124file specifications are converted to absolute UNIX format and the volume
125information is transparently included.
126
fe98d82b 127=item $ff->share
128
129On systems with the concept of a network share (currently only Windows) returns
130the sharename from a file://// url. On other operating systems returns empty.
d4b3706f 131
132=item $ff->path
133
134The path from the uri, will be at least a single '/'.
135
136=item $ff->file
137
138The name of the remote file. For the local file name, the
139result of $ff->output_file will be used.
140
141=cut
142
143
144##########################
145### Object & Accessors ###
146##########################
147
148{
149 ### template for new() and autogenerated accessors ###
150 my $Tmpl = {
151 scheme => { default => 'http' },
152 host => { default => 'localhost' },
153 path => { default => '/' },
154 file => { required => 1 },
155 uri => { required => 1 },
1f80753b 156 vol => { default => '' }, # windows for file:// uris
fe98d82b 157 share => { default => '' }, # windows for file:// uris
d4b3706f 158 _error_msg => { no_override => 1 },
159 _error_msg_long => { no_override => 1 },
160 };
161
162 for my $method ( keys %$Tmpl ) {
163 no strict 'refs';
164 *$method = sub {
165 my $self = shift;
166 $self->{$method} = $_[0] if @_;
167 return $self->{$method};
168 }
169 }
170
171 sub _create {
172 my $class = shift;
173 my %hash = @_;
174
175 my $args = check( $Tmpl, \%hash ) or return;
176
177 bless $args, $class;
178
179 if( lc($args->scheme) ne 'file' and not $args->host ) {
180 return File::Fetch->_error(loc(
181 "Hostname required when fetching from '%1'",$args->scheme));
182 }
183
184 for (qw[path file]) {
9e5ea595 185 unless( $args->$_() ) { # 5.5.x needs the ()
d4b3706f 186 return File::Fetch->_error(loc("No '%1' specified",$_));
187 }
188 }
189
190 return $args;
191 }
192}
193
194=item $ff->output_file
195
196The name of the output file. This is the same as $ff->file,
197but any query parameters are stripped off. For example:
198
199 http://example.com/index.html?x=y
200
201would make the output file be C<index.html> rather than
202C<index.html?x=y>.
203
204=back
205
206=cut
207
208sub output_file {
209 my $self = shift;
210 my $file = $self->file;
211
212 $file =~ s/\?.*$//g;
213
214 return $file;
215}
216
217### XXX do this or just point to URI::Escape?
218# =head2 $esc_uri = $ff->escaped_uri
219#
220# =cut
221#
222# ### most of this is stolen straight from URI::escape
223# { ### Build a char->hex map
224# my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
225#
226# sub escaped_uri {
227# my $self = shift;
228# my $uri = $self->uri;
229#
230# ### Default unsafe characters. RFC 2732 ^(uric - reserved)
231# $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
232# $escapes{$1} || $self->_fail_hi($1)/ge;
233#
234# return $uri;
235# }
236#
237# sub _fail_hi {
238# my $self = shift;
239# my $char = shift;
240#
241# $self->_error(loc(
242# "Can't escape '%1', try using the '%2' module instead",
243# sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
244# ));
245# }
246#
247# sub output_file {
248#
249# }
250#
251#
252# }
253
79fd8837 254=head1 METHODS
255
256=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
257
258Parses the uri and creates a corresponding File::Fetch::Item object,
259that is ready to be C<fetch>ed and returns it.
260
261Returns false on failure.
262
263=cut
264
265sub new {
266 my $class = shift;
267 my %hash = @_;
268
269 my ($uri);
270 my $tmpl = {
271 uri => { required => 1, store => \$uri },
272 };
273
274 check( $tmpl, \%hash ) or return;
275
276 ### parse the uri to usable parts ###
277 my $href = __PACKAGE__->_parse_uri( $uri ) or return;
278
279 ### make it into a FFI object ###
d4b3706f 280 my $ff = File::Fetch->_create( %$href ) or return;
79fd8837 281
282
283 ### return the object ###
d4b3706f 284 return $ff;
79fd8837 285}
286
287### parses an uri to a hash structure:
288###
289### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
290###
291### becomes:
292###
293### $href = {
294### scheme => 'ftp',
295### host => 'ftp.cpan.org',
296### path => '/pub/mirror',
297### file => 'index.html'
298### };
299###
9e5ea595 300### In the case of file:// urls there maybe be additional fields
301###
1f80753b 302### For systems with volume specifications such as Win32 there will be
fe98d82b 303### a volume specifier provided in the 'vol' field.
304###
305### 'vol' => 'volumename'
306###
9e5ea595 307### For windows file shares there may be a 'share' key specified
308###
309### 'share' => 'sharename'
310###
fe98d82b 311### Note that the rules of what a file:// url means vary by the operating system
312### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
313### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
314### not '/foo/bar.txt'
9e5ea595 315###
fe98d82b 316### Similarly if the host interpreting the url is VMS then
317### file:///disk$user/my/notes/note12345.txt' means
1f80753b 318### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
319### if it is unix where it means /disk$user/my/notes/note12345.txt'.
320### Except for some cases in the File::Spec methods, Perl on VMS will generally
321### handle UNIX format file specifications.
fe98d82b 322###
323### This means it is impossible to serve certain file:// urls on certain systems.
324###
325### Thus are the problems with a protocol-less specification. :-(
9e5ea595 326###
327
79fd8837 328sub _parse_uri {
329 my $self = shift;
330 my $uri = shift or return;
331
332 my $href = { uri => $uri };
333
334 ### find the scheme ###
335 $uri =~ s|^(\w+)://||;
336 $href->{scheme} = $1;
337
9e5ea595 338 ### See rfc 1738 section 3.10
339 ### http://www.faqs.org/rfcs/rfc1738.html
340 ### And wikipedia for more on windows file:// urls
341 ### http://en.wikipedia.org/wiki/File://
79fd8837 342 if( $href->{scheme} eq 'file' ) {
9e5ea595 343
344 my @parts = split '/',$uri;
345
346 ### file://hostname/...
347 ### file://hostname/...
fe98d82b 348 ### normalize file://localhost with file:///
9e5ea595 349 $href->{host} = $parts[0] || '';
350
351 ### index in @parts where the path components begin;
352 my $index = 1;
9e5ea595 353
354 ### file:////hostname/sharename/blah.txt
fe98d82b 355 if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
356
9e5ea595 357 $href->{host} = $parts[2] || ''; # avoid warnings
358 $href->{share} = $parts[3] || ''; # avoid warnings
359
360 $index = 4 # index after the share
9e5ea595 361
fe98d82b 362 ### file:///D|/blah.txt
363 ### file:///D:/blah.txt
fe98d82b 364 } elsif (HAS_VOL) {
365
366 ### this code comes from dmq's patch, but:
367 ### XXX if volume is empty, wouldn't that be an error? --kane
368 ### if so, our file://localhost test needs to be fixed as wel
369 $href->{vol} = $parts[1] || '';
370
371 ### correct D| style colume descriptors
372 $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
373
374 $index = 2; # index after the volume
375 }
376
377 ### rebuild the path from the leftover parts;
9e5ea595 378 $href->{path} = join '/', '', splice( @parts, $index, $#parts );
79fd8837 379
380 } else {
9e5ea595 381 ### using anything but qw() in hash slices may produce warnings
382 ### in older perls :-(
383 @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s;
79fd8837 384 }
385
386 ### split the path into file + dir ###
387 { my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
388 $href->{path} = $parts[1];
389 $href->{file} = $parts[2];
390 }
391
fe98d82b 392 ### host will be empty if the target was 'localhost' and the
393 ### scheme was 'file'
394 $href->{host} = '' if ($href->{host} eq 'localhost') and
395 ($href->{scheme} eq 'file');
79fd8837 396
397 return $href;
398}
399
400=head2 $ff->fetch( [to => /my/output/dir/] )
401
402Fetches the file you requested. By default it writes to C<cwd()>,
403but you can override that by specifying the C<to> argument.
404
405Returns the full path to the downloaded file on success, and false
406on failure.
407
408=cut
409
410sub fetch {
411 my $self = shift or return;
412 my %hash = @_;
413
414 my $to;
415 my $tmpl = {
416 to => { default => cwd(), store => \$to },
417 };
418
419 check( $tmpl, \%hash ) or return;
420
1f80753b 421 # On VMS force to VMS format so File::Spec will work.
422 $to = VMS::Filespec::vmspath($to) if ON_VMS;
423
79fd8837 424 ### create the path if it doesn't exist yet ###
425 unless( -d $to ) {
426 eval { mkpath( $to ) };
427
428 return $self->_error(loc("Could not create path '%1'",$to)) if $@;
429 }
430
431 ### set passive ftp if required ###
432 local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
433
fe98d82b 434 ### we dont use catfile on win32 because if we are using a cygwin tool
435 ### under cmd.exe they wont understand windows style separators.
436 my $out_to = ON_WIN ? $to.'/'.$self->output_file
437 : File::Spec->catfile( $to, $self->output_file );
438
79fd8837 439 for my $method ( @{ $METHODS->{$self->scheme} } ) {
440 my $sub = '_'.$method.'_fetch';
441
442 unless( __PACKAGE__->can($sub) ) {
443 $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
444 $method));
445 next;
446 }
447
448 ### method is blacklisted ###
449 next if grep { lc $_ eq $method } @$BLACKLIST;
450
451 ### method is known to fail ###
452 next if $METHOD_FAIL->{$method};
453
d4b3706f 454 ### there's serious issues with IPC::Run and quoting of command
455 ### line arguments. using quotes in the wrong place breaks things,
456 ### and in the case of say,
457 ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
458 ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
459 ### it doesn't matter how you quote, it always fails.
460 local $IPC::Cmd::USE_IPC_RUN = 0;
461
462 if( my $file = $self->$sub(
a0ad4830 463 to => $out_to
d4b3706f 464 )){
79fd8837 465
466 unless( -e $file && -s _ ) {
467 $self->_error(loc("'%1' said it fetched '%2', ".
468 "but it was not created",$method,$file));
469
470 ### mark the failure ###
471 $METHOD_FAIL->{$method} = 1;
472
473 next;
474
475 } else {
476
477 my $abs = File::Spec->rel2abs( $file );
478 return $abs;
479 }
480 }
481 }
482
483
484 ### if we got here, we looped over all methods, but we weren't able
485 ### to fetch it.
486 return;
487}
488
79fd8837 489########################
490### _*_fetch methods ###
491########################
492
493### LWP fetching ###
494sub _lwp_fetch {
495 my $self = shift;
496 my %hash = @_;
497
498 my ($to);
499 my $tmpl = {
500 to => { required => 1, store => \$to }
501 };
502 check( $tmpl, \%hash ) or return;
503
504 ### modules required to download with lwp ###
505 my $use_list = {
506 LWP => '0.0',
507 'LWP::UserAgent' => '0.0',
508 'HTTP::Request' => '0.0',
509 'HTTP::Status' => '0.0',
510 URI => '0.0',
511
512 };
513
514 if( can_load(modules => $use_list) ) {
515
516 ### setup the uri object
517 my $uri = URI->new( File::Spec::Unix->catfile(
518 $self->path, $self->file
519 ) );
520
521 ### special rules apply for file:// uris ###
522 $uri->scheme( $self->scheme );
523 $uri->host( $self->scheme eq 'file' ? '' : $self->host );
524 $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';
525
526 ### set up the useragent object
527 my $ua = LWP::UserAgent->new();
528 $ua->timeout( $TIMEOUT ) if $TIMEOUT;
529 $ua->agent( $USER_AGENT );
530 $ua->from( $FROM_EMAIL );
531 $ua->env_proxy;
532
533 my $res = $ua->mirror($uri, $to) or return;
534
535 ### uptodate or fetched ok ###
536 if ( $res->code == 304 or $res->code == 200 ) {
537 return $to;
538
539 } else {
540 return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
541 $res->code, HTTP::Status::status_message($res->code),
542 $res->status_line));
543 }
544
545 } else {
546 $METHOD_FAIL->{'lwp'} = 1;
547 return;
548 }
549}
550
551### Net::FTP fetching
552sub _netftp_fetch {
553 my $self = shift;
554 my %hash = @_;
555
556 my ($to);
557 my $tmpl = {
558 to => { required => 1, store => \$to }
559 };
560 check( $tmpl, \%hash ) or return;
561
562 ### required modules ###
563 my $use_list = { 'Net::FTP' => 0 };
564
565 if( can_load( modules => $use_list ) ) {
566
567 ### make connection ###
568 my $ftp;
569 my @options = ($self->host);
570 push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
571 unless( $ftp = Net::FTP->new( @options ) ) {
572 return $self->_error(loc("Ftp creation failed: %1",$@));
573 }
574
575 ### login ###
576 unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
577 return $self->_error(loc("Could not login to '%1'",$self->host));
578 }
579
580 ### set binary mode, just in case ###
581 $ftp->binary;
582
583 ### create the remote path
584 ### remember remote paths are unix paths! [#11483]
585 my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
586
587 ### fetch the file ###
588 my $target;
589 unless( $target = $ftp->get( $remote, $to ) ) {
590 return $self->_error(loc("Could not fetch '%1' from '%2'",
591 $remote, $self->host));
592 }
593
594 ### log out ###
595 $ftp->quit;
596
597 return $target;
598
599 } else {
600 $METHOD_FAIL->{'netftp'} = 1;
601 return;
602 }
603}
604
605### /bin/wget fetch ###
606sub _wget_fetch {
607 my $self = shift;
608 my %hash = @_;
609
610 my ($to);
611 my $tmpl = {
612 to => { required => 1, store => \$to }
613 };
614 check( $tmpl, \%hash ) or return;
615
616 ### see if we have a wget binary ###
617 if( my $wget = can_run('wget') ) {
618
619 ### no verboseness, thanks ###
620 my $cmd = [ $wget, '--quiet' ];
621
622 ### if a timeout is set, add it ###
623 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
624
625 ### run passive if specified ###
626 push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
627
628 ### set the output document, add the uri ###
d4b3706f 629 push @$cmd, '--output-document',
630 ### DO NOT quote things for IPC::Run, it breaks stuff.
631 $IPC::Cmd::USE_IPC_RUN
632 ? ($to, $self->uri)
633 : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
79fd8837 634
635 ### shell out ###
636 my $captured;
d4b3706f 637 unless(run( command => $cmd,
638 buffer => \$captured,
639 verbose => $DEBUG
640 )) {
79fd8837 641 ### wget creates the output document always, even if the fetch
642 ### fails.. so unlink it in that case
643 1 while unlink $to;
644
645 return $self->_error(loc( "Command failed: %1", $captured || '' ));
646 }
647
648 return $to;
649
650 } else {
651 $METHOD_FAIL->{'wget'} = 1;
652 return;
653 }
654}
655
656
657### /bin/ftp fetch ###
658sub _ftp_fetch {
659 my $self = shift;
660 my %hash = @_;
661
662 my ($to);
663 my $tmpl = {
664 to => { required => 1, store => \$to }
665 };
666 check( $tmpl, \%hash ) or return;
667
d4b3706f 668 ### see if we have a ftp binary ###
79fd8837 669 if( my $ftp = can_run('ftp') ) {
670
671 my $fh = FileHandle->new;
672
673 local $SIG{CHLD} = 'IGNORE';
674
675 unless ($fh->open("|$ftp -n")) {
676 return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
677 }
678
679 my @dialog = (
680 "lcd " . dirname($to),
681 "open " . $self->host,
682 "user anonymous $FROM_EMAIL",
683 "cd /",
684 "cd " . $self->path,
685 "binary",
d4b3706f 686 "get " . $self->file . " " . $self->output_file,
79fd8837 687 "quit",
688 );
689
690 foreach (@dialog) { $fh->print($_, "\n") }
691 $fh->close or return;
692
693 return $to;
694 }
695}
696
697### lynx is stupid - it decompresses any .gz file it finds to be text
698### use /bin/lynx to fetch files
699sub _lynx_fetch {
700 my $self = shift;
701 my %hash = @_;
702
703 my ($to);
704 my $tmpl = {
705 to => { required => 1, store => \$to }
706 };
707 check( $tmpl, \%hash ) or return;
708
d4b3706f 709 ### see if we have a lynx binary ###
79fd8837 710 if( my $lynx = can_run('lynx') ) {
711
d4b3706f 712 unless( IPC::Cmd->can_capture_buffer ) {
713 $METHOD_FAIL->{'lynx'} = 1;
714
715 return $self->_error(loc(
716 "Can not capture buffers. Can not use '%1' to fetch files",
717 'lynx' ));
718 }
79fd8837 719
720 ### write to the output file ourselves, since lynx ass_u_mes to much
721 my $local = FileHandle->new(">$to")
722 or return $self->_error(loc(
723 "Could not open '%1' for writing: %2",$to,$!));
724
725 ### dump to stdout ###
726 my $cmd = [
727 $lynx,
728 '-source',
729 "-auth=anonymous:$FROM_EMAIL",
730 ];
731
732 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
733
d4b3706f 734 ### DO NOT quote things for IPC::Run, it breaks stuff.
735 push @$cmd, $IPC::Cmd::USE_IPC_RUN
736 ? $self->uri
737 : QUOTE. $self->uri .QUOTE;
738
79fd8837 739
740 ### shell out ###
741 my $captured;
742 unless(run( command => $cmd,
743 buffer => \$captured,
744 verbose => $DEBUG )
745 ) {
746 return $self->_error(loc("Command failed: %1", $captured || ''));
747 }
748
749 ### print to local file ###
750 ### XXX on a 404 with a special error page, $captured will actually
751 ### hold the contents of that page, and make it *appear* like the
752 ### request was a success, when really it wasn't :(
753 ### there doesn't seem to be an option for lynx to change the exit
754 ### code based on a 4XX status or so.
755 ### the closest we can come is using --error_file and parsing that,
756 ### which is very unreliable ;(
757 $local->print( $captured );
758 $local->close or return;
759
760 return $to;
761
762 } else {
763 $METHOD_FAIL->{'lynx'} = 1;
764 return;
765 }
766}
767
768### use /bin/ncftp to fetch files
769sub _ncftp_fetch {
770 my $self = shift;
771 my %hash = @_;
772
773 my ($to);
774 my $tmpl = {
775 to => { required => 1, store => \$to }
776 };
777 check( $tmpl, \%hash ) or return;
778
779 ### we can only set passive mode in interactive sesssions, so bail out
780 ### if $FTP_PASSIVE is set
781 return if $FTP_PASSIVE;
782
d4b3706f 783 ### see if we have a ncftp binary ###
79fd8837 784 if( my $ncftp = can_run('ncftp') ) {
785
786 my $cmd = [
787 $ncftp,
788 '-V', # do not be verbose
789 '-p', $FROM_EMAIL, # email as password
790 $self->host, # hostname
791 dirname($to), # local dir for the file
792 # remote path to the file
d4b3706f 793 ### DO NOT quote things for IPC::Run, it breaks stuff.
794 $IPC::Cmd::USE_IPC_RUN
795 ? File::Spec::Unix->catdir( $self->path, $self->file )
796 : QUOTE. File::Spec::Unix->catdir(
797 $self->path, $self->file ) .QUOTE
798
79fd8837 799 ];
800
801 ### shell out ###
802 my $captured;
803 unless(run( command => $cmd,
804 buffer => \$captured,
805 verbose => $DEBUG )
806 ) {
807 return $self->_error(loc("Command failed: %1", $captured || ''));
808 }
809
810 return $to;
811
812 } else {
813 $METHOD_FAIL->{'ncftp'} = 1;
814 return;
815 }
816}
817
818### use /bin/curl to fetch files
819sub _curl_fetch {
820 my $self = shift;
821 my %hash = @_;
822
823 my ($to);
824 my $tmpl = {
825 to => { required => 1, store => \$to }
826 };
827 check( $tmpl, \%hash ) or return;
828
829 if (my $curl = can_run('curl')) {
830
831 ### these long opts are self explanatory - I like that -jmb
832 my $cmd = [ $curl ];
833
834 push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
835
836 push(@$cmd, '--silent') unless $DEBUG;
837
838 ### curl does the right thing with passive, regardless ###
839 if ($self->scheme eq 'ftp') {
840 push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
841 }
842
843 ### curl doesn't follow 302 (temporarily moved) etc automatically
844 ### so we add --location to enable that.
d4b3706f 845 push @$cmd, '--fail', '--location', '--output',
846 ### DO NOT quote things for IPC::Run, it breaks stuff.
847 $IPC::Cmd::USE_IPC_RUN
848 ? ($to, $self->uri)
849 : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
79fd8837 850
851 my $captured;
852 unless(run( command => $cmd,
853 buffer => \$captured,
854 verbose => $DEBUG )
855 ) {
856
857 return $self->_error(loc("Command failed: %1", $captured || ''));
858 }
859
860 return $to;
861
862 } else {
863 $METHOD_FAIL->{'curl'} = 1;
864 return;
865 }
866}
867
868
869### use File::Copy for fetching file:// urls ###
9e5ea595 870###
871### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html)
872### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
fe98d82b 873###
9e5ea595 874
79fd8837 875sub _file_fetch {
876 my $self = shift;
877 my %hash = @_;
878
879 my ($to);
880 my $tmpl = {
881 to => { required => 1, store => \$to }
882 };
883 check( $tmpl, \%hash ) or return;
884
9e5ea595 885
886
79fd8837 887 ### prefix a / on unix systems with a file uri, since it would
888 ### look somewhat like this:
9e5ea595 889 ### file:///home/kane/file
890 ### wheras windows file uris for 'c:\some\dir\file' might look like:
891 ### file:///C:/some/dir/file
892 ### file:///C|/some/dir/file
893 ### or for a network share '\\host\share\some\dir\file':
894 ### file:////host/share/some/dir/file
895 ###
896 ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
897 ### file://vms.host.edu/disk$user/my/notes/note12345.txt
898 ###
899
900 my $path = $self->path;
901 my $vol = $self->vol;
902 my $share = $self->share;
903
904 my $remote;
905 if (!$share and $self->host) {
906 return $self->_error(loc(
907 "Currently %1 cannot handle hosts in %2 urls",
908 'File::Fetch', 'file://'
909 ));
910 }
911
912 if( $vol ) {
913 $path = File::Spec->catdir( split /\//, $path );
914 $remote = File::Spec->catpath( $vol, $path, $self->file);
79fd8837 915
9e5ea595 916 } elsif( $share ) {
917 ### win32 specific, and a share name, so we wont bother with File::Spec
918 $path =~ s|/+|\\|g;
919 $remote = "\\\\".$self->host."\\$share\\$path";
920
921 } else {
1f80753b 922 if (ON_VMS) {
923 # File::Spec on VMS can not currently handle UNIX syntax.
924 $remote = File::Spec::Unix->catfile( $path, $self->file );
925 } else {
926 $remote = File::Spec->catfile( $path, $self->file );
927 }
9e5ea595 928 }
79fd8837 929
930 ### File::Copy is littered with 'die' statements :( ###
931 my $rv = eval { File::Copy::copy( $remote, $to ) };
932
933 ### something went wrong ###
934 if( !$rv or $@ ) {
935 return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
936 $remote, $to, $!, $@));
937 }
938
939 return $to;
940}
941
942### use /usr/bin/rsync to fetch files
943sub _rsync_fetch {
944 my $self = shift;
945 my %hash = @_;
946
947 my ($to);
948 my $tmpl = {
949 to => { required => 1, store => \$to }
950 };
951 check( $tmpl, \%hash ) or return;
952
953 if (my $rsync = can_run('rsync')) {
954
955 my $cmd = [ $rsync ];
956
957 ### XXX: rsync has no I/O timeouts at all, by default
958 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
959
960 push(@$cmd, '--quiet') unless $DEBUG;
961
d4b3706f 962 ### DO NOT quote things for IPC::Run, it breaks stuff.
963 push @$cmd, $IPC::Cmd::USE_IPC_RUN
964 ? ($self->uri, $to)
965 : (QUOTE. $self->uri .QUOTE, QUOTE. $to .QUOTE);
79fd8837 966
967 my $captured;
968 unless(run( command => $cmd,
969 buffer => \$captured,
970 verbose => $DEBUG )
971 ) {
972
fe98d82b 973 return $self->_error(loc("Command %1 failed: %2",
974 "@$cmd" || '', $captured || ''));
79fd8837 975 }
976
977 return $to;
978
979 } else {
980 $METHOD_FAIL->{'rsync'} = 1;
981 return;
982 }
983}
984
985#################################
986#
987# Error code
988#
989#################################
990
991=pod
992
993=head2 $ff->error([BOOL])
994
995Returns the last encountered error as string.
996Pass it a true value to get the C<Carp::longmess()> output instead.
997
998=cut
999
d4b3706f 1000### error handling the way Archive::Extract does it
1001sub _error {
1002 my $self = shift;
1003 my $error = shift;
1004
1005 $self->_error_msg( $error );
1006 $self->_error_msg_long( Carp::longmess($error) );
1007
1008 if( $WARN ) {
1009 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
79fd8837 1010 }
1011
d4b3706f 1012 return;
79fd8837 1013}
1014
d4b3706f 1015sub error {
1016 my $self = shift;
1017 return shift() ? $self->_error_msg_long : $self->_error_msg;
1018}
79fd8837 1019
1020
10211;
1022
1023=pod
1024
1025=head1 HOW IT WORKS
1026
1027File::Fetch is able to fetch a variety of uris, by using several
1028external programs and modules.
1029
1030Below is a mapping of what utilities will be used in what order
1031for what schemes, if available:
1032
1033 file => LWP, file
1034 http => LWP, wget, curl, lynx
1035 ftp => LWP, Net::FTP, wget, curl, ncftp, ftp
1036 rsync => rsync
1037
1038If you'd like to disable the use of one or more of these utilities
1039and/or modules, see the C<$BLACKLIST> variable further down.
1040
1041If a utility or module isn't available, it will be marked in a cache
1042(see the C<$METHOD_FAIL> variable further down), so it will not be
1043tried again. The C<fetch> method will only fail when all options are
1044exhausted, and it was not able to retrieve the file.
1045
1046A special note about fetching files from an ftp uri:
1047
1048By default, all ftp connections are done in passive mode. To change
1049that, see the C<$FTP_PASSIVE> variable further down.
1050
1051Furthermore, ftp uris only support anonymous connections, so no
1052named user/password pair can be passed along.
1053
1054C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
1055further down.
1056
1057=head1 GLOBAL VARIABLES
1058
1059The behaviour of File::Fetch can be altered by changing the following
1060global variables:
1061
1062=head2 $File::Fetch::FROM_EMAIL
1063
1064This is the email address that will be sent as your anonymous ftp
1065password.
1066
1067Default is C<File-Fetch@example.com>.
1068
1069=head2 $File::Fetch::USER_AGENT
1070
1071This is the useragent as C<LWP> will report it.
1072
1073Default is C<File::Fetch/$VERSION>.
1074
1075=head2 $File::Fetch::FTP_PASSIVE
1076
1077This variable controls whether the environment variable C<FTP_PASSIVE>
1078and any passive switches to commandline tools will be set to true.
1079
1080Default value is 1.
1081
1082Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
1083files, since passive mode can only be set interactively for this binary
1084
1085=head2 $File::Fetch::TIMEOUT
1086
1087When set, controls the network timeout (counted in seconds).
1088
1089Default value is 0.
1090
1091=head2 $File::Fetch::WARN
1092
1093This variable controls whether errors encountered internally by
1094C<File::Fetch> should be C<carp>'d or not.
1095
1096Set to false to silence warnings. Inspect the output of the C<error()>
1097method manually to see what went wrong.
1098
1099Defaults to C<true>.
1100
1101=head2 $File::Fetch::DEBUG
1102
1103This enables debugging output when calling commandline utilities to
1104fetch files.
1105This also enables C<Carp::longmess> errors, instead of the regular
1106C<carp> errors.
1107
1108Good for tracking down why things don't work with your particular
1109setup.
1110
1111Default is 0.
1112
1113=head2 $File::Fetch::BLACKLIST
1114
1115This is an array ref holding blacklisted modules/utilities for fetching
1116files with.
1117
1118To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
1119set $File::Fetch::BLACKLIST to:
1120
1121 $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1122
1123The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.
1124
1125See the note on C<MAPPING> below.
1126
1127=head2 $File::Fetch::METHOD_FAIL
1128
1129This is a hashref registering what modules/utilities were known to fail
1130for fetching files (mostly because they weren't installed).
1131
1132You can reset this cache by assigning an empty hashref to it, or
1133individually remove keys.
1134
1135See the note on C<MAPPING> below.
1136
1137=head1 MAPPING
1138
1139
1140Here's a quick mapping for the utilities/modules, and their names for
1141the $BLACKLIST, $METHOD_FAIL and other internal functions.
1142
1143 LWP => lwp
1144 Net::FTP => netftp
1145 wget => wget
1146 lynx => lynx
1147 ncftp => ncftp
1148 ftp => ftp
1149 curl => curl
1150 rsync => rsync
1151
1152=head1 FREQUENTLY ASKED QUESTIONS
1153
1154=head2 So how do I use a proxy with File::Fetch?
1155
1156C<File::Fetch> currently only supports proxies with LWP::UserAgent.
1157You will need to set your environment variables accordingly. For
1158example, to use an ftp proxy:
1159
1160 $ENV{ftp_proxy} = 'foo.com';
1161
1162Refer to the LWP::UserAgent manpage for more details.
1163
1164=head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1165
1166C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
1167which we in turn capture. If that content is a 'custom' error file
1168(like, say, a C<404 handler>), you will get that contents instead.
1169
1170Sadly, C<lynx> doesn't support any options to return a different exit
1171code on non-C<200 OK> status, giving us no way to tell the difference
1172between a 'successfull' fetch and a custom error page.
1173
1174Therefor, we recommend to only use C<lynx> as a last resort. This is
1175why it is at the back of our list of methods to try as well.
1176
d4b3706f 1177=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1178
1179C<File::Fetch> is relatively smart about things. When trying to write
1180a file to disk, it removes the C<query parameters> (see the
1181C<output_file> method for details) from the file name before creating
1182it. In most cases this suffices.
1183
1184If you have any other characters you need to escape, please install
1185the C<URI::Escape> module from CPAN, and pre-encode your URI before
1186passing it to C<File::Fetch>. You can read about the details of URIs
1187and URI encoding here:
1188
1189 http://www.faqs.org/rfcs/rfc2396.html
1190
79fd8837 1191=head1 TODO
1192
1193=over 4
1194
1195=item Implement $PREFER_BIN
1196
1197To indicate to rather use commandline tools than modules
1198
a0ad4830 1199=back
1200
1201=head1 BUG REPORTS
1202
1203Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>.
1204
1205=head1 AUTHOR
79fd8837 1206
d4b3706f 1207This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
79fd8837 1208
1209=head1 COPYRIGHT
1210
a0ad4830 1211This library is free software; you may redistribute and/or modify it
1212under the same terms as Perl itself.
79fd8837 1213
79fd8837 1214
1215=cut
1216
1217# Local variables:
1218# c-indentation-style: bsd
1219# c-basic-offset: 4
1220# indent-tabs-mode: nil
1221# End:
1222# vim: expandtab shiftwidth=4:
1223
1224
1225
1226