9 use File::Basename qw[dirname];
13 use IPC::Cmd qw[can_run run QUOTE];
14 use File::Path qw[mkpath];
15 use Params::Check qw[check];
16 use Module::Load::Conditional qw[can_load];
17 use Locale::Maketext::Simple Style => 'gettext';
19 use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
20 $BLACKLIST $METHOD_FAIL $VERSION $METHODS
21 $FTP_PASSIVE $TIMEOUT $DEBUG $WARN
25 $VERSION = eval $VERSION; # avoid warnings with development releases
26 $PREFER_BIN = 0; # XXX TODO implement
27 $FROM_EMAIL = 'File-Fetch@example.com';
28 $USER_AGENT = "File::Fetch/$VERSION";
29 $BLACKLIST = [qw|ftp|];
36 ### methods available to fetch the file depending on the scheme
38 http => [ qw|lwp wget curl lftp lynx| ],
39 ftp => [ qw|lwp netftp wget curl lftp ncftp ftp| ],
40 file => [ qw|lwp lftp file| ],
41 rsync => [ qw|rsync| ]
44 ### silly warnings ###
45 local $Params::Check::VERBOSE = 1;
46 local $Params::Check::VERBOSE = 1;
47 local $Module::Load::Conditional::VERBOSE = 0;
48 local $Module::Load::Conditional::VERBOSE = 0;
50 ### see what OS we are on, important for file:// uris ###
51 use constant ON_WIN => ($^O eq 'MSWin32');
52 use constant ON_VMS => ($^O eq 'VMS');
53 use constant ON_UNIX => (!ON_WIN);
54 use constant HAS_VOL => (ON_WIN);
55 use constant HAS_SHARE => (ON_WIN);
62 File::Fetch - A generic file fetching mechanism
68 ### build a File::Fetch object ###
69 my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
71 ### fetch the uri to cwd() ###
72 my $where = $ff->fetch() or die $ff->error;
74 ### fetch the uri to /tmp ###
75 my $where = $ff->fetch( to => '/tmp' );
77 ### parsed bits from the uri ###
86 File::Fetch is a generic file fetching mechanism.
88 It allows you to fetch any file pointed to by a C<ftp>, C<http>,
89 C<file>, or C<rsync> uri by a number of different means.
91 See the C<HOW IT WORKS> section further down for details.
95 A C<File::Fetch> object has the following accessors
101 The uri you passed to the constructor
105 The scheme from the uri (like 'file', 'http', etc)
109 The hostname in the uri. Will be empty if host was originally
110 'localhost' for a 'file://' url.
114 On operating systems with the concept of a volume the second element
115 of a file:// is considered to the be volume specification for the file.
116 Thus on Win32 this routine returns the volume, on other operating
117 systems this returns nothing.
119 On Windows this value may be empty if the uri is to a network share, in
120 which case the 'share' property will be defined. Additionally, volume
121 specifications that use '|' as ':' will be converted on read to use ':'.
123 On VMS, which has a volume concept, this field will be empty because VMS
124 file specifications are converted to absolute UNIX format and the volume
125 information is transparently included.
129 On systems with the concept of a network share (currently only Windows) returns
130 the sharename from a file://// url. On other operating systems returns empty.
134 The path from the uri, will be at least a single '/'.
138 The name of the remote file. For the local file name, the
139 result of $ff->output_file will be used.
144 ##########################
145 ### Object & Accessors ###
146 ##########################
149 ### template for autogenerated accessors ###
151 scheme => { default => 'http' },
152 host => { default => 'localhost' },
153 path => { default => '/' },
154 file => { required => 1 },
155 uri => { required => 1 },
156 vol => { default => '' }, # windows for file:// uris
157 share => { default => '' }, # windows for file:// uris
158 _error_msg => { no_override => 1 },
159 _error_msg_long => { no_override => 1 },
162 for my $method ( keys %$Tmpl ) {
166 $self->{$method} = $_[0] if @_;
167 return $self->{$method};
175 my $args = check( $Tmpl, \%hash ) or return;
179 if( lc($args->scheme) ne 'file' and not $args->host ) {
180 return File::Fetch->_error(loc(
181 "Hostname required when fetching from '%1'",$args->scheme));
184 for (qw[path file]) {
185 unless( $args->$_() ) { # 5.5.x needs the ()
186 return File::Fetch->_error(loc("No '%1' specified",$_));
194 =item $ff->output_file
196 The name of the output file. This is the same as $ff->file,
197 but any query parameters are stripped off. For example:
199 http://example.com/index.html?x=y
201 would make the output file be C<index.html> rather than
210 my $file = $self->file;
217 ### XXX do this or just point to URI::Escape?
218 # =head2 $esc_uri = $ff->escaped_uri
222 # ### most of this is stolen straight from URI::escape
223 # { ### Build a char->hex map
224 # my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
228 # my $uri = $self->uri;
230 # ### Default unsafe characters. RFC 2732 ^(uric - reserved)
231 # $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
232 # $escapes{$1} || $self->_fail_hi($1)/ge;
242 # "Can't escape '%1', try using the '%2' module instead",
243 # sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
256 =head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
258 Parses the uri and creates a corresponding File::Fetch::Item object,
259 that is ready to be C<fetch>ed and returns it.
261 Returns false on failure.
271 uri => { required => 1, store => \$uri },
274 check( $tmpl, \%hash ) or return;
276 ### parse the uri to usable parts ###
277 my $href = __PACKAGE__->_parse_uri( $uri ) or return;
279 ### make it into a FFI object ###
280 my $ff = File::Fetch->_create( %$href ) or return;
283 ### return the object ###
287 ### parses an uri to a hash structure:
289 ### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
295 ### host => 'ftp.cpan.org',
296 ### path => '/pub/mirror',
297 ### file => 'index.html'
300 ### In the case of file:// urls there maybe be additional fields
302 ### For systems with volume specifications such as Win32 there will be
303 ### a volume specifier provided in the 'vol' field.
305 ### 'vol' => 'volumename'
307 ### For windows file shares there may be a 'share' key specified
309 ### 'share' => 'sharename'
311 ### Note that the rules of what a file:// url means vary by the operating system
312 ### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
313 ### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
314 ### not '/foo/bar.txt'
316 ### Similarly if the host interpreting the url is VMS then
317 ### file:///disk$user/my/notes/note12345.txt' means
318 ### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
319 ### if it is unix where it means /disk$user/my/notes/note12345.txt'.
320 ### Except for some cases in the File::Spec methods, Perl on VMS will generally
321 ### handle UNIX format file specifications.
323 ### This means it is impossible to serve certain file:// urls on certain systems.
325 ### Thus are the problems with a protocol-less specification. :-(
330 my $uri = shift or return;
332 my $href = { uri => $uri };
334 ### find the scheme ###
335 $uri =~ s|^(\w+)://||;
336 $href->{scheme} = $1;
338 ### See rfc 1738 section 3.10
339 ### http://www.faqs.org/rfcs/rfc1738.html
340 ### And wikipedia for more on windows file:// urls
341 ### http://en.wikipedia.org/wiki/File://
342 if( $href->{scheme} eq 'file' ) {
344 my @parts = split '/',$uri;
346 ### file://hostname/...
347 ### file://hostname/...
348 ### normalize file://localhost with file:///
349 $href->{host} = $parts[0] || '';
351 ### index in @parts where the path components begin;
354 ### file:////hostname/sharename/blah.txt
355 if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
357 $href->{host} = $parts[2] || ''; # avoid warnings
358 $href->{share} = $parts[3] || ''; # avoid warnings
360 $index = 4 # index after the share
362 ### file:///D|/blah.txt
363 ### file:///D:/blah.txt
366 ### this code comes from dmq's patch, but:
367 ### XXX if volume is empty, wouldn't that be an error? --kane
368 ### if so, our file://localhost test needs to be fixed as wel
369 $href->{vol} = $parts[1] || '';
371 ### correct D| style colume descriptors
372 $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
374 $index = 2; # index after the volume
377 ### rebuild the path from the leftover parts;
378 $href->{path} = join '/', '', splice( @parts, $index, $#parts );
381 ### using anything but qw() in hash slices may produce warnings
382 ### in older perls :-(
383 @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s;
386 ### split the path into file + dir ###
387 { my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
388 $href->{path} = $parts[1];
389 $href->{file} = $parts[2];
392 ### host will be empty if the target was 'localhost' and the
393 ### scheme was 'file'
394 $href->{host} = '' if ($href->{host} eq 'localhost') and
395 ($href->{scheme} eq 'file');
400 =head2 $ff->fetch( [to => /my/output/dir/] )
402 Fetches the file you requested. By default it writes to C<cwd()>,
403 but you can override that by specifying the C<to> argument.
405 Returns the full path to the downloaded file on success, and false
411 my $self = shift or return;
416 to => { default => cwd(), store => \$to },
419 check( $tmpl, \%hash ) or return;
421 ### On VMS force to VMS format so File::Spec will work.
422 $to = VMS::Filespec::vmspath($to) if ON_VMS;
424 ### create the path if it doesn't exist yet ###
426 eval { mkpath( $to ) };
428 return $self->_error(loc("Could not create path '%1'",$to)) if $@;
431 ### set passive ftp if required ###
432 local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
434 ### we dont use catfile on win32 because if we are using a cygwin tool
435 ### under cmd.exe they wont understand windows style separators.
436 my $out_to = ON_WIN ? $to.'/'.$self->output_file
437 : File::Spec->catfile( $to, $self->output_file );
439 for my $method ( @{ $METHODS->{$self->scheme} } ) {
440 my $sub = '_'.$method.'_fetch';
442 unless( __PACKAGE__->can($sub) ) {
443 $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
448 ### method is blacklisted ###
449 next if grep { lc $_ eq $method } @$BLACKLIST;
451 ### method is known to fail ###
452 next if $METHOD_FAIL->{$method};
454 ### there's serious issues with IPC::Run and quoting of command
455 ### line arguments. using quotes in the wrong place breaks things,
456 ### and in the case of say,
457 ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
458 ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
459 ### it doesn't matter how you quote, it always fails.
460 local $IPC::Cmd::USE_IPC_RUN = 0;
462 if( my $file = $self->$sub(
466 unless( -e $file && -s _ ) {
467 $self->_error(loc("'%1' said it fetched '%2', ".
468 "but it was not created",$method,$file));
470 ### mark the failure ###
471 $METHOD_FAIL->{$method} = 1;
477 my $abs = File::Spec->rel2abs( $file );
484 ### if we got here, we looped over all methods, but we weren't able
489 ########################
490 ### _*_fetch methods ###
491 ########################
500 to => { required => 1, store => \$to }
502 check( $tmpl, \%hash ) or return;
504 ### modules required to download with lwp ###
507 'LWP::UserAgent' => '0.0',
508 'HTTP::Request' => '0.0',
509 'HTTP::Status' => '0.0',
514 if( can_load(modules => $use_list) ) {
516 ### setup the uri object
517 my $uri = URI->new( File::Spec::Unix->catfile(
518 $self->path, $self->file
521 ### special rules apply for file:// uris ###
522 $uri->scheme( $self->scheme );
523 $uri->host( $self->scheme eq 'file' ? '' : $self->host );
524 $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';
526 ### set up the useragent object
527 my $ua = LWP::UserAgent->new();
528 $ua->timeout( $TIMEOUT ) if $TIMEOUT;
529 $ua->agent( $USER_AGENT );
530 $ua->from( $FROM_EMAIL );
533 my $res = $ua->mirror($uri, $to) or return;
535 ### uptodate or fetched ok ###
536 if ( $res->code == 304 or $res->code == 200 ) {
540 return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
541 $res->code, HTTP::Status::status_message($res->code),
546 $METHOD_FAIL->{'lwp'} = 1;
551 ### Net::FTP fetching
558 to => { required => 1, store => \$to }
560 check( $tmpl, \%hash ) or return;
562 ### required modules ###
563 my $use_list = { 'Net::FTP' => 0 };
565 if( can_load( modules => $use_list ) ) {
567 ### make connection ###
569 my @options = ($self->host);
570 push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
571 unless( $ftp = Net::FTP->new( @options ) ) {
572 return $self->_error(loc("Ftp creation failed: %1",$@));
576 unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
577 return $self->_error(loc("Could not login to '%1'",$self->host));
580 ### set binary mode, just in case ###
583 ### create the remote path
584 ### remember remote paths are unix paths! [#11483]
585 my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
587 ### fetch the file ###
589 unless( $target = $ftp->get( $remote, $to ) ) {
590 return $self->_error(loc("Could not fetch '%1' from '%2'",
591 $remote, $self->host));
600 $METHOD_FAIL->{'netftp'} = 1;
605 ### /bin/wget fetch ###
612 to => { required => 1, store => \$to }
614 check( $tmpl, \%hash ) or return;
616 ### see if we have a wget binary ###
617 if( my $wget = can_run('wget') ) {
619 ### no verboseness, thanks ###
620 my $cmd = [ $wget, '--quiet' ];
622 ### if a timeout is set, add it ###
623 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
625 ### run passive if specified ###
626 push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
628 ### set the output document, add the uri ###
629 push @$cmd, '--output-document', $to, $self->uri;
631 ### with IPC::Cmd > 0.41, this is fixed in teh library,
632 ### and there's no need for special casing any more.
633 ### DO NOT quote things for IPC::Run, it breaks stuff.
634 # $IPC::Cmd::USE_IPC_RUN
635 # ? ($to, $self->uri)
636 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
640 unless(run( command => $cmd,
641 buffer => \$captured,
644 ### wget creates the output document always, even if the fetch
645 ### fails.. so unlink it in that case
648 return $self->_error(loc( "Command failed: %1", $captured || '' ));
654 $METHOD_FAIL->{'wget'} = 1;
659 ### /bin/lftp fetch ###
666 to => { required => 1, store => \$to }
668 check( $tmpl, \%hash ) or return;
670 ### see if we have a wget binary ###
671 if( my $lftp = can_run('lftp') ) {
673 ### no verboseness, thanks ###
674 my $cmd = [ $lftp, '-f' ];
676 my $fh = File::Temp->new;
680 ### if a timeout is set, add it ###
681 $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT;
683 ### run passive if specified ###
684 $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE;
686 ### set the output document, add the uri ###
687 ### quote the URI, because lftp supports certain shell
688 ### expansions, most notably & for backgrounding.
689 ### ' quote does nto work, must be "
690 $str .= q[get ']. $self->uri .q[' -o ]. $to . $/;
693 my $pp_str = join ' ', split $/, $str;
694 print "# lftp command: $pp_str\n";
697 ### write straight to the file.
701 ### the command needs to be 1 string to be executed
702 push @$cmd, $fh->filename;
704 ### with IPC::Cmd > 0.41, this is fixed in teh library,
705 ### and there's no need for special casing any more.
706 ### DO NOT quote things for IPC::Run, it breaks stuff.
707 # $IPC::Cmd::USE_IPC_RUN
708 # ? ($to, $self->uri)
709 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
714 unless(run( command => $cmd,
715 buffer => \$captured,
718 ### wget creates the output document always, even if the fetch
719 ### fails.. so unlink it in that case
722 return $self->_error(loc( "Command failed: %1", $captured || '' ));
728 $METHOD_FAIL->{'lftp'} = 1;
735 ### /bin/ftp fetch ###
742 to => { required => 1, store => \$to }
744 check( $tmpl, \%hash ) or return;
746 ### see if we have a ftp binary ###
747 if( my $ftp = can_run('ftp') ) {
749 my $fh = FileHandle->new;
751 local $SIG{CHLD} = 'IGNORE';
753 unless ($fh->open("|$ftp -n")) {
754 return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
758 "lcd " . dirname($to),
759 "open " . $self->host,
760 "user anonymous $FROM_EMAIL",
764 "get " . $self->file . " " . $self->output_file,
768 foreach (@dialog) { $fh->print($_, "\n") }
769 $fh->close or return;
775 ### lynx is stupid - it decompresses any .gz file it finds to be text
776 ### use /bin/lynx to fetch files
783 to => { required => 1, store => \$to }
785 check( $tmpl, \%hash ) or return;
787 ### see if we have a lynx binary ###
788 if( my $lynx = can_run('lynx') ) {
790 unless( IPC::Cmd->can_capture_buffer ) {
791 $METHOD_FAIL->{'lynx'} = 1;
793 return $self->_error(loc(
794 "Can not capture buffers. Can not use '%1' to fetch files",
798 ### check if the HTTP resource exists ###
799 if ($self->uri =~ /^https?:\/\//i) {
804 "-auth=anonymous:$FROM_EMAIL",
807 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
809 push @$cmd, $self->uri;
813 unless(run( command => $cmd,
817 return $self->_error(loc("Command failed: %1", $head || ''));
820 unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) {
821 return $self->_error(loc("Command failed: %1", $head || ''));
825 ### write to the output file ourselves, since lynx ass_u_mes to much
826 my $local = FileHandle->new(">$to")
827 or return $self->_error(loc(
828 "Could not open '%1' for writing: %2",$to,$!));
830 ### dump to stdout ###
834 "-auth=anonymous:$FROM_EMAIL",
837 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
839 ### DO NOT quote things for IPC::Run, it breaks stuff.
840 push @$cmd, $self->uri;
842 ### with IPC::Cmd > 0.41, this is fixed in teh library,
843 ### and there's no need for special casing any more.
844 ### DO NOT quote things for IPC::Run, it breaks stuff.
845 # $IPC::Cmd::USE_IPC_RUN
847 # : QUOTE. $self->uri .QUOTE;
852 unless(run( command => $cmd,
853 buffer => \$captured,
856 return $self->_error(loc("Command failed: %1", $captured || ''));
859 ### print to local file ###
860 ### XXX on a 404 with a special error page, $captured will actually
861 ### hold the contents of that page, and make it *appear* like the
862 ### request was a success, when really it wasn't :(
863 ### there doesn't seem to be an option for lynx to change the exit
864 ### code based on a 4XX status or so.
865 ### the closest we can come is using --error_file and parsing that,
866 ### which is very unreliable ;(
867 $local->print( $captured );
868 $local->close or return;
873 $METHOD_FAIL->{'lynx'} = 1;
878 ### use /bin/ncftp to fetch files
885 to => { required => 1, store => \$to }
887 check( $tmpl, \%hash ) or return;
889 ### we can only set passive mode in interactive sesssions, so bail out
890 ### if $FTP_PASSIVE is set
891 return if $FTP_PASSIVE;
893 ### see if we have a ncftp binary ###
894 if( my $ncftp = can_run('ncftp') ) {
898 '-V', # do not be verbose
899 '-p', $FROM_EMAIL, # email as password
900 $self->host, # hostname
901 dirname($to), # local dir for the file
902 # remote path to the file
903 ### DO NOT quote things for IPC::Run, it breaks stuff.
904 $IPC::Cmd::USE_IPC_RUN
905 ? File::Spec::Unix->catdir( $self->path, $self->file )
906 : QUOTE. File::Spec::Unix->catdir(
907 $self->path, $self->file ) .QUOTE
913 unless(run( command => $cmd,
914 buffer => \$captured,
917 return $self->_error(loc("Command failed: %1", $captured || ''));
923 $METHOD_FAIL->{'ncftp'} = 1;
928 ### use /bin/curl to fetch files
935 to => { required => 1, store => \$to }
937 check( $tmpl, \%hash ) or return;
939 if (my $curl = can_run('curl')) {
941 ### these long opts are self explanatory - I like that -jmb
942 my $cmd = [ $curl, '-q' ];
944 push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
946 push(@$cmd, '--silent') unless $DEBUG;
948 ### curl does the right thing with passive, regardless ###
949 if ($self->scheme eq 'ftp') {
950 push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
953 ### curl doesn't follow 302 (temporarily moved) etc automatically
954 ### so we add --location to enable that.
955 push @$cmd, '--fail', '--location', '--output', $to, $self->uri;
957 ### with IPC::Cmd > 0.41, this is fixed in teh library,
958 ### and there's no need for special casing any more.
959 ### DO NOT quote things for IPC::Run, it breaks stuff.
960 # $IPC::Cmd::USE_IPC_RUN
961 # ? ($to, $self->uri)
962 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
966 unless(run( command => $cmd,
967 buffer => \$captured,
971 return $self->_error(loc("Command failed: %1", $captured || ''));
977 $METHOD_FAIL->{'curl'} = 1;
983 ### use File::Copy for fetching file:// urls ###
985 ### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html)
986 ### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
995 to => { required => 1, store => \$to }
997 check( $tmpl, \%hash ) or return;
1001 ### prefix a / on unix systems with a file uri, since it would
1002 ### look somewhat like this:
1003 ### file:///home/kane/file
1004 ### wheras windows file uris for 'c:\some\dir\file' might look like:
1005 ### file:///C:/some/dir/file
1006 ### file:///C|/some/dir/file
1007 ### or for a network share '\\host\share\some\dir\file':
1008 ### file:////host/share/some/dir/file
1010 ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
1011 ### file://vms.host.edu/disk$user/my/notes/note12345.txt
1014 my $path = $self->path;
1015 my $vol = $self->vol;
1016 my $share = $self->share;
1019 if (!$share and $self->host) {
1020 return $self->_error(loc(
1021 "Currently %1 cannot handle hosts in %2 urls",
1022 'File::Fetch', 'file://'
1027 $path = File::Spec->catdir( split /\//, $path );
1028 $remote = File::Spec->catpath( $vol, $path, $self->file);
1031 ### win32 specific, and a share name, so we wont bother with File::Spec
1033 $remote = "\\\\".$self->host."\\$share\\$path";
1036 ### File::Spec on VMS can not currently handle UNIX syntax.
1037 my $file_class = ON_VMS
1038 ? 'File::Spec::Unix'
1041 $remote = $file_class->catfile( $path, $self->file );
1044 ### File::Copy is littered with 'die' statements :( ###
1045 my $rv = eval { File::Copy::copy( $remote, $to ) };
1047 ### something went wrong ###
1049 return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
1050 $remote, $to, $!, $@));
1056 ### use /usr/bin/rsync to fetch files
1063 to => { required => 1, store => \$to }
1065 check( $tmpl, \%hash ) or return;
1067 if (my $rsync = can_run('rsync')) {
1069 my $cmd = [ $rsync ];
1071 ### XXX: rsync has no I/O timeouts at all, by default
1072 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1074 push(@$cmd, '--quiet') unless $DEBUG;
1076 ### DO NOT quote things for IPC::Run, it breaks stuff.
1077 push @$cmd, $self->uri, $to;
1079 ### with IPC::Cmd > 0.41, this is fixed in teh library,
1080 ### and there's no need for special casing any more.
1081 ### DO NOT quote things for IPC::Run, it breaks stuff.
1082 # $IPC::Cmd::USE_IPC_RUN
1083 # ? ($to, $self->uri)
1084 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1087 unless(run( command => $cmd,
1088 buffer => \$captured,
1092 return $self->_error(loc("Command %1 failed: %2",
1093 "@$cmd" || '', $captured || ''));
1099 $METHOD_FAIL->{'rsync'} = 1;
1104 #################################
1108 #################################
1112 =head2 $ff->error([BOOL])
1114 Returns the last encountered error as string.
1115 Pass it a true value to get the C<Carp::longmess()> output instead.
1119 ### error handling the way Archive::Extract does it
1124 $self->_error_msg( $error );
1125 $self->_error_msg_long( Carp::longmess($error) );
1128 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1136 return shift() ? $self->_error_msg_long : $self->_error_msg;
1146 File::Fetch is able to fetch a variety of uris, by using several
1147 external programs and modules.
1149 Below is a mapping of what utilities will be used in what order
1150 for what schemes, if available:
1152 file => LWP, lftp, file
1153 http => LWP, wget, curl, lftp, lynx
1154 ftp => LWP, Net::FTP, wget, curl, lftp, ncftp, ftp
1157 If you'd like to disable the use of one or more of these utilities
1158 and/or modules, see the C<$BLACKLIST> variable further down.
1160 If a utility or module isn't available, it will be marked in a cache
1161 (see the C<$METHOD_FAIL> variable further down), so it will not be
1162 tried again. The C<fetch> method will only fail when all options are
1163 exhausted, and it was not able to retrieve the file.
1165 A special note about fetching files from an ftp uri:
1167 By default, all ftp connections are done in passive mode. To change
1168 that, see the C<$FTP_PASSIVE> variable further down.
1170 Furthermore, ftp uris only support anonymous connections, so no
1171 named user/password pair can be passed along.
1173 C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
1176 =head1 GLOBAL VARIABLES
1178 The behaviour of File::Fetch can be altered by changing the following
1181 =head2 $File::Fetch::FROM_EMAIL
1183 This is the email address that will be sent as your anonymous ftp
1186 Default is C<File-Fetch@example.com>.
1188 =head2 $File::Fetch::USER_AGENT
1190 This is the useragent as C<LWP> will report it.
1192 Default is C<File::Fetch/$VERSION>.
1194 =head2 $File::Fetch::FTP_PASSIVE
1196 This variable controls whether the environment variable C<FTP_PASSIVE>
1197 and any passive switches to commandline tools will be set to true.
1201 Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
1202 files, since passive mode can only be set interactively for this binary
1204 =head2 $File::Fetch::TIMEOUT
1206 When set, controls the network timeout (counted in seconds).
1210 =head2 $File::Fetch::WARN
1212 This variable controls whether errors encountered internally by
1213 C<File::Fetch> should be C<carp>'d or not.
1215 Set to false to silence warnings. Inspect the output of the C<error()>
1216 method manually to see what went wrong.
1218 Defaults to C<true>.
1220 =head2 $File::Fetch::DEBUG
1222 This enables debugging output when calling commandline utilities to
1224 This also enables C<Carp::longmess> errors, instead of the regular
1227 Good for tracking down why things don't work with your particular
1232 =head2 $File::Fetch::BLACKLIST
1234 This is an array ref holding blacklisted modules/utilities for fetching
1237 To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
1238 set $File::Fetch::BLACKLIST to:
1240 $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1242 The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.
1244 See the note on C<MAPPING> below.
1246 =head2 $File::Fetch::METHOD_FAIL
1248 This is a hashref registering what modules/utilities were known to fail
1249 for fetching files (mostly because they weren't installed).
1251 You can reset this cache by assigning an empty hashref to it, or
1252 individually remove keys.
1254 See the note on C<MAPPING> below.
1259 Here's a quick mapping for the utilities/modules, and their names for
1260 the $BLACKLIST, $METHOD_FAIL and other internal functions.
1272 =head1 FREQUENTLY ASKED QUESTIONS
1274 =head2 So how do I use a proxy with File::Fetch?
1276 C<File::Fetch> currently only supports proxies with LWP::UserAgent.
1277 You will need to set your environment variables accordingly. For
1278 example, to use an ftp proxy:
1280 $ENV{ftp_proxy} = 'foo.com';
1282 Refer to the LWP::UserAgent manpage for more details.
1284 =head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1286 C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
1287 which we in turn capture. If that content is a 'custom' error file
1288 (like, say, a C<404 handler>), you will get that contents instead.
1290 Sadly, C<lynx> doesn't support any options to return a different exit
1291 code on non-C<200 OK> status, giving us no way to tell the difference
1292 between a 'successfull' fetch and a custom error page.
1294 Therefor, we recommend to only use C<lynx> as a last resort. This is
1295 why it is at the back of our list of methods to try as well.
1297 =head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1299 C<File::Fetch> is relatively smart about things. When trying to write
1300 a file to disk, it removes the C<query parameters> (see the
1301 C<output_file> method for details) from the file name before creating
1302 it. In most cases this suffices.
1304 If you have any other characters you need to escape, please install
1305 the C<URI::Escape> module from CPAN, and pre-encode your URI before
1306 passing it to C<File::Fetch>. You can read about the details of URIs
1307 and URI encoding here:
1309 http://www.faqs.org/rfcs/rfc2396.html
1315 =item Implement $PREFER_BIN
1317 To indicate to rather use commandline tools than modules
1323 Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>.
1327 This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1331 This library is free software; you may redistribute and/or modify it
1332 under the same terms as Perl itself.
1338 # c-indentation-style: bsd
1340 # indent-tabs-mode: nil
1342 # vim: expandtab shiftwidth=4: