9 use File::Basename qw[dirname];
13 use IPC::Cmd qw[can_run run];
14 use File::Path qw[mkpath];
15 use Params::Check qw[check];
16 use Module::Load::Conditional qw[can_load];
17 use Locale::Maketext::Simple Style => 'gettext';
19 use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
20 $BLACKLIST $METHOD_FAIL $VERSION $METHODS
21 $FTP_PASSIVE $TIMEOUT $DEBUG $WARN
24 use constant QUOTE => do { $^O eq 'MSWin32' ? q["] : q['] };
28 $VERSION = eval $VERSION; # avoid warnings with development releases
29 $PREFER_BIN = 0; # XXX TODO implement
30 $FROM_EMAIL = 'File-Fetch@example.com';
31 $USER_AGENT = 'File::Fetch/$VERSION';
32 $BLACKLIST = [qw|ftp|];
39 ### methods available to fetch the file depending on the scheme
41 http => [ qw|lwp wget curl lftp lynx| ],
42 ftp => [ qw|lwp netftp wget curl lftp ncftp ftp| ],
43 file => [ qw|lwp lftp file| ],
44 rsync => [ qw|rsync| ]
47 ### silly warnings ###
48 local $Params::Check::VERBOSE = 1;
49 local $Params::Check::VERBOSE = 1;
50 local $Module::Load::Conditional::VERBOSE = 0;
51 local $Module::Load::Conditional::VERBOSE = 0;
53 ### see what OS we are on, important for file:// uris ###
54 use constant ON_WIN => ($^O eq 'MSWin32');
55 use constant ON_VMS => ($^O eq 'VMS');
56 use constant ON_UNIX => (!ON_WIN);
57 use constant HAS_VOL => (ON_WIN);
58 use constant HAS_SHARE => (ON_WIN);
63 File::Fetch - A generic file fetching mechanism
69 ### build a File::Fetch object ###
70 my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
72 ### fetch the uri to cwd() ###
73 my $where = $ff->fetch() or die $ff->error;
75 ### fetch the uri to /tmp ###
76 my $where = $ff->fetch( to => '/tmp' );
78 ### parsed bits from the uri ###
87 File::Fetch is a generic file fetching mechanism.
89 It allows you to fetch any file pointed to by a C<ftp>, C<http>,
90 C<file>, or C<rsync> uri by a number of different means.
92 See the C<HOW IT WORKS> section further down for details.
96 A C<File::Fetch> object has the following accessors
102 The uri you passed to the constructor
106 The scheme from the uri (like 'file', 'http', etc)
110 The hostname in the uri. Will be empty if host was originally
111 'localhost' for a 'file://' url.
115 On operating systems with the concept of a volume the second element
116 of a file:// is considered to the be volume specification for the file.
117 Thus on Win32 this routine returns the volume, on other operating
118 systems this returns nothing.
120 On Windows this value may be empty if the uri is to a network share, in
121 which case the 'share' property will be defined. Additionally, volume
122 specifications that use '|' as ':' will be converted on read to use ':'.
124 On VMS, which has a volume concept, this field will be empty because VMS
125 file specifications are converted to absolute UNIX format and the volume
126 information is transparently included.
130 On systems with the concept of a network share (currently only Windows) returns
131 the sharename from a file://// url. On other operating systems returns empty.
135 The path from the uri, will be at least a single '/'.
139 The name of the remote file. For the local file name, the
140 result of $ff->output_file will be used.
145 ##########################
146 ### Object & Accessors ###
147 ##########################
150 ### template for new() and autogenerated accessors ###
152 scheme => { default => 'http' },
153 host => { default => 'localhost' },
154 path => { default => '/' },
155 file => { required => 1 },
156 uri => { required => 1 },
157 vol => { default => '' }, # windows for file:// uris
158 share => { default => '' }, # windows for file:// uris
159 _error_msg => { no_override => 1 },
160 _error_msg_long => { no_override => 1 },
163 for my $method ( keys %$Tmpl ) {
167 $self->{$method} = $_[0] if @_;
168 return $self->{$method};
176 my $args = check( $Tmpl, \%hash ) or return;
180 if( lc($args->scheme) ne 'file' and not $args->host ) {
181 return File::Fetch->_error(loc(
182 "Hostname required when fetching from '%1'",$args->scheme));
185 for (qw[path file]) {
186 unless( $args->$_() ) { # 5.5.x needs the ()
187 return File::Fetch->_error(loc("No '%1' specified",$_));
195 =item $ff->output_file
197 The name of the output file. This is the same as $ff->file,
198 but any query parameters are stripped off. For example:
200 http://example.com/index.html?x=y
202 would make the output file be C<index.html> rather than
211 my $file = $self->file;
218 ### XXX do this or just point to URI::Escape?
219 # =head2 $esc_uri = $ff->escaped_uri
223 # ### most of this is stolen straight from URI::escape
224 # { ### Build a char->hex map
225 # my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
229 # my $uri = $self->uri;
231 # ### Default unsafe characters. RFC 2732 ^(uric - reserved)
232 # $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
233 # $escapes{$1} || $self->_fail_hi($1)/ge;
243 # "Can't escape '%1', try using the '%2' module instead",
244 # sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
257 =head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
259 Parses the uri and creates a corresponding File::Fetch::Item object,
260 that is ready to be C<fetch>ed and returns it.
262 Returns false on failure.
272 uri => { required => 1, store => \$uri },
275 check( $tmpl, \%hash ) or return;
277 ### parse the uri to usable parts ###
278 my $href = __PACKAGE__->_parse_uri( $uri ) or return;
280 ### make it into a FFI object ###
281 my $ff = File::Fetch->_create( %$href ) or return;
284 ### return the object ###
288 ### parses an uri to a hash structure:
290 ### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
296 ### host => 'ftp.cpan.org',
297 ### path => '/pub/mirror',
298 ### file => 'index.html'
301 ### In the case of file:// urls there maybe be additional fields
303 ### For systems with volume specifications such as Win32 there will be
304 ### a volume specifier provided in the 'vol' field.
306 ### 'vol' => 'volumename'
308 ### For windows file shares there may be a 'share' key specified
310 ### 'share' => 'sharename'
312 ### Note that the rules of what a file:// url means vary by the operating system
313 ### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
314 ### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
315 ### not '/foo/bar.txt'
317 ### Similarly if the host interpreting the url is VMS then
318 ### file:///disk$user/my/notes/note12345.txt' means
319 ### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
320 ### if it is unix where it means /disk$user/my/notes/note12345.txt'.
321 ### Except for some cases in the File::Spec methods, Perl on VMS will generally
322 ### handle UNIX format file specifications.
324 ### This means it is impossible to serve certain file:// urls on certain systems.
326 ### Thus are the problems with a protocol-less specification. :-(
331 my $uri = shift or return;
333 my $href = { uri => $uri };
335 ### find the scheme ###
336 $uri =~ s|^(\w+)://||;
337 $href->{scheme} = $1;
339 ### See rfc 1738 section 3.10
340 ### http://www.faqs.org/rfcs/rfc1738.html
341 ### And wikipedia for more on windows file:// urls
342 ### http://en.wikipedia.org/wiki/File://
343 if( $href->{scheme} eq 'file' ) {
345 my @parts = split '/',$uri;
347 ### file://hostname/...
348 ### file://hostname/...
349 ### normalize file://localhost with file:///
350 $href->{host} = $parts[0] || '';
352 ### index in @parts where the path components begin;
355 ### file:////hostname/sharename/blah.txt
356 if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
358 $href->{host} = $parts[2] || ''; # avoid warnings
359 $href->{share} = $parts[3] || ''; # avoid warnings
361 $index = 4 # index after the share
363 ### file:///D|/blah.txt
364 ### file:///D:/blah.txt
367 ### this code comes from dmq's patch, but:
368 ### XXX if volume is empty, wouldn't that be an error? --kane
369 ### if so, our file://localhost test needs to be fixed as wel
370 $href->{vol} = $parts[1] || '';
372 ### correct D| style colume descriptors
373 $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
375 $index = 2; # index after the volume
378 ### rebuild the path from the leftover parts;
379 $href->{path} = join '/', '', splice( @parts, $index, $#parts );
382 ### using anything but qw() in hash slices may produce warnings
383 ### in older perls :-(
384 @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s;
387 ### split the path into file + dir ###
388 { my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
389 $href->{path} = $parts[1];
390 $href->{file} = $parts[2];
393 ### host will be empty if the target was 'localhost' and the
394 ### scheme was 'file'
395 $href->{host} = '' if ($href->{host} eq 'localhost') and
396 ($href->{scheme} eq 'file');
401 =head2 $ff->fetch( [to => /my/output/dir/] )
403 Fetches the file you requested. By default it writes to C<cwd()>,
404 but you can override that by specifying the C<to> argument.
406 Returns the full path to the downloaded file on success, and false
412 my $self = shift or return;
417 to => { default => cwd(), store => \$to },
420 check( $tmpl, \%hash ) or return;
422 ### On VMS force to VMS format so File::Spec will work.
423 $to = VMS::Filespec::vmspath($to) if ON_VMS;
425 ### create the path if it doesn't exist yet ###
427 eval { mkpath( $to ) };
429 return $self->_error(loc("Could not create path '%1'",$to)) if $@;
432 ### set passive ftp if required ###
433 local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
435 ### we dont use catfile on win32 because if we are using a cygwin tool
436 ### under cmd.exe they wont understand windows style separators.
437 my $out_to = ON_WIN ? $to.'/'.$self->output_file
438 : File::Spec->catfile( $to, $self->output_file );
440 for my $method ( @{ $METHODS->{$self->scheme} } ) {
441 my $sub = '_'.$method.'_fetch';
443 unless( __PACKAGE__->can($sub) ) {
444 $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
449 ### method is blacklisted ###
450 next if grep { lc $_ eq $method } @$BLACKLIST;
452 ### method is known to fail ###
453 next if $METHOD_FAIL->{$method};
455 ### there's serious issues with IPC::Run and quoting of command
456 ### line arguments. using quotes in the wrong place breaks things,
457 ### and in the case of say,
458 ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
459 ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
460 ### it doesn't matter how you quote, it always fails.
461 local $IPC::Cmd::USE_IPC_RUN = 0;
463 if( my $file = $self->$sub(
467 unless( -e $file && -s _ ) {
468 $self->_error(loc("'%1' said it fetched '%2', ".
469 "but it was not created",$method,$file));
471 ### mark the failure ###
472 $METHOD_FAIL->{$method} = 1;
478 my $abs = File::Spec->rel2abs( $file );
485 ### if we got here, we looped over all methods, but we weren't able
490 ########################
491 ### _*_fetch methods ###
492 ########################
501 to => { required => 1, store => \$to }
503 check( $tmpl, \%hash ) or return;
505 ### modules required to download with lwp ###
508 'LWP::UserAgent' => '0.0',
509 'HTTP::Request' => '0.0',
510 'HTTP::Status' => '0.0',
515 if( can_load(modules => $use_list) ) {
517 ### setup the uri object
518 my $uri = URI->new( File::Spec::Unix->catfile(
519 $self->path, $self->file
522 ### special rules apply for file:// uris ###
523 $uri->scheme( $self->scheme );
524 $uri->host( $self->scheme eq 'file' ? '' : $self->host );
525 $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';
527 ### set up the useragent object
528 my $ua = LWP::UserAgent->new();
529 $ua->timeout( $TIMEOUT ) if $TIMEOUT;
530 $ua->agent( $USER_AGENT );
531 $ua->from( $FROM_EMAIL );
534 my $res = $ua->mirror($uri, $to) or return;
536 ### uptodate or fetched ok ###
537 if ( $res->code == 304 or $res->code == 200 ) {
541 return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
542 $res->code, HTTP::Status::status_message($res->code),
547 $METHOD_FAIL->{'lwp'} = 1;
552 ### Net::FTP fetching
559 to => { required => 1, store => \$to }
561 check( $tmpl, \%hash ) or return;
563 ### required modules ###
564 my $use_list = { 'Net::FTP' => 0 };
566 if( can_load( modules => $use_list ) ) {
568 ### make connection ###
570 my @options = ($self->host);
571 push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
572 unless( $ftp = Net::FTP->new( @options ) ) {
573 return $self->_error(loc("Ftp creation failed: %1",$@));
577 unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
578 return $self->_error(loc("Could not login to '%1'",$self->host));
581 ### set binary mode, just in case ###
584 ### create the remote path
585 ### remember remote paths are unix paths! [#11483]
586 my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
588 ### fetch the file ###
590 unless( $target = $ftp->get( $remote, $to ) ) {
591 return $self->_error(loc("Could not fetch '%1' from '%2'",
592 $remote, $self->host));
601 $METHOD_FAIL->{'netftp'} = 1;
606 ### /bin/wget fetch ###
613 to => { required => 1, store => \$to }
615 check( $tmpl, \%hash ) or return;
617 ### see if we have a wget binary ###
618 if( my $wget = can_run('wget') ) {
620 ### no verboseness, thanks ###
621 my $cmd = [ $wget, '--quiet' ];
623 ### if a timeout is set, add it ###
624 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
626 ### run passive if specified ###
627 push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
629 ### set the output document, add the uri ###
630 push @$cmd, '--output-document', $to, $self->uri;
632 ### with IPC::Cmd > 0.41, this is fixed in teh library,
633 ### and there's no need for special casing any more.
634 ### DO NOT quote things for IPC::Run, it breaks stuff.
635 # $IPC::Cmd::USE_IPC_RUN
636 # ? ($to, $self->uri)
637 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
641 unless(run( command => $cmd,
642 buffer => \$captured,
645 ### wget creates the output document always, even if the fetch
646 ### fails.. so unlink it in that case
649 return $self->_error(loc( "Command failed: %1", $captured || '' ));
655 $METHOD_FAIL->{'wget'} = 1;
660 ### /bin/lftp fetch ###
667 to => { required => 1, store => \$to }
669 check( $tmpl, \%hash ) or return;
671 ### see if we have a wget binary ###
672 if( my $lftp = can_run('lftp') ) {
674 ### no verboseness, thanks ###
675 my $cmd = [ $lftp, '-f' ];
677 my $fh = File::Temp->new;
681 ### if a timeout is set, add it ###
682 $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT;
684 ### run passive if specified ###
685 $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE;
687 ### set the output document, add the uri ###
688 ### quote the URI, because lftp supports certain shell
689 ### expansions, most notably & for backgrounding.
690 ### ' quote does nto work, must be "
691 $str .= q[get ']. $self->uri .q[' -o ]. $to . $/;
694 my $pp_str = join ' ', split $/, $str;
695 print "# lftp command: $pp_str\n";
698 ### write straight to the file.
702 ### the command needs to be 1 string to be executed
703 push @$cmd, $fh->filename;
705 ### with IPC::Cmd > 0.41, this is fixed in teh library,
706 ### and there's no need for special casing any more.
707 ### DO NOT quote things for IPC::Run, it breaks stuff.
708 # $IPC::Cmd::USE_IPC_RUN
709 # ? ($to, $self->uri)
710 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
715 unless(run( command => $cmd,
716 buffer => \$captured,
719 ### wget creates the output document always, even if the fetch
720 ### fails.. so unlink it in that case
723 return $self->_error(loc( "Command failed: %1", $captured || '' ));
729 $METHOD_FAIL->{'lftp'} = 1;
736 ### /bin/ftp fetch ###
743 to => { required => 1, store => \$to }
745 check( $tmpl, \%hash ) or return;
747 ### see if we have a ftp binary ###
748 if( my $ftp = can_run('ftp') ) {
750 my $fh = FileHandle->new;
752 local $SIG{CHLD} = 'IGNORE';
754 unless ($fh->open("|$ftp -n")) {
755 return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
759 "lcd " . dirname($to),
760 "open " . $self->host,
761 "user anonymous $FROM_EMAIL",
765 "get " . $self->file . " " . $self->output_file,
769 foreach (@dialog) { $fh->print($_, "\n") }
770 $fh->close or return;
776 ### lynx is stupid - it decompresses any .gz file it finds to be text
777 ### use /bin/lynx to fetch files
784 to => { required => 1, store => \$to }
786 check( $tmpl, \%hash ) or return;
788 ### see if we have a lynx binary ###
789 if( my $lynx = can_run('lynx') ) {
791 unless( IPC::Cmd->can_capture_buffer ) {
792 $METHOD_FAIL->{'lynx'} = 1;
794 return $self->_error(loc(
795 "Can not capture buffers. Can not use '%1' to fetch files",
799 ### check if the HTTP resource exists ###
800 if ($self->uri =~ /^https?:\/\//i) {
805 "-auth=anonymous:$FROM_EMAIL",
808 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
810 push @$cmd, $self->uri;
814 unless(run( command => $cmd,
818 return $self->_error(loc("Command failed: %1", $head || ''));
821 unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) {
822 return $self->_error(loc("Command failed: %1", $head || ''));
826 ### write to the output file ourselves, since lynx ass_u_mes to much
827 my $local = FileHandle->new(">$to")
828 or return $self->_error(loc(
829 "Could not open '%1' for writing: %2",$to,$!));
831 ### dump to stdout ###
835 "-auth=anonymous:$FROM_EMAIL",
838 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
840 ### DO NOT quote things for IPC::Run, it breaks stuff.
841 push @$cmd, $self->uri;
843 ### with IPC::Cmd > 0.41, this is fixed in teh library,
844 ### and there's no need for special casing any more.
845 ### DO NOT quote things for IPC::Run, it breaks stuff.
846 # $IPC::Cmd::USE_IPC_RUN
848 # : QUOTE. $self->uri .QUOTE;
853 unless(run( command => $cmd,
854 buffer => \$captured,
857 return $self->_error(loc("Command failed: %1", $captured || ''));
860 ### print to local file ###
861 ### XXX on a 404 with a special error page, $captured will actually
862 ### hold the contents of that page, and make it *appear* like the
863 ### request was a success, when really it wasn't :(
864 ### there doesn't seem to be an option for lynx to change the exit
865 ### code based on a 4XX status or so.
866 ### the closest we can come is using --error_file and parsing that,
867 ### which is very unreliable ;(
868 $local->print( $captured );
869 $local->close or return;
874 $METHOD_FAIL->{'lynx'} = 1;
879 ### use /bin/ncftp to fetch files
886 to => { required => 1, store => \$to }
888 check( $tmpl, \%hash ) or return;
890 ### we can only set passive mode in interactive sesssions, so bail out
891 ### if $FTP_PASSIVE is set
892 return if $FTP_PASSIVE;
894 ### see if we have a ncftp binary ###
895 if( my $ncftp = can_run('ncftp') ) {
899 '-V', # do not be verbose
900 '-p', $FROM_EMAIL, # email as password
901 $self->host, # hostname
902 dirname($to), # local dir for the file
903 # remote path to the file
904 ### DO NOT quote things for IPC::Run, it breaks stuff.
905 $IPC::Cmd::USE_IPC_RUN
906 ? File::Spec::Unix->catdir( $self->path, $self->file )
907 : QUOTE. File::Spec::Unix->catdir(
908 $self->path, $self->file ) .QUOTE
914 unless(run( command => $cmd,
915 buffer => \$captured,
918 return $self->_error(loc("Command failed: %1", $captured || ''));
924 $METHOD_FAIL->{'ncftp'} = 1;
929 ### use /bin/curl to fetch files
936 to => { required => 1, store => \$to }
938 check( $tmpl, \%hash ) or return;
940 if (my $curl = can_run('curl')) {
942 ### these long opts are self explanatory - I like that -jmb
943 my $cmd = [ $curl, '-q' ];
945 push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
947 push(@$cmd, '--silent') unless $DEBUG;
949 ### curl does the right thing with passive, regardless ###
950 if ($self->scheme eq 'ftp') {
951 push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
954 ### curl doesn't follow 302 (temporarily moved) etc automatically
955 ### so we add --location to enable that.
956 push @$cmd, '--fail', '--location', '--output', $to, $self->uri;
958 ### with IPC::Cmd > 0.41, this is fixed in teh library,
959 ### and there's no need for special casing any more.
960 ### DO NOT quote things for IPC::Run, it breaks stuff.
961 # $IPC::Cmd::USE_IPC_RUN
962 # ? ($to, $self->uri)
963 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
967 unless(run( command => $cmd,
968 buffer => \$captured,
972 return $self->_error(loc("Command failed: %1", $captured || ''));
978 $METHOD_FAIL->{'curl'} = 1;
984 ### use File::Copy for fetching file:// urls ###
986 ### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html)
987 ### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
996 to => { required => 1, store => \$to }
998 check( $tmpl, \%hash ) or return;
1002 ### prefix a / on unix systems with a file uri, since it would
1003 ### look somewhat like this:
1004 ### file:///home/kane/file
1005 ### wheras windows file uris for 'c:\some\dir\file' might look like:
1006 ### file:///C:/some/dir/file
1007 ### file:///C|/some/dir/file
1008 ### or for a network share '\\host\share\some\dir\file':
1009 ### file:////host/share/some/dir/file
1011 ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
1012 ### file://vms.host.edu/disk$user/my/notes/note12345.txt
1015 my $path = $self->path;
1016 my $vol = $self->vol;
1017 my $share = $self->share;
1020 if (!$share and $self->host) {
1021 return $self->_error(loc(
1022 "Currently %1 cannot handle hosts in %2 urls",
1023 'File::Fetch', 'file://'
1028 $path = File::Spec->catdir( split /\//, $path );
1029 $remote = File::Spec->catpath( $vol, $path, $self->file);
1032 ### win32 specific, and a share name, so we wont bother with File::Spec
1034 $remote = "\\\\".$self->host."\\$share\\$path";
1037 ### File::Spec on VMS can not currently handle UNIX syntax.
1038 my $file_class = ON_VMS
1039 ? 'File::Spec::Unix'
1042 $remote = $file_class->catfile( $path, $self->file );
1045 ### File::Copy is littered with 'die' statements :( ###
1046 my $rv = eval { File::Copy::copy( $remote, $to ) };
1048 ### something went wrong ###
1050 return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
1051 $remote, $to, $!, $@));
1057 ### use /usr/bin/rsync to fetch files
1064 to => { required => 1, store => \$to }
1066 check( $tmpl, \%hash ) or return;
1068 if (my $rsync = can_run('rsync')) {
1070 my $cmd = [ $rsync ];
1072 ### XXX: rsync has no I/O timeouts at all, by default
1073 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1075 push(@$cmd, '--quiet') unless $DEBUG;
1077 ### DO NOT quote things for IPC::Run, it breaks stuff.
1078 push @$cmd, $self->uri, $to;
1080 ### with IPC::Cmd > 0.41, this is fixed in teh library,
1081 ### and there's no need for special casing any more.
1082 ### DO NOT quote things for IPC::Run, it breaks stuff.
1083 # $IPC::Cmd::USE_IPC_RUN
1084 # ? ($to, $self->uri)
1085 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1088 unless(run( command => $cmd,
1089 buffer => \$captured,
1093 return $self->_error(loc("Command %1 failed: %2",
1094 "@$cmd" || '', $captured || ''));
1100 $METHOD_FAIL->{'rsync'} = 1;
1105 #################################
1109 #################################
1113 =head2 $ff->error([BOOL])
1115 Returns the last encountered error as string.
1116 Pass it a true value to get the C<Carp::longmess()> output instead.
1120 ### error handling the way Archive::Extract does it
1125 $self->_error_msg( $error );
1126 $self->_error_msg_long( Carp::longmess($error) );
1129 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1137 return shift() ? $self->_error_msg_long : $self->_error_msg;
1147 File::Fetch is able to fetch a variety of uris, by using several
1148 external programs and modules.
1150 Below is a mapping of what utilities will be used in what order
1151 for what schemes, if available:
1153 file => LWP, lftp, file
1154 http => LWP, wget, curl, lftp, lynx
1155 ftp => LWP, Net::FTP, wget, curl, lftp, ncftp, ftp
1158 If you'd like to disable the use of one or more of these utilities
1159 and/or modules, see the C<$BLACKLIST> variable further down.
1161 If a utility or module isn't available, it will be marked in a cache
1162 (see the C<$METHOD_FAIL> variable further down), so it will not be
1163 tried again. The C<fetch> method will only fail when all options are
1164 exhausted, and it was not able to retrieve the file.
1166 A special note about fetching files from an ftp uri:
1168 By default, all ftp connections are done in passive mode. To change
1169 that, see the C<$FTP_PASSIVE> variable further down.
1171 Furthermore, ftp uris only support anonymous connections, so no
1172 named user/password pair can be passed along.
1174 C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
1177 =head1 GLOBAL VARIABLES
1179 The behaviour of File::Fetch can be altered by changing the following
1182 =head2 $File::Fetch::FROM_EMAIL
1184 This is the email address that will be sent as your anonymous ftp
1187 Default is C<File-Fetch@example.com>.
1189 =head2 $File::Fetch::USER_AGENT
1191 This is the useragent as C<LWP> will report it.
1193 Default is C<File::Fetch/$VERSION>.
1195 =head2 $File::Fetch::FTP_PASSIVE
1197 This variable controls whether the environment variable C<FTP_PASSIVE>
1198 and any passive switches to commandline tools will be set to true.
1202 Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
1203 files, since passive mode can only be set interactively for this binary
1205 =head2 $File::Fetch::TIMEOUT
1207 When set, controls the network timeout (counted in seconds).
1211 =head2 $File::Fetch::WARN
1213 This variable controls whether errors encountered internally by
1214 C<File::Fetch> should be C<carp>'d or not.
1216 Set to false to silence warnings. Inspect the output of the C<error()>
1217 method manually to see what went wrong.
1219 Defaults to C<true>.
1221 =head2 $File::Fetch::DEBUG
1223 This enables debugging output when calling commandline utilities to
1225 This also enables C<Carp::longmess> errors, instead of the regular
1228 Good for tracking down why things don't work with your particular
1233 =head2 $File::Fetch::BLACKLIST
1235 This is an array ref holding blacklisted modules/utilities for fetching
1238 To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
1239 set $File::Fetch::BLACKLIST to:
1241 $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1243 The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.
1245 See the note on C<MAPPING> below.
1247 =head2 $File::Fetch::METHOD_FAIL
1249 This is a hashref registering what modules/utilities were known to fail
1250 for fetching files (mostly because they weren't installed).
1252 You can reset this cache by assigning an empty hashref to it, or
1253 individually remove keys.
1255 See the note on C<MAPPING> below.
1260 Here's a quick mapping for the utilities/modules, and their names for
1261 the $BLACKLIST, $METHOD_FAIL and other internal functions.
1273 =head1 FREQUENTLY ASKED QUESTIONS
1275 =head2 So how do I use a proxy with File::Fetch?
1277 C<File::Fetch> currently only supports proxies with LWP::UserAgent.
1278 You will need to set your environment variables accordingly. For
1279 example, to use an ftp proxy:
1281 $ENV{ftp_proxy} = 'foo.com';
1283 Refer to the LWP::UserAgent manpage for more details.
1285 =head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1287 C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
1288 which we in turn capture. If that content is a 'custom' error file
1289 (like, say, a C<404 handler>), you will get that contents instead.
1291 Sadly, C<lynx> doesn't support any options to return a different exit
1292 code on non-C<200 OK> status, giving us no way to tell the difference
1293 between a 'successfull' fetch and a custom error page.
1295 Therefor, we recommend to only use C<lynx> as a last resort. This is
1296 why it is at the back of our list of methods to try as well.
1298 =head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1300 C<File::Fetch> is relatively smart about things. When trying to write
1301 a file to disk, it removes the C<query parameters> (see the
1302 C<output_file> method for details) from the file name before creating
1303 it. In most cases this suffices.
1305 If you have any other characters you need to escape, please install
1306 the C<URI::Escape> module from CPAN, and pre-encode your URI before
1307 passing it to C<File::Fetch>. You can read about the details of URIs
1308 and URI encoding here:
1310 http://www.faqs.org/rfcs/rfc2396.html
1316 =item Implement $PREFER_BIN
1318 To indicate to rather use commandline tools than modules
1324 Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>.
1328 This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1332 This library is free software; you may redistribute and/or modify it
1333 under the same terms as Perl itself.
1339 # c-indentation-style: bsd
1341 # indent-tabs-mode: nil
1343 # vim: expandtab shiftwidth=4: