9 use File::Basename qw[dirname];
13 use IPC::Cmd qw[can_run run QUOTE];
14 use File::Path qw[mkpath];
15 use File::Temp qw[tempdir];
16 use Params::Check qw[check];
17 use Module::Load::Conditional qw[can_load];
18 use Locale::Maketext::Simple Style => 'gettext';
20 use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
21 $BLACKLIST $METHOD_FAIL $VERSION $METHODS
22 $FTP_PASSIVE $TIMEOUT $DEBUG $WARN
26 $VERSION = eval $VERSION; # avoid warnings with development releases
27 $PREFER_BIN = 0; # XXX TODO implement
28 $FROM_EMAIL = 'File-Fetch@example.com';
29 $USER_AGENT = "File::Fetch/$VERSION";
30 $BLACKLIST = [qw|ftp|];
37 ### methods available to fetch the file depending on the scheme
39 http => [ qw|lwp wget curl lftp lynx| ],
40 ftp => [ qw|lwp netftp wget curl lftp ncftp ftp| ],
41 file => [ qw|lwp lftp file| ],
42 rsync => [ qw|rsync| ]
45 ### silly warnings ###
46 local $Params::Check::VERBOSE = 1;
47 local $Params::Check::VERBOSE = 1;
48 local $Module::Load::Conditional::VERBOSE = 0;
49 local $Module::Load::Conditional::VERBOSE = 0;
51 ### see what OS we are on, important for file:// uris ###
52 use constant ON_WIN => ($^O eq 'MSWin32');
53 use constant ON_VMS => ($^O eq 'VMS');
54 use constant ON_UNIX => (!ON_WIN);
55 use constant HAS_VOL => (ON_WIN);
56 use constant HAS_SHARE => (ON_WIN);
63 File::Fetch - A generic file fetching mechanism
69 ### build a File::Fetch object ###
70 my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
72 ### fetch the uri to cwd() ###
73 my $where = $ff->fetch() or die $ff->error;
75 ### fetch the uri to /tmp ###
76 my $where = $ff->fetch( to => '/tmp' );
78 ### parsed bits from the uri ###
87 File::Fetch is a generic file fetching mechanism.
89 It allows you to fetch any file pointed to by a C<ftp>, C<http>,
90 C<file>, or C<rsync> uri by a number of different means.
92 See the C<HOW IT WORKS> section further down for details.
96 A C<File::Fetch> object has the following accessors
102 The uri you passed to the constructor
106 The scheme from the uri (like 'file', 'http', etc)
110 The hostname in the uri. Will be empty if host was originally
111 'localhost' for a 'file://' url.
115 On operating systems with the concept of a volume the second element
116 of a file:// is considered to the be volume specification for the file.
117 Thus on Win32 this routine returns the volume, on other operating
118 systems this returns nothing.
120 On Windows this value may be empty if the uri is to a network share, in
121 which case the 'share' property will be defined. Additionally, volume
122 specifications that use '|' as ':' will be converted on read to use ':'.
124 On VMS, which has a volume concept, this field will be empty because VMS
125 file specifications are converted to absolute UNIX format and the volume
126 information is transparently included.
130 On systems with the concept of a network share (currently only Windows) returns
131 the sharename from a file://// url. On other operating systems returns empty.
135 The path from the uri, will be at least a single '/'.
139 The name of the remote file. For the local file name, the
140 result of $ff->output_file will be used.
145 ##########################
146 ### Object & Accessors ###
147 ##########################
150 ### template for autogenerated accessors ###
152 scheme => { default => 'http' },
153 host => { default => 'localhost' },
154 path => { default => '/' },
155 file => { required => 1 },
156 uri => { required => 1 },
157 vol => { default => '' }, # windows for file:// uris
158 share => { default => '' }, # windows for file:// uris
159 _error_msg => { no_override => 1 },
160 _error_msg_long => { no_override => 1 },
163 for my $method ( keys %$Tmpl ) {
167 $self->{$method} = $_[0] if @_;
168 return $self->{$method};
176 my $args = check( $Tmpl, \%hash ) or return;
180 if( lc($args->scheme) ne 'file' and not $args->host ) {
181 return File::Fetch->_error(loc(
182 "Hostname required when fetching from '%1'",$args->scheme));
185 for (qw[path file]) {
186 unless( $args->$_() ) { # 5.5.x needs the ()
187 return File::Fetch->_error(loc("No '%1' specified",$_));
195 =item $ff->output_file
197 The name of the output file. This is the same as $ff->file,
198 but any query parameters are stripped off. For example:
200 http://example.com/index.html?x=y
202 would make the output file be C<index.html> rather than
211 my $file = $self->file;
218 ### XXX do this or just point to URI::Escape?
219 # =head2 $esc_uri = $ff->escaped_uri
223 # ### most of this is stolen straight from URI::escape
224 # { ### Build a char->hex map
225 # my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
229 # my $uri = $self->uri;
231 # ### Default unsafe characters. RFC 2732 ^(uric - reserved)
232 # $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
233 # $escapes{$1} || $self->_fail_hi($1)/ge;
243 # "Can't escape '%1', try using the '%2' module instead",
244 # sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
257 =head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
259 Parses the uri and creates a corresponding File::Fetch::Item object,
260 that is ready to be C<fetch>ed and returns it.
262 Returns false on failure.
272 uri => { required => 1, store => \$uri },
275 check( $tmpl, \%hash ) or return;
277 ### parse the uri to usable parts ###
278 my $href = __PACKAGE__->_parse_uri( $uri ) or return;
280 ### make it into a FFI object ###
281 my $ff = File::Fetch->_create( %$href ) or return;
284 ### return the object ###
288 ### parses an uri to a hash structure:
290 ### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
296 ### host => 'ftp.cpan.org',
297 ### path => '/pub/mirror',
298 ### file => 'index.html'
301 ### In the case of file:// urls there maybe be additional fields
303 ### For systems with volume specifications such as Win32 there will be
304 ### a volume specifier provided in the 'vol' field.
306 ### 'vol' => 'volumename'
308 ### For windows file shares there may be a 'share' key specified
310 ### 'share' => 'sharename'
312 ### Note that the rules of what a file:// url means vary by the operating system
313 ### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
314 ### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
315 ### not '/foo/bar.txt'
317 ### Similarly if the host interpreting the url is VMS then
318 ### file:///disk$user/my/notes/note12345.txt' means
319 ### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
320 ### if it is unix where it means /disk$user/my/notes/note12345.txt'.
321 ### Except for some cases in the File::Spec methods, Perl on VMS will generally
322 ### handle UNIX format file specifications.
324 ### This means it is impossible to serve certain file:// urls on certain systems.
326 ### Thus are the problems with a protocol-less specification. :-(
331 my $uri = shift or return;
333 my $href = { uri => $uri };
335 ### find the scheme ###
336 $uri =~ s|^(\w+)://||;
337 $href->{scheme} = $1;
339 ### See rfc 1738 section 3.10
340 ### http://www.faqs.org/rfcs/rfc1738.html
341 ### And wikipedia for more on windows file:// urls
342 ### http://en.wikipedia.org/wiki/File://
343 if( $href->{scheme} eq 'file' ) {
345 my @parts = split '/',$uri;
347 ### file://hostname/...
348 ### file://hostname/...
349 ### normalize file://localhost with file:///
350 $href->{host} = $parts[0] || '';
352 ### index in @parts where the path components begin;
355 ### file:////hostname/sharename/blah.txt
356 if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
358 $href->{host} = $parts[2] || ''; # avoid warnings
359 $href->{share} = $parts[3] || ''; # avoid warnings
361 $index = 4 # index after the share
363 ### file:///D|/blah.txt
364 ### file:///D:/blah.txt
367 ### this code comes from dmq's patch, but:
368 ### XXX if volume is empty, wouldn't that be an error? --kane
369 ### if so, our file://localhost test needs to be fixed as wel
370 $href->{vol} = $parts[1] || '';
372 ### correct D| style colume descriptors
373 $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
375 $index = 2; # index after the volume
378 ### rebuild the path from the leftover parts;
379 $href->{path} = join '/', '', splice( @parts, $index, $#parts );
382 ### using anything but qw() in hash slices may produce warnings
383 ### in older perls :-(
384 @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s;
387 ### split the path into file + dir ###
388 { my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
389 $href->{path} = $parts[1];
390 $href->{file} = $parts[2];
393 ### host will be empty if the target was 'localhost' and the
394 ### scheme was 'file'
395 $href->{host} = '' if ($href->{host} eq 'localhost') and
396 ($href->{scheme} eq 'file');
401 =head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] )
403 Fetches the file you requested and returns the full path to the file.
405 By default it writes to C<cwd()>, but you can override that by specifying
408 ### file fetch to /tmp, full path to the file in $where
409 $where = $ff->fetch( to => '/tmp' );
411 ### file slurped into $scalar, full path to the file in $where
412 ### file is downloaded to a temp directory and cleaned up at exit time
413 $where = $ff->fetch( to => \$scalar );
415 Returns the full path to the downloaded file on success, and false
421 my $self = shift or return;
426 to => { default => cwd(), store => \$target },
429 check( $tmpl, \%hash ) or return;
432 ### you want us to slurp the contents
433 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
434 $to = tempdir( 'FileFetch.XXXXXX', CLEANUP => 1 );
440 ### On VMS force to VMS format so File::Spec will work.
441 $to = VMS::Filespec::vmspath($to) if ON_VMS;
443 ### create the path if it doesn't exist yet ###
445 eval { mkpath( $to ) };
447 return $self->_error(loc("Could not create path '%1'",$to)) if $@;
451 ### set passive ftp if required ###
452 local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
454 ### we dont use catfile on win32 because if we are using a cygwin tool
455 ### under cmd.exe they wont understand windows style separators.
456 my $out_to = ON_WIN ? $to.'/'.$self->output_file
457 : File::Spec->catfile( $to, $self->output_file );
459 for my $method ( @{ $METHODS->{$self->scheme} } ) {
460 my $sub = '_'.$method.'_fetch';
462 unless( __PACKAGE__->can($sub) ) {
463 $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
468 ### method is blacklisted ###
469 next if grep { lc $_ eq $method } @$BLACKLIST;
471 ### method is known to fail ###
472 next if $METHOD_FAIL->{$method};
474 ### there's serious issues with IPC::Run and quoting of command
475 ### line arguments. using quotes in the wrong place breaks things,
476 ### and in the case of say,
477 ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
478 ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
479 ### it doesn't matter how you quote, it always fails.
480 local $IPC::Cmd::USE_IPC_RUN = 0;
482 if( my $file = $self->$sub(
486 unless( -e $file && -s _ ) {
487 $self->_error(loc("'%1' said it fetched '%2', ".
488 "but it was not created",$method,$file));
490 ### mark the failure ###
491 $METHOD_FAIL->{$method} = 1;
498 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
501 open my $fh, $file or do {
503 loc("Could not open '%1': %2", $file, $!));
508 $$target = do { local $/; <$fh> };
512 my $abs = File::Spec->rel2abs( $file );
520 ### if we got here, we looped over all methods, but we weren't able
525 ########################
526 ### _*_fetch methods ###
527 ########################
536 to => { required => 1, store => \$to }
538 check( $tmpl, \%hash ) or return;
540 ### modules required to download with lwp ###
543 'LWP::UserAgent' => '0.0',
544 'HTTP::Request' => '0.0',
545 'HTTP::Status' => '0.0',
550 if( can_load(modules => $use_list) ) {
552 ### setup the uri object
553 my $uri = URI->new( File::Spec::Unix->catfile(
554 $self->path, $self->file
557 ### special rules apply for file:// uris ###
558 $uri->scheme( $self->scheme );
559 $uri->host( $self->scheme eq 'file' ? '' : $self->host );
560 $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';
562 ### set up the useragent object
563 my $ua = LWP::UserAgent->new();
564 $ua->timeout( $TIMEOUT ) if $TIMEOUT;
565 $ua->agent( $USER_AGENT );
566 $ua->from( $FROM_EMAIL );
569 my $res = $ua->mirror($uri, $to) or return;
571 ### uptodate or fetched ok ###
572 if ( $res->code == 304 or $res->code == 200 ) {
576 return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
577 $res->code, HTTP::Status::status_message($res->code),
582 $METHOD_FAIL->{'lwp'} = 1;
587 ### Net::FTP fetching
594 to => { required => 1, store => \$to }
596 check( $tmpl, \%hash ) or return;
598 ### required modules ###
599 my $use_list = { 'Net::FTP' => 0 };
601 if( can_load( modules => $use_list ) ) {
603 ### make connection ###
605 my @options = ($self->host);
606 push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
607 unless( $ftp = Net::FTP->new( @options ) ) {
608 return $self->_error(loc("Ftp creation failed: %1",$@));
612 unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
613 return $self->_error(loc("Could not login to '%1'",$self->host));
616 ### set binary mode, just in case ###
619 ### create the remote path
620 ### remember remote paths are unix paths! [#11483]
621 my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
623 ### fetch the file ###
625 unless( $target = $ftp->get( $remote, $to ) ) {
626 return $self->_error(loc("Could not fetch '%1' from '%2'",
627 $remote, $self->host));
636 $METHOD_FAIL->{'netftp'} = 1;
641 ### /bin/wget fetch ###
648 to => { required => 1, store => \$to }
650 check( $tmpl, \%hash ) or return;
652 ### see if we have a wget binary ###
653 if( my $wget = can_run('wget') ) {
655 ### no verboseness, thanks ###
656 my $cmd = [ $wget, '--quiet' ];
658 ### if a timeout is set, add it ###
659 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
661 ### run passive if specified ###
662 push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
664 ### set the output document, add the uri ###
665 push @$cmd, '--output-document', $to, $self->uri;
667 ### with IPC::Cmd > 0.41, this is fixed in teh library,
668 ### and there's no need for special casing any more.
669 ### DO NOT quote things for IPC::Run, it breaks stuff.
670 # $IPC::Cmd::USE_IPC_RUN
671 # ? ($to, $self->uri)
672 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
676 unless(run( command => $cmd,
677 buffer => \$captured,
680 ### wget creates the output document always, even if the fetch
681 ### fails.. so unlink it in that case
684 return $self->_error(loc( "Command failed: %1", $captured || '' ));
690 $METHOD_FAIL->{'wget'} = 1;
695 ### /bin/lftp fetch ###
702 to => { required => 1, store => \$to }
704 check( $tmpl, \%hash ) or return;
706 ### see if we have a wget binary ###
707 if( my $lftp = can_run('lftp') ) {
709 ### no verboseness, thanks ###
710 my $cmd = [ $lftp, '-f' ];
712 my $fh = File::Temp->new;
716 ### if a timeout is set, add it ###
717 $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT;
719 ### run passive if specified ###
720 $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE;
722 ### set the output document, add the uri ###
723 ### quote the URI, because lftp supports certain shell
724 ### expansions, most notably & for backgrounding.
725 ### ' quote does nto work, must be "
726 $str .= q[get ']. $self->uri .q[' -o ]. $to . $/;
729 my $pp_str = join ' ', split $/, $str;
730 print "# lftp command: $pp_str\n";
733 ### write straight to the file.
737 ### the command needs to be 1 string to be executed
738 push @$cmd, $fh->filename;
740 ### with IPC::Cmd > 0.41, this is fixed in teh library,
741 ### and there's no need for special casing any more.
742 ### DO NOT quote things for IPC::Run, it breaks stuff.
743 # $IPC::Cmd::USE_IPC_RUN
744 # ? ($to, $self->uri)
745 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
750 unless(run( command => $cmd,
751 buffer => \$captured,
754 ### wget creates the output document always, even if the fetch
755 ### fails.. so unlink it in that case
758 return $self->_error(loc( "Command failed: %1", $captured || '' ));
764 $METHOD_FAIL->{'lftp'} = 1;
771 ### /bin/ftp fetch ###
778 to => { required => 1, store => \$to }
780 check( $tmpl, \%hash ) or return;
782 ### see if we have a ftp binary ###
783 if( my $ftp = can_run('ftp') ) {
785 my $fh = FileHandle->new;
787 local $SIG{CHLD} = 'IGNORE';
789 unless ($fh->open("|$ftp -n")) {
790 return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
794 "lcd " . dirname($to),
795 "open " . $self->host,
796 "user anonymous $FROM_EMAIL",
800 "get " . $self->file . " " . $self->output_file,
804 foreach (@dialog) { $fh->print($_, "\n") }
805 $fh->close or return;
811 ### lynx is stupid - it decompresses any .gz file it finds to be text
812 ### use /bin/lynx to fetch files
819 to => { required => 1, store => \$to }
821 check( $tmpl, \%hash ) or return;
823 ### see if we have a lynx binary ###
824 if( my $lynx = can_run('lynx') ) {
826 unless( IPC::Cmd->can_capture_buffer ) {
827 $METHOD_FAIL->{'lynx'} = 1;
829 return $self->_error(loc(
830 "Can not capture buffers. Can not use '%1' to fetch files",
834 ### check if the HTTP resource exists ###
835 if ($self->uri =~ /^https?:\/\//i) {
840 "-auth=anonymous:$FROM_EMAIL",
843 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
845 push @$cmd, $self->uri;
849 unless(run( command => $cmd,
853 return $self->_error(loc("Command failed: %1", $head || ''));
856 unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) {
857 return $self->_error(loc("Command failed: %1", $head || ''));
861 ### write to the output file ourselves, since lynx ass_u_mes to much
862 my $local = FileHandle->new(">$to")
863 or return $self->_error(loc(
864 "Could not open '%1' for writing: %2",$to,$!));
866 ### dump to stdout ###
870 "-auth=anonymous:$FROM_EMAIL",
873 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
875 ### DO NOT quote things for IPC::Run, it breaks stuff.
876 push @$cmd, $self->uri;
878 ### with IPC::Cmd > 0.41, this is fixed in teh library,
879 ### and there's no need for special casing any more.
880 ### DO NOT quote things for IPC::Run, it breaks stuff.
881 # $IPC::Cmd::USE_IPC_RUN
883 # : QUOTE. $self->uri .QUOTE;
888 unless(run( command => $cmd,
889 buffer => \$captured,
892 return $self->_error(loc("Command failed: %1", $captured || ''));
895 ### print to local file ###
896 ### XXX on a 404 with a special error page, $captured will actually
897 ### hold the contents of that page, and make it *appear* like the
898 ### request was a success, when really it wasn't :(
899 ### there doesn't seem to be an option for lynx to change the exit
900 ### code based on a 4XX status or so.
901 ### the closest we can come is using --error_file and parsing that,
902 ### which is very unreliable ;(
903 $local->print( $captured );
904 $local->close or return;
909 $METHOD_FAIL->{'lynx'} = 1;
914 ### use /bin/ncftp to fetch files
921 to => { required => 1, store => \$to }
923 check( $tmpl, \%hash ) or return;
925 ### we can only set passive mode in interactive sesssions, so bail out
926 ### if $FTP_PASSIVE is set
927 return if $FTP_PASSIVE;
929 ### see if we have a ncftp binary ###
930 if( my $ncftp = can_run('ncftp') ) {
934 '-V', # do not be verbose
935 '-p', $FROM_EMAIL, # email as password
936 $self->host, # hostname
937 dirname($to), # local dir for the file
938 # remote path to the file
939 ### DO NOT quote things for IPC::Run, it breaks stuff.
940 $IPC::Cmd::USE_IPC_RUN
941 ? File::Spec::Unix->catdir( $self->path, $self->file )
942 : QUOTE. File::Spec::Unix->catdir(
943 $self->path, $self->file ) .QUOTE
949 unless(run( command => $cmd,
950 buffer => \$captured,
953 return $self->_error(loc("Command failed: %1", $captured || ''));
959 $METHOD_FAIL->{'ncftp'} = 1;
964 ### use /bin/curl to fetch files
971 to => { required => 1, store => \$to }
973 check( $tmpl, \%hash ) or return;
975 if (my $curl = can_run('curl')) {
977 ### these long opts are self explanatory - I like that -jmb
978 my $cmd = [ $curl, '-q' ];
980 push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
982 push(@$cmd, '--silent') unless $DEBUG;
984 ### curl does the right thing with passive, regardless ###
985 if ($self->scheme eq 'ftp') {
986 push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
989 ### curl doesn't follow 302 (temporarily moved) etc automatically
990 ### so we add --location to enable that.
991 push @$cmd, '--fail', '--location', '--output', $to, $self->uri;
993 ### with IPC::Cmd > 0.41, this is fixed in teh library,
994 ### and there's no need for special casing any more.
995 ### DO NOT quote things for IPC::Run, it breaks stuff.
996 # $IPC::Cmd::USE_IPC_RUN
997 # ? ($to, $self->uri)
998 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1002 unless(run( command => $cmd,
1003 buffer => \$captured,
1007 return $self->_error(loc("Command failed: %1", $captured || ''));
1013 $METHOD_FAIL->{'curl'} = 1;
1019 ### use File::Copy for fetching file:// urls ###
1021 ### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html)
1022 ### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
1031 to => { required => 1, store => \$to }
1033 check( $tmpl, \%hash ) or return;
1037 ### prefix a / on unix systems with a file uri, since it would
1038 ### look somewhat like this:
1039 ### file:///home/kane/file
1040 ### wheras windows file uris for 'c:\some\dir\file' might look like:
1041 ### file:///C:/some/dir/file
1042 ### file:///C|/some/dir/file
1043 ### or for a network share '\\host\share\some\dir\file':
1044 ### file:////host/share/some/dir/file
1046 ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
1047 ### file://vms.host.edu/disk$user/my/notes/note12345.txt
1050 my $path = $self->path;
1051 my $vol = $self->vol;
1052 my $share = $self->share;
1055 if (!$share and $self->host) {
1056 return $self->_error(loc(
1057 "Currently %1 cannot handle hosts in %2 urls",
1058 'File::Fetch', 'file://'
1063 $path = File::Spec->catdir( split /\//, $path );
1064 $remote = File::Spec->catpath( $vol, $path, $self->file);
1067 ### win32 specific, and a share name, so we wont bother with File::Spec
1069 $remote = "\\\\".$self->host."\\$share\\$path";
1072 ### File::Spec on VMS can not currently handle UNIX syntax.
1073 my $file_class = ON_VMS
1074 ? 'File::Spec::Unix'
1077 $remote = $file_class->catfile( $path, $self->file );
1080 ### File::Copy is littered with 'die' statements :( ###
1081 my $rv = eval { File::Copy::copy( $remote, $to ) };
1083 ### something went wrong ###
1085 return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
1086 $remote, $to, $!, $@));
1092 ### use /usr/bin/rsync to fetch files
1099 to => { required => 1, store => \$to }
1101 check( $tmpl, \%hash ) or return;
1103 if (my $rsync = can_run('rsync')) {
1105 my $cmd = [ $rsync ];
1107 ### XXX: rsync has no I/O timeouts at all, by default
1108 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1110 push(@$cmd, '--quiet') unless $DEBUG;
1112 ### DO NOT quote things for IPC::Run, it breaks stuff.
1113 push @$cmd, $self->uri, $to;
1115 ### with IPC::Cmd > 0.41, this is fixed in teh library,
1116 ### and there's no need for special casing any more.
1117 ### DO NOT quote things for IPC::Run, it breaks stuff.
1118 # $IPC::Cmd::USE_IPC_RUN
1119 # ? ($to, $self->uri)
1120 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1123 unless(run( command => $cmd,
1124 buffer => \$captured,
1128 return $self->_error(loc("Command %1 failed: %2",
1129 "@$cmd" || '', $captured || ''));
1135 $METHOD_FAIL->{'rsync'} = 1;
1140 #################################
1144 #################################
1148 =head2 $ff->error([BOOL])
1150 Returns the last encountered error as string.
1151 Pass it a true value to get the C<Carp::longmess()> output instead.
1155 ### error handling the way Archive::Extract does it
1160 $self->_error_msg( $error );
1161 $self->_error_msg_long( Carp::longmess($error) );
1164 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1172 return shift() ? $self->_error_msg_long : $self->_error_msg;
1182 File::Fetch is able to fetch a variety of uris, by using several
1183 external programs and modules.
1185 Below is a mapping of what utilities will be used in what order
1186 for what schemes, if available:
1188 file => LWP, lftp, file
1189 http => LWP, wget, curl, lftp, lynx
1190 ftp => LWP, Net::FTP, wget, curl, lftp, ncftp, ftp
1193 If you'd like to disable the use of one or more of these utilities
1194 and/or modules, see the C<$BLACKLIST> variable further down.
1196 If a utility or module isn't available, it will be marked in a cache
1197 (see the C<$METHOD_FAIL> variable further down), so it will not be
1198 tried again. The C<fetch> method will only fail when all options are
1199 exhausted, and it was not able to retrieve the file.
1201 A special note about fetching files from an ftp uri:
1203 By default, all ftp connections are done in passive mode. To change
1204 that, see the C<$FTP_PASSIVE> variable further down.
1206 Furthermore, ftp uris only support anonymous connections, so no
1207 named user/password pair can be passed along.
1209 C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
1212 =head1 GLOBAL VARIABLES
1214 The behaviour of File::Fetch can be altered by changing the following
1217 =head2 $File::Fetch::FROM_EMAIL
1219 This is the email address that will be sent as your anonymous ftp
1222 Default is C<File-Fetch@example.com>.
1224 =head2 $File::Fetch::USER_AGENT
1226 This is the useragent as C<LWP> will report it.
1228 Default is C<File::Fetch/$VERSION>.
1230 =head2 $File::Fetch::FTP_PASSIVE
1232 This variable controls whether the environment variable C<FTP_PASSIVE>
1233 and any passive switches to commandline tools will be set to true.
1237 Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
1238 files, since passive mode can only be set interactively for this binary
1240 =head2 $File::Fetch::TIMEOUT
1242 When set, controls the network timeout (counted in seconds).
1246 =head2 $File::Fetch::WARN
1248 This variable controls whether errors encountered internally by
1249 C<File::Fetch> should be C<carp>'d or not.
1251 Set to false to silence warnings. Inspect the output of the C<error()>
1252 method manually to see what went wrong.
1254 Defaults to C<true>.
1256 =head2 $File::Fetch::DEBUG
1258 This enables debugging output when calling commandline utilities to
1260 This also enables C<Carp::longmess> errors, instead of the regular
1263 Good for tracking down why things don't work with your particular
1268 =head2 $File::Fetch::BLACKLIST
1270 This is an array ref holding blacklisted modules/utilities for fetching
1273 To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
1274 set $File::Fetch::BLACKLIST to:
1276 $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1278 The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.
1280 See the note on C<MAPPING> below.
1282 =head2 $File::Fetch::METHOD_FAIL
1284 This is a hashref registering what modules/utilities were known to fail
1285 for fetching files (mostly because they weren't installed).
1287 You can reset this cache by assigning an empty hashref to it, or
1288 individually remove keys.
1290 See the note on C<MAPPING> below.
1295 Here's a quick mapping for the utilities/modules, and their names for
1296 the $BLACKLIST, $METHOD_FAIL and other internal functions.
1308 =head1 FREQUENTLY ASKED QUESTIONS
1310 =head2 So how do I use a proxy with File::Fetch?
1312 C<File::Fetch> currently only supports proxies with LWP::UserAgent.
1313 You will need to set your environment variables accordingly. For
1314 example, to use an ftp proxy:
1316 $ENV{ftp_proxy} = 'foo.com';
1318 Refer to the LWP::UserAgent manpage for more details.
1320 =head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1322 C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
1323 which we in turn capture. If that content is a 'custom' error file
1324 (like, say, a C<404 handler>), you will get that contents instead.
1326 Sadly, C<lynx> doesn't support any options to return a different exit
1327 code on non-C<200 OK> status, giving us no way to tell the difference
1328 between a 'successfull' fetch and a custom error page.
1330 Therefor, we recommend to only use C<lynx> as a last resort. This is
1331 why it is at the back of our list of methods to try as well.
1333 =head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1335 C<File::Fetch> is relatively smart about things. When trying to write
1336 a file to disk, it removes the C<query parameters> (see the
1337 C<output_file> method for details) from the file name before creating
1338 it. In most cases this suffices.
1340 If you have any other characters you need to escape, please install
1341 the C<URI::Escape> module from CPAN, and pre-encode your URI before
1342 passing it to C<File::Fetch>. You can read about the details of URIs
1343 and URI encoding here:
1345 http://www.faqs.org/rfcs/rfc2396.html
1351 =item Implement $PREFER_BIN
1353 To indicate to rather use commandline tools than modules
1359 Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>.
1363 This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1367 This library is free software; you may redistribute and/or modify it
1368 under the same terms as Perl itself.
1374 # c-indentation-style: bsd
1376 # indent-tabs-mode: nil
1378 # vim: expandtab shiftwidth=4: