Upgrade to File::Fetch 0.13_02
[p5sagit/p5-mst-13.2.git] / lib / File / Fetch.pm
CommitLineData
79fd8837 1package File::Fetch;
2
3use strict;
4use FileHandle;
5use File::Copy;
6use File::Spec;
7use File::Spec::Unix;
79fd8837 8use File::Basename qw[dirname];
9
10use Cwd qw[cwd];
11use Carp qw[carp];
12use IPC::Cmd qw[can_run run];
13use File::Path qw[mkpath];
14use Params::Check qw[check];
15use Module::Load::Conditional qw[can_load];
16use Locale::Maketext::Simple Style => 'gettext';
17
18use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
19 $BLACKLIST $METHOD_FAIL $VERSION $METHODS
20 $FTP_PASSIVE $TIMEOUT $DEBUG $WARN
21 ];
22
d4b3706f 23use constant QUOTE => do { $^O eq 'MSWin32' ? q["] : q['] };
24
25
9e5ea595 26$VERSION = '0.13_02';
79fd8837 27$PREFER_BIN = 0; # XXX TODO implement
28$FROM_EMAIL = 'File-Fetch@example.com';
29$USER_AGENT = 'File::Fetch/$VERSION';
30$BLACKLIST = [qw|ftp|];
31$METHOD_FAIL = { };
32$FTP_PASSIVE = 1;
33$TIMEOUT = 0;
34$DEBUG = 0;
35$WARN = 1;
36
37### methods available to fetch the file depending on the scheme
38$METHODS = {
39 http => [ qw|lwp wget curl lynx| ],
40 ftp => [ qw|lwp netftp wget curl ncftp ftp| ],
41 file => [ qw|lwp file| ],
42 rsync => [ qw|rsync| ]
43};
44
45### silly warnings ###
46local $Params::Check::VERBOSE = 1;
47local $Params::Check::VERBOSE = 1;
48local $Module::Load::Conditional::VERBOSE = 0;
49local $Module::Load::Conditional::VERBOSE = 0;
50
51### see what OS we are on, important for file:// uris ###
9e5ea595 52use constant ON_WIN => ($^O eq 'MSWin32');
53use constant ON_VMS => ($^O eq 'VMS');
54use constant ON_UNIX => (!ON_WIN and !ON_VMS);
79fd8837 55
56=pod
57
58=head1 NAME
59
60File::Fetch - A generic file fetching mechanism
61
62=head1 SYNOPSIS
63
64 use File::Fetch;
65
66 ### build a File::Fetch object ###
67 my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
68
69 ### fetch the uri to cwd() ###
70 my $where = $ff->fetch() or die $ff->error;
71
72 ### fetch the uri to /tmp ###
73 my $where = $ff->fetch( to => '/tmp' );
74
75 ### parsed bits from the uri ###
76 $ff->uri;
77 $ff->scheme;
78 $ff->host;
79 $ff->path;
80 $ff->file;
81
82=head1 DESCRIPTION
83
84File::Fetch is a generic file fetching mechanism.
85
86It allows you to fetch any file pointed to by a C<ftp>, C<http>,
87C<file>, or C<rsync> uri by a number of different means.
88
89See the C<HOW IT WORKS> section further down for details.
90
d4b3706f 91=head1 ACCESSORS
92
93A C<File::Fetch> object has the following accessors
94
95=over 4
96
97=item $ff->uri
98
99The uri you passed to the constructor
100
101=item $ff->scheme
102
103The scheme from the uri (like 'file', 'http', etc)
104
105=item $ff->host
106
107The hostname in the uri, will be empty for a 'file' scheme.
108
109=item $ff->path
110
111The path from the uri, will be at least a single '/'.
112
113=item $ff->file
114
115The name of the remote file. For the local file name, the
116result of $ff->output_file will be used.
117
118=cut
119
120
121##########################
122### Object & Accessors ###
123##########################
124
125{
126 ### template for new() and autogenerated accessors ###
127 my $Tmpl = {
128 scheme => { default => 'http' },
129 host => { default => 'localhost' },
130 path => { default => '/' },
131 file => { required => 1 },
132 uri => { required => 1 },
9e5ea595 133 vol => { }, # windows and vms for file:// uris
134 share => { }, # windows for file:// uris
d4b3706f 135 _error_msg => { no_override => 1 },
136 _error_msg_long => { no_override => 1 },
137 };
138
139 for my $method ( keys %$Tmpl ) {
140 no strict 'refs';
141 *$method = sub {
142 my $self = shift;
143 $self->{$method} = $_[0] if @_;
144 return $self->{$method};
145 }
146 }
147
148 sub _create {
149 my $class = shift;
150 my %hash = @_;
151
152 my $args = check( $Tmpl, \%hash ) or return;
153
154 bless $args, $class;
155
156 if( lc($args->scheme) ne 'file' and not $args->host ) {
157 return File::Fetch->_error(loc(
158 "Hostname required when fetching from '%1'",$args->scheme));
159 }
160
161 for (qw[path file]) {
9e5ea595 162 unless( $args->$_() ) { # 5.5.x needs the ()
d4b3706f 163 return File::Fetch->_error(loc("No '%1' specified",$_));
164 }
165 }
166
167 return $args;
168 }
169}
170
171=item $ff->output_file
172
173The name of the output file. This is the same as $ff->file,
174but any query parameters are stripped off. For example:
175
176 http://example.com/index.html?x=y
177
178would make the output file be C<index.html> rather than
179C<index.html?x=y>.
180
181=back
182
183=cut
184
185sub output_file {
186 my $self = shift;
187 my $file = $self->file;
188
189 $file =~ s/\?.*$//g;
190
191 return $file;
192}
193
194### XXX do this or just point to URI::Escape?
195# =head2 $esc_uri = $ff->escaped_uri
196#
197# =cut
198#
199# ### most of this is stolen straight from URI::escape
200# { ### Build a char->hex map
201# my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
202#
203# sub escaped_uri {
204# my $self = shift;
205# my $uri = $self->uri;
206#
207# ### Default unsafe characters. RFC 2732 ^(uric - reserved)
208# $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
209# $escapes{$1} || $self->_fail_hi($1)/ge;
210#
211# return $uri;
212# }
213#
214# sub _fail_hi {
215# my $self = shift;
216# my $char = shift;
217#
218# $self->_error(loc(
219# "Can't escape '%1', try using the '%2' module instead",
220# sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
221# ));
222# }
223#
224# sub output_file {
225#
226# }
227#
228#
229# }
230
79fd8837 231=head1 METHODS
232
233=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
234
235Parses the uri and creates a corresponding File::Fetch::Item object,
236that is ready to be C<fetch>ed and returns it.
237
238Returns false on failure.
239
240=cut
241
242sub new {
243 my $class = shift;
244 my %hash = @_;
245
246 my ($uri);
247 my $tmpl = {
248 uri => { required => 1, store => \$uri },
249 };
250
251 check( $tmpl, \%hash ) or return;
252
253 ### parse the uri to usable parts ###
254 my $href = __PACKAGE__->_parse_uri( $uri ) or return;
255
256 ### make it into a FFI object ###
d4b3706f 257 my $ff = File::Fetch->_create( %$href ) or return;
79fd8837 258
259
260 ### return the object ###
d4b3706f 261 return $ff;
79fd8837 262}
263
264### parses an uri to a hash structure:
265###
266### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
267###
268### becomes:
269###
270### $href = {
271### scheme => 'ftp',
272### host => 'ftp.cpan.org',
273### path => '/pub/mirror',
274### file => 'index.html'
275### };
276###
9e5ea595 277### In the case of file:// urls there maybe be additional fields
278###
279### For windows file shares there may be a 'share' key specified
280###
281### 'share' => 'sharename'
282###
283### For systems with volume specifications such as VMS and Win32 there may be
284### a volume specifier provided in the 'vol' field.
285###
286### 'vol' => 'volumename'
287###
288
79fd8837 289sub _parse_uri {
290 my $self = shift;
291 my $uri = shift or return;
292
293 my $href = { uri => $uri };
294
295 ### find the scheme ###
296 $uri =~ s|^(\w+)://||;
297 $href->{scheme} = $1;
298
9e5ea595 299 ### See rfc 1738 section 3.10
300 ### http://www.faqs.org/rfcs/rfc1738.html
301 ### And wikipedia for more on windows file:// urls
302 ### http://en.wikipedia.org/wiki/File://
79fd8837 303 if( $href->{scheme} eq 'file' ) {
9e5ea595 304
305 my @parts = split '/',$uri;
306
307 ### file://hostname/...
308 ### file://hostname/...
309 $href->{host} = $parts[0] || '';
310
311 ### index in @parts where the path components begin;
312 my $index = 1;
313
314 ### file:///D|/blah.txt
315 ### file:///D:/blah.txt
316 ### file://hostname/D|/blah.txt
317 ### file://hostname/D:/blah.txt
318 if ($parts[1] =~ s/\A([A-Z])\|\z/$1:/i || # s/D|/D:/
319 $parts[1] =~ m/\A[A-Z]:\z/i # m/D:/
320 ) {
321 $href->{vol} = $parts[1];
322 $index = 2; # index after the volume
323
324 ### file:////hostname/sharename/blah.txt
325 } elsif ( not length $parts[0] and not length $parts[1] ) {
326 $href->{host} = $parts[2] || ''; # avoid warnings
327 $href->{share} = $parts[3] || ''; # avoid warnings
328
329 $index = 4 # index after the share
330 }
331
332 ### rebuild the path from the leftover paths;
333 $href->{path} = join '/', '', splice( @parts, $index, $#parts );
79fd8837 334
335 } else {
9e5ea595 336 ### using anything but qw() in hash slices may produce warnings
337 ### in older perls :-(
338 @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s;
79fd8837 339 }
340
341 ### split the path into file + dir ###
342 { my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
343 $href->{path} = $parts[1];
344 $href->{file} = $parts[2];
345 }
346
347
348 return $href;
349}
350
351=head2 $ff->fetch( [to => /my/output/dir/] )
352
353Fetches the file you requested. By default it writes to C<cwd()>,
354but you can override that by specifying the C<to> argument.
355
356Returns the full path to the downloaded file on success, and false
357on failure.
358
359=cut
360
361sub fetch {
362 my $self = shift or return;
363 my %hash = @_;
364
365 my $to;
366 my $tmpl = {
367 to => { default => cwd(), store => \$to },
368 };
369
370 check( $tmpl, \%hash ) or return;
371
372 ### create the path if it doesn't exist yet ###
373 unless( -d $to ) {
374 eval { mkpath( $to ) };
375
376 return $self->_error(loc("Could not create path '%1'",$to)) if $@;
377 }
378
379 ### set passive ftp if required ###
380 local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
381
382 ###
a0ad4830 383 my $out_to = File::Spec->catfile( $to, $self->output_file );
79fd8837 384 for my $method ( @{ $METHODS->{$self->scheme} } ) {
385 my $sub = '_'.$method.'_fetch';
386
387 unless( __PACKAGE__->can($sub) ) {
388 $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
389 $method));
390 next;
391 }
392
393 ### method is blacklisted ###
394 next if grep { lc $_ eq $method } @$BLACKLIST;
395
396 ### method is known to fail ###
397 next if $METHOD_FAIL->{$method};
398
d4b3706f 399 ### there's serious issues with IPC::Run and quoting of command
400 ### line arguments. using quotes in the wrong place breaks things,
401 ### and in the case of say,
402 ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
403 ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
404 ### it doesn't matter how you quote, it always fails.
405 local $IPC::Cmd::USE_IPC_RUN = 0;
406
407 if( my $file = $self->$sub(
a0ad4830 408 to => $out_to
d4b3706f 409 )){
79fd8837 410
411 unless( -e $file && -s _ ) {
412 $self->_error(loc("'%1' said it fetched '%2', ".
413 "but it was not created",$method,$file));
414
415 ### mark the failure ###
416 $METHOD_FAIL->{$method} = 1;
417
418 next;
419
420 } else {
421
422 my $abs = File::Spec->rel2abs( $file );
423 return $abs;
424 }
425 }
426 }
427
428
429 ### if we got here, we looped over all methods, but we weren't able
430 ### to fetch it.
431 return;
432}
433
79fd8837 434########################
435### _*_fetch methods ###
436########################
437
438### LWP fetching ###
439sub _lwp_fetch {
440 my $self = shift;
441 my %hash = @_;
442
443 my ($to);
444 my $tmpl = {
445 to => { required => 1, store => \$to }
446 };
447 check( $tmpl, \%hash ) or return;
448
449 ### modules required to download with lwp ###
450 my $use_list = {
451 LWP => '0.0',
452 'LWP::UserAgent' => '0.0',
453 'HTTP::Request' => '0.0',
454 'HTTP::Status' => '0.0',
455 URI => '0.0',
456
457 };
458
459 if( can_load(modules => $use_list) ) {
460
461 ### setup the uri object
462 my $uri = URI->new( File::Spec::Unix->catfile(
463 $self->path, $self->file
464 ) );
465
466 ### special rules apply for file:// uris ###
467 $uri->scheme( $self->scheme );
468 $uri->host( $self->scheme eq 'file' ? '' : $self->host );
469 $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';
470
471 ### set up the useragent object
472 my $ua = LWP::UserAgent->new();
473 $ua->timeout( $TIMEOUT ) if $TIMEOUT;
474 $ua->agent( $USER_AGENT );
475 $ua->from( $FROM_EMAIL );
476 $ua->env_proxy;
477
478 my $res = $ua->mirror($uri, $to) or return;
479
480 ### uptodate or fetched ok ###
481 if ( $res->code == 304 or $res->code == 200 ) {
482 return $to;
483
484 } else {
485 return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
486 $res->code, HTTP::Status::status_message($res->code),
487 $res->status_line));
488 }
489
490 } else {
491 $METHOD_FAIL->{'lwp'} = 1;
492 return;
493 }
494}
495
496### Net::FTP fetching
497sub _netftp_fetch {
498 my $self = shift;
499 my %hash = @_;
500
501 my ($to);
502 my $tmpl = {
503 to => { required => 1, store => \$to }
504 };
505 check( $tmpl, \%hash ) or return;
506
507 ### required modules ###
508 my $use_list = { 'Net::FTP' => 0 };
509
510 if( can_load( modules => $use_list ) ) {
511
512 ### make connection ###
513 my $ftp;
514 my @options = ($self->host);
515 push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
516 unless( $ftp = Net::FTP->new( @options ) ) {
517 return $self->_error(loc("Ftp creation failed: %1",$@));
518 }
519
520 ### login ###
521 unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
522 return $self->_error(loc("Could not login to '%1'",$self->host));
523 }
524
525 ### set binary mode, just in case ###
526 $ftp->binary;
527
528 ### create the remote path
529 ### remember remote paths are unix paths! [#11483]
530 my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
531
532 ### fetch the file ###
533 my $target;
534 unless( $target = $ftp->get( $remote, $to ) ) {
535 return $self->_error(loc("Could not fetch '%1' from '%2'",
536 $remote, $self->host));
537 }
538
539 ### log out ###
540 $ftp->quit;
541
542 return $target;
543
544 } else {
545 $METHOD_FAIL->{'netftp'} = 1;
546 return;
547 }
548}
549
550### /bin/wget fetch ###
551sub _wget_fetch {
552 my $self = shift;
553 my %hash = @_;
554
555 my ($to);
556 my $tmpl = {
557 to => { required => 1, store => \$to }
558 };
559 check( $tmpl, \%hash ) or return;
560
561 ### see if we have a wget binary ###
562 if( my $wget = can_run('wget') ) {
563
564 ### no verboseness, thanks ###
565 my $cmd = [ $wget, '--quiet' ];
566
567 ### if a timeout is set, add it ###
568 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
569
570 ### run passive if specified ###
571 push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
572
573 ### set the output document, add the uri ###
d4b3706f 574 push @$cmd, '--output-document',
575 ### DO NOT quote things for IPC::Run, it breaks stuff.
576 $IPC::Cmd::USE_IPC_RUN
577 ? ($to, $self->uri)
578 : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
79fd8837 579
580 ### shell out ###
581 my $captured;
d4b3706f 582 unless(run( command => $cmd,
583 buffer => \$captured,
584 verbose => $DEBUG
585 )) {
79fd8837 586 ### wget creates the output document always, even if the fetch
587 ### fails.. so unlink it in that case
588 1 while unlink $to;
589
590 return $self->_error(loc( "Command failed: %1", $captured || '' ));
591 }
592
593 return $to;
594
595 } else {
596 $METHOD_FAIL->{'wget'} = 1;
597 return;
598 }
599}
600
601
602### /bin/ftp fetch ###
603sub _ftp_fetch {
604 my $self = shift;
605 my %hash = @_;
606
607 my ($to);
608 my $tmpl = {
609 to => { required => 1, store => \$to }
610 };
611 check( $tmpl, \%hash ) or return;
612
d4b3706f 613 ### see if we have a ftp binary ###
79fd8837 614 if( my $ftp = can_run('ftp') ) {
615
616 my $fh = FileHandle->new;
617
618 local $SIG{CHLD} = 'IGNORE';
619
620 unless ($fh->open("|$ftp -n")) {
621 return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
622 }
623
624 my @dialog = (
625 "lcd " . dirname($to),
626 "open " . $self->host,
627 "user anonymous $FROM_EMAIL",
628 "cd /",
629 "cd " . $self->path,
630 "binary",
d4b3706f 631 "get " . $self->file . " " . $self->output_file,
79fd8837 632 "quit",
633 );
634
635 foreach (@dialog) { $fh->print($_, "\n") }
636 $fh->close or return;
637
638 return $to;
639 }
640}
641
642### lynx is stupid - it decompresses any .gz file it finds to be text
643### use /bin/lynx to fetch files
644sub _lynx_fetch {
645 my $self = shift;
646 my %hash = @_;
647
648 my ($to);
649 my $tmpl = {
650 to => { required => 1, store => \$to }
651 };
652 check( $tmpl, \%hash ) or return;
653
d4b3706f 654 ### see if we have a lynx binary ###
79fd8837 655 if( my $lynx = can_run('lynx') ) {
656
d4b3706f 657 unless( IPC::Cmd->can_capture_buffer ) {
658 $METHOD_FAIL->{'lynx'} = 1;
659
660 return $self->_error(loc(
661 "Can not capture buffers. Can not use '%1' to fetch files",
662 'lynx' ));
663 }
79fd8837 664
665 ### write to the output file ourselves, since lynx ass_u_mes to much
666 my $local = FileHandle->new(">$to")
667 or return $self->_error(loc(
668 "Could not open '%1' for writing: %2",$to,$!));
669
670 ### dump to stdout ###
671 my $cmd = [
672 $lynx,
673 '-source',
674 "-auth=anonymous:$FROM_EMAIL",
675 ];
676
677 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
678
d4b3706f 679 ### DO NOT quote things for IPC::Run, it breaks stuff.
680 push @$cmd, $IPC::Cmd::USE_IPC_RUN
681 ? $self->uri
682 : QUOTE. $self->uri .QUOTE;
683
79fd8837 684
685 ### shell out ###
686 my $captured;
687 unless(run( command => $cmd,
688 buffer => \$captured,
689 verbose => $DEBUG )
690 ) {
691 return $self->_error(loc("Command failed: %1", $captured || ''));
692 }
693
694 ### print to local file ###
695 ### XXX on a 404 with a special error page, $captured will actually
696 ### hold the contents of that page, and make it *appear* like the
697 ### request was a success, when really it wasn't :(
698 ### there doesn't seem to be an option for lynx to change the exit
699 ### code based on a 4XX status or so.
700 ### the closest we can come is using --error_file and parsing that,
701 ### which is very unreliable ;(
702 $local->print( $captured );
703 $local->close or return;
704
705 return $to;
706
707 } else {
708 $METHOD_FAIL->{'lynx'} = 1;
709 return;
710 }
711}
712
713### use /bin/ncftp to fetch files
714sub _ncftp_fetch {
715 my $self = shift;
716 my %hash = @_;
717
718 my ($to);
719 my $tmpl = {
720 to => { required => 1, store => \$to }
721 };
722 check( $tmpl, \%hash ) or return;
723
724 ### we can only set passive mode in interactive sesssions, so bail out
725 ### if $FTP_PASSIVE is set
726 return if $FTP_PASSIVE;
727
d4b3706f 728 ### see if we have a ncftp binary ###
79fd8837 729 if( my $ncftp = can_run('ncftp') ) {
730
731 my $cmd = [
732 $ncftp,
733 '-V', # do not be verbose
734 '-p', $FROM_EMAIL, # email as password
735 $self->host, # hostname
736 dirname($to), # local dir for the file
737 # remote path to the file
d4b3706f 738 ### DO NOT quote things for IPC::Run, it breaks stuff.
739 $IPC::Cmd::USE_IPC_RUN
740 ? File::Spec::Unix->catdir( $self->path, $self->file )
741 : QUOTE. File::Spec::Unix->catdir(
742 $self->path, $self->file ) .QUOTE
743
79fd8837 744 ];
745
746 ### shell out ###
747 my $captured;
748 unless(run( command => $cmd,
749 buffer => \$captured,
750 verbose => $DEBUG )
751 ) {
752 return $self->_error(loc("Command failed: %1", $captured || ''));
753 }
754
755 return $to;
756
757 } else {
758 $METHOD_FAIL->{'ncftp'} = 1;
759 return;
760 }
761}
762
763### use /bin/curl to fetch files
764sub _curl_fetch {
765 my $self = shift;
766 my %hash = @_;
767
768 my ($to);
769 my $tmpl = {
770 to => { required => 1, store => \$to }
771 };
772 check( $tmpl, \%hash ) or return;
773
774 if (my $curl = can_run('curl')) {
775
776 ### these long opts are self explanatory - I like that -jmb
777 my $cmd = [ $curl ];
778
779 push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
780
781 push(@$cmd, '--silent') unless $DEBUG;
782
783 ### curl does the right thing with passive, regardless ###
784 if ($self->scheme eq 'ftp') {
785 push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
786 }
787
788 ### curl doesn't follow 302 (temporarily moved) etc automatically
789 ### so we add --location to enable that.
d4b3706f 790 push @$cmd, '--fail', '--location', '--output',
791 ### DO NOT quote things for IPC::Run, it breaks stuff.
792 $IPC::Cmd::USE_IPC_RUN
793 ? ($to, $self->uri)
794 : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
79fd8837 795
796 my $captured;
797 unless(run( command => $cmd,
798 buffer => \$captured,
799 verbose => $DEBUG )
800 ) {
801
802 return $self->_error(loc("Command failed: %1", $captured || ''));
803 }
804
805 return $to;
806
807 } else {
808 $METHOD_FAIL->{'curl'} = 1;
809 return;
810 }
811}
812
813
814### use File::Copy for fetching file:// urls ###
815### XXX file:// uri to local path conversion is just too weird...
816### depend on LWP to do it for us
9e5ea595 817###
818### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html)
819### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
820
79fd8837 821sub _file_fetch {
822 my $self = shift;
823 my %hash = @_;
824
825 my ($to);
826 my $tmpl = {
827 to => { required => 1, store => \$to }
828 };
829 check( $tmpl, \%hash ) or return;
830
9e5ea595 831
832
79fd8837 833 ### prefix a / on unix systems with a file uri, since it would
834 ### look somewhat like this:
9e5ea595 835 ### file:///home/kane/file
836 ### wheras windows file uris for 'c:\some\dir\file' might look like:
837 ### file:///C:/some/dir/file
838 ### file:///C|/some/dir/file
839 ### or for a network share '\\host\share\some\dir\file':
840 ### file:////host/share/some/dir/file
841 ###
842 ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
843 ### file://vms.host.edu/disk$user/my/notes/note12345.txt
844 ###
845
846 my $path = $self->path;
847 my $vol = $self->vol;
848 my $share = $self->share;
849
850 my $remote;
851 if (!$share and $self->host) {
852 return $self->_error(loc(
853 "Currently %1 cannot handle hosts in %2 urls",
854 'File::Fetch', 'file://'
855 ));
856 }
857
858 if( $vol ) {
859 $path = File::Spec->catdir( split /\//, $path );
860 $remote = File::Spec->catpath( $vol, $path, $self->file);
79fd8837 861
9e5ea595 862 } elsif( $share ) {
863 ### win32 specific, and a share name, so we wont bother with File::Spec
864 $path =~ s|/+|\\|g;
865 $remote = "\\\\".$self->host."\\$share\\$path";
866
867 } else {
868 $remote = File::Spec->catfile( $path, $self->file );
869 }
79fd8837 870
871 ### File::Copy is littered with 'die' statements :( ###
872 my $rv = eval { File::Copy::copy( $remote, $to ) };
873
874 ### something went wrong ###
875 if( !$rv or $@ ) {
876 return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
877 $remote, $to, $!, $@));
878 }
879
880 return $to;
881}
882
883### use /usr/bin/rsync to fetch files
884sub _rsync_fetch {
885 my $self = shift;
886 my %hash = @_;
887
888 my ($to);
889 my $tmpl = {
890 to => { required => 1, store => \$to }
891 };
892 check( $tmpl, \%hash ) or return;
893
894 if (my $rsync = can_run('rsync')) {
895
896 my $cmd = [ $rsync ];
897
898 ### XXX: rsync has no I/O timeouts at all, by default
899 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
900
901 push(@$cmd, '--quiet') unless $DEBUG;
902
d4b3706f 903 ### DO NOT quote things for IPC::Run, it breaks stuff.
904 push @$cmd, $IPC::Cmd::USE_IPC_RUN
905 ? ($self->uri, $to)
906 : (QUOTE. $self->uri .QUOTE, QUOTE. $to .QUOTE);
79fd8837 907
908 my $captured;
909 unless(run( command => $cmd,
910 buffer => \$captured,
911 verbose => $DEBUG )
912 ) {
913
914 return $self->_error(loc("Command failed: %1", $captured || ''));
915 }
916
917 return $to;
918
919 } else {
920 $METHOD_FAIL->{'rsync'} = 1;
921 return;
922 }
923}
924
925#################################
926#
927# Error code
928#
929#################################
930
931=pod
932
933=head2 $ff->error([BOOL])
934
935Returns the last encountered error as string.
936Pass it a true value to get the C<Carp::longmess()> output instead.
937
938=cut
939
d4b3706f 940### error handling the way Archive::Extract does it
941sub _error {
942 my $self = shift;
943 my $error = shift;
944
945 $self->_error_msg( $error );
946 $self->_error_msg_long( Carp::longmess($error) );
947
948 if( $WARN ) {
949 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
79fd8837 950 }
951
d4b3706f 952 return;
79fd8837 953}
954
d4b3706f 955sub error {
956 my $self = shift;
957 return shift() ? $self->_error_msg_long : $self->_error_msg;
958}
79fd8837 959
960
9611;
962
963=pod
964
965=head1 HOW IT WORKS
966
967File::Fetch is able to fetch a variety of uris, by using several
968external programs and modules.
969
970Below is a mapping of what utilities will be used in what order
971for what schemes, if available:
972
973 file => LWP, file
974 http => LWP, wget, curl, lynx
975 ftp => LWP, Net::FTP, wget, curl, ncftp, ftp
976 rsync => rsync
977
978If you'd like to disable the use of one or more of these utilities
979and/or modules, see the C<$BLACKLIST> variable further down.
980
981If a utility or module isn't available, it will be marked in a cache
982(see the C<$METHOD_FAIL> variable further down), so it will not be
983tried again. The C<fetch> method will only fail when all options are
984exhausted, and it was not able to retrieve the file.
985
986A special note about fetching files from an ftp uri:
987
988By default, all ftp connections are done in passive mode. To change
989that, see the C<$FTP_PASSIVE> variable further down.
990
991Furthermore, ftp uris only support anonymous connections, so no
992named user/password pair can be passed along.
993
994C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
995further down.
996
997=head1 GLOBAL VARIABLES
998
999The behaviour of File::Fetch can be altered by changing the following
1000global variables:
1001
1002=head2 $File::Fetch::FROM_EMAIL
1003
1004This is the email address that will be sent as your anonymous ftp
1005password.
1006
1007Default is C<File-Fetch@example.com>.
1008
1009=head2 $File::Fetch::USER_AGENT
1010
1011This is the useragent as C<LWP> will report it.
1012
1013Default is C<File::Fetch/$VERSION>.
1014
1015=head2 $File::Fetch::FTP_PASSIVE
1016
1017This variable controls whether the environment variable C<FTP_PASSIVE>
1018and any passive switches to commandline tools will be set to true.
1019
1020Default value is 1.
1021
1022Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
1023files, since passive mode can only be set interactively for this binary
1024
1025=head2 $File::Fetch::TIMEOUT
1026
1027When set, controls the network timeout (counted in seconds).
1028
1029Default value is 0.
1030
1031=head2 $File::Fetch::WARN
1032
1033This variable controls whether errors encountered internally by
1034C<File::Fetch> should be C<carp>'d or not.
1035
1036Set to false to silence warnings. Inspect the output of the C<error()>
1037method manually to see what went wrong.
1038
1039Defaults to C<true>.
1040
1041=head2 $File::Fetch::DEBUG
1042
1043This enables debugging output when calling commandline utilities to
1044fetch files.
1045This also enables C<Carp::longmess> errors, instead of the regular
1046C<carp> errors.
1047
1048Good for tracking down why things don't work with your particular
1049setup.
1050
1051Default is 0.
1052
1053=head2 $File::Fetch::BLACKLIST
1054
1055This is an array ref holding blacklisted modules/utilities for fetching
1056files with.
1057
1058To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
1059set $File::Fetch::BLACKLIST to:
1060
1061 $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1062
1063The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.
1064
1065See the note on C<MAPPING> below.
1066
1067=head2 $File::Fetch::METHOD_FAIL
1068
1069This is a hashref registering what modules/utilities were known to fail
1070for fetching files (mostly because they weren't installed).
1071
1072You can reset this cache by assigning an empty hashref to it, or
1073individually remove keys.
1074
1075See the note on C<MAPPING> below.
1076
1077=head1 MAPPING
1078
1079
1080Here's a quick mapping for the utilities/modules, and their names for
1081the $BLACKLIST, $METHOD_FAIL and other internal functions.
1082
1083 LWP => lwp
1084 Net::FTP => netftp
1085 wget => wget
1086 lynx => lynx
1087 ncftp => ncftp
1088 ftp => ftp
1089 curl => curl
1090 rsync => rsync
1091
1092=head1 FREQUENTLY ASKED QUESTIONS
1093
1094=head2 So how do I use a proxy with File::Fetch?
1095
1096C<File::Fetch> currently only supports proxies with LWP::UserAgent.
1097You will need to set your environment variables accordingly. For
1098example, to use an ftp proxy:
1099
1100 $ENV{ftp_proxy} = 'foo.com';
1101
1102Refer to the LWP::UserAgent manpage for more details.
1103
1104=head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1105
1106C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
1107which we in turn capture. If that content is a 'custom' error file
1108(like, say, a C<404 handler>), you will get that contents instead.
1109
1110Sadly, C<lynx> doesn't support any options to return a different exit
1111code on non-C<200 OK> status, giving us no way to tell the difference
1112between a 'successfull' fetch and a custom error page.
1113
1114Therefor, we recommend to only use C<lynx> as a last resort. This is
1115why it is at the back of our list of methods to try as well.
1116
d4b3706f 1117=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1118
1119C<File::Fetch> is relatively smart about things. When trying to write
1120a file to disk, it removes the C<query parameters> (see the
1121C<output_file> method for details) from the file name before creating
1122it. In most cases this suffices.
1123
1124If you have any other characters you need to escape, please install
1125the C<URI::Escape> module from CPAN, and pre-encode your URI before
1126passing it to C<File::Fetch>. You can read about the details of URIs
1127and URI encoding here:
1128
1129 http://www.faqs.org/rfcs/rfc2396.html
1130
79fd8837 1131=head1 TODO
1132
1133=over 4
1134
1135=item Implement $PREFER_BIN
1136
1137To indicate to rather use commandline tools than modules
1138
a0ad4830 1139=back
1140
1141=head1 BUG REPORTS
1142
1143Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>.
1144
1145=head1 AUTHOR
79fd8837 1146
d4b3706f 1147This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
79fd8837 1148
1149=head1 COPYRIGHT
1150
a0ad4830 1151This library is free software; you may redistribute and/or modify it
1152under the same terms as Perl itself.
79fd8837 1153
79fd8837 1154
1155=cut
1156
1157# Local variables:
1158# c-indentation-style: bsd
1159# c-basic-offset: 4
1160# indent-tabs-mode: nil
1161# End:
1162# vim: expandtab shiftwidth=4:
1163
1164
1165
1166