Following 32238, change "interpreter_size" to "interp_size" in the new
[p5sagit/p5-mst-13.2.git] / lib / File / Fetch.pm
CommitLineData
79fd8837 1package File::Fetch;
2
3use strict;
4use FileHandle;
5use File::Copy;
6use File::Spec;
7use File::Spec::Unix;
79fd8837 8use File::Basename qw[dirname];
9
10use Cwd qw[cwd];
11use Carp qw[carp];
12use IPC::Cmd qw[can_run run];
13use File::Path qw[mkpath];
14use Params::Check qw[check];
15use Module::Load::Conditional qw[can_load];
16use Locale::Maketext::Simple Style => 'gettext';
17
18use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
19 $BLACKLIST $METHOD_FAIL $VERSION $METHODS
20 $FTP_PASSIVE $TIMEOUT $DEBUG $WARN
21 ];
22
d4b3706f 23use constant QUOTE => do { $^O eq 'MSWin32' ? q["] : q['] };
24
25
fe98d82b 26$VERSION = '0.13_03';
27$VERSION = eval $VERSION; # avoid warnings with development releases
28$PREFER_BIN = 0; # XXX TODO implement
79fd8837 29$FROM_EMAIL = 'File-Fetch@example.com';
30$USER_AGENT = 'File::Fetch/$VERSION';
31$BLACKLIST = [qw|ftp|];
32$METHOD_FAIL = { };
33$FTP_PASSIVE = 1;
34$TIMEOUT = 0;
35$DEBUG = 0;
36$WARN = 1;
37
38### methods available to fetch the file depending on the scheme
39$METHODS = {
40 http => [ qw|lwp wget curl lynx| ],
41 ftp => [ qw|lwp netftp wget curl ncftp ftp| ],
42 file => [ qw|lwp file| ],
43 rsync => [ qw|rsync| ]
44};
45
46### silly warnings ###
47local $Params::Check::VERBOSE = 1;
48local $Params::Check::VERBOSE = 1;
49local $Module::Load::Conditional::VERBOSE = 0;
50local $Module::Load::Conditional::VERBOSE = 0;
51
52### see what OS we are on, important for file:// uris ###
9e5ea595 53use constant ON_WIN => ($^O eq 'MSWin32');
54use constant ON_VMS => ($^O eq 'VMS');
55use constant ON_UNIX => (!ON_WIN and !ON_VMS);
fe98d82b 56use constant HAS_VOL => (ON_WIN or ON_VMS);
57use constant HAS_SHARE => (ON_WIN);
79fd8837 58=pod
59
60=head1 NAME
61
62File::Fetch - A generic file fetching mechanism
63
64=head1 SYNOPSIS
65
66 use File::Fetch;
67
68 ### build a File::Fetch object ###
69 my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
70
71 ### fetch the uri to cwd() ###
72 my $where = $ff->fetch() or die $ff->error;
73
74 ### fetch the uri to /tmp ###
75 my $where = $ff->fetch( to => '/tmp' );
76
77 ### parsed bits from the uri ###
78 $ff->uri;
79 $ff->scheme;
80 $ff->host;
81 $ff->path;
82 $ff->file;
83
84=head1 DESCRIPTION
85
86File::Fetch is a generic file fetching mechanism.
87
88It allows you to fetch any file pointed to by a C<ftp>, C<http>,
89C<file>, or C<rsync> uri by a number of different means.
90
91See the C<HOW IT WORKS> section further down for details.
92
d4b3706f 93=head1 ACCESSORS
94
95A C<File::Fetch> object has the following accessors
96
97=over 4
98
99=item $ff->uri
100
101The uri you passed to the constructor
102
103=item $ff->scheme
104
105The scheme from the uri (like 'file', 'http', etc)
106
107=item $ff->host
108
fe98d82b 109The hostname in the uri. Will be empty if host was originally
110'localhost' for a 'file://' url.
111
112=item $ff->vol
113
114On operating systems with the concept of a volume the second element
115of a file:// is considered to the be volume specification for the file.
116Thus on Win32 and VMS this routine returns the volume, on other operating
117systems this returns nothing.
118
119On Windows this value may be empty if the uri is to a network share, in
120which case the 'share' property will be defined. Additionally, volume
121specifications that use '|' as ':' will be converted on read to use ':'.
122
123=item $ff->share
124
125On systems with the concept of a network share (currently only Windows) returns
126the sharename from a file://// url. On other operating systems returns empty.
d4b3706f 127
128=item $ff->path
129
130The path from the uri, will be at least a single '/'.
131
132=item $ff->file
133
134The name of the remote file. For the local file name, the
135result of $ff->output_file will be used.
136
137=cut
138
139
140##########################
141### Object & Accessors ###
142##########################
143
144{
145 ### template for new() and autogenerated accessors ###
146 my $Tmpl = {
147 scheme => { default => 'http' },
148 host => { default => 'localhost' },
149 path => { default => '/' },
150 file => { required => 1 },
151 uri => { required => 1 },
fe98d82b 152 vol => { default => '' }, # windows and vms for file:// uris
153 share => { default => '' }, # windows for file:// uris
d4b3706f 154 _error_msg => { no_override => 1 },
155 _error_msg_long => { no_override => 1 },
156 };
157
158 for my $method ( keys %$Tmpl ) {
159 no strict 'refs';
160 *$method = sub {
161 my $self = shift;
162 $self->{$method} = $_[0] if @_;
163 return $self->{$method};
164 }
165 }
166
167 sub _create {
168 my $class = shift;
169 my %hash = @_;
170
171 my $args = check( $Tmpl, \%hash ) or return;
172
173 bless $args, $class;
174
175 if( lc($args->scheme) ne 'file' and not $args->host ) {
176 return File::Fetch->_error(loc(
177 "Hostname required when fetching from '%1'",$args->scheme));
178 }
179
180 for (qw[path file]) {
9e5ea595 181 unless( $args->$_() ) { # 5.5.x needs the ()
d4b3706f 182 return File::Fetch->_error(loc("No '%1' specified",$_));
183 }
184 }
185
186 return $args;
187 }
188}
189
190=item $ff->output_file
191
192The name of the output file. This is the same as $ff->file,
193but any query parameters are stripped off. For example:
194
195 http://example.com/index.html?x=y
196
197would make the output file be C<index.html> rather than
198C<index.html?x=y>.
199
200=back
201
202=cut
203
204sub output_file {
205 my $self = shift;
206 my $file = $self->file;
207
208 $file =~ s/\?.*$//g;
209
210 return $file;
211}
212
213### XXX do this or just point to URI::Escape?
214# =head2 $esc_uri = $ff->escaped_uri
215#
216# =cut
217#
218# ### most of this is stolen straight from URI::escape
219# { ### Build a char->hex map
220# my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
221#
222# sub escaped_uri {
223# my $self = shift;
224# my $uri = $self->uri;
225#
226# ### Default unsafe characters. RFC 2732 ^(uric - reserved)
227# $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
228# $escapes{$1} || $self->_fail_hi($1)/ge;
229#
230# return $uri;
231# }
232#
233# sub _fail_hi {
234# my $self = shift;
235# my $char = shift;
236#
237# $self->_error(loc(
238# "Can't escape '%1', try using the '%2' module instead",
239# sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
240# ));
241# }
242#
243# sub output_file {
244#
245# }
246#
247#
248# }
249
79fd8837 250=head1 METHODS
251
252=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
253
254Parses the uri and creates a corresponding File::Fetch::Item object,
255that is ready to be C<fetch>ed and returns it.
256
257Returns false on failure.
258
259=cut
260
261sub new {
262 my $class = shift;
263 my %hash = @_;
264
265 my ($uri);
266 my $tmpl = {
267 uri => { required => 1, store => \$uri },
268 };
269
270 check( $tmpl, \%hash ) or return;
271
272 ### parse the uri to usable parts ###
273 my $href = __PACKAGE__->_parse_uri( $uri ) or return;
274
275 ### make it into a FFI object ###
d4b3706f 276 my $ff = File::Fetch->_create( %$href ) or return;
79fd8837 277
278
279 ### return the object ###
d4b3706f 280 return $ff;
79fd8837 281}
282
283### parses an uri to a hash structure:
284###
285### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
286###
287### becomes:
288###
289### $href = {
290### scheme => 'ftp',
291### host => 'ftp.cpan.org',
292### path => '/pub/mirror',
293### file => 'index.html'
294### };
295###
9e5ea595 296### In the case of file:// urls there maybe be additional fields
297###
fe98d82b 298### For systems with volume specifications such as VMS and Win32 there will be
299### a volume specifier provided in the 'vol' field.
300###
301### 'vol' => 'volumename'
302###
9e5ea595 303### For windows file shares there may be a 'share' key specified
304###
305### 'share' => 'sharename'
306###
fe98d82b 307### Note that the rules of what a file:// url means vary by the operating system
308### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
309### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
310### not '/foo/bar.txt'
9e5ea595 311###
fe98d82b 312### Similarly if the host interpreting the url is VMS then
313### file:///disk$user/my/notes/note12345.txt' means
314### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but if it is unix it means
315### #/disk$user/my/notes/note12345.txt'.
316###
317### This means it is impossible to serve certain file:// urls on certain systems.
318###
319### Thus are the problems with a protocol-less specification. :-(
9e5ea595 320###
321
79fd8837 322sub _parse_uri {
323 my $self = shift;
324 my $uri = shift or return;
325
326 my $href = { uri => $uri };
327
328 ### find the scheme ###
329 $uri =~ s|^(\w+)://||;
330 $href->{scheme} = $1;
331
9e5ea595 332 ### See rfc 1738 section 3.10
333 ### http://www.faqs.org/rfcs/rfc1738.html
334 ### And wikipedia for more on windows file:// urls
335 ### http://en.wikipedia.org/wiki/File://
79fd8837 336 if( $href->{scheme} eq 'file' ) {
9e5ea595 337
338 my @parts = split '/',$uri;
339
340 ### file://hostname/...
341 ### file://hostname/...
fe98d82b 342 ### normalize file://localhost with file:///
9e5ea595 343 $href->{host} = $parts[0] || '';
344
345 ### index in @parts where the path components begin;
346 my $index = 1;
9e5ea595 347
348 ### file:////hostname/sharename/blah.txt
fe98d82b 349 if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
350
9e5ea595 351 $href->{host} = $parts[2] || ''; # avoid warnings
352 $href->{share} = $parts[3] || ''; # avoid warnings
353
354 $index = 4 # index after the share
9e5ea595 355
fe98d82b 356 ### file:///D|/blah.txt
357 ### file:///D:/blah.txt
358 ### file:///disk$user/blah.txt
359 } elsif (HAS_VOL) {
360
361 ### this code comes from dmq's patch, but:
362 ### XXX if volume is empty, wouldn't that be an error? --kane
363 ### if so, our file://localhost test needs to be fixed as wel
364 $href->{vol} = $parts[1] || '';
365
366 ### correct D| style colume descriptors
367 $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
368
369 $index = 2; # index after the volume
370 }
371
372 ### rebuild the path from the leftover parts;
9e5ea595 373 $href->{path} = join '/', '', splice( @parts, $index, $#parts );
79fd8837 374
375 } else {
9e5ea595 376 ### using anything but qw() in hash slices may produce warnings
377 ### in older perls :-(
378 @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s;
79fd8837 379 }
380
381 ### split the path into file + dir ###
382 { my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
383 $href->{path} = $parts[1];
384 $href->{file} = $parts[2];
385 }
386
fe98d82b 387 ### host will be empty if the target was 'localhost' and the
388 ### scheme was 'file'
389 $href->{host} = '' if ($href->{host} eq 'localhost') and
390 ($href->{scheme} eq 'file');
79fd8837 391
392 return $href;
393}
394
395=head2 $ff->fetch( [to => /my/output/dir/] )
396
397Fetches the file you requested. By default it writes to C<cwd()>,
398but you can override that by specifying the C<to> argument.
399
400Returns the full path to the downloaded file on success, and false
401on failure.
402
403=cut
404
405sub fetch {
406 my $self = shift or return;
407 my %hash = @_;
408
409 my $to;
410 my $tmpl = {
411 to => { default => cwd(), store => \$to },
412 };
413
414 check( $tmpl, \%hash ) or return;
415
416 ### create the path if it doesn't exist yet ###
417 unless( -d $to ) {
418 eval { mkpath( $to ) };
419
420 return $self->_error(loc("Could not create path '%1'",$to)) if $@;
421 }
422
423 ### set passive ftp if required ###
424 local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
425
fe98d82b 426 ### we dont use catfile on win32 because if we are using a cygwin tool
427 ### under cmd.exe they wont understand windows style separators.
428 my $out_to = ON_WIN ? $to.'/'.$self->output_file
429 : File::Spec->catfile( $to, $self->output_file );
430
79fd8837 431 for my $method ( @{ $METHODS->{$self->scheme} } ) {
432 my $sub = '_'.$method.'_fetch';
433
434 unless( __PACKAGE__->can($sub) ) {
435 $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
436 $method));
437 next;
438 }
439
440 ### method is blacklisted ###
441 next if grep { lc $_ eq $method } @$BLACKLIST;
442
443 ### method is known to fail ###
444 next if $METHOD_FAIL->{$method};
445
d4b3706f 446 ### there's serious issues with IPC::Run and quoting of command
447 ### line arguments. using quotes in the wrong place breaks things,
448 ### and in the case of say,
449 ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
450 ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
451 ### it doesn't matter how you quote, it always fails.
452 local $IPC::Cmd::USE_IPC_RUN = 0;
453
454 if( my $file = $self->$sub(
a0ad4830 455 to => $out_to
d4b3706f 456 )){
79fd8837 457
458 unless( -e $file && -s _ ) {
459 $self->_error(loc("'%1' said it fetched '%2', ".
460 "but it was not created",$method,$file));
461
462 ### mark the failure ###
463 $METHOD_FAIL->{$method} = 1;
464
465 next;
466
467 } else {
468
469 my $abs = File::Spec->rel2abs( $file );
470 return $abs;
471 }
472 }
473 }
474
475
476 ### if we got here, we looped over all methods, but we weren't able
477 ### to fetch it.
478 return;
479}
480
79fd8837 481########################
482### _*_fetch methods ###
483########################
484
485### LWP fetching ###
486sub _lwp_fetch {
487 my $self = shift;
488 my %hash = @_;
489
490 my ($to);
491 my $tmpl = {
492 to => { required => 1, store => \$to }
493 };
494 check( $tmpl, \%hash ) or return;
495
496 ### modules required to download with lwp ###
497 my $use_list = {
498 LWP => '0.0',
499 'LWP::UserAgent' => '0.0',
500 'HTTP::Request' => '0.0',
501 'HTTP::Status' => '0.0',
502 URI => '0.0',
503
504 };
505
506 if( can_load(modules => $use_list) ) {
507
508 ### setup the uri object
509 my $uri = URI->new( File::Spec::Unix->catfile(
510 $self->path, $self->file
511 ) );
512
513 ### special rules apply for file:// uris ###
514 $uri->scheme( $self->scheme );
515 $uri->host( $self->scheme eq 'file' ? '' : $self->host );
516 $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';
517
518 ### set up the useragent object
519 my $ua = LWP::UserAgent->new();
520 $ua->timeout( $TIMEOUT ) if $TIMEOUT;
521 $ua->agent( $USER_AGENT );
522 $ua->from( $FROM_EMAIL );
523 $ua->env_proxy;
524
525 my $res = $ua->mirror($uri, $to) or return;
526
527 ### uptodate or fetched ok ###
528 if ( $res->code == 304 or $res->code == 200 ) {
529 return $to;
530
531 } else {
532 return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
533 $res->code, HTTP::Status::status_message($res->code),
534 $res->status_line));
535 }
536
537 } else {
538 $METHOD_FAIL->{'lwp'} = 1;
539 return;
540 }
541}
542
543### Net::FTP fetching
544sub _netftp_fetch {
545 my $self = shift;
546 my %hash = @_;
547
548 my ($to);
549 my $tmpl = {
550 to => { required => 1, store => \$to }
551 };
552 check( $tmpl, \%hash ) or return;
553
554 ### required modules ###
555 my $use_list = { 'Net::FTP' => 0 };
556
557 if( can_load( modules => $use_list ) ) {
558
559 ### make connection ###
560 my $ftp;
561 my @options = ($self->host);
562 push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
563 unless( $ftp = Net::FTP->new( @options ) ) {
564 return $self->_error(loc("Ftp creation failed: %1",$@));
565 }
566
567 ### login ###
568 unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
569 return $self->_error(loc("Could not login to '%1'",$self->host));
570 }
571
572 ### set binary mode, just in case ###
573 $ftp->binary;
574
575 ### create the remote path
576 ### remember remote paths are unix paths! [#11483]
577 my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
578
579 ### fetch the file ###
580 my $target;
581 unless( $target = $ftp->get( $remote, $to ) ) {
582 return $self->_error(loc("Could not fetch '%1' from '%2'",
583 $remote, $self->host));
584 }
585
586 ### log out ###
587 $ftp->quit;
588
589 return $target;
590
591 } else {
592 $METHOD_FAIL->{'netftp'} = 1;
593 return;
594 }
595}
596
597### /bin/wget fetch ###
598sub _wget_fetch {
599 my $self = shift;
600 my %hash = @_;
601
602 my ($to);
603 my $tmpl = {
604 to => { required => 1, store => \$to }
605 };
606 check( $tmpl, \%hash ) or return;
607
608 ### see if we have a wget binary ###
609 if( my $wget = can_run('wget') ) {
610
611 ### no verboseness, thanks ###
612 my $cmd = [ $wget, '--quiet' ];
613
614 ### if a timeout is set, add it ###
615 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
616
617 ### run passive if specified ###
618 push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
619
620 ### set the output document, add the uri ###
d4b3706f 621 push @$cmd, '--output-document',
622 ### DO NOT quote things for IPC::Run, it breaks stuff.
623 $IPC::Cmd::USE_IPC_RUN
624 ? ($to, $self->uri)
625 : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
79fd8837 626
627 ### shell out ###
628 my $captured;
d4b3706f 629 unless(run( command => $cmd,
630 buffer => \$captured,
631 verbose => $DEBUG
632 )) {
79fd8837 633 ### wget creates the output document always, even if the fetch
634 ### fails.. so unlink it in that case
635 1 while unlink $to;
636
637 return $self->_error(loc( "Command failed: %1", $captured || '' ));
638 }
639
640 return $to;
641
642 } else {
643 $METHOD_FAIL->{'wget'} = 1;
644 return;
645 }
646}
647
648
649### /bin/ftp fetch ###
650sub _ftp_fetch {
651 my $self = shift;
652 my %hash = @_;
653
654 my ($to);
655 my $tmpl = {
656 to => { required => 1, store => \$to }
657 };
658 check( $tmpl, \%hash ) or return;
659
d4b3706f 660 ### see if we have a ftp binary ###
79fd8837 661 if( my $ftp = can_run('ftp') ) {
662
663 my $fh = FileHandle->new;
664
665 local $SIG{CHLD} = 'IGNORE';
666
667 unless ($fh->open("|$ftp -n")) {
668 return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
669 }
670
671 my @dialog = (
672 "lcd " . dirname($to),
673 "open " . $self->host,
674 "user anonymous $FROM_EMAIL",
675 "cd /",
676 "cd " . $self->path,
677 "binary",
d4b3706f 678 "get " . $self->file . " " . $self->output_file,
79fd8837 679 "quit",
680 );
681
682 foreach (@dialog) { $fh->print($_, "\n") }
683 $fh->close or return;
684
685 return $to;
686 }
687}
688
689### lynx is stupid - it decompresses any .gz file it finds to be text
690### use /bin/lynx to fetch files
691sub _lynx_fetch {
692 my $self = shift;
693 my %hash = @_;
694
695 my ($to);
696 my $tmpl = {
697 to => { required => 1, store => \$to }
698 };
699 check( $tmpl, \%hash ) or return;
700
d4b3706f 701 ### see if we have a lynx binary ###
79fd8837 702 if( my $lynx = can_run('lynx') ) {
703
d4b3706f 704 unless( IPC::Cmd->can_capture_buffer ) {
705 $METHOD_FAIL->{'lynx'} = 1;
706
707 return $self->_error(loc(
708 "Can not capture buffers. Can not use '%1' to fetch files",
709 'lynx' ));
710 }
79fd8837 711
712 ### write to the output file ourselves, since lynx ass_u_mes to much
713 my $local = FileHandle->new(">$to")
714 or return $self->_error(loc(
715 "Could not open '%1' for writing: %2",$to,$!));
716
717 ### dump to stdout ###
718 my $cmd = [
719 $lynx,
720 '-source',
721 "-auth=anonymous:$FROM_EMAIL",
722 ];
723
724 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
725
d4b3706f 726 ### DO NOT quote things for IPC::Run, it breaks stuff.
727 push @$cmd, $IPC::Cmd::USE_IPC_RUN
728 ? $self->uri
729 : QUOTE. $self->uri .QUOTE;
730
79fd8837 731
732 ### shell out ###
733 my $captured;
734 unless(run( command => $cmd,
735 buffer => \$captured,
736 verbose => $DEBUG )
737 ) {
738 return $self->_error(loc("Command failed: %1", $captured || ''));
739 }
740
741 ### print to local file ###
742 ### XXX on a 404 with a special error page, $captured will actually
743 ### hold the contents of that page, and make it *appear* like the
744 ### request was a success, when really it wasn't :(
745 ### there doesn't seem to be an option for lynx to change the exit
746 ### code based on a 4XX status or so.
747 ### the closest we can come is using --error_file and parsing that,
748 ### which is very unreliable ;(
749 $local->print( $captured );
750 $local->close or return;
751
752 return $to;
753
754 } else {
755 $METHOD_FAIL->{'lynx'} = 1;
756 return;
757 }
758}
759
760### use /bin/ncftp to fetch files
761sub _ncftp_fetch {
762 my $self = shift;
763 my %hash = @_;
764
765 my ($to);
766 my $tmpl = {
767 to => { required => 1, store => \$to }
768 };
769 check( $tmpl, \%hash ) or return;
770
771 ### we can only set passive mode in interactive sesssions, so bail out
772 ### if $FTP_PASSIVE is set
773 return if $FTP_PASSIVE;
774
d4b3706f 775 ### see if we have a ncftp binary ###
79fd8837 776 if( my $ncftp = can_run('ncftp') ) {
777
778 my $cmd = [
779 $ncftp,
780 '-V', # do not be verbose
781 '-p', $FROM_EMAIL, # email as password
782 $self->host, # hostname
783 dirname($to), # local dir for the file
784 # remote path to the file
d4b3706f 785 ### DO NOT quote things for IPC::Run, it breaks stuff.
786 $IPC::Cmd::USE_IPC_RUN
787 ? File::Spec::Unix->catdir( $self->path, $self->file )
788 : QUOTE. File::Spec::Unix->catdir(
789 $self->path, $self->file ) .QUOTE
790
79fd8837 791 ];
792
793 ### shell out ###
794 my $captured;
795 unless(run( command => $cmd,
796 buffer => \$captured,
797 verbose => $DEBUG )
798 ) {
799 return $self->_error(loc("Command failed: %1", $captured || ''));
800 }
801
802 return $to;
803
804 } else {
805 $METHOD_FAIL->{'ncftp'} = 1;
806 return;
807 }
808}
809
810### use /bin/curl to fetch files
811sub _curl_fetch {
812 my $self = shift;
813 my %hash = @_;
814
815 my ($to);
816 my $tmpl = {
817 to => { required => 1, store => \$to }
818 };
819 check( $tmpl, \%hash ) or return;
820
821 if (my $curl = can_run('curl')) {
822
823 ### these long opts are self explanatory - I like that -jmb
824 my $cmd = [ $curl ];
825
826 push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
827
828 push(@$cmd, '--silent') unless $DEBUG;
829
830 ### curl does the right thing with passive, regardless ###
831 if ($self->scheme eq 'ftp') {
832 push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
833 }
834
835 ### curl doesn't follow 302 (temporarily moved) etc automatically
836 ### so we add --location to enable that.
d4b3706f 837 push @$cmd, '--fail', '--location', '--output',
838 ### DO NOT quote things for IPC::Run, it breaks stuff.
839 $IPC::Cmd::USE_IPC_RUN
840 ? ($to, $self->uri)
841 : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
79fd8837 842
843 my $captured;
844 unless(run( command => $cmd,
845 buffer => \$captured,
846 verbose => $DEBUG )
847 ) {
848
849 return $self->_error(loc("Command failed: %1", $captured || ''));
850 }
851
852 return $to;
853
854 } else {
855 $METHOD_FAIL->{'curl'} = 1;
856 return;
857 }
858}
859
860
861### use File::Copy for fetching file:// urls ###
9e5ea595 862###
863### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html)
864### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
fe98d82b 865###
9e5ea595 866
79fd8837 867sub _file_fetch {
868 my $self = shift;
869 my %hash = @_;
870
871 my ($to);
872 my $tmpl = {
873 to => { required => 1, store => \$to }
874 };
875 check( $tmpl, \%hash ) or return;
876
9e5ea595 877
878
79fd8837 879 ### prefix a / on unix systems with a file uri, since it would
880 ### look somewhat like this:
9e5ea595 881 ### file:///home/kane/file
882 ### wheras windows file uris for 'c:\some\dir\file' might look like:
883 ### file:///C:/some/dir/file
884 ### file:///C|/some/dir/file
885 ### or for a network share '\\host\share\some\dir\file':
886 ### file:////host/share/some/dir/file
887 ###
888 ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
889 ### file://vms.host.edu/disk$user/my/notes/note12345.txt
890 ###
891
892 my $path = $self->path;
893 my $vol = $self->vol;
894 my $share = $self->share;
895
896 my $remote;
897 if (!$share and $self->host) {
898 return $self->_error(loc(
899 "Currently %1 cannot handle hosts in %2 urls",
900 'File::Fetch', 'file://'
901 ));
902 }
903
904 if( $vol ) {
905 $path = File::Spec->catdir( split /\//, $path );
906 $remote = File::Spec->catpath( $vol, $path, $self->file);
79fd8837 907
9e5ea595 908 } elsif( $share ) {
909 ### win32 specific, and a share name, so we wont bother with File::Spec
910 $path =~ s|/+|\\|g;
911 $remote = "\\\\".$self->host."\\$share\\$path";
912
913 } else {
914 $remote = File::Spec->catfile( $path, $self->file );
915 }
79fd8837 916
917 ### File::Copy is littered with 'die' statements :( ###
918 my $rv = eval { File::Copy::copy( $remote, $to ) };
919
920 ### something went wrong ###
921 if( !$rv or $@ ) {
922 return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
923 $remote, $to, $!, $@));
924 }
925
926 return $to;
927}
928
929### use /usr/bin/rsync to fetch files
930sub _rsync_fetch {
931 my $self = shift;
932 my %hash = @_;
933
934 my ($to);
935 my $tmpl = {
936 to => { required => 1, store => \$to }
937 };
938 check( $tmpl, \%hash ) or return;
939
940 if (my $rsync = can_run('rsync')) {
941
942 my $cmd = [ $rsync ];
943
944 ### XXX: rsync has no I/O timeouts at all, by default
945 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
946
947 push(@$cmd, '--quiet') unless $DEBUG;
948
d4b3706f 949 ### DO NOT quote things for IPC::Run, it breaks stuff.
950 push @$cmd, $IPC::Cmd::USE_IPC_RUN
951 ? ($self->uri, $to)
952 : (QUOTE. $self->uri .QUOTE, QUOTE. $to .QUOTE);
79fd8837 953
954 my $captured;
955 unless(run( command => $cmd,
956 buffer => \$captured,
957 verbose => $DEBUG )
958 ) {
959
fe98d82b 960 return $self->_error(loc("Command %1 failed: %2",
961 "@$cmd" || '', $captured || ''));
79fd8837 962 }
963
964 return $to;
965
966 } else {
967 $METHOD_FAIL->{'rsync'} = 1;
968 return;
969 }
970}
971
972#################################
973#
974# Error code
975#
976#################################
977
978=pod
979
980=head2 $ff->error([BOOL])
981
982Returns the last encountered error as string.
983Pass it a true value to get the C<Carp::longmess()> output instead.
984
985=cut
986
d4b3706f 987### error handling the way Archive::Extract does it
988sub _error {
989 my $self = shift;
990 my $error = shift;
991
992 $self->_error_msg( $error );
993 $self->_error_msg_long( Carp::longmess($error) );
994
995 if( $WARN ) {
996 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
79fd8837 997 }
998
d4b3706f 999 return;
79fd8837 1000}
1001
d4b3706f 1002sub error {
1003 my $self = shift;
1004 return shift() ? $self->_error_msg_long : $self->_error_msg;
1005}
79fd8837 1006
1007
10081;
1009
1010=pod
1011
1012=head1 HOW IT WORKS
1013
1014File::Fetch is able to fetch a variety of uris, by using several
1015external programs and modules.
1016
1017Below is a mapping of what utilities will be used in what order
1018for what schemes, if available:
1019
1020 file => LWP, file
1021 http => LWP, wget, curl, lynx
1022 ftp => LWP, Net::FTP, wget, curl, ncftp, ftp
1023 rsync => rsync
1024
1025If you'd like to disable the use of one or more of these utilities
1026and/or modules, see the C<$BLACKLIST> variable further down.
1027
1028If a utility or module isn't available, it will be marked in a cache
1029(see the C<$METHOD_FAIL> variable further down), so it will not be
1030tried again. The C<fetch> method will only fail when all options are
1031exhausted, and it was not able to retrieve the file.
1032
1033A special note about fetching files from an ftp uri:
1034
1035By default, all ftp connections are done in passive mode. To change
1036that, see the C<$FTP_PASSIVE> variable further down.
1037
1038Furthermore, ftp uris only support anonymous connections, so no
1039named user/password pair can be passed along.
1040
1041C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
1042further down.
1043
1044=head1 GLOBAL VARIABLES
1045
1046The behaviour of File::Fetch can be altered by changing the following
1047global variables:
1048
1049=head2 $File::Fetch::FROM_EMAIL
1050
1051This is the email address that will be sent as your anonymous ftp
1052password.
1053
1054Default is C<File-Fetch@example.com>.
1055
1056=head2 $File::Fetch::USER_AGENT
1057
1058This is the useragent as C<LWP> will report it.
1059
1060Default is C<File::Fetch/$VERSION>.
1061
1062=head2 $File::Fetch::FTP_PASSIVE
1063
1064This variable controls whether the environment variable C<FTP_PASSIVE>
1065and any passive switches to commandline tools will be set to true.
1066
1067Default value is 1.
1068
1069Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
1070files, since passive mode can only be set interactively for this binary
1071
1072=head2 $File::Fetch::TIMEOUT
1073
1074When set, controls the network timeout (counted in seconds).
1075
1076Default value is 0.
1077
1078=head2 $File::Fetch::WARN
1079
1080This variable controls whether errors encountered internally by
1081C<File::Fetch> should be C<carp>'d or not.
1082
1083Set to false to silence warnings. Inspect the output of the C<error()>
1084method manually to see what went wrong.
1085
1086Defaults to C<true>.
1087
1088=head2 $File::Fetch::DEBUG
1089
1090This enables debugging output when calling commandline utilities to
1091fetch files.
1092This also enables C<Carp::longmess> errors, instead of the regular
1093C<carp> errors.
1094
1095Good for tracking down why things don't work with your particular
1096setup.
1097
1098Default is 0.
1099
1100=head2 $File::Fetch::BLACKLIST
1101
1102This is an array ref holding blacklisted modules/utilities for fetching
1103files with.
1104
1105To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
1106set $File::Fetch::BLACKLIST to:
1107
1108 $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1109
1110The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.
1111
1112See the note on C<MAPPING> below.
1113
1114=head2 $File::Fetch::METHOD_FAIL
1115
1116This is a hashref registering what modules/utilities were known to fail
1117for fetching files (mostly because they weren't installed).
1118
1119You can reset this cache by assigning an empty hashref to it, or
1120individually remove keys.
1121
1122See the note on C<MAPPING> below.
1123
1124=head1 MAPPING
1125
1126
1127Here's a quick mapping for the utilities/modules, and their names for
1128the $BLACKLIST, $METHOD_FAIL and other internal functions.
1129
1130 LWP => lwp
1131 Net::FTP => netftp
1132 wget => wget
1133 lynx => lynx
1134 ncftp => ncftp
1135 ftp => ftp
1136 curl => curl
1137 rsync => rsync
1138
1139=head1 FREQUENTLY ASKED QUESTIONS
1140
1141=head2 So how do I use a proxy with File::Fetch?
1142
1143C<File::Fetch> currently only supports proxies with LWP::UserAgent.
1144You will need to set your environment variables accordingly. For
1145example, to use an ftp proxy:
1146
1147 $ENV{ftp_proxy} = 'foo.com';
1148
1149Refer to the LWP::UserAgent manpage for more details.
1150
1151=head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1152
1153C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
1154which we in turn capture. If that content is a 'custom' error file
1155(like, say, a C<404 handler>), you will get that contents instead.
1156
1157Sadly, C<lynx> doesn't support any options to return a different exit
1158code on non-C<200 OK> status, giving us no way to tell the difference
1159between a 'successfull' fetch and a custom error page.
1160
1161Therefor, we recommend to only use C<lynx> as a last resort. This is
1162why it is at the back of our list of methods to try as well.
1163
d4b3706f 1164=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1165
1166C<File::Fetch> is relatively smart about things. When trying to write
1167a file to disk, it removes the C<query parameters> (see the
1168C<output_file> method for details) from the file name before creating
1169it. In most cases this suffices.
1170
1171If you have any other characters you need to escape, please install
1172the C<URI::Escape> module from CPAN, and pre-encode your URI before
1173passing it to C<File::Fetch>. You can read about the details of URIs
1174and URI encoding here:
1175
1176 http://www.faqs.org/rfcs/rfc2396.html
1177
79fd8837 1178=head1 TODO
1179
1180=over 4
1181
1182=item Implement $PREFER_BIN
1183
1184To indicate to rather use commandline tools than modules
1185
a0ad4830 1186=back
1187
1188=head1 BUG REPORTS
1189
1190Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>.
1191
1192=head1 AUTHOR
79fd8837 1193
d4b3706f 1194This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
79fd8837 1195
1196=head1 COPYRIGHT
1197
a0ad4830 1198This library is free software; you may redistribute and/or modify it
1199under the same terms as Perl itself.
79fd8837 1200
79fd8837 1201
1202=cut
1203
1204# Local variables:
1205# c-indentation-style: bsd
1206# c-basic-offset: 4
1207# indent-tabs-mode: nil
1208# End:
1209# vim: expandtab shiftwidth=4:
1210
1211
1212
1213