Move cp(1)-like permission changes from copy to cp,
[p5sagit/p5-mst-13.2.git] / lib / File / Fetch.pm
CommitLineData
79fd8837 1package File::Fetch;
2
3use strict;
4use FileHandle;
6e654618 5use File::Temp;
79fd8837 6use File::Copy;
7use File::Spec;
8use File::Spec::Unix;
79fd8837 9use File::Basename qw[dirname];
10
11use Cwd qw[cwd];
12use Carp qw[carp];
6b6e6e92 13use IPC::Cmd qw[can_run run QUOTE];
79fd8837 14use File::Path qw[mkpath];
15use Params::Check qw[check];
16use Module::Load::Conditional qw[can_load];
17use Locale::Maketext::Simple Style => 'gettext';
18
19use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
20 $BLACKLIST $METHOD_FAIL $VERSION $METHODS
21 $FTP_PASSIVE $TIMEOUT $DEBUG $WARN
22 ];
23
6b6e6e92 24$VERSION = '0.18';
fe98d82b 25$VERSION = eval $VERSION; # avoid warnings with development releases
26$PREFER_BIN = 0; # XXX TODO implement
79fd8837 27$FROM_EMAIL = 'File-Fetch@example.com';
6b6e6e92 28$USER_AGENT = "File::Fetch/$VERSION";
79fd8837 29$BLACKLIST = [qw|ftp|];
30$METHOD_FAIL = { };
31$FTP_PASSIVE = 1;
32$TIMEOUT = 0;
33$DEBUG = 0;
34$WARN = 1;
35
36### methods available to fetch the file depending on the scheme
37$METHODS = {
6e654618 38 http => [ qw|lwp wget curl lftp lynx| ],
39 ftp => [ qw|lwp netftp wget curl lftp ncftp ftp| ],
40 file => [ qw|lwp lftp file| ],
79fd8837 41 rsync => [ qw|rsync| ]
42};
43
44### silly warnings ###
45local $Params::Check::VERBOSE = 1;
46local $Params::Check::VERBOSE = 1;
47local $Module::Load::Conditional::VERBOSE = 0;
48local $Module::Load::Conditional::VERBOSE = 0;
49
50### see what OS we are on, important for file:// uris ###
6b6e6e92 51use constant ON_WIN => ($^O eq 'MSWin32');
52use constant ON_VMS => ($^O eq 'VMS');
53use constant ON_UNIX => (!ON_WIN);
54use constant HAS_VOL => (ON_WIN);
55use constant HAS_SHARE => (ON_WIN);
56
57
79fd8837 58=pod
59
60=head1 NAME
61
62File::Fetch - A generic file fetching mechanism
63
64=head1 SYNOPSIS
65
66 use File::Fetch;
67
68 ### build a File::Fetch object ###
69 my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
70
71 ### fetch the uri to cwd() ###
72 my $where = $ff->fetch() or die $ff->error;
73
74 ### fetch the uri to /tmp ###
75 my $where = $ff->fetch( to => '/tmp' );
76
77 ### parsed bits from the uri ###
78 $ff->uri;
79 $ff->scheme;
80 $ff->host;
81 $ff->path;
82 $ff->file;
83
84=head1 DESCRIPTION
85
86File::Fetch is a generic file fetching mechanism.
87
88It allows you to fetch any file pointed to by a C<ftp>, C<http>,
89C<file>, or C<rsync> uri by a number of different means.
90
91See the C<HOW IT WORKS> section further down for details.
92
d4b3706f 93=head1 ACCESSORS
94
95A C<File::Fetch> object has the following accessors
96
97=over 4
98
99=item $ff->uri
100
101The uri you passed to the constructor
102
103=item $ff->scheme
104
105The scheme from the uri (like 'file', 'http', etc)
106
107=item $ff->host
108
fe98d82b 109The hostname in the uri. Will be empty if host was originally
110'localhost' for a 'file://' url.
111
112=item $ff->vol
113
114On operating systems with the concept of a volume the second element
115of a file:// is considered to the be volume specification for the file.
1f80753b 116Thus on Win32 this routine returns the volume, on other operating
117systems this returns nothing.
fe98d82b 118
119On Windows this value may be empty if the uri is to a network share, in
120which case the 'share' property will be defined. Additionally, volume
121specifications that use '|' as ':' will be converted on read to use ':'.
122
1f80753b 123On VMS, which has a volume concept, this field will be empty because VMS
124file specifications are converted to absolute UNIX format and the volume
125information is transparently included.
126
fe98d82b 127=item $ff->share
128
129On systems with the concept of a network share (currently only Windows) returns
130the sharename from a file://// url. On other operating systems returns empty.
d4b3706f 131
132=item $ff->path
133
134The path from the uri, will be at least a single '/'.
135
136=item $ff->file
137
138The name of the remote file. For the local file name, the
139result of $ff->output_file will be used.
140
141=cut
142
143
144##########################
145### Object & Accessors ###
146##########################
147
148{
6b6e6e92 149 ### template for autogenerated accessors ###
d4b3706f 150 my $Tmpl = {
151 scheme => { default => 'http' },
152 host => { default => 'localhost' },
153 path => { default => '/' },
154 file => { required => 1 },
155 uri => { required => 1 },
1f80753b 156 vol => { default => '' }, # windows for file:// uris
fe98d82b 157 share => { default => '' }, # windows for file:// uris
d4b3706f 158 _error_msg => { no_override => 1 },
159 _error_msg_long => { no_override => 1 },
160 };
161
162 for my $method ( keys %$Tmpl ) {
163 no strict 'refs';
164 *$method = sub {
165 my $self = shift;
166 $self->{$method} = $_[0] if @_;
167 return $self->{$method};
168 }
169 }
170
171 sub _create {
172 my $class = shift;
173 my %hash = @_;
174
175 my $args = check( $Tmpl, \%hash ) or return;
176
177 bless $args, $class;
178
179 if( lc($args->scheme) ne 'file' and not $args->host ) {
180 return File::Fetch->_error(loc(
181 "Hostname required when fetching from '%1'",$args->scheme));
182 }
183
184 for (qw[path file]) {
9e5ea595 185 unless( $args->$_() ) { # 5.5.x needs the ()
d4b3706f 186 return File::Fetch->_error(loc("No '%1' specified",$_));
187 }
188 }
189
190 return $args;
191 }
192}
193
194=item $ff->output_file
195
196The name of the output file. This is the same as $ff->file,
197but any query parameters are stripped off. For example:
198
199 http://example.com/index.html?x=y
200
201would make the output file be C<index.html> rather than
202C<index.html?x=y>.
203
204=back
205
206=cut
207
208sub output_file {
209 my $self = shift;
210 my $file = $self->file;
211
212 $file =~ s/\?.*$//g;
213
214 return $file;
215}
216
217### XXX do this or just point to URI::Escape?
218# =head2 $esc_uri = $ff->escaped_uri
219#
220# =cut
221#
222# ### most of this is stolen straight from URI::escape
223# { ### Build a char->hex map
224# my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
225#
226# sub escaped_uri {
227# my $self = shift;
228# my $uri = $self->uri;
229#
230# ### Default unsafe characters. RFC 2732 ^(uric - reserved)
231# $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
232# $escapes{$1} || $self->_fail_hi($1)/ge;
233#
234# return $uri;
235# }
236#
237# sub _fail_hi {
238# my $self = shift;
239# my $char = shift;
240#
241# $self->_error(loc(
242# "Can't escape '%1', try using the '%2' module instead",
243# sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
244# ));
245# }
246#
247# sub output_file {
248#
249# }
250#
251#
252# }
253
79fd8837 254=head1 METHODS
255
256=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
257
258Parses the uri and creates a corresponding File::Fetch::Item object,
259that is ready to be C<fetch>ed and returns it.
260
261Returns false on failure.
262
263=cut
264
265sub new {
266 my $class = shift;
267 my %hash = @_;
268
269 my ($uri);
270 my $tmpl = {
271 uri => { required => 1, store => \$uri },
272 };
273
274 check( $tmpl, \%hash ) or return;
275
276 ### parse the uri to usable parts ###
277 my $href = __PACKAGE__->_parse_uri( $uri ) or return;
278
279 ### make it into a FFI object ###
d4b3706f 280 my $ff = File::Fetch->_create( %$href ) or return;
79fd8837 281
282
283 ### return the object ###
d4b3706f 284 return $ff;
79fd8837 285}
286
287### parses an uri to a hash structure:
288###
289### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
290###
291### becomes:
292###
293### $href = {
294### scheme => 'ftp',
295### host => 'ftp.cpan.org',
296### path => '/pub/mirror',
297### file => 'index.html'
298### };
299###
9e5ea595 300### In the case of file:// urls there maybe be additional fields
301###
5e6d05d2 302### For systems with volume specifications such as Win32 there will be
fe98d82b 303### a volume specifier provided in the 'vol' field.
304###
305### 'vol' => 'volumename'
306###
9e5ea595 307### For windows file shares there may be a 'share' key specified
308###
309### 'share' => 'sharename'
310###
fe98d82b 311### Note that the rules of what a file:// url means vary by the operating system
312### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
313### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
314### not '/foo/bar.txt'
9e5ea595 315###
fe98d82b 316### Similarly if the host interpreting the url is VMS then
317### file:///disk$user/my/notes/note12345.txt' means
1f80753b 318### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
319### if it is unix where it means /disk$user/my/notes/note12345.txt'.
320### Except for some cases in the File::Spec methods, Perl on VMS will generally
321### handle UNIX format file specifications.
fe98d82b 322###
323### This means it is impossible to serve certain file:// urls on certain systems.
324###
325### Thus are the problems with a protocol-less specification. :-(
9e5ea595 326###
327
79fd8837 328sub _parse_uri {
329 my $self = shift;
330 my $uri = shift or return;
331
332 my $href = { uri => $uri };
333
334 ### find the scheme ###
335 $uri =~ s|^(\w+)://||;
336 $href->{scheme} = $1;
337
9e5ea595 338 ### See rfc 1738 section 3.10
339 ### http://www.faqs.org/rfcs/rfc1738.html
340 ### And wikipedia for more on windows file:// urls
341 ### http://en.wikipedia.org/wiki/File://
79fd8837 342 if( $href->{scheme} eq 'file' ) {
9e5ea595 343
344 my @parts = split '/',$uri;
345
346 ### file://hostname/...
347 ### file://hostname/...
fe98d82b 348 ### normalize file://localhost with file:///
9e5ea595 349 $href->{host} = $parts[0] || '';
350
351 ### index in @parts where the path components begin;
352 my $index = 1;
9e5ea595 353
354 ### file:////hostname/sharename/blah.txt
fe98d82b 355 if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
356
9e5ea595 357 $href->{host} = $parts[2] || ''; # avoid warnings
358 $href->{share} = $parts[3] || ''; # avoid warnings
359
360 $index = 4 # index after the share
9e5ea595 361
fe98d82b 362 ### file:///D|/blah.txt
363 ### file:///D:/blah.txt
fe98d82b 364 } elsif (HAS_VOL) {
365
366 ### this code comes from dmq's patch, but:
367 ### XXX if volume is empty, wouldn't that be an error? --kane
368 ### if so, our file://localhost test needs to be fixed as wel
369 $href->{vol} = $parts[1] || '';
370
371 ### correct D| style colume descriptors
372 $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
373
374 $index = 2; # index after the volume
375 }
376
377 ### rebuild the path from the leftover parts;
9e5ea595 378 $href->{path} = join '/', '', splice( @parts, $index, $#parts );
79fd8837 379
380 } else {
9e5ea595 381 ### using anything but qw() in hash slices may produce warnings
382 ### in older perls :-(
383 @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s;
79fd8837 384 }
385
386 ### split the path into file + dir ###
387 { my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
388 $href->{path} = $parts[1];
389 $href->{file} = $parts[2];
390 }
391
fe98d82b 392 ### host will be empty if the target was 'localhost' and the
393 ### scheme was 'file'
394 $href->{host} = '' if ($href->{host} eq 'localhost') and
395 ($href->{scheme} eq 'file');
79fd8837 396
397 return $href;
398}
399
400=head2 $ff->fetch( [to => /my/output/dir/] )
401
402Fetches the file you requested. By default it writes to C<cwd()>,
403but you can override that by specifying the C<to> argument.
404
405Returns the full path to the downloaded file on success, and false
406on failure.
407
408=cut
409
410sub fetch {
411 my $self = shift or return;
412 my %hash = @_;
413
414 my $to;
415 my $tmpl = {
416 to => { default => cwd(), store => \$to },
417 };
418
419 check( $tmpl, \%hash ) or return;
420
5e6d05d2 421 ### On VMS force to VMS format so File::Spec will work.
1f80753b 422 $to = VMS::Filespec::vmspath($to) if ON_VMS;
423
79fd8837 424 ### create the path if it doesn't exist yet ###
425 unless( -d $to ) {
426 eval { mkpath( $to ) };
427
428 return $self->_error(loc("Could not create path '%1'",$to)) if $@;
429 }
430
431 ### set passive ftp if required ###
432 local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
433
fe98d82b 434 ### we dont use catfile on win32 because if we are using a cygwin tool
435 ### under cmd.exe they wont understand windows style separators.
436 my $out_to = ON_WIN ? $to.'/'.$self->output_file
437 : File::Spec->catfile( $to, $self->output_file );
438
79fd8837 439 for my $method ( @{ $METHODS->{$self->scheme} } ) {
440 my $sub = '_'.$method.'_fetch';
441
442 unless( __PACKAGE__->can($sub) ) {
443 $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
444 $method));
445 next;
446 }
447
448 ### method is blacklisted ###
449 next if grep { lc $_ eq $method } @$BLACKLIST;
450
451 ### method is known to fail ###
452 next if $METHOD_FAIL->{$method};
453
d4b3706f 454 ### there's serious issues with IPC::Run and quoting of command
455 ### line arguments. using quotes in the wrong place breaks things,
456 ### and in the case of say,
457 ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
458 ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
459 ### it doesn't matter how you quote, it always fails.
460 local $IPC::Cmd::USE_IPC_RUN = 0;
461
462 if( my $file = $self->$sub(
a0ad4830 463 to => $out_to
d4b3706f 464 )){
79fd8837 465
466 unless( -e $file && -s _ ) {
467 $self->_error(loc("'%1' said it fetched '%2', ".
468 "but it was not created",$method,$file));
469
470 ### mark the failure ###
471 $METHOD_FAIL->{$method} = 1;
472
473 next;
474
475 } else {
476
477 my $abs = File::Spec->rel2abs( $file );
478 return $abs;
479 }
480 }
481 }
482
483
484 ### if we got here, we looped over all methods, but we weren't able
485 ### to fetch it.
486 return;
487}
488
79fd8837 489########################
490### _*_fetch methods ###
491########################
492
493### LWP fetching ###
494sub _lwp_fetch {
495 my $self = shift;
496 my %hash = @_;
497
498 my ($to);
499 my $tmpl = {
500 to => { required => 1, store => \$to }
501 };
502 check( $tmpl, \%hash ) or return;
503
504 ### modules required to download with lwp ###
505 my $use_list = {
506 LWP => '0.0',
507 'LWP::UserAgent' => '0.0',
508 'HTTP::Request' => '0.0',
509 'HTTP::Status' => '0.0',
510 URI => '0.0',
511
512 };
513
514 if( can_load(modules => $use_list) ) {
515
516 ### setup the uri object
517 my $uri = URI->new( File::Spec::Unix->catfile(
518 $self->path, $self->file
519 ) );
520
521 ### special rules apply for file:// uris ###
522 $uri->scheme( $self->scheme );
523 $uri->host( $self->scheme eq 'file' ? '' : $self->host );
524 $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';
525
526 ### set up the useragent object
527 my $ua = LWP::UserAgent->new();
528 $ua->timeout( $TIMEOUT ) if $TIMEOUT;
529 $ua->agent( $USER_AGENT );
530 $ua->from( $FROM_EMAIL );
531 $ua->env_proxy;
532
533 my $res = $ua->mirror($uri, $to) or return;
534
535 ### uptodate or fetched ok ###
536 if ( $res->code == 304 or $res->code == 200 ) {
537 return $to;
538
539 } else {
540 return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
541 $res->code, HTTP::Status::status_message($res->code),
542 $res->status_line));
543 }
544
545 } else {
546 $METHOD_FAIL->{'lwp'} = 1;
547 return;
548 }
549}
550
551### Net::FTP fetching
552sub _netftp_fetch {
553 my $self = shift;
554 my %hash = @_;
555
556 my ($to);
557 my $tmpl = {
558 to => { required => 1, store => \$to }
559 };
560 check( $tmpl, \%hash ) or return;
561
562 ### required modules ###
563 my $use_list = { 'Net::FTP' => 0 };
564
565 if( can_load( modules => $use_list ) ) {
566
567 ### make connection ###
568 my $ftp;
569 my @options = ($self->host);
570 push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
571 unless( $ftp = Net::FTP->new( @options ) ) {
572 return $self->_error(loc("Ftp creation failed: %1",$@));
573 }
574
575 ### login ###
576 unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
577 return $self->_error(loc("Could not login to '%1'",$self->host));
578 }
579
580 ### set binary mode, just in case ###
581 $ftp->binary;
582
583 ### create the remote path
584 ### remember remote paths are unix paths! [#11483]
585 my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
586
587 ### fetch the file ###
588 my $target;
589 unless( $target = $ftp->get( $remote, $to ) ) {
590 return $self->_error(loc("Could not fetch '%1' from '%2'",
591 $remote, $self->host));
592 }
593
594 ### log out ###
595 $ftp->quit;
596
597 return $target;
598
599 } else {
600 $METHOD_FAIL->{'netftp'} = 1;
601 return;
602 }
603}
604
605### /bin/wget fetch ###
606sub _wget_fetch {
607 my $self = shift;
608 my %hash = @_;
609
610 my ($to);
611 my $tmpl = {
612 to => { required => 1, store => \$to }
613 };
614 check( $tmpl, \%hash ) or return;
615
616 ### see if we have a wget binary ###
617 if( my $wget = can_run('wget') ) {
618
619 ### no verboseness, thanks ###
620 my $cmd = [ $wget, '--quiet' ];
621
622 ### if a timeout is set, add it ###
623 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
624
625 ### run passive if specified ###
626 push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
627
628 ### set the output document, add the uri ###
6e654618 629 push @$cmd, '--output-document', $to, $self->uri;
630
631 ### with IPC::Cmd > 0.41, this is fixed in teh library,
632 ### and there's no need for special casing any more.
633 ### DO NOT quote things for IPC::Run, it breaks stuff.
634 # $IPC::Cmd::USE_IPC_RUN
635 # ? ($to, $self->uri)
636 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
79fd8837 637
638 ### shell out ###
639 my $captured;
d4b3706f 640 unless(run( command => $cmd,
641 buffer => \$captured,
642 verbose => $DEBUG
643 )) {
79fd8837 644 ### wget creates the output document always, even if the fetch
645 ### fails.. so unlink it in that case
646 1 while unlink $to;
647
648 return $self->_error(loc( "Command failed: %1", $captured || '' ));
649 }
650
651 return $to;
652
653 } else {
654 $METHOD_FAIL->{'wget'} = 1;
655 return;
656 }
657}
658
6e654618 659### /bin/lftp fetch ###
660sub _lftp_fetch {
661 my $self = shift;
662 my %hash = @_;
663
664 my ($to);
665 my $tmpl = {
666 to => { required => 1, store => \$to }
667 };
668 check( $tmpl, \%hash ) or return;
669
670 ### see if we have a wget binary ###
671 if( my $lftp = can_run('lftp') ) {
672
673 ### no verboseness, thanks ###
674 my $cmd = [ $lftp, '-f' ];
675
676 my $fh = File::Temp->new;
677
678 my $str;
679
680 ### if a timeout is set, add it ###
681 $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT;
682
683 ### run passive if specified ###
684 $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE;
685
686 ### set the output document, add the uri ###
687 ### quote the URI, because lftp supports certain shell
688 ### expansions, most notably & for backgrounding.
689 ### ' quote does nto work, must be "
690 $str .= q[get ']. $self->uri .q[' -o ]. $to . $/;
691
692 if( $DEBUG ) {
693 my $pp_str = join ' ', split $/, $str;
694 print "# lftp command: $pp_str\n";
695 }
696
697 ### write straight to the file.
698 $fh->autoflush(1);
699 print $fh $str;
700
701 ### the command needs to be 1 string to be executed
702 push @$cmd, $fh->filename;
703
704 ### with IPC::Cmd > 0.41, this is fixed in teh library,
705 ### and there's no need for special casing any more.
706 ### DO NOT quote things for IPC::Run, it breaks stuff.
707 # $IPC::Cmd::USE_IPC_RUN
708 # ? ($to, $self->uri)
709 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
710
711
712 ### shell out ###
713 my $captured;
714 unless(run( command => $cmd,
715 buffer => \$captured,
716 verbose => $DEBUG
717 )) {
718 ### wget creates the output document always, even if the fetch
719 ### fails.. so unlink it in that case
720 1 while unlink $to;
721
722 return $self->_error(loc( "Command failed: %1", $captured || '' ));
723 }
724
725 return $to;
726
727 } else {
728 $METHOD_FAIL->{'lftp'} = 1;
729 return;
730 }
731}
732
733
79fd8837 734
735### /bin/ftp fetch ###
736sub _ftp_fetch {
737 my $self = shift;
738 my %hash = @_;
739
740 my ($to);
741 my $tmpl = {
742 to => { required => 1, store => \$to }
743 };
744 check( $tmpl, \%hash ) or return;
745
d4b3706f 746 ### see if we have a ftp binary ###
79fd8837 747 if( my $ftp = can_run('ftp') ) {
748
749 my $fh = FileHandle->new;
750
751 local $SIG{CHLD} = 'IGNORE';
752
753 unless ($fh->open("|$ftp -n")) {
754 return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
755 }
756
757 my @dialog = (
758 "lcd " . dirname($to),
759 "open " . $self->host,
760 "user anonymous $FROM_EMAIL",
761 "cd /",
762 "cd " . $self->path,
763 "binary",
d4b3706f 764 "get " . $self->file . " " . $self->output_file,
79fd8837 765 "quit",
766 );
767
768 foreach (@dialog) { $fh->print($_, "\n") }
769 $fh->close or return;
770
771 return $to;
772 }
773}
774
775### lynx is stupid - it decompresses any .gz file it finds to be text
776### use /bin/lynx to fetch files
777sub _lynx_fetch {
778 my $self = shift;
779 my %hash = @_;
780
781 my ($to);
782 my $tmpl = {
783 to => { required => 1, store => \$to }
784 };
785 check( $tmpl, \%hash ) or return;
786
d4b3706f 787 ### see if we have a lynx binary ###
79fd8837 788 if( my $lynx = can_run('lynx') ) {
789
d4b3706f 790 unless( IPC::Cmd->can_capture_buffer ) {
791 $METHOD_FAIL->{'lynx'} = 1;
792
793 return $self->_error(loc(
794 "Can not capture buffers. Can not use '%1' to fetch files",
795 'lynx' ));
796 }
79fd8837 797
6e654618 798 ### check if the HTTP resource exists ###
799 if ($self->uri =~ /^https?:\/\//i) {
800 my $cmd = [
801 $lynx,
802 '-head',
803 '-source',
804 "-auth=anonymous:$FROM_EMAIL",
805 ];
806
807 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
808
809 push @$cmd, $self->uri;
810
811 ### shell out ###
812 my $head;
813 unless(run( command => $cmd,
814 buffer => \$head,
815 verbose => $DEBUG )
816 ) {
817 return $self->_error(loc("Command failed: %1", $head || ''));
818 }
819
820 unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) {
821 return $self->_error(loc("Command failed: %1", $head || ''));
822 }
823 }
824
79fd8837 825 ### write to the output file ourselves, since lynx ass_u_mes to much
826 my $local = FileHandle->new(">$to")
827 or return $self->_error(loc(
828 "Could not open '%1' for writing: %2",$to,$!));
829
830 ### dump to stdout ###
831 my $cmd = [
832 $lynx,
833 '-source',
834 "-auth=anonymous:$FROM_EMAIL",
835 ];
836
837 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
838
d4b3706f 839 ### DO NOT quote things for IPC::Run, it breaks stuff.
6e654618 840 push @$cmd, $self->uri;
841
842 ### with IPC::Cmd > 0.41, this is fixed in teh library,
843 ### and there's no need for special casing any more.
844 ### DO NOT quote things for IPC::Run, it breaks stuff.
845 # $IPC::Cmd::USE_IPC_RUN
846 # ? $self->uri
847 # : QUOTE. $self->uri .QUOTE;
d4b3706f 848
79fd8837 849
850 ### shell out ###
851 my $captured;
852 unless(run( command => $cmd,
853 buffer => \$captured,
854 verbose => $DEBUG )
855 ) {
856 return $self->_error(loc("Command failed: %1", $captured || ''));
857 }
858
859 ### print to local file ###
860 ### XXX on a 404 with a special error page, $captured will actually
861 ### hold the contents of that page, and make it *appear* like the
862 ### request was a success, when really it wasn't :(
863 ### there doesn't seem to be an option for lynx to change the exit
864 ### code based on a 4XX status or so.
865 ### the closest we can come is using --error_file and parsing that,
866 ### which is very unreliable ;(
867 $local->print( $captured );
868 $local->close or return;
869
870 return $to;
871
872 } else {
873 $METHOD_FAIL->{'lynx'} = 1;
874 return;
875 }
876}
877
878### use /bin/ncftp to fetch files
879sub _ncftp_fetch {
880 my $self = shift;
881 my %hash = @_;
882
883 my ($to);
884 my $tmpl = {
885 to => { required => 1, store => \$to }
886 };
887 check( $tmpl, \%hash ) or return;
888
889 ### we can only set passive mode in interactive sesssions, so bail out
890 ### if $FTP_PASSIVE is set
891 return if $FTP_PASSIVE;
892
d4b3706f 893 ### see if we have a ncftp binary ###
79fd8837 894 if( my $ncftp = can_run('ncftp') ) {
895
896 my $cmd = [
897 $ncftp,
898 '-V', # do not be verbose
899 '-p', $FROM_EMAIL, # email as password
900 $self->host, # hostname
901 dirname($to), # local dir for the file
902 # remote path to the file
d4b3706f 903 ### DO NOT quote things for IPC::Run, it breaks stuff.
904 $IPC::Cmd::USE_IPC_RUN
905 ? File::Spec::Unix->catdir( $self->path, $self->file )
906 : QUOTE. File::Spec::Unix->catdir(
907 $self->path, $self->file ) .QUOTE
908
79fd8837 909 ];
910
911 ### shell out ###
912 my $captured;
913 unless(run( command => $cmd,
914 buffer => \$captured,
915 verbose => $DEBUG )
916 ) {
917 return $self->_error(loc("Command failed: %1", $captured || ''));
918 }
919
920 return $to;
921
922 } else {
923 $METHOD_FAIL->{'ncftp'} = 1;
924 return;
925 }
926}
927
928### use /bin/curl to fetch files
929sub _curl_fetch {
930 my $self = shift;
931 my %hash = @_;
932
933 my ($to);
934 my $tmpl = {
935 to => { required => 1, store => \$to }
936 };
937 check( $tmpl, \%hash ) or return;
938
939 if (my $curl = can_run('curl')) {
940
941 ### these long opts are self explanatory - I like that -jmb
6e654618 942 my $cmd = [ $curl, '-q' ];
79fd8837 943
944 push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
945
946 push(@$cmd, '--silent') unless $DEBUG;
947
948 ### curl does the right thing with passive, regardless ###
949 if ($self->scheme eq 'ftp') {
950 push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
951 }
952
953 ### curl doesn't follow 302 (temporarily moved) etc automatically
954 ### so we add --location to enable that.
6e654618 955 push @$cmd, '--fail', '--location', '--output', $to, $self->uri;
956
957 ### with IPC::Cmd > 0.41, this is fixed in teh library,
958 ### and there's no need for special casing any more.
959 ### DO NOT quote things for IPC::Run, it breaks stuff.
960 # $IPC::Cmd::USE_IPC_RUN
961 # ? ($to, $self->uri)
962 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
963
79fd8837 964
965 my $captured;
966 unless(run( command => $cmd,
967 buffer => \$captured,
968 verbose => $DEBUG )
969 ) {
970
971 return $self->_error(loc("Command failed: %1", $captured || ''));
972 }
973
974 return $to;
975
976 } else {
977 $METHOD_FAIL->{'curl'} = 1;
978 return;
979 }
980}
981
982
983### use File::Copy for fetching file:// urls ###
9e5ea595 984###
985### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html)
986### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
fe98d82b 987###
9e5ea595 988
79fd8837 989sub _file_fetch {
990 my $self = shift;
991 my %hash = @_;
992
993 my ($to);
994 my $tmpl = {
995 to => { required => 1, store => \$to }
996 };
997 check( $tmpl, \%hash ) or return;
998
9e5ea595 999
1000
79fd8837 1001 ### prefix a / on unix systems with a file uri, since it would
1002 ### look somewhat like this:
9e5ea595 1003 ### file:///home/kane/file
1004 ### wheras windows file uris for 'c:\some\dir\file' might look like:
1005 ### file:///C:/some/dir/file
1006 ### file:///C|/some/dir/file
1007 ### or for a network share '\\host\share\some\dir\file':
1008 ### file:////host/share/some/dir/file
1009 ###
1010 ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
1011 ### file://vms.host.edu/disk$user/my/notes/note12345.txt
1012 ###
1013
1014 my $path = $self->path;
1015 my $vol = $self->vol;
1016 my $share = $self->share;
1017
1018 my $remote;
1019 if (!$share and $self->host) {
1020 return $self->_error(loc(
1021 "Currently %1 cannot handle hosts in %2 urls",
1022 'File::Fetch', 'file://'
1023 ));
1024 }
1025
1026 if( $vol ) {
1027 $path = File::Spec->catdir( split /\//, $path );
1028 $remote = File::Spec->catpath( $vol, $path, $self->file);
79fd8837 1029
9e5ea595 1030 } elsif( $share ) {
1031 ### win32 specific, and a share name, so we wont bother with File::Spec
1032 $path =~ s|/+|\\|g;
1033 $remote = "\\\\".$self->host."\\$share\\$path";
1034
1035 } else {
5e6d05d2 1036 ### File::Spec on VMS can not currently handle UNIX syntax.
1037 my $file_class = ON_VMS
1038 ? 'File::Spec::Unix'
1039 : 'File::Spec';
1040
1041 $remote = $file_class->catfile( $path, $self->file );
9e5ea595 1042 }
79fd8837 1043
1044 ### File::Copy is littered with 'die' statements :( ###
1045 my $rv = eval { File::Copy::copy( $remote, $to ) };
1046
1047 ### something went wrong ###
1048 if( !$rv or $@ ) {
1049 return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
1050 $remote, $to, $!, $@));
1051 }
1052
1053 return $to;
1054}
1055
1056### use /usr/bin/rsync to fetch files
1057sub _rsync_fetch {
1058 my $self = shift;
1059 my %hash = @_;
1060
1061 my ($to);
1062 my $tmpl = {
1063 to => { required => 1, store => \$to }
1064 };
1065 check( $tmpl, \%hash ) or return;
1066
1067 if (my $rsync = can_run('rsync')) {
1068
1069 my $cmd = [ $rsync ];
1070
1071 ### XXX: rsync has no I/O timeouts at all, by default
1072 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1073
1074 push(@$cmd, '--quiet') unless $DEBUG;
1075
d4b3706f 1076 ### DO NOT quote things for IPC::Run, it breaks stuff.
6e654618 1077 push @$cmd, $self->uri, $to;
1078
1079 ### with IPC::Cmd > 0.41, this is fixed in teh library,
1080 ### and there's no need for special casing any more.
1081 ### DO NOT quote things for IPC::Run, it breaks stuff.
1082 # $IPC::Cmd::USE_IPC_RUN
1083 # ? ($to, $self->uri)
1084 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
79fd8837 1085
1086 my $captured;
1087 unless(run( command => $cmd,
1088 buffer => \$captured,
1089 verbose => $DEBUG )
1090 ) {
1091
fe98d82b 1092 return $self->_error(loc("Command %1 failed: %2",
1093 "@$cmd" || '', $captured || ''));
79fd8837 1094 }
1095
1096 return $to;
1097
1098 } else {
1099 $METHOD_FAIL->{'rsync'} = 1;
1100 return;
1101 }
1102}
1103
1104#################################
1105#
1106# Error code
1107#
1108#################################
1109
1110=pod
1111
1112=head2 $ff->error([BOOL])
1113
1114Returns the last encountered error as string.
1115Pass it a true value to get the C<Carp::longmess()> output instead.
1116
1117=cut
1118
d4b3706f 1119### error handling the way Archive::Extract does it
1120sub _error {
1121 my $self = shift;
1122 my $error = shift;
1123
1124 $self->_error_msg( $error );
1125 $self->_error_msg_long( Carp::longmess($error) );
1126
1127 if( $WARN ) {
1128 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
79fd8837 1129 }
1130
d4b3706f 1131 return;
79fd8837 1132}
1133
d4b3706f 1134sub error {
1135 my $self = shift;
1136 return shift() ? $self->_error_msg_long : $self->_error_msg;
1137}
79fd8837 1138
1139
11401;
1141
1142=pod
1143
1144=head1 HOW IT WORKS
1145
1146File::Fetch is able to fetch a variety of uris, by using several
1147external programs and modules.
1148
1149Below is a mapping of what utilities will be used in what order
1150for what schemes, if available:
1151
6e654618 1152 file => LWP, lftp, file
1153 http => LWP, wget, curl, lftp, lynx
1154 ftp => LWP, Net::FTP, wget, curl, lftp, ncftp, ftp
79fd8837 1155 rsync => rsync
1156
1157If you'd like to disable the use of one or more of these utilities
1158and/or modules, see the C<$BLACKLIST> variable further down.
1159
1160If a utility or module isn't available, it will be marked in a cache
1161(see the C<$METHOD_FAIL> variable further down), so it will not be
1162tried again. The C<fetch> method will only fail when all options are
1163exhausted, and it was not able to retrieve the file.
1164
1165A special note about fetching files from an ftp uri:
1166
1167By default, all ftp connections are done in passive mode. To change
1168that, see the C<$FTP_PASSIVE> variable further down.
1169
1170Furthermore, ftp uris only support anonymous connections, so no
1171named user/password pair can be passed along.
1172
1173C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
1174further down.
1175
1176=head1 GLOBAL VARIABLES
1177
1178The behaviour of File::Fetch can be altered by changing the following
1179global variables:
1180
1181=head2 $File::Fetch::FROM_EMAIL
1182
1183This is the email address that will be sent as your anonymous ftp
1184password.
1185
1186Default is C<File-Fetch@example.com>.
1187
1188=head2 $File::Fetch::USER_AGENT
1189
1190This is the useragent as C<LWP> will report it.
1191
1192Default is C<File::Fetch/$VERSION>.
1193
1194=head2 $File::Fetch::FTP_PASSIVE
1195
1196This variable controls whether the environment variable C<FTP_PASSIVE>
1197and any passive switches to commandline tools will be set to true.
1198
1199Default value is 1.
1200
1201Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
1202files, since passive mode can only be set interactively for this binary
1203
1204=head2 $File::Fetch::TIMEOUT
1205
1206When set, controls the network timeout (counted in seconds).
1207
1208Default value is 0.
1209
1210=head2 $File::Fetch::WARN
1211
1212This variable controls whether errors encountered internally by
1213C<File::Fetch> should be C<carp>'d or not.
1214
1215Set to false to silence warnings. Inspect the output of the C<error()>
1216method manually to see what went wrong.
1217
1218Defaults to C<true>.
1219
1220=head2 $File::Fetch::DEBUG
1221
1222This enables debugging output when calling commandline utilities to
1223fetch files.
1224This also enables C<Carp::longmess> errors, instead of the regular
1225C<carp> errors.
1226
1227Good for tracking down why things don't work with your particular
1228setup.
1229
1230Default is 0.
1231
1232=head2 $File::Fetch::BLACKLIST
1233
1234This is an array ref holding blacklisted modules/utilities for fetching
1235files with.
1236
1237To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
1238set $File::Fetch::BLACKLIST to:
1239
1240 $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1241
1242The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.
1243
1244See the note on C<MAPPING> below.
1245
1246=head2 $File::Fetch::METHOD_FAIL
1247
1248This is a hashref registering what modules/utilities were known to fail
1249for fetching files (mostly because they weren't installed).
1250
1251You can reset this cache by assigning an empty hashref to it, or
1252individually remove keys.
1253
1254See the note on C<MAPPING> below.
1255
1256=head1 MAPPING
1257
1258
1259Here's a quick mapping for the utilities/modules, and their names for
1260the $BLACKLIST, $METHOD_FAIL and other internal functions.
1261
1262 LWP => lwp
1263 Net::FTP => netftp
1264 wget => wget
1265 lynx => lynx
1266 ncftp => ncftp
1267 ftp => ftp
1268 curl => curl
1269 rsync => rsync
6e654618 1270 lftp => lftp
79fd8837 1271
1272=head1 FREQUENTLY ASKED QUESTIONS
1273
1274=head2 So how do I use a proxy with File::Fetch?
1275
1276C<File::Fetch> currently only supports proxies with LWP::UserAgent.
1277You will need to set your environment variables accordingly. For
1278example, to use an ftp proxy:
1279
1280 $ENV{ftp_proxy} = 'foo.com';
1281
1282Refer to the LWP::UserAgent manpage for more details.
1283
1284=head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1285
1286C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
1287which we in turn capture. If that content is a 'custom' error file
1288(like, say, a C<404 handler>), you will get that contents instead.
1289
1290Sadly, C<lynx> doesn't support any options to return a different exit
1291code on non-C<200 OK> status, giving us no way to tell the difference
1292between a 'successfull' fetch and a custom error page.
1293
1294Therefor, we recommend to only use C<lynx> as a last resort. This is
1295why it is at the back of our list of methods to try as well.
1296
d4b3706f 1297=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1298
1299C<File::Fetch> is relatively smart about things. When trying to write
1300a file to disk, it removes the C<query parameters> (see the
1301C<output_file> method for details) from the file name before creating
1302it. In most cases this suffices.
1303
1304If you have any other characters you need to escape, please install
1305the C<URI::Escape> module from CPAN, and pre-encode your URI before
1306passing it to C<File::Fetch>. You can read about the details of URIs
1307and URI encoding here:
1308
1309 http://www.faqs.org/rfcs/rfc2396.html
1310
79fd8837 1311=head1 TODO
1312
1313=over 4
1314
1315=item Implement $PREFER_BIN
1316
1317To indicate to rather use commandline tools than modules
1318
a0ad4830 1319=back
1320
1321=head1 BUG REPORTS
1322
1323Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>.
1324
1325=head1 AUTHOR
79fd8837 1326
d4b3706f 1327This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
79fd8837 1328
1329=head1 COPYRIGHT
1330
a0ad4830 1331This library is free software; you may redistribute and/or modify it
1332under the same terms as Perl itself.
79fd8837 1333
79fd8837 1334
1335=cut
1336
1337# Local variables:
1338# c-indentation-style: bsd
1339# c-basic-offset: 4
1340# indent-tabs-mode: nil
1341# End:
1342# vim: expandtab shiftwidth=4:
1343
1344
1345
1346