Integrate version.pm-0.77 into bleadperl
[p5sagit/p5-mst-13.2.git] / lib / File / Fetch.pm
CommitLineData
79fd8837 1package File::Fetch;
2
3use strict;
4use FileHandle;
6e654618 5use File::Temp;
79fd8837 6use File::Copy;
7use File::Spec;
8use File::Spec::Unix;
79fd8837 9use File::Basename qw[dirname];
10
11use Cwd qw[cwd];
12use Carp qw[carp];
6b6e6e92 13use IPC::Cmd qw[can_run run QUOTE];
79fd8837 14use File::Path qw[mkpath];
8d16e270 15use File::Temp qw[tempdir];
79fd8837 16use Params::Check qw[check];
17use Module::Load::Conditional qw[can_load];
18use Locale::Maketext::Simple Style => 'gettext';
19
20use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
21 $BLACKLIST $METHOD_FAIL $VERSION $METHODS
22 $FTP_PASSIVE $TIMEOUT $DEBUG $WARN
23 ];
24
8d16e270 25$VERSION = '0.20';
fe98d82b 26$VERSION = eval $VERSION; # avoid warnings with development releases
27$PREFER_BIN = 0; # XXX TODO implement
79fd8837 28$FROM_EMAIL = 'File-Fetch@example.com';
6b6e6e92 29$USER_AGENT = "File::Fetch/$VERSION";
79fd8837 30$BLACKLIST = [qw|ftp|];
31$METHOD_FAIL = { };
32$FTP_PASSIVE = 1;
33$TIMEOUT = 0;
34$DEBUG = 0;
35$WARN = 1;
36
37### methods available to fetch the file depending on the scheme
38$METHODS = {
6e654618 39 http => [ qw|lwp wget curl lftp lynx| ],
40 ftp => [ qw|lwp netftp wget curl lftp ncftp ftp| ],
41 file => [ qw|lwp lftp file| ],
79fd8837 42 rsync => [ qw|rsync| ]
43};
44
45### silly warnings ###
46local $Params::Check::VERBOSE = 1;
47local $Params::Check::VERBOSE = 1;
48local $Module::Load::Conditional::VERBOSE = 0;
49local $Module::Load::Conditional::VERBOSE = 0;
50
51### see what OS we are on, important for file:// uris ###
6b6e6e92 52use constant ON_WIN => ($^O eq 'MSWin32');
53use constant ON_VMS => ($^O eq 'VMS');
54use constant ON_UNIX => (!ON_WIN);
55use constant HAS_VOL => (ON_WIN);
56use constant HAS_SHARE => (ON_WIN);
57
58
79fd8837 59=pod
60
61=head1 NAME
62
63File::Fetch - A generic file fetching mechanism
64
65=head1 SYNOPSIS
66
67 use File::Fetch;
68
69 ### build a File::Fetch object ###
70 my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
71
72 ### fetch the uri to cwd() ###
73 my $where = $ff->fetch() or die $ff->error;
74
75 ### fetch the uri to /tmp ###
76 my $where = $ff->fetch( to => '/tmp' );
77
78 ### parsed bits from the uri ###
79 $ff->uri;
80 $ff->scheme;
81 $ff->host;
82 $ff->path;
83 $ff->file;
84
85=head1 DESCRIPTION
86
87File::Fetch is a generic file fetching mechanism.
88
89It allows you to fetch any file pointed to by a C<ftp>, C<http>,
90C<file>, or C<rsync> uri by a number of different means.
91
92See the C<HOW IT WORKS> section further down for details.
93
d4b3706f 94=head1 ACCESSORS
95
96A C<File::Fetch> object has the following accessors
97
98=over 4
99
100=item $ff->uri
101
102The uri you passed to the constructor
103
104=item $ff->scheme
105
106The scheme from the uri (like 'file', 'http', etc)
107
108=item $ff->host
109
fe98d82b 110The hostname in the uri. Will be empty if host was originally
111'localhost' for a 'file://' url.
112
113=item $ff->vol
114
115On operating systems with the concept of a volume the second element
116of a file:// is considered to the be volume specification for the file.
1f80753b 117Thus on Win32 this routine returns the volume, on other operating
118systems this returns nothing.
fe98d82b 119
120On Windows this value may be empty if the uri is to a network share, in
121which case the 'share' property will be defined. Additionally, volume
122specifications that use '|' as ':' will be converted on read to use ':'.
123
1f80753b 124On VMS, which has a volume concept, this field will be empty because VMS
125file specifications are converted to absolute UNIX format and the volume
126information is transparently included.
127
fe98d82b 128=item $ff->share
129
130On systems with the concept of a network share (currently only Windows) returns
131the sharename from a file://// url. On other operating systems returns empty.
d4b3706f 132
133=item $ff->path
134
135The path from the uri, will be at least a single '/'.
136
137=item $ff->file
138
139The name of the remote file. For the local file name, the
140result of $ff->output_file will be used.
141
142=cut
143
144
145##########################
146### Object & Accessors ###
147##########################
148
149{
6b6e6e92 150 ### template for autogenerated accessors ###
d4b3706f 151 my $Tmpl = {
152 scheme => { default => 'http' },
153 host => { default => 'localhost' },
154 path => { default => '/' },
155 file => { required => 1 },
156 uri => { required => 1 },
1f80753b 157 vol => { default => '' }, # windows for file:// uris
fe98d82b 158 share => { default => '' }, # windows for file:// uris
d4b3706f 159 _error_msg => { no_override => 1 },
160 _error_msg_long => { no_override => 1 },
161 };
162
163 for my $method ( keys %$Tmpl ) {
164 no strict 'refs';
165 *$method = sub {
166 my $self = shift;
167 $self->{$method} = $_[0] if @_;
168 return $self->{$method};
169 }
170 }
171
172 sub _create {
173 my $class = shift;
174 my %hash = @_;
175
176 my $args = check( $Tmpl, \%hash ) or return;
177
178 bless $args, $class;
179
180 if( lc($args->scheme) ne 'file' and not $args->host ) {
181 return File::Fetch->_error(loc(
182 "Hostname required when fetching from '%1'",$args->scheme));
183 }
184
185 for (qw[path file]) {
9e5ea595 186 unless( $args->$_() ) { # 5.5.x needs the ()
d4b3706f 187 return File::Fetch->_error(loc("No '%1' specified",$_));
188 }
189 }
190
191 return $args;
192 }
193}
194
195=item $ff->output_file
196
197The name of the output file. This is the same as $ff->file,
198but any query parameters are stripped off. For example:
199
200 http://example.com/index.html?x=y
201
202would make the output file be C<index.html> rather than
203C<index.html?x=y>.
204
205=back
206
207=cut
208
209sub output_file {
210 my $self = shift;
211 my $file = $self->file;
212
213 $file =~ s/\?.*$//g;
214
215 return $file;
216}
217
218### XXX do this or just point to URI::Escape?
219# =head2 $esc_uri = $ff->escaped_uri
220#
221# =cut
222#
223# ### most of this is stolen straight from URI::escape
224# { ### Build a char->hex map
225# my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
226#
227# sub escaped_uri {
228# my $self = shift;
229# my $uri = $self->uri;
230#
231# ### Default unsafe characters. RFC 2732 ^(uric - reserved)
232# $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
233# $escapes{$1} || $self->_fail_hi($1)/ge;
234#
235# return $uri;
236# }
237#
238# sub _fail_hi {
239# my $self = shift;
240# my $char = shift;
241#
242# $self->_error(loc(
243# "Can't escape '%1', try using the '%2' module instead",
244# sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
245# ));
246# }
247#
248# sub output_file {
249#
250# }
251#
252#
253# }
254
79fd8837 255=head1 METHODS
256
257=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
258
259Parses the uri and creates a corresponding File::Fetch::Item object,
260that is ready to be C<fetch>ed and returns it.
261
262Returns false on failure.
263
264=cut
265
266sub new {
267 my $class = shift;
268 my %hash = @_;
269
270 my ($uri);
271 my $tmpl = {
272 uri => { required => 1, store => \$uri },
273 };
274
275 check( $tmpl, \%hash ) or return;
276
277 ### parse the uri to usable parts ###
278 my $href = __PACKAGE__->_parse_uri( $uri ) or return;
279
280 ### make it into a FFI object ###
d4b3706f 281 my $ff = File::Fetch->_create( %$href ) or return;
79fd8837 282
283
284 ### return the object ###
d4b3706f 285 return $ff;
79fd8837 286}
287
288### parses an uri to a hash structure:
289###
290### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
291###
292### becomes:
293###
294### $href = {
295### scheme => 'ftp',
296### host => 'ftp.cpan.org',
297### path => '/pub/mirror',
298### file => 'index.html'
299### };
300###
9e5ea595 301### In the case of file:// urls there maybe be additional fields
302###
5e6d05d2 303### For systems with volume specifications such as Win32 there will be
fe98d82b 304### a volume specifier provided in the 'vol' field.
305###
306### 'vol' => 'volumename'
307###
9e5ea595 308### For windows file shares there may be a 'share' key specified
309###
310### 'share' => 'sharename'
311###
fe98d82b 312### Note that the rules of what a file:// url means vary by the operating system
313### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
314### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
315### not '/foo/bar.txt'
9e5ea595 316###
fe98d82b 317### Similarly if the host interpreting the url is VMS then
318### file:///disk$user/my/notes/note12345.txt' means
1f80753b 319### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
320### if it is unix where it means /disk$user/my/notes/note12345.txt'.
321### Except for some cases in the File::Spec methods, Perl on VMS will generally
322### handle UNIX format file specifications.
fe98d82b 323###
324### This means it is impossible to serve certain file:// urls on certain systems.
325###
326### Thus are the problems with a protocol-less specification. :-(
9e5ea595 327###
328
79fd8837 329sub _parse_uri {
330 my $self = shift;
331 my $uri = shift or return;
332
333 my $href = { uri => $uri };
334
335 ### find the scheme ###
336 $uri =~ s|^(\w+)://||;
337 $href->{scheme} = $1;
338
9e5ea595 339 ### See rfc 1738 section 3.10
340 ### http://www.faqs.org/rfcs/rfc1738.html
341 ### And wikipedia for more on windows file:// urls
342 ### http://en.wikipedia.org/wiki/File://
79fd8837 343 if( $href->{scheme} eq 'file' ) {
9e5ea595 344
345 my @parts = split '/',$uri;
346
347 ### file://hostname/...
348 ### file://hostname/...
fe98d82b 349 ### normalize file://localhost with file:///
9e5ea595 350 $href->{host} = $parts[0] || '';
351
352 ### index in @parts where the path components begin;
353 my $index = 1;
9e5ea595 354
355 ### file:////hostname/sharename/blah.txt
fe98d82b 356 if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
357
9e5ea595 358 $href->{host} = $parts[2] || ''; # avoid warnings
359 $href->{share} = $parts[3] || ''; # avoid warnings
360
361 $index = 4 # index after the share
9e5ea595 362
fe98d82b 363 ### file:///D|/blah.txt
364 ### file:///D:/blah.txt
fe98d82b 365 } elsif (HAS_VOL) {
366
367 ### this code comes from dmq's patch, but:
368 ### XXX if volume is empty, wouldn't that be an error? --kane
369 ### if so, our file://localhost test needs to be fixed as wel
370 $href->{vol} = $parts[1] || '';
371
372 ### correct D| style colume descriptors
373 $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
374
375 $index = 2; # index after the volume
376 }
377
378 ### rebuild the path from the leftover parts;
9e5ea595 379 $href->{path} = join '/', '', splice( @parts, $index, $#parts );
79fd8837 380
381 } else {
9e5ea595 382 ### using anything but qw() in hash slices may produce warnings
383 ### in older perls :-(
384 @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s;
79fd8837 385 }
386
387 ### split the path into file + dir ###
388 { my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
389 $href->{path} = $parts[1];
390 $href->{file} = $parts[2];
391 }
392
fe98d82b 393 ### host will be empty if the target was 'localhost' and the
394 ### scheme was 'file'
395 $href->{host} = '' if ($href->{host} eq 'localhost') and
396 ($href->{scheme} eq 'file');
79fd8837 397
398 return $href;
399}
400
8d16e270 401=head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] )
79fd8837 402
8d16e270 403Fetches the file you requested and returns the full path to the file.
404
405By default it writes to C<cwd()>, but you can override that by specifying
406the C<to> argument:
407
408 ### file fetch to /tmp, full path to the file in $where
409 $where = $ff->fetch( to => '/tmp' );
410
411 ### file slurped into $scalar, full path to the file in $where
412 ### file is downloaded to a temp directory and cleaned up at exit time
413 $where = $ff->fetch( to => \$scalar );
79fd8837 414
415Returns the full path to the downloaded file on success, and false
416on failure.
417
418=cut
419
420sub fetch {
421 my $self = shift or return;
422 my %hash = @_;
423
8d16e270 424 my $target;
79fd8837 425 my $tmpl = {
8d16e270 426 to => { default => cwd(), store => \$target },
79fd8837 427 };
428
429 check( $tmpl, \%hash ) or return;
430
8d16e270 431 my ($to, $fh);
432 ### you want us to slurp the contents
433 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
434 $to = tempdir( 'FileFetch.XXXXXX', CLEANUP => 1 );
435
436 ### plain old fetch
437 } else {
438 $to = $target;
1f80753b 439
8d16e270 440 ### On VMS force to VMS format so File::Spec will work.
441 $to = VMS::Filespec::vmspath($to) if ON_VMS;
79fd8837 442
8d16e270 443 ### create the path if it doesn't exist yet ###
444 unless( -d $to ) {
445 eval { mkpath( $to ) };
446
447 return $self->_error(loc("Could not create path '%1'",$to)) if $@;
448 }
79fd8837 449 }
450
451 ### set passive ftp if required ###
452 local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
453
fe98d82b 454 ### we dont use catfile on win32 because if we are using a cygwin tool
455 ### under cmd.exe they wont understand windows style separators.
456 my $out_to = ON_WIN ? $to.'/'.$self->output_file
457 : File::Spec->catfile( $to, $self->output_file );
458
79fd8837 459 for my $method ( @{ $METHODS->{$self->scheme} } ) {
460 my $sub = '_'.$method.'_fetch';
461
462 unless( __PACKAGE__->can($sub) ) {
463 $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
464 $method));
465 next;
466 }
467
468 ### method is blacklisted ###
469 next if grep { lc $_ eq $method } @$BLACKLIST;
470
471 ### method is known to fail ###
472 next if $METHOD_FAIL->{$method};
473
d4b3706f 474 ### there's serious issues with IPC::Run and quoting of command
475 ### line arguments. using quotes in the wrong place breaks things,
476 ### and in the case of say,
477 ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
478 ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
479 ### it doesn't matter how you quote, it always fails.
480 local $IPC::Cmd::USE_IPC_RUN = 0;
481
482 if( my $file = $self->$sub(
a0ad4830 483 to => $out_to
d4b3706f 484 )){
79fd8837 485
486 unless( -e $file && -s _ ) {
487 $self->_error(loc("'%1' said it fetched '%2', ".
488 "but it was not created",$method,$file));
489
490 ### mark the failure ###
491 $METHOD_FAIL->{$method} = 1;
492
493 next;
494
495 } else {
496
8d16e270 497 ### slurp mode?
498 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
499
500 ### open the file
501 open my $fh, $file or do {
502 $self->_error(
503 loc("Could not open '%1': %2", $file, $!));
504 return;
505 };
506
507 ### slurp
508 $$target = do { local $/; <$fh> };
509
510 }
511
79fd8837 512 my $abs = File::Spec->rel2abs( $file );
513 return $abs;
8d16e270 514
79fd8837 515 }
516 }
517 }
518
519
520 ### if we got here, we looped over all methods, but we weren't able
521 ### to fetch it.
522 return;
523}
524
79fd8837 525########################
526### _*_fetch methods ###
527########################
528
529### LWP fetching ###
530sub _lwp_fetch {
531 my $self = shift;
532 my %hash = @_;
533
534 my ($to);
535 my $tmpl = {
536 to => { required => 1, store => \$to }
537 };
538 check( $tmpl, \%hash ) or return;
539
540 ### modules required to download with lwp ###
541 my $use_list = {
542 LWP => '0.0',
543 'LWP::UserAgent' => '0.0',
544 'HTTP::Request' => '0.0',
545 'HTTP::Status' => '0.0',
546 URI => '0.0',
547
548 };
549
550 if( can_load(modules => $use_list) ) {
551
552 ### setup the uri object
553 my $uri = URI->new( File::Spec::Unix->catfile(
554 $self->path, $self->file
555 ) );
556
557 ### special rules apply for file:// uris ###
558 $uri->scheme( $self->scheme );
559 $uri->host( $self->scheme eq 'file' ? '' : $self->host );
560 $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';
561
562 ### set up the useragent object
563 my $ua = LWP::UserAgent->new();
564 $ua->timeout( $TIMEOUT ) if $TIMEOUT;
565 $ua->agent( $USER_AGENT );
566 $ua->from( $FROM_EMAIL );
567 $ua->env_proxy;
568
569 my $res = $ua->mirror($uri, $to) or return;
570
571 ### uptodate or fetched ok ###
572 if ( $res->code == 304 or $res->code == 200 ) {
573 return $to;
574
575 } else {
576 return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
577 $res->code, HTTP::Status::status_message($res->code),
578 $res->status_line));
579 }
580
581 } else {
582 $METHOD_FAIL->{'lwp'} = 1;
583 return;
584 }
585}
586
587### Net::FTP fetching
588sub _netftp_fetch {
589 my $self = shift;
590 my %hash = @_;
591
592 my ($to);
593 my $tmpl = {
594 to => { required => 1, store => \$to }
595 };
596 check( $tmpl, \%hash ) or return;
597
598 ### required modules ###
599 my $use_list = { 'Net::FTP' => 0 };
600
601 if( can_load( modules => $use_list ) ) {
602
603 ### make connection ###
604 my $ftp;
605 my @options = ($self->host);
606 push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
607 unless( $ftp = Net::FTP->new( @options ) ) {
608 return $self->_error(loc("Ftp creation failed: %1",$@));
609 }
610
611 ### login ###
612 unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
613 return $self->_error(loc("Could not login to '%1'",$self->host));
614 }
615
616 ### set binary mode, just in case ###
617 $ftp->binary;
618
619 ### create the remote path
620 ### remember remote paths are unix paths! [#11483]
621 my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
622
623 ### fetch the file ###
624 my $target;
625 unless( $target = $ftp->get( $remote, $to ) ) {
626 return $self->_error(loc("Could not fetch '%1' from '%2'",
627 $remote, $self->host));
628 }
629
630 ### log out ###
631 $ftp->quit;
632
633 return $target;
634
635 } else {
636 $METHOD_FAIL->{'netftp'} = 1;
637 return;
638 }
639}
640
641### /bin/wget fetch ###
642sub _wget_fetch {
643 my $self = shift;
644 my %hash = @_;
645
646 my ($to);
647 my $tmpl = {
648 to => { required => 1, store => \$to }
649 };
650 check( $tmpl, \%hash ) or return;
651
652 ### see if we have a wget binary ###
653 if( my $wget = can_run('wget') ) {
654
655 ### no verboseness, thanks ###
656 my $cmd = [ $wget, '--quiet' ];
657
658 ### if a timeout is set, add it ###
659 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
660
661 ### run passive if specified ###
662 push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
663
664 ### set the output document, add the uri ###
6e654618 665 push @$cmd, '--output-document', $to, $self->uri;
666
667 ### with IPC::Cmd > 0.41, this is fixed in teh library,
668 ### and there's no need for special casing any more.
669 ### DO NOT quote things for IPC::Run, it breaks stuff.
670 # $IPC::Cmd::USE_IPC_RUN
671 # ? ($to, $self->uri)
672 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
79fd8837 673
674 ### shell out ###
675 my $captured;
d4b3706f 676 unless(run( command => $cmd,
677 buffer => \$captured,
678 verbose => $DEBUG
679 )) {
79fd8837 680 ### wget creates the output document always, even if the fetch
681 ### fails.. so unlink it in that case
682 1 while unlink $to;
683
684 return $self->_error(loc( "Command failed: %1", $captured || '' ));
685 }
686
687 return $to;
688
689 } else {
690 $METHOD_FAIL->{'wget'} = 1;
691 return;
692 }
693}
694
6e654618 695### /bin/lftp fetch ###
696sub _lftp_fetch {
697 my $self = shift;
698 my %hash = @_;
699
700 my ($to);
701 my $tmpl = {
702 to => { required => 1, store => \$to }
703 };
704 check( $tmpl, \%hash ) or return;
705
706 ### see if we have a wget binary ###
707 if( my $lftp = can_run('lftp') ) {
708
709 ### no verboseness, thanks ###
710 my $cmd = [ $lftp, '-f' ];
711
712 my $fh = File::Temp->new;
713
714 my $str;
715
716 ### if a timeout is set, add it ###
717 $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT;
718
719 ### run passive if specified ###
720 $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE;
721
722 ### set the output document, add the uri ###
723 ### quote the URI, because lftp supports certain shell
724 ### expansions, most notably & for backgrounding.
725 ### ' quote does nto work, must be "
726 $str .= q[get ']. $self->uri .q[' -o ]. $to . $/;
727
728 if( $DEBUG ) {
729 my $pp_str = join ' ', split $/, $str;
730 print "# lftp command: $pp_str\n";
731 }
732
733 ### write straight to the file.
734 $fh->autoflush(1);
735 print $fh $str;
736
737 ### the command needs to be 1 string to be executed
738 push @$cmd, $fh->filename;
739
740 ### with IPC::Cmd > 0.41, this is fixed in teh library,
741 ### and there's no need for special casing any more.
742 ### DO NOT quote things for IPC::Run, it breaks stuff.
743 # $IPC::Cmd::USE_IPC_RUN
744 # ? ($to, $self->uri)
745 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
746
747
748 ### shell out ###
749 my $captured;
750 unless(run( command => $cmd,
751 buffer => \$captured,
752 verbose => $DEBUG
753 )) {
754 ### wget creates the output document always, even if the fetch
755 ### fails.. so unlink it in that case
756 1 while unlink $to;
757
758 return $self->_error(loc( "Command failed: %1", $captured || '' ));
759 }
760
761 return $to;
762
763 } else {
764 $METHOD_FAIL->{'lftp'} = 1;
765 return;
766 }
767}
768
769
79fd8837 770
771### /bin/ftp fetch ###
772sub _ftp_fetch {
773 my $self = shift;
774 my %hash = @_;
775
776 my ($to);
777 my $tmpl = {
778 to => { required => 1, store => \$to }
779 };
780 check( $tmpl, \%hash ) or return;
781
d4b3706f 782 ### see if we have a ftp binary ###
79fd8837 783 if( my $ftp = can_run('ftp') ) {
784
785 my $fh = FileHandle->new;
786
787 local $SIG{CHLD} = 'IGNORE';
788
789 unless ($fh->open("|$ftp -n")) {
790 return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
791 }
792
793 my @dialog = (
794 "lcd " . dirname($to),
795 "open " . $self->host,
796 "user anonymous $FROM_EMAIL",
797 "cd /",
798 "cd " . $self->path,
799 "binary",
d4b3706f 800 "get " . $self->file . " " . $self->output_file,
79fd8837 801 "quit",
802 );
803
804 foreach (@dialog) { $fh->print($_, "\n") }
805 $fh->close or return;
806
807 return $to;
808 }
809}
810
811### lynx is stupid - it decompresses any .gz file it finds to be text
812### use /bin/lynx to fetch files
813sub _lynx_fetch {
814 my $self = shift;
815 my %hash = @_;
816
817 my ($to);
818 my $tmpl = {
819 to => { required => 1, store => \$to }
820 };
821 check( $tmpl, \%hash ) or return;
822
d4b3706f 823 ### see if we have a lynx binary ###
79fd8837 824 if( my $lynx = can_run('lynx') ) {
825
d4b3706f 826 unless( IPC::Cmd->can_capture_buffer ) {
827 $METHOD_FAIL->{'lynx'} = 1;
828
829 return $self->_error(loc(
830 "Can not capture buffers. Can not use '%1' to fetch files",
831 'lynx' ));
832 }
79fd8837 833
6e654618 834 ### check if the HTTP resource exists ###
835 if ($self->uri =~ /^https?:\/\//i) {
836 my $cmd = [
837 $lynx,
838 '-head',
839 '-source',
840 "-auth=anonymous:$FROM_EMAIL",
841 ];
842
843 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
844
845 push @$cmd, $self->uri;
846
847 ### shell out ###
848 my $head;
849 unless(run( command => $cmd,
850 buffer => \$head,
851 verbose => $DEBUG )
852 ) {
853 return $self->_error(loc("Command failed: %1", $head || ''));
854 }
855
856 unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) {
857 return $self->_error(loc("Command failed: %1", $head || ''));
858 }
859 }
860
79fd8837 861 ### write to the output file ourselves, since lynx ass_u_mes to much
862 my $local = FileHandle->new(">$to")
863 or return $self->_error(loc(
864 "Could not open '%1' for writing: %2",$to,$!));
865
866 ### dump to stdout ###
867 my $cmd = [
868 $lynx,
869 '-source',
870 "-auth=anonymous:$FROM_EMAIL",
871 ];
872
873 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
874
d4b3706f 875 ### DO NOT quote things for IPC::Run, it breaks stuff.
6e654618 876 push @$cmd, $self->uri;
877
878 ### with IPC::Cmd > 0.41, this is fixed in teh library,
879 ### and there's no need for special casing any more.
880 ### DO NOT quote things for IPC::Run, it breaks stuff.
881 # $IPC::Cmd::USE_IPC_RUN
882 # ? $self->uri
883 # : QUOTE. $self->uri .QUOTE;
d4b3706f 884
79fd8837 885
886 ### shell out ###
887 my $captured;
888 unless(run( command => $cmd,
889 buffer => \$captured,
890 verbose => $DEBUG )
891 ) {
892 return $self->_error(loc("Command failed: %1", $captured || ''));
893 }
894
895 ### print to local file ###
896 ### XXX on a 404 with a special error page, $captured will actually
897 ### hold the contents of that page, and make it *appear* like the
898 ### request was a success, when really it wasn't :(
899 ### there doesn't seem to be an option for lynx to change the exit
900 ### code based on a 4XX status or so.
901 ### the closest we can come is using --error_file and parsing that,
902 ### which is very unreliable ;(
903 $local->print( $captured );
904 $local->close or return;
905
906 return $to;
907
908 } else {
909 $METHOD_FAIL->{'lynx'} = 1;
910 return;
911 }
912}
913
914### use /bin/ncftp to fetch files
915sub _ncftp_fetch {
916 my $self = shift;
917 my %hash = @_;
918
919 my ($to);
920 my $tmpl = {
921 to => { required => 1, store => \$to }
922 };
923 check( $tmpl, \%hash ) or return;
924
925 ### we can only set passive mode in interactive sesssions, so bail out
926 ### if $FTP_PASSIVE is set
927 return if $FTP_PASSIVE;
928
d4b3706f 929 ### see if we have a ncftp binary ###
79fd8837 930 if( my $ncftp = can_run('ncftp') ) {
931
932 my $cmd = [
933 $ncftp,
934 '-V', # do not be verbose
935 '-p', $FROM_EMAIL, # email as password
936 $self->host, # hostname
937 dirname($to), # local dir for the file
938 # remote path to the file
d4b3706f 939 ### DO NOT quote things for IPC::Run, it breaks stuff.
940 $IPC::Cmd::USE_IPC_RUN
941 ? File::Spec::Unix->catdir( $self->path, $self->file )
942 : QUOTE. File::Spec::Unix->catdir(
943 $self->path, $self->file ) .QUOTE
944
79fd8837 945 ];
946
947 ### shell out ###
948 my $captured;
949 unless(run( command => $cmd,
950 buffer => \$captured,
951 verbose => $DEBUG )
952 ) {
953 return $self->_error(loc("Command failed: %1", $captured || ''));
954 }
955
956 return $to;
957
958 } else {
959 $METHOD_FAIL->{'ncftp'} = 1;
960 return;
961 }
962}
963
964### use /bin/curl to fetch files
965sub _curl_fetch {
966 my $self = shift;
967 my %hash = @_;
968
969 my ($to);
970 my $tmpl = {
971 to => { required => 1, store => \$to }
972 };
973 check( $tmpl, \%hash ) or return;
974
975 if (my $curl = can_run('curl')) {
976
977 ### these long opts are self explanatory - I like that -jmb
6e654618 978 my $cmd = [ $curl, '-q' ];
79fd8837 979
980 push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
981
982 push(@$cmd, '--silent') unless $DEBUG;
983
984 ### curl does the right thing with passive, regardless ###
985 if ($self->scheme eq 'ftp') {
986 push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
987 }
988
989 ### curl doesn't follow 302 (temporarily moved) etc automatically
990 ### so we add --location to enable that.
6e654618 991 push @$cmd, '--fail', '--location', '--output', $to, $self->uri;
992
993 ### with IPC::Cmd > 0.41, this is fixed in teh library,
994 ### and there's no need for special casing any more.
995 ### DO NOT quote things for IPC::Run, it breaks stuff.
996 # $IPC::Cmd::USE_IPC_RUN
997 # ? ($to, $self->uri)
998 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
999
79fd8837 1000
1001 my $captured;
1002 unless(run( command => $cmd,
1003 buffer => \$captured,
1004 verbose => $DEBUG )
1005 ) {
1006
1007 return $self->_error(loc("Command failed: %1", $captured || ''));
1008 }
1009
1010 return $to;
1011
1012 } else {
1013 $METHOD_FAIL->{'curl'} = 1;
1014 return;
1015 }
1016}
1017
1018
1019### use File::Copy for fetching file:// urls ###
9e5ea595 1020###
1021### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html)
1022### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
fe98d82b 1023###
9e5ea595 1024
79fd8837 1025sub _file_fetch {
1026 my $self = shift;
1027 my %hash = @_;
1028
1029 my ($to);
1030 my $tmpl = {
1031 to => { required => 1, store => \$to }
1032 };
1033 check( $tmpl, \%hash ) or return;
1034
9e5ea595 1035
1036
79fd8837 1037 ### prefix a / on unix systems with a file uri, since it would
1038 ### look somewhat like this:
9e5ea595 1039 ### file:///home/kane/file
1040 ### wheras windows file uris for 'c:\some\dir\file' might look like:
1041 ### file:///C:/some/dir/file
1042 ### file:///C|/some/dir/file
1043 ### or for a network share '\\host\share\some\dir\file':
1044 ### file:////host/share/some/dir/file
1045 ###
1046 ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
1047 ### file://vms.host.edu/disk$user/my/notes/note12345.txt
1048 ###
1049
1050 my $path = $self->path;
1051 my $vol = $self->vol;
1052 my $share = $self->share;
1053
1054 my $remote;
1055 if (!$share and $self->host) {
1056 return $self->_error(loc(
1057 "Currently %1 cannot handle hosts in %2 urls",
1058 'File::Fetch', 'file://'
1059 ));
1060 }
1061
1062 if( $vol ) {
1063 $path = File::Spec->catdir( split /\//, $path );
1064 $remote = File::Spec->catpath( $vol, $path, $self->file);
79fd8837 1065
9e5ea595 1066 } elsif( $share ) {
1067 ### win32 specific, and a share name, so we wont bother with File::Spec
1068 $path =~ s|/+|\\|g;
1069 $remote = "\\\\".$self->host."\\$share\\$path";
1070
1071 } else {
5e6d05d2 1072 ### File::Spec on VMS can not currently handle UNIX syntax.
1073 my $file_class = ON_VMS
1074 ? 'File::Spec::Unix'
1075 : 'File::Spec';
1076
1077 $remote = $file_class->catfile( $path, $self->file );
9e5ea595 1078 }
79fd8837 1079
1080 ### File::Copy is littered with 'die' statements :( ###
1081 my $rv = eval { File::Copy::copy( $remote, $to ) };
1082
1083 ### something went wrong ###
1084 if( !$rv or $@ ) {
1085 return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
1086 $remote, $to, $!, $@));
1087 }
1088
1089 return $to;
1090}
1091
1092### use /usr/bin/rsync to fetch files
1093sub _rsync_fetch {
1094 my $self = shift;
1095 my %hash = @_;
1096
1097 my ($to);
1098 my $tmpl = {
1099 to => { required => 1, store => \$to }
1100 };
1101 check( $tmpl, \%hash ) or return;
1102
1103 if (my $rsync = can_run('rsync')) {
1104
1105 my $cmd = [ $rsync ];
1106
1107 ### XXX: rsync has no I/O timeouts at all, by default
1108 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1109
1110 push(@$cmd, '--quiet') unless $DEBUG;
1111
d4b3706f 1112 ### DO NOT quote things for IPC::Run, it breaks stuff.
6e654618 1113 push @$cmd, $self->uri, $to;
1114
1115 ### with IPC::Cmd > 0.41, this is fixed in teh library,
1116 ### and there's no need for special casing any more.
1117 ### DO NOT quote things for IPC::Run, it breaks stuff.
1118 # $IPC::Cmd::USE_IPC_RUN
1119 # ? ($to, $self->uri)
1120 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
79fd8837 1121
1122 my $captured;
1123 unless(run( command => $cmd,
1124 buffer => \$captured,
1125 verbose => $DEBUG )
1126 ) {
1127
fe98d82b 1128 return $self->_error(loc("Command %1 failed: %2",
1129 "@$cmd" || '', $captured || ''));
79fd8837 1130 }
1131
1132 return $to;
1133
1134 } else {
1135 $METHOD_FAIL->{'rsync'} = 1;
1136 return;
1137 }
1138}
1139
1140#################################
1141#
1142# Error code
1143#
1144#################################
1145
1146=pod
1147
1148=head2 $ff->error([BOOL])
1149
1150Returns the last encountered error as string.
1151Pass it a true value to get the C<Carp::longmess()> output instead.
1152
1153=cut
1154
d4b3706f 1155### error handling the way Archive::Extract does it
1156sub _error {
1157 my $self = shift;
1158 my $error = shift;
1159
1160 $self->_error_msg( $error );
1161 $self->_error_msg_long( Carp::longmess($error) );
1162
1163 if( $WARN ) {
1164 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
79fd8837 1165 }
1166
d4b3706f 1167 return;
79fd8837 1168}
1169
d4b3706f 1170sub error {
1171 my $self = shift;
1172 return shift() ? $self->_error_msg_long : $self->_error_msg;
1173}
79fd8837 1174
1175
11761;
1177
1178=pod
1179
1180=head1 HOW IT WORKS
1181
1182File::Fetch is able to fetch a variety of uris, by using several
1183external programs and modules.
1184
1185Below is a mapping of what utilities will be used in what order
1186for what schemes, if available:
1187
6e654618 1188 file => LWP, lftp, file
1189 http => LWP, wget, curl, lftp, lynx
1190 ftp => LWP, Net::FTP, wget, curl, lftp, ncftp, ftp
79fd8837 1191 rsync => rsync
1192
1193If you'd like to disable the use of one or more of these utilities
1194and/or modules, see the C<$BLACKLIST> variable further down.
1195
1196If a utility or module isn't available, it will be marked in a cache
1197(see the C<$METHOD_FAIL> variable further down), so it will not be
1198tried again. The C<fetch> method will only fail when all options are
1199exhausted, and it was not able to retrieve the file.
1200
1201A special note about fetching files from an ftp uri:
1202
1203By default, all ftp connections are done in passive mode. To change
1204that, see the C<$FTP_PASSIVE> variable further down.
1205
1206Furthermore, ftp uris only support anonymous connections, so no
1207named user/password pair can be passed along.
1208
1209C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
1210further down.
1211
1212=head1 GLOBAL VARIABLES
1213
1214The behaviour of File::Fetch can be altered by changing the following
1215global variables:
1216
1217=head2 $File::Fetch::FROM_EMAIL
1218
1219This is the email address that will be sent as your anonymous ftp
1220password.
1221
1222Default is C<File-Fetch@example.com>.
1223
1224=head2 $File::Fetch::USER_AGENT
1225
1226This is the useragent as C<LWP> will report it.
1227
1228Default is C<File::Fetch/$VERSION>.
1229
1230=head2 $File::Fetch::FTP_PASSIVE
1231
1232This variable controls whether the environment variable C<FTP_PASSIVE>
1233and any passive switches to commandline tools will be set to true.
1234
1235Default value is 1.
1236
1237Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
1238files, since passive mode can only be set interactively for this binary
1239
1240=head2 $File::Fetch::TIMEOUT
1241
1242When set, controls the network timeout (counted in seconds).
1243
1244Default value is 0.
1245
1246=head2 $File::Fetch::WARN
1247
1248This variable controls whether errors encountered internally by
1249C<File::Fetch> should be C<carp>'d or not.
1250
1251Set to false to silence warnings. Inspect the output of the C<error()>
1252method manually to see what went wrong.
1253
1254Defaults to C<true>.
1255
1256=head2 $File::Fetch::DEBUG
1257
1258This enables debugging output when calling commandline utilities to
1259fetch files.
1260This also enables C<Carp::longmess> errors, instead of the regular
1261C<carp> errors.
1262
1263Good for tracking down why things don't work with your particular
1264setup.
1265
1266Default is 0.
1267
1268=head2 $File::Fetch::BLACKLIST
1269
1270This is an array ref holding blacklisted modules/utilities for fetching
1271files with.
1272
1273To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
1274set $File::Fetch::BLACKLIST to:
1275
1276 $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1277
1278The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.
1279
1280See the note on C<MAPPING> below.
1281
1282=head2 $File::Fetch::METHOD_FAIL
1283
1284This is a hashref registering what modules/utilities were known to fail
1285for fetching files (mostly because they weren't installed).
1286
1287You can reset this cache by assigning an empty hashref to it, or
1288individually remove keys.
1289
1290See the note on C<MAPPING> below.
1291
1292=head1 MAPPING
1293
1294
1295Here's a quick mapping for the utilities/modules, and their names for
1296the $BLACKLIST, $METHOD_FAIL and other internal functions.
1297
1298 LWP => lwp
1299 Net::FTP => netftp
1300 wget => wget
1301 lynx => lynx
1302 ncftp => ncftp
1303 ftp => ftp
1304 curl => curl
1305 rsync => rsync
6e654618 1306 lftp => lftp
79fd8837 1307
1308=head1 FREQUENTLY ASKED QUESTIONS
1309
1310=head2 So how do I use a proxy with File::Fetch?
1311
1312C<File::Fetch> currently only supports proxies with LWP::UserAgent.
1313You will need to set your environment variables accordingly. For
1314example, to use an ftp proxy:
1315
1316 $ENV{ftp_proxy} = 'foo.com';
1317
1318Refer to the LWP::UserAgent manpage for more details.
1319
1320=head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1321
1322C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
1323which we in turn capture. If that content is a 'custom' error file
1324(like, say, a C<404 handler>), you will get that contents instead.
1325
1326Sadly, C<lynx> doesn't support any options to return a different exit
1327code on non-C<200 OK> status, giving us no way to tell the difference
1328between a 'successfull' fetch and a custom error page.
1329
1330Therefor, we recommend to only use C<lynx> as a last resort. This is
1331why it is at the back of our list of methods to try as well.
1332
d4b3706f 1333=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1334
1335C<File::Fetch> is relatively smart about things. When trying to write
1336a file to disk, it removes the C<query parameters> (see the
1337C<output_file> method for details) from the file name before creating
1338it. In most cases this suffices.
1339
1340If you have any other characters you need to escape, please install
1341the C<URI::Escape> module from CPAN, and pre-encode your URI before
1342passing it to C<File::Fetch>. You can read about the details of URIs
1343and URI encoding here:
1344
1345 http://www.faqs.org/rfcs/rfc2396.html
1346
79fd8837 1347=head1 TODO
1348
1349=over 4
1350
1351=item Implement $PREFER_BIN
1352
1353To indicate to rather use commandline tools than modules
1354
a0ad4830 1355=back
1356
1357=head1 BUG REPORTS
1358
1359Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>.
1360
1361=head1 AUTHOR
79fd8837 1362
d4b3706f 1363This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
79fd8837 1364
1365=head1 COPYRIGHT
1366
a0ad4830 1367This library is free software; you may redistribute and/or modify it
1368under the same terms as Perl itself.
79fd8837 1369
79fd8837 1370
1371=cut
1372
1373# Local variables:
1374# c-indentation-style: bsd
1375# c-basic-offset: 4
1376# indent-tabs-mode: nil
1377# End:
1378# vim: expandtab shiftwidth=4:
1379
1380
1381
1382