Commit | Line | Data |
6aaee015 |
1 | package CPANPLUS::Internals::Fetch; |
2 | |
3 | use strict; |
4 | |
5 | use CPANPLUS::Error; |
6 | use CPANPLUS::Internals::Constants; |
7 | |
8 | use File::Fetch; |
9 | use File::Spec; |
10 | use Cwd qw[cwd]; |
11 | use IPC::Cmd qw[run]; |
12 | use Params::Check qw[check]; |
13 | use Module::Load::Conditional qw[can_load]; |
14 | use Locale::Maketext::Simple Class => 'CPANPLUS', Style => 'gettext'; |
15 | |
16 | $Params::Check::VERBOSE = 1; |
17 | |
18 | =pod |
19 | |
20 | =head1 NAME |
21 | |
22 | CPANPLUS::Internals::Fetch |
23 | |
24 | =head1 SYNOPSIS |
25 | |
26 | my $output = $cb->_fetch( |
27 | module => $modobj, |
28 | fetchdir => '/path/to/save/to', |
29 | verbose => BOOL, |
30 | force => BOOL, |
31 | ); |
32 | |
33 | $cb->_add_fail_host( host => 'foo.com' ); |
34 | $cb->_host_ok( host => 'foo.com' ); |
35 | |
36 | |
37 | =head1 DESCRIPTION |
38 | |
39 | CPANPLUS::Internals::Fetch fetches files from either ftp, http, file |
40 | or rsync mirrors. |
41 | |
42 | This is the rough flow: |
43 | |
44 | $cb->_fetch |
45 | Delegate to File::Fetch; |
46 | |
47 | |
48 | =head1 METHODS |
49 | |
50 | =cut |
51 | |
4443dd53 |
52 | =head1 $path = _fetch( module => $modobj, [fetchdir => '/path/to/save/to', fetch_from => 'scheme://path/to/fetch/from', verbose => BOOL, force => BOOL, prefer_bin => BOOL, ttl => $seconds] ) |
6aaee015 |
53 | |
54 | C<_fetch> will fetch files based on the information in a module |
55 | object. You always need a module object. If you want a fake module |
56 | object for a one-off fetch, look at C<CPANPLUS::Module::Fake>. |
57 | |
58 | C<fetchdir> is the place to save the file to. Usually this |
59 | information comes from your configuration, but you can override it |
60 | expressly if needed. |
61 | |
62 | C<fetch_from> lets you specify an URI to get this file from. If you |
63 | do not specify one, your list of configured hosts will be probed to |
64 | download the file from. |
65 | |
66 | C<force> forces a new download, even if the file already exists. |
67 | |
68 | C<verbose> simply indicates whether or not to print extra messages. |
69 | |
70 | C<prefer_bin> indicates whether you prefer the use of commandline |
71 | programs over perl modules. Defaults to your corresponding config |
72 | setting. |
73 | |
4443dd53 |
74 | C<ttl> (in seconds) indicates how long a cached copy is valid for. If |
75 | the fetch time of the local copy is within the ttl, the cached copy is |
76 | returned. Otherwise, the file is refetched. |
77 | |
6aaee015 |
78 | C<_fetch> figures out, based on the host list, what scheme to use and |
79 | from there, delegates to C<File::Fetch> do the actual fetching. |
80 | |
81 | Returns the path of the output file on success, false on failure. |
82 | |
83 | Note that you can set a C<blacklist> on certain methods in the config. |
84 | Simply add the identifying name of the method (ie, C<lwp>) to: |
85 | $conf->_set_fetch( blacklist => ['lwp'] ); |
86 | |
87 | And the C<LWP> function will be skipped by C<File::Fetch>. |
88 | |
89 | =cut |
90 | |
91 | sub _fetch { |
92 | my $self = shift; |
93 | my $conf = $self->configure_object; |
94 | my %hash = @_; |
95 | |
96 | local $Params::Check::NO_DUPLICATES = 0; |
97 | |
4443dd53 |
98 | my ($modobj, $verbose, $force, $fetch_from, $ttl); |
6aaee015 |
99 | my $tmpl = { |
100 | module => { required => 1, allow => IS_MODOBJ, store => \$modobj }, |
101 | fetchdir => { default => $conf->get_conf('fetchdir') }, |
102 | fetch_from => { default => '', store => \$fetch_from }, |
103 | force => { default => $conf->get_conf('force'), |
104 | store => \$force }, |
105 | verbose => { default => $conf->get_conf('verbose'), |
106 | store => \$verbose }, |
107 | prefer_bin => { default => $conf->get_conf('prefer_bin') }, |
4443dd53 |
108 | ttl => { default => 0, store => \$ttl }, |
6aaee015 |
109 | }; |
110 | |
111 | |
112 | my $args = check( $tmpl, \%hash ) or return; |
113 | |
114 | ### check if we already downloaded the thing ### |
4443dd53 |
115 | if( (my $where = $modobj->status->fetch()) and not $force and not $ttl ) { |
116 | |
6aaee015 |
117 | msg(loc("Already fetched '%1' to '%2', " . |
118 | "won't fetch again without force", |
119 | $modobj->module, $where ), $verbose ); |
120 | return $where; |
121 | } |
122 | |
123 | my ($remote_file, $local_file, $local_path); |
124 | |
125 | ### build the local path to downlaod to ### |
126 | { |
127 | $local_path = $args->{fetchdir} || |
128 | File::Spec->catdir( |
129 | $conf->get_conf('base'), |
130 | $modobj->path, |
131 | ); |
132 | |
133 | ### create the path if it doesn't exist ### |
134 | unless( -d $local_path ) { |
135 | unless( $self->_mkdir( dir => $local_path ) ) { |
136 | msg( loc("Could not create path '%1'", $local_path), $verbose); |
137 | return; |
138 | } |
139 | } |
140 | |
141 | $local_file = File::Spec->rel2abs( |
142 | File::Spec->catfile( |
143 | $local_path, |
144 | $modobj->package, |
145 | ) |
146 | ); |
6aaee015 |
147 | |
4443dd53 |
148 | ### do we already have the file? if so, can we use the cached version, |
149 | ### or do we need to refetch? |
150 | if( -e $local_file ) { |
151 | |
152 | my $unlink = 0; |
153 | my $use_cached = 0; |
154 | |
155 | ### if force is in effect, we have to refetch |
156 | if( $force ) { |
157 | $unlink++ |
158 | |
159 | ### if you provided a ttl, and it was exceeded, we'll refetch, |
160 | } elsif( $ttl and ([stat $local_file]->[9] + $ttl > time) ) { |
161 | msg(loc("Using cached file '%1' on disk; ". |
162 | "ttl (%2s) is not exceeded", |
163 | $local_file, $ttl), $verbose ); |
164 | |
165 | $use_cached++; |
166 | |
167 | ### if you provided a ttl, and the above conditional didn't match, |
168 | ### we exceeded the ttl, so we refetch |
169 | } elsif ( $ttl ) { |
170 | $unlink++; |
171 | |
172 | ### otherwise we can use the cached version |
173 | } else { |
174 | $use_cached++; |
175 | } |
176 | |
177 | if( $unlink ) { |
178 | ### some fetches will fail if the files exist already, so let's |
179 | ### delete them first |
180 | 1 while unlink $local_file; |
181 | |
182 | msg(loc("Could not delete %1, some methods may " . |
183 | "fail to force a download", $local_file), $verbose) |
184 | if -e $local_file; |
185 | |
186 | } else { |
187 | |
188 | ### store where we fetched it ### |
189 | $modobj->status->fetch( $local_file ); |
190 | |
191 | return $local_file; |
192 | } |
6aaee015 |
193 | } |
194 | } |
195 | |
196 | |
197 | ### we got a custom URI |
198 | if ( $fetch_from ) { |
199 | my $abs = $self->__file_fetch( from => $fetch_from, |
200 | to => $local_path, |
201 | verbose => $verbose ); |
202 | |
203 | unless( $abs ) { |
204 | error(loc("Unable to download '%1'", $fetch_from)); |
205 | return; |
206 | } |
207 | |
208 | ### store where we fetched it ### |
209 | $modobj->status->fetch( $abs ); |
210 | |
211 | return $abs; |
212 | |
213 | ### we will get it from one of our mirrors |
214 | } else { |
215 | ### build the remote path to download from ### |
216 | { $remote_file = File::Spec::Unix->catfile( |
217 | $modobj->path, |
218 | $modobj->package, |
219 | ); |
220 | unless( $remote_file ) { |
221 | error( loc('No remote file given for download') ); |
222 | return; |
223 | } |
224 | } |
225 | |
226 | ### see if we even have a host or a method to use to download with ### |
227 | my $found_host; |
228 | my @maybe_bad_host; |
229 | |
230 | HOST: { |
231 | ### F*CKING PIECE OF F*CKING p4 SHIT makes |
232 | ### '$File :: Fetch::SOME_VAR' |
233 | ### into a meta variable and starts substituting the file name... |
234 | ### GRAAAAAAAAAAAAAAAAAAAAAAH! |
235 | ### use ' to combat it! |
236 | |
237 | ### set up some flags for File::Fetch ### |
238 | local $File'Fetch::BLACKLIST = $conf->_get_fetch('blacklist'); |
239 | local $File'Fetch::TIMEOUT = $conf->get_conf('timeout'); |
240 | local $File'Fetch::DEBUG = $conf->get_conf('debug'); |
241 | local $File'Fetch::FTP_PASSIVE = $conf->get_conf('passive'); |
242 | local $File'Fetch::FROM_EMAIL = $conf->get_conf('email'); |
243 | local $File'Fetch::PREFER_BIN = $conf->get_conf('prefer_bin'); |
244 | local $File'Fetch::WARN = $verbose; |
245 | |
246 | |
247 | ### loop over all hosts we have ### |
248 | for my $host ( @{$conf->get_conf('hosts')} ) { |
249 | $found_host++; |
250 | |
6aaee015 |
251 | my $where; |
5879cbe1 |
252 | |
253 | ### file:// uris are special and need parsing |
254 | if( $host->{'scheme'} eq 'file' ) { |
255 | |
256 | ### the full path in the native format of the OS |
257 | my $host_spec = |
258 | File::Spec->file_name_is_absolute( $host->{'path'} ) |
259 | ? $host->{'path'} |
260 | : File::Spec->rel2abs( $host->{'path'} ); |
261 | |
262 | ### there might be volumes involved on vms/win32 |
263 | if( ON_WIN32 or ON_VMS ) { |
264 | |
265 | ### now extract the volume in order to be Win32 and |
266 | ### VMS friendly. |
267 | ### 'no_file' indicates that there's no file part |
268 | ### of this path, so we only get 2 bits returned. |
269 | my ($vol, $host_path) = File::Spec->splitpath( |
270 | $host_spec, 'no_file' |
271 | ); |
272 | |
273 | ### and split up the directories |
274 | my @host_dirs = File::Spec->splitdir( $host_path ); |
275 | |
276 | ### if we got a volume we pretend its a directory for |
277 | ### the sake of the file:// url |
278 | if( defined $vol and $vol ) { |
279 | |
280 | ### D:\foo\bar needs to be encoded as D|\foo\bar |
281 | ### For details, see the following link: |
282 | ### http://en.wikipedia.org/wiki/File:// |
283 | ### The RFC doesnt seem to address Windows volume |
284 | ### descriptors but it does address VMS volume |
285 | ### descriptors, however wikipedia covers a bit of |
286 | ### history regarding win32 |
287 | $vol =~ s/:$/|/ if ON_WIN32; |
0fe18d46 |
288 | |
ae592ab1 |
289 | $vol =~ s/:// if ON_VMS; |
0fe18d46 |
290 | |
5879cbe1 |
291 | ### XXX i'm not sure what cases this is addressing. |
292 | ### this comes straight from dmq's file:// patches |
293 | ### for win32. --kane |
0fe18d46 |
294 | ### According to dmq, the best summary is: |
295 | ### "if file:// urls dont look right on VMS reuse |
296 | ### the win32 logic and see if that fixes things" |
297 | |
298 | ### first element not empty? Might happen on VMS. |
299 | ### prepend the volume in that case. |
5879cbe1 |
300 | if( $host_dirs[0] ) { |
301 | unshift @host_dirs, $vol; |
0fe18d46 |
302 | |
303 | ### element empty? reuse it to store the volume |
304 | ### encoded as a directory name. (Win32/VMS) |
5879cbe1 |
305 | } else { |
306 | $host_dirs[0] = $vol; |
307 | } |
308 | } |
309 | |
310 | ### now it's in UNIX format, which is the same format |
311 | ### as used for URIs |
312 | $host_spec = File::Spec::Unix->catdir( @host_dirs ); |
313 | } |
314 | |
315 | ### now create the file:// uri from the components |
6aaee015 |
316 | $where = CREATE_FILE_URI->( |
5879cbe1 |
317 | File::Spec::Unix->catfile( |
318 | $host->{'host'} || '', |
319 | $host_spec, |
320 | $remote_file, |
321 | ) |
322 | ); |
323 | |
324 | ### its components will be in unix format, for a http://, |
325 | ### ftp:// or any other style of URI |
326 | } else { |
327 | my $mirror_path = File::Spec::Unix->catfile( |
328 | $host->{'path'}, $remote_file |
329 | ); |
330 | |
6aaee015 |
331 | my %args = ( scheme => $host->{scheme}, |
332 | host => $host->{host}, |
333 | path => $mirror_path, |
334 | ); |
335 | |
336 | $where = $self->_host_to_uri( %args ); |
337 | } |
338 | |
339 | my $abs = $self->__file_fetch( from => $where, |
340 | to => $local_path, |
341 | verbose => $verbose ); |
342 | |
343 | ### we got a path back? |
344 | if( $abs ) { |
345 | ### store where we fetched it ### |
346 | $modobj->status->fetch( $abs ); |
347 | |
348 | ### this host is good, the previous ones are apparently |
349 | ### not, so mark them as such. |
350 | $self->_add_fail_host( host => $_ ) for @maybe_bad_host; |
351 | |
352 | return $abs; |
353 | } |
354 | |
355 | ### so we tried to get the file but didn't actually fetch it -- |
356 | ### there's a chance this host is bad. mark it as such and |
357 | ### actually flag it back if we manage to get the file |
358 | ### somewhere else |
359 | push @maybe_bad_host, $host; |
360 | } |
361 | } |
362 | |
363 | $found_host |
364 | ? error(loc("Fetch failed: host list exhausted " . |
365 | "-- are you connected today?")) |
366 | : error(loc("No hosts found to download from " . |
367 | "-- check your config")); |
368 | } |
369 | |
370 | return; |
371 | } |
372 | |
373 | sub __file_fetch { |
374 | my $self = shift; |
375 | my $conf = $self->configure_object; |
376 | my %hash = @_; |
377 | |
378 | my ($where, $local_path, $verbose); |
379 | my $tmpl = { |
380 | from => { required => 1, store => \$where }, |
381 | to => { required => 1, store => \$local_path }, |
382 | verbose => { default => $conf->get_conf('verbose'), |
383 | store => \$verbose }, |
384 | }; |
385 | |
386 | check( $tmpl, \%hash ) or return; |
387 | |
388 | msg(loc("Trying to get '%1'", $where ), $verbose ); |
389 | |
390 | ### build the object ### |
391 | my $ff = File::Fetch->new( uri => $where ); |
392 | |
393 | ### sanity check ### |
394 | error(loc("Bad uri '%1'",$where)), return unless $ff; |
395 | |
396 | if( my $file = $ff->fetch( to => $local_path ) ) { |
397 | unless( -e $file && -s _ ) { |
398 | msg(loc("'%1' said it fetched '%2', but it was not created", |
399 | 'File::Fetch', $file), $verbose); |
400 | |
401 | } else { |
402 | my $abs = File::Spec->rel2abs( $file ); |
4443dd53 |
403 | |
404 | ### so TTLs will work |
405 | $self->_update_timestamp( file => $abs ); |
406 | |
6aaee015 |
407 | return $abs; |
408 | } |
409 | |
410 | } else { |
411 | error(loc("Fetching of '%1' failed: %2", $where, $ff->error)); |
412 | } |
413 | |
414 | return; |
415 | } |
416 | |
417 | =pod |
418 | |
419 | =head2 _add_fail_host( host => $host_hashref ) |
420 | |
421 | Mark a particular host as bad. This makes C<CPANPLUS::Internals::Fetch> |
422 | skip it in fetches until this cache is flushed. |
423 | |
424 | =head2 _host_ok( host => $host_hashref ) |
425 | |
426 | Query the cache to see if this host is ok, or if it has been flagged |
427 | as bad. |
428 | |
429 | Returns true if the host is ok, false otherwise. |
430 | |
431 | =cut |
432 | |
433 | { ### caching functions ### |
434 | |
435 | sub _add_fail_host { |
436 | my $self = shift; |
437 | my %hash = @_; |
438 | |
439 | my $host; |
440 | my $tmpl = { |
441 | host => { required => 1, default => {}, |
442 | strict_type => 1, store => \$host }, |
443 | }; |
444 | |
445 | check( $tmpl, \%hash ) or return; |
446 | |
447 | return $self->_hosts->{$host} = 1; |
448 | } |
449 | |
450 | sub _host_ok { |
451 | my $self = shift; |
452 | my %hash = @_; |
453 | |
454 | my $host; |
455 | my $tmpl = { |
456 | host => { required => 1, store => \$host }, |
457 | }; |
458 | |
459 | check( $tmpl, \%hash ) or return; |
460 | |
461 | return $self->_hosts->{$host} ? 0 : 1; |
462 | } |
463 | } |
464 | |
465 | |
466 | 1; |
467 | |
468 | # Local variables: |
469 | # c-indentation-style: bsd |
470 | # c-basic-offset: 4 |
471 | # indent-tabs-mode: nil |
472 | # End: |
473 | # vim: expandtab shiftwidth=4: |