Commit | Line | Data |
520c99e2 |
1 | package Archive::Extract; |
2 | |
3 | use strict; |
4 | |
5 | use Cwd qw[cwd]; |
6 | use Carp qw[carp]; |
7 | use IPC::Cmd qw[run can_run]; |
8 | use FileHandle; |
9 | use File::Path qw[mkpath]; |
10 | use File::Spec; |
11 | use File::Basename qw[dirname basename]; |
12 | use Params::Check qw[check]; |
13 | use Module::Load::Conditional qw[can_load check_install]; |
14 | use Locale::Maketext::Simple Style => 'gettext'; |
15 | |
16 | ### solaris has silly /bin/tar output ### |
17 | use constant ON_SOLARIS => $^O eq 'solaris' ? 1 : 0; |
18 | use constant FILE_EXISTS => sub { -e $_[0] ? 1 : 0 }; |
19 | |
20 | ### If these are changed, update @TYPES and the new() POD |
21 | use constant TGZ => 'tgz'; |
22 | use constant TAR => 'tar'; |
23 | use constant GZ => 'gz'; |
24 | use constant ZIP => 'zip'; |
25 | use constant BZ2 => 'bz2'; |
26 | use constant TBZ => 'tbz'; |
27 | |
28 | use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG]; |
29 | |
574b415d |
30 | $VERSION = '0.18'; |
520c99e2 |
31 | $PREFER_BIN = 0; |
32 | $WARN = 1; |
33 | $DEBUG = 0; |
34 | my @Types = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ ); # same as all constants |
35 | |
36 | local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1; |
37 | |
38 | =pod |
39 | |
40 | =head1 NAME |
41 | |
42 | Archive::Extract - A generic archive extracting mechanism |
43 | |
44 | =head1 SYNOPSIS |
45 | |
46 | use Archive::Extract; |
47 | |
48 | ### build an Archive::Extract object ### |
49 | my $ae = Archive::Extract->new( archive => 'foo.tgz' ); |
50 | |
51 | ### extract to cwd() ### |
52 | my $ok = $ae->extract; |
53 | |
54 | ### extract to /tmp ### |
55 | my $ok = $ae->extract( to => '/tmp' ); |
56 | |
57 | ### what if something went wrong? |
58 | my $ok = $ae->extract or die $ae->error; |
59 | |
60 | ### files from the archive ### |
61 | my $files = $ae->files; |
62 | |
63 | ### dir that was extracted to ### |
64 | my $outdir = $ae->extract_path; |
65 | |
66 | |
67 | ### quick check methods ### |
68 | $ae->is_tar # is it a .tar file? |
69 | $ae->is_tgz # is it a .tar.gz or .tgz file? |
70 | $ae->is_gz; # is it a .gz file? |
71 | $ae->is_zip; # is it a .zip file? |
72 | $ae->is_bz2; # is it a .bz2 file? |
73 | $ae->is_tbz; # is it a .tar.bz2 or .tbz file? |
74 | |
75 | ### absolute path to the archive you provided ### |
76 | $ae->archive; |
77 | |
78 | ### commandline tools, if found ### |
79 | $ae->bin_tar # path to /bin/tar, if found |
80 | $ae->bin_gzip # path to /bin/gzip, if found |
81 | $ae->bin_unzip # path to /bin/unzip, if found |
82 | $ae->bin_bunzip2 # path to /bin/bunzip2 if found |
83 | |
84 | =head1 DESCRIPTION |
85 | |
86 | Archive::Extract is a generic archive extraction mechanism. |
87 | |
88 | It allows you to extract any archive file of the type .tar, .tar.gz, |
89 | .gz, tar.bz2, .tbz, .bz2 or .zip without having to worry how it does |
90 | so, or use different interfaces for each type by using either perl |
91 | modules, or commandline tools on your system. |
92 | |
93 | See the C<HOW IT WORKS> section further down for details. |
94 | |
95 | =cut |
96 | |
97 | |
98 | ### see what /bin/programs are available ### |
99 | $PROGRAMS = {}; |
100 | for my $pgm (qw[tar unzip gzip bunzip2]) { |
101 | $PROGRAMS->{$pgm} = can_run($pgm); |
102 | } |
103 | |
104 | ### mapping from types to extractor methods ### |
105 | my $Mapping = { |
106 | is_tgz => '_untar', |
107 | is_tar => '_untar', |
108 | is_gz => '_gunzip', |
109 | is_zip => '_unzip', |
110 | is_tbz => '_untar', |
111 | is_bz2 => '_bunzip2', |
112 | }; |
113 | |
114 | { |
115 | my $tmpl = { |
116 | archive => { required => 1, allow => FILE_EXISTS }, |
117 | type => { default => '', allow => [ @Types ] }, |
118 | }; |
119 | |
120 | ### build accesssors ### |
121 | for my $method( keys %$tmpl, |
122 | qw[_extractor _gunzip_to files extract_path], |
123 | qw[_error_msg _error_msg_long] |
124 | ) { |
125 | no strict 'refs'; |
126 | *$method = sub { |
127 | my $self = shift; |
128 | $self->{$method} = $_[0] if @_; |
129 | return $self->{$method}; |
130 | } |
131 | } |
132 | |
133 | =head1 METHODS |
134 | |
135 | =head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE]) |
136 | |
137 | Creates a new C<Archive::Extract> object based on the archive file you |
138 | passed it. Automatically determines the type of archive based on the |
139 | extension, but you can override that by explicitly providing the |
140 | C<type> argument. |
141 | |
142 | Valid values for C<type> are: |
143 | |
144 | =over 4 |
145 | |
146 | =item tar |
147 | |
148 | Standard tar files, as produced by, for example, C</bin/tar>. |
149 | Corresponds to a C<.tar> suffix. |
150 | |
151 | =item tgz |
152 | |
153 | Gzip compressed tar files, as produced by, for example C</bin/tar -z>. |
154 | Corresponds to a C<.tgz> or C<.tar.gz> suffix. |
155 | |
156 | =item gz |
157 | |
158 | Gzip compressed file, as produced by, for example C</bin/gzip>. |
159 | Corresponds to a C<.gz> suffix. |
160 | |
161 | =item zip |
162 | |
163 | Zip compressed file, as produced by, for example C</bin/zip>. |
164 | Corresponds to a C<.zip>, C<.jar> or C<.par> suffix. |
165 | |
166 | =item bz2 |
167 | |
168 | Bzip2 compressed file, as produced by, for example, C</bin/bzip2>. |
169 | Corresponds to a C<.bz2> suffix. |
170 | |
171 | =item tbz |
172 | |
173 | Bzip2 compressed tar file, as produced by, for exmample C</bin/tar -j>. |
174 | Corresponds to a C<.tbz> or C<.tar.bz2> suffix. |
175 | |
176 | =back |
177 | |
178 | Returns a C<Archive::Extract> object on success, or false on failure. |
179 | |
180 | =cut |
181 | |
182 | ### constructor ### |
183 | sub new { |
184 | my $class = shift; |
185 | my %hash = @_; |
186 | |
187 | my $parsed = check( $tmpl, \%hash ) or return; |
188 | |
189 | ### make sure we have an absolute path ### |
190 | my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} ); |
191 | |
192 | ### figure out the type, if it wasn't already specified ### |
193 | unless ( $parsed->{type} ) { |
194 | $parsed->{type} = |
574b415d |
195 | $ar =~ /.+?\.(?:tar\.gz|tgz)$/i ? TGZ : |
520c99e2 |
196 | $ar =~ /.+?\.gz$/i ? GZ : |
197 | $ar =~ /.+?\.tar$/i ? TAR : |
198 | $ar =~ /.+?\.(zip|jar|par)$/i ? ZIP : |
199 | $ar =~ /.+?\.(?:tbz|tar\.bz2?)$/i ? TBZ : |
200 | $ar =~ /.+?\.bz2$/i ? BZ2 : |
201 | ''; |
202 | |
203 | } |
204 | |
205 | ### don't know what type of file it is ### |
206 | return __PACKAGE__->_error(loc("Cannot determine file type for '%1'", |
207 | $parsed->{archive} )) unless $parsed->{type}; |
208 | |
209 | return bless $parsed, $class; |
210 | } |
211 | } |
212 | |
213 | =head2 $ae->extract( [to => '/output/path'] ) |
214 | |
215 | Extracts the archive represented by the C<Archive::Extract> object to |
216 | the path of your choice as specified by the C<to> argument. Defaults to |
217 | C<cwd()>. |
218 | |
219 | Since C<.gz> files never hold a directory, but only a single file; if |
220 | the C<to> argument is an existing directory, the file is extracted |
221 | there, with it's C<.gz> suffix stripped. |
222 | If the C<to> argument is not an existing directory, the C<to> argument |
223 | is understood to be a filename, if the archive type is C<gz>. |
224 | In the case that you did not specify a C<to> argument, the output |
225 | file will be the name of the archive file, stripped from it's C<.gz> |
226 | suffix, in the current working directory. |
227 | |
228 | C<extract> will try a pure perl solution first, and then fall back to |
229 | commandline tools if they are available. See the C<GLOBAL VARIABLES> |
230 | section below on how to alter this behaviour. |
231 | |
232 | It will return true on success, and false on failure. |
233 | |
234 | On success, it will also set the follow attributes in the object: |
235 | |
236 | =over 4 |
237 | |
238 | =item $ae->extract_path |
239 | |
240 | This is the directory that the files where extracted to. |
241 | |
242 | =item $ae->files |
243 | |
244 | This is an array ref with the paths of all the files in the archive, |
245 | relative to the C<to> argument you specified. |
246 | To get the full path to an extracted file, you would use: |
247 | |
248 | File::Spec->catfile( $to, $ae->files->[0] ); |
249 | |
250 | Note that all files from a tar archive will be in unix format, as per |
251 | the tar specification. |
252 | |
253 | =back |
254 | |
255 | =cut |
256 | |
257 | sub extract { |
258 | my $self = shift; |
259 | my %hash = @_; |
260 | |
261 | my $to; |
262 | my $tmpl = { |
263 | to => { default => '.', store => \$to } |
264 | }; |
265 | |
266 | check( $tmpl, \%hash ) or return; |
267 | |
268 | ### so 'to' could be a file or a dir, depending on whether it's a .gz |
269 | ### file, or basically anything else. |
270 | ### so, check that, then act accordingly. |
271 | ### set an accessor specifically so _gunzip can know what file to extract |
272 | ### to. |
273 | my $dir; |
274 | { ### a foo.gz file |
275 | if( $self->is_gz or $self->is_bz2 ) { |
276 | |
277 | my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2)$//i; |
278 | |
279 | ### to is a dir? |
280 | if ( -d $to ) { |
281 | $dir = $to; |
282 | $self->_gunzip_to( basename($cp) ); |
283 | |
284 | ### then it's a filename |
285 | } else { |
286 | $dir = dirname($to); |
287 | $self->_gunzip_to( basename($to) ); |
288 | } |
289 | |
290 | ### not a foo.gz file |
291 | } else { |
292 | $dir = $to; |
293 | } |
294 | } |
295 | |
296 | ### make the dir if it doesn't exist ### |
297 | unless( -d $dir ) { |
298 | eval { mkpath( $dir ) }; |
299 | |
300 | return $self->_error(loc("Could not create path '%1': %2", $dir, $@)) |
301 | if $@; |
302 | } |
303 | |
304 | ### get the current dir, to restore later ### |
305 | my $cwd = cwd(); |
306 | |
307 | my $ok = 1; |
308 | EXTRACT: { |
309 | |
310 | ### chdir to the target dir ### |
311 | unless( chdir $dir ) { |
312 | $self->_error(loc("Could not chdir to '%1': %2", $dir, $!)); |
313 | $ok = 0; last EXTRACT; |
314 | } |
315 | |
316 | ### set files to an empty array ref, so there's always an array |
317 | ### ref IN the accessor, to avoid errors like: |
318 | ### Can't use an undefined value as an ARRAY reference at |
319 | ### ../lib/Archive/Extract.pm line 742. (rt #19815) |
320 | $self->files( [] ); |
321 | |
322 | ### find what extractor method to use ### |
323 | while( my($type,$method) = each %$Mapping ) { |
324 | |
325 | ### call the corresponding method if the type is OK ### |
326 | if( $self->$type) { |
327 | $ok = $self->$method(); |
328 | } |
329 | } |
330 | |
331 | ### warn something went wrong if we didn't get an OK ### |
332 | $self->_error(loc("Extract failed, no extractor found")) |
333 | unless $ok; |
334 | |
335 | } |
336 | |
337 | ### and chdir back ### |
338 | unless( chdir $cwd ) { |
339 | $self->_error(loc("Could not chdir back to start dir '%1': %2'", |
340 | $cwd, $!)); |
341 | } |
342 | |
343 | return $ok; |
344 | } |
345 | |
346 | =pod |
347 | |
348 | =head1 ACCESSORS |
349 | |
350 | =head2 $ae->error([BOOL]) |
351 | |
352 | Returns the last encountered error as string. |
353 | Pass it a true value to get the C<Carp::longmess()> output instead. |
354 | |
355 | =head2 $ae->extract_path |
356 | |
357 | This is the directory the archive got extracted to. |
358 | See C<extract()> for details. |
359 | |
360 | =head2 $ae->files |
361 | |
362 | This is an array ref holding all the paths from the archive. |
363 | See C<extract()> for details. |
364 | |
365 | =head2 $ae->archive |
366 | |
367 | This is the full path to the archive file represented by this |
368 | C<Archive::Extract> object. |
369 | |
370 | =head2 $ae->type |
371 | |
372 | This is the type of archive represented by this C<Archive::Extract> |
373 | object. See accessors below for an easier way to use this. |
374 | See the C<new()> method for details. |
375 | |
376 | =head2 $ae->types |
377 | |
378 | Returns a list of all known C<types> for C<Archive::Extract>'s |
379 | C<new> method. |
380 | |
381 | =cut |
382 | |
383 | sub types { return @Types } |
384 | |
385 | =head2 $ae->is_tgz |
386 | |
387 | Returns true if the file is of type C<.tar.gz>. |
388 | See the C<new()> method for details. |
389 | |
390 | =head2 $ae->is_tar |
391 | |
392 | Returns true if the file is of type C<.tar>. |
393 | See the C<new()> method for details. |
394 | |
395 | =head2 $ae->is_gz |
396 | |
397 | Returns true if the file is of type C<.gz>. |
398 | See the C<new()> method for details. |
399 | |
400 | =head2 $ae->is_zip |
401 | |
402 | Returns true if the file is of type C<.zip>. |
403 | See the C<new()> method for details. |
404 | |
405 | =cut |
406 | |
407 | ### quick check methods ### |
408 | sub is_tgz { return $_[0]->type eq TGZ } |
409 | sub is_tar { return $_[0]->type eq TAR } |
410 | sub is_gz { return $_[0]->type eq GZ } |
411 | sub is_zip { return $_[0]->type eq ZIP } |
412 | sub is_tbz { return $_[0]->type eq TBZ } |
413 | sub is_bz2 { return $_[0]->type eq BZ2 } |
414 | |
415 | =pod |
416 | |
417 | =head2 $ae->bin_tar |
418 | |
419 | Returns the full path to your tar binary, if found. |
420 | |
421 | =head2 $ae->bin_gzip |
422 | |
423 | Returns the full path to your gzip binary, if found |
424 | |
425 | =head2 $ae->bin_unzip |
426 | |
427 | Returns the full path to your unzip binary, if found |
428 | |
429 | =cut |
430 | |
431 | ### paths to commandline tools ### |
432 | sub bin_gzip { return $PROGRAMS->{'gzip'} if $PROGRAMS->{'gzip'} } |
433 | sub bin_unzip { return $PROGRAMS->{'unzip'} if $PROGRAMS->{'unzip'} } |
434 | sub bin_tar { return $PROGRAMS->{'tar'} if $PROGRAMS->{'tar'} } |
435 | sub bin_bunzip2 { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} } |
436 | |
437 | ################################# |
438 | # |
439 | # Untar code |
440 | # |
441 | ################################# |
442 | |
443 | |
444 | ### untar wrapper... goes to either Archive::Tar or /bin/tar |
445 | ### depending on $PREFER_BIN |
446 | sub _untar { |
447 | my $self = shift; |
448 | |
449 | ### bzip2 support in A::T via IO::Uncompress::Bzip2 |
450 | my @methods = qw[_untar_at _untar_bin]; |
574b415d |
451 | @methods = reverse @methods if $PREFER_BIN; |
520c99e2 |
452 | |
453 | for my $method (@methods) { |
454 | $self->_extractor($method) && return 1 if $self->$method(); |
455 | } |
456 | |
457 | return $self->_error(loc("Unable to untar file '%1'", $self->archive)); |
458 | } |
459 | |
460 | ### use /bin/tar to extract ### |
461 | sub _untar_bin { |
462 | my $self = shift; |
463 | |
464 | ### check for /bin/tar ### |
465 | return $self->_error(loc("No '%1' program found", '/bin/tar')) |
466 | unless $self->bin_tar; |
467 | |
468 | ### check for /bin/gzip if we need it ### |
469 | return $self->_error(loc("No '%1' program found", '/bin/gzip')) |
470 | if $self->is_tgz && !$self->bin_gzip; |
471 | |
472 | return $self->_error(loc("No '%1' program found", '/bin/bunzip2')) |
473 | if $self->is_tbz && !$self->bin_bunzip2; |
474 | |
475 | ### XXX figure out how to make IPC::Run do this in one call -- |
476 | ### currently i don't know how to get output of a command after a pipe |
477 | ### trapped in a scalar. Mailed barries about this 5th of june 2004. |
478 | |
479 | |
480 | |
481 | ### see what command we should run, based on whether |
482 | ### it's a .tgz or .tar |
483 | |
484 | ### XXX solaris tar and bsdtar are having different outputs |
485 | ### depending whether you run with -x or -t |
486 | ### compensate for this insanity by running -t first, then -x |
487 | { my $cmd = |
488 | $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|', |
489 | $self->bin_tar, '-tf', '-'] : |
490 | $self->is_tbz ? [$self->bin_bunzip2, '-c', $self->archive, '|', |
491 | $self->bin_tar, '-tf', '-'] : |
492 | [$self->bin_tar, '-tf', $self->archive]; |
493 | |
494 | ### run the command ### |
495 | my $buffer = ''; |
496 | unless( scalar run( command => $cmd, |
497 | buffer => \$buffer, |
498 | verbose => $DEBUG ) |
499 | ) { |
500 | return $self->_error(loc( |
501 | "Error listing contents of archive '%1': %2", |
502 | $self->archive, $buffer )); |
503 | } |
504 | |
505 | ### no buffers available? |
506 | if( !IPC::Cmd->can_capture_buffer and !$buffer ) { |
507 | $self->_error( $self->_no_buffer_files( $self->archive ) ); |
508 | |
509 | } else { |
510 | ### if we're on solaris we /might/ be using /bin/tar, which has |
511 | ### a weird output format... we might also be using |
512 | ### /usr/local/bin/tar, which is gnu tar, which is perfectly |
513 | ### fine... so we have to do some guessing here =/ |
514 | my @files = map { chomp; |
515 | !ON_SOLARIS ? $_ |
516 | : (m|^ x \s+ # 'xtract' -- sigh |
517 | (.+?), # the actual file name |
518 | \s+ [\d,.]+ \s bytes, |
519 | \s+ [\d,.]+ \s tape \s blocks |
520 | |x ? $1 : $_); |
521 | |
522 | } split $/, $buffer; |
523 | |
524 | ### store the files that are in the archive ### |
525 | $self->files(\@files); |
526 | } |
527 | } |
528 | |
529 | ### now actually extract it ### |
530 | { my $cmd = |
531 | $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|', |
532 | $self->bin_tar, '-xf', '-'] : |
533 | $self->is_tbz ? [$self->bin_bunzip2, '-c', $self->archive, '|', |
534 | $self->bin_tar, '-xf', '-'] : |
535 | [$self->bin_tar, '-xf', $self->archive]; |
536 | |
537 | my $buffer = ''; |
538 | unless( scalar run( command => $cmd, |
539 | buffer => \$buffer, |
540 | verbose => $DEBUG ) |
541 | ) { |
542 | return $self->_error(loc("Error extracting archive '%1': %2", |
543 | $self->archive, $buffer )); |
544 | } |
545 | |
546 | ### we might not have them, due to lack of buffers |
547 | if( $self->files ) { |
548 | ### now that we've extracted, figure out where we extracted to |
549 | my $dir = $self->__get_extract_dir( $self->files ); |
550 | |
551 | ### store the extraction dir ### |
552 | $self->extract_path( $dir ); |
553 | } |
554 | } |
555 | |
556 | ### we got here, no error happened |
557 | return 1; |
558 | } |
559 | |
560 | ### use archive::tar to extract ### |
561 | sub _untar_at { |
562 | my $self = shift; |
563 | |
564 | ### we definitely need A::T, so load that first |
565 | { my $use_list = { 'Archive::Tar' => '0.0' }; |
566 | |
567 | unless( can_load( modules => $use_list ) ) { |
568 | |
569 | return $self->_error(loc("You do not have '%1' installed - " . |
570 | "Please install it as soon as possible.", |
571 | 'Archive::Tar')); |
572 | } |
573 | } |
574 | |
575 | ### we might pass it a filehandle if it's a .tbz file.. |
576 | my $fh_to_read = $self->archive; |
577 | |
578 | ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib |
579 | ### if A::T's version is 0.99 or higher |
580 | if( $self->is_tgz ) { |
581 | my $use_list = { 'Compress::Zlib' => '0.0' }; |
582 | $use_list->{ 'IO::Zlib' } = '0.0' |
583 | if $Archive::Tar::VERSION >= '0.99'; |
584 | |
585 | unless( can_load( modules => $use_list ) ) { |
586 | my $which = join '/', sort keys %$use_list; |
587 | |
588 | return $self->_error(loc( |
589 | "You do not have '%1' installed - Please ". |
590 | "install it as soon as possible.", $which)); |
591 | |
592 | } |
593 | } elsif ( $self->is_tbz ) { |
594 | my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' }; |
595 | unless( can_load( modules => $use_list ) ) { |
596 | return $self->_error(loc( |
597 | "You do not have '%1' installed - Please " . |
598 | "install it as soon as possible.", |
599 | 'IO::Uncompress::Bunzip2')); |
600 | } |
601 | |
602 | my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or |
603 | return $self->_error(loc("Unable to open '%1': %2", |
604 | $self->archive, |
605 | $IO::Uncompress::Bunzip2::Bunzip2Error)); |
606 | |
607 | $fh_to_read = $bz; |
608 | } |
609 | |
610 | my $tar = Archive::Tar->new(); |
611 | |
612 | ### only tell it it's compressed if it's a .tgz, as we give it a file |
613 | ### handle if it's a .tbz |
614 | unless( $tar->read( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) ) ) { |
615 | return $self->_error(loc("Unable to read '%1': %2", $self->archive, |
616 | $Archive::Tar::error)); |
617 | } |
618 | |
619 | ### workaround to prevent Archive::Tar from setting uid, which |
620 | ### is a potential security hole. -autrijus |
621 | ### have to do it here, since A::T needs to be /loaded/ first ### |
622 | { no strict 'refs'; local $^W; |
623 | |
624 | ### older versions of archive::tar <= 0.23 |
625 | *Archive::Tar::chown = sub {}; |
626 | } |
627 | |
628 | ### for version of archive::tar > 1.04 |
629 | local $Archive::Tar::Constant::CHOWN = 0; |
630 | |
631 | { local $^W; # quell 'splice() offset past end of array' warnings |
632 | # on older versions of A::T |
633 | |
634 | ### older archive::tar always returns $self, return value slightly |
635 | ### fux0r3d because of it. |
636 | $tar->extract() |
637 | or return $self->_error(loc("Unable to extract '%1': %2", |
638 | $self->archive, $Archive::Tar::error )); |
639 | } |
640 | |
641 | my @files = $tar->list_files; |
642 | my $dir = $self->__get_extract_dir( \@files ); |
643 | |
644 | ### store the files that are in the archive ### |
645 | $self->files(\@files); |
646 | |
647 | ### store the extraction dir ### |
648 | $self->extract_path( $dir ); |
649 | |
650 | ### check if the dir actually appeared ### |
651 | return 1 if -d $self->extract_path; |
652 | |
653 | ### no dir, we failed ### |
654 | return $self->_error(loc("Unable to extract '%1': %2", |
655 | $self->archive, $Archive::Tar::error )); |
656 | } |
657 | |
658 | ################################# |
659 | # |
660 | # Gunzip code |
661 | # |
662 | ################################# |
663 | |
664 | ### gunzip wrapper... goes to either Compress::Zlib or /bin/gzip |
665 | ### depending on $PREFER_BIN |
666 | sub _gunzip { |
667 | my $self = shift; |
668 | |
669 | my @methods = qw[_gunzip_cz _gunzip_bin]; |
670 | @methods = reverse @methods if $PREFER_BIN; |
671 | |
672 | for my $method (@methods) { |
673 | $self->_extractor($method) && return 1 if $self->$method(); |
674 | } |
675 | |
676 | return $self->_error(loc("Unable to gunzip file '%1'", $self->archive)); |
677 | } |
678 | |
679 | sub _gunzip_bin { |
680 | my $self = shift; |
681 | |
682 | ### check for /bin/gzip -- we need it ### |
683 | return $self->_error(loc("No '%1' program found", '/bin/gzip')) |
684 | unless $self->bin_gzip; |
685 | |
686 | |
687 | my $fh = FileHandle->new('>'. $self->_gunzip_to) or |
688 | return $self->_error(loc("Could not open '%1' for writing: %2", |
689 | $self->_gunzip_to, $! )); |
690 | |
691 | my $cmd = [ $self->bin_gzip, '-cdf', $self->archive ]; |
692 | |
693 | my $buffer; |
694 | unless( scalar run( command => $cmd, |
695 | verbose => $DEBUG, |
696 | buffer => \$buffer ) |
697 | ) { |
698 | return $self->_error(loc("Unable to gunzip '%1': %2", |
699 | $self->archive, $buffer)); |
700 | } |
701 | |
702 | ### no buffers available? |
703 | if( !IPC::Cmd->can_capture_buffer and !$buffer ) { |
704 | $self->_error( $self->_no_buffer_content( $self->archive ) ); |
705 | } |
706 | |
707 | print $fh $buffer if defined $buffer; |
708 | |
709 | close $fh; |
710 | |
711 | ### set what files where extract, and where they went ### |
712 | $self->files( [$self->_gunzip_to] ); |
713 | $self->extract_path( File::Spec->rel2abs(cwd()) ); |
714 | |
715 | return 1; |
716 | } |
717 | |
718 | sub _gunzip_cz { |
719 | my $self = shift; |
720 | |
721 | my $use_list = { 'Compress::Zlib' => '0.0' }; |
722 | unless( can_load( modules => $use_list ) ) { |
723 | return $self->_error(loc("You do not have '%1' installed - Please " . |
724 | "install it as soon as possible.", 'Compress::Zlib')); |
725 | } |
726 | |
727 | my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or |
728 | return $self->_error(loc("Unable to open '%1': %2", |
729 | $self->archive, $Compress::Zlib::gzerrno)); |
730 | |
731 | my $fh = FileHandle->new('>'. $self->_gunzip_to) or |
732 | return $self->_error(loc("Could not open '%1' for writing: %2", |
733 | $self->_gunzip_to, $! )); |
734 | |
735 | my $buffer; |
736 | $fh->print($buffer) while $gz->gzread($buffer) > 0; |
737 | $fh->close; |
738 | |
739 | ### set what files where extract, and where they went ### |
740 | $self->files( [$self->_gunzip_to] ); |
741 | $self->extract_path( File::Spec->rel2abs(cwd()) ); |
742 | |
743 | return 1; |
744 | } |
745 | |
746 | ################################# |
747 | # |
748 | # Unzip code |
749 | # |
750 | ################################# |
751 | |
752 | ### unzip wrapper... goes to either Archive::Zip or /bin/unzip |
753 | ### depending on $PREFER_BIN |
754 | sub _unzip { |
755 | my $self = shift; |
756 | |
757 | my @methods = qw[_unzip_az _unzip_bin]; |
758 | @methods = reverse @methods if $PREFER_BIN; |
759 | |
760 | for my $method (@methods) { |
761 | $self->_extractor($method) && return 1 if $self->$method(); |
762 | } |
763 | |
764 | return $self->_error(loc("Unable to gunzip file '%1'", $self->archive)); |
765 | } |
766 | |
767 | sub _unzip_bin { |
768 | my $self = shift; |
769 | |
770 | ### check for /bin/gzip if we need it ### |
771 | return $self->_error(loc("No '%1' program found", '/bin/unzip')) |
772 | unless $self->bin_unzip; |
773 | |
774 | |
775 | ### first, get the files.. it must be 2 different commands with 'unzip' :( |
776 | { my $cmd = [ $self->bin_unzip, '-Z', '-1', $self->archive ]; |
777 | |
778 | my $buffer = ''; |
779 | unless( scalar run( command => $cmd, |
780 | verbose => $DEBUG, |
781 | buffer => \$buffer ) |
782 | ) { |
783 | return $self->_error(loc("Unable to unzip '%1': %2", |
784 | $self->archive, $buffer)); |
785 | } |
786 | |
787 | ### no buffers available? |
788 | if( !IPC::Cmd->can_capture_buffer and !$buffer ) { |
789 | $self->_error( $self->_no_buffer_files( $self->archive ) ); |
790 | |
791 | } else { |
792 | $self->files( [split $/, $buffer] ); |
793 | } |
794 | } |
795 | |
796 | ### now, extract the archive ### |
797 | { my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ]; |
798 | |
799 | my $buffer; |
800 | unless( scalar run( command => $cmd, |
801 | verbose => $DEBUG, |
802 | buffer => \$buffer ) |
803 | ) { |
804 | return $self->_error(loc("Unable to unzip '%1': %2", |
805 | $self->archive, $buffer)); |
806 | } |
807 | |
808 | if( scalar @{$self->files} ) { |
809 | my $files = $self->files; |
810 | my $dir = $self->__get_extract_dir( $files ); |
811 | |
812 | $self->extract_path( $dir ); |
813 | } |
814 | } |
815 | |
816 | return 1; |
817 | } |
818 | |
819 | sub _unzip_az { |
820 | my $self = shift; |
821 | |
822 | my $use_list = { 'Archive::Zip' => '0.0' }; |
823 | unless( can_load( modules => $use_list ) ) { |
824 | return $self->_error(loc("You do not have '%1' installed - Please " . |
825 | "install it as soon as possible.", 'Archive::Zip')); |
826 | } |
827 | |
828 | my $zip = Archive::Zip->new(); |
829 | |
830 | unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) { |
831 | return $self->_error(loc("Unable to read '%1'", $self->archive)); |
832 | } |
833 | |
834 | my @files; |
835 | ### have to extract every memeber individually ### |
836 | for my $member ($zip->members) { |
837 | push @files, $member->{fileName}; |
838 | |
839 | unless( $zip->extractMember($member) == &Archive::Zip::AZ_OK ) { |
840 | return $self->_error(loc("Extraction of '%1' from '%2' failed", |
841 | $member->{fileName}, $self->archive )); |
842 | } |
843 | } |
844 | |
845 | my $dir = $self->__get_extract_dir( \@files ); |
846 | |
847 | ### set what files where extract, and where they went ### |
848 | $self->files( \@files ); |
849 | $self->extract_path( File::Spec->rel2abs($dir) ); |
850 | |
851 | return 1; |
852 | } |
853 | |
854 | sub __get_extract_dir { |
855 | my $self = shift; |
856 | my $files = shift || []; |
857 | |
858 | return unless scalar @$files; |
859 | |
860 | my($dir1, $dir2); |
861 | for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) { |
862 | my($dir,$pos) = @$aref; |
863 | |
864 | ### add a catdir(), so that any trailing slashes get |
865 | ### take care of (removed) |
866 | ### also, a catdir() normalises './dir/foo' to 'dir/foo'; |
867 | ### which was the problem in bug #23999 |
868 | my $res = -d $files->[$pos] |
869 | ? File::Spec->catdir( $files->[$pos], '' ) |
870 | : File::Spec->catdir( dirname( $files->[$pos] ) ); |
871 | |
872 | $$dir = $res; |
873 | } |
874 | |
875 | ### if the first and last dir don't match, make sure the |
876 | ### dirname is not set wrongly |
877 | my $dir; |
878 | |
879 | ### dirs are the same, so we know for sure what the extract dir is |
880 | if( $dir1 eq $dir2 ) { |
881 | $dir = $dir1; |
882 | |
883 | ### dirs are different.. do they share the base dir? |
884 | ### if so, use that, if not, fall back to '.' |
885 | } else { |
886 | my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0]; |
887 | my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0]; |
888 | |
889 | $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' ); |
890 | } |
891 | |
892 | return File::Spec->rel2abs( $dir ); |
893 | } |
894 | |
895 | ################################# |
896 | # |
897 | # Bunzip2 code |
898 | # |
899 | ################################# |
900 | |
901 | ### bunzip2 wrapper... |
902 | sub _bunzip2 { |
903 | my $self = shift; |
904 | |
905 | my @methods = qw[_bunzip2_cz2 _bunzip2_bin]; |
906 | @methods = reverse @methods if $PREFER_BIN; |
907 | |
908 | for my $method (@methods) { |
909 | $self->_extractor($method) && return 1 if $self->$method(); |
910 | } |
911 | |
912 | return $self->_error(loc("Unable to bunzip2 file '%1'", $self->archive)); |
913 | } |
914 | |
915 | sub _bunzip2_bin { |
916 | my $self = shift; |
917 | |
918 | ### check for /bin/gzip -- we need it ### |
919 | return $self->_error(loc("No '%1' program found", '/bin/bunzip2')) |
920 | unless $self->bin_bunzip2; |
921 | |
922 | |
923 | my $fh = FileHandle->new('>'. $self->_gunzip_to) or |
924 | return $self->_error(loc("Could not open '%1' for writing: %2", |
925 | $self->_gunzip_to, $! )); |
926 | |
927 | my $cmd = [ $self->bin_bunzip2, '-c', $self->archive ]; |
928 | |
929 | my $buffer; |
930 | unless( scalar run( command => $cmd, |
931 | verbose => $DEBUG, |
932 | buffer => \$buffer ) |
933 | ) { |
934 | return $self->_error(loc("Unable to bunzip2 '%1': %2", |
935 | $self->archive, $buffer)); |
936 | } |
937 | |
938 | ### no buffers available? |
939 | if( !IPC::Cmd->can_capture_buffer and !$buffer ) { |
940 | $self->_error( $self->_no_buffer_content( $self->archive ) ); |
941 | } |
942 | |
943 | print $fh $buffer if defined $buffer; |
944 | |
945 | close $fh; |
946 | |
947 | ### set what files where extract, and where they went ### |
948 | $self->files( [$self->_gunzip_to] ); |
949 | $self->extract_path( File::Spec->rel2abs(cwd()) ); |
950 | |
951 | return 1; |
952 | } |
953 | |
954 | ### using cz2, the compact versions... this we use mainly in archive::tar |
955 | ### extractor.. |
956 | # sub _bunzip2_cz1 { |
957 | # my $self = shift; |
958 | # |
959 | # my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' }; |
960 | # unless( can_load( modules => $use_list ) ) { |
961 | # return $self->_error(loc("You do not have '%1' installed - Please " . |
962 | # "install it as soon as possible.", |
963 | # 'IO::Uncompress::Bunzip2')); |
964 | # } |
965 | # |
966 | # my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or |
967 | # return $self->_error(loc("Unable to open '%1': %2", |
968 | # $self->archive, |
969 | # $IO::Uncompress::Bunzip2::Bunzip2Error)); |
970 | # |
971 | # my $fh = FileHandle->new('>'. $self->_gunzip_to) or |
972 | # return $self->_error(loc("Could not open '%1' for writing: %2", |
973 | # $self->_gunzip_to, $! )); |
974 | # |
975 | # my $buffer; |
976 | # $fh->print($buffer) while $bz->read($buffer) > 0; |
977 | # $fh->close; |
978 | # |
979 | # ### set what files where extract, and where they went ### |
980 | # $self->files( [$self->_gunzip_to] ); |
981 | # $self->extract_path( File::Spec->rel2abs(cwd()) ); |
982 | # |
983 | # return 1; |
984 | # } |
985 | |
986 | sub _bunzip2_cz2 { |
987 | my $self = shift; |
988 | |
989 | my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' }; |
990 | unless( can_load( modules => $use_list ) ) { |
991 | return $self->_error(loc("You do not have '%1' installed - Please " . |
992 | "install it as soon as possible.", |
993 | 'IO::Uncompress::Bunzip2')); |
994 | } |
995 | |
996 | IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to) |
997 | or return $self->_error(loc("Unable to uncompress '%1': %2", |
998 | $self->archive, |
999 | $IO::Uncompress::Bunzip2::Bunzip2Error)); |
1000 | |
1001 | ### set what files where extract, and where they went ### |
1002 | $self->files( [$self->_gunzip_to] ); |
1003 | $self->extract_path( File::Spec->rel2abs(cwd()) ); |
1004 | |
1005 | return 1; |
1006 | } |
1007 | |
1008 | |
1009 | ################################# |
1010 | # |
1011 | # Error code |
1012 | # |
1013 | ################################# |
1014 | |
1015 | sub _error { |
1016 | my $self = shift; |
1017 | my $error = shift; |
1018 | |
1019 | $self->_error_msg( $error ); |
1020 | $self->_error_msg_long( Carp::longmess($error) ); |
1021 | |
1022 | ### set $Archive::Extract::WARN to 0 to disable printing |
1023 | ### of errors |
1024 | if( $WARN ) { |
1025 | carp $DEBUG ? $self->_error_msg_long : $self->_error_msg; |
1026 | } |
1027 | |
1028 | return; |
1029 | } |
1030 | |
1031 | sub error { |
1032 | my $self = shift; |
1033 | return shift() ? $self->_error_msg_long : $self->_error_msg; |
1034 | } |
1035 | |
1036 | sub _no_buffer_files { |
1037 | my $self = shift; |
1038 | my $file = shift or return; |
1039 | return loc("No buffer captured, unable to tell ". |
1040 | "extracted files or extraction dir for '%1'", $file); |
1041 | } |
1042 | |
1043 | sub _no_buffer_content { |
1044 | my $self = shift; |
1045 | my $file = shift or return; |
1046 | return loc("No buffer captured, unable to get content for '%1'", $file); |
1047 | } |
1048 | 1; |
1049 | |
1050 | =pod |
1051 | |
1052 | =head1 HOW IT WORKS |
1053 | |
1054 | C<Archive::Extract> tries first to determine what type of archive you |
1055 | are passing it, by inspecting its suffix. It does not do this by using |
1056 | Mime magic, or something related. See C<CAVEATS> below. |
1057 | |
1058 | Once it has determined the file type, it knows which extraction methods |
1059 | it can use on the archive. It will try a perl solution first, then fall |
1060 | back to a commandline tool if that fails. If that also fails, it will |
1061 | return false, indicating it was unable to extract the archive. |
1062 | See the section on C<GLOBAL VARIABLES> to see how to alter this order. |
1063 | |
1064 | =head1 CAVEATS |
1065 | |
1066 | =head2 File Extensions |
1067 | |
1068 | C<Archive::Extract> trusts on the extension of the archive to determine |
1069 | what type it is, and what extractor methods therefore can be used. If |
1070 | your archives do not have any of the extensions as described in the |
1071 | C<new()> method, you will have to specify the type explicitly, or |
1072 | C<Archive::Extract> will not be able to extract the archive for you. |
1073 | |
1074 | =head2 Bzip2 Support |
1075 | |
1076 | There's currently no very reliable pure perl Bzip2 implementation |
1077 | available, so C<Archive::Extract> can only extract C<bzip2> |
1078 | compressed archives if you have a C</bin/bunzip2> program. |
1079 | |
1080 | =head1 GLOBAL VARIABLES |
1081 | |
1082 | =head2 $Archive::Extract::DEBUG |
1083 | |
1084 | Set this variable to C<true> to have all calls to command line tools |
1085 | be printed out, including all their output. |
1086 | This also enables C<Carp::longmess> errors, instead of the regular |
1087 | C<carp> errors. |
1088 | |
1089 | Good for tracking down why things don't work with your particular |
1090 | setup. |
1091 | |
1092 | Defaults to C<false>. |
1093 | |
1094 | =head2 $Archive::Extract::WARN |
1095 | |
1096 | This variable controls whether errors encountered internally by |
1097 | C<Archive::Extract> should be C<carp>'d or not. |
1098 | |
1099 | Set to false to silence warnings. Inspect the output of the C<error()> |
1100 | method manually to see what went wrong. |
1101 | |
1102 | Defaults to C<true>. |
1103 | |
1104 | =head2 $Archive::Extract::PREFER_BIN |
1105 | |
1106 | This variables controls whether C<Archive::Extract> should prefer the |
1107 | use of perl modules, or commandline tools to extract archives. |
1108 | |
1109 | Set to C<true> to have C<Archive::Extract> prefer commandline tools. |
1110 | |
1111 | Defaults to C<false>. |
1112 | |
1113 | =head1 TODO |
1114 | |
1115 | =over 4 |
1116 | |
1117 | =item Mime magic support |
1118 | |
1119 | Maybe this module should use something like C<File::Type> to determine |
1120 | the type, rather than blindly trust the suffix. |
1121 | |
574b415d |
1122 | =head1 BUG REPORTS |
520c99e2 |
1123 | |
574b415d |
1124 | Please report bugs or other issues to E<lt>bug-archive-extract@rt.cpan.org<gt>. |
520c99e2 |
1125 | |
574b415d |
1126 | =head1 AUTHOR |
1127 | |
1128 | This module by Jos Boumans E<lt>kane@cpan.orgE<gt>. |
520c99e2 |
1129 | |
574b415d |
1130 | =head1 COPYRIGHT |
520c99e2 |
1131 | |
574b415d |
1132 | This library is free software; you may redistribute and/or modify it |
1133 | under the same terms as Perl itself. |
520c99e2 |
1134 | |
1135 | =cut |
1136 | |
1137 | # Local variables: |
1138 | # c-indentation-style: bsd |
1139 | # c-basic-offset: 4 |
1140 | # indent-tabs-mode: nil |
1141 | # End: |
1142 | # vim: expandtab shiftwidth=4: |
1143 | |