Commit | Line | Data |
520c99e2 |
1 | package Archive::Extract; |
2 | |
3 | use strict; |
4 | |
5 | use Cwd qw[cwd]; |
6 | use Carp qw[carp]; |
7 | use IPC::Cmd qw[run can_run]; |
8 | use FileHandle; |
9 | use File::Path qw[mkpath]; |
10 | use File::Spec; |
11 | use File::Basename qw[dirname basename]; |
12 | use Params::Check qw[check]; |
13 | use Module::Load::Conditional qw[can_load check_install]; |
14 | use Locale::Maketext::Simple Style => 'gettext'; |
15 | |
16 | ### solaris has silly /bin/tar output ### |
17 | use constant ON_SOLARIS => $^O eq 'solaris' ? 1 : 0; |
18 | use constant FILE_EXISTS => sub { -e $_[0] ? 1 : 0 }; |
19 | |
4f3b9739 |
20 | ### VMS may require quoting upper case command options |
21 | use constant ON_VMS => $^O eq 'VMS' ? 1 : 0; |
22 | |
520c99e2 |
23 | ### If these are changed, update @TYPES and the new() POD |
24 | use constant TGZ => 'tgz'; |
25 | use constant TAR => 'tar'; |
26 | use constant GZ => 'gz'; |
27 | use constant ZIP => 'zip'; |
28 | use constant BZ2 => 'bz2'; |
29 | use constant TBZ => 'tbz'; |
1dae2fb5 |
30 | use constant Z => 'Z'; |
520c99e2 |
31 | |
32 | use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG]; |
33 | |
6ef249b9 |
34 | $VERSION = '0.22_01'; |
520c99e2 |
35 | $PREFER_BIN = 0; |
36 | $WARN = 1; |
37 | $DEBUG = 0; |
1dae2fb5 |
38 | my @Types = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ, Z ); # same as all constants |
520c99e2 |
39 | |
40 | local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1; |
41 | |
42 | =pod |
43 | |
44 | =head1 NAME |
45 | |
46 | Archive::Extract - A generic archive extracting mechanism |
47 | |
48 | =head1 SYNOPSIS |
49 | |
50 | use Archive::Extract; |
51 | |
52 | ### build an Archive::Extract object ### |
53 | my $ae = Archive::Extract->new( archive => 'foo.tgz' ); |
54 | |
55 | ### extract to cwd() ### |
56 | my $ok = $ae->extract; |
57 | |
58 | ### extract to /tmp ### |
59 | my $ok = $ae->extract( to => '/tmp' ); |
60 | |
61 | ### what if something went wrong? |
62 | my $ok = $ae->extract or die $ae->error; |
63 | |
64 | ### files from the archive ### |
65 | my $files = $ae->files; |
66 | |
67 | ### dir that was extracted to ### |
68 | my $outdir = $ae->extract_path; |
69 | |
70 | |
71 | ### quick check methods ### |
72 | $ae->is_tar # is it a .tar file? |
73 | $ae->is_tgz # is it a .tar.gz or .tgz file? |
74 | $ae->is_gz; # is it a .gz file? |
75 | $ae->is_zip; # is it a .zip file? |
76 | $ae->is_bz2; # is it a .bz2 file? |
77 | $ae->is_tbz; # is it a .tar.bz2 or .tbz file? |
78 | |
79 | ### absolute path to the archive you provided ### |
80 | $ae->archive; |
81 | |
82 | ### commandline tools, if found ### |
83 | $ae->bin_tar # path to /bin/tar, if found |
84 | $ae->bin_gzip # path to /bin/gzip, if found |
85 | $ae->bin_unzip # path to /bin/unzip, if found |
86 | $ae->bin_bunzip2 # path to /bin/bunzip2 if found |
87 | |
88 | =head1 DESCRIPTION |
89 | |
90 | Archive::Extract is a generic archive extraction mechanism. |
91 | |
92 | It allows you to extract any archive file of the type .tar, .tar.gz, |
1dae2fb5 |
93 | .gz, .Z, tar.bz2, .tbz, .bz2 or .zip without having to worry how it |
94 | does so, or use different interfaces for each type by using either |
95 | perl modules, or commandline tools on your system. |
520c99e2 |
96 | |
97 | See the C<HOW IT WORKS> section further down for details. |
98 | |
99 | =cut |
100 | |
101 | |
102 | ### see what /bin/programs are available ### |
103 | $PROGRAMS = {}; |
1dae2fb5 |
104 | for my $pgm (qw[tar unzip gzip bunzip2 uncompress]) { |
520c99e2 |
105 | $PROGRAMS->{$pgm} = can_run($pgm); |
106 | } |
107 | |
108 | ### mapping from types to extractor methods ### |
109 | my $Mapping = { |
110 | is_tgz => '_untar', |
111 | is_tar => '_untar', |
112 | is_gz => '_gunzip', |
113 | is_zip => '_unzip', |
114 | is_tbz => '_untar', |
115 | is_bz2 => '_bunzip2', |
1dae2fb5 |
116 | is_Z => '_uncompress', |
520c99e2 |
117 | }; |
118 | |
119 | { |
120 | my $tmpl = { |
121 | archive => { required => 1, allow => FILE_EXISTS }, |
122 | type => { default => '', allow => [ @Types ] }, |
123 | }; |
124 | |
125 | ### build accesssors ### |
126 | for my $method( keys %$tmpl, |
127 | qw[_extractor _gunzip_to files extract_path], |
128 | qw[_error_msg _error_msg_long] |
129 | ) { |
130 | no strict 'refs'; |
131 | *$method = sub { |
132 | my $self = shift; |
133 | $self->{$method} = $_[0] if @_; |
134 | return $self->{$method}; |
135 | } |
136 | } |
137 | |
138 | =head1 METHODS |
139 | |
140 | =head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE]) |
141 | |
142 | Creates a new C<Archive::Extract> object based on the archive file you |
143 | passed it. Automatically determines the type of archive based on the |
144 | extension, but you can override that by explicitly providing the |
145 | C<type> argument. |
146 | |
147 | Valid values for C<type> are: |
148 | |
149 | =over 4 |
150 | |
151 | =item tar |
152 | |
153 | Standard tar files, as produced by, for example, C</bin/tar>. |
154 | Corresponds to a C<.tar> suffix. |
155 | |
156 | =item tgz |
157 | |
158 | Gzip compressed tar files, as produced by, for example C</bin/tar -z>. |
159 | Corresponds to a C<.tgz> or C<.tar.gz> suffix. |
160 | |
161 | =item gz |
162 | |
163 | Gzip compressed file, as produced by, for example C</bin/gzip>. |
164 | Corresponds to a C<.gz> suffix. |
165 | |
1dae2fb5 |
166 | =item Z |
167 | |
168 | Lempel-Ziv compressed file, as produced by, for example C</bin/compress>. |
169 | Corresponds to a C<.Z> suffix. |
170 | |
520c99e2 |
171 | =item zip |
172 | |
173 | Zip compressed file, as produced by, for example C</bin/zip>. |
174 | Corresponds to a C<.zip>, C<.jar> or C<.par> suffix. |
175 | |
176 | =item bz2 |
177 | |
178 | Bzip2 compressed file, as produced by, for example, C</bin/bzip2>. |
179 | Corresponds to a C<.bz2> suffix. |
180 | |
181 | =item tbz |
182 | |
183 | Bzip2 compressed tar file, as produced by, for exmample C</bin/tar -j>. |
184 | Corresponds to a C<.tbz> or C<.tar.bz2> suffix. |
185 | |
186 | =back |
187 | |
188 | Returns a C<Archive::Extract> object on success, or false on failure. |
189 | |
190 | =cut |
191 | |
192 | ### constructor ### |
193 | sub new { |
194 | my $class = shift; |
195 | my %hash = @_; |
196 | |
197 | my $parsed = check( $tmpl, \%hash ) or return; |
198 | |
199 | ### make sure we have an absolute path ### |
200 | my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} ); |
201 | |
202 | ### figure out the type, if it wasn't already specified ### |
203 | unless ( $parsed->{type} ) { |
204 | $parsed->{type} = |
574b415d |
205 | $ar =~ /.+?\.(?:tar\.gz|tgz)$/i ? TGZ : |
520c99e2 |
206 | $ar =~ /.+?\.gz$/i ? GZ : |
207 | $ar =~ /.+?\.tar$/i ? TAR : |
208 | $ar =~ /.+?\.(zip|jar|par)$/i ? ZIP : |
209 | $ar =~ /.+?\.(?:tbz|tar\.bz2?)$/i ? TBZ : |
210 | $ar =~ /.+?\.bz2$/i ? BZ2 : |
1dae2fb5 |
211 | $ar =~ /.+?\.Z$/ ? Z : |
520c99e2 |
212 | ''; |
213 | |
214 | } |
215 | |
216 | ### don't know what type of file it is ### |
217 | return __PACKAGE__->_error(loc("Cannot determine file type for '%1'", |
218 | $parsed->{archive} )) unless $parsed->{type}; |
219 | |
220 | return bless $parsed, $class; |
221 | } |
222 | } |
223 | |
224 | =head2 $ae->extract( [to => '/output/path'] ) |
225 | |
226 | Extracts the archive represented by the C<Archive::Extract> object to |
227 | the path of your choice as specified by the C<to> argument. Defaults to |
228 | C<cwd()>. |
229 | |
230 | Since C<.gz> files never hold a directory, but only a single file; if |
231 | the C<to> argument is an existing directory, the file is extracted |
232 | there, with it's C<.gz> suffix stripped. |
233 | If the C<to> argument is not an existing directory, the C<to> argument |
234 | is understood to be a filename, if the archive type is C<gz>. |
235 | In the case that you did not specify a C<to> argument, the output |
236 | file will be the name of the archive file, stripped from it's C<.gz> |
237 | suffix, in the current working directory. |
238 | |
239 | C<extract> will try a pure perl solution first, and then fall back to |
240 | commandline tools if they are available. See the C<GLOBAL VARIABLES> |
241 | section below on how to alter this behaviour. |
242 | |
243 | It will return true on success, and false on failure. |
244 | |
245 | On success, it will also set the follow attributes in the object: |
246 | |
247 | =over 4 |
248 | |
249 | =item $ae->extract_path |
250 | |
251 | This is the directory that the files where extracted to. |
252 | |
253 | =item $ae->files |
254 | |
255 | This is an array ref with the paths of all the files in the archive, |
256 | relative to the C<to> argument you specified. |
257 | To get the full path to an extracted file, you would use: |
258 | |
259 | File::Spec->catfile( $to, $ae->files->[0] ); |
260 | |
261 | Note that all files from a tar archive will be in unix format, as per |
262 | the tar specification. |
263 | |
264 | =back |
265 | |
266 | =cut |
267 | |
268 | sub extract { |
269 | my $self = shift; |
270 | my %hash = @_; |
271 | |
272 | my $to; |
273 | my $tmpl = { |
274 | to => { default => '.', store => \$to } |
275 | }; |
276 | |
277 | check( $tmpl, \%hash ) or return; |
278 | |
279 | ### so 'to' could be a file or a dir, depending on whether it's a .gz |
280 | ### file, or basically anything else. |
281 | ### so, check that, then act accordingly. |
282 | ### set an accessor specifically so _gunzip can know what file to extract |
283 | ### to. |
284 | my $dir; |
285 | { ### a foo.gz file |
1dae2fb5 |
286 | if( $self->is_gz or $self->is_bz2 or $self->is_Z) { |
520c99e2 |
287 | |
1dae2fb5 |
288 | my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2|Z)$//i; |
520c99e2 |
289 | |
290 | ### to is a dir? |
291 | if ( -d $to ) { |
292 | $dir = $to; |
293 | $self->_gunzip_to( basename($cp) ); |
294 | |
295 | ### then it's a filename |
296 | } else { |
297 | $dir = dirname($to); |
298 | $self->_gunzip_to( basename($to) ); |
299 | } |
300 | |
301 | ### not a foo.gz file |
302 | } else { |
303 | $dir = $to; |
304 | } |
305 | } |
306 | |
307 | ### make the dir if it doesn't exist ### |
308 | unless( -d $dir ) { |
309 | eval { mkpath( $dir ) }; |
310 | |
311 | return $self->_error(loc("Could not create path '%1': %2", $dir, $@)) |
312 | if $@; |
313 | } |
314 | |
315 | ### get the current dir, to restore later ### |
316 | my $cwd = cwd(); |
317 | |
318 | my $ok = 1; |
319 | EXTRACT: { |
320 | |
321 | ### chdir to the target dir ### |
322 | unless( chdir $dir ) { |
323 | $self->_error(loc("Could not chdir to '%1': %2", $dir, $!)); |
324 | $ok = 0; last EXTRACT; |
325 | } |
326 | |
327 | ### set files to an empty array ref, so there's always an array |
328 | ### ref IN the accessor, to avoid errors like: |
329 | ### Can't use an undefined value as an ARRAY reference at |
330 | ### ../lib/Archive/Extract.pm line 742. (rt #19815) |
331 | $self->files( [] ); |
332 | |
333 | ### find what extractor method to use ### |
334 | while( my($type,$method) = each %$Mapping ) { |
335 | |
336 | ### call the corresponding method if the type is OK ### |
337 | if( $self->$type) { |
338 | $ok = $self->$method(); |
339 | } |
340 | } |
341 | |
342 | ### warn something went wrong if we didn't get an OK ### |
343 | $self->_error(loc("Extract failed, no extractor found")) |
344 | unless $ok; |
345 | |
346 | } |
347 | |
348 | ### and chdir back ### |
349 | unless( chdir $cwd ) { |
350 | $self->_error(loc("Could not chdir back to start dir '%1': %2'", |
351 | $cwd, $!)); |
352 | } |
353 | |
354 | return $ok; |
355 | } |
356 | |
357 | =pod |
358 | |
359 | =head1 ACCESSORS |
360 | |
361 | =head2 $ae->error([BOOL]) |
362 | |
363 | Returns the last encountered error as string. |
364 | Pass it a true value to get the C<Carp::longmess()> output instead. |
365 | |
366 | =head2 $ae->extract_path |
367 | |
368 | This is the directory the archive got extracted to. |
369 | See C<extract()> for details. |
370 | |
371 | =head2 $ae->files |
372 | |
373 | This is an array ref holding all the paths from the archive. |
374 | See C<extract()> for details. |
375 | |
376 | =head2 $ae->archive |
377 | |
378 | This is the full path to the archive file represented by this |
379 | C<Archive::Extract> object. |
380 | |
381 | =head2 $ae->type |
382 | |
383 | This is the type of archive represented by this C<Archive::Extract> |
384 | object. See accessors below for an easier way to use this. |
385 | See the C<new()> method for details. |
386 | |
387 | =head2 $ae->types |
388 | |
389 | Returns a list of all known C<types> for C<Archive::Extract>'s |
390 | C<new> method. |
391 | |
392 | =cut |
393 | |
394 | sub types { return @Types } |
395 | |
396 | =head2 $ae->is_tgz |
397 | |
398 | Returns true if the file is of type C<.tar.gz>. |
399 | See the C<new()> method for details. |
400 | |
401 | =head2 $ae->is_tar |
402 | |
403 | Returns true if the file is of type C<.tar>. |
404 | See the C<new()> method for details. |
405 | |
406 | =head2 $ae->is_gz |
407 | |
408 | Returns true if the file is of type C<.gz>. |
409 | See the C<new()> method for details. |
410 | |
1dae2fb5 |
411 | =head2 $ae->is_Z |
412 | |
413 | Returns true if the file is of type C<.Z>. |
414 | See the C<new()> method for details. |
415 | |
520c99e2 |
416 | =head2 $ae->is_zip |
417 | |
418 | Returns true if the file is of type C<.zip>. |
419 | See the C<new()> method for details. |
420 | |
421 | =cut |
422 | |
423 | ### quick check methods ### |
424 | sub is_tgz { return $_[0]->type eq TGZ } |
425 | sub is_tar { return $_[0]->type eq TAR } |
426 | sub is_gz { return $_[0]->type eq GZ } |
427 | sub is_zip { return $_[0]->type eq ZIP } |
428 | sub is_tbz { return $_[0]->type eq TBZ } |
429 | sub is_bz2 { return $_[0]->type eq BZ2 } |
1dae2fb5 |
430 | sub is_Z { return $_[0]->type eq Z } |
520c99e2 |
431 | |
432 | =pod |
433 | |
434 | =head2 $ae->bin_tar |
435 | |
436 | Returns the full path to your tar binary, if found. |
437 | |
438 | =head2 $ae->bin_gzip |
439 | |
440 | Returns the full path to your gzip binary, if found |
441 | |
442 | =head2 $ae->bin_unzip |
443 | |
444 | Returns the full path to your unzip binary, if found |
445 | |
446 | =cut |
447 | |
448 | ### paths to commandline tools ### |
1dae2fb5 |
449 | sub bin_gzip { return $PROGRAMS->{'gzip'} if $PROGRAMS->{'gzip'} } |
450 | sub bin_unzip { return $PROGRAMS->{'unzip'} if $PROGRAMS->{'unzip'} } |
451 | sub bin_tar { return $PROGRAMS->{'tar'} if $PROGRAMS->{'tar'} } |
452 | sub bin_bunzip2 { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} } |
453 | sub bin_uncompress { return $PROGRAMS->{'uncompress'} |
454 | if $PROGRAMS->{'uncompress'} } |
520c99e2 |
455 | |
456 | ################################# |
457 | # |
458 | # Untar code |
459 | # |
460 | ################################# |
461 | |
462 | |
463 | ### untar wrapper... goes to either Archive::Tar or /bin/tar |
464 | ### depending on $PREFER_BIN |
465 | sub _untar { |
466 | my $self = shift; |
467 | |
468 | ### bzip2 support in A::T via IO::Uncompress::Bzip2 |
469 | my @methods = qw[_untar_at _untar_bin]; |
574b415d |
470 | @methods = reverse @methods if $PREFER_BIN; |
520c99e2 |
471 | |
472 | for my $method (@methods) { |
473 | $self->_extractor($method) && return 1 if $self->$method(); |
474 | } |
475 | |
476 | return $self->_error(loc("Unable to untar file '%1'", $self->archive)); |
477 | } |
478 | |
479 | ### use /bin/tar to extract ### |
480 | sub _untar_bin { |
481 | my $self = shift; |
482 | |
483 | ### check for /bin/tar ### |
484 | return $self->_error(loc("No '%1' program found", '/bin/tar')) |
485 | unless $self->bin_tar; |
486 | |
487 | ### check for /bin/gzip if we need it ### |
488 | return $self->_error(loc("No '%1' program found", '/bin/gzip')) |
489 | if $self->is_tgz && !$self->bin_gzip; |
490 | |
491 | return $self->_error(loc("No '%1' program found", '/bin/bunzip2')) |
492 | if $self->is_tbz && !$self->bin_bunzip2; |
493 | |
494 | ### XXX figure out how to make IPC::Run do this in one call -- |
495 | ### currently i don't know how to get output of a command after a pipe |
496 | ### trapped in a scalar. Mailed barries about this 5th of june 2004. |
497 | |
498 | |
499 | |
500 | ### see what command we should run, based on whether |
501 | ### it's a .tgz or .tar |
502 | |
503 | ### XXX solaris tar and bsdtar are having different outputs |
504 | ### depending whether you run with -x or -t |
505 | ### compensate for this insanity by running -t first, then -x |
506 | { my $cmd = |
507 | $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|', |
508 | $self->bin_tar, '-tf', '-'] : |
509 | $self->is_tbz ? [$self->bin_bunzip2, '-c', $self->archive, '|', |
510 | $self->bin_tar, '-tf', '-'] : |
511 | [$self->bin_tar, '-tf', $self->archive]; |
512 | |
513 | ### run the command ### |
514 | my $buffer = ''; |
515 | unless( scalar run( command => $cmd, |
516 | buffer => \$buffer, |
517 | verbose => $DEBUG ) |
518 | ) { |
519 | return $self->_error(loc( |
520 | "Error listing contents of archive '%1': %2", |
521 | $self->archive, $buffer )); |
522 | } |
523 | |
524 | ### no buffers available? |
525 | if( !IPC::Cmd->can_capture_buffer and !$buffer ) { |
526 | $self->_error( $self->_no_buffer_files( $self->archive ) ); |
527 | |
528 | } else { |
529 | ### if we're on solaris we /might/ be using /bin/tar, which has |
530 | ### a weird output format... we might also be using |
531 | ### /usr/local/bin/tar, which is gnu tar, which is perfectly |
532 | ### fine... so we have to do some guessing here =/ |
533 | my @files = map { chomp; |
534 | !ON_SOLARIS ? $_ |
535 | : (m|^ x \s+ # 'xtract' -- sigh |
536 | (.+?), # the actual file name |
537 | \s+ [\d,.]+ \s bytes, |
538 | \s+ [\d,.]+ \s tape \s blocks |
539 | |x ? $1 : $_); |
540 | |
541 | } split $/, $buffer; |
542 | |
543 | ### store the files that are in the archive ### |
544 | $self->files(\@files); |
545 | } |
546 | } |
547 | |
548 | ### now actually extract it ### |
549 | { my $cmd = |
550 | $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|', |
551 | $self->bin_tar, '-xf', '-'] : |
552 | $self->is_tbz ? [$self->bin_bunzip2, '-c', $self->archive, '|', |
553 | $self->bin_tar, '-xf', '-'] : |
554 | [$self->bin_tar, '-xf', $self->archive]; |
555 | |
556 | my $buffer = ''; |
557 | unless( scalar run( command => $cmd, |
558 | buffer => \$buffer, |
559 | verbose => $DEBUG ) |
560 | ) { |
561 | return $self->_error(loc("Error extracting archive '%1': %2", |
562 | $self->archive, $buffer )); |
563 | } |
564 | |
565 | ### we might not have them, due to lack of buffers |
566 | if( $self->files ) { |
567 | ### now that we've extracted, figure out where we extracted to |
568 | my $dir = $self->__get_extract_dir( $self->files ); |
569 | |
570 | ### store the extraction dir ### |
571 | $self->extract_path( $dir ); |
572 | } |
573 | } |
574 | |
575 | ### we got here, no error happened |
576 | return 1; |
577 | } |
578 | |
579 | ### use archive::tar to extract ### |
580 | sub _untar_at { |
581 | my $self = shift; |
582 | |
583 | ### we definitely need A::T, so load that first |
584 | { my $use_list = { 'Archive::Tar' => '0.0' }; |
585 | |
586 | unless( can_load( modules => $use_list ) ) { |
587 | |
588 | return $self->_error(loc("You do not have '%1' installed - " . |
589 | "Please install it as soon as possible.", |
590 | 'Archive::Tar')); |
591 | } |
592 | } |
593 | |
594 | ### we might pass it a filehandle if it's a .tbz file.. |
595 | my $fh_to_read = $self->archive; |
596 | |
597 | ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib |
598 | ### if A::T's version is 0.99 or higher |
599 | if( $self->is_tgz ) { |
600 | my $use_list = { 'Compress::Zlib' => '0.0' }; |
601 | $use_list->{ 'IO::Zlib' } = '0.0' |
602 | if $Archive::Tar::VERSION >= '0.99'; |
603 | |
604 | unless( can_load( modules => $use_list ) ) { |
605 | my $which = join '/', sort keys %$use_list; |
606 | |
607 | return $self->_error(loc( |
608 | "You do not have '%1' installed - Please ". |
609 | "install it as soon as possible.", $which)); |
610 | |
611 | } |
612 | } elsif ( $self->is_tbz ) { |
613 | my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' }; |
614 | unless( can_load( modules => $use_list ) ) { |
615 | return $self->_error(loc( |
616 | "You do not have '%1' installed - Please " . |
617 | "install it as soon as possible.", |
618 | 'IO::Uncompress::Bunzip2')); |
619 | } |
620 | |
621 | my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or |
622 | return $self->_error(loc("Unable to open '%1': %2", |
623 | $self->archive, |
624 | $IO::Uncompress::Bunzip2::Bunzip2Error)); |
625 | |
626 | $fh_to_read = $bz; |
627 | } |
628 | |
629 | my $tar = Archive::Tar->new(); |
630 | |
631 | ### only tell it it's compressed if it's a .tgz, as we give it a file |
632 | ### handle if it's a .tbz |
633 | unless( $tar->read( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) ) ) { |
634 | return $self->_error(loc("Unable to read '%1': %2", $self->archive, |
635 | $Archive::Tar::error)); |
636 | } |
637 | |
638 | ### workaround to prevent Archive::Tar from setting uid, which |
639 | ### is a potential security hole. -autrijus |
640 | ### have to do it here, since A::T needs to be /loaded/ first ### |
641 | { no strict 'refs'; local $^W; |
642 | |
643 | ### older versions of archive::tar <= 0.23 |
644 | *Archive::Tar::chown = sub {}; |
645 | } |
646 | |
647 | ### for version of archive::tar > 1.04 |
648 | local $Archive::Tar::Constant::CHOWN = 0; |
649 | |
650 | { local $^W; # quell 'splice() offset past end of array' warnings |
651 | # on older versions of A::T |
652 | |
653 | ### older archive::tar always returns $self, return value slightly |
654 | ### fux0r3d because of it. |
655 | $tar->extract() |
656 | or return $self->_error(loc("Unable to extract '%1': %2", |
657 | $self->archive, $Archive::Tar::error )); |
658 | } |
659 | |
660 | my @files = $tar->list_files; |
661 | my $dir = $self->__get_extract_dir( \@files ); |
662 | |
663 | ### store the files that are in the archive ### |
664 | $self->files(\@files); |
665 | |
666 | ### store the extraction dir ### |
667 | $self->extract_path( $dir ); |
668 | |
669 | ### check if the dir actually appeared ### |
670 | return 1 if -d $self->extract_path; |
671 | |
672 | ### no dir, we failed ### |
673 | return $self->_error(loc("Unable to extract '%1': %2", |
674 | $self->archive, $Archive::Tar::error )); |
675 | } |
676 | |
677 | ################################# |
678 | # |
679 | # Gunzip code |
680 | # |
681 | ################################# |
682 | |
683 | ### gunzip wrapper... goes to either Compress::Zlib or /bin/gzip |
684 | ### depending on $PREFER_BIN |
685 | sub _gunzip { |
686 | my $self = shift; |
687 | |
688 | my @methods = qw[_gunzip_cz _gunzip_bin]; |
689 | @methods = reverse @methods if $PREFER_BIN; |
690 | |
691 | for my $method (@methods) { |
692 | $self->_extractor($method) && return 1 if $self->$method(); |
693 | } |
694 | |
695 | return $self->_error(loc("Unable to gunzip file '%1'", $self->archive)); |
696 | } |
697 | |
698 | sub _gunzip_bin { |
699 | my $self = shift; |
700 | |
701 | ### check for /bin/gzip -- we need it ### |
702 | return $self->_error(loc("No '%1' program found", '/bin/gzip')) |
703 | unless $self->bin_gzip; |
704 | |
705 | |
706 | my $fh = FileHandle->new('>'. $self->_gunzip_to) or |
707 | return $self->_error(loc("Could not open '%1' for writing: %2", |
708 | $self->_gunzip_to, $! )); |
709 | |
710 | my $cmd = [ $self->bin_gzip, '-cdf', $self->archive ]; |
711 | |
712 | my $buffer; |
713 | unless( scalar run( command => $cmd, |
714 | verbose => $DEBUG, |
715 | buffer => \$buffer ) |
716 | ) { |
717 | return $self->_error(loc("Unable to gunzip '%1': %2", |
718 | $self->archive, $buffer)); |
719 | } |
720 | |
721 | ### no buffers available? |
722 | if( !IPC::Cmd->can_capture_buffer and !$buffer ) { |
723 | $self->_error( $self->_no_buffer_content( $self->archive ) ); |
724 | } |
725 | |
726 | print $fh $buffer if defined $buffer; |
727 | |
728 | close $fh; |
729 | |
730 | ### set what files where extract, and where they went ### |
731 | $self->files( [$self->_gunzip_to] ); |
732 | $self->extract_path( File::Spec->rel2abs(cwd()) ); |
733 | |
734 | return 1; |
735 | } |
736 | |
737 | sub _gunzip_cz { |
738 | my $self = shift; |
739 | |
740 | my $use_list = { 'Compress::Zlib' => '0.0' }; |
741 | unless( can_load( modules => $use_list ) ) { |
742 | return $self->_error(loc("You do not have '%1' installed - Please " . |
743 | "install it as soon as possible.", 'Compress::Zlib')); |
744 | } |
745 | |
746 | my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or |
747 | return $self->_error(loc("Unable to open '%1': %2", |
748 | $self->archive, $Compress::Zlib::gzerrno)); |
749 | |
750 | my $fh = FileHandle->new('>'. $self->_gunzip_to) or |
751 | return $self->_error(loc("Could not open '%1' for writing: %2", |
752 | $self->_gunzip_to, $! )); |
753 | |
754 | my $buffer; |
755 | $fh->print($buffer) while $gz->gzread($buffer) > 0; |
756 | $fh->close; |
757 | |
758 | ### set what files where extract, and where they went ### |
759 | $self->files( [$self->_gunzip_to] ); |
760 | $self->extract_path( File::Spec->rel2abs(cwd()) ); |
761 | |
762 | return 1; |
763 | } |
764 | |
765 | ################################# |
766 | # |
1dae2fb5 |
767 | # Uncompress code |
768 | # |
769 | ################################# |
770 | |
771 | |
772 | ### untar wrapper... goes to either Archive::Tar or /bin/tar |
773 | ### depending on $PREFER_BIN |
774 | sub _uncompress { |
775 | my $self = shift; |
776 | |
777 | my @methods = qw[_gunzip_cz _uncompress_bin]; |
778 | @methods = reverse @methods if $PREFER_BIN; |
779 | |
780 | for my $method (@methods) { |
781 | $self->_extractor($method) && return 1 if $self->$method(); |
782 | } |
783 | |
784 | return $self->_error(loc("Unable to untar file '%1'", $self->archive)); |
785 | } |
786 | |
787 | sub _uncompress_bin { |
788 | my $self = shift; |
789 | |
790 | ### check for /bin/gzip -- we need it ### |
791 | return $self->_error(loc("No '%1' program found", '/bin/uncompress')) |
792 | unless $self->bin_uncompress; |
793 | |
794 | |
795 | my $fh = FileHandle->new('>'. $self->_gunzip_to) or |
796 | return $self->_error(loc("Could not open '%1' for writing: %2", |
797 | $self->_gunzip_to, $! )); |
798 | |
799 | my $cmd = [ $self->bin_uncompress, '-c', $self->archive ]; |
800 | |
801 | my $buffer; |
802 | unless( scalar run( command => $cmd, |
803 | verbose => $DEBUG, |
804 | buffer => \$buffer ) |
805 | ) { |
806 | return $self->_error(loc("Unable to uncompress '%1': %2", |
807 | $self->archive, $buffer)); |
808 | } |
809 | |
810 | ### no buffers available? |
811 | if( !IPC::Cmd->can_capture_buffer and !$buffer ) { |
812 | $self->_error( $self->_no_buffer_content( $self->archive ) ); |
813 | } |
814 | |
815 | print $fh $buffer if defined $buffer; |
816 | |
817 | close $fh; |
818 | |
819 | ### set what files where extract, and where they went ### |
820 | $self->files( [$self->_gunzip_to] ); |
821 | $self->extract_path( File::Spec->rel2abs(cwd()) ); |
822 | |
823 | return 1; |
824 | } |
825 | |
826 | |
827 | ################################# |
828 | # |
520c99e2 |
829 | # Unzip code |
830 | # |
831 | ################################# |
832 | |
833 | ### unzip wrapper... goes to either Archive::Zip or /bin/unzip |
834 | ### depending on $PREFER_BIN |
835 | sub _unzip { |
836 | my $self = shift; |
837 | |
838 | my @methods = qw[_unzip_az _unzip_bin]; |
839 | @methods = reverse @methods if $PREFER_BIN; |
840 | |
841 | for my $method (@methods) { |
842 | $self->_extractor($method) && return 1 if $self->$method(); |
843 | } |
844 | |
845 | return $self->_error(loc("Unable to gunzip file '%1'", $self->archive)); |
846 | } |
847 | |
848 | sub _unzip_bin { |
849 | my $self = shift; |
850 | |
851 | ### check for /bin/gzip if we need it ### |
852 | return $self->_error(loc("No '%1' program found", '/bin/unzip')) |
853 | unless $self->bin_unzip; |
854 | |
855 | |
856 | ### first, get the files.. it must be 2 different commands with 'unzip' :( |
4f3b9739 |
857 | { my $cmd; |
858 | if (ON_VMS) { |
859 | $cmd = [ $self->bin_unzip, '"-Z"', '-1', $self->archive ]; |
860 | } else { |
861 | $cmd = [ $self->bin_unzip, '-Z', '-1', $self->archive ]; |
862 | } |
520c99e2 |
863 | |
864 | my $buffer = ''; |
865 | unless( scalar run( command => $cmd, |
866 | verbose => $DEBUG, |
867 | buffer => \$buffer ) |
868 | ) { |
869 | return $self->_error(loc("Unable to unzip '%1': %2", |
870 | $self->archive, $buffer)); |
871 | } |
872 | |
873 | ### no buffers available? |
874 | if( !IPC::Cmd->can_capture_buffer and !$buffer ) { |
875 | $self->_error( $self->_no_buffer_files( $self->archive ) ); |
876 | |
877 | } else { |
878 | $self->files( [split $/, $buffer] ); |
879 | } |
880 | } |
881 | |
882 | ### now, extract the archive ### |
883 | { my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ]; |
884 | |
885 | my $buffer; |
886 | unless( scalar run( command => $cmd, |
887 | verbose => $DEBUG, |
888 | buffer => \$buffer ) |
889 | ) { |
890 | return $self->_error(loc("Unable to unzip '%1': %2", |
891 | $self->archive, $buffer)); |
892 | } |
893 | |
894 | if( scalar @{$self->files} ) { |
895 | my $files = $self->files; |
896 | my $dir = $self->__get_extract_dir( $files ); |
897 | |
898 | $self->extract_path( $dir ); |
899 | } |
900 | } |
901 | |
902 | return 1; |
903 | } |
904 | |
905 | sub _unzip_az { |
906 | my $self = shift; |
907 | |
908 | my $use_list = { 'Archive::Zip' => '0.0' }; |
909 | unless( can_load( modules => $use_list ) ) { |
910 | return $self->_error(loc("You do not have '%1' installed - Please " . |
911 | "install it as soon as possible.", 'Archive::Zip')); |
912 | } |
913 | |
914 | my $zip = Archive::Zip->new(); |
915 | |
916 | unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) { |
917 | return $self->_error(loc("Unable to read '%1'", $self->archive)); |
918 | } |
919 | |
920 | my @files; |
921 | ### have to extract every memeber individually ### |
922 | for my $member ($zip->members) { |
923 | push @files, $member->{fileName}; |
924 | |
925 | unless( $zip->extractMember($member) == &Archive::Zip::AZ_OK ) { |
926 | return $self->_error(loc("Extraction of '%1' from '%2' failed", |
927 | $member->{fileName}, $self->archive )); |
928 | } |
929 | } |
930 | |
931 | my $dir = $self->__get_extract_dir( \@files ); |
932 | |
933 | ### set what files where extract, and where they went ### |
934 | $self->files( \@files ); |
935 | $self->extract_path( File::Spec->rel2abs($dir) ); |
936 | |
937 | return 1; |
938 | } |
939 | |
940 | sub __get_extract_dir { |
941 | my $self = shift; |
942 | my $files = shift || []; |
943 | |
944 | return unless scalar @$files; |
945 | |
946 | my($dir1, $dir2); |
947 | for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) { |
948 | my($dir,$pos) = @$aref; |
949 | |
950 | ### add a catdir(), so that any trailing slashes get |
951 | ### take care of (removed) |
952 | ### also, a catdir() normalises './dir/foo' to 'dir/foo'; |
953 | ### which was the problem in bug #23999 |
954 | my $res = -d $files->[$pos] |
955 | ? File::Spec->catdir( $files->[$pos], '' ) |
956 | : File::Spec->catdir( dirname( $files->[$pos] ) ); |
957 | |
958 | $$dir = $res; |
959 | } |
960 | |
961 | ### if the first and last dir don't match, make sure the |
962 | ### dirname is not set wrongly |
963 | my $dir; |
964 | |
965 | ### dirs are the same, so we know for sure what the extract dir is |
966 | if( $dir1 eq $dir2 ) { |
967 | $dir = $dir1; |
968 | |
969 | ### dirs are different.. do they share the base dir? |
970 | ### if so, use that, if not, fall back to '.' |
971 | } else { |
972 | my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0]; |
973 | my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0]; |
974 | |
975 | $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' ); |
976 | } |
977 | |
978 | return File::Spec->rel2abs( $dir ); |
979 | } |
980 | |
981 | ################################# |
982 | # |
983 | # Bunzip2 code |
984 | # |
985 | ################################# |
986 | |
987 | ### bunzip2 wrapper... |
988 | sub _bunzip2 { |
989 | my $self = shift; |
990 | |
991 | my @methods = qw[_bunzip2_cz2 _bunzip2_bin]; |
992 | @methods = reverse @methods if $PREFER_BIN; |
993 | |
994 | for my $method (@methods) { |
995 | $self->_extractor($method) && return 1 if $self->$method(); |
996 | } |
997 | |
998 | return $self->_error(loc("Unable to bunzip2 file '%1'", $self->archive)); |
999 | } |
1000 | |
1001 | sub _bunzip2_bin { |
1002 | my $self = shift; |
1003 | |
1004 | ### check for /bin/gzip -- we need it ### |
1005 | return $self->_error(loc("No '%1' program found", '/bin/bunzip2')) |
1006 | unless $self->bin_bunzip2; |
1007 | |
1008 | |
1009 | my $fh = FileHandle->new('>'. $self->_gunzip_to) or |
1010 | return $self->_error(loc("Could not open '%1' for writing: %2", |
1011 | $self->_gunzip_to, $! )); |
1012 | |
1013 | my $cmd = [ $self->bin_bunzip2, '-c', $self->archive ]; |
1014 | |
1015 | my $buffer; |
1016 | unless( scalar run( command => $cmd, |
1017 | verbose => $DEBUG, |
1018 | buffer => \$buffer ) |
1019 | ) { |
1020 | return $self->_error(loc("Unable to bunzip2 '%1': %2", |
1021 | $self->archive, $buffer)); |
1022 | } |
1023 | |
1024 | ### no buffers available? |
1025 | if( !IPC::Cmd->can_capture_buffer and !$buffer ) { |
1026 | $self->_error( $self->_no_buffer_content( $self->archive ) ); |
1027 | } |
1028 | |
1029 | print $fh $buffer if defined $buffer; |
1030 | |
1031 | close $fh; |
1032 | |
1033 | ### set what files where extract, and where they went ### |
1034 | $self->files( [$self->_gunzip_to] ); |
1035 | $self->extract_path( File::Spec->rel2abs(cwd()) ); |
1036 | |
1037 | return 1; |
1038 | } |
1039 | |
1040 | ### using cz2, the compact versions... this we use mainly in archive::tar |
1041 | ### extractor.. |
1042 | # sub _bunzip2_cz1 { |
1043 | # my $self = shift; |
1044 | # |
1045 | # my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' }; |
1046 | # unless( can_load( modules => $use_list ) ) { |
1047 | # return $self->_error(loc("You do not have '%1' installed - Please " . |
1048 | # "install it as soon as possible.", |
1049 | # 'IO::Uncompress::Bunzip2')); |
1050 | # } |
1051 | # |
1052 | # my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or |
1053 | # return $self->_error(loc("Unable to open '%1': %2", |
1054 | # $self->archive, |
1055 | # $IO::Uncompress::Bunzip2::Bunzip2Error)); |
1056 | # |
1057 | # my $fh = FileHandle->new('>'. $self->_gunzip_to) or |
1058 | # return $self->_error(loc("Could not open '%1' for writing: %2", |
1059 | # $self->_gunzip_to, $! )); |
1060 | # |
1061 | # my $buffer; |
1062 | # $fh->print($buffer) while $bz->read($buffer) > 0; |
1063 | # $fh->close; |
1064 | # |
1065 | # ### set what files where extract, and where they went ### |
1066 | # $self->files( [$self->_gunzip_to] ); |
1067 | # $self->extract_path( File::Spec->rel2abs(cwd()) ); |
1068 | # |
1069 | # return 1; |
1070 | # } |
1071 | |
1072 | sub _bunzip2_cz2 { |
1073 | my $self = shift; |
1074 | |
1075 | my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' }; |
1076 | unless( can_load( modules => $use_list ) ) { |
1077 | return $self->_error(loc("You do not have '%1' installed - Please " . |
1078 | "install it as soon as possible.", |
1079 | 'IO::Uncompress::Bunzip2')); |
1080 | } |
1081 | |
1082 | IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to) |
1083 | or return $self->_error(loc("Unable to uncompress '%1': %2", |
1084 | $self->archive, |
1085 | $IO::Uncompress::Bunzip2::Bunzip2Error)); |
1086 | |
1087 | ### set what files where extract, and where they went ### |
1088 | $self->files( [$self->_gunzip_to] ); |
1089 | $self->extract_path( File::Spec->rel2abs(cwd()) ); |
1090 | |
1091 | return 1; |
1092 | } |
1093 | |
1094 | |
1095 | ################################# |
1096 | # |
1097 | # Error code |
1098 | # |
1099 | ################################# |
1100 | |
1101 | sub _error { |
1102 | my $self = shift; |
1103 | my $error = shift; |
1104 | |
1105 | $self->_error_msg( $error ); |
1106 | $self->_error_msg_long( Carp::longmess($error) ); |
1107 | |
1108 | ### set $Archive::Extract::WARN to 0 to disable printing |
1109 | ### of errors |
1110 | if( $WARN ) { |
1111 | carp $DEBUG ? $self->_error_msg_long : $self->_error_msg; |
1112 | } |
1113 | |
1114 | return; |
1115 | } |
1116 | |
1117 | sub error { |
1118 | my $self = shift; |
1119 | return shift() ? $self->_error_msg_long : $self->_error_msg; |
1120 | } |
1121 | |
1122 | sub _no_buffer_files { |
1123 | my $self = shift; |
1124 | my $file = shift or return; |
1125 | return loc("No buffer captured, unable to tell ". |
1126 | "extracted files or extraction dir for '%1'", $file); |
1127 | } |
1128 | |
1129 | sub _no_buffer_content { |
1130 | my $self = shift; |
1131 | my $file = shift or return; |
1132 | return loc("No buffer captured, unable to get content for '%1'", $file); |
1133 | } |
1134 | 1; |
1135 | |
1136 | =pod |
1137 | |
1138 | =head1 HOW IT WORKS |
1139 | |
1140 | C<Archive::Extract> tries first to determine what type of archive you |
1141 | are passing it, by inspecting its suffix. It does not do this by using |
1142 | Mime magic, or something related. See C<CAVEATS> below. |
1143 | |
1144 | Once it has determined the file type, it knows which extraction methods |
1145 | it can use on the archive. It will try a perl solution first, then fall |
1146 | back to a commandline tool if that fails. If that also fails, it will |
1147 | return false, indicating it was unable to extract the archive. |
1148 | See the section on C<GLOBAL VARIABLES> to see how to alter this order. |
1149 | |
1150 | =head1 CAVEATS |
1151 | |
1152 | =head2 File Extensions |
1153 | |
1154 | C<Archive::Extract> trusts on the extension of the archive to determine |
1155 | what type it is, and what extractor methods therefore can be used. If |
1156 | your archives do not have any of the extensions as described in the |
1157 | C<new()> method, you will have to specify the type explicitly, or |
1158 | C<Archive::Extract> will not be able to extract the archive for you. |
1159 | |
520c99e2 |
1160 | =head1 GLOBAL VARIABLES |
1161 | |
1162 | =head2 $Archive::Extract::DEBUG |
1163 | |
1164 | Set this variable to C<true> to have all calls to command line tools |
1165 | be printed out, including all their output. |
1166 | This also enables C<Carp::longmess> errors, instead of the regular |
1167 | C<carp> errors. |
1168 | |
1169 | Good for tracking down why things don't work with your particular |
1170 | setup. |
1171 | |
1172 | Defaults to C<false>. |
1173 | |
1174 | =head2 $Archive::Extract::WARN |
1175 | |
1176 | This variable controls whether errors encountered internally by |
1177 | C<Archive::Extract> should be C<carp>'d or not. |
1178 | |
1179 | Set to false to silence warnings. Inspect the output of the C<error()> |
1180 | method manually to see what went wrong. |
1181 | |
1182 | Defaults to C<true>. |
1183 | |
1184 | =head2 $Archive::Extract::PREFER_BIN |
1185 | |
1186 | This variables controls whether C<Archive::Extract> should prefer the |
1187 | use of perl modules, or commandline tools to extract archives. |
1188 | |
1189 | Set to C<true> to have C<Archive::Extract> prefer commandline tools. |
1190 | |
1191 | Defaults to C<false>. |
1192 | |
1193 | =head1 TODO |
1194 | |
1195 | =over 4 |
1196 | |
1197 | =item Mime magic support |
1198 | |
1199 | Maybe this module should use something like C<File::Type> to determine |
1200 | the type, rather than blindly trust the suffix. |
1201 | |
1dae2fb5 |
1202 | =back |
1203 | |
574b415d |
1204 | =head1 BUG REPORTS |
520c99e2 |
1205 | |
574b415d |
1206 | Please report bugs or other issues to E<lt>bug-archive-extract@rt.cpan.org<gt>. |
520c99e2 |
1207 | |
574b415d |
1208 | =head1 AUTHOR |
1209 | |
1210 | This module by Jos Boumans E<lt>kane@cpan.orgE<gt>. |
520c99e2 |
1211 | |
574b415d |
1212 | =head1 COPYRIGHT |
520c99e2 |
1213 | |
574b415d |
1214 | This library is free software; you may redistribute and/or modify it |
1215 | under the same terms as Perl itself. |
520c99e2 |
1216 | |
1217 | =cut |
1218 | |
1219 | # Local variables: |
1220 | # c-indentation-style: bsd |
1221 | # c-basic-offset: 4 |
1222 | # indent-tabs-mode: nil |
1223 | # End: |
1224 | # vim: expandtab shiftwidth=4: |
1225 | |