1 package Archive::Extract;
7 use IPC::Cmd qw[run can_run];
9 use File::Path qw[mkpath];
11 use File::Basename qw[dirname basename];
12 use Params::Check qw[check];
13 use Module::Load::Conditional qw[can_load check_install];
14 use Locale::Maketext::Simple Style => 'gettext';
16 ### solaris has silly /bin/tar output ###
17 use constant ON_SOLARIS => $^O eq 'solaris' ? 1 : 0;
18 use constant FILE_EXISTS => sub { -e $_[0] ? 1 : 0 };
20 ### VMS may require quoting upper case command options
21 use constant ON_VMS => $^O eq 'VMS' ? 1 : 0;
23 ### Windows needs special treatment of Tar options
24 use constant ON_WIN32 => $^O eq 'MSWin32' ? 1 : 0;
26 ### we can't use this extraction method, because of missing
28 use constant METHOD_NA => [];
30 ### If these are changed, update @TYPES and the new() POD
31 use constant TGZ => 'tgz';
32 use constant TAR => 'tar';
33 use constant GZ => 'gz';
34 use constant ZIP => 'zip';
35 use constant BZ2 => 'bz2';
36 use constant TBZ => 'tbz';
37 use constant Z => 'Z';
38 use constant LZMA => 'lzma';
40 use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG
41 $_ALLOW_BIN $_ALLOW_PURE_PERL
48 $_ALLOW_PURE_PERL = 1; # allow pure perl extractors
49 $_ALLOW_BIN = 1; # allow binary extractors
51 # same as all constants
52 my @Types = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ, Z, LZMA );
54 local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1;
60 Archive::Extract - A generic archive extracting mechanism
66 ### build an Archive::Extract object ###
67 my $ae = Archive::Extract->new( archive => 'foo.tgz' );
69 ### extract to cwd() ###
70 my $ok = $ae->extract;
72 ### extract to /tmp ###
73 my $ok = $ae->extract( to => '/tmp' );
75 ### what if something went wrong?
76 my $ok = $ae->extract or die $ae->error;
78 ### files from the archive ###
79 my $files = $ae->files;
81 ### dir that was extracted to ###
82 my $outdir = $ae->extract_path;
85 ### quick check methods ###
86 $ae->is_tar # is it a .tar file?
87 $ae->is_tgz # is it a .tar.gz or .tgz file?
88 $ae->is_gz; # is it a .gz file?
89 $ae->is_zip; # is it a .zip file?
90 $ae->is_bz2; # is it a .bz2 file?
91 $ae->is_tbz; # is it a .tar.bz2 or .tbz file?
92 $ae->is_lzma; # is it a .lzma file?
94 ### absolute path to the archive you provided ###
97 ### commandline tools, if found ###
98 $ae->bin_tar # path to /bin/tar, if found
99 $ae->bin_gzip # path to /bin/gzip, if found
100 $ae->bin_unzip # path to /bin/unzip, if found
101 $ae->bin_bunzip2 # path to /bin/bunzip2 if found
102 $ae->bin_unlzma # path to /bin/unlzma if found
106 Archive::Extract is a generic archive extraction mechanism.
108 It allows you to extract any archive file of the type .tar, .tar.gz,
109 .gz, .Z, tar.bz2, .tbz, .bz2, .zip or .lzma without having to worry how it
110 does so, or use different interfaces for each type by using either
111 perl modules, or commandline tools on your system.
113 See the C<HOW IT WORKS> section further down for details.
118 ### see what /bin/programs are available ###
120 for my $pgm (qw[tar unzip gzip bunzip2 uncompress unlzma]) {
121 $PROGRAMS->{$pgm} = can_run($pgm);
124 ### mapping from types to extractor methods ###
125 my $Mapping = { # binary program # pure perl module
126 is_tgz => { bin => '_untar_bin', pp => '_untar_at' },
127 is_tar => { bin => '_untar_bin', pp => '_untar_at' },
128 is_gz => { bin => '_gunzip_bin', pp => '_gunzip_cz' },
129 is_zip => { bin => '_unzip_bin', pp => '_unzip_az' },
130 is_tbz => { bin => '_untar_bin', pp => '_untar_at' },
131 is_bz2 => { bin => '_bunzip2_bin', pp => '_bunzip2_bz2'},
132 is_Z => { bin => '_uncompress_bin', pp => '_gunzip_cz' },
133 is_lzma => { bin => '_unlzma_bin', pp => '_unlzma_cz' },
136 { ### use subs so we re-generate array refs etc for the no-overide flags
137 ### if we don't, then we reuse the same arrayref, meaning objects store
140 archive => sub { { required => 1, allow => FILE_EXISTS } },
141 type => sub { { default => '', allow => [ @Types ] } },
142 _error_msg => sub { { no_override => 1, default => [] } },
143 _error_msg_long => sub { { no_override => 1, default => [] } },
146 ### build accesssors ###
147 for my $method( keys %$tmpl,
148 qw[_extractor _gunzip_to files extract_path],
153 $self->{$method} = $_[0] if @_;
154 return $self->{$method};
160 =head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE])
162 Creates a new C<Archive::Extract> object based on the archive file you
163 passed it. Automatically determines the type of archive based on the
164 extension, but you can override that by explicitly providing the
167 Valid values for C<type> are:
173 Standard tar files, as produced by, for example, C</bin/tar>.
174 Corresponds to a C<.tar> suffix.
178 Gzip compressed tar files, as produced by, for example C</bin/tar -z>.
179 Corresponds to a C<.tgz> or C<.tar.gz> suffix.
183 Gzip compressed file, as produced by, for example C</bin/gzip>.
184 Corresponds to a C<.gz> suffix.
188 Lempel-Ziv compressed file, as produced by, for example C</bin/compress>.
189 Corresponds to a C<.Z> suffix.
193 Zip compressed file, as produced by, for example C</bin/zip>.
194 Corresponds to a C<.zip>, C<.jar> or C<.par> suffix.
198 Bzip2 compressed file, as produced by, for example, C</bin/bzip2>.
199 Corresponds to a C<.bz2> suffix.
203 Bzip2 compressed tar file, as produced by, for exmample C</bin/tar -j>.
204 Corresponds to a C<.tbz> or C<.tar.bz2> suffix.
208 Lzma compressed file, as produced by C</bin/lzma>.
209 Corresponds to a C<.lzma> suffix.
213 Returns a C<Archive::Extract> object on success, or false on failure.
222 ### see above why we use subs here and generate the template;
223 ### it's basically to not re-use arrayrefs
224 my %utmpl = map { $_ => $tmpl->{$_}->() } keys %$tmpl;
226 my $parsed = check( \%utmpl, \%hash ) or return;
228 ### make sure we have an absolute path ###
229 my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} );
231 ### figure out the type, if it wasn't already specified ###
232 unless ( $parsed->{type} ) {
234 $ar =~ /.+?\.(?:tar\.gz|tgz)$/i ? TGZ :
235 $ar =~ /.+?\.gz$/i ? GZ :
236 $ar =~ /.+?\.tar$/i ? TAR :
237 $ar =~ /.+?\.(zip|jar|par)$/i ? ZIP :
238 $ar =~ /.+?\.(?:tbz2?|tar\.bz2?)$/i ? TBZ :
239 $ar =~ /.+?\.bz2$/i ? BZ2 :
240 $ar =~ /.+?\.Z$/ ? Z :
241 $ar =~ /.+?\.lzma$/ ? LZMA :
246 bless $parsed, $class;
248 ### don't know what type of file it is
249 ### XXX this *has* to be an object call, not a package call
250 return $parsed->_error(loc("Cannot determine file type for '%1'",
251 $parsed->{archive} )) unless $parsed->{type};
256 =head2 $ae->extract( [to => '/output/path'] )
258 Extracts the archive represented by the C<Archive::Extract> object to
259 the path of your choice as specified by the C<to> argument. Defaults to
262 Since C<.gz> files never hold a directory, but only a single file; if
263 the C<to> argument is an existing directory, the file is extracted
264 there, with its C<.gz> suffix stripped.
265 If the C<to> argument is not an existing directory, the C<to> argument
266 is understood to be a filename, if the archive type is C<gz>.
267 In the case that you did not specify a C<to> argument, the output
268 file will be the name of the archive file, stripped from its C<.gz>
269 suffix, in the current working directory.
271 C<extract> will try a pure perl solution first, and then fall back to
272 commandline tools if they are available. See the C<GLOBAL VARIABLES>
273 section below on how to alter this behaviour.
275 It will return true on success, and false on failure.
277 On success, it will also set the follow attributes in the object:
281 =item $ae->extract_path
283 This is the directory that the files where extracted to.
287 This is an array ref with the paths of all the files in the archive,
288 relative to the C<to> argument you specified.
289 To get the full path to an extracted file, you would use:
291 File::Spec->catfile( $to, $ae->files->[0] );
293 Note that all files from a tar archive will be in unix format, as per
294 the tar specification.
304 ### reset error messages
305 $self->_error_msg( [] );
306 $self->_error_msg_long( [] );
310 to => { default => '.', store => \$to }
313 check( $tmpl, \%hash ) or return;
315 ### so 'to' could be a file or a dir, depending on whether it's a .gz
316 ### file, or basically anything else.
317 ### so, check that, then act accordingly.
318 ### set an accessor specifically so _gunzip can know what file to extract
322 if( $self->is_gz or $self->is_bz2 or $self->is_Z or $self->is_lzma ) {
324 my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2?|Z|lzma)$//i;
329 $self->_gunzip_to( basename($cp) );
331 ### then it's a filename
334 $self->_gunzip_to( basename($to) );
337 ### not a foo.gz file
343 ### make the dir if it doesn't exist ###
345 eval { mkpath( $dir ) };
347 return $self->_error(loc("Could not create path '%1': %2", $dir, $@))
351 ### get the current dir, to restore later ###
357 ### chdir to the target dir ###
358 unless( chdir $dir ) {
359 $self->_error(loc("Could not chdir to '%1': %2", $dir, $!));
360 $ok = 0; last EXTRACT;
363 ### set files to an empty array ref, so there's always an array
364 ### ref IN the accessor, to avoid errors like:
365 ### Can't use an undefined value as an ARRAY reference at
366 ### ../lib/Archive/Extract.pm line 742. (rt #19815)
369 ### find out the dispatch methods needed for this type of
370 ### archive. Do a $self->is_XXX to figure out the type, then
371 ### get the hashref with bin + pure perl dispatchers.
372 my ($map) = map { $Mapping->{$_} } grep { $self->$_ } keys %$Mapping;
374 ### add pure perl extractor if allowed & add bin extractor if allowed
376 push @methods, $map->{'pp'} if $_ALLOW_PURE_PERL;
377 push @methods, $map->{'bin'} if $_ALLOW_BIN;
379 ### reverse it if we prefer bin extractors
380 @methods = reverse @methods if $PREFER_BIN;
383 for my $method (@methods) {
384 print "# Extracting with ->$method\n" if $DEBUG;
386 my $rv = $self->$method;
388 ### a positive extraction
389 if( $rv and $rv ne METHOD_NA ) {
390 print "# Extraction succeeded\n" if $DEBUG;
391 $self->_extractor($method);
394 ### method is not available
395 } elsif ( $rv and $rv eq METHOD_NA ) {
396 print "# Extraction method not available\n" if $DEBUG;
399 print "# Extraction method failed\n" if $DEBUG;
404 ### warn something went wrong if we didn't get an extractor
405 unless( $self->_extractor ) {
406 my $diag = $fail ? loc("Extract failed due to errors") :
407 $na ? loc("Extract failed; no extractors available") :
410 $self->_error($diag);
415 ### and chdir back ###
416 unless( chdir $cwd ) {
417 $self->_error(loc("Could not chdir back to start dir '%1': %2'",
428 =head2 $ae->error([BOOL])
430 Returns the last encountered error as string.
431 Pass it a true value to get the C<Carp::longmess()> output instead.
433 =head2 $ae->extract_path
435 This is the directory the archive got extracted to.
436 See C<extract()> for details.
440 This is an array ref holding all the paths from the archive.
441 See C<extract()> for details.
445 This is the full path to the archive file represented by this
446 C<Archive::Extract> object.
450 This is the type of archive represented by this C<Archive::Extract>
451 object. See accessors below for an easier way to use this.
452 See the C<new()> method for details.
456 Returns a list of all known C<types> for C<Archive::Extract>'s
461 sub types { return @Types }
465 Returns true if the file is of type C<.tar.gz>.
466 See the C<new()> method for details.
470 Returns true if the file is of type C<.tar>.
471 See the C<new()> method for details.
475 Returns true if the file is of type C<.gz>.
476 See the C<new()> method for details.
480 Returns true if the file is of type C<.Z>.
481 See the C<new()> method for details.
485 Returns true if the file is of type C<.zip>.
486 See the C<new()> method for details.
490 Returns true if the file is of type C<.lzma>.
491 See the C<new()> method for details.
495 ### quick check methods ###
496 sub is_tgz { return $_[0]->type eq TGZ }
497 sub is_tar { return $_[0]->type eq TAR }
498 sub is_gz { return $_[0]->type eq GZ }
499 sub is_zip { return $_[0]->type eq ZIP }
500 sub is_tbz { return $_[0]->type eq TBZ }
501 sub is_bz2 { return $_[0]->type eq BZ2 }
502 sub is_Z { return $_[0]->type eq Z }
503 sub is_lzma { return $_[0]->type eq LZMA }
509 Returns the full path to your tar binary, if found.
513 Returns the full path to your gzip binary, if found
515 =head2 $ae->bin_unzip
517 Returns the full path to your unzip binary, if found
519 =head2 $ae->bin_unlzma
521 Returns the full path to your unlzma binary, if found
525 ### paths to commandline tools ###
526 sub bin_gzip { return $PROGRAMS->{'gzip'} if $PROGRAMS->{'gzip'} }
527 sub bin_unzip { return $PROGRAMS->{'unzip'} if $PROGRAMS->{'unzip'} }
528 sub bin_tar { return $PROGRAMS->{'tar'} if $PROGRAMS->{'tar'} }
529 sub bin_bunzip2 { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} }
530 sub bin_uncompress { return $PROGRAMS->{'uncompress'}
531 if $PROGRAMS->{'uncompress'} }
532 sub bin_unlzma { return $PROGRAMS->{'unlzma'} if $PROGRAMS->{'unlzma'} }
534 =head2 $bool = $ae->have_old_bunzip2
536 Older versions of C</bin/bunzip2>, from before the C<bunzip2 1.0> release,
537 require all archive names to end in C<.bz2> or it will not extract
538 them. This method checks if you have a recent version of C<bunzip2>
539 that allows any extension, or an older one that doesn't.
543 sub have_old_bunzip2 {
546 ### no bunzip2? no old bunzip2 either :)
547 return unless $self->bin_bunzip2;
549 ### if we can't run this, we can't be sure if it's too old or not
550 ### XXX stupid stupid stupid bunzip2 doesn't understand --version
551 ### is not a request to extract data:
552 ### $ bunzip2 --version
553 ### bzip2, a block-sorting file compressor. Version 1.0.2, 30-Dec-2001.
555 ### bunzip2: I won't read compressed data from a terminal.
556 ### bunzip2: For help, type: `bunzip2 --help'.
561 ### double hateful: bunzip2 --version also hangs if input is a pipe
562 ### See #32370: Archive::Extract will hang if stdin is a pipe [+PATCH]
563 ### So, we have to provide *another* argument which is a fake filename,
564 ### just so it wont try to read from stdin to print its version..
566 ### Even if the file exists, it won't clobber or change it.
569 command => [$self->bin_bunzip2, '--version', 'NoSuchFile'],
575 return unless $buffer;
577 my ($version) = $buffer =~ /version \s+ (\d+)/ix;
579 return 1 if $version < 1;
583 #################################
587 #################################
589 ### annoying issue with (gnu) tar on win32, as illustrated by this
590 ### bug: https://rt.cpan.org/Ticket/Display.html?id=40138
591 ### which shows that (gnu) tar will interpret a file name with a :
592 ### in it as a remote file name, so C:\tmp\foo.txt is interpreted
593 ### as a remote shell, and the extract fails.
595 if( ON_WIN32 and my $cmd = __PACKAGE__->bin_tar ) {
597 ### if this is gnu tar we are running, we need to use --force-local
598 push @ExtraTarFlags, '--force-local' if `$cmd --version` =~ /gnu tar/i;
602 ### use /bin/tar to extract ###
606 ### check for /bin/tar ###
607 ### check for /bin/gzip if we need it ###
608 ### if any of the binaries are not available, return NA
609 { my $diag = not $self->bin_tar ?
610 loc("No '%1' program found", '/bin/tar') :
611 $self->is_tgz && !$self->bin_gzip ?
612 loc("No '%1' program found", '/bin/gzip') :
613 $self->is_tbz && !$self->bin_bunzip2 ?
614 loc("No '%1' program found", '/bin/bunzip2') :
618 $self->_error( $diag );
623 ### XXX figure out how to make IPC::Run do this in one call --
624 ### currently i don't know how to get output of a command after a pipe
625 ### trapped in a scalar. Mailed barries about this 5th of june 2004.
627 ### see what command we should run, based on whether
628 ### it's a .tgz or .tar
630 ### XXX solaris tar and bsdtar are having different outputs
631 ### depending whether you run with -x or -t
632 ### compensate for this insanity by running -t first, then -x
634 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
635 $self->bin_tar, '-tf', '-'] :
636 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
637 $self->bin_tar, '-tf', '-'] :
638 [$self->bin_tar, @ExtraTarFlags, '-tf', $self->archive];
641 ### newer versions of 'tar' (1.21 and up) now print record size
642 ### to STDERR as well if v OR t is given (used to be both). This
643 ### is a 'feature' according to the changelog, so we must now only
644 ### inspect STDOUT, otherwise, failures like these occur:
645 ### nntp.perl.org/group/perl.cpan.testers/2009/02/msg3230366.html
647 my @out = run( command => $cmd,
651 ### command was unsuccessful
653 return $self->_error(loc(
654 "Error listing contents of archive '%1': %2",
655 $self->archive, $buffer ));
658 ### no buffers available?
659 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
660 $self->_error( $self->_no_buffer_files( $self->archive ) );
663 ### if we're on solaris we /might/ be using /bin/tar, which has
664 ### a weird output format... we might also be using
665 ### /usr/local/bin/tar, which is gnu tar, which is perfectly
666 ### fine... so we have to do some guessing here =/
667 my @files = map { chomp;
669 : (m|^ x \s+ # 'xtract' -- sigh
670 (.+?), # the actual file name
671 \s+ [\d,.]+ \s bytes,
672 \s+ [\d,.]+ \s tape \s blocks
675 ### only STDOUT, see above. Sometims, extra whitespace
676 ### is present, so make sure we only pick lines with
678 } grep { length } map { split $/, $_ } @{$out[3]};
680 ### store the files that are in the archive ###
681 $self->files(\@files);
685 ### now actually extract it ###
687 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
688 $self->bin_tar, '-xf', '-'] :
689 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
690 $self->bin_tar, '-xf', '-'] :
691 [$self->bin_tar, @ExtraTarFlags, '-xf', $self->archive];
694 unless( scalar run( command => $cmd,
698 return $self->_error(loc("Error extracting archive '%1': %2",
699 $self->archive, $buffer ));
702 ### we might not have them, due to lack of buffers
704 ### now that we've extracted, figure out where we extracted to
705 my $dir = $self->__get_extract_dir( $self->files );
707 ### store the extraction dir ###
708 $self->extract_path( $dir );
712 ### we got here, no error happened
718 ### use archive::tar to extract ###
722 ### Loading Archive::Tar is going to set it to 1, so make it local
723 ### within this block, starting with its initial value. Whatever
724 ### Achive::Tar does will be undone when we return.
726 ### Also, later, set $Archive::Tar::WARN to $Archive::Extract::WARN
727 ### so users don't have to even think about this variable. If they
728 ### do, they still get their set value outside of this call.
729 local $Archive::Tar::WARN = $Archive::Tar::WARN;
731 ### we definitely need Archive::Tar, so load that first
732 { my $use_list = { 'Archive::Tar' => '0.0' };
734 unless( can_load( modules => $use_list ) ) {
736 $self->_error(loc("You do not have '%1' installed - " .
737 "Please install it as soon as possible.",
744 ### we might pass it a filehandle if it's a .tbz file..
745 my $fh_to_read = $self->archive;
747 ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib
748 ### if A::T's version is 0.99 or higher
749 if( $self->is_tgz ) {
750 my $use_list = { 'Compress::Zlib' => '0.0' };
751 $use_list->{ 'IO::Zlib' } = '0.0'
752 if $Archive::Tar::VERSION >= '0.99';
754 unless( can_load( modules => $use_list ) ) {
755 my $which = join '/', sort keys %$use_list;
758 "You do not have '%1' installed - Please ".
759 "install it as soon as possible.", $which)
765 } elsif ( $self->is_tbz ) {
766 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
767 unless( can_load( modules => $use_list ) ) {
769 "You do not have '%1' installed - Please " .
770 "install it as soon as possible.",
771 'IO::Uncompress::Bunzip2')
777 my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
778 return $self->_error(loc("Unable to open '%1': %2",
780 $IO::Uncompress::Bunzip2::Bunzip2Error));
785 ### $Archive::Tar::WARN is 1 by default in Archive::Tar, but we've
786 ### localized $Archive::Tar::WARN already.
787 $Archive::Tar::WARN = $Archive::Extract::WARN;
789 my $tar = Archive::Tar->new();
791 ### only tell it it's compressed if it's a .tgz, as we give it a file
792 ### handle if it's a .tbz
793 unless( $tar->read( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) ) ) {
794 return $self->_error(loc("Unable to read '%1': %2", $self->archive,
795 $Archive::Tar::error));
798 ### workaround to prevent Archive::Tar from setting uid, which
799 ### is a potential security hole. -autrijus
800 ### have to do it here, since A::T needs to be /loaded/ first ###
801 { no strict 'refs'; local $^W;
803 ### older versions of archive::tar <= 0.23
804 *Archive::Tar::chown = sub {};
807 ### for version of Archive::Tar > 1.04
808 local $Archive::Tar::CHOWN = 0;
810 { local $^W; # quell 'splice() offset past end of array' warnings
811 # on older versions of A::T
813 ### older archive::tar always returns $self, return value slightly
814 ### fux0r3d because of it.
816 or return $self->_error(loc("Unable to extract '%1': %2",
817 $self->archive, $Archive::Tar::error ));
820 my @files = $tar->list_files;
821 my $dir = $self->__get_extract_dir( \@files );
823 ### store the files that are in the archive ###
824 $self->files(\@files);
826 ### store the extraction dir ###
827 $self->extract_path( $dir );
829 ### check if the dir actually appeared ###
830 return 1 if -d $self->extract_path;
832 ### no dir, we failed ###
833 return $self->_error(loc("Unable to extract '%1': %2",
834 $self->archive, $Archive::Tar::error ));
837 #################################
841 #################################
846 ### check for /bin/gzip -- we need it ###
847 unless( $self->bin_gzip ) {
848 $self->_error(loc("No '%1' program found", '/bin/gzip'));
852 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
853 return $self->_error(loc("Could not open '%1' for writing: %2",
854 $self->_gunzip_to, $! ));
856 my $cmd = [ $self->bin_gzip, '-cdf', $self->archive ];
859 unless( scalar run( command => $cmd,
863 return $self->_error(loc("Unable to gunzip '%1': %2",
864 $self->archive, $buffer));
867 ### no buffers available?
868 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
869 $self->_error( $self->_no_buffer_content( $self->archive ) );
872 print $fh $buffer if defined $buffer;
876 ### set what files where extract, and where they went ###
877 $self->files( [$self->_gunzip_to] );
878 $self->extract_path( File::Spec->rel2abs(cwd()) );
886 my $use_list = { 'Compress::Zlib' => '0.0' };
887 unless( can_load( modules => $use_list ) ) {
888 $self->_error(loc("You do not have '%1' installed - Please " .
889 "install it as soon as possible.", 'Compress::Zlib'));
893 my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or
894 return $self->_error(loc("Unable to open '%1': %2",
895 $self->archive, $Compress::Zlib::gzerrno));
897 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
898 return $self->_error(loc("Could not open '%1' for writing: %2",
899 $self->_gunzip_to, $! ));
902 $fh->print($buffer) while $gz->gzread($buffer) > 0;
905 ### set what files where extract, and where they went ###
906 $self->files( [$self->_gunzip_to] );
907 $self->extract_path( File::Spec->rel2abs(cwd()) );
912 #################################
916 #################################
918 sub _uncompress_bin {
921 ### check for /bin/gzip -- we need it ###
922 unless( $self->bin_uncompress ) {
923 $self->_error(loc("No '%1' program found", '/bin/uncompress'));
927 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
928 return $self->_error(loc("Could not open '%1' for writing: %2",
929 $self->_gunzip_to, $! ));
931 my $cmd = [ $self->bin_uncompress, '-c', $self->archive ];
934 unless( scalar run( command => $cmd,
938 return $self->_error(loc("Unable to uncompress '%1': %2",
939 $self->archive, $buffer));
942 ### no buffers available?
943 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
944 $self->_error( $self->_no_buffer_content( $self->archive ) );
947 print $fh $buffer if defined $buffer;
951 ### set what files where extract, and where they went ###
952 $self->files( [$self->_gunzip_to] );
953 $self->extract_path( File::Spec->rel2abs(cwd()) );
959 #################################
963 #################################
969 ### check for /bin/gzip if we need it ###
970 unless( $self->bin_unzip ) {
971 $self->_error(loc("No '%1' program found", '/bin/unzip'));
975 ### first, get the files.. it must be 2 different commands with 'unzip' :(
976 { ### on VMS, capital letter options have to be quoted. This is
977 ### peported by John Malmberg on P5P Tue 21 Aug 2007 05:05:11
978 ### Subject: [patch@31735]Archive Extract fix on VMS.
979 my $opt = ON_VMS ? '"-Z"' : '-Z';
980 my $cmd = [ $self->bin_unzip, $opt, '-1', $self->archive ];
983 unless( scalar run( command => $cmd,
987 return $self->_error(loc("Unable to unzip '%1': %2",
988 $self->archive, $buffer));
991 ### no buffers available?
992 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
993 $self->_error( $self->_no_buffer_files( $self->archive ) );
996 $self->files( [split $/, $buffer] );
1000 ### now, extract the archive ###
1001 { my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ];
1004 unless( scalar run( command => $cmd,
1006 buffer => \$buffer )
1008 return $self->_error(loc("Unable to unzip '%1': %2",
1009 $self->archive, $buffer));
1012 if( scalar @{$self->files} ) {
1013 my $files = $self->files;
1014 my $dir = $self->__get_extract_dir( $files );
1016 $self->extract_path( $dir );
1026 my $use_list = { 'Archive::Zip' => '0.0' };
1027 unless( can_load( modules => $use_list ) ) {
1028 $self->_error(loc("You do not have '%1' installed - Please " .
1029 "install it as soon as possible.", 'Archive::Zip'));
1033 my $zip = Archive::Zip->new();
1035 unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) {
1036 return $self->_error(loc("Unable to read '%1'", $self->archive));
1042 ### Address: #43278: Explicitly tell Archive::Zip where to put the files:
1043 ### "In my BackPAN indexing, Archive::Zip was extracting things
1044 ### in my script's directory instead of the current working directory.
1045 ### I traced this back through Archive::Zip::_asLocalName which
1046 ### eventually calls File::Spec::Win32::rel2abs which on Windows might
1047 ### call Cwd::getdcwd. getdcwd returns the wrong directory in my
1048 ### case, even though I think I'm on the same drive.
1050 ### To fix this, I pass the optional second argument to
1051 ### extractMember using the cwd from Archive::Extract." --bdfoy
1053 ## store cwd() before looping; calls to cwd() can be expensive, and
1054 ### it won't change during the loop
1055 my $extract_dir = cwd();
1057 ### have to extract every member individually ###
1058 for my $member ($zip->members) {
1059 push @files, $member->{fileName};
1061 ### file to extact to, to avoid the above problem
1062 my $to = File::Spec->catfile( $extract_dir, $member->{fileName} );
1064 unless( $zip->extractMember($member, $to) == &Archive::Zip::AZ_OK ) {
1065 return $self->_error(loc("Extraction of '%1' from '%2' failed",
1066 $member->{fileName}, $self->archive ));
1070 my $dir = $self->__get_extract_dir( \@files );
1072 ### set what files where extract, and where they went ###
1073 $self->files( \@files );
1074 $self->extract_path( File::Spec->rel2abs($dir) );
1079 sub __get_extract_dir {
1081 my $files = shift || [];
1083 return unless scalar @$files;
1086 for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
1087 my($dir,$pos) = @$aref;
1089 ### add a catdir(), so that any trailing slashes get
1090 ### take care of (removed)
1091 ### also, a catdir() normalises './dir/foo' to 'dir/foo';
1092 ### which was the problem in bug #23999
1093 my $res = -d $files->[$pos]
1094 ? File::Spec->catdir( $files->[$pos], '' )
1095 : File::Spec->catdir( dirname( $files->[$pos] ) );
1100 ### if the first and last dir don't match, make sure the
1101 ### dirname is not set wrongly
1104 ### dirs are the same, so we know for sure what the extract dir is
1105 if( $dir1 eq $dir2 ) {
1108 ### dirs are different.. do they share the base dir?
1109 ### if so, use that, if not, fall back to '.'
1111 my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
1112 my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
1114 $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
1117 return File::Spec->rel2abs( $dir );
1120 #################################
1124 #################################
1129 ### check for /bin/gzip -- we need it ###
1130 unless( $self->bin_bunzip2 ) {
1131 $self->_error(loc("No '%1' program found", '/bin/bunzip2'));
1135 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1136 return $self->_error(loc("Could not open '%1' for writing: %2",
1137 $self->_gunzip_to, $! ));
1139 ### guard against broken bunzip2. See ->have_old_bunzip2()
1141 if( $self->have_old_bunzip2 and $self->archive !~ /\.bz2$/i ) {
1142 return $self->_error(loc("Your bunzip2 version is too old and ".
1143 "can only extract files ending in '%1'",
1147 my $cmd = [ $self->bin_bunzip2, '-cd', $self->archive ];
1150 unless( scalar run( command => $cmd,
1152 buffer => \$buffer )
1154 return $self->_error(loc("Unable to bunzip2 '%1': %2",
1155 $self->archive, $buffer));
1158 ### no buffers available?
1159 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1160 $self->_error( $self->_no_buffer_content( $self->archive ) );
1163 print $fh $buffer if defined $buffer;
1167 ### set what files where extract, and where they went ###
1168 $self->files( [$self->_gunzip_to] );
1169 $self->extract_path( File::Spec->rel2abs(cwd()) );
1174 ### using cz2, the compact versions... this we use mainly in archive::tar
1176 # sub _bunzip2_cz1 {
1179 # my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1180 # unless( can_load( modules => $use_list ) ) {
1181 # return $self->_error(loc("You do not have '%1' installed - Please " .
1182 # "install it as soon as possible.",
1183 # 'IO::Uncompress::Bunzip2'));
1186 # my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
1187 # return $self->_error(loc("Unable to open '%1': %2",
1189 # $IO::Uncompress::Bunzip2::Bunzip2Error));
1191 # my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1192 # return $self->_error(loc("Could not open '%1' for writing: %2",
1193 # $self->_gunzip_to, $! ));
1196 # $fh->print($buffer) while $bz->read($buffer) > 0;
1199 # ### set what files where extract, and where they went ###
1200 # $self->files( [$self->_gunzip_to] );
1201 # $self->extract_path( File::Spec->rel2abs(cwd()) );
1209 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1210 unless( can_load( modules => $use_list ) ) {
1211 $self->_error(loc("You do not have '%1' installed - Please " .
1212 "install it as soon as possible.",
1213 'IO::Uncompress::Bunzip2'));
1217 IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to)
1218 or return $self->_error(loc("Unable to uncompress '%1': %2",
1220 $IO::Uncompress::Bunzip2::Bunzip2Error));
1222 ### set what files where extract, and where they went ###
1223 $self->files( [$self->_gunzip_to] );
1224 $self->extract_path( File::Spec->rel2abs(cwd()) );
1230 #################################
1234 #################################
1239 ### check for /bin/unlzma -- we need it ###
1240 unless( $self->bin_unlzma ) {
1241 $self->_error(loc("No '%1' program found", '/bin/unlzma'));
1245 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1246 return $self->_error(loc("Could not open '%1' for writing: %2",
1247 $self->_gunzip_to, $! ));
1249 my $cmd = [ $self->bin_unlzma, '-c', $self->archive ];
1252 unless( scalar run( command => $cmd,
1254 buffer => \$buffer )
1256 return $self->_error(loc("Unable to unlzma '%1': %2",
1257 $self->archive, $buffer));
1260 ### no buffers available?
1261 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1262 $self->_error( $self->_no_buffer_content( $self->archive ) );
1265 print $fh $buffer if defined $buffer;
1269 ### set what files where extract, and where they went ###
1270 $self->files( [$self->_gunzip_to] );
1271 $self->extract_path( File::Spec->rel2abs(cwd()) );
1279 my $use_list = { 'Compress::unLZMA' => '0.0' };
1280 unless( can_load( modules => $use_list ) ) {
1281 $self->_error(loc("You do not have '%1' installed - Please " .
1282 "install it as soon as possible.", 'Compress::unLZMA'));
1286 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1287 return $self->_error(loc("Could not open '%1' for writing: %2",
1288 $self->_gunzip_to, $! ));
1291 $buffer = Compress::unLZMA::uncompressfile( $self->archive );
1292 unless ( defined $buffer ) {
1293 return $self->_error(loc("Could not unlzma '%1': %2",
1294 $self->archive, $@));
1297 print $fh $buffer if defined $buffer;
1301 ### set what files where extract, and where they went ###
1302 $self->files( [$self->_gunzip_to] );
1303 $self->extract_path( File::Spec->rel2abs(cwd()) );
1308 #################################
1312 #################################
1317 my $lerror = Carp::longmess($error);
1319 push @{$self->_error_msg}, $error;
1320 push @{$self->_error_msg_long}, $lerror;
1322 ### set $Archive::Extract::WARN to 0 to disable printing
1325 carp $DEBUG ? $lerror : $error;
1334 ### make sure we have a fallback aref
1337 ? $self->_error_msg_long
1341 return join $/, @$aref;
1344 sub _no_buffer_files {
1346 my $file = shift or return;
1347 return loc("No buffer captured, unable to tell ".
1348 "extracted files or extraction dir for '%1'", $file);
1351 sub _no_buffer_content {
1353 my $file = shift or return;
1354 return loc("No buffer captured, unable to get content for '%1'", $file);
1362 C<Archive::Extract> tries first to determine what type of archive you
1363 are passing it, by inspecting its suffix. It does not do this by using
1364 Mime magic, or something related. See C<CAVEATS> below.
1366 Once it has determined the file type, it knows which extraction methods
1367 it can use on the archive. It will try a perl solution first, then fall
1368 back to a commandline tool if that fails. If that also fails, it will
1369 return false, indicating it was unable to extract the archive.
1370 See the section on C<GLOBAL VARIABLES> to see how to alter this order.
1374 =head2 File Extensions
1376 C<Archive::Extract> trusts on the extension of the archive to determine
1377 what type it is, and what extractor methods therefore can be used. If
1378 your archives do not have any of the extensions as described in the
1379 C<new()> method, you will have to specify the type explicitly, or
1380 C<Archive::Extract> will not be able to extract the archive for you.
1382 =head2 Supporting Very Large Files
1384 C<Archive::Extract> can use either pure perl modules or command line
1385 programs under the hood. Some of the pure perl modules (like
1386 C<Archive::Tar> and Compress::unLZMA) take the entire contents of the archive into memory,
1387 which may not be feasible on your system. Consider setting the global
1388 variable C<$Archive::Extract::PREFER_BIN> to C<1>, which will prefer
1389 the use of command line programs and won't consume so much memory.
1391 See the C<GLOBAL VARIABLES> section below for details.
1393 =head2 Bunzip2 support of arbitrary extensions.
1395 Older versions of C</bin/bunzip2> do not support arbitrary file
1396 extensions and insist on a C<.bz2> suffix. Although we do our best
1397 to guard against this, if you experience a bunzip2 error, it may
1398 be related to this. For details, please see the C<have_old_bunzip2>
1401 =head1 GLOBAL VARIABLES
1403 =head2 $Archive::Extract::DEBUG
1405 Set this variable to C<true> to have all calls to command line tools
1406 be printed out, including all their output.
1407 This also enables C<Carp::longmess> errors, instead of the regular
1410 Good for tracking down why things don't work with your particular
1413 Defaults to C<false>.
1415 =head2 $Archive::Extract::WARN
1417 This variable controls whether errors encountered internally by
1418 C<Archive::Extract> should be C<carp>'d or not.
1420 Set to false to silence warnings. Inspect the output of the C<error()>
1421 method manually to see what went wrong.
1423 Defaults to C<true>.
1425 =head2 $Archive::Extract::PREFER_BIN
1427 This variables controls whether C<Archive::Extract> should prefer the
1428 use of perl modules, or commandline tools to extract archives.
1430 Set to C<true> to have C<Archive::Extract> prefer commandline tools.
1432 Defaults to C<false>.
1434 =head1 TODO / CAVEATS
1438 =item Mime magic support
1440 Maybe this module should use something like C<File::Type> to determine
1441 the type, rather than blindly trust the suffix.
1445 Currently, C<Archive::Extract> does a C<chdir> to the extraction dir before
1446 extraction, and a C<chdir> back again after. This is not necessarily
1447 thread safe. See C<rt.cpan.org> bug C<#45671> for details.
1453 Please report bugs or other issues to E<lt>bug-archive-extract@rt.cpan.org<gt>.
1457 This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1461 This library is free software; you may redistribute and/or modify it
1462 under the same terms as Perl itself.
1467 # c-indentation-style: bsd
1469 # indent-tabs-mode: nil
1471 # vim: expandtab shiftwidth=4: