1 package Archive::Extract;
7 use IPC::Cmd qw[run can_run];
9 use File::Path qw[mkpath];
11 use File::Basename qw[dirname basename];
12 use Params::Check qw[check];
13 use Module::Load::Conditional qw[can_load check_install];
14 use Locale::Maketext::Simple Style => 'gettext';
16 ### solaris has silly /bin/tar output ###
17 use constant ON_SOLARIS => $^O eq 'solaris' ? 1 : 0;
18 use constant FILE_EXISTS => sub { -e $_[0] ? 1 : 0 };
20 ### VMS may require quoting upper case command options
21 use constant ON_VMS => $^O eq 'VMS' ? 1 : 0;
23 ### Windows needs special treatment of Tar options
24 use constant ON_WIN32 => $^O eq 'MSWin32' ? 1 : 0;
26 ### we can't use this extraction method, because of missing
28 use constant METHOD_NA => [];
30 ### If these are changed, update @TYPES and the new() POD
31 use constant TGZ => 'tgz';
32 use constant TAR => 'tar';
33 use constant GZ => 'gz';
34 use constant ZIP => 'zip';
35 use constant BZ2 => 'bz2';
36 use constant TBZ => 'tbz';
37 use constant Z => 'Z';
38 use constant LZMA => 'lzma';
40 use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG
41 $_ALLOW_BIN $_ALLOW_PURE_PERL
48 $_ALLOW_PURE_PERL = 1; # allow pure perl extractors
49 $_ALLOW_BIN = 1; # allow binary extractors
51 # same as all constants
52 my @Types = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ, Z, LZMA );
54 local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1;
60 Archive::Extract - A generic archive extracting mechanism
66 ### build an Archive::Extract object ###
67 my $ae = Archive::Extract->new( archive => 'foo.tgz' );
69 ### extract to cwd() ###
70 my $ok = $ae->extract;
72 ### extract to /tmp ###
73 my $ok = $ae->extract( to => '/tmp' );
75 ### what if something went wrong?
76 my $ok = $ae->extract or die $ae->error;
78 ### files from the archive ###
79 my $files = $ae->files;
81 ### dir that was extracted to ###
82 my $outdir = $ae->extract_path;
85 ### quick check methods ###
86 $ae->is_tar # is it a .tar file?
87 $ae->is_tgz # is it a .tar.gz or .tgz file?
88 $ae->is_gz; # is it a .gz file?
89 $ae->is_zip; # is it a .zip file?
90 $ae->is_bz2; # is it a .bz2 file?
91 $ae->is_tbz; # is it a .tar.bz2 or .tbz file?
92 $ae->is_lzma; # is it a .lzma file?
94 ### absolute path to the archive you provided ###
97 ### commandline tools, if found ###
98 $ae->bin_tar # path to /bin/tar, if found
99 $ae->bin_gzip # path to /bin/gzip, if found
100 $ae->bin_unzip # path to /bin/unzip, if found
101 $ae->bin_bunzip2 # path to /bin/bunzip2 if found
102 $ae->bin_unlzma # path to /bin/unlzma if found
106 Archive::Extract is a generic archive extraction mechanism.
108 It allows you to extract any archive file of the type .tar, .tar.gz,
109 .gz, .Z, tar.bz2, .tbz, .bz2, .zip or .lzma without having to worry how it
110 does so, or use different interfaces for each type by using either
111 perl modules, or commandline tools on your system.
113 See the C<HOW IT WORKS> section further down for details.
118 ### see what /bin/programs are available ###
120 for my $pgm (qw[tar unzip gzip bunzip2 uncompress unlzma]) {
121 $PROGRAMS->{$pgm} = can_run($pgm);
124 ### mapping from types to extractor methods ###
125 my $Mapping = { # binary program # pure perl module
126 is_tgz => { bin => '_untar_bin', pp => '_untar_at' },
127 is_tar => { bin => '_untar_bin', pp => '_untar_at' },
128 is_gz => { bin => '_gunzip_bin', pp => '_gunzip_cz' },
129 is_zip => { bin => '_unzip_bin', pp => '_unzip_az' },
130 is_tbz => { bin => '_untar_bin', pp => '_untar_at' },
131 is_bz2 => { bin => '_bunzip2_bin', pp => '_bunzip2_bz2'},
132 is_Z => { bin => '_uncompress_bin', pp => '_gunzip_cz' },
133 is_lzma => { bin => '_unlzma_bin', pp => '_unlzma_cz' },
136 { ### use subs so we re-generate array refs etc for the no-overide flags
137 ### if we don't, then we reuse the same arrayref, meaning objects store
140 archive => sub { { required => 1, allow => FILE_EXISTS } },
141 type => sub { { default => '', allow => [ @Types ] } },
142 _error_msg => sub { { no_override => 1, default => [] } },
143 _error_msg_long => sub { { no_override => 1, default => [] } },
146 ### build accesssors ###
147 for my $method( keys %$tmpl,
148 qw[_extractor _gunzip_to files extract_path],
153 $self->{$method} = $_[0] if @_;
154 return $self->{$method};
160 =head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE])
162 Creates a new C<Archive::Extract> object based on the archive file you
163 passed it. Automatically determines the type of archive based on the
164 extension, but you can override that by explicitly providing the
167 Valid values for C<type> are:
173 Standard tar files, as produced by, for example, C</bin/tar>.
174 Corresponds to a C<.tar> suffix.
178 Gzip compressed tar files, as produced by, for example C</bin/tar -z>.
179 Corresponds to a C<.tgz> or C<.tar.gz> suffix.
183 Gzip compressed file, as produced by, for example C</bin/gzip>.
184 Corresponds to a C<.gz> suffix.
188 Lempel-Ziv compressed file, as produced by, for example C</bin/compress>.
189 Corresponds to a C<.Z> suffix.
193 Zip compressed file, as produced by, for example C</bin/zip>.
194 Corresponds to a C<.zip>, C<.jar> or C<.par> suffix.
198 Bzip2 compressed file, as produced by, for example, C</bin/bzip2>.
199 Corresponds to a C<.bz2> suffix.
203 Bzip2 compressed tar file, as produced by, for exmample C</bin/tar -j>.
204 Corresponds to a C<.tbz> or C<.tar.bz2> suffix.
208 Lzma compressed file, as produced by C</bin/lzma>.
209 Corresponds to a C<.lzma> suffix.
213 Returns a C<Archive::Extract> object on success, or false on failure.
222 ### see above why we use subs here and generate the template;
223 ### it's basically to not re-use arrayrefs
224 my %utmpl = map { $_ => $tmpl->{$_}->() } keys %$tmpl;
226 my $parsed = check( \%utmpl, \%hash ) or return;
228 ### make sure we have an absolute path ###
229 my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} );
231 ### figure out the type, if it wasn't already specified ###
232 unless ( $parsed->{type} ) {
234 $ar =~ /.+?\.(?:tar\.gz|tgz)$/i ? TGZ :
235 $ar =~ /.+?\.gz$/i ? GZ :
236 $ar =~ /.+?\.tar$/i ? TAR :
237 $ar =~ /.+?\.(zip|jar|par)$/i ? ZIP :
238 $ar =~ /.+?\.(?:tbz2?|tar\.bz2?)$/i ? TBZ :
239 $ar =~ /.+?\.bz2$/i ? BZ2 :
240 $ar =~ /.+?\.Z$/ ? Z :
241 $ar =~ /.+?\.lzma$/ ? LZMA :
246 bless $parsed, $class;
248 ### don't know what type of file it is
249 ### XXX this *has* to be an object call, not a package call
250 return $parsed->_error(loc("Cannot determine file type for '%1'",
251 $parsed->{archive} )) unless $parsed->{type};
256 =head2 $ae->extract( [to => '/output/path'] )
258 Extracts the archive represented by the C<Archive::Extract> object to
259 the path of your choice as specified by the C<to> argument. Defaults to
262 Since C<.gz> files never hold a directory, but only a single file; if
263 the C<to> argument is an existing directory, the file is extracted
264 there, with its C<.gz> suffix stripped.
265 If the C<to> argument is not an existing directory, the C<to> argument
266 is understood to be a filename, if the archive type is C<gz>.
267 In the case that you did not specify a C<to> argument, the output
268 file will be the name of the archive file, stripped from its C<.gz>
269 suffix, in the current working directory.
271 C<extract> will try a pure perl solution first, and then fall back to
272 commandline tools if they are available. See the C<GLOBAL VARIABLES>
273 section below on how to alter this behaviour.
275 It will return true on success, and false on failure.
277 On success, it will also set the follow attributes in the object:
281 =item $ae->extract_path
283 This is the directory that the files where extracted to.
287 This is an array ref with the paths of all the files in the archive,
288 relative to the C<to> argument you specified.
289 To get the full path to an extracted file, you would use:
291 File::Spec->catfile( $to, $ae->files->[0] );
293 Note that all files from a tar archive will be in unix format, as per
294 the tar specification.
304 ### reset error messages
305 $self->_error_msg( [] );
306 $self->_error_msg_long( [] );
310 to => { default => '.', store => \$to }
313 check( $tmpl, \%hash ) or return;
315 ### so 'to' could be a file or a dir, depending on whether it's a .gz
316 ### file, or basically anything else.
317 ### so, check that, then act accordingly.
318 ### set an accessor specifically so _gunzip can know what file to extract
322 if( $self->is_gz or $self->is_bz2 or $self->is_Z or $self->is_lzma ) {
324 my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2?|Z|lzma)$//i;
329 $self->_gunzip_to( basename($cp) );
331 ### then it's a filename
334 $self->_gunzip_to( basename($to) );
337 ### not a foo.gz file
343 ### make the dir if it doesn't exist ###
345 eval { mkpath( $dir ) };
347 return $self->_error(loc("Could not create path '%1': %2", $dir, $@))
351 ### get the current dir, to restore later ###
357 ### chdir to the target dir ###
358 unless( chdir $dir ) {
359 $self->_error(loc("Could not chdir to '%1': %2", $dir, $!));
360 $ok = 0; last EXTRACT;
363 ### set files to an empty array ref, so there's always an array
364 ### ref IN the accessor, to avoid errors like:
365 ### Can't use an undefined value as an ARRAY reference at
366 ### ../lib/Archive/Extract.pm line 742. (rt #19815)
369 ### find out the dispatch methods needed for this type of
370 ### archive. Do a $self->is_XXX to figure out the type, then
371 ### get the hashref with bin + pure perl dispatchers.
372 my ($map) = map { $Mapping->{$_} } grep { $self->$_ } keys %$Mapping;
374 ### add pure perl extractor if allowed & add bin extractor if allowed
376 push @methods, $map->{'pp'} if $_ALLOW_PURE_PERL;
377 push @methods, $map->{'bin'} if $_ALLOW_BIN;
379 ### reverse it if we prefer bin extractors
380 @methods = reverse @methods if $PREFER_BIN;
383 for my $method (@methods) {
384 print "# Extracting with ->$method\n" if $DEBUG;
386 my $rv = $self->$method;
388 ### a positive extraction
389 if( $rv and $rv ne METHOD_NA ) {
390 print "# Extraction succeeded\n" if $DEBUG;
391 $self->_extractor($method);
394 ### method is not available
395 } elsif ( $rv and $rv eq METHOD_NA ) {
396 print "# Extraction method not available\n" if $DEBUG;
399 print "# Extraction method failed\n" if $DEBUG;
404 ### warn something went wrong if we didn't get an extractor
405 unless( $self->_extractor ) {
406 my $diag = $fail ? loc("Extract failed due to errors") :
407 $na ? loc("Extract failed; no extractors available") :
410 $self->_error($diag);
415 ### and chdir back ###
416 unless( chdir $cwd ) {
417 $self->_error(loc("Could not chdir back to start dir '%1': %2'",
428 =head2 $ae->error([BOOL])
430 Returns the last encountered error as string.
431 Pass it a true value to get the C<Carp::longmess()> output instead.
433 =head2 $ae->extract_path
435 This is the directory the archive got extracted to.
436 See C<extract()> for details.
440 This is an array ref holding all the paths from the archive.
441 See C<extract()> for details.
445 This is the full path to the archive file represented by this
446 C<Archive::Extract> object.
450 This is the type of archive represented by this C<Archive::Extract>
451 object. See accessors below for an easier way to use this.
452 See the C<new()> method for details.
456 Returns a list of all known C<types> for C<Archive::Extract>'s
461 sub types { return @Types }
465 Returns true if the file is of type C<.tar.gz>.
466 See the C<new()> method for details.
470 Returns true if the file is of type C<.tar>.
471 See the C<new()> method for details.
475 Returns true if the file is of type C<.gz>.
476 See the C<new()> method for details.
480 Returns true if the file is of type C<.Z>.
481 See the C<new()> method for details.
485 Returns true if the file is of type C<.zip>.
486 See the C<new()> method for details.
490 Returns true if the file is of type C<.lzma>.
491 See the C<new()> method for details.
495 ### quick check methods ###
496 sub is_tgz { return $_[0]->type eq TGZ }
497 sub is_tar { return $_[0]->type eq TAR }
498 sub is_gz { return $_[0]->type eq GZ }
499 sub is_zip { return $_[0]->type eq ZIP }
500 sub is_tbz { return $_[0]->type eq TBZ }
501 sub is_bz2 { return $_[0]->type eq BZ2 }
502 sub is_Z { return $_[0]->type eq Z }
503 sub is_lzma { return $_[0]->type eq LZMA }
509 Returns the full path to your tar binary, if found.
513 Returns the full path to your gzip binary, if found
515 =head2 $ae->bin_unzip
517 Returns the full path to your unzip binary, if found
519 =head2 $ae->bin_unlzma
521 Returns the full path to your unlzma binary, if found
525 ### paths to commandline tools ###
526 sub bin_gzip { return $PROGRAMS->{'gzip'} if $PROGRAMS->{'gzip'} }
527 sub bin_unzip { return $PROGRAMS->{'unzip'} if $PROGRAMS->{'unzip'} }
528 sub bin_tar { return $PROGRAMS->{'tar'} if $PROGRAMS->{'tar'} }
529 sub bin_bunzip2 { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} }
530 sub bin_uncompress { return $PROGRAMS->{'uncompress'}
531 if $PROGRAMS->{'uncompress'} }
532 sub bin_unlzma { return $PROGRAMS->{'unlzma'} if $PROGRAMS->{'unlzma'} }
534 =head2 $bool = $ae->have_old_bunzip2
536 Older versions of C</bin/bunzip2>, from before the C<bunzip2 1.0> release,
537 require all archive names to end in C<.bz2> or it will not extract
538 them. This method checks if you have a recent version of C<bunzip2>
539 that allows any extension, or an older one that doesn't.
543 sub have_old_bunzip2 {
546 ### no bunzip2? no old bunzip2 either :)
547 return unless $self->bin_bunzip2;
549 ### if we can't run this, we can't be sure if it's too old or not
550 ### XXX stupid stupid stupid bunzip2 doesn't understand --version
551 ### is not a request to extract data:
552 ### $ bunzip2 --version
553 ### bzip2, a block-sorting file compressor. Version 1.0.2, 30-Dec-2001.
555 ### bunzip2: I won't read compressed data from a terminal.
556 ### bunzip2: For help, type: `bunzip2 --help'.
561 ### double hateful: bunzip2 --version also hangs if input is a pipe
562 ### See #32370: Archive::Extract will hang if stdin is a pipe [+PATCH]
563 ### So, we have to provide *another* argument which is a fake filename,
564 ### just so it wont try to read from stdin to print its version..
566 ### Even if the file exists, it won't clobber or change it.
569 command => [$self->bin_bunzip2, '--version', 'NoSuchFile'],
575 return unless $buffer;
577 my ($version) = $buffer =~ /version \s+ (\d+)/ix;
579 return 1 if $version < 1;
583 #################################
587 #################################
589 ### annoying issue with (gnu) tar on win32, as illustrated by this
590 ### bug: https://rt.cpan.org/Ticket/Display.html?id=40138
591 ### which shows that (gnu) tar will interpret a file name with a :
592 ### in it as a remote file name, so C:\tmp\foo.txt is interpreted
593 ### as a remote shell, and the extract fails.
595 if( ON_WIN32 and my $cmd = __PACKAGE__->bin_tar ) {
597 ### if this is gnu tar we are running, we need to use --force-local
598 push @ExtraTarFlags, '--force-local' if `$cmd --version` =~ /gnu tar/i;
602 ### use /bin/tar to extract ###
606 ### check for /bin/tar ###
607 ### check for /bin/gzip if we need it ###
608 ### if any of the binaries are not available, return NA
609 { my $diag = not $self->bin_tar ?
610 loc("No '%1' program found", '/bin/tar') :
611 $self->is_tgz && !$self->bin_gzip ?
612 loc("No '%1' program found", '/bin/gzip') :
613 $self->is_tbz && !$self->bin_bunzip2 ?
614 loc("No '%1' program found", '/bin/bunzip2') :
618 $self->_error( $diag );
623 ### XXX figure out how to make IPC::Run do this in one call --
624 ### currently i don't know how to get output of a command after a pipe
625 ### trapped in a scalar. Mailed barries about this 5th of june 2004.
627 ### see what command we should run, based on whether
628 ### it's a .tgz or .tar
630 ### XXX solaris tar and bsdtar are having different outputs
631 ### depending whether you run with -x or -t
632 ### compensate for this insanity by running -t first, then -x
634 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
635 $self->bin_tar, '-tf', '-'] :
636 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
637 $self->bin_tar, '-tf', '-'] :
638 [$self->bin_tar, @ExtraTarFlags, '-tf', $self->archive];
640 ### run the command ###
642 unless( scalar run( command => $cmd,
646 return $self->_error(loc(
647 "Error listing contents of archive '%1': %2",
648 $self->archive, $buffer ));
651 ### no buffers available?
652 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
653 $self->_error( $self->_no_buffer_files( $self->archive ) );
656 ### if we're on solaris we /might/ be using /bin/tar, which has
657 ### a weird output format... we might also be using
658 ### /usr/local/bin/tar, which is gnu tar, which is perfectly
659 ### fine... so we have to do some guessing here =/
660 my @files = map { chomp;
662 : (m|^ x \s+ # 'xtract' -- sigh
663 (.+?), # the actual file name
664 \s+ [\d,.]+ \s bytes,
665 \s+ [\d,.]+ \s tape \s blocks
670 ### store the files that are in the archive ###
671 $self->files(\@files);
675 ### now actually extract it ###
677 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
678 $self->bin_tar, '-xf', '-'] :
679 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
680 $self->bin_tar, '-xf', '-'] :
681 [$self->bin_tar, @ExtraTarFlags, '-xf', $self->archive];
684 unless( scalar run( command => $cmd,
688 return $self->_error(loc("Error extracting archive '%1': %2",
689 $self->archive, $buffer ));
692 ### we might not have them, due to lack of buffers
694 ### now that we've extracted, figure out where we extracted to
695 my $dir = $self->__get_extract_dir( $self->files );
697 ### store the extraction dir ###
698 $self->extract_path( $dir );
702 ### we got here, no error happened
708 ### use archive::tar to extract ###
712 ### Loading Archive::Tar is going to set it to 1, so make it local
713 ### within this block, starting with its initial value. Whatever
714 ### Achive::Tar does will be undone when we return.
716 ### Also, later, set $Archive::Tar::WARN to $Archive::Extract::WARN
717 ### so users don't have to even think about this variable. If they
718 ### do, they still get their set value outside of this call.
719 local $Archive::Tar::WARN = $Archive::Tar::WARN;
721 ### we definitely need Archive::Tar, so load that first
722 { my $use_list = { 'Archive::Tar' => '0.0' };
724 unless( can_load( modules => $use_list ) ) {
726 $self->_error(loc("You do not have '%1' installed - " .
727 "Please install it as soon as possible.",
734 ### we might pass it a filehandle if it's a .tbz file..
735 my $fh_to_read = $self->archive;
737 ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib
738 ### if A::T's version is 0.99 or higher
739 if( $self->is_tgz ) {
740 my $use_list = { 'Compress::Zlib' => '0.0' };
741 $use_list->{ 'IO::Zlib' } = '0.0'
742 if $Archive::Tar::VERSION >= '0.99';
744 unless( can_load( modules => $use_list ) ) {
745 my $which = join '/', sort keys %$use_list;
748 "You do not have '%1' installed - Please ".
749 "install it as soon as possible.", $which)
755 } elsif ( $self->is_tbz ) {
756 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
757 unless( can_load( modules => $use_list ) ) {
759 "You do not have '%1' installed - Please " .
760 "install it as soon as possible.",
761 'IO::Uncompress::Bunzip2')
767 my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
768 return $self->_error(loc("Unable to open '%1': %2",
770 $IO::Uncompress::Bunzip2::Bunzip2Error));
775 ### $Archive::Tar::WARN is 1 by default in Archive::Tar, but we've
776 ### localized $Archive::Tar::WARN already.
777 $Archive::Tar::WARN = $Archive::Extract::WARN;
779 my $tar = Archive::Tar->new();
781 ### only tell it it's compressed if it's a .tgz, as we give it a file
782 ### handle if it's a .tbz
783 unless( $tar->read( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) ) ) {
784 return $self->_error(loc("Unable to read '%1': %2", $self->archive,
785 $Archive::Tar::error));
788 ### workaround to prevent Archive::Tar from setting uid, which
789 ### is a potential security hole. -autrijus
790 ### have to do it here, since A::T needs to be /loaded/ first ###
791 { no strict 'refs'; local $^W;
793 ### older versions of archive::tar <= 0.23
794 *Archive::Tar::chown = sub {};
797 ### for version of Archive::Tar > 1.04
798 local $Archive::Tar::CHOWN = 0;
800 { local $^W; # quell 'splice() offset past end of array' warnings
801 # on older versions of A::T
803 ### older archive::tar always returns $self, return value slightly
804 ### fux0r3d because of it.
806 or return $self->_error(loc("Unable to extract '%1': %2",
807 $self->archive, $Archive::Tar::error ));
810 my @files = $tar->list_files;
811 my $dir = $self->__get_extract_dir( \@files );
813 ### store the files that are in the archive ###
814 $self->files(\@files);
816 ### store the extraction dir ###
817 $self->extract_path( $dir );
819 ### check if the dir actually appeared ###
820 return 1 if -d $self->extract_path;
822 ### no dir, we failed ###
823 return $self->_error(loc("Unable to extract '%1': %2",
824 $self->archive, $Archive::Tar::error ));
827 #################################
831 #################################
836 ### check for /bin/gzip -- we need it ###
837 unless( $self->bin_gzip ) {
838 $self->_error(loc("No '%1' program found", '/bin/gzip'));
842 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
843 return $self->_error(loc("Could not open '%1' for writing: %2",
844 $self->_gunzip_to, $! ));
846 my $cmd = [ $self->bin_gzip, '-cdf', $self->archive ];
849 unless( scalar run( command => $cmd,
853 return $self->_error(loc("Unable to gunzip '%1': %2",
854 $self->archive, $buffer));
857 ### no buffers available?
858 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
859 $self->_error( $self->_no_buffer_content( $self->archive ) );
862 print $fh $buffer if defined $buffer;
866 ### set what files where extract, and where they went ###
867 $self->files( [$self->_gunzip_to] );
868 $self->extract_path( File::Spec->rel2abs(cwd()) );
876 my $use_list = { 'Compress::Zlib' => '0.0' };
877 unless( can_load( modules => $use_list ) ) {
878 $self->_error(loc("You do not have '%1' installed - Please " .
879 "install it as soon as possible.", 'Compress::Zlib'));
883 my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or
884 return $self->_error(loc("Unable to open '%1': %2",
885 $self->archive, $Compress::Zlib::gzerrno));
887 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
888 return $self->_error(loc("Could not open '%1' for writing: %2",
889 $self->_gunzip_to, $! ));
892 $fh->print($buffer) while $gz->gzread($buffer) > 0;
895 ### set what files where extract, and where they went ###
896 $self->files( [$self->_gunzip_to] );
897 $self->extract_path( File::Spec->rel2abs(cwd()) );
902 #################################
906 #################################
908 sub _uncompress_bin {
911 ### check for /bin/gzip -- we need it ###
912 unless( $self->bin_uncompress ) {
913 $self->_error(loc("No '%1' program found", '/bin/uncompress'));
917 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
918 return $self->_error(loc("Could not open '%1' for writing: %2",
919 $self->_gunzip_to, $! ));
921 my $cmd = [ $self->bin_uncompress, '-c', $self->archive ];
924 unless( scalar run( command => $cmd,
928 return $self->_error(loc("Unable to uncompress '%1': %2",
929 $self->archive, $buffer));
932 ### no buffers available?
933 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
934 $self->_error( $self->_no_buffer_content( $self->archive ) );
937 print $fh $buffer if defined $buffer;
941 ### set what files where extract, and where they went ###
942 $self->files( [$self->_gunzip_to] );
943 $self->extract_path( File::Spec->rel2abs(cwd()) );
949 #################################
953 #################################
959 ### check for /bin/gzip if we need it ###
960 unless( $self->bin_unzip ) {
961 $self->_error(loc("No '%1' program found", '/bin/unzip'));
965 ### first, get the files.. it must be 2 different commands with 'unzip' :(
966 { ### on VMS, capital letter options have to be quoted. This is
967 ### peported by John Malmberg on P5P Tue 21 Aug 2007 05:05:11
968 ### Subject: [patch@31735]Archive Extract fix on VMS.
969 my $opt = ON_VMS ? '"-Z"' : '-Z';
970 my $cmd = [ $self->bin_unzip, $opt, '-1', $self->archive ];
973 unless( scalar run( command => $cmd,
977 return $self->_error(loc("Unable to unzip '%1': %2",
978 $self->archive, $buffer));
981 ### no buffers available?
982 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
983 $self->_error( $self->_no_buffer_files( $self->archive ) );
986 $self->files( [split $/, $buffer] );
990 ### now, extract the archive ###
991 { my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ];
994 unless( scalar run( command => $cmd,
998 return $self->_error(loc("Unable to unzip '%1': %2",
999 $self->archive, $buffer));
1002 if( scalar @{$self->files} ) {
1003 my $files = $self->files;
1004 my $dir = $self->__get_extract_dir( $files );
1006 $self->extract_path( $dir );
1016 my $use_list = { 'Archive::Zip' => '0.0' };
1017 unless( can_load( modules => $use_list ) ) {
1018 $self->_error(loc("You do not have '%1' installed - Please " .
1019 "install it as soon as possible.", 'Archive::Zip'));
1023 my $zip = Archive::Zip->new();
1025 unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) {
1026 return $self->_error(loc("Unable to read '%1'", $self->archive));
1032 ### Address: #43278: Explicitly tell Archive::Zip where to put the files:
1033 ### "In my BackPAN indexing, Archive::Zip was extracting things
1034 ### in my script's directory instead of the current working directory.
1035 ### I traced this back through Archive::Zip::_asLocalName which
1036 ### eventually calls File::Spec::Win32::rel2abs which on Windows might
1037 ### call Cwd::getdcwd. getdcwd returns the wrong directory in my
1038 ### case, even though I think I'm on the same drive.
1040 ### To fix this, I pass the optional second argument to
1041 ### extractMember using the cwd from Archive::Extract." --bdfoy
1043 ## store cwd() before looping; calls to cwd() can be expensive, and
1044 ### it won't change during the loop
1045 my $extract_dir = cwd();
1047 ### have to extract every member individually ###
1048 for my $member ($zip->members) {
1049 push @files, $member->{fileName};
1051 ### file to extact to, to avoid the above problem
1052 my $to = File::Spec->catfile( $extract_dir, $member->{fileName} );
1054 unless( $zip->extractMember($member, $to) == &Archive::Zip::AZ_OK ) {
1055 return $self->_error(loc("Extraction of '%1' from '%2' failed",
1056 $member->{fileName}, $self->archive ));
1060 my $dir = $self->__get_extract_dir( \@files );
1062 ### set what files where extract, and where they went ###
1063 $self->files( \@files );
1064 $self->extract_path( File::Spec->rel2abs($dir) );
1069 sub __get_extract_dir {
1071 my $files = shift || [];
1073 return unless scalar @$files;
1076 for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
1077 my($dir,$pos) = @$aref;
1079 ### add a catdir(), so that any trailing slashes get
1080 ### take care of (removed)
1081 ### also, a catdir() normalises './dir/foo' to 'dir/foo';
1082 ### which was the problem in bug #23999
1083 my $res = -d $files->[$pos]
1084 ? File::Spec->catdir( $files->[$pos], '' )
1085 : File::Spec->catdir( dirname( $files->[$pos] ) );
1090 ### if the first and last dir don't match, make sure the
1091 ### dirname is not set wrongly
1094 ### dirs are the same, so we know for sure what the extract dir is
1095 if( $dir1 eq $dir2 ) {
1098 ### dirs are different.. do they share the base dir?
1099 ### if so, use that, if not, fall back to '.'
1101 my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
1102 my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
1104 $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
1107 return File::Spec->rel2abs( $dir );
1110 #################################
1114 #################################
1119 ### check for /bin/gzip -- we need it ###
1120 unless( $self->bin_bunzip2 ) {
1121 $self->_error(loc("No '%1' program found", '/bin/bunzip2'));
1125 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1126 return $self->_error(loc("Could not open '%1' for writing: %2",
1127 $self->_gunzip_to, $! ));
1129 ### guard against broken bunzip2. See ->have_old_bunzip2()
1131 if( $self->have_old_bunzip2 and $self->archive !~ /\.bz2$/i ) {
1132 return $self->_error(loc("Your bunzip2 version is too old and ".
1133 "can only extract files ending in '%1'",
1137 my $cmd = [ $self->bin_bunzip2, '-cd', $self->archive ];
1140 unless( scalar run( command => $cmd,
1142 buffer => \$buffer )
1144 return $self->_error(loc("Unable to bunzip2 '%1': %2",
1145 $self->archive, $buffer));
1148 ### no buffers available?
1149 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1150 $self->_error( $self->_no_buffer_content( $self->archive ) );
1153 print $fh $buffer if defined $buffer;
1157 ### set what files where extract, and where they went ###
1158 $self->files( [$self->_gunzip_to] );
1159 $self->extract_path( File::Spec->rel2abs(cwd()) );
1164 ### using cz2, the compact versions... this we use mainly in archive::tar
1166 # sub _bunzip2_cz1 {
1169 # my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1170 # unless( can_load( modules => $use_list ) ) {
1171 # return $self->_error(loc("You do not have '%1' installed - Please " .
1172 # "install it as soon as possible.",
1173 # 'IO::Uncompress::Bunzip2'));
1176 # my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
1177 # return $self->_error(loc("Unable to open '%1': %2",
1179 # $IO::Uncompress::Bunzip2::Bunzip2Error));
1181 # my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1182 # return $self->_error(loc("Could not open '%1' for writing: %2",
1183 # $self->_gunzip_to, $! ));
1186 # $fh->print($buffer) while $bz->read($buffer) > 0;
1189 # ### set what files where extract, and where they went ###
1190 # $self->files( [$self->_gunzip_to] );
1191 # $self->extract_path( File::Spec->rel2abs(cwd()) );
1199 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1200 unless( can_load( modules => $use_list ) ) {
1201 $self->_error(loc("You do not have '%1' installed - Please " .
1202 "install it as soon as possible.",
1203 'IO::Uncompress::Bunzip2'));
1207 IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to)
1208 or return $self->_error(loc("Unable to uncompress '%1': %2",
1210 $IO::Uncompress::Bunzip2::Bunzip2Error));
1212 ### set what files where extract, and where they went ###
1213 $self->files( [$self->_gunzip_to] );
1214 $self->extract_path( File::Spec->rel2abs(cwd()) );
1220 #################################
1224 #################################
1229 ### check for /bin/unlzma -- we need it ###
1230 unless( $self->bin_unlzma ) {
1231 $self->_error(loc("No '%1' program found", '/bin/unlzma'));
1235 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1236 return $self->_error(loc("Could not open '%1' for writing: %2",
1237 $self->_gunzip_to, $! ));
1239 my $cmd = [ $self->bin_unlzma, '-c', $self->archive ];
1242 unless( scalar run( command => $cmd,
1244 buffer => \$buffer )
1246 return $self->_error(loc("Unable to unlzma '%1': %2",
1247 $self->archive, $buffer));
1250 ### no buffers available?
1251 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1252 $self->_error( $self->_no_buffer_content( $self->archive ) );
1255 print $fh $buffer if defined $buffer;
1259 ### set what files where extract, and where they went ###
1260 $self->files( [$self->_gunzip_to] );
1261 $self->extract_path( File::Spec->rel2abs(cwd()) );
1269 my $use_list = { 'Compress::unLZMA' => '0.0' };
1270 unless( can_load( modules => $use_list ) ) {
1271 $self->_error(loc("You do not have '%1' installed - Please " .
1272 "install it as soon as possible.", 'Compress::unLZMA'));
1276 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1277 return $self->_error(loc("Could not open '%1' for writing: %2",
1278 $self->_gunzip_to, $! ));
1281 $buffer = Compress::unLZMA::uncompressfile( $self->archive );
1282 unless ( defined $buffer ) {
1283 return $self->_error(loc("Could not unlzma '%1': %2",
1284 $self->archive, $@));
1287 print $fh $buffer if defined $buffer;
1291 ### set what files where extract, and where they went ###
1292 $self->files( [$self->_gunzip_to] );
1293 $self->extract_path( File::Spec->rel2abs(cwd()) );
1298 #################################
1302 #################################
1307 my $lerror = Carp::longmess($error);
1309 push @{$self->_error_msg}, $error;
1310 push @{$self->_error_msg_long}, $lerror;
1312 ### set $Archive::Extract::WARN to 0 to disable printing
1315 carp $DEBUG ? $lerror : $error;
1324 ### make sure we have a fallback aref
1327 ? $self->_error_msg_long
1331 return join $/, @$aref;
1334 sub _no_buffer_files {
1336 my $file = shift or return;
1337 return loc("No buffer captured, unable to tell ".
1338 "extracted files or extraction dir for '%1'", $file);
1341 sub _no_buffer_content {
1343 my $file = shift or return;
1344 return loc("No buffer captured, unable to get content for '%1'", $file);
1352 C<Archive::Extract> tries first to determine what type of archive you
1353 are passing it, by inspecting its suffix. It does not do this by using
1354 Mime magic, or something related. See C<CAVEATS> below.
1356 Once it has determined the file type, it knows which extraction methods
1357 it can use on the archive. It will try a perl solution first, then fall
1358 back to a commandline tool if that fails. If that also fails, it will
1359 return false, indicating it was unable to extract the archive.
1360 See the section on C<GLOBAL VARIABLES> to see how to alter this order.
1364 =head2 File Extensions
1366 C<Archive::Extract> trusts on the extension of the archive to determine
1367 what type it is, and what extractor methods therefore can be used. If
1368 your archives do not have any of the extensions as described in the
1369 C<new()> method, you will have to specify the type explicitly, or
1370 C<Archive::Extract> will not be able to extract the archive for you.
1372 =head2 Supporting Very Large Files
1374 C<Archive::Extract> can use either pure perl modules or command line
1375 programs under the hood. Some of the pure perl modules (like
1376 C<Archive::Tar> and Compress::unLZMA) take the entire contents of the archive into memory,
1377 which may not be feasible on your system. Consider setting the global
1378 variable C<$Archive::Extract::PREFER_BIN> to C<1>, which will prefer
1379 the use of command line programs and won't consume so much memory.
1381 See the C<GLOBAL VARIABLES> section below for details.
1383 =head2 Bunzip2 support of arbitrary extensions.
1385 Older versions of C</bin/bunzip2> do not support arbitrary file
1386 extensions and insist on a C<.bz2> suffix. Although we do our best
1387 to guard against this, if you experience a bunzip2 error, it may
1388 be related to this. For details, please see the C<have_old_bunzip2>
1391 =head1 GLOBAL VARIABLES
1393 =head2 $Archive::Extract::DEBUG
1395 Set this variable to C<true> to have all calls to command line tools
1396 be printed out, including all their output.
1397 This also enables C<Carp::longmess> errors, instead of the regular
1400 Good for tracking down why things don't work with your particular
1403 Defaults to C<false>.
1405 =head2 $Archive::Extract::WARN
1407 This variable controls whether errors encountered internally by
1408 C<Archive::Extract> should be C<carp>'d or not.
1410 Set to false to silence warnings. Inspect the output of the C<error()>
1411 method manually to see what went wrong.
1413 Defaults to C<true>.
1415 =head2 $Archive::Extract::PREFER_BIN
1417 This variables controls whether C<Archive::Extract> should prefer the
1418 use of perl modules, or commandline tools to extract archives.
1420 Set to C<true> to have C<Archive::Extract> prefer commandline tools.
1422 Defaults to C<false>.
1428 =item Mime magic support
1430 Maybe this module should use something like C<File::Type> to determine
1431 the type, rather than blindly trust the suffix.
1437 Please report bugs or other issues to E<lt>bug-archive-extract@rt.cpan.org<gt>.
1441 This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1445 This library is free software; you may redistribute and/or modify it
1446 under the same terms as Perl itself.
1451 # c-indentation-style: bsd
1453 # indent-tabs-mode: nil
1455 # vim: expandtab shiftwidth=4: