1 package Archive::Extract;
7 use IPC::Cmd qw[run can_run];
9 use File::Path qw[mkpath];
11 use File::Basename qw[dirname basename];
12 use Params::Check qw[check];
13 use Module::Load::Conditional qw[can_load check_install];
14 use Locale::Maketext::Simple Style => 'gettext';
16 ### solaris has silly /bin/tar output ###
17 use constant ON_SOLARIS => $^O eq 'solaris' ? 1 : 0;
18 use constant FILE_EXISTS => sub { -e $_[0] ? 1 : 0 };
20 ### VMS may require quoting upper case command options
21 use constant ON_VMS => $^O eq 'VMS' ? 1 : 0;
23 ### Windows needs special treatment of Tar options
24 use constant ON_WIN32 => $^O eq 'MSWin32' ? 1 : 0;
26 ### we can't use this extraction method, because of missing
28 use constant METHOD_NA => [];
30 ### If these are changed, update @TYPES and the new() POD
31 use constant TGZ => 'tgz';
32 use constant TAR => 'tar';
33 use constant GZ => 'gz';
34 use constant ZIP => 'zip';
35 use constant BZ2 => 'bz2';
36 use constant TBZ => 'tbz';
37 use constant Z => 'Z';
38 use constant LZMA => 'lzma';
40 use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG
41 $_ALLOW_BIN $_ALLOW_PURE_PERL $_ALLOW_TAR_ITER
48 $_ALLOW_PURE_PERL = 1; # allow pure perl extractors
49 $_ALLOW_BIN = 1; # allow binary extractors
50 $_ALLOW_TAR_ITER = 1; # try to use Archive::Tar->iter if available
52 # same as all constants
53 my @Types = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ, Z, LZMA );
55 local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1;
61 Archive::Extract - A generic archive extracting mechanism
67 ### build an Archive::Extract object ###
68 my $ae = Archive::Extract->new( archive => 'foo.tgz' );
70 ### extract to cwd() ###
71 my $ok = $ae->extract;
73 ### extract to /tmp ###
74 my $ok = $ae->extract( to => '/tmp' );
76 ### what if something went wrong?
77 my $ok = $ae->extract or die $ae->error;
79 ### files from the archive ###
80 my $files = $ae->files;
82 ### dir that was extracted to ###
83 my $outdir = $ae->extract_path;
86 ### quick check methods ###
87 $ae->is_tar # is it a .tar file?
88 $ae->is_tgz # is it a .tar.gz or .tgz file?
89 $ae->is_gz; # is it a .gz file?
90 $ae->is_zip; # is it a .zip file?
91 $ae->is_bz2; # is it a .bz2 file?
92 $ae->is_tbz; # is it a .tar.bz2 or .tbz file?
93 $ae->is_lzma; # is it a .lzma file?
95 ### absolute path to the archive you provided ###
98 ### commandline tools, if found ###
99 $ae->bin_tar # path to /bin/tar, if found
100 $ae->bin_gzip # path to /bin/gzip, if found
101 $ae->bin_unzip # path to /bin/unzip, if found
102 $ae->bin_bunzip2 # path to /bin/bunzip2 if found
103 $ae->bin_unlzma # path to /bin/unlzma if found
107 Archive::Extract is a generic archive extraction mechanism.
109 It allows you to extract any archive file of the type .tar, .tar.gz,
110 .gz, .Z, tar.bz2, .tbz, .bz2, .zip or .lzma without having to worry how it
111 does so, or use different interfaces for each type by using either
112 perl modules, or commandline tools on your system.
114 See the C<HOW IT WORKS> section further down for details.
119 ### see what /bin/programs are available ###
121 for my $pgm (qw[tar unzip gzip bunzip2 uncompress unlzma]) {
122 $PROGRAMS->{$pgm} = can_run($pgm);
125 ### mapping from types to extractor methods ###
126 my $Mapping = { # binary program # pure perl module
127 is_tgz => { bin => '_untar_bin', pp => '_untar_at' },
128 is_tar => { bin => '_untar_bin', pp => '_untar_at' },
129 is_gz => { bin => '_gunzip_bin', pp => '_gunzip_cz' },
130 is_zip => { bin => '_unzip_bin', pp => '_unzip_az' },
131 is_tbz => { bin => '_untar_bin', pp => '_untar_at' },
132 is_bz2 => { bin => '_bunzip2_bin', pp => '_bunzip2_bz2'},
133 is_Z => { bin => '_uncompress_bin', pp => '_gunzip_cz' },
134 is_lzma => { bin => '_unlzma_bin', pp => '_unlzma_cz' },
137 { ### use subs so we re-generate array refs etc for the no-overide flags
138 ### if we don't, then we reuse the same arrayref, meaning objects store
141 archive => sub { { required => 1, allow => FILE_EXISTS } },
142 type => sub { { default => '', allow => [ @Types ] } },
143 _error_msg => sub { { no_override => 1, default => [] } },
144 _error_msg_long => sub { { no_override => 1, default => [] } },
147 ### build accesssors ###
148 for my $method( keys %$tmpl,
149 qw[_extractor _gunzip_to files extract_path],
154 $self->{$method} = $_[0] if @_;
155 return $self->{$method};
161 =head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE])
163 Creates a new C<Archive::Extract> object based on the archive file you
164 passed it. Automatically determines the type of archive based on the
165 extension, but you can override that by explicitly providing the
168 Valid values for C<type> are:
174 Standard tar files, as produced by, for example, C</bin/tar>.
175 Corresponds to a C<.tar> suffix.
179 Gzip compressed tar files, as produced by, for example C</bin/tar -z>.
180 Corresponds to a C<.tgz> or C<.tar.gz> suffix.
184 Gzip compressed file, as produced by, for example C</bin/gzip>.
185 Corresponds to a C<.gz> suffix.
189 Lempel-Ziv compressed file, as produced by, for example C</bin/compress>.
190 Corresponds to a C<.Z> suffix.
194 Zip compressed file, as produced by, for example C</bin/zip>.
195 Corresponds to a C<.zip>, C<.jar> or C<.par> suffix.
199 Bzip2 compressed file, as produced by, for example, C</bin/bzip2>.
200 Corresponds to a C<.bz2> suffix.
204 Bzip2 compressed tar file, as produced by, for exmample C</bin/tar -j>.
205 Corresponds to a C<.tbz> or C<.tar.bz2> suffix.
209 Lzma compressed file, as produced by C</bin/lzma>.
210 Corresponds to a C<.lzma> suffix.
214 Returns a C<Archive::Extract> object on success, or false on failure.
223 ### see above why we use subs here and generate the template;
224 ### it's basically to not re-use arrayrefs
225 my %utmpl = map { $_ => $tmpl->{$_}->() } keys %$tmpl;
227 my $parsed = check( \%utmpl, \%hash ) or return;
229 ### make sure we have an absolute path ###
230 my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} );
232 ### figure out the type, if it wasn't already specified ###
233 unless ( $parsed->{type} ) {
235 $ar =~ /.+?\.(?:tar\.gz|tgz)$/i ? TGZ :
236 $ar =~ /.+?\.gz$/i ? GZ :
237 $ar =~ /.+?\.tar$/i ? TAR :
238 $ar =~ /.+?\.(zip|jar|par)$/i ? ZIP :
239 $ar =~ /.+?\.(?:tbz2?|tar\.bz2?)$/i ? TBZ :
240 $ar =~ /.+?\.bz2$/i ? BZ2 :
241 $ar =~ /.+?\.Z$/ ? Z :
242 $ar =~ /.+?\.lzma$/ ? LZMA :
247 bless $parsed, $class;
249 ### don't know what type of file it is
250 ### XXX this *has* to be an object call, not a package call
251 return $parsed->_error(loc("Cannot determine file type for '%1'",
252 $parsed->{archive} )) unless $parsed->{type};
257 =head2 $ae->extract( [to => '/output/path'] )
259 Extracts the archive represented by the C<Archive::Extract> object to
260 the path of your choice as specified by the C<to> argument. Defaults to
263 Since C<.gz> files never hold a directory, but only a single file; if
264 the C<to> argument is an existing directory, the file is extracted
265 there, with its C<.gz> suffix stripped.
266 If the C<to> argument is not an existing directory, the C<to> argument
267 is understood to be a filename, if the archive type is C<gz>.
268 In the case that you did not specify a C<to> argument, the output
269 file will be the name of the archive file, stripped from its C<.gz>
270 suffix, in the current working directory.
272 C<extract> will try a pure perl solution first, and then fall back to
273 commandline tools if they are available. See the C<GLOBAL VARIABLES>
274 section below on how to alter this behaviour.
276 It will return true on success, and false on failure.
278 On success, it will also set the follow attributes in the object:
282 =item $ae->extract_path
284 This is the directory that the files where extracted to.
288 This is an array ref with the paths of all the files in the archive,
289 relative to the C<to> argument you specified.
290 To get the full path to an extracted file, you would use:
292 File::Spec->catfile( $to, $ae->files->[0] );
294 Note that all files from a tar archive will be in unix format, as per
295 the tar specification.
305 ### reset error messages
306 $self->_error_msg( [] );
307 $self->_error_msg_long( [] );
311 to => { default => '.', store => \$to }
314 check( $tmpl, \%hash ) or return;
316 ### so 'to' could be a file or a dir, depending on whether it's a .gz
317 ### file, or basically anything else.
318 ### so, check that, then act accordingly.
319 ### set an accessor specifically so _gunzip can know what file to extract
323 if( $self->is_gz or $self->is_bz2 or $self->is_Z or $self->is_lzma ) {
325 my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2?|Z|lzma)$//i;
330 $self->_gunzip_to( basename($cp) );
332 ### then it's a filename
335 $self->_gunzip_to( basename($to) );
338 ### not a foo.gz file
344 ### make the dir if it doesn't exist ###
346 eval { mkpath( $dir ) };
348 return $self->_error(loc("Could not create path '%1': %2", $dir, $@))
352 ### get the current dir, to restore later ###
358 ### chdir to the target dir ###
359 unless( chdir $dir ) {
360 $self->_error(loc("Could not chdir to '%1': %2", $dir, $!));
361 $ok = 0; last EXTRACT;
364 ### set files to an empty array ref, so there's always an array
365 ### ref IN the accessor, to avoid errors like:
366 ### Can't use an undefined value as an ARRAY reference at
367 ### ../lib/Archive/Extract.pm line 742. (rt #19815)
370 ### find out the dispatch methods needed for this type of
371 ### archive. Do a $self->is_XXX to figure out the type, then
372 ### get the hashref with bin + pure perl dispatchers.
373 my ($map) = map { $Mapping->{$_} } grep { $self->$_ } keys %$Mapping;
375 ### add pure perl extractor if allowed & add bin extractor if allowed
377 push @methods, $map->{'pp'} if $_ALLOW_PURE_PERL;
378 push @methods, $map->{'bin'} if $_ALLOW_BIN;
380 ### reverse it if we prefer bin extractors
381 @methods = reverse @methods if $PREFER_BIN;
384 for my $method (@methods) {
385 print "# Extracting with ->$method\n" if $DEBUG;
387 my $rv = $self->$method;
389 ### a positive extraction
390 if( $rv and $rv ne METHOD_NA ) {
391 print "# Extraction succeeded\n" if $DEBUG;
392 $self->_extractor($method);
395 ### method is not available
396 } elsif ( $rv and $rv eq METHOD_NA ) {
397 print "# Extraction method not available\n" if $DEBUG;
400 print "# Extraction method failed\n" if $DEBUG;
405 ### warn something went wrong if we didn't get an extractor
406 unless( $self->_extractor ) {
407 my $diag = $fail ? loc("Extract failed due to errors") :
408 $na ? loc("Extract failed; no extractors available") :
411 $self->_error($diag);
416 ### and chdir back ###
417 unless( chdir $cwd ) {
418 $self->_error(loc("Could not chdir back to start dir '%1': %2'",
429 =head2 $ae->error([BOOL])
431 Returns the last encountered error as string.
432 Pass it a true value to get the C<Carp::longmess()> output instead.
434 =head2 $ae->extract_path
436 This is the directory the archive got extracted to.
437 See C<extract()> for details.
441 This is an array ref holding all the paths from the archive.
442 See C<extract()> for details.
446 This is the full path to the archive file represented by this
447 C<Archive::Extract> object.
451 This is the type of archive represented by this C<Archive::Extract>
452 object. See accessors below for an easier way to use this.
453 See the C<new()> method for details.
457 Returns a list of all known C<types> for C<Archive::Extract>'s
462 sub types { return @Types }
466 Returns true if the file is of type C<.tar.gz>.
467 See the C<new()> method for details.
471 Returns true if the file is of type C<.tar>.
472 See the C<new()> method for details.
476 Returns true if the file is of type C<.gz>.
477 See the C<new()> method for details.
481 Returns true if the file is of type C<.Z>.
482 See the C<new()> method for details.
486 Returns true if the file is of type C<.zip>.
487 See the C<new()> method for details.
491 Returns true if the file is of type C<.lzma>.
492 See the C<new()> method for details.
496 ### quick check methods ###
497 sub is_tgz { return $_[0]->type eq TGZ }
498 sub is_tar { return $_[0]->type eq TAR }
499 sub is_gz { return $_[0]->type eq GZ }
500 sub is_zip { return $_[0]->type eq ZIP }
501 sub is_tbz { return $_[0]->type eq TBZ }
502 sub is_bz2 { return $_[0]->type eq BZ2 }
503 sub is_Z { return $_[0]->type eq Z }
504 sub is_lzma { return $_[0]->type eq LZMA }
510 Returns the full path to your tar binary, if found.
514 Returns the full path to your gzip binary, if found
516 =head2 $ae->bin_unzip
518 Returns the full path to your unzip binary, if found
520 =head2 $ae->bin_unlzma
522 Returns the full path to your unlzma binary, if found
526 ### paths to commandline tools ###
527 sub bin_gzip { return $PROGRAMS->{'gzip'} if $PROGRAMS->{'gzip'} }
528 sub bin_unzip { return $PROGRAMS->{'unzip'} if $PROGRAMS->{'unzip'} }
529 sub bin_tar { return $PROGRAMS->{'tar'} if $PROGRAMS->{'tar'} }
530 sub bin_bunzip2 { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} }
531 sub bin_uncompress { return $PROGRAMS->{'uncompress'}
532 if $PROGRAMS->{'uncompress'} }
533 sub bin_unlzma { return $PROGRAMS->{'unlzma'} if $PROGRAMS->{'unlzma'} }
535 =head2 $bool = $ae->have_old_bunzip2
537 Older versions of C</bin/bunzip2>, from before the C<bunzip2 1.0> release,
538 require all archive names to end in C<.bz2> or it will not extract
539 them. This method checks if you have a recent version of C<bunzip2>
540 that allows any extension, or an older one that doesn't.
544 sub have_old_bunzip2 {
547 ### no bunzip2? no old bunzip2 either :)
548 return unless $self->bin_bunzip2;
550 ### if we can't run this, we can't be sure if it's too old or not
551 ### XXX stupid stupid stupid bunzip2 doesn't understand --version
552 ### is not a request to extract data:
553 ### $ bunzip2 --version
554 ### bzip2, a block-sorting file compressor. Version 1.0.2, 30-Dec-2001.
556 ### bunzip2: I won't read compressed data from a terminal.
557 ### bunzip2: For help, type: `bunzip2 --help'.
562 ### double hateful: bunzip2 --version also hangs if input is a pipe
563 ### See #32370: Archive::Extract will hang if stdin is a pipe [+PATCH]
564 ### So, we have to provide *another* argument which is a fake filename,
565 ### just so it wont try to read from stdin to print its version..
567 ### Even if the file exists, it won't clobber or change it.
570 command => [$self->bin_bunzip2, '--version', 'NoSuchFile'],
576 return unless $buffer;
578 my ($version) = $buffer =~ /version \s+ (\d+)/ix;
580 return 1 if $version < 1;
584 #################################
588 #################################
590 ### annoying issue with (gnu) tar on win32, as illustrated by this
591 ### bug: https://rt.cpan.org/Ticket/Display.html?id=40138
592 ### which shows that (gnu) tar will interpret a file name with a :
593 ### in it as a remote file name, so C:\tmp\foo.txt is interpreted
594 ### as a remote shell, and the extract fails.
596 if( ON_WIN32 and my $cmd = __PACKAGE__->bin_tar ) {
598 ### if this is gnu tar we are running, we need to use --force-local
599 push @ExtraTarFlags, '--force-local' if `$cmd --version` =~ /gnu tar/i;
603 ### use /bin/tar to extract ###
607 ### check for /bin/tar ###
608 ### check for /bin/gzip if we need it ###
609 ### if any of the binaries are not available, return NA
610 { my $diag = not $self->bin_tar ?
611 loc("No '%1' program found", '/bin/tar') :
612 $self->is_tgz && !$self->bin_gzip ?
613 loc("No '%1' program found", '/bin/gzip') :
614 $self->is_tbz && !$self->bin_bunzip2 ?
615 loc("No '%1' program found", '/bin/bunzip2') :
619 $self->_error( $diag );
624 ### XXX figure out how to make IPC::Run do this in one call --
625 ### currently i don't know how to get output of a command after a pipe
626 ### trapped in a scalar. Mailed barries about this 5th of june 2004.
628 ### see what command we should run, based on whether
629 ### it's a .tgz or .tar
631 ### XXX solaris tar and bsdtar are having different outputs
632 ### depending whether you run with -x or -t
633 ### compensate for this insanity by running -t first, then -x
635 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
636 $self->bin_tar, '-tf', '-'] :
637 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
638 $self->bin_tar, '-tf', '-'] :
639 [$self->bin_tar, @ExtraTarFlags, '-tf', $self->archive];
642 ### newer versions of 'tar' (1.21 and up) now print record size
643 ### to STDERR as well if v OR t is given (used to be both). This
644 ### is a 'feature' according to the changelog, so we must now only
645 ### inspect STDOUT, otherwise, failures like these occur:
646 ### nntp.perl.org/group/perl.cpan.testers/2009/02/msg3230366.html
648 my @out = run( command => $cmd,
652 ### command was unsuccessful
654 return $self->_error(loc(
655 "Error listing contents of archive '%1': %2",
656 $self->archive, $buffer ));
659 ### no buffers available?
660 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
661 $self->_error( $self->_no_buffer_files( $self->archive ) );
664 ### if we're on solaris we /might/ be using /bin/tar, which has
665 ### a weird output format... we might also be using
666 ### /usr/local/bin/tar, which is gnu tar, which is perfectly
667 ### fine... so we have to do some guessing here =/
668 my @files = map { chomp;
670 : (m|^ x \s+ # 'xtract' -- sigh
671 (.+?), # the actual file name
672 \s+ [\d,.]+ \s bytes,
673 \s+ [\d,.]+ \s tape \s blocks
676 ### only STDOUT, see above. Sometims, extra whitespace
677 ### is present, so make sure we only pick lines with
679 } grep { length } map { split $/, $_ } @{$out[3]};
681 ### store the files that are in the archive ###
682 $self->files(\@files);
686 ### now actually extract it ###
688 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
689 $self->bin_tar, '-xf', '-'] :
690 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
691 $self->bin_tar, '-xf', '-'] :
692 [$self->bin_tar, @ExtraTarFlags, '-xf', $self->archive];
695 unless( scalar run( command => $cmd,
699 return $self->_error(loc("Error extracting archive '%1': %2",
700 $self->archive, $buffer ));
703 ### we might not have them, due to lack of buffers
705 ### now that we've extracted, figure out where we extracted to
706 my $dir = $self->__get_extract_dir( $self->files );
708 ### store the extraction dir ###
709 $self->extract_path( $dir );
713 ### we got here, no error happened
719 ### use archive::tar to extract ###
723 ### Loading Archive::Tar is going to set it to 1, so make it local
724 ### within this block, starting with its initial value. Whatever
725 ### Achive::Tar does will be undone when we return.
727 ### Also, later, set $Archive::Tar::WARN to $Archive::Extract::WARN
728 ### so users don't have to even think about this variable. If they
729 ### do, they still get their set value outside of this call.
730 local $Archive::Tar::WARN = $Archive::Tar::WARN;
732 ### we definitely need Archive::Tar, so load that first
733 { my $use_list = { 'Archive::Tar' => '0.0' };
735 unless( can_load( modules => $use_list ) ) {
737 $self->_error(loc("You do not have '%1' installed - " .
738 "Please install it as soon as possible.",
745 ### we might pass it a filehandle if it's a .tbz file..
746 my $fh_to_read = $self->archive;
748 ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib
749 ### if A::T's version is 0.99 or higher
750 if( $self->is_tgz ) {
751 my $use_list = { 'Compress::Zlib' => '0.0' };
752 $use_list->{ 'IO::Zlib' } = '0.0'
753 if $Archive::Tar::VERSION >= '0.99';
755 unless( can_load( modules => $use_list ) ) {
756 my $which = join '/', sort keys %$use_list;
759 "You do not have '%1' installed - Please ".
760 "install it as soon as possible.", $which)
766 } elsif ( $self->is_tbz ) {
767 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
768 unless( can_load( modules => $use_list ) ) {
770 "You do not have '%1' installed - Please " .
771 "install it as soon as possible.",
772 'IO::Uncompress::Bunzip2')
778 my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
779 return $self->_error(loc("Unable to open '%1': %2",
781 $IO::Uncompress::Bunzip2::Bunzip2Error));
788 ### $Archive::Tar::WARN is 1 by default in Archive::Tar, but we've
789 ### localized $Archive::Tar::WARN already.
790 $Archive::Tar::WARN = $Archive::Extract::WARN;
792 ### only tell it it's compressed if it's a .tgz, as we give it a file
793 ### handle if it's a .tbz
794 my @read = ( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) );
796 ### for version of Archive::Tar > 1.04
797 local $Archive::Tar::CHOWN = 0;
799 ### use the iterator if we can. it's a feature of A::T 1.40 and up
800 if ( $_ALLOW_TAR_ITER && Archive::Tar->can( 'iter' ) ) {
803 unless ( $next = Archive::Tar->iter( @read ) ) {
804 return $self->_error(loc(
805 "Unable to read '%1': %2", $self->archive,
806 $Archive::Tar::error));
809 while ( my $file = $next->() ) {
810 push @files, $file->full_path;
812 $file->extract or return $self->_error(loc(
813 "Unable to read '%1': %2",
815 $Archive::Tar::error));
818 ### older version, read the archive into memory
821 my $tar = Archive::Tar->new();
823 unless( $tar->read( @read ) ) {
824 return $self->_error(loc("Unable to read '%1': %2",
825 $self->archive, $Archive::Tar::error));
828 ### workaround to prevent Archive::Tar from setting uid, which
829 ### is a potential security hole. -autrijus
830 ### have to do it here, since A::T needs to be /loaded/ first ###
831 { no strict 'refs'; local $^W;
833 ### older versions of archive::tar <= 0.23
834 *Archive::Tar::chown = sub {};
837 { local $^W; # quell 'splice() offset past end of array' warnings
838 # on older versions of A::T
840 ### older archive::tar always returns $self, return value
841 ### slightly fux0r3d because of it.
842 $tar->extract or return $self->_error(loc(
843 "Unable to extract '%1': %2",
844 $self->archive, $Archive::Tar::error ));
847 @files = $tar->list_files;
851 my $dir = $self->__get_extract_dir( \@files );
853 ### store the files that are in the archive ###
854 $self->files(\@files);
856 ### store the extraction dir ###
857 $self->extract_path( $dir );
859 ### check if the dir actually appeared ###
860 return 1 if -d $self->extract_path;
862 ### no dir, we failed ###
863 return $self->_error(loc("Unable to extract '%1': %2",
864 $self->archive, $Archive::Tar::error ));
867 #################################
871 #################################
876 ### check for /bin/gzip -- we need it ###
877 unless( $self->bin_gzip ) {
878 $self->_error(loc("No '%1' program found", '/bin/gzip'));
882 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
883 return $self->_error(loc("Could not open '%1' for writing: %2",
884 $self->_gunzip_to, $! ));
886 my $cmd = [ $self->bin_gzip, '-cdf', $self->archive ];
889 unless( scalar run( command => $cmd,
893 return $self->_error(loc("Unable to gunzip '%1': %2",
894 $self->archive, $buffer));
897 ### no buffers available?
898 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
899 $self->_error( $self->_no_buffer_content( $self->archive ) );
902 print $fh $buffer if defined $buffer;
906 ### set what files where extract, and where they went ###
907 $self->files( [$self->_gunzip_to] );
908 $self->extract_path( File::Spec->rel2abs(cwd()) );
916 my $use_list = { 'Compress::Zlib' => '0.0' };
917 unless( can_load( modules => $use_list ) ) {
918 $self->_error(loc("You do not have '%1' installed - Please " .
919 "install it as soon as possible.", 'Compress::Zlib'));
923 my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or
924 return $self->_error(loc("Unable to open '%1': %2",
925 $self->archive, $Compress::Zlib::gzerrno));
927 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
928 return $self->_error(loc("Could not open '%1' for writing: %2",
929 $self->_gunzip_to, $! ));
932 $fh->print($buffer) while $gz->gzread($buffer) > 0;
935 ### set what files where extract, and where they went ###
936 $self->files( [$self->_gunzip_to] );
937 $self->extract_path( File::Spec->rel2abs(cwd()) );
942 #################################
946 #################################
948 sub _uncompress_bin {
951 ### check for /bin/gzip -- we need it ###
952 unless( $self->bin_uncompress ) {
953 $self->_error(loc("No '%1' program found", '/bin/uncompress'));
957 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
958 return $self->_error(loc("Could not open '%1' for writing: %2",
959 $self->_gunzip_to, $! ));
961 my $cmd = [ $self->bin_uncompress, '-c', $self->archive ];
964 unless( scalar run( command => $cmd,
968 return $self->_error(loc("Unable to uncompress '%1': %2",
969 $self->archive, $buffer));
972 ### no buffers available?
973 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
974 $self->_error( $self->_no_buffer_content( $self->archive ) );
977 print $fh $buffer if defined $buffer;
981 ### set what files where extract, and where they went ###
982 $self->files( [$self->_gunzip_to] );
983 $self->extract_path( File::Spec->rel2abs(cwd()) );
989 #################################
993 #################################
999 ### check for /bin/gzip if we need it ###
1000 unless( $self->bin_unzip ) {
1001 $self->_error(loc("No '%1' program found", '/bin/unzip'));
1005 ### first, get the files.. it must be 2 different commands with 'unzip' :(
1006 { ### on VMS, capital letter options have to be quoted. This is
1007 ### peported by John Malmberg on P5P Tue 21 Aug 2007 05:05:11
1008 ### Subject: [patch@31735]Archive Extract fix on VMS.
1009 my $opt = ON_VMS ? '"-Z"' : '-Z';
1010 my $cmd = [ $self->bin_unzip, $opt, '-1', $self->archive ];
1013 unless( scalar run( command => $cmd,
1015 buffer => \$buffer )
1017 return $self->_error(loc("Unable to unzip '%1': %2",
1018 $self->archive, $buffer));
1021 ### no buffers available?
1022 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1023 $self->_error( $self->_no_buffer_files( $self->archive ) );
1026 $self->files( [split $/, $buffer] );
1030 ### now, extract the archive ###
1031 { my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ];
1034 unless( scalar run( command => $cmd,
1036 buffer => \$buffer )
1038 return $self->_error(loc("Unable to unzip '%1': %2",
1039 $self->archive, $buffer));
1042 if( scalar @{$self->files} ) {
1043 my $files = $self->files;
1044 my $dir = $self->__get_extract_dir( $files );
1046 $self->extract_path( $dir );
1056 my $use_list = { 'Archive::Zip' => '0.0' };
1057 unless( can_load( modules => $use_list ) ) {
1058 $self->_error(loc("You do not have '%1' installed - Please " .
1059 "install it as soon as possible.", 'Archive::Zip'));
1063 my $zip = Archive::Zip->new();
1065 unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) {
1066 return $self->_error(loc("Unable to read '%1'", $self->archive));
1072 ### Address: #43278: Explicitly tell Archive::Zip where to put the files:
1073 ### "In my BackPAN indexing, Archive::Zip was extracting things
1074 ### in my script's directory instead of the current working directory.
1075 ### I traced this back through Archive::Zip::_asLocalName which
1076 ### eventually calls File::Spec::Win32::rel2abs which on Windows might
1077 ### call Cwd::getdcwd. getdcwd returns the wrong directory in my
1078 ### case, even though I think I'm on the same drive.
1080 ### To fix this, I pass the optional second argument to
1081 ### extractMember using the cwd from Archive::Extract." --bdfoy
1083 ## store cwd() before looping; calls to cwd() can be expensive, and
1084 ### it won't change during the loop
1085 my $extract_dir = cwd();
1087 ### have to extract every member individually ###
1088 for my $member ($zip->members) {
1089 push @files, $member->{fileName};
1091 ### file to extact to, to avoid the above problem
1092 my $to = File::Spec->catfile( $extract_dir, $member->{fileName} );
1094 unless( $zip->extractMember($member, $to) == &Archive::Zip::AZ_OK ) {
1095 return $self->_error(loc("Extraction of '%1' from '%2' failed",
1096 $member->{fileName}, $self->archive ));
1100 my $dir = $self->__get_extract_dir( \@files );
1102 ### set what files where extract, and where they went ###
1103 $self->files( \@files );
1104 $self->extract_path( File::Spec->rel2abs($dir) );
1109 sub __get_extract_dir {
1111 my $files = shift || [];
1113 return unless scalar @$files;
1116 for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
1117 my($dir,$pos) = @$aref;
1119 ### add a catdir(), so that any trailing slashes get
1120 ### take care of (removed)
1121 ### also, a catdir() normalises './dir/foo' to 'dir/foo';
1122 ### which was the problem in bug #23999
1123 my $res = -d $files->[$pos]
1124 ? File::Spec->catdir( $files->[$pos], '' )
1125 : File::Spec->catdir( dirname( $files->[$pos] ) );
1130 ### if the first and last dir don't match, make sure the
1131 ### dirname is not set wrongly
1134 ### dirs are the same, so we know for sure what the extract dir is
1135 if( $dir1 eq $dir2 ) {
1138 ### dirs are different.. do they share the base dir?
1139 ### if so, use that, if not, fall back to '.'
1141 my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
1142 my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
1144 $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
1147 return File::Spec->rel2abs( $dir );
1150 #################################
1154 #################################
1159 ### check for /bin/gzip -- we need it ###
1160 unless( $self->bin_bunzip2 ) {
1161 $self->_error(loc("No '%1' program found", '/bin/bunzip2'));
1165 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1166 return $self->_error(loc("Could not open '%1' for writing: %2",
1167 $self->_gunzip_to, $! ));
1169 ### guard against broken bunzip2. See ->have_old_bunzip2()
1171 if( $self->have_old_bunzip2 and $self->archive !~ /\.bz2$/i ) {
1172 return $self->_error(loc("Your bunzip2 version is too old and ".
1173 "can only extract files ending in '%1'",
1177 my $cmd = [ $self->bin_bunzip2, '-cd', $self->archive ];
1180 unless( scalar run( command => $cmd,
1182 buffer => \$buffer )
1184 return $self->_error(loc("Unable to bunzip2 '%1': %2",
1185 $self->archive, $buffer));
1188 ### no buffers available?
1189 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1190 $self->_error( $self->_no_buffer_content( $self->archive ) );
1193 print $fh $buffer if defined $buffer;
1197 ### set what files where extract, and where they went ###
1198 $self->files( [$self->_gunzip_to] );
1199 $self->extract_path( File::Spec->rel2abs(cwd()) );
1204 ### using cz2, the compact versions... this we use mainly in archive::tar
1206 # sub _bunzip2_cz1 {
1209 # my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1210 # unless( can_load( modules => $use_list ) ) {
1211 # return $self->_error(loc("You do not have '%1' installed - Please " .
1212 # "install it as soon as possible.",
1213 # 'IO::Uncompress::Bunzip2'));
1216 # my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
1217 # return $self->_error(loc("Unable to open '%1': %2",
1219 # $IO::Uncompress::Bunzip2::Bunzip2Error));
1221 # my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1222 # return $self->_error(loc("Could not open '%1' for writing: %2",
1223 # $self->_gunzip_to, $! ));
1226 # $fh->print($buffer) while $bz->read($buffer) > 0;
1229 # ### set what files where extract, and where they went ###
1230 # $self->files( [$self->_gunzip_to] );
1231 # $self->extract_path( File::Spec->rel2abs(cwd()) );
1239 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1240 unless( can_load( modules => $use_list ) ) {
1241 $self->_error(loc("You do not have '%1' installed - Please " .
1242 "install it as soon as possible.",
1243 'IO::Uncompress::Bunzip2'));
1247 IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to)
1248 or return $self->_error(loc("Unable to uncompress '%1': %2",
1250 $IO::Uncompress::Bunzip2::Bunzip2Error));
1252 ### set what files where extract, and where they went ###
1253 $self->files( [$self->_gunzip_to] );
1254 $self->extract_path( File::Spec->rel2abs(cwd()) );
1260 #################################
1264 #################################
1269 ### check for /bin/unlzma -- we need it ###
1270 unless( $self->bin_unlzma ) {
1271 $self->_error(loc("No '%1' program found", '/bin/unlzma'));
1275 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1276 return $self->_error(loc("Could not open '%1' for writing: %2",
1277 $self->_gunzip_to, $! ));
1279 my $cmd = [ $self->bin_unlzma, '-c', $self->archive ];
1282 unless( scalar run( command => $cmd,
1284 buffer => \$buffer )
1286 return $self->_error(loc("Unable to unlzma '%1': %2",
1287 $self->archive, $buffer));
1290 ### no buffers available?
1291 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1292 $self->_error( $self->_no_buffer_content( $self->archive ) );
1295 print $fh $buffer if defined $buffer;
1299 ### set what files where extract, and where they went ###
1300 $self->files( [$self->_gunzip_to] );
1301 $self->extract_path( File::Spec->rel2abs(cwd()) );
1309 my $use_list = { 'Compress::unLZMA' => '0.0' };
1310 unless( can_load( modules => $use_list ) ) {
1311 $self->_error(loc("You do not have '%1' installed - Please " .
1312 "install it as soon as possible.", 'Compress::unLZMA'));
1316 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1317 return $self->_error(loc("Could not open '%1' for writing: %2",
1318 $self->_gunzip_to, $! ));
1321 $buffer = Compress::unLZMA::uncompressfile( $self->archive );
1322 unless ( defined $buffer ) {
1323 return $self->_error(loc("Could not unlzma '%1': %2",
1324 $self->archive, $@));
1327 print $fh $buffer if defined $buffer;
1331 ### set what files where extract, and where they went ###
1332 $self->files( [$self->_gunzip_to] );
1333 $self->extract_path( File::Spec->rel2abs(cwd()) );
1338 #################################
1342 #################################
1347 my $lerror = Carp::longmess($error);
1349 push @{$self->_error_msg}, $error;
1350 push @{$self->_error_msg_long}, $lerror;
1352 ### set $Archive::Extract::WARN to 0 to disable printing
1355 carp $DEBUG ? $lerror : $error;
1364 ### make sure we have a fallback aref
1367 ? $self->_error_msg_long
1371 return join $/, @$aref;
1374 sub _no_buffer_files {
1376 my $file = shift or return;
1377 return loc("No buffer captured, unable to tell ".
1378 "extracted files or extraction dir for '%1'", $file);
1381 sub _no_buffer_content {
1383 my $file = shift or return;
1384 return loc("No buffer captured, unable to get content for '%1'", $file);
1392 C<Archive::Extract> tries first to determine what type of archive you
1393 are passing it, by inspecting its suffix. It does not do this by using
1394 Mime magic, or something related. See C<CAVEATS> below.
1396 Once it has determined the file type, it knows which extraction methods
1397 it can use on the archive. It will try a perl solution first, then fall
1398 back to a commandline tool if that fails. If that also fails, it will
1399 return false, indicating it was unable to extract the archive.
1400 See the section on C<GLOBAL VARIABLES> to see how to alter this order.
1404 =head2 File Extensions
1406 C<Archive::Extract> trusts on the extension of the archive to determine
1407 what type it is, and what extractor methods therefore can be used. If
1408 your archives do not have any of the extensions as described in the
1409 C<new()> method, you will have to specify the type explicitly, or
1410 C<Archive::Extract> will not be able to extract the archive for you.
1412 =head2 Supporting Very Large Files
1414 C<Archive::Extract> can use either pure perl modules or command line
1415 programs under the hood. Some of the pure perl modules (like
1416 C<Archive::Tar> and Compress::unLZMA) take the entire contents of the archive into memory,
1417 which may not be feasible on your system. Consider setting the global
1418 variable C<$Archive::Extract::PREFER_BIN> to C<1>, which will prefer
1419 the use of command line programs and won't consume so much memory.
1421 See the C<GLOBAL VARIABLES> section below for details.
1423 =head2 Bunzip2 support of arbitrary extensions.
1425 Older versions of C</bin/bunzip2> do not support arbitrary file
1426 extensions and insist on a C<.bz2> suffix. Although we do our best
1427 to guard against this, if you experience a bunzip2 error, it may
1428 be related to this. For details, please see the C<have_old_bunzip2>
1431 =head1 GLOBAL VARIABLES
1433 =head2 $Archive::Extract::DEBUG
1435 Set this variable to C<true> to have all calls to command line tools
1436 be printed out, including all their output.
1437 This also enables C<Carp::longmess> errors, instead of the regular
1440 Good for tracking down why things don't work with your particular
1443 Defaults to C<false>.
1445 =head2 $Archive::Extract::WARN
1447 This variable controls whether errors encountered internally by
1448 C<Archive::Extract> should be C<carp>'d or not.
1450 Set to false to silence warnings. Inspect the output of the C<error()>
1451 method manually to see what went wrong.
1453 Defaults to C<true>.
1455 =head2 $Archive::Extract::PREFER_BIN
1457 This variables controls whether C<Archive::Extract> should prefer the
1458 use of perl modules, or commandline tools to extract archives.
1460 Set to C<true> to have C<Archive::Extract> prefer commandline tools.
1462 Defaults to C<false>.
1464 =head1 TODO / CAVEATS
1468 =item Mime magic support
1470 Maybe this module should use something like C<File::Type> to determine
1471 the type, rather than blindly trust the suffix.
1475 Currently, C<Archive::Extract> does a C<chdir> to the extraction dir before
1476 extraction, and a C<chdir> back again after. This is not necessarily
1477 thread safe. See C<rt.cpan.org> bug C<#45671> for details.
1483 Please report bugs or other issues to E<lt>bug-archive-extract@rt.cpan.org<gt>.
1487 This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1491 This library is free software; you may redistribute and/or modify it
1492 under the same terms as Perl itself.
1497 # c-indentation-style: bsd
1499 # indent-tabs-mode: nil
1501 # vim: expandtab shiftwidth=4: