1 package Archive::Extract;
7 use IPC::Cmd qw[run can_run];
9 use File::Path qw[mkpath];
11 use File::Basename qw[dirname basename];
12 use Params::Check qw[check];
13 use Module::Load::Conditional qw[can_load check_install];
14 use Locale::Maketext::Simple Style => 'gettext';
16 ### solaris has silly /bin/tar output ###
17 use constant ON_SOLARIS => $^O eq 'solaris' ? 1 : 0;
18 use constant FILE_EXISTS => sub { -e $_[0] ? 1 : 0 };
20 ### VMS may require quoting upper case command options
21 use constant ON_VMS => $^O eq 'VMS' ? 1 : 0;
23 ### we can't use this extraction method, because of missing
25 use constant METHOD_NA => [];
27 ### If these are changed, update @TYPES and the new() POD
28 use constant TGZ => 'tgz';
29 use constant TAR => 'tar';
30 use constant GZ => 'gz';
31 use constant ZIP => 'zip';
32 use constant BZ2 => 'bz2';
33 use constant TBZ => 'tbz';
34 use constant Z => 'Z';
35 use constant LZMA => 'lzma';
37 use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG
38 $_ALLOW_BIN $_ALLOW_PURE_PERL
45 $_ALLOW_PURE_PERL = 1; # allow pure perl extractors
46 $_ALLOW_BIN = 1; # allow binary extractors
48 # same as all constants
49 my @Types = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ, Z, LZMA );
51 local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1;
57 Archive::Extract - A generic archive extracting mechanism
63 ### build an Archive::Extract object ###
64 my $ae = Archive::Extract->new( archive => 'foo.tgz' );
66 ### extract to cwd() ###
67 my $ok = $ae->extract;
69 ### extract to /tmp ###
70 my $ok = $ae->extract( to => '/tmp' );
72 ### what if something went wrong?
73 my $ok = $ae->extract or die $ae->error;
75 ### files from the archive ###
76 my $files = $ae->files;
78 ### dir that was extracted to ###
79 my $outdir = $ae->extract_path;
82 ### quick check methods ###
83 $ae->is_tar # is it a .tar file?
84 $ae->is_tgz # is it a .tar.gz or .tgz file?
85 $ae->is_gz; # is it a .gz file?
86 $ae->is_zip; # is it a .zip file?
87 $ae->is_bz2; # is it a .bz2 file?
88 $ae->is_tbz; # is it a .tar.bz2 or .tbz file?
89 $ae->is_lzma; # is it a .lzma file?
91 ### absolute path to the archive you provided ###
94 ### commandline tools, if found ###
95 $ae->bin_tar # path to /bin/tar, if found
96 $ae->bin_gzip # path to /bin/gzip, if found
97 $ae->bin_unzip # path to /bin/unzip, if found
98 $ae->bin_bunzip2 # path to /bin/bunzip2 if found
99 $ae->bin_unlzma # path to /bin/unlzma if found
103 Archive::Extract is a generic archive extraction mechanism.
105 It allows you to extract any archive file of the type .tar, .tar.gz,
106 .gz, .Z, tar.bz2, .tbz, .bz2, .zip or .lzma without having to worry how it
107 does so, or use different interfaces for each type by using either
108 perl modules, or commandline tools on your system.
110 See the C<HOW IT WORKS> section further down for details.
115 ### see what /bin/programs are available ###
117 for my $pgm (qw[tar unzip gzip bunzip2 uncompress unlzma]) {
118 $PROGRAMS->{$pgm} = can_run($pgm);
121 ### mapping from types to extractor methods ###
122 my $Mapping = { # binary program # pure perl module
123 is_tgz => { bin => '_untar_bin', pp => '_untar_at' },
124 is_tar => { bin => '_untar_bin', pp => '_untar_at' },
125 is_gz => { bin => '_gunzip_bin', pp => '_gunzip_cz' },
126 is_zip => { bin => '_unzip_bin', pp => '_unzip_az' },
127 is_tbz => { bin => '_untar_bin', pp => '_untar_at' },
128 is_bz2 => { bin => '_bunzip2_bin', pp => '_bunzip2_bz2'},
129 is_Z => { bin => '_uncompress_bin', pp => '_gunzip_cz' },
130 is_lzma => { bin => '_unlzma_bin', pp => '_unlzma_cz' },
133 { ### use subs so we re-generate array refs etc for the no-overide flags
134 ### if we don't, then we reuse the same arrayref, meaning objects store
137 archive => sub { { required => 1, allow => FILE_EXISTS } },
138 type => sub { { default => '', allow => [ @Types ] } },
139 _error_msg => sub { { no_override => 1, default => [] } },
140 _error_msg_long => sub { { no_override => 1, default => [] } },
143 ### build accesssors ###
144 for my $method( keys %$tmpl,
145 qw[_extractor _gunzip_to files extract_path],
150 $self->{$method} = $_[0] if @_;
151 return $self->{$method};
157 =head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE])
159 Creates a new C<Archive::Extract> object based on the archive file you
160 passed it. Automatically determines the type of archive based on the
161 extension, but you can override that by explicitly providing the
164 Valid values for C<type> are:
170 Standard tar files, as produced by, for example, C</bin/tar>.
171 Corresponds to a C<.tar> suffix.
175 Gzip compressed tar files, as produced by, for example C</bin/tar -z>.
176 Corresponds to a C<.tgz> or C<.tar.gz> suffix.
180 Gzip compressed file, as produced by, for example C</bin/gzip>.
181 Corresponds to a C<.gz> suffix.
185 Lempel-Ziv compressed file, as produced by, for example C</bin/compress>.
186 Corresponds to a C<.Z> suffix.
190 Zip compressed file, as produced by, for example C</bin/zip>.
191 Corresponds to a C<.zip>, C<.jar> or C<.par> suffix.
195 Bzip2 compressed file, as produced by, for example, C</bin/bzip2>.
196 Corresponds to a C<.bz2> suffix.
200 Bzip2 compressed tar file, as produced by, for exmample C</bin/tar -j>.
201 Corresponds to a C<.tbz> or C<.tar.bz2> suffix.
205 Lzma compressed file, as produced by C</bin/lzma>.
206 Corresponds to a C<.lzma> suffix.
210 Returns a C<Archive::Extract> object on success, or false on failure.
219 ### see above why we use subs here and generate the template;
220 ### it's basically to not re-use arrayrefs
221 my %utmpl = map { $_ => $tmpl->{$_}->() } keys %$tmpl;
223 my $parsed = check( \%utmpl, \%hash ) or return;
225 ### make sure we have an absolute path ###
226 my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} );
228 ### figure out the type, if it wasn't already specified ###
229 unless ( $parsed->{type} ) {
231 $ar =~ /.+?\.(?:tar\.gz|tgz)$/i ? TGZ :
232 $ar =~ /.+?\.gz$/i ? GZ :
233 $ar =~ /.+?\.tar$/i ? TAR :
234 $ar =~ /.+?\.(zip|jar|par)$/i ? ZIP :
235 $ar =~ /.+?\.(?:tbz2?|tar\.bz2?)$/i ? TBZ :
236 $ar =~ /.+?\.bz2$/i ? BZ2 :
237 $ar =~ /.+?\.Z$/ ? Z :
238 $ar =~ /.+?\.lzma$/ ? LZMA :
243 bless $parsed, $class;
245 ### don't know what type of file it is
246 ### XXX this *has* to be an object call, not a package call
247 return $parsed->_error(loc("Cannot determine file type for '%1'",
248 $parsed->{archive} )) unless $parsed->{type};
253 =head2 $ae->extract( [to => '/output/path'] )
255 Extracts the archive represented by the C<Archive::Extract> object to
256 the path of your choice as specified by the C<to> argument. Defaults to
259 Since C<.gz> files never hold a directory, but only a single file; if
260 the C<to> argument is an existing directory, the file is extracted
261 there, with its C<.gz> suffix stripped.
262 If the C<to> argument is not an existing directory, the C<to> argument
263 is understood to be a filename, if the archive type is C<gz>.
264 In the case that you did not specify a C<to> argument, the output
265 file will be the name of the archive file, stripped from its C<.gz>
266 suffix, in the current working directory.
268 C<extract> will try a pure perl solution first, and then fall back to
269 commandline tools if they are available. See the C<GLOBAL VARIABLES>
270 section below on how to alter this behaviour.
272 It will return true on success, and false on failure.
274 On success, it will also set the follow attributes in the object:
278 =item $ae->extract_path
280 This is the directory that the files where extracted to.
284 This is an array ref with the paths of all the files in the archive,
285 relative to the C<to> argument you specified.
286 To get the full path to an extracted file, you would use:
288 File::Spec->catfile( $to, $ae->files->[0] );
290 Note that all files from a tar archive will be in unix format, as per
291 the tar specification.
301 ### reset error messages
302 $self->_error_msg( [] );
303 $self->_error_msg_long( [] );
307 to => { default => '.', store => \$to }
310 check( $tmpl, \%hash ) or return;
312 ### so 'to' could be a file or a dir, depending on whether it's a .gz
313 ### file, or basically anything else.
314 ### so, check that, then act accordingly.
315 ### set an accessor specifically so _gunzip can know what file to extract
319 if( $self->is_gz or $self->is_bz2 or $self->is_Z or $self->is_lzma ) {
321 my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2?|Z|lzma)$//i;
326 $self->_gunzip_to( basename($cp) );
328 ### then it's a filename
331 $self->_gunzip_to( basename($to) );
334 ### not a foo.gz file
340 ### make the dir if it doesn't exist ###
342 eval { mkpath( $dir ) };
344 return $self->_error(loc("Could not create path '%1': %2", $dir, $@))
348 ### get the current dir, to restore later ###
354 ### chdir to the target dir ###
355 unless( chdir $dir ) {
356 $self->_error(loc("Could not chdir to '%1': %2", $dir, $!));
357 $ok = 0; last EXTRACT;
360 ### set files to an empty array ref, so there's always an array
361 ### ref IN the accessor, to avoid errors like:
362 ### Can't use an undefined value as an ARRAY reference at
363 ### ../lib/Archive/Extract.pm line 742. (rt #19815)
366 ### find out the dispatch methods needed for this type of
367 ### archive. Do a $self->is_XXX to figure out the type, then
368 ### get the hashref with bin + pure perl dispatchers.
369 my ($map) = map { $Mapping->{$_} } grep { $self->$_ } keys %$Mapping;
371 ### add pure perl extractor if allowed & add bin extractor if allowed
373 push @methods, $map->{'pp'} if $_ALLOW_PURE_PERL;
374 push @methods, $map->{'bin'} if $_ALLOW_BIN;
376 ### reverse it if we prefer bin extractors
377 @methods = reverse @methods if $PREFER_BIN;
380 for my $method (@methods) {
381 print "# Extracting with ->$method\n" if $DEBUG;
383 my $rv = $self->$method;
385 ### a positive extraction
386 if( $rv and $rv ne METHOD_NA ) {
387 print "# Extraction succeeded\n" if $DEBUG;
388 $self->_extractor($method);
391 ### method is not available
392 } elsif ( $rv and $rv eq METHOD_NA ) {
393 print "# Extraction method not available\n" if $DEBUG;
396 print "# Extraction method failed\n" if $DEBUG;
401 ### warn something went wrong if we didn't get an extractor
402 unless( $self->_extractor ) {
403 my $diag = $fail ? loc("Extract failed due to errors") :
404 $na ? loc("Extract failed; no extractors available") :
407 $self->_error($diag);
412 ### and chdir back ###
413 unless( chdir $cwd ) {
414 $self->_error(loc("Could not chdir back to start dir '%1': %2'",
425 =head2 $ae->error([BOOL])
427 Returns the last encountered error as string.
428 Pass it a true value to get the C<Carp::longmess()> output instead.
430 =head2 $ae->extract_path
432 This is the directory the archive got extracted to.
433 See C<extract()> for details.
437 This is an array ref holding all the paths from the archive.
438 See C<extract()> for details.
442 This is the full path to the archive file represented by this
443 C<Archive::Extract> object.
447 This is the type of archive represented by this C<Archive::Extract>
448 object. See accessors below for an easier way to use this.
449 See the C<new()> method for details.
453 Returns a list of all known C<types> for C<Archive::Extract>'s
458 sub types { return @Types }
462 Returns true if the file is of type C<.tar.gz>.
463 See the C<new()> method for details.
467 Returns true if the file is of type C<.tar>.
468 See the C<new()> method for details.
472 Returns true if the file is of type C<.gz>.
473 See the C<new()> method for details.
477 Returns true if the file is of type C<.Z>.
478 See the C<new()> method for details.
482 Returns true if the file is of type C<.zip>.
483 See the C<new()> method for details.
487 Returns true if the file is of type C<.lzma>.
488 See the C<new()> method for details.
492 ### quick check methods ###
493 sub is_tgz { return $_[0]->type eq TGZ }
494 sub is_tar { return $_[0]->type eq TAR }
495 sub is_gz { return $_[0]->type eq GZ }
496 sub is_zip { return $_[0]->type eq ZIP }
497 sub is_tbz { return $_[0]->type eq TBZ }
498 sub is_bz2 { return $_[0]->type eq BZ2 }
499 sub is_Z { return $_[0]->type eq Z }
500 sub is_lzma { return $_[0]->type eq LZMA }
506 Returns the full path to your tar binary, if found.
510 Returns the full path to your gzip binary, if found
512 =head2 $ae->bin_unzip
514 Returns the full path to your unzip binary, if found
516 =head2 $ae->bin_unlzma
518 Returns the full path to your unlzma binary, if found
522 ### paths to commandline tools ###
523 sub bin_gzip { return $PROGRAMS->{'gzip'} if $PROGRAMS->{'gzip'} }
524 sub bin_unzip { return $PROGRAMS->{'unzip'} if $PROGRAMS->{'unzip'} }
525 sub bin_tar { return $PROGRAMS->{'tar'} if $PROGRAMS->{'tar'} }
526 sub bin_bunzip2 { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} }
527 sub bin_uncompress { return $PROGRAMS->{'uncompress'}
528 if $PROGRAMS->{'uncompress'} }
529 sub bin_unlzma { return $PROGRAMS->{'unlzma'} if $PROGRAMS->{'unlzma'} }
531 =head2 $bool = $ae->have_old_bunzip2
533 Older versions of C</bin/bunzip2>, from before the C<bunzip2 1.0> release,
534 require all archive names to end in C<.bz2> or it will not extract
535 them. This method checks if you have a recent version of C<bunzip2>
536 that allows any extension, or an older one that doesn't.
540 sub have_old_bunzip2 {
543 ### no bunzip2? no old bunzip2 either :)
544 return unless $self->bin_bunzip2;
546 ### if we can't run this, we can't be sure if it's too old or not
547 ### XXX stupid stupid stupid bunzip2 doesn't understand --version
548 ### is not a request to extract data:
549 ### $ bunzip2 --version
550 ### bzip2, a block-sorting file compressor. Version 1.0.2, 30-Dec-2001.
552 ### bunzip2: I won't read compressed data from a terminal.
553 ### bunzip2: For help, type: `bunzip2 --help'.
558 ### double hateful: bunzip2 --version also hangs if input is a pipe
559 ### See #32370: Archive::Extract will hang if stdin is a pipe [+PATCH]
560 ### So, we have to provide *another* argument which is a fake filename,
561 ### just so it wont try to read from stdin to print its version..
563 ### Even if the file exists, it won't clobber or change it.
566 command => [$self->bin_bunzip2, '--version', 'NoSuchFile'],
572 return unless $buffer;
574 my ($version) = $buffer =~ /version \s+ (\d+)/ix;
576 return 1 if $version < 1;
580 #################################
584 #################################
587 ### use /bin/tar to extract ###
591 ### check for /bin/tar ###
592 ### check for /bin/gzip if we need it ###
593 ### if any of the binaries are not available, return NA
594 { my $diag = not $self->bin_tar ?
595 loc("No '%1' program found", '/bin/tar') :
596 $self->is_tgz && !$self->bin_gzip ?
597 loc("No '%1' program found", '/bin/gzip') :
598 $self->is_tbz && !$self->bin_bunzip2 ?
599 loc("No '%1' program found", '/bin/bunzip2') :
603 $self->_error( $diag );
608 ### XXX figure out how to make IPC::Run do this in one call --
609 ### currently i don't know how to get output of a command after a pipe
610 ### trapped in a scalar. Mailed barries about this 5th of june 2004.
612 ### see what command we should run, based on whether
613 ### it's a .tgz or .tar
615 ### XXX solaris tar and bsdtar are having different outputs
616 ### depending whether you run with -x or -t
617 ### compensate for this insanity by running -t first, then -x
619 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
620 $self->bin_tar, '-tf', '-'] :
621 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
622 $self->bin_tar, '-tf', '-'] :
623 [$self->bin_tar, '-tf', $self->archive];
625 ### run the command ###
627 unless( scalar run( command => $cmd,
631 return $self->_error(loc(
632 "Error listing contents of archive '%1': %2",
633 $self->archive, $buffer ));
636 ### no buffers available?
637 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
638 $self->_error( $self->_no_buffer_files( $self->archive ) );
641 ### if we're on solaris we /might/ be using /bin/tar, which has
642 ### a weird output format... we might also be using
643 ### /usr/local/bin/tar, which is gnu tar, which is perfectly
644 ### fine... so we have to do some guessing here =/
645 my @files = map { chomp;
647 : (m|^ x \s+ # 'xtract' -- sigh
648 (.+?), # the actual file name
649 \s+ [\d,.]+ \s bytes,
650 \s+ [\d,.]+ \s tape \s blocks
655 ### store the files that are in the archive ###
656 $self->files(\@files);
660 ### now actually extract it ###
662 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
663 $self->bin_tar, '-xf', '-'] :
664 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
665 $self->bin_tar, '-xf', '-'] :
666 [$self->bin_tar, '-xf', $self->archive];
669 unless( scalar run( command => $cmd,
673 return $self->_error(loc("Error extracting archive '%1': %2",
674 $self->archive, $buffer ));
677 ### we might not have them, due to lack of buffers
679 ### now that we've extracted, figure out where we extracted to
680 my $dir = $self->__get_extract_dir( $self->files );
682 ### store the extraction dir ###
683 $self->extract_path( $dir );
687 ### we got here, no error happened
691 ### use archive::tar to extract ###
695 ### Loading Archive::Tar is going to set it to 1, so make it local
696 ### within this block, starting with its initial value. Whatever
697 ### Achive::Tar does will be undone when we return.
699 ### Also, later, set $Archive::Tar::WARN to $Archive::Extract::WARN
700 ### so users don't have to even think about this variable. If they
701 ### do, they still get their set value outside of this call.
702 local $Archive::Tar::WARN = $Archive::Tar::WARN;
704 ### we definitely need Archive::Tar, so load that first
705 { my $use_list = { 'Archive::Tar' => '0.0' };
707 unless( can_load( modules => $use_list ) ) {
709 $self->_error(loc("You do not have '%1' installed - " .
710 "Please install it as soon as possible.",
717 ### we might pass it a filehandle if it's a .tbz file..
718 my $fh_to_read = $self->archive;
720 ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib
721 ### if A::T's version is 0.99 or higher
722 if( $self->is_tgz ) {
723 my $use_list = { 'Compress::Zlib' => '0.0' };
724 $use_list->{ 'IO::Zlib' } = '0.0'
725 if $Archive::Tar::VERSION >= '0.99';
727 unless( can_load( modules => $use_list ) ) {
728 my $which = join '/', sort keys %$use_list;
731 "You do not have '%1' installed - Please ".
732 "install it as soon as possible.", $which)
738 } elsif ( $self->is_tbz ) {
739 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
740 unless( can_load( modules => $use_list ) ) {
742 "You do not have '%1' installed - Please " .
743 "install it as soon as possible.",
744 'IO::Uncompress::Bunzip2')
750 my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
751 return $self->_error(loc("Unable to open '%1': %2",
753 $IO::Uncompress::Bunzip2::Bunzip2Error));
758 ### $Archive::Tar::WARN is 1 by default in Archive::Tar, but we've
759 ### localized $Archive::Tar::WARN already.
760 $Archive::Tar::WARN = $Archive::Extract::WARN;
762 my $tar = Archive::Tar->new();
764 ### only tell it it's compressed if it's a .tgz, as we give it a file
765 ### handle if it's a .tbz
766 unless( $tar->read( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) ) ) {
767 return $self->_error(loc("Unable to read '%1': %2", $self->archive,
768 $Archive::Tar::error));
771 ### workaround to prevent Archive::Tar from setting uid, which
772 ### is a potential security hole. -autrijus
773 ### have to do it here, since A::T needs to be /loaded/ first ###
774 { no strict 'refs'; local $^W;
776 ### older versions of archive::tar <= 0.23
777 *Archive::Tar::chown = sub {};
780 ### for version of Archive::Tar > 1.04
781 local $Archive::Tar::CHOWN = 0;
783 { local $^W; # quell 'splice() offset past end of array' warnings
784 # on older versions of A::T
786 ### older archive::tar always returns $self, return value slightly
787 ### fux0r3d because of it.
789 or return $self->_error(loc("Unable to extract '%1': %2",
790 $self->archive, $Archive::Tar::error ));
793 my @files = $tar->list_files;
794 my $dir = $self->__get_extract_dir( \@files );
796 ### store the files that are in the archive ###
797 $self->files(\@files);
799 ### store the extraction dir ###
800 $self->extract_path( $dir );
802 ### check if the dir actually appeared ###
803 return 1 if -d $self->extract_path;
805 ### no dir, we failed ###
806 return $self->_error(loc("Unable to extract '%1': %2",
807 $self->archive, $Archive::Tar::error ));
810 #################################
814 #################################
819 ### check for /bin/gzip -- we need it ###
820 unless( $self->bin_gzip ) {
821 $self->_error(loc("No '%1' program found", '/bin/gzip'));
825 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
826 return $self->_error(loc("Could not open '%1' for writing: %2",
827 $self->_gunzip_to, $! ));
829 my $cmd = [ $self->bin_gzip, '-cdf', $self->archive ];
832 unless( scalar run( command => $cmd,
836 return $self->_error(loc("Unable to gunzip '%1': %2",
837 $self->archive, $buffer));
840 ### no buffers available?
841 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
842 $self->_error( $self->_no_buffer_content( $self->archive ) );
845 print $fh $buffer if defined $buffer;
849 ### set what files where extract, and where they went ###
850 $self->files( [$self->_gunzip_to] );
851 $self->extract_path( File::Spec->rel2abs(cwd()) );
859 my $use_list = { 'Compress::Zlib' => '0.0' };
860 unless( can_load( modules => $use_list ) ) {
861 $self->_error(loc("You do not have '%1' installed - Please " .
862 "install it as soon as possible.", 'Compress::Zlib'));
866 my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or
867 return $self->_error(loc("Unable to open '%1': %2",
868 $self->archive, $Compress::Zlib::gzerrno));
870 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
871 return $self->_error(loc("Could not open '%1' for writing: %2",
872 $self->_gunzip_to, $! ));
875 $fh->print($buffer) while $gz->gzread($buffer) > 0;
878 ### set what files where extract, and where they went ###
879 $self->files( [$self->_gunzip_to] );
880 $self->extract_path( File::Spec->rel2abs(cwd()) );
885 #################################
889 #################################
891 sub _uncompress_bin {
894 ### check for /bin/gzip -- we need it ###
895 unless( $self->bin_uncompress ) {
896 $self->_error(loc("No '%1' program found", '/bin/uncompress'));
900 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
901 return $self->_error(loc("Could not open '%1' for writing: %2",
902 $self->_gunzip_to, $! ));
904 my $cmd = [ $self->bin_uncompress, '-c', $self->archive ];
907 unless( scalar run( command => $cmd,
911 return $self->_error(loc("Unable to uncompress '%1': %2",
912 $self->archive, $buffer));
915 ### no buffers available?
916 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
917 $self->_error( $self->_no_buffer_content( $self->archive ) );
920 print $fh $buffer if defined $buffer;
924 ### set what files where extract, and where they went ###
925 $self->files( [$self->_gunzip_to] );
926 $self->extract_path( File::Spec->rel2abs(cwd()) );
932 #################################
936 #################################
942 ### check for /bin/gzip if we need it ###
943 unless( $self->bin_unzip ) {
944 $self->_error(loc("No '%1' program found", '/bin/unzip'));
948 ### first, get the files.. it must be 2 different commands with 'unzip' :(
949 { ### on VMS, capital letter options have to be quoted. This is
950 ### peported by John Malmberg on P5P Tue 21 Aug 2007 05:05:11
951 ### Subject: [patch@31735]Archive Extract fix on VMS.
952 my $opt = ON_VMS ? '"-Z"' : '-Z';
953 my $cmd = [ $self->bin_unzip, $opt, '-1', $self->archive ];
956 unless( scalar run( command => $cmd,
960 return $self->_error(loc("Unable to unzip '%1': %2",
961 $self->archive, $buffer));
964 ### no buffers available?
965 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
966 $self->_error( $self->_no_buffer_files( $self->archive ) );
969 $self->files( [split $/, $buffer] );
973 ### now, extract the archive ###
974 { my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ];
977 unless( scalar run( command => $cmd,
981 return $self->_error(loc("Unable to unzip '%1': %2",
982 $self->archive, $buffer));
985 if( scalar @{$self->files} ) {
986 my $files = $self->files;
987 my $dir = $self->__get_extract_dir( $files );
989 $self->extract_path( $dir );
999 my $use_list = { 'Archive::Zip' => '0.0' };
1000 unless( can_load( modules => $use_list ) ) {
1001 $self->_error(loc("You do not have '%1' installed - Please " .
1002 "install it as soon as possible.", 'Archive::Zip'));
1006 my $zip = Archive::Zip->new();
1008 unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) {
1009 return $self->_error(loc("Unable to read '%1'", $self->archive));
1013 ### have to extract every memeber individually ###
1014 for my $member ($zip->members) {
1015 push @files, $member->{fileName};
1017 unless( $zip->extractMember($member) == &Archive::Zip::AZ_OK ) {
1018 return $self->_error(loc("Extraction of '%1' from '%2' failed",
1019 $member->{fileName}, $self->archive ));
1023 my $dir = $self->__get_extract_dir( \@files );
1025 ### set what files where extract, and where they went ###
1026 $self->files( \@files );
1027 $self->extract_path( File::Spec->rel2abs($dir) );
1032 sub __get_extract_dir {
1034 my $files = shift || [];
1036 return unless scalar @$files;
1039 for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
1040 my($dir,$pos) = @$aref;
1042 ### add a catdir(), so that any trailing slashes get
1043 ### take care of (removed)
1044 ### also, a catdir() normalises './dir/foo' to 'dir/foo';
1045 ### which was the problem in bug #23999
1046 my $res = -d $files->[$pos]
1047 ? File::Spec->catdir( $files->[$pos], '' )
1048 : File::Spec->catdir( dirname( $files->[$pos] ) );
1053 ### if the first and last dir don't match, make sure the
1054 ### dirname is not set wrongly
1057 ### dirs are the same, so we know for sure what the extract dir is
1058 if( $dir1 eq $dir2 ) {
1061 ### dirs are different.. do they share the base dir?
1062 ### if so, use that, if not, fall back to '.'
1064 my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
1065 my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
1067 $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
1070 return File::Spec->rel2abs( $dir );
1073 #################################
1077 #################################
1082 ### check for /bin/gzip -- we need it ###
1083 unless( $self->bin_bunzip2 ) {
1084 $self->_error(loc("No '%1' program found", '/bin/bunzip2'));
1088 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1089 return $self->_error(loc("Could not open '%1' for writing: %2",
1090 $self->_gunzip_to, $! ));
1092 ### guard against broken bunzip2. See ->have_old_bunzip2()
1094 if( $self->have_old_bunzip2 and $self->archive !~ /\.bz2$/i ) {
1095 return $self->_error(loc("Your bunzip2 version is too old and ".
1096 "can only extract files ending in '%1'",
1100 my $cmd = [ $self->bin_bunzip2, '-cd', $self->archive ];
1103 unless( scalar run( command => $cmd,
1105 buffer => \$buffer )
1107 return $self->_error(loc("Unable to bunzip2 '%1': %2",
1108 $self->archive, $buffer));
1111 ### no buffers available?
1112 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1113 $self->_error( $self->_no_buffer_content( $self->archive ) );
1116 print $fh $buffer if defined $buffer;
1120 ### set what files where extract, and where they went ###
1121 $self->files( [$self->_gunzip_to] );
1122 $self->extract_path( File::Spec->rel2abs(cwd()) );
1127 ### using cz2, the compact versions... this we use mainly in archive::tar
1129 # sub _bunzip2_cz1 {
1132 # my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1133 # unless( can_load( modules => $use_list ) ) {
1134 # return $self->_error(loc("You do not have '%1' installed - Please " .
1135 # "install it as soon as possible.",
1136 # 'IO::Uncompress::Bunzip2'));
1139 # my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
1140 # return $self->_error(loc("Unable to open '%1': %2",
1142 # $IO::Uncompress::Bunzip2::Bunzip2Error));
1144 # my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1145 # return $self->_error(loc("Could not open '%1' for writing: %2",
1146 # $self->_gunzip_to, $! ));
1149 # $fh->print($buffer) while $bz->read($buffer) > 0;
1152 # ### set what files where extract, and where they went ###
1153 # $self->files( [$self->_gunzip_to] );
1154 # $self->extract_path( File::Spec->rel2abs(cwd()) );
1162 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1163 unless( can_load( modules => $use_list ) ) {
1164 $self->_error(loc("You do not have '%1' installed - Please " .
1165 "install it as soon as possible.",
1166 'IO::Uncompress::Bunzip2'));
1170 IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to)
1171 or return $self->_error(loc("Unable to uncompress '%1': %2",
1173 $IO::Uncompress::Bunzip2::Bunzip2Error));
1175 ### set what files where extract, and where they went ###
1176 $self->files( [$self->_gunzip_to] );
1177 $self->extract_path( File::Spec->rel2abs(cwd()) );
1183 #################################
1187 #################################
1192 ### check for /bin/unlzma -- we need it ###
1193 unless( $self->bin_unlzma ) {
1194 $self->_error(loc("No '%1' program found", '/bin/unlzma'));
1198 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1199 return $self->_error(loc("Could not open '%1' for writing: %2",
1200 $self->_gunzip_to, $! ));
1202 my $cmd = [ $self->bin_unlzma, '-c', $self->archive ];
1205 unless( scalar run( command => $cmd,
1207 buffer => \$buffer )
1209 return $self->_error(loc("Unable to unlzma '%1': %2",
1210 $self->archive, $buffer));
1213 ### no buffers available?
1214 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1215 $self->_error( $self->_no_buffer_content( $self->archive ) );
1218 print $fh $buffer if defined $buffer;
1222 ### set what files where extract, and where they went ###
1223 $self->files( [$self->_gunzip_to] );
1224 $self->extract_path( File::Spec->rel2abs(cwd()) );
1232 my $use_list = { 'Compress::unLZMA' => '0.0' };
1233 unless( can_load( modules => $use_list ) ) {
1234 $self->_error(loc("You do not have '%1' installed - Please " .
1235 "install it as soon as possible.", 'Compress::unLZMA'));
1239 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1240 return $self->_error(loc("Could not open '%1' for writing: %2",
1241 $self->_gunzip_to, $! ));
1244 $buffer = Compress::unLZMA::uncompressfile( $self->archive );
1245 unless ( defined $buffer ) {
1246 return $self->_error(loc("Could not unlzma '%1': %2",
1247 $self->archive, $@));
1250 print $fh $buffer if defined $buffer;
1254 ### set what files where extract, and where they went ###
1255 $self->files( [$self->_gunzip_to] );
1256 $self->extract_path( File::Spec->rel2abs(cwd()) );
1261 #################################
1265 #################################
1270 my $lerror = Carp::longmess($error);
1272 push @{$self->_error_msg}, $error;
1273 push @{$self->_error_msg_long}, $lerror;
1275 ### set $Archive::Extract::WARN to 0 to disable printing
1278 carp $DEBUG ? $lerror : $error;
1287 ### make sure we have a fallback aref
1290 ? $self->_error_msg_long
1294 return join $/, @$aref;
1297 sub _no_buffer_files {
1299 my $file = shift or return;
1300 return loc("No buffer captured, unable to tell ".
1301 "extracted files or extraction dir for '%1'", $file);
1304 sub _no_buffer_content {
1306 my $file = shift or return;
1307 return loc("No buffer captured, unable to get content for '%1'", $file);
1315 C<Archive::Extract> tries first to determine what type of archive you
1316 are passing it, by inspecting its suffix. It does not do this by using
1317 Mime magic, or something related. See C<CAVEATS> below.
1319 Once it has determined the file type, it knows which extraction methods
1320 it can use on the archive. It will try a perl solution first, then fall
1321 back to a commandline tool if that fails. If that also fails, it will
1322 return false, indicating it was unable to extract the archive.
1323 See the section on C<GLOBAL VARIABLES> to see how to alter this order.
1327 =head2 File Extensions
1329 C<Archive::Extract> trusts on the extension of the archive to determine
1330 what type it is, and what extractor methods therefore can be used. If
1331 your archives do not have any of the extensions as described in the
1332 C<new()> method, you will have to specify the type explicitly, or
1333 C<Archive::Extract> will not be able to extract the archive for you.
1335 =head2 Supporting Very Large Files
1337 C<Archive::Extract> can use either pure perl modules or command line
1338 programs under the hood. Some of the pure perl modules (like
1339 C<Archive::Tar> and Compress::unLZMA) take the entire contents of the archive into memory,
1340 which may not be feasible on your system. Consider setting the global
1341 variable C<$Archive::Extract::PREFER_BIN> to C<1>, which will prefer
1342 the use of command line programs and won't consume so much memory.
1344 See the C<GLOBAL VARIABLES> section below for details.
1346 =head2 Bunzip2 support of arbitrary extensions.
1348 Older versions of C</bin/bunzip2> do not support arbitrary file
1349 extensions and insist on a C<.bz2> suffix. Although we do our best
1350 to guard against this, if you experience a bunzip2 error, it may
1351 be related to this. For details, please see the C<have_old_bunzip2>
1354 =head1 GLOBAL VARIABLES
1356 =head2 $Archive::Extract::DEBUG
1358 Set this variable to C<true> to have all calls to command line tools
1359 be printed out, including all their output.
1360 This also enables C<Carp::longmess> errors, instead of the regular
1363 Good for tracking down why things don't work with your particular
1366 Defaults to C<false>.
1368 =head2 $Archive::Extract::WARN
1370 This variable controls whether errors encountered internally by
1371 C<Archive::Extract> should be C<carp>'d or not.
1373 Set to false to silence warnings. Inspect the output of the C<error()>
1374 method manually to see what went wrong.
1376 Defaults to C<true>.
1378 =head2 $Archive::Extract::PREFER_BIN
1380 This variables controls whether C<Archive::Extract> should prefer the
1381 use of perl modules, or commandline tools to extract archives.
1383 Set to C<true> to have C<Archive::Extract> prefer commandline tools.
1385 Defaults to C<false>.
1391 =item Mime magic support
1393 Maybe this module should use something like C<File::Type> to determine
1394 the type, rather than blindly trust the suffix.
1400 Please report bugs or other issues to E<lt>bug-archive-extract@rt.cpan.org<gt>.
1404 This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1408 This library is free software; you may redistribute and/or modify it
1409 under the same terms as Perl itself.
1414 # c-indentation-style: bsd
1416 # indent-tabs-mode: nil
1418 # vim: expandtab shiftwidth=4: