1 package Archive::Extract;
7 use IPC::Cmd qw[run can_run];
9 use File::Path qw[mkpath];
11 use File::Basename qw[dirname basename];
12 use Params::Check qw[check];
13 use Module::Load::Conditional qw[can_load check_install];
14 use Locale::Maketext::Simple Style => 'gettext';
16 ### solaris has silly /bin/tar output ###
17 use constant ON_SOLARIS => $^O eq 'solaris' ? 1 : 0;
18 use constant FILE_EXISTS => sub { -e $_[0] ? 1 : 0 };
20 ### VMS may require quoting upper case command options
21 use constant ON_VMS => $^O eq 'VMS' ? 1 : 0;
23 ### If these are changed, update @TYPES and the new() POD
24 use constant TGZ => 'tgz';
25 use constant TAR => 'tar';
26 use constant GZ => 'gz';
27 use constant ZIP => 'zip';
28 use constant BZ2 => 'bz2';
29 use constant TBZ => 'tbz';
30 use constant Z => 'Z';
32 use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG];
38 my @Types = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ, Z ); # same as all constants
40 local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1;
46 Archive::Extract - A generic archive extracting mechanism
52 ### build an Archive::Extract object ###
53 my $ae = Archive::Extract->new( archive => 'foo.tgz' );
55 ### extract to cwd() ###
56 my $ok = $ae->extract;
58 ### extract to /tmp ###
59 my $ok = $ae->extract( to => '/tmp' );
61 ### what if something went wrong?
62 my $ok = $ae->extract or die $ae->error;
64 ### files from the archive ###
65 my $files = $ae->files;
67 ### dir that was extracted to ###
68 my $outdir = $ae->extract_path;
71 ### quick check methods ###
72 $ae->is_tar # is it a .tar file?
73 $ae->is_tgz # is it a .tar.gz or .tgz file?
74 $ae->is_gz; # is it a .gz file?
75 $ae->is_zip; # is it a .zip file?
76 $ae->is_bz2; # is it a .bz2 file?
77 $ae->is_tbz; # is it a .tar.bz2 or .tbz file?
79 ### absolute path to the archive you provided ###
82 ### commandline tools, if found ###
83 $ae->bin_tar # path to /bin/tar, if found
84 $ae->bin_gzip # path to /bin/gzip, if found
85 $ae->bin_unzip # path to /bin/unzip, if found
86 $ae->bin_bunzip2 # path to /bin/bunzip2 if found
90 Archive::Extract is a generic archive extraction mechanism.
92 It allows you to extract any archive file of the type .tar, .tar.gz,
93 .gz, .Z, tar.bz2, .tbz, .bz2 or .zip without having to worry how it
94 does so, or use different interfaces for each type by using either
95 perl modules, or commandline tools on your system.
97 See the C<HOW IT WORKS> section further down for details.
102 ### see what /bin/programs are available ###
104 for my $pgm (qw[tar unzip gzip bunzip2 uncompress]) {
105 $PROGRAMS->{$pgm} = can_run($pgm);
108 ### mapping from types to extractor methods ###
115 is_bz2 => '_bunzip2',
116 is_Z => '_uncompress',
121 archive => { required => 1, allow => FILE_EXISTS },
122 type => { default => '', allow => [ @Types ] },
125 ### build accesssors ###
126 for my $method( keys %$tmpl,
127 qw[_extractor _gunzip_to files extract_path],
128 qw[_error_msg _error_msg_long]
133 $self->{$method} = $_[0] if @_;
134 return $self->{$method};
140 =head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE])
142 Creates a new C<Archive::Extract> object based on the archive file you
143 passed it. Automatically determines the type of archive based on the
144 extension, but you can override that by explicitly providing the
147 Valid values for C<type> are:
153 Standard tar files, as produced by, for example, C</bin/tar>.
154 Corresponds to a C<.tar> suffix.
158 Gzip compressed tar files, as produced by, for example C</bin/tar -z>.
159 Corresponds to a C<.tgz> or C<.tar.gz> suffix.
163 Gzip compressed file, as produced by, for example C</bin/gzip>.
164 Corresponds to a C<.gz> suffix.
168 Lempel-Ziv compressed file, as produced by, for example C</bin/compress>.
169 Corresponds to a C<.Z> suffix.
173 Zip compressed file, as produced by, for example C</bin/zip>.
174 Corresponds to a C<.zip>, C<.jar> or C<.par> suffix.
178 Bzip2 compressed file, as produced by, for example, C</bin/bzip2>.
179 Corresponds to a C<.bz2> suffix.
183 Bzip2 compressed tar file, as produced by, for exmample C</bin/tar -j>.
184 Corresponds to a C<.tbz> or C<.tar.bz2> suffix.
188 Returns a C<Archive::Extract> object on success, or false on failure.
197 my $parsed = check( $tmpl, \%hash ) or return;
199 ### make sure we have an absolute path ###
200 my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} );
202 ### figure out the type, if it wasn't already specified ###
203 unless ( $parsed->{type} ) {
205 $ar =~ /.+?\.(?:tar\.gz|tgz)$/i ? TGZ :
206 $ar =~ /.+?\.gz$/i ? GZ :
207 $ar =~ /.+?\.tar$/i ? TAR :
208 $ar =~ /.+?\.(zip|jar|par)$/i ? ZIP :
209 $ar =~ /.+?\.(?:tbz2?|tar\.bz2?)$/i ? TBZ :
210 $ar =~ /.+?\.bz2$/i ? BZ2 :
211 $ar =~ /.+?\.Z$/ ? Z :
216 ### don't know what type of file it is ###
217 return __PACKAGE__->_error(loc("Cannot determine file type for '%1'",
218 $parsed->{archive} )) unless $parsed->{type};
220 return bless $parsed, $class;
224 =head2 $ae->extract( [to => '/output/path'] )
226 Extracts the archive represented by the C<Archive::Extract> object to
227 the path of your choice as specified by the C<to> argument. Defaults to
230 Since C<.gz> files never hold a directory, but only a single file; if
231 the C<to> argument is an existing directory, the file is extracted
232 there, with it's C<.gz> suffix stripped.
233 If the C<to> argument is not an existing directory, the C<to> argument
234 is understood to be a filename, if the archive type is C<gz>.
235 In the case that you did not specify a C<to> argument, the output
236 file will be the name of the archive file, stripped from it's C<.gz>
237 suffix, in the current working directory.
239 C<extract> will try a pure perl solution first, and then fall back to
240 commandline tools if they are available. See the C<GLOBAL VARIABLES>
241 section below on how to alter this behaviour.
243 It will return true on success, and false on failure.
245 On success, it will also set the follow attributes in the object:
249 =item $ae->extract_path
251 This is the directory that the files where extracted to.
255 This is an array ref with the paths of all the files in the archive,
256 relative to the C<to> argument you specified.
257 To get the full path to an extracted file, you would use:
259 File::Spec->catfile( $to, $ae->files->[0] );
261 Note that all files from a tar archive will be in unix format, as per
262 the tar specification.
274 to => { default => '.', store => \$to }
277 check( $tmpl, \%hash ) or return;
279 ### so 'to' could be a file or a dir, depending on whether it's a .gz
280 ### file, or basically anything else.
281 ### so, check that, then act accordingly.
282 ### set an accessor specifically so _gunzip can know what file to extract
286 if( $self->is_gz or $self->is_bz2 or $self->is_Z) {
288 my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2?|Z)$//i;
293 $self->_gunzip_to( basename($cp) );
295 ### then it's a filename
298 $self->_gunzip_to( basename($to) );
301 ### not a foo.gz file
307 ### make the dir if it doesn't exist ###
309 eval { mkpath( $dir ) };
311 return $self->_error(loc("Could not create path '%1': %2", $dir, $@))
315 ### get the current dir, to restore later ###
321 ### chdir to the target dir ###
322 unless( chdir $dir ) {
323 $self->_error(loc("Could not chdir to '%1': %2", $dir, $!));
324 $ok = 0; last EXTRACT;
327 ### set files to an empty array ref, so there's always an array
328 ### ref IN the accessor, to avoid errors like:
329 ### Can't use an undefined value as an ARRAY reference at
330 ### ../lib/Archive/Extract.pm line 742. (rt #19815)
333 ### find what extractor method to use ###
334 while( my($type,$method) = each %$Mapping ) {
336 ### call the corresponding method if the type is OK ###
338 $ok = $self->$method();
342 ### warn something went wrong if we didn't get an OK ###
343 $self->_error(loc("Extract failed, no extractor found"))
348 ### and chdir back ###
349 unless( chdir $cwd ) {
350 $self->_error(loc("Could not chdir back to start dir '%1': %2'",
361 =head2 $ae->error([BOOL])
363 Returns the last encountered error as string.
364 Pass it a true value to get the C<Carp::longmess()> output instead.
366 =head2 $ae->extract_path
368 This is the directory the archive got extracted to.
369 See C<extract()> for details.
373 This is an array ref holding all the paths from the archive.
374 See C<extract()> for details.
378 This is the full path to the archive file represented by this
379 C<Archive::Extract> object.
383 This is the type of archive represented by this C<Archive::Extract>
384 object. See accessors below for an easier way to use this.
385 See the C<new()> method for details.
389 Returns a list of all known C<types> for C<Archive::Extract>'s
394 sub types { return @Types }
398 Returns true if the file is of type C<.tar.gz>.
399 See the C<new()> method for details.
403 Returns true if the file is of type C<.tar>.
404 See the C<new()> method for details.
408 Returns true if the file is of type C<.gz>.
409 See the C<new()> method for details.
413 Returns true if the file is of type C<.Z>.
414 See the C<new()> method for details.
418 Returns true if the file is of type C<.zip>.
419 See the C<new()> method for details.
423 ### quick check methods ###
424 sub is_tgz { return $_[0]->type eq TGZ }
425 sub is_tar { return $_[0]->type eq TAR }
426 sub is_gz { return $_[0]->type eq GZ }
427 sub is_zip { return $_[0]->type eq ZIP }
428 sub is_tbz { return $_[0]->type eq TBZ }
429 sub is_bz2 { return $_[0]->type eq BZ2 }
430 sub is_Z { return $_[0]->type eq Z }
436 Returns the full path to your tar binary, if found.
440 Returns the full path to your gzip binary, if found
442 =head2 $ae->bin_unzip
444 Returns the full path to your unzip binary, if found
448 ### paths to commandline tools ###
449 sub bin_gzip { return $PROGRAMS->{'gzip'} if $PROGRAMS->{'gzip'} }
450 sub bin_unzip { return $PROGRAMS->{'unzip'} if $PROGRAMS->{'unzip'} }
451 sub bin_tar { return $PROGRAMS->{'tar'} if $PROGRAMS->{'tar'} }
452 sub bin_bunzip2 { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} }
453 sub bin_uncompress { return $PROGRAMS->{'uncompress'}
454 if $PROGRAMS->{'uncompress'} }
455 =head2 $bool = $ae->have_old_bunzip2
457 Older versions of C</bin/bunzip2>, from before the C<bunzip2 1.0> release,
458 require all archive names to end in C<.bz2> or it will not extract
459 them. This method checks if you have a recent version of C<bunzip2>
460 that allows any extension, or an older one that doesn't.
464 sub have_old_bunzip2 {
467 ### no bunzip2? no old bunzip2 either :)
468 return unless $self->bin_bunzip2;
470 ### if we can't run this, we can't be sure if it's too old or not
471 ### XXX stupid stupid stupid bunzip2 doesn't understand --version
472 ### is not a request to extract data:
473 ### $ bunzip2 --version
474 ### bzip2, a block-sorting file compressor. Version 1.0.2, 30-Dec-2001.
476 ### bunzip2: I won't read compressed data from a terminal.
477 ### bunzip2: For help, type: `bunzip2 --help'.
482 scalar run( command => [$self->bin_bunzip2, '--version'],
488 return unless $buffer;
490 my ($version) = $buffer =~ /version \s+ (\d+)/ix;
492 return 1 if $version < 1;
496 #################################
500 #################################
503 ### untar wrapper... goes to either Archive::Tar or /bin/tar
504 ### depending on $PREFER_BIN
508 ### bzip2 support in A::T via IO::Uncompress::Bzip2
509 my @methods = qw[_untar_at _untar_bin];
510 @methods = reverse @methods if $PREFER_BIN;
512 for my $method (@methods) {
513 $self->_extractor($method) && return 1 if $self->$method();
516 return $self->_error(loc("Unable to untar file '%1'", $self->archive));
519 ### use /bin/tar to extract ###
523 ### check for /bin/tar ###
524 return $self->_error(loc("No '%1' program found", '/bin/tar'))
525 unless $self->bin_tar;
527 ### check for /bin/gzip if we need it ###
528 return $self->_error(loc("No '%1' program found", '/bin/gzip'))
529 if $self->is_tgz && !$self->bin_gzip;
531 return $self->_error(loc("No '%1' program found", '/bin/bunzip2'))
532 if $self->is_tbz && !$self->bin_bunzip2;
534 ### XXX figure out how to make IPC::Run do this in one call --
535 ### currently i don't know how to get output of a command after a pipe
536 ### trapped in a scalar. Mailed barries about this 5th of june 2004.
540 ### see what command we should run, based on whether
541 ### it's a .tgz or .tar
543 ### XXX solaris tar and bsdtar are having different outputs
544 ### depending whether you run with -x or -t
545 ### compensate for this insanity by running -t first, then -x
547 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
548 $self->bin_tar, '-tf', '-'] :
549 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
550 $self->bin_tar, '-tf', '-'] :
551 [$self->bin_tar, '-tf', $self->archive];
553 ### run the command ###
555 unless( scalar run( command => $cmd,
559 return $self->_error(loc(
560 "Error listing contents of archive '%1': %2",
561 $self->archive, $buffer ));
564 ### no buffers available?
565 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
566 $self->_error( $self->_no_buffer_files( $self->archive ) );
569 ### if we're on solaris we /might/ be using /bin/tar, which has
570 ### a weird output format... we might also be using
571 ### /usr/local/bin/tar, which is gnu tar, which is perfectly
572 ### fine... so we have to do some guessing here =/
573 my @files = map { chomp;
575 : (m|^ x \s+ # 'xtract' -- sigh
576 (.+?), # the actual file name
577 \s+ [\d,.]+ \s bytes,
578 \s+ [\d,.]+ \s tape \s blocks
583 ### store the files that are in the archive ###
584 $self->files(\@files);
588 ### now actually extract it ###
590 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
591 $self->bin_tar, '-xf', '-'] :
592 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
593 $self->bin_tar, '-xf', '-'] :
594 [$self->bin_tar, '-xf', $self->archive];
597 unless( scalar run( command => $cmd,
601 return $self->_error(loc("Error extracting archive '%1': %2",
602 $self->archive, $buffer ));
605 ### we might not have them, due to lack of buffers
607 ### now that we've extracted, figure out where we extracted to
608 my $dir = $self->__get_extract_dir( $self->files );
610 ### store the extraction dir ###
611 $self->extract_path( $dir );
615 ### we got here, no error happened
619 ### use archive::tar to extract ###
623 ### we definitely need A::T, so load that first
624 { my $use_list = { 'Archive::Tar' => '0.0' };
626 unless( can_load( modules => $use_list ) ) {
628 return $self->_error(loc("You do not have '%1' installed - " .
629 "Please install it as soon as possible.",
634 ### we might pass it a filehandle if it's a .tbz file..
635 my $fh_to_read = $self->archive;
637 ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib
638 ### if A::T's version is 0.99 or higher
639 if( $self->is_tgz ) {
640 my $use_list = { 'Compress::Zlib' => '0.0' };
641 $use_list->{ 'IO::Zlib' } = '0.0'
642 if $Archive::Tar::VERSION >= '0.99';
644 unless( can_load( modules => $use_list ) ) {
645 my $which = join '/', sort keys %$use_list;
647 return $self->_error(loc(
648 "You do not have '%1' installed - Please ".
649 "install it as soon as possible.", $which));
652 } elsif ( $self->is_tbz ) {
653 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
654 unless( can_load( modules => $use_list ) ) {
655 return $self->_error(loc(
656 "You do not have '%1' installed - Please " .
657 "install it as soon as possible.",
658 'IO::Uncompress::Bunzip2'));
661 my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
662 return $self->_error(loc("Unable to open '%1': %2",
664 $IO::Uncompress::Bunzip2::Bunzip2Error));
669 my $tar = Archive::Tar->new();
671 ### only tell it it's compressed if it's a .tgz, as we give it a file
672 ### handle if it's a .tbz
673 unless( $tar->read( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) ) ) {
674 return $self->_error(loc("Unable to read '%1': %2", $self->archive,
675 $Archive::Tar::error));
678 ### workaround to prevent Archive::Tar from setting uid, which
679 ### is a potential security hole. -autrijus
680 ### have to do it here, since A::T needs to be /loaded/ first ###
681 { no strict 'refs'; local $^W;
683 ### older versions of archive::tar <= 0.23
684 *Archive::Tar::chown = sub {};
687 ### for version of archive::tar > 1.04
688 local $Archive::Tar::Constant::CHOWN = 0;
690 { local $^W; # quell 'splice() offset past end of array' warnings
691 # on older versions of A::T
693 ### older archive::tar always returns $self, return value slightly
694 ### fux0r3d because of it.
696 or return $self->_error(loc("Unable to extract '%1': %2",
697 $self->archive, $Archive::Tar::error ));
700 my @files = $tar->list_files;
701 my $dir = $self->__get_extract_dir( \@files );
703 ### store the files that are in the archive ###
704 $self->files(\@files);
706 ### store the extraction dir ###
707 $self->extract_path( $dir );
709 ### check if the dir actually appeared ###
710 return 1 if -d $self->extract_path;
712 ### no dir, we failed ###
713 return $self->_error(loc("Unable to extract '%1': %2",
714 $self->archive, $Archive::Tar::error ));
717 #################################
721 #################################
723 ### gunzip wrapper... goes to either Compress::Zlib or /bin/gzip
724 ### depending on $PREFER_BIN
728 my @methods = qw[_gunzip_cz _gunzip_bin];
729 @methods = reverse @methods if $PREFER_BIN;
731 for my $method (@methods) {
732 $self->_extractor($method) && return 1 if $self->$method();
735 return $self->_error(loc("Unable to gunzip file '%1'", $self->archive));
741 ### check for /bin/gzip -- we need it ###
742 return $self->_error(loc("No '%1' program found", '/bin/gzip'))
743 unless $self->bin_gzip;
746 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
747 return $self->_error(loc("Could not open '%1' for writing: %2",
748 $self->_gunzip_to, $! ));
750 my $cmd = [ $self->bin_gzip, '-cdf', $self->archive ];
753 unless( scalar run( command => $cmd,
757 return $self->_error(loc("Unable to gunzip '%1': %2",
758 $self->archive, $buffer));
761 ### no buffers available?
762 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
763 $self->_error( $self->_no_buffer_content( $self->archive ) );
766 print $fh $buffer if defined $buffer;
770 ### set what files where extract, and where they went ###
771 $self->files( [$self->_gunzip_to] );
772 $self->extract_path( File::Spec->rel2abs(cwd()) );
780 my $use_list = { 'Compress::Zlib' => '0.0' };
781 unless( can_load( modules => $use_list ) ) {
782 return $self->_error(loc("You do not have '%1' installed - Please " .
783 "install it as soon as possible.", 'Compress::Zlib'));
786 my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or
787 return $self->_error(loc("Unable to open '%1': %2",
788 $self->archive, $Compress::Zlib::gzerrno));
790 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
791 return $self->_error(loc("Could not open '%1' for writing: %2",
792 $self->_gunzip_to, $! ));
795 $fh->print($buffer) while $gz->gzread($buffer) > 0;
798 ### set what files where extract, and where they went ###
799 $self->files( [$self->_gunzip_to] );
800 $self->extract_path( File::Spec->rel2abs(cwd()) );
805 #################################
809 #################################
812 ### untar wrapper... goes to either Archive::Tar or /bin/tar
813 ### depending on $PREFER_BIN
817 my @methods = qw[_gunzip_cz _uncompress_bin];
818 @methods = reverse @methods if $PREFER_BIN;
820 for my $method (@methods) {
821 $self->_extractor($method) && return 1 if $self->$method();
824 return $self->_error(loc("Unable to untar file '%1'", $self->archive));
827 sub _uncompress_bin {
830 ### check for /bin/gzip -- we need it ###
831 return $self->_error(loc("No '%1' program found", '/bin/uncompress'))
832 unless $self->bin_uncompress;
835 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
836 return $self->_error(loc("Could not open '%1' for writing: %2",
837 $self->_gunzip_to, $! ));
839 my $cmd = [ $self->bin_uncompress, '-c', $self->archive ];
842 unless( scalar run( command => $cmd,
846 return $self->_error(loc("Unable to uncompress '%1': %2",
847 $self->archive, $buffer));
850 ### no buffers available?
851 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
852 $self->_error( $self->_no_buffer_content( $self->archive ) );
855 print $fh $buffer if defined $buffer;
859 ### set what files where extract, and where they went ###
860 $self->files( [$self->_gunzip_to] );
861 $self->extract_path( File::Spec->rel2abs(cwd()) );
867 #################################
871 #################################
873 ### unzip wrapper... goes to either Archive::Zip or /bin/unzip
874 ### depending on $PREFER_BIN
878 my @methods = qw[_unzip_az _unzip_bin];
879 @methods = reverse @methods if $PREFER_BIN;
881 for my $method (@methods) {
882 $self->_extractor($method) && return 1 if $self->$method();
885 return $self->_error(loc("Unable to gunzip file '%1'", $self->archive));
891 ### check for /bin/gzip if we need it ###
892 return $self->_error(loc("No '%1' program found", '/bin/unzip'))
893 unless $self->bin_unzip;
896 ### first, get the files.. it must be 2 different commands with 'unzip' :(
897 { ### on VMS, capital letter options have to be quoted. This is
898 ### peported by John Malmberg on P5P Tue 21 Aug 2007 05:05:11
899 ### Subject: [patch@31735]Archive Extract fix on VMS.
900 my $opt = ON_VMS ? '"-Z"' : '-Z';
901 my $cmd = [ $self->bin_unzip, $opt, '-1', $self->archive ];
904 unless( scalar run( command => $cmd,
908 return $self->_error(loc("Unable to unzip '%1': %2",
909 $self->archive, $buffer));
912 ### no buffers available?
913 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
914 $self->_error( $self->_no_buffer_files( $self->archive ) );
917 $self->files( [split $/, $buffer] );
921 ### now, extract the archive ###
922 { my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ];
925 unless( scalar run( command => $cmd,
929 return $self->_error(loc("Unable to unzip '%1': %2",
930 $self->archive, $buffer));
933 if( scalar @{$self->files} ) {
934 my $files = $self->files;
935 my $dir = $self->__get_extract_dir( $files );
937 $self->extract_path( $dir );
947 my $use_list = { 'Archive::Zip' => '0.0' };
948 unless( can_load( modules => $use_list ) ) {
949 return $self->_error(loc("You do not have '%1' installed - Please " .
950 "install it as soon as possible.", 'Archive::Zip'));
953 my $zip = Archive::Zip->new();
955 unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) {
956 return $self->_error(loc("Unable to read '%1'", $self->archive));
960 ### have to extract every memeber individually ###
961 for my $member ($zip->members) {
962 push @files, $member->{fileName};
964 unless( $zip->extractMember($member) == &Archive::Zip::AZ_OK ) {
965 return $self->_error(loc("Extraction of '%1' from '%2' failed",
966 $member->{fileName}, $self->archive ));
970 my $dir = $self->__get_extract_dir( \@files );
972 ### set what files where extract, and where they went ###
973 $self->files( \@files );
974 $self->extract_path( File::Spec->rel2abs($dir) );
979 sub __get_extract_dir {
981 my $files = shift || [];
983 return unless scalar @$files;
986 for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
987 my($dir,$pos) = @$aref;
989 ### add a catdir(), so that any trailing slashes get
990 ### take care of (removed)
991 ### also, a catdir() normalises './dir/foo' to 'dir/foo';
992 ### which was the problem in bug #23999
993 my $res = -d $files->[$pos]
994 ? File::Spec->catdir( $files->[$pos], '' )
995 : File::Spec->catdir( dirname( $files->[$pos] ) );
1000 ### if the first and last dir don't match, make sure the
1001 ### dirname is not set wrongly
1004 ### dirs are the same, so we know for sure what the extract dir is
1005 if( $dir1 eq $dir2 ) {
1008 ### dirs are different.. do they share the base dir?
1009 ### if so, use that, if not, fall back to '.'
1011 my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
1012 my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
1014 $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
1017 return File::Spec->rel2abs( $dir );
1020 #################################
1024 #################################
1026 ### bunzip2 wrapper...
1030 my @methods = qw[_bunzip2_cz2 _bunzip2_bin];
1031 @methods = reverse @methods if $PREFER_BIN;
1033 for my $method (@methods) {
1034 $self->_extractor($method) && return 1 if $self->$method();
1037 return $self->_error(loc("Unable to bunzip2 file '%1'", $self->archive));
1043 ### check for /bin/gzip -- we need it ###
1044 return $self->_error(loc("No '%1' program found", '/bin/bunzip2'))
1045 unless $self->bin_bunzip2;
1048 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1049 return $self->_error(loc("Could not open '%1' for writing: %2",
1050 $self->_gunzip_to, $! ));
1052 ### guard against broken bunzip2. See ->have_old_bunzip2()
1054 if( $self->have_old_bunzip2 and $self->archive !~ /\.bz2$/i ) {
1055 return $self->_error(loc("Your bunzip2 version is too old and ".
1056 "can only extract files ending in '%1'",
1060 my $cmd = [ $self->bin_bunzip2, '-cd', $self->archive ];
1063 unless( scalar run( command => $cmd,
1065 buffer => \$buffer )
1067 return $self->_error(loc("Unable to bunzip2 '%1': %2",
1068 $self->archive, $buffer));
1071 ### no buffers available?
1072 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1073 $self->_error( $self->_no_buffer_content( $self->archive ) );
1076 print $fh $buffer if defined $buffer;
1080 ### set what files where extract, and where they went ###
1081 $self->files( [$self->_gunzip_to] );
1082 $self->extract_path( File::Spec->rel2abs(cwd()) );
1087 ### using cz2, the compact versions... this we use mainly in archive::tar
1089 # sub _bunzip2_cz1 {
1092 # my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1093 # unless( can_load( modules => $use_list ) ) {
1094 # return $self->_error(loc("You do not have '%1' installed - Please " .
1095 # "install it as soon as possible.",
1096 # 'IO::Uncompress::Bunzip2'));
1099 # my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
1100 # return $self->_error(loc("Unable to open '%1': %2",
1102 # $IO::Uncompress::Bunzip2::Bunzip2Error));
1104 # my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1105 # return $self->_error(loc("Could not open '%1' for writing: %2",
1106 # $self->_gunzip_to, $! ));
1109 # $fh->print($buffer) while $bz->read($buffer) > 0;
1112 # ### set what files where extract, and where they went ###
1113 # $self->files( [$self->_gunzip_to] );
1114 # $self->extract_path( File::Spec->rel2abs(cwd()) );
1122 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1123 unless( can_load( modules => $use_list ) ) {
1124 return $self->_error(loc("You do not have '%1' installed - Please " .
1125 "install it as soon as possible.",
1126 'IO::Uncompress::Bunzip2'));
1129 IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to)
1130 or return $self->_error(loc("Unable to uncompress '%1': %2",
1132 $IO::Uncompress::Bunzip2::Bunzip2Error));
1134 ### set what files where extract, and where they went ###
1135 $self->files( [$self->_gunzip_to] );
1136 $self->extract_path( File::Spec->rel2abs(cwd()) );
1142 #################################
1146 #################################
1152 $self->_error_msg( $error );
1153 $self->_error_msg_long( Carp::longmess($error) );
1155 ### set $Archive::Extract::WARN to 0 to disable printing
1158 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1166 return shift() ? $self->_error_msg_long : $self->_error_msg;
1169 sub _no_buffer_files {
1171 my $file = shift or return;
1172 return loc("No buffer captured, unable to tell ".
1173 "extracted files or extraction dir for '%1'", $file);
1176 sub _no_buffer_content {
1178 my $file = shift or return;
1179 return loc("No buffer captured, unable to get content for '%1'", $file);
1187 C<Archive::Extract> tries first to determine what type of archive you
1188 are passing it, by inspecting its suffix. It does not do this by using
1189 Mime magic, or something related. See C<CAVEATS> below.
1191 Once it has determined the file type, it knows which extraction methods
1192 it can use on the archive. It will try a perl solution first, then fall
1193 back to a commandline tool if that fails. If that also fails, it will
1194 return false, indicating it was unable to extract the archive.
1195 See the section on C<GLOBAL VARIABLES> to see how to alter this order.
1199 =head2 File Extensions
1201 C<Archive::Extract> trusts on the extension of the archive to determine
1202 what type it is, and what extractor methods therefore can be used. If
1203 your archives do not have any of the extensions as described in the
1204 C<new()> method, you will have to specify the type explicitly, or
1205 C<Archive::Extract> will not be able to extract the archive for you.
1207 =head2 Supporting Very Large Files
1209 C<Archive::Extract> can use either pure perl modules or command line
1210 programs under the hood. Some of the pure perl modules (like
1211 C<Archive::Tar> take the entire contents of the archive into memory,
1212 which may not be feasible on your system. Consider setting the global
1213 variable C<$Archive::Extract::PREFER_BIN> to C<1>, which will prefer
1214 the use of command line programs and won't consume so much memory.
1216 See the C<GLOBAL VARIABLES> section below for details.
1218 =head2 Bunzip2 support of arbitrary extensions.
1220 Older versions of C</bin/bunzip2> do not support arbitrary file
1221 extensions and insist on a C<.bz2> suffix. Although we do our best
1222 to guard against this, if you experience a bunzip2 error, it may
1223 be related to this. For details, please see the C<have_old_bunzip2>
1226 =head1 GLOBAL VARIABLES
1228 =head2 $Archive::Extract::DEBUG
1230 Set this variable to C<true> to have all calls to command line tools
1231 be printed out, including all their output.
1232 This also enables C<Carp::longmess> errors, instead of the regular
1235 Good for tracking down why things don't work with your particular
1238 Defaults to C<false>.
1240 =head2 $Archive::Extract::WARN
1242 This variable controls whether errors encountered internally by
1243 C<Archive::Extract> should be C<carp>'d or not.
1245 Set to false to silence warnings. Inspect the output of the C<error()>
1246 method manually to see what went wrong.
1248 Defaults to C<true>.
1250 =head2 $Archive::Extract::PREFER_BIN
1252 This variables controls whether C<Archive::Extract> should prefer the
1253 use of perl modules, or commandline tools to extract archives.
1255 Set to C<true> to have C<Archive::Extract> prefer commandline tools.
1257 Defaults to C<false>.
1263 =item Mime magic support
1265 Maybe this module should use something like C<File::Type> to determine
1266 the type, rather than blindly trust the suffix.
1272 Please report bugs or other issues to E<lt>bug-archive-extract@rt.cpan.org<gt>.
1276 This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1280 This library is free software; you may redistribute and/or modify it
1281 under the same terms as Perl itself.
1286 # c-indentation-style: bsd
1288 # indent-tabs-mode: nil
1290 # vim: expandtab shiftwidth=4: