1 package Archive::Extract;
7 use IPC::Cmd qw[run can_run];
9 use File::Path qw[mkpath];
11 use File::Basename qw[dirname basename];
12 use Params::Check qw[check];
13 use Module::Load::Conditional qw[can_load check_install];
14 use Locale::Maketext::Simple Style => 'gettext';
16 ### solaris has silly /bin/tar output ###
17 use constant ON_SOLARIS => $^O eq 'solaris' ? 1 : 0;
18 use constant FILE_EXISTS => sub { -e $_[0] ? 1 : 0 };
20 ### VMS may require quoting upper case command options
21 use constant ON_VMS => $^O eq 'VMS' ? 1 : 0;
23 ### Windows needs special treatment of Tar options
24 use constant ON_WIN32 => $^O eq 'MSWin32' ? 1 : 0;
26 ### we can't use this extraction method, because of missing
28 use constant METHOD_NA => [];
30 ### If these are changed, update @TYPES and the new() POD
31 use constant TGZ => 'tgz';
32 use constant TAR => 'tar';
33 use constant GZ => 'gz';
34 use constant ZIP => 'zip';
35 use constant BZ2 => 'bz2';
36 use constant TBZ => 'tbz';
37 use constant Z => 'Z';
38 use constant LZMA => 'lzma';
40 use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG
41 $_ALLOW_BIN $_ALLOW_PURE_PERL
48 $_ALLOW_PURE_PERL = 1; # allow pure perl extractors
49 $_ALLOW_BIN = 1; # allow binary extractors
51 # same as all constants
52 my @Types = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ, Z, LZMA );
54 local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1;
60 Archive::Extract - A generic archive extracting mechanism
66 ### build an Archive::Extract object ###
67 my $ae = Archive::Extract->new( archive => 'foo.tgz' );
69 ### extract to cwd() ###
70 my $ok = $ae->extract;
72 ### extract to /tmp ###
73 my $ok = $ae->extract( to => '/tmp' );
75 ### what if something went wrong?
76 my $ok = $ae->extract or die $ae->error;
78 ### files from the archive ###
79 my $files = $ae->files;
81 ### dir that was extracted to ###
82 my $outdir = $ae->extract_path;
85 ### quick check methods ###
86 $ae->is_tar # is it a .tar file?
87 $ae->is_tgz # is it a .tar.gz or .tgz file?
88 $ae->is_gz; # is it a .gz file?
89 $ae->is_zip; # is it a .zip file?
90 $ae->is_bz2; # is it a .bz2 file?
91 $ae->is_tbz; # is it a .tar.bz2 or .tbz file?
92 $ae->is_lzma; # is it a .lzma file?
94 ### absolute path to the archive you provided ###
97 ### commandline tools, if found ###
98 $ae->bin_tar # path to /bin/tar, if found
99 $ae->bin_gzip # path to /bin/gzip, if found
100 $ae->bin_unzip # path to /bin/unzip, if found
101 $ae->bin_bunzip2 # path to /bin/bunzip2 if found
102 $ae->bin_unlzma # path to /bin/unlzma if found
106 Archive::Extract is a generic archive extraction mechanism.
108 It allows you to extract any archive file of the type .tar, .tar.gz,
109 .gz, .Z, tar.bz2, .tbz, .bz2, .zip or .lzma without having to worry how it
110 does so, or use different interfaces for each type by using either
111 perl modules, or commandline tools on your system.
113 See the C<HOW IT WORKS> section further down for details.
118 ### see what /bin/programs are available ###
120 for my $pgm (qw[tar unzip gzip bunzip2 uncompress unlzma]) {
121 $PROGRAMS->{$pgm} = can_run($pgm);
124 ### mapping from types to extractor methods ###
125 my $Mapping = { # binary program # pure perl module
126 is_tgz => { bin => '_untar_bin', pp => '_untar_at' },
127 is_tar => { bin => '_untar_bin', pp => '_untar_at' },
128 is_gz => { bin => '_gunzip_bin', pp => '_gunzip_cz' },
129 is_zip => { bin => '_unzip_bin', pp => '_unzip_az' },
130 is_tbz => { bin => '_untar_bin', pp => '_untar_at' },
131 is_bz2 => { bin => '_bunzip2_bin', pp => '_bunzip2_bz2'},
132 is_Z => { bin => '_uncompress_bin', pp => '_gunzip_cz' },
133 is_lzma => { bin => '_unlzma_bin', pp => '_unlzma_cz' },
136 { ### use subs so we re-generate array refs etc for the no-overide flags
137 ### if we don't, then we reuse the same arrayref, meaning objects store
140 archive => sub { { required => 1, allow => FILE_EXISTS } },
141 type => sub { { default => '', allow => [ @Types ] } },
142 _error_msg => sub { { no_override => 1, default => [] } },
143 _error_msg_long => sub { { no_override => 1, default => [] } },
146 ### build accesssors ###
147 for my $method( keys %$tmpl,
148 qw[_extractor _gunzip_to files extract_path],
153 $self->{$method} = $_[0] if @_;
154 return $self->{$method};
160 =head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE])
162 Creates a new C<Archive::Extract> object based on the archive file you
163 passed it. Automatically determines the type of archive based on the
164 extension, but you can override that by explicitly providing the
167 Valid values for C<type> are:
173 Standard tar files, as produced by, for example, C</bin/tar>.
174 Corresponds to a C<.tar> suffix.
178 Gzip compressed tar files, as produced by, for example C</bin/tar -z>.
179 Corresponds to a C<.tgz> or C<.tar.gz> suffix.
183 Gzip compressed file, as produced by, for example C</bin/gzip>.
184 Corresponds to a C<.gz> suffix.
188 Lempel-Ziv compressed file, as produced by, for example C</bin/compress>.
189 Corresponds to a C<.Z> suffix.
193 Zip compressed file, as produced by, for example C</bin/zip>.
194 Corresponds to a C<.zip>, C<.jar> or C<.par> suffix.
198 Bzip2 compressed file, as produced by, for example, C</bin/bzip2>.
199 Corresponds to a C<.bz2> suffix.
203 Bzip2 compressed tar file, as produced by, for exmample C</bin/tar -j>.
204 Corresponds to a C<.tbz> or C<.tar.bz2> suffix.
208 Lzma compressed file, as produced by C</bin/lzma>.
209 Corresponds to a C<.lzma> suffix.
213 Returns a C<Archive::Extract> object on success, or false on failure.
222 ### see above why we use subs here and generate the template;
223 ### it's basically to not re-use arrayrefs
224 my %utmpl = map { $_ => $tmpl->{$_}->() } keys %$tmpl;
226 my $parsed = check( \%utmpl, \%hash ) or return;
228 ### make sure we have an absolute path ###
229 my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} );
231 ### figure out the type, if it wasn't already specified ###
232 unless ( $parsed->{type} ) {
234 $ar =~ /.+?\.(?:tar\.gz|tgz)$/i ? TGZ :
235 $ar =~ /.+?\.gz$/i ? GZ :
236 $ar =~ /.+?\.tar$/i ? TAR :
237 $ar =~ /.+?\.(zip|jar|par)$/i ? ZIP :
238 $ar =~ /.+?\.(?:tbz2?|tar\.bz2?)$/i ? TBZ :
239 $ar =~ /.+?\.bz2$/i ? BZ2 :
240 $ar =~ /.+?\.Z$/ ? Z :
241 $ar =~ /.+?\.lzma$/ ? LZMA :
246 bless $parsed, $class;
248 ### don't know what type of file it is
249 ### XXX this *has* to be an object call, not a package call
250 return $parsed->_error(loc("Cannot determine file type for '%1'",
251 $parsed->{archive} )) unless $parsed->{type};
256 =head2 $ae->extract( [to => '/output/path'] )
258 Extracts the archive represented by the C<Archive::Extract> object to
259 the path of your choice as specified by the C<to> argument. Defaults to
262 Since C<.gz> files never hold a directory, but only a single file; if
263 the C<to> argument is an existing directory, the file is extracted
264 there, with its C<.gz> suffix stripped.
265 If the C<to> argument is not an existing directory, the C<to> argument
266 is understood to be a filename, if the archive type is C<gz>.
267 In the case that you did not specify a C<to> argument, the output
268 file will be the name of the archive file, stripped from its C<.gz>
269 suffix, in the current working directory.
271 C<extract> will try a pure perl solution first, and then fall back to
272 commandline tools if they are available. See the C<GLOBAL VARIABLES>
273 section below on how to alter this behaviour.
275 It will return true on success, and false on failure.
277 On success, it will also set the follow attributes in the object:
281 =item $ae->extract_path
283 This is the directory that the files where extracted to.
287 This is an array ref with the paths of all the files in the archive,
288 relative to the C<to> argument you specified.
289 To get the full path to an extracted file, you would use:
291 File::Spec->catfile( $to, $ae->files->[0] );
293 Note that all files from a tar archive will be in unix format, as per
294 the tar specification.
304 ### reset error messages
305 $self->_error_msg( [] );
306 $self->_error_msg_long( [] );
310 to => { default => '.', store => \$to }
313 check( $tmpl, \%hash ) or return;
315 ### so 'to' could be a file or a dir, depending on whether it's a .gz
316 ### file, or basically anything else.
317 ### so, check that, then act accordingly.
318 ### set an accessor specifically so _gunzip can know what file to extract
322 if( $self->is_gz or $self->is_bz2 or $self->is_Z or $self->is_lzma ) {
324 my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2?|Z|lzma)$//i;
329 $self->_gunzip_to( basename($cp) );
331 ### then it's a filename
334 $self->_gunzip_to( basename($to) );
337 ### not a foo.gz file
343 ### make the dir if it doesn't exist ###
345 eval { mkpath( $dir ) };
347 return $self->_error(loc("Could not create path '%1': %2", $dir, $@))
351 ### get the current dir, to restore later ###
357 ### chdir to the target dir ###
358 unless( chdir $dir ) {
359 $self->_error(loc("Could not chdir to '%1': %2", $dir, $!));
360 $ok = 0; last EXTRACT;
363 ### set files to an empty array ref, so there's always an array
364 ### ref IN the accessor, to avoid errors like:
365 ### Can't use an undefined value as an ARRAY reference at
366 ### ../lib/Archive/Extract.pm line 742. (rt #19815)
369 ### find out the dispatch methods needed for this type of
370 ### archive. Do a $self->is_XXX to figure out the type, then
371 ### get the hashref with bin + pure perl dispatchers.
372 my ($map) = map { $Mapping->{$_} } grep { $self->$_ } keys %$Mapping;
374 ### add pure perl extractor if allowed & add bin extractor if allowed
376 push @methods, $map->{'pp'} if $_ALLOW_PURE_PERL;
377 push @methods, $map->{'bin'} if $_ALLOW_BIN;
379 ### reverse it if we prefer bin extractors
380 @methods = reverse @methods if $PREFER_BIN;
383 for my $method (@methods) {
384 print "# Extracting with ->$method\n" if $DEBUG;
386 my $rv = $self->$method;
388 ### a positive extraction
389 if( $rv and $rv ne METHOD_NA ) {
390 print "# Extraction succeeded\n" if $DEBUG;
391 $self->_extractor($method);
394 ### method is not available
395 } elsif ( $rv and $rv eq METHOD_NA ) {
396 print "# Extraction method not available\n" if $DEBUG;
399 print "# Extraction method failed\n" if $DEBUG;
404 ### warn something went wrong if we didn't get an extractor
405 unless( $self->_extractor ) {
406 my $diag = $fail ? loc("Extract failed due to errors") :
407 $na ? loc("Extract failed; no extractors available") :
410 $self->_error($diag);
415 ### and chdir back ###
416 unless( chdir $cwd ) {
417 $self->_error(loc("Could not chdir back to start dir '%1': %2'",
428 =head2 $ae->error([BOOL])
430 Returns the last encountered error as string.
431 Pass it a true value to get the C<Carp::longmess()> output instead.
433 =head2 $ae->extract_path
435 This is the directory the archive got extracted to.
436 See C<extract()> for details.
440 This is an array ref holding all the paths from the archive.
441 See C<extract()> for details.
445 This is the full path to the archive file represented by this
446 C<Archive::Extract> object.
450 This is the type of archive represented by this C<Archive::Extract>
451 object. See accessors below for an easier way to use this.
452 See the C<new()> method for details.
456 Returns a list of all known C<types> for C<Archive::Extract>'s
461 sub types { return @Types }
465 Returns true if the file is of type C<.tar.gz>.
466 See the C<new()> method for details.
470 Returns true if the file is of type C<.tar>.
471 See the C<new()> method for details.
475 Returns true if the file is of type C<.gz>.
476 See the C<new()> method for details.
480 Returns true if the file is of type C<.Z>.
481 See the C<new()> method for details.
485 Returns true if the file is of type C<.zip>.
486 See the C<new()> method for details.
490 Returns true if the file is of type C<.lzma>.
491 See the C<new()> method for details.
495 ### quick check methods ###
496 sub is_tgz { return $_[0]->type eq TGZ }
497 sub is_tar { return $_[0]->type eq TAR }
498 sub is_gz { return $_[0]->type eq GZ }
499 sub is_zip { return $_[0]->type eq ZIP }
500 sub is_tbz { return $_[0]->type eq TBZ }
501 sub is_bz2 { return $_[0]->type eq BZ2 }
502 sub is_Z { return $_[0]->type eq Z }
503 sub is_lzma { return $_[0]->type eq LZMA }
509 Returns the full path to your tar binary, if found.
513 Returns the full path to your gzip binary, if found
515 =head2 $ae->bin_unzip
517 Returns the full path to your unzip binary, if found
519 =head2 $ae->bin_unlzma
521 Returns the full path to your unlzma binary, if found
525 ### paths to commandline tools ###
526 sub bin_gzip { return $PROGRAMS->{'gzip'} if $PROGRAMS->{'gzip'} }
527 sub bin_unzip { return $PROGRAMS->{'unzip'} if $PROGRAMS->{'unzip'} }
528 sub bin_tar { return $PROGRAMS->{'tar'} if $PROGRAMS->{'tar'} }
529 sub bin_bunzip2 { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} }
530 sub bin_uncompress { return $PROGRAMS->{'uncompress'}
531 if $PROGRAMS->{'uncompress'} }
532 sub bin_unlzma { return $PROGRAMS->{'unlzma'} if $PROGRAMS->{'unlzma'} }
534 =head2 $bool = $ae->have_old_bunzip2
536 Older versions of C</bin/bunzip2>, from before the C<bunzip2 1.0> release,
537 require all archive names to end in C<.bz2> or it will not extract
538 them. This method checks if you have a recent version of C<bunzip2>
539 that allows any extension, or an older one that doesn't.
543 sub have_old_bunzip2 {
546 ### no bunzip2? no old bunzip2 either :)
547 return unless $self->bin_bunzip2;
549 ### if we can't run this, we can't be sure if it's too old or not
550 ### XXX stupid stupid stupid bunzip2 doesn't understand --version
551 ### is not a request to extract data:
552 ### $ bunzip2 --version
553 ### bzip2, a block-sorting file compressor. Version 1.0.2, 30-Dec-2001.
555 ### bunzip2: I won't read compressed data from a terminal.
556 ### bunzip2: For help, type: `bunzip2 --help'.
561 ### double hateful: bunzip2 --version also hangs if input is a pipe
562 ### See #32370: Archive::Extract will hang if stdin is a pipe [+PATCH]
563 ### So, we have to provide *another* argument which is a fake filename,
564 ### just so it wont try to read from stdin to print its version..
566 ### Even if the file exists, it won't clobber or change it.
569 command => [$self->bin_bunzip2, '--version', 'NoSuchFile'],
575 return unless $buffer;
577 my ($version) = $buffer =~ /version \s+ (\d+)/ix;
579 return 1 if $version < 1;
583 #################################
587 #################################
589 ### annoying issue with (gnu) tar on win32, as illustrated by this
590 ### bug: https://rt.cpan.org/Ticket/Display.html?id=40138
591 ### which shows that (gnu) tar will interpret a file name with a :
592 ### in it as a remote file name, so C:\tmp\foo.txt is interpreted
593 ### as a remote shell, and the extract fails.
595 if( ON_WIN32 and my $cmd = __PACKAGE__->bin_tar ) {
597 ### if this is gnu tar we are running, we need to use --force-local
598 push @ExtraTarFlags, '--force-local' if `$cmd --version` =~ /gnu tar/i;
602 ### use /bin/tar to extract ###
606 ### check for /bin/tar ###
607 ### check for /bin/gzip if we need it ###
608 ### if any of the binaries are not available, return NA
609 { my $diag = not $self->bin_tar ?
610 loc("No '%1' program found", '/bin/tar') :
611 $self->is_tgz && !$self->bin_gzip ?
612 loc("No '%1' program found", '/bin/gzip') :
613 $self->is_tbz && !$self->bin_bunzip2 ?
614 loc("No '%1' program found", '/bin/bunzip2') :
618 $self->_error( $diag );
623 ### XXX figure out how to make IPC::Run do this in one call --
624 ### currently i don't know how to get output of a command after a pipe
625 ### trapped in a scalar. Mailed barries about this 5th of june 2004.
627 ### see what command we should run, based on whether
628 ### it's a .tgz or .tar
630 ### XXX solaris tar and bsdtar are having different outputs
631 ### depending whether you run with -x or -t
632 ### compensate for this insanity by running -t first, then -x
634 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
635 $self->bin_tar, '-tf', '-'] :
636 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
637 $self->bin_tar, '-tf', '-'] :
638 [$self->bin_tar, @ExtraTarFlags, '-tf', $self->archive];
641 ### newer versions of 'tar' (1.21 and up) now print record size
642 ### to STDERR as well if v OR t is given (used to be both). This
643 ### is a 'feature' according to the changelog, so we must now only
644 ### inspect STDOUT, otherwise, failures like these occur:
645 ### nntp.perl.org/group/perl.cpan.testers/2009/02/msg3230366.html
647 my @out = run( command => $cmd,
651 ### command was unsuccessful
653 return $self->_error(loc(
654 "Error listing contents of archive '%1': %2",
655 $self->archive, $buffer ));
658 ### no buffers available?
659 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
660 $self->_error( $self->_no_buffer_files( $self->archive ) );
663 ### if we're on solaris we /might/ be using /bin/tar, which has
664 ### a weird output format... we might also be using
665 ### /usr/local/bin/tar, which is gnu tar, which is perfectly
666 ### fine... so we have to do some guessing here =/
667 my @files = map { chomp;
669 : (m|^ x \s+ # 'xtract' -- sigh
670 (.+?), # the actual file name
671 \s+ [\d,.]+ \s bytes,
672 \s+ [\d,.]+ \s tape \s blocks
675 ### only STDOUT, see above
676 } map { split $/, $_ } @{$out[3]};
678 ### store the files that are in the archive ###
679 $self->files(\@files);
683 ### now actually extract it ###
685 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
686 $self->bin_tar, '-xf', '-'] :
687 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
688 $self->bin_tar, '-xf', '-'] :
689 [$self->bin_tar, @ExtraTarFlags, '-xf', $self->archive];
692 unless( scalar run( command => $cmd,
696 return $self->_error(loc("Error extracting archive '%1': %2",
697 $self->archive, $buffer ));
700 ### we might not have them, due to lack of buffers
702 ### now that we've extracted, figure out where we extracted to
703 my $dir = $self->__get_extract_dir( $self->files );
705 ### store the extraction dir ###
706 $self->extract_path( $dir );
710 ### we got here, no error happened
716 ### use archive::tar to extract ###
720 ### Loading Archive::Tar is going to set it to 1, so make it local
721 ### within this block, starting with its initial value. Whatever
722 ### Achive::Tar does will be undone when we return.
724 ### Also, later, set $Archive::Tar::WARN to $Archive::Extract::WARN
725 ### so users don't have to even think about this variable. If they
726 ### do, they still get their set value outside of this call.
727 local $Archive::Tar::WARN = $Archive::Tar::WARN;
729 ### we definitely need Archive::Tar, so load that first
730 { my $use_list = { 'Archive::Tar' => '0.0' };
732 unless( can_load( modules => $use_list ) ) {
734 $self->_error(loc("You do not have '%1' installed - " .
735 "Please install it as soon as possible.",
742 ### we might pass it a filehandle if it's a .tbz file..
743 my $fh_to_read = $self->archive;
745 ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib
746 ### if A::T's version is 0.99 or higher
747 if( $self->is_tgz ) {
748 my $use_list = { 'Compress::Zlib' => '0.0' };
749 $use_list->{ 'IO::Zlib' } = '0.0'
750 if $Archive::Tar::VERSION >= '0.99';
752 unless( can_load( modules => $use_list ) ) {
753 my $which = join '/', sort keys %$use_list;
756 "You do not have '%1' installed - Please ".
757 "install it as soon as possible.", $which)
763 } elsif ( $self->is_tbz ) {
764 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
765 unless( can_load( modules => $use_list ) ) {
767 "You do not have '%1' installed - Please " .
768 "install it as soon as possible.",
769 'IO::Uncompress::Bunzip2')
775 my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
776 return $self->_error(loc("Unable to open '%1': %2",
778 $IO::Uncompress::Bunzip2::Bunzip2Error));
783 ### $Archive::Tar::WARN is 1 by default in Archive::Tar, but we've
784 ### localized $Archive::Tar::WARN already.
785 $Archive::Tar::WARN = $Archive::Extract::WARN;
787 my $tar = Archive::Tar->new();
789 ### only tell it it's compressed if it's a .tgz, as we give it a file
790 ### handle if it's a .tbz
791 unless( $tar->read( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) ) ) {
792 return $self->_error(loc("Unable to read '%1': %2", $self->archive,
793 $Archive::Tar::error));
796 ### workaround to prevent Archive::Tar from setting uid, which
797 ### is a potential security hole. -autrijus
798 ### have to do it here, since A::T needs to be /loaded/ first ###
799 { no strict 'refs'; local $^W;
801 ### older versions of archive::tar <= 0.23
802 *Archive::Tar::chown = sub {};
805 ### for version of Archive::Tar > 1.04
806 local $Archive::Tar::CHOWN = 0;
808 { local $^W; # quell 'splice() offset past end of array' warnings
809 # on older versions of A::T
811 ### older archive::tar always returns $self, return value slightly
812 ### fux0r3d because of it.
814 or return $self->_error(loc("Unable to extract '%1': %2",
815 $self->archive, $Archive::Tar::error ));
818 my @files = $tar->list_files;
819 my $dir = $self->__get_extract_dir( \@files );
821 ### store the files that are in the archive ###
822 $self->files(\@files);
824 ### store the extraction dir ###
825 $self->extract_path( $dir );
827 ### check if the dir actually appeared ###
828 return 1 if -d $self->extract_path;
830 ### no dir, we failed ###
831 return $self->_error(loc("Unable to extract '%1': %2",
832 $self->archive, $Archive::Tar::error ));
835 #################################
839 #################################
844 ### check for /bin/gzip -- we need it ###
845 unless( $self->bin_gzip ) {
846 $self->_error(loc("No '%1' program found", '/bin/gzip'));
850 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
851 return $self->_error(loc("Could not open '%1' for writing: %2",
852 $self->_gunzip_to, $! ));
854 my $cmd = [ $self->bin_gzip, '-cdf', $self->archive ];
857 unless( scalar run( command => $cmd,
861 return $self->_error(loc("Unable to gunzip '%1': %2",
862 $self->archive, $buffer));
865 ### no buffers available?
866 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
867 $self->_error( $self->_no_buffer_content( $self->archive ) );
870 print $fh $buffer if defined $buffer;
874 ### set what files where extract, and where they went ###
875 $self->files( [$self->_gunzip_to] );
876 $self->extract_path( File::Spec->rel2abs(cwd()) );
884 my $use_list = { 'Compress::Zlib' => '0.0' };
885 unless( can_load( modules => $use_list ) ) {
886 $self->_error(loc("You do not have '%1' installed - Please " .
887 "install it as soon as possible.", 'Compress::Zlib'));
891 my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or
892 return $self->_error(loc("Unable to open '%1': %2",
893 $self->archive, $Compress::Zlib::gzerrno));
895 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
896 return $self->_error(loc("Could not open '%1' for writing: %2",
897 $self->_gunzip_to, $! ));
900 $fh->print($buffer) while $gz->gzread($buffer) > 0;
903 ### set what files where extract, and where they went ###
904 $self->files( [$self->_gunzip_to] );
905 $self->extract_path( File::Spec->rel2abs(cwd()) );
910 #################################
914 #################################
916 sub _uncompress_bin {
919 ### check for /bin/gzip -- we need it ###
920 unless( $self->bin_uncompress ) {
921 $self->_error(loc("No '%1' program found", '/bin/uncompress'));
925 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
926 return $self->_error(loc("Could not open '%1' for writing: %2",
927 $self->_gunzip_to, $! ));
929 my $cmd = [ $self->bin_uncompress, '-c', $self->archive ];
932 unless( scalar run( command => $cmd,
936 return $self->_error(loc("Unable to uncompress '%1': %2",
937 $self->archive, $buffer));
940 ### no buffers available?
941 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
942 $self->_error( $self->_no_buffer_content( $self->archive ) );
945 print $fh $buffer if defined $buffer;
949 ### set what files where extract, and where they went ###
950 $self->files( [$self->_gunzip_to] );
951 $self->extract_path( File::Spec->rel2abs(cwd()) );
957 #################################
961 #################################
967 ### check for /bin/gzip if we need it ###
968 unless( $self->bin_unzip ) {
969 $self->_error(loc("No '%1' program found", '/bin/unzip'));
973 ### first, get the files.. it must be 2 different commands with 'unzip' :(
974 { ### on VMS, capital letter options have to be quoted. This is
975 ### peported by John Malmberg on P5P Tue 21 Aug 2007 05:05:11
976 ### Subject: [patch@31735]Archive Extract fix on VMS.
977 my $opt = ON_VMS ? '"-Z"' : '-Z';
978 my $cmd = [ $self->bin_unzip, $opt, '-1', $self->archive ];
981 unless( scalar run( command => $cmd,
985 return $self->_error(loc("Unable to unzip '%1': %2",
986 $self->archive, $buffer));
989 ### no buffers available?
990 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
991 $self->_error( $self->_no_buffer_files( $self->archive ) );
994 $self->files( [split $/, $buffer] );
998 ### now, extract the archive ###
999 { my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ];
1002 unless( scalar run( command => $cmd,
1004 buffer => \$buffer )
1006 return $self->_error(loc("Unable to unzip '%1': %2",
1007 $self->archive, $buffer));
1010 if( scalar @{$self->files} ) {
1011 my $files = $self->files;
1012 my $dir = $self->__get_extract_dir( $files );
1014 $self->extract_path( $dir );
1024 my $use_list = { 'Archive::Zip' => '0.0' };
1025 unless( can_load( modules => $use_list ) ) {
1026 $self->_error(loc("You do not have '%1' installed - Please " .
1027 "install it as soon as possible.", 'Archive::Zip'));
1031 my $zip = Archive::Zip->new();
1033 unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) {
1034 return $self->_error(loc("Unable to read '%1'", $self->archive));
1040 ### Address: #43278: Explicitly tell Archive::Zip where to put the files:
1041 ### "In my BackPAN indexing, Archive::Zip was extracting things
1042 ### in my script's directory instead of the current working directory.
1043 ### I traced this back through Archive::Zip::_asLocalName which
1044 ### eventually calls File::Spec::Win32::rel2abs which on Windows might
1045 ### call Cwd::getdcwd. getdcwd returns the wrong directory in my
1046 ### case, even though I think I'm on the same drive.
1048 ### To fix this, I pass the optional second argument to
1049 ### extractMember using the cwd from Archive::Extract." --bdfoy
1051 ## store cwd() before looping; calls to cwd() can be expensive, and
1052 ### it won't change during the loop
1053 my $extract_dir = cwd();
1055 ### have to extract every member individually ###
1056 for my $member ($zip->members) {
1057 push @files, $member->{fileName};
1059 ### file to extact to, to avoid the above problem
1060 my $to = File::Spec->catfile( $extract_dir, $member->{fileName} );
1062 unless( $zip->extractMember($member, $to) == &Archive::Zip::AZ_OK ) {
1063 return $self->_error(loc("Extraction of '%1' from '%2' failed",
1064 $member->{fileName}, $self->archive ));
1068 my $dir = $self->__get_extract_dir( \@files );
1070 ### set what files where extract, and where they went ###
1071 $self->files( \@files );
1072 $self->extract_path( File::Spec->rel2abs($dir) );
1077 sub __get_extract_dir {
1079 my $files = shift || [];
1081 return unless scalar @$files;
1084 for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
1085 my($dir,$pos) = @$aref;
1087 ### add a catdir(), so that any trailing slashes get
1088 ### take care of (removed)
1089 ### also, a catdir() normalises './dir/foo' to 'dir/foo';
1090 ### which was the problem in bug #23999
1091 my $res = -d $files->[$pos]
1092 ? File::Spec->catdir( $files->[$pos], '' )
1093 : File::Spec->catdir( dirname( $files->[$pos] ) );
1098 ### if the first and last dir don't match, make sure the
1099 ### dirname is not set wrongly
1102 ### dirs are the same, so we know for sure what the extract dir is
1103 if( $dir1 eq $dir2 ) {
1106 ### dirs are different.. do they share the base dir?
1107 ### if so, use that, if not, fall back to '.'
1109 my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
1110 my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
1112 $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
1115 return File::Spec->rel2abs( $dir );
1118 #################################
1122 #################################
1127 ### check for /bin/gzip -- we need it ###
1128 unless( $self->bin_bunzip2 ) {
1129 $self->_error(loc("No '%1' program found", '/bin/bunzip2'));
1133 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1134 return $self->_error(loc("Could not open '%1' for writing: %2",
1135 $self->_gunzip_to, $! ));
1137 ### guard against broken bunzip2. See ->have_old_bunzip2()
1139 if( $self->have_old_bunzip2 and $self->archive !~ /\.bz2$/i ) {
1140 return $self->_error(loc("Your bunzip2 version is too old and ".
1141 "can only extract files ending in '%1'",
1145 my $cmd = [ $self->bin_bunzip2, '-cd', $self->archive ];
1148 unless( scalar run( command => $cmd,
1150 buffer => \$buffer )
1152 return $self->_error(loc("Unable to bunzip2 '%1': %2",
1153 $self->archive, $buffer));
1156 ### no buffers available?
1157 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1158 $self->_error( $self->_no_buffer_content( $self->archive ) );
1161 print $fh $buffer if defined $buffer;
1165 ### set what files where extract, and where they went ###
1166 $self->files( [$self->_gunzip_to] );
1167 $self->extract_path( File::Spec->rel2abs(cwd()) );
1172 ### using cz2, the compact versions... this we use mainly in archive::tar
1174 # sub _bunzip2_cz1 {
1177 # my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1178 # unless( can_load( modules => $use_list ) ) {
1179 # return $self->_error(loc("You do not have '%1' installed - Please " .
1180 # "install it as soon as possible.",
1181 # 'IO::Uncompress::Bunzip2'));
1184 # my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
1185 # return $self->_error(loc("Unable to open '%1': %2",
1187 # $IO::Uncompress::Bunzip2::Bunzip2Error));
1189 # my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1190 # return $self->_error(loc("Could not open '%1' for writing: %2",
1191 # $self->_gunzip_to, $! ));
1194 # $fh->print($buffer) while $bz->read($buffer) > 0;
1197 # ### set what files where extract, and where they went ###
1198 # $self->files( [$self->_gunzip_to] );
1199 # $self->extract_path( File::Spec->rel2abs(cwd()) );
1207 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1208 unless( can_load( modules => $use_list ) ) {
1209 $self->_error(loc("You do not have '%1' installed - Please " .
1210 "install it as soon as possible.",
1211 'IO::Uncompress::Bunzip2'));
1215 IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to)
1216 or return $self->_error(loc("Unable to uncompress '%1': %2",
1218 $IO::Uncompress::Bunzip2::Bunzip2Error));
1220 ### set what files where extract, and where they went ###
1221 $self->files( [$self->_gunzip_to] );
1222 $self->extract_path( File::Spec->rel2abs(cwd()) );
1228 #################################
1232 #################################
1237 ### check for /bin/unlzma -- we need it ###
1238 unless( $self->bin_unlzma ) {
1239 $self->_error(loc("No '%1' program found", '/bin/unlzma'));
1243 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1244 return $self->_error(loc("Could not open '%1' for writing: %2",
1245 $self->_gunzip_to, $! ));
1247 my $cmd = [ $self->bin_unlzma, '-c', $self->archive ];
1250 unless( scalar run( command => $cmd,
1252 buffer => \$buffer )
1254 return $self->_error(loc("Unable to unlzma '%1': %2",
1255 $self->archive, $buffer));
1258 ### no buffers available?
1259 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1260 $self->_error( $self->_no_buffer_content( $self->archive ) );
1263 print $fh $buffer if defined $buffer;
1267 ### set what files where extract, and where they went ###
1268 $self->files( [$self->_gunzip_to] );
1269 $self->extract_path( File::Spec->rel2abs(cwd()) );
1277 my $use_list = { 'Compress::unLZMA' => '0.0' };
1278 unless( can_load( modules => $use_list ) ) {
1279 $self->_error(loc("You do not have '%1' installed - Please " .
1280 "install it as soon as possible.", 'Compress::unLZMA'));
1284 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1285 return $self->_error(loc("Could not open '%1' for writing: %2",
1286 $self->_gunzip_to, $! ));
1289 $buffer = Compress::unLZMA::uncompressfile( $self->archive );
1290 unless ( defined $buffer ) {
1291 return $self->_error(loc("Could not unlzma '%1': %2",
1292 $self->archive, $@));
1295 print $fh $buffer if defined $buffer;
1299 ### set what files where extract, and where they went ###
1300 $self->files( [$self->_gunzip_to] );
1301 $self->extract_path( File::Spec->rel2abs(cwd()) );
1306 #################################
1310 #################################
1315 my $lerror = Carp::longmess($error);
1317 push @{$self->_error_msg}, $error;
1318 push @{$self->_error_msg_long}, $lerror;
1320 ### set $Archive::Extract::WARN to 0 to disable printing
1323 carp $DEBUG ? $lerror : $error;
1332 ### make sure we have a fallback aref
1335 ? $self->_error_msg_long
1339 return join $/, @$aref;
1342 sub _no_buffer_files {
1344 my $file = shift or return;
1345 return loc("No buffer captured, unable to tell ".
1346 "extracted files or extraction dir for '%1'", $file);
1349 sub _no_buffer_content {
1351 my $file = shift or return;
1352 return loc("No buffer captured, unable to get content for '%1'", $file);
1360 C<Archive::Extract> tries first to determine what type of archive you
1361 are passing it, by inspecting its suffix. It does not do this by using
1362 Mime magic, or something related. See C<CAVEATS> below.
1364 Once it has determined the file type, it knows which extraction methods
1365 it can use on the archive. It will try a perl solution first, then fall
1366 back to a commandline tool if that fails. If that also fails, it will
1367 return false, indicating it was unable to extract the archive.
1368 See the section on C<GLOBAL VARIABLES> to see how to alter this order.
1372 =head2 File Extensions
1374 C<Archive::Extract> trusts on the extension of the archive to determine
1375 what type it is, and what extractor methods therefore can be used. If
1376 your archives do not have any of the extensions as described in the
1377 C<new()> method, you will have to specify the type explicitly, or
1378 C<Archive::Extract> will not be able to extract the archive for you.
1380 =head2 Supporting Very Large Files
1382 C<Archive::Extract> can use either pure perl modules or command line
1383 programs under the hood. Some of the pure perl modules (like
1384 C<Archive::Tar> and Compress::unLZMA) take the entire contents of the archive into memory,
1385 which may not be feasible on your system. Consider setting the global
1386 variable C<$Archive::Extract::PREFER_BIN> to C<1>, which will prefer
1387 the use of command line programs and won't consume so much memory.
1389 See the C<GLOBAL VARIABLES> section below for details.
1391 =head2 Bunzip2 support of arbitrary extensions.
1393 Older versions of C</bin/bunzip2> do not support arbitrary file
1394 extensions and insist on a C<.bz2> suffix. Although we do our best
1395 to guard against this, if you experience a bunzip2 error, it may
1396 be related to this. For details, please see the C<have_old_bunzip2>
1399 =head1 GLOBAL VARIABLES
1401 =head2 $Archive::Extract::DEBUG
1403 Set this variable to C<true> to have all calls to command line tools
1404 be printed out, including all their output.
1405 This also enables C<Carp::longmess> errors, instead of the regular
1408 Good for tracking down why things don't work with your particular
1411 Defaults to C<false>.
1413 =head2 $Archive::Extract::WARN
1415 This variable controls whether errors encountered internally by
1416 C<Archive::Extract> should be C<carp>'d or not.
1418 Set to false to silence warnings. Inspect the output of the C<error()>
1419 method manually to see what went wrong.
1421 Defaults to C<true>.
1423 =head2 $Archive::Extract::PREFER_BIN
1425 This variables controls whether C<Archive::Extract> should prefer the
1426 use of perl modules, or commandline tools to extract archives.
1428 Set to C<true> to have C<Archive::Extract> prefer commandline tools.
1430 Defaults to C<false>.
1436 =item Mime magic support
1438 Maybe this module should use something like C<File::Type> to determine
1439 the type, rather than blindly trust the suffix.
1445 Please report bugs or other issues to E<lt>bug-archive-extract@rt.cpan.org<gt>.
1449 This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1453 This library is free software; you may redistribute and/or modify it
1454 under the same terms as Perl itself.
1459 # c-indentation-style: bsd
1461 # indent-tabs-mode: nil
1463 # vim: expandtab shiftwidth=4: