### VMS may require quoting upper case command options
use constant ON_VMS => $^O eq 'VMS' ? 1 : 0;
+### Windows needs special treatment of Tar options
+use constant ON_WIN32 => $^O eq 'MSWin32' ? 1 : 0;
+
### we can't use this extraction method, because of missing
### modules/binaries:
use constant METHOD_NA => [];
use constant LZMA => 'lzma';
use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG
- $_ALLOW_BIN $_ALLOW_PURE_PERL
+ $_ALLOW_BIN $_ALLOW_PURE_PERL $_ALLOW_TAR_ITER
];
-$VERSION = '0.28';
+$VERSION = '0.34';
$PREFER_BIN = 0;
$WARN = 1;
$DEBUG = 0;
$_ALLOW_PURE_PERL = 1; # allow pure perl extractors
$_ALLOW_BIN = 1; # allow binary extractors
+$_ALLOW_TAR_ITER = 1; # try to use Archive::Tar->iter if available
# same as all constants
my @Types = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ, Z, LZMA );
#
#################################
+### annoying issue with (gnu) tar on win32, as illustrated by this
+### bug: https://rt.cpan.org/Ticket/Display.html?id=40138
+### which shows that (gnu) tar will interpret a file name with a :
+### in it as a remote file name, so C:\tmp\foo.txt is interpreted
+### as a remote shell, and the extract fails.
+{ my @ExtraTarFlags;
+ if( ON_WIN32 and my $cmd = __PACKAGE__->bin_tar ) {
-### use /bin/tar to extract ###
-sub _untar_bin {
- my $self = shift;
-
- ### check for /bin/tar ###
- ### check for /bin/gzip if we need it ###
- ### if any of the binaries are not available, return NA
- { my $diag = not $self->bin_tar ?
- loc("No '%1' program found", '/bin/tar') :
- $self->is_tgz && !$self->bin_gzip ?
- loc("No '%1' program found", '/bin/gzip') :
- $self->is_tbz && !$self->bin_bunzip2 ?
- loc("No '%1' program found", '/bin/bunzip2') :
- '';
-
- if( $diag ) {
- $self->_error( $diag );
- return METHOD_NA;
- }
+ ### if this is gnu tar we are running, we need to use --force-local
+ push @ExtraTarFlags, '--force-local' if `$cmd --version` =~ /gnu tar/i;
}
- ### XXX figure out how to make IPC::Run do this in one call --
- ### currently i don't know how to get output of a command after a pipe
- ### trapped in a scalar. Mailed barries about this 5th of june 2004.
-
- ### see what command we should run, based on whether
- ### it's a .tgz or .tar
- ### XXX solaris tar and bsdtar are having different outputs
- ### depending whether you run with -x or -t
- ### compensate for this insanity by running -t first, then -x
- { my $cmd =
- $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
- $self->bin_tar, '-tf', '-'] :
- $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
- $self->bin_tar, '-tf', '-'] :
- [$self->bin_tar, '-tf', $self->archive];
-
- ### run the command ###
- my $buffer = '';
- unless( scalar run( command => $cmd,
- buffer => \$buffer,
- verbose => $DEBUG )
- ) {
- return $self->_error(loc(
- "Error listing contents of archive '%1': %2",
- $self->archive, $buffer ));
+ ### use /bin/tar to extract ###
+ sub _untar_bin {
+ my $self = shift;
+
+ ### check for /bin/tar ###
+ ### check for /bin/gzip if we need it ###
+ ### if any of the binaries are not available, return NA
+ { my $diag = not $self->bin_tar ?
+ loc("No '%1' program found", '/bin/tar') :
+ $self->is_tgz && !$self->bin_gzip ?
+ loc("No '%1' program found", '/bin/gzip') :
+ $self->is_tbz && !$self->bin_bunzip2 ?
+ loc("No '%1' program found", '/bin/bunzip2') :
+ '';
+
+ if( $diag ) {
+ $self->_error( $diag );
+ return METHOD_NA;
+ }
+ }
+
+ ### XXX figure out how to make IPC::Run do this in one call --
+ ### currently i don't know how to get output of a command after a pipe
+ ### trapped in a scalar. Mailed barries about this 5th of june 2004.
+
+ ### see what command we should run, based on whether
+ ### it's a .tgz or .tar
+
+ ### XXX solaris tar and bsdtar are having different outputs
+ ### depending whether you run with -x or -t
+ ### compensate for this insanity by running -t first, then -x
+ { my $cmd =
+ $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
+ $self->bin_tar, '-tf', '-'] :
+ $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
+ $self->bin_tar, '-tf', '-'] :
+ [$self->bin_tar, @ExtraTarFlags, '-tf', $self->archive];
+
+ ### run the command
+ ### newer versions of 'tar' (1.21 and up) now print record size
+ ### to STDERR as well if v OR t is given (used to be both). This
+ ### is a 'feature' according to the changelog, so we must now only
+ ### inspect STDOUT, otherwise, failures like these occur:
+ ### nntp.perl.org/group/perl.cpan.testers/2009/02/msg3230366.html
+ my $buffer = '';
+ my @out = run( command => $cmd,
+ buffer => \$buffer,
+ verbose => $DEBUG );
+
+ ### command was unsuccessful
+ unless( $out[0] ) {
+ return $self->_error(loc(
+ "Error listing contents of archive '%1': %2",
+ $self->archive, $buffer ));
+ }
+
+ ### no buffers available?
+ if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
+ $self->_error( $self->_no_buffer_files( $self->archive ) );
+
+ } else {
+ ### if we're on solaris we /might/ be using /bin/tar, which has
+ ### a weird output format... we might also be using
+ ### /usr/local/bin/tar, which is gnu tar, which is perfectly
+ ### fine... so we have to do some guessing here =/
+ my @files = map { chomp;
+ !ON_SOLARIS ? $_
+ : (m|^ x \s+ # 'xtract' -- sigh
+ (.+?), # the actual file name
+ \s+ [\d,.]+ \s bytes,
+ \s+ [\d,.]+ \s tape \s blocks
+ |x ? $1 : $_);
+
+ ### only STDOUT, see above. Sometims, extra whitespace
+ ### is present, so make sure we only pick lines with
+ ### a length
+ } grep { length } map { split $/, $_ } @{$out[3]};
+
+ ### store the files that are in the archive ###
+ $self->files(\@files);
+ }
}
-
- ### no buffers available?
- if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
- $self->_error( $self->_no_buffer_files( $self->archive ) );
+
+ ### now actually extract it ###
+ { my $cmd =
+ $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
+ $self->bin_tar, '-xf', '-'] :
+ $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
+ $self->bin_tar, '-xf', '-'] :
+ [$self->bin_tar, @ExtraTarFlags, '-xf', $self->archive];
+
+ my $buffer = '';
+ unless( scalar run( command => $cmd,
+ buffer => \$buffer,
+ verbose => $DEBUG )
+ ) {
+ return $self->_error(loc("Error extracting archive '%1': %2",
+ $self->archive, $buffer ));
+ }
+
+ ### we might not have them, due to lack of buffers
+ if( $self->files ) {
+ ### now that we've extracted, figure out where we extracted to
+ my $dir = $self->__get_extract_dir( $self->files );
- } else {
- ### if we're on solaris we /might/ be using /bin/tar, which has
- ### a weird output format... we might also be using
- ### /usr/local/bin/tar, which is gnu tar, which is perfectly
- ### fine... so we have to do some guessing here =/
- my @files = map { chomp;
- !ON_SOLARIS ? $_
- : (m|^ x \s+ # 'xtract' -- sigh
- (.+?), # the actual file name
- \s+ [\d,.]+ \s bytes,
- \s+ [\d,.]+ \s tape \s blocks
- |x ? $1 : $_);
-
- } split $/, $buffer;
-
- ### store the files that are in the archive ###
- $self->files(\@files);
- }
- }
-
- ### now actually extract it ###
- { my $cmd =
- $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
- $self->bin_tar, '-xf', '-'] :
- $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
- $self->bin_tar, '-xf', '-'] :
- [$self->bin_tar, '-xf', $self->archive];
-
- my $buffer = '';
- unless( scalar run( command => $cmd,
- buffer => \$buffer,
- verbose => $DEBUG )
- ) {
- return $self->_error(loc("Error extracting archive '%1': %2",
- $self->archive, $buffer ));
+ ### store the extraction dir ###
+ $self->extract_path( $dir );
+ }
}
-
- ### we might not have them, due to lack of buffers
- if( $self->files ) {
- ### now that we've extracted, figure out where we extracted to
- my $dir = $self->__get_extract_dir( $self->files );
- ### store the extraction dir ###
- $self->extract_path( $dir );
- }
+ ### we got here, no error happened
+ return 1;
}
-
- ### we got here, no error happened
- return 1;
}
+
### use archive::tar to extract ###
sub _untar_at {
my $self = shift;
$fh_to_read = $bz;
}
- ### $Archive::Tar::WARN is 1 by default in Archive::Tar, but we've
- ### localized $Archive::Tar::WARN already.
- $Archive::Tar::WARN = $Archive::Extract::WARN;
+ my @files;
+ {
+ ### $Archive::Tar::WARN is 1 by default in Archive::Tar, but we've
+ ### localized $Archive::Tar::WARN already.
+ $Archive::Tar::WARN = $Archive::Extract::WARN;
+
+ ### only tell it it's compressed if it's a .tgz, as we give it a file
+ ### handle if it's a .tbz
+ my @read = ( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) );
+
+ ### for version of Archive::Tar > 1.04
+ local $Archive::Tar::CHOWN = 0;
+
+ ### use the iterator if we can. it's a feature of A::T 1.40 and up
+ if ( $_ALLOW_TAR_ITER && Archive::Tar->can( 'iter' ) ) {
+
+ my $next;
+ unless ( $next = Archive::Tar->iter( @read ) ) {
+ return $self->_error(loc(
+ "Unable to read '%1': %2", $self->archive,
+ $Archive::Tar::error));
+ }
- my $tar = Archive::Tar->new();
+ while ( my $file = $next->() ) {
+ push @files, $file->full_path;
+
+ $file->extract or return $self->_error(loc(
+ "Unable to read '%1': %2",
+ $self->archive,
+ $Archive::Tar::error));
+ }
+
+ ### older version, read the archive into memory
+ } else {
- ### only tell it it's compressed if it's a .tgz, as we give it a file
- ### handle if it's a .tbz
- unless( $tar->read( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) ) ) {
- return $self->_error(loc("Unable to read '%1': %2", $self->archive,
- $Archive::Tar::error));
- }
+ my $tar = Archive::Tar->new();
+
+ unless( $tar->read( @read ) ) {
+ return $self->_error(loc("Unable to read '%1': %2",
+ $self->archive, $Archive::Tar::error));
+ }
- ### workaround to prevent Archive::Tar from setting uid, which
- ### is a potential security hole. -autrijus
- ### have to do it here, since A::T needs to be /loaded/ first ###
- { no strict 'refs'; local $^W;
+ ### workaround to prevent Archive::Tar from setting uid, which
+ ### is a potential security hole. -autrijus
+ ### have to do it here, since A::T needs to be /loaded/ first ###
+ { no strict 'refs'; local $^W;
- ### older versions of archive::tar <= 0.23
- *Archive::Tar::chown = sub {};
- }
+ ### older versions of archive::tar <= 0.23
+ *Archive::Tar::chown = sub {};
+ }
- ### for version of Archive::Tar > 1.04
- local $Archive::Tar::CHOWN = 0;
+ { local $^W; # quell 'splice() offset past end of array' warnings
+ # on older versions of A::T
- { local $^W; # quell 'splice() offset past end of array' warnings
- # on older versions of A::T
+ ### older archive::tar always returns $self, return value
+ ### slightly fux0r3d because of it.
+ $tar->extract or return $self->_error(loc(
+ "Unable to extract '%1': %2",
+ $self->archive, $Archive::Tar::error ));
+ }
- ### older archive::tar always returns $self, return value slightly
- ### fux0r3d because of it.
- $tar->extract()
- or return $self->_error(loc("Unable to extract '%1': %2",
- $self->archive, $Archive::Tar::error ));
+ @files = $tar->list_files;
+ }
}
- my @files = $tar->list_files;
- my $dir = $self->__get_extract_dir( \@files );
+ my $dir = $self->__get_extract_dir( \@files );
### store the files that are in the archive ###
$self->files(\@files);
}
my @files;
- ### have to extract every memeber individually ###
+
+
+ ### Address: #43278: Explicitly tell Archive::Zip where to put the files:
+ ### "In my BackPAN indexing, Archive::Zip was extracting things
+ ### in my script's directory instead of the current working directory.
+ ### I traced this back through Archive::Zip::_asLocalName which
+ ### eventually calls File::Spec::Win32::rel2abs which on Windows might
+ ### call Cwd::getdcwd. getdcwd returns the wrong directory in my
+ ### case, even though I think I'm on the same drive.
+ ###
+ ### To fix this, I pass the optional second argument to
+ ### extractMember using the cwd from Archive::Extract." --bdfoy
+
+ ## store cwd() before looping; calls to cwd() can be expensive, and
+ ### it won't change during the loop
+ my $extract_dir = cwd();
+
+ ### have to extract every member individually ###
for my $member ($zip->members) {
push @files, $member->{fileName};
- unless( $zip->extractMember($member) == &Archive::Zip::AZ_OK ) {
+ ### file to extact to, to avoid the above problem
+ my $to = File::Spec->catfile( $extract_dir, $member->{fileName} );
+
+ unless( $zip->extractMember($member, $to) == &Archive::Zip::AZ_OK ) {
return $self->_error(loc("Extraction of '%1' from '%2' failed",
$member->{fileName}, $self->archive ));
}
Defaults to C<false>.
-=head1 TODO
+=head1 TODO / CAVEATS
=over 4
Maybe this module should use something like C<File::Type> to determine
the type, rather than blindly trust the suffix.
+=item Thread safety
+
+Currently, C<Archive::Extract> does a C<chdir> to the extraction dir before
+extraction, and a C<chdir> back again after. This is not necessarily
+thread safe. See C<rt.cpan.org> bug C<#45671> for details.
+
=back
=head1 BUG REPORTS