X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FArchive%2FExtract.pm;h=2c9331e22044a0694a3abfb010cbbf1b7fa87eae;hb=ab662740a280b0adc715b34ea6c3b6c1802d3143;hp=c7486bd100dfb59196244e47f6639f036db94ac2;hpb=83285295780b6718141d6f5bf054a2e6da464e39;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/Archive/Extract.pm b/lib/Archive/Extract.pm index c7486bd..2c9331e 100644 --- a/lib/Archive/Extract.pm +++ b/lib/Archive/Extract.pm @@ -20,6 +20,9 @@ use constant FILE_EXISTS => sub { -e $_[0] ? 1 : 0 }; ### VMS may require quoting upper case command options use constant ON_VMS => $^O eq 'VMS' ? 1 : 0; +### Windows needs special treatment of Tar options +use constant ON_WIN32 => $^O eq 'MSWin32' ? 1 : 0; + ### we can't use this extraction method, because of missing ### modules/binaries: use constant METHOD_NA => []; @@ -35,15 +38,16 @@ use constant Z => 'Z'; use constant LZMA => 'lzma'; use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG - $_ALLOW_BIN $_ALLOW_PURE_PERL + $_ALLOW_BIN $_ALLOW_PURE_PERL $_ALLOW_TAR_ITER ]; -$VERSION = '0.28'; +$VERSION = '0.34'; $PREFER_BIN = 0; $WARN = 1; $DEBUG = 0; $_ALLOW_PURE_PERL = 1; # allow pure perl extractors $_ALLOW_BIN = 1; # allow binary extractors +$_ALLOW_TAR_ITER = 1; # try to use Archive::Tar->iter if available # same as all constants my @Types = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ, Z, LZMA ); @@ -583,111 +587,135 @@ sub have_old_bunzip2 { # ################################# +### annoying issue with (gnu) tar on win32, as illustrated by this +### bug: https://rt.cpan.org/Ticket/Display.html?id=40138 +### which shows that (gnu) tar will interpret a file name with a : +### in it as a remote file name, so C:\tmp\foo.txt is interpreted +### as a remote shell, and the extract fails. +{ my @ExtraTarFlags; + if( ON_WIN32 and my $cmd = __PACKAGE__->bin_tar ) { -### use /bin/tar to extract ### -sub _untar_bin { - my $self = shift; - - ### check for /bin/tar ### - ### check for /bin/gzip if we need it ### - ### if any of the binaries are not available, return NA - { my $diag = not $self->bin_tar ? - loc("No '%1' program found", '/bin/tar') : - $self->is_tgz && !$self->bin_gzip ? - loc("No '%1' program found", '/bin/gzip') : - $self->is_tbz && !$self->bin_bunzip2 ? - loc("No '%1' program found", '/bin/bunzip2') : - ''; - - if( $diag ) { - $self->_error( $diag ); - return METHOD_NA; - } + ### if this is gnu tar we are running, we need to use --force-local + push @ExtraTarFlags, '--force-local' if `$cmd --version` =~ /gnu tar/i; } - ### XXX figure out how to make IPC::Run do this in one call -- - ### currently i don't know how to get output of a command after a pipe - ### trapped in a scalar. Mailed barries about this 5th of june 2004. - - ### see what command we should run, based on whether - ### it's a .tgz or .tar - ### XXX solaris tar and bsdtar are having different outputs - ### depending whether you run with -x or -t - ### compensate for this insanity by running -t first, then -x - { my $cmd = - $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|', - $self->bin_tar, '-tf', '-'] : - $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|', - $self->bin_tar, '-tf', '-'] : - [$self->bin_tar, '-tf', $self->archive]; - - ### run the command ### - my $buffer = ''; - unless( scalar run( command => $cmd, - buffer => \$buffer, - verbose => $DEBUG ) - ) { - return $self->_error(loc( - "Error listing contents of archive '%1': %2", - $self->archive, $buffer )); + ### use /bin/tar to extract ### + sub _untar_bin { + my $self = shift; + + ### check for /bin/tar ### + ### check for /bin/gzip if we need it ### + ### if any of the binaries are not available, return NA + { my $diag = not $self->bin_tar ? + loc("No '%1' program found", '/bin/tar') : + $self->is_tgz && !$self->bin_gzip ? + loc("No '%1' program found", '/bin/gzip') : + $self->is_tbz && !$self->bin_bunzip2 ? + loc("No '%1' program found", '/bin/bunzip2') : + ''; + + if( $diag ) { + $self->_error( $diag ); + return METHOD_NA; + } + } + + ### XXX figure out how to make IPC::Run do this in one call -- + ### currently i don't know how to get output of a command after a pipe + ### trapped in a scalar. Mailed barries about this 5th of june 2004. + + ### see what command we should run, based on whether + ### it's a .tgz or .tar + + ### XXX solaris tar and bsdtar are having different outputs + ### depending whether you run with -x or -t + ### compensate for this insanity by running -t first, then -x + { my $cmd = + $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|', + $self->bin_tar, '-tf', '-'] : + $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|', + $self->bin_tar, '-tf', '-'] : + [$self->bin_tar, @ExtraTarFlags, '-tf', $self->archive]; + + ### run the command + ### newer versions of 'tar' (1.21 and up) now print record size + ### to STDERR as well if v OR t is given (used to be both). This + ### is a 'feature' according to the changelog, so we must now only + ### inspect STDOUT, otherwise, failures like these occur: + ### nntp.perl.org/group/perl.cpan.testers/2009/02/msg3230366.html + my $buffer = ''; + my @out = run( command => $cmd, + buffer => \$buffer, + verbose => $DEBUG ); + + ### command was unsuccessful + unless( $out[0] ) { + return $self->_error(loc( + "Error listing contents of archive '%1': %2", + $self->archive, $buffer )); + } + + ### no buffers available? + if( !IPC::Cmd->can_capture_buffer and !$buffer ) { + $self->_error( $self->_no_buffer_files( $self->archive ) ); + + } else { + ### if we're on solaris we /might/ be using /bin/tar, which has + ### a weird output format... we might also be using + ### /usr/local/bin/tar, which is gnu tar, which is perfectly + ### fine... so we have to do some guessing here =/ + my @files = map { chomp; + !ON_SOLARIS ? $_ + : (m|^ x \s+ # 'xtract' -- sigh + (.+?), # the actual file name + \s+ [\d,.]+ \s bytes, + \s+ [\d,.]+ \s tape \s blocks + |x ? $1 : $_); + + ### only STDOUT, see above. Sometims, extra whitespace + ### is present, so make sure we only pick lines with + ### a length + } grep { length } map { split $/, $_ } @{$out[3]}; + + ### store the files that are in the archive ### + $self->files(\@files); + } } - - ### no buffers available? - if( !IPC::Cmd->can_capture_buffer and !$buffer ) { - $self->_error( $self->_no_buffer_files( $self->archive ) ); + + ### now actually extract it ### + { my $cmd = + $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|', + $self->bin_tar, '-xf', '-'] : + $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|', + $self->bin_tar, '-xf', '-'] : + [$self->bin_tar, @ExtraTarFlags, '-xf', $self->archive]; + + my $buffer = ''; + unless( scalar run( command => $cmd, + buffer => \$buffer, + verbose => $DEBUG ) + ) { + return $self->_error(loc("Error extracting archive '%1': %2", + $self->archive, $buffer )); + } + + ### we might not have them, due to lack of buffers + if( $self->files ) { + ### now that we've extracted, figure out where we extracted to + my $dir = $self->__get_extract_dir( $self->files ); - } else { - ### if we're on solaris we /might/ be using /bin/tar, which has - ### a weird output format... we might also be using - ### /usr/local/bin/tar, which is gnu tar, which is perfectly - ### fine... so we have to do some guessing here =/ - my @files = map { chomp; - !ON_SOLARIS ? $_ - : (m|^ x \s+ # 'xtract' -- sigh - (.+?), # the actual file name - \s+ [\d,.]+ \s bytes, - \s+ [\d,.]+ \s tape \s blocks - |x ? $1 : $_); - - } split $/, $buffer; - - ### store the files that are in the archive ### - $self->files(\@files); - } - } - - ### now actually extract it ### - { my $cmd = - $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|', - $self->bin_tar, '-xf', '-'] : - $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|', - $self->bin_tar, '-xf', '-'] : - [$self->bin_tar, '-xf', $self->archive]; - - my $buffer = ''; - unless( scalar run( command => $cmd, - buffer => \$buffer, - verbose => $DEBUG ) - ) { - return $self->_error(loc("Error extracting archive '%1': %2", - $self->archive, $buffer )); + ### store the extraction dir ### + $self->extract_path( $dir ); + } } - - ### we might not have them, due to lack of buffers - if( $self->files ) { - ### now that we've extracted, figure out where we extracted to - my $dir = $self->__get_extract_dir( $self->files ); - ### store the extraction dir ### - $self->extract_path( $dir ); - } + ### we got here, no error happened + return 1; } - - ### we got here, no error happened - return 1; } + ### use archive::tar to extract ### sub _untar_at { my $self = shift; @@ -755,43 +783,72 @@ sub _untar_at { $fh_to_read = $bz; } - ### $Archive::Tar::WARN is 1 by default in Archive::Tar, but we've - ### localized $Archive::Tar::WARN already. - $Archive::Tar::WARN = $Archive::Extract::WARN; + my @files; + { + ### $Archive::Tar::WARN is 1 by default in Archive::Tar, but we've + ### localized $Archive::Tar::WARN already. + $Archive::Tar::WARN = $Archive::Extract::WARN; + + ### only tell it it's compressed if it's a .tgz, as we give it a file + ### handle if it's a .tbz + my @read = ( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) ); + + ### for version of Archive::Tar > 1.04 + local $Archive::Tar::CHOWN = 0; + + ### use the iterator if we can. it's a feature of A::T 1.40 and up + if ( $_ALLOW_TAR_ITER && Archive::Tar->can( 'iter' ) ) { + + my $next; + unless ( $next = Archive::Tar->iter( @read ) ) { + return $self->_error(loc( + "Unable to read '%1': %2", $self->archive, + $Archive::Tar::error)); + } - my $tar = Archive::Tar->new(); + while ( my $file = $next->() ) { + push @files, $file->full_path; + + $file->extract or return $self->_error(loc( + "Unable to read '%1': %2", + $self->archive, + $Archive::Tar::error)); + } + + ### older version, read the archive into memory + } else { - ### only tell it it's compressed if it's a .tgz, as we give it a file - ### handle if it's a .tbz - unless( $tar->read( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) ) ) { - return $self->_error(loc("Unable to read '%1': %2", $self->archive, - $Archive::Tar::error)); - } + my $tar = Archive::Tar->new(); + + unless( $tar->read( @read ) ) { + return $self->_error(loc("Unable to read '%1': %2", + $self->archive, $Archive::Tar::error)); + } - ### workaround to prevent Archive::Tar from setting uid, which - ### is a potential security hole. -autrijus - ### have to do it here, since A::T needs to be /loaded/ first ### - { no strict 'refs'; local $^W; + ### workaround to prevent Archive::Tar from setting uid, which + ### is a potential security hole. -autrijus + ### have to do it here, since A::T needs to be /loaded/ first ### + { no strict 'refs'; local $^W; - ### older versions of archive::tar <= 0.23 - *Archive::Tar::chown = sub {}; - } + ### older versions of archive::tar <= 0.23 + *Archive::Tar::chown = sub {}; + } - ### for version of Archive::Tar > 1.04 - local $Archive::Tar::CHOWN = 0; + { local $^W; # quell 'splice() offset past end of array' warnings + # on older versions of A::T - { local $^W; # quell 'splice() offset past end of array' warnings - # on older versions of A::T + ### older archive::tar always returns $self, return value + ### slightly fux0r3d because of it. + $tar->extract or return $self->_error(loc( + "Unable to extract '%1': %2", + $self->archive, $Archive::Tar::error )); + } - ### older archive::tar always returns $self, return value slightly - ### fux0r3d because of it. - $tar->extract() - or return $self->_error(loc("Unable to extract '%1': %2", - $self->archive, $Archive::Tar::error )); + @files = $tar->list_files; + } } - my @files = $tar->list_files; - my $dir = $self->__get_extract_dir( \@files ); + my $dir = $self->__get_extract_dir( \@files ); ### store the files that are in the archive ### $self->files(\@files); @@ -1010,11 +1067,31 @@ sub _unzip_az { } my @files; - ### have to extract every memeber individually ### + + + ### Address: #43278: Explicitly tell Archive::Zip where to put the files: + ### "In my BackPAN indexing, Archive::Zip was extracting things + ### in my script's directory instead of the current working directory. + ### I traced this back through Archive::Zip::_asLocalName which + ### eventually calls File::Spec::Win32::rel2abs which on Windows might + ### call Cwd::getdcwd. getdcwd returns the wrong directory in my + ### case, even though I think I'm on the same drive. + ### + ### To fix this, I pass the optional second argument to + ### extractMember using the cwd from Archive::Extract." --bdfoy + + ## store cwd() before looping; calls to cwd() can be expensive, and + ### it won't change during the loop + my $extract_dir = cwd(); + + ### have to extract every member individually ### for my $member ($zip->members) { push @files, $member->{fileName}; - unless( $zip->extractMember($member) == &Archive::Zip::AZ_OK ) { + ### file to extact to, to avoid the above problem + my $to = File::Spec->catfile( $extract_dir, $member->{fileName} ); + + unless( $zip->extractMember($member, $to) == &Archive::Zip::AZ_OK ) { return $self->_error(loc("Extraction of '%1' from '%2' failed", $member->{fileName}, $self->archive )); } @@ -1384,7 +1461,7 @@ Set to C to have C prefer commandline tools. Defaults to C. -=head1 TODO +=head1 TODO / CAVEATS =over 4 @@ -1393,6 +1470,12 @@ Defaults to C. Maybe this module should use something like C to determine the type, rather than blindly trust the suffix. +=item Thread safety + +Currently, C does a C to the extraction dir before +extraction, and a C back again after. This is not necessarily +thread safe. See C bug C<#45671> for details. + =back =head1 BUG REPORTS