Update to Archive::Extract 0.32

[p5sagit/p5-mst-13.2.git] / lib / Archive / Extract.pm
diff --git a/lib/Archive/Extract.pm b/lib/Archive/Extract.pm

index c7486bd..ad3be80 100644 (file)
--- a/lib/Archive/Extract.pm
+++ b/lib/Archive/Extract.pm
@@ -20,6 +20,9 @@ use constant FILE_EXISTS    => sub { -e $_[0] ? 1 : 0 };
 ### VMS may require quoting upper case command options
 use constant ON_VMS         => $^O eq 'VMS' ? 1 : 0;
 
+### Windows needs special treatment of Tar options
+use constant ON_WIN32       => $^O eq 'MSWin32' ? 1 : 0;
+
 ### we can't use this extraction method, because of missing
 ### modules/binaries:
 use constant METHOD_NA      => []; 
@@ -38,7 +41,7 @@ use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG
             $_ALLOW_BIN $_ALLOW_PURE_PERL
          ];
 
-$VERSION            = '0.28';
+$VERSION            = '0.32';
 $PREFER_BIN         = 0;
 $WARN               = 1;
 $DEBUG              = 0;
@@ -583,111 +586,135 @@ sub have_old_bunzip2 {
 #
 #################################
 
+### annoying issue with (gnu) tar on win32, as illustrated by this
+### bug: https://rt.cpan.org/Ticket/Display.html?id=40138
+### which shows that (gnu) tar will interpret a file name with a :
+### in it as a remote file name, so C:\tmp\foo.txt is interpreted
+### as a remote shell, and the extract fails.
+{   my @ExtraTarFlags;
+    if( ON_WIN32 and my $cmd = __PACKAGE__->bin_tar ) {
 
-### use /bin/tar to extract ###
-sub _untar_bin {
-    my $self = shift;
-
-    ### check for /bin/tar ###
-    ### check for /bin/gzip if we need it ###
-    ### if any of the binaries are not available, return NA
-    {   my $diag =  not $self->bin_tar ? 
-                        loc("No '%1' program found", '/bin/tar') :
-                    $self->is_tgz && !$self->bin_gzip ? 
-                        loc("No '%1' program found", '/bin/gzip') :
-                    $self->is_tbz && !$self->bin_bunzip2 ?
-                        loc("No '%1' program found", '/bin/bunzip2') :
-                    '';
-                    
-        if( $diag ) {
-            $self->_error( $diag );
-            return METHOD_NA;
-        }
+        ### if this is gnu tar we are running, we need to use --force-local
+        push @ExtraTarFlags, '--force-local' if `$cmd --version` =~ /gnu tar/i;
     }        
 
-    ### XXX figure out how to make IPC::Run do this in one call --
-    ### currently i don't know how to get output of a command after a pipe
-    ### trapped in a scalar. Mailed barries about this 5th of june 2004.
-
-    ### see what command we should run, based on whether
-    ### it's a .tgz or .tar
 
-    ### XXX solaris tar and bsdtar are having different outputs
-    ### depending whether you run with -x or -t
-    ### compensate for this insanity by running -t first, then -x
-    {    my $cmd = 
-            $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
-                             $self->bin_tar, '-tf', '-'] :
-            $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',                             
-                             $self->bin_tar, '-tf', '-'] :
-            [$self->bin_tar, '-tf', $self->archive];
-
-        ### run the command ###
-        my $buffer = '';
-        unless( scalar run( command => $cmd,
-                            buffer  => \$buffer,
-                            verbose => $DEBUG )
-        ) {
-            return $self->_error(loc(
-                            "Error listing contents of archive '%1': %2",
-                            $self->archive, $buffer ));
+    ### use /bin/tar to extract ###
+    sub _untar_bin {
+        my $self = shift;
+    
+        ### check for /bin/tar ###
+        ### check for /bin/gzip if we need it ###
+        ### if any of the binaries are not available, return NA
+        {   my $diag =  not $self->bin_tar ? 
+                            loc("No '%1' program found", '/bin/tar') :
+                        $self->is_tgz && !$self->bin_gzip ? 
+                            loc("No '%1' program found", '/bin/gzip') :
+                        $self->is_tbz && !$self->bin_bunzip2 ?
+                            loc("No '%1' program found", '/bin/bunzip2') :
+                        '';
+                        
+            if( $diag ) {
+                $self->_error( $diag );
+                return METHOD_NA;
+            }
+        }        
+    
+        ### XXX figure out how to make IPC::Run do this in one call --
+        ### currently i don't know how to get output of a command after a pipe
+        ### trapped in a scalar. Mailed barries about this 5th of june 2004.
+    
+        ### see what command we should run, based on whether
+        ### it's a .tgz or .tar
+    
+        ### XXX solaris tar and bsdtar are having different outputs
+        ### depending whether you run with -x or -t
+        ### compensate for this insanity by running -t first, then -x
+        {    my $cmd = 
+                $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|', 
+                                 $self->bin_tar, '-tf', '-'] :
+                $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',                             
+                                 $self->bin_tar, '-tf', '-'] :
+                [$self->bin_tar, @ExtraTarFlags, '-tf', $self->archive];
+    
+            ### run the command 
+            ### newer versions of 'tar' (1.21 and up) now print record size
+            ### to STDERR as well if v OR t is given (used to be both). This 
+            ### is a 'feature' according to the changelog, so we must now only
+            ### inspect STDOUT, otherwise, failures like these occur:
+            ### nntp.perl.org/group/perl.cpan.testers/2009/02/msg3230366.html
+            my $buffer  = '';
+            my @out     = run(  command => $cmd,
+                                buffer  => \$buffer,
+                                verbose => $DEBUG );
+
+            ### command was unsuccessful            
+            unless( $out[0] ) { 
+                return $self->_error(loc(
+                                "Error listing contents of archive '%1': %2",
+                                $self->archive, $buffer ));
+            }
+    
+            ### no buffers available?
+            if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
+                $self->_error( $self->_no_buffer_files( $self->archive ) );
+            
+            } else {
+                ### if we're on solaris we /might/ be using /bin/tar, which has
+                ### a weird output format... we might also be using
+                ### /usr/local/bin/tar, which is gnu tar, which is perfectly
+                ### fine... so we have to do some guessing here =/
+                my @files = map { chomp;
+                              !ON_SOLARIS ? $_
+                                          : (m|^ x \s+  # 'xtract' -- sigh
+                                                (.+?),  # the actual file name
+                                                \s+ [\d,.]+ \s bytes,
+                                                \s+ [\d,.]+ \s tape \s blocks
+                                            |x ? $1 : $_);
+    
+                        ### only STDOUT, see above. Sometims, extra whitespace
+                        ### is present, so make sure we only pick lines with
+                        ### a length
+                        } grep { length } map { split $/, $_ } @{$out[3]};     
+    
+                ### store the files that are in the archive ###
+                $self->files(\@files);
+            }
         }
-
-        ### no buffers available?
-        if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
-            $self->_error( $self->_no_buffer_files( $self->archive ) );
+    
+        ### now actually extract it ###
+        {   my $cmd = 
+                $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
+                                 $self->bin_tar, '-xf', '-'] :
+                $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',                             
+                                 $self->bin_tar, '-xf', '-'] :
+                [$self->bin_tar, @ExtraTarFlags, '-xf', $self->archive];
+    
+            my $buffer = '';
+            unless( scalar run( command => $cmd,
+                                buffer  => \$buffer,
+                                verbose => $DEBUG )
+            ) {
+                return $self->_error(loc("Error extracting archive '%1': %2",
+                                $self->archive, $buffer ));
+            }
+    
+            ### we might not have them, due to lack of buffers
+            if( $self->files ) {
+                ### now that we've extracted, figure out where we extracted to
+                my $dir = $self->__get_extract_dir( $self->files );
         
-        } else {
-            ### if we're on solaris we /might/ be using /bin/tar, which has
-            ### a weird output format... we might also be using
-            ### /usr/local/bin/tar, which is gnu tar, which is perfectly
-            ### fine... so we have to do some guessing here =/
-            my @files = map { chomp;
-                          !ON_SOLARIS ? $_
-                                      : (m|^ x \s+  # 'xtract' -- sigh
-                                            (.+?),  # the actual file name
-                                            \s+ [\d,.]+ \s bytes,
-                                            \s+ [\d,.]+ \s tape \s blocks
-                                        |x ? $1 : $_);
-
-                    } split $/, $buffer;
-
-            ### store the files that are in the archive ###
-            $self->files(\@files);
-        }
-    }
-
-    ### now actually extract it ###
-    {   my $cmd = 
-            $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
-                             $self->bin_tar, '-xf', '-'] :
-            $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',                             
-                             $self->bin_tar, '-xf', '-'] :
-            [$self->bin_tar, '-xf', $self->archive];
-
-        my $buffer = '';
-        unless( scalar run( command => $cmd,
-                            buffer  => \$buffer,
-                            verbose => $DEBUG )
-        ) {
-            return $self->_error(loc("Error extracting archive '%1': %2",
-                            $self->archive, $buffer ));
+                ### store the extraction dir ###
+                $self->extract_path( $dir );
+            }
         }
-
-        ### we might not have them, due to lack of buffers
-        if( $self->files ) {
-            ### now that we've extracted, figure out where we extracted to
-            my $dir = $self->__get_extract_dir( $self->files );
     
-            ### store the extraction dir ###
-            $self->extract_path( $dir );
-        }
+        ### we got here, no error happened
+        return 1;
     }
-
-    ### we got here, no error happened
-    return 1;
 }
 
+
 ### use archive::tar to extract ###
 sub _untar_at {
     my $self = shift;
@@ -1010,11 +1037,31 @@ sub _unzip_az {
     }
 
     my @files;
-    ### have to extract every memeber individually ###
+    
+    
+    ### Address: #43278: Explicitly tell Archive::Zip where to put the files:
+    ### "In my BackPAN indexing, Archive::Zip was extracting things
+    ### in my script's directory instead of the current working directory.
+    ### I traced this back through Archive::Zip::_asLocalName which
+    ### eventually calls File::Spec::Win32::rel2abs which on Windows might
+    ### call Cwd::getdcwd. getdcwd returns the wrong directory in my
+    ### case, even though I think I'm on the same drive.
+    ### 
+    ### To fix this, I pass the optional second argument to
+    ### extractMember using the cwd from Archive::Extract." --bdfoy
+
+    ## store cwd() before looping; calls to cwd() can be expensive, and
+    ### it won't change during the loop
+    my $extract_dir = cwd();
+    
+    ### have to extract every member individually ###
     for my $member ($zip->members) {
         push @files, $member->{fileName};
 
-        unless( $zip->extractMember($member) == &Archive::Zip::AZ_OK ) {
+        ### file to extact to, to avoid the above problem
+        my $to = File::Spec->catfile( $extract_dir, $member->{fileName} );
+        
+        unless( $zip->extractMember($member, $to) == &Archive::Zip::AZ_OK ) {
             return $self->_error(loc("Extraction of '%1' from '%2' failed",
                         $member->{fileName}, $self->archive ));
         }
@@ -1384,7 +1431,7 @@ Set to C<true> to have C<Archive::Extract> prefer commandline tools.
 
 Defaults to C<false>.
 
-=head1 TODO
+=head1 TODO / CAVEATS
 
 =over 4
 
@@ -1393,6 +1440,12 @@ Defaults to C<false>.
 Maybe this module should use something like C<File::Type> to determine
 the type, rather than blindly trust the suffix.
 
+=item Thread safety
+
+Currently, C<Archive::Extract> does a C<chdir> to the extraction dir before
+extraction, and a C<chdir> back again after. This is not necessarily 
+thread safe. See C<rt.cpan.org> bug C<#45671> for details.
+
 =back
 
 =head1 BUG REPORTS