lib/Archive/Extract.pm

   1 package Archive::Extract;
   2
   3 use strict;
   4
   5 use Cwd                         qw[cwd];
   6 use Carp                        qw[carp];
   7 use IPC::Cmd                    qw[run can_run];
   8 use FileHandle;
   9 use File::Path                  qw[mkpath];
  10 use File::Spec;
  11 use File::Basename              qw[dirname basename];
  12 use Params::Check               qw[check];
  13 use Module::Load::Conditional   qw[can_load check_install];
  14 use Locale::Maketext::Simple    Style => 'gettext';
  15
  16 ### solaris has silly /bin/tar output ###
  17 use constant ON_SOLARIS     => $^O eq 'solaris' ? 1 : 0;
  18 use constant FILE_EXISTS    => sub { -e $_[0] ? 1 : 0 };
  19
  20 ### VMS may require quoting upper case command options
  21 use constant ON_VMS         => $^O eq 'VMS' ? 1 : 0;
  22
  23 ### If these are changed, update @TYPES and the new() POD
  24 use constant TGZ            => 'tgz';
  25 use constant TAR            => 'tar';
  26 use constant GZ             => 'gz';
  27 use constant ZIP            => 'zip';
  28 use constant BZ2            => 'bz2';
  29 use constant TBZ            => 'tbz';
  30 use constant Z              => 'Z';
  31
  32 use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG];
  33
  34 $VERSION        = '0.24';
  35 $PREFER_BIN     = 0;
  36 $WARN           = 1;
  37 $DEBUG          = 0;
  38 my @Types       = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ, Z ); # same as all constants
  39
  40 local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1;
  41
  42 =pod
  43
  44 =head1 NAME
  45
  46 Archive::Extract - A generic archive extracting mechanism
  47
  48 =head1 SYNOPSIS
  49
  50     use Archive::Extract;
  51
  52     ### build an Archive::Extract object ###
  53     my $ae = Archive::Extract->new( archive => 'foo.tgz' );
  54
  55     ### extract to cwd() ###
  56     my $ok = $ae->extract;
  57
  58     ### extract to /tmp ###
  59     my $ok = $ae->extract( to => '/tmp' );
  60
  61     ### what if something went wrong?
  62     my $ok = $ae->extract or die $ae->error;
  63
  64     ### files from the archive ###
  65     my $files   = $ae->files;
  66
  67     ### dir that was extracted to ###
  68     my $outdir  = $ae->extract_path;
  69
  70
  71     ### quick check methods ###
  72     $ae->is_tar     # is it a .tar file?
  73     $ae->is_tgz     # is it a .tar.gz or .tgz file?
  74     $ae->is_gz;     # is it a .gz file?
  75     $ae->is_zip;    # is it a .zip file?
  76     $ae->is_bz2;    # is it a .bz2 file?
  77     $ae->is_tbz;    # is it a .tar.bz2 or .tbz file?
  78
  79     ### absolute path to the archive you provided ###
  80     $ae->archive;
  81
  82     ### commandline tools, if found ###
  83     $ae->bin_tar     # path to /bin/tar, if found
  84     $ae->bin_gzip    # path to /bin/gzip, if found
  85     $ae->bin_unzip   # path to /bin/unzip, if found
  86     $ae->bin_bunzip2 # path to /bin/bunzip2 if found
  87
  88 =head1 DESCRIPTION
  89
  90 Archive::Extract is a generic archive extraction mechanism.
  91
  92 It allows you to extract any archive file of the type .tar, .tar.gz,
  93 .gz, .Z, tar.bz2, .tbz, .bz2 or .zip without having to worry how it
  94 does so, or use different interfaces for each type by using either
  95 perl modules, or commandline tools on your system.
  96
  97 See the C<HOW IT WORKS> section further down for details.
  98
  99 =cut
 100
 101
 102 ### see what /bin/programs are available ###
 103 $PROGRAMS = {};
 104 for my $pgm (qw[tar unzip gzip bunzip2 uncompress]) {
 105     $PROGRAMS->{$pgm} = can_run($pgm);
 106 }
 107
 108 ### mapping from types to extractor methods ###
 109 my $Mapping = {
 110     is_tgz  => '_untar',
 111     is_tar  => '_untar',
 112     is_gz   => '_gunzip',
 113     is_zip  => '_unzip',
 114     is_tbz  => '_untar',
 115     is_bz2  => '_bunzip2',
 116     is_Z    => '_uncompress',
 117 };
 118
 119 {
 120     my $tmpl = {
 121         archive => { required => 1, allow => FILE_EXISTS },
 122         type    => { default => '', allow => [ @Types ] },
 123     };
 124
 125     ### build accesssors ###
 126     for my $method( keys %$tmpl,
 127                     qw[_extractor _gunzip_to files extract_path],
 128                     qw[_error_msg _error_msg_long]
 129     ) {
 130         no strict 'refs';
 131         *$method = sub {
 132                         my $self = shift;
 133                         $self->{$method} = $_[0] if @_;
 134                         return $self->{$method};
 135                     }
 136     }
 137
 138 =head1 METHODS
 139
 140 =head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE])
 141
 142 Creates a new C<Archive::Extract> object based on the archive file you
 143 passed it. Automatically determines the type of archive based on the
 144 extension, but you can override that by explicitly providing the
 145 C<type> argument.
 146
 147 Valid values for C<type> are:
 148
 149 =over 4
 150
 151 =item tar
 152
 153 Standard tar files, as produced by, for example, C</bin/tar>.
 154 Corresponds to a C<.tar> suffix.
 155
 156 =item tgz
 157
 158 Gzip compressed tar files, as produced by, for example C</bin/tar -z>.
 159 Corresponds to a C<.tgz> or C<.tar.gz> suffix.
 160
 161 =item gz
 162
 163 Gzip compressed file, as produced by, for example C</bin/gzip>.
 164 Corresponds to a C<.gz> suffix.
 165
 166 =item Z
 167
 168 Lempel-Ziv compressed file, as produced by, for example C</bin/compress>.
 169 Corresponds to a C<.Z> suffix.
 170
 171 =item zip
 172
 173 Zip compressed file, as produced by, for example C</bin/zip>.
 174 Corresponds to a C<.zip>, C<.jar> or C<.par> suffix.
 175
 176 =item bz2
 177
 178 Bzip2 compressed file, as produced by, for example, C</bin/bzip2>.
 179 Corresponds to a C<.bz2> suffix.
 180
 181 =item tbz
 182
 183 Bzip2 compressed tar file, as produced by, for exmample C</bin/tar -j>.
 184 Corresponds to a C<.tbz> or C<.tar.bz2> suffix.
 185
 186 =back
 187
 188 Returns a C<Archive::Extract> object on success, or false on failure.
 189
 190 =cut
 191
 192     ### constructor ###
 193     sub new {
 194         my $class   = shift;
 195         my %hash    = @_;
 196
 197         my $parsed = check( $tmpl, \%hash ) or return;
 198
 199         ### make sure we have an absolute path ###
 200         my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} );
 201
 202         ### figure out the type, if it wasn't already specified ###
 203         unless ( $parsed->{type} ) {
 204             $parsed->{type} =
 205                 $ar =~ /.+?\.(?:tar\.gz|tgz)$/i     ? TGZ   :
 206                 $ar =~ /.+?\.gz$/i                  ? GZ    :
 207                 $ar =~ /.+?\.tar$/i                 ? TAR   :
 208                 $ar =~ /.+?\.(zip|jar|par)$/i       ? ZIP   :
 209                 $ar =~ /.+?\.(?:tbz2?|tar\.bz2?)$/i ? TBZ   :
 210                 $ar =~ /.+?\.bz2$/i                 ? BZ2   :
 211                 $ar =~ /.+?\.Z$/                    ? Z     :
 212                 '';
 213
 214         }
 215
 216         ### don't know what type of file it is ###
 217         return __PACKAGE__->_error(loc("Cannot determine file type for '%1'",
 218                                 $parsed->{archive} )) unless $parsed->{type};
 219
 220         return bless $parsed, $class;
 221     }
 222 }
 223
 224 =head2 $ae->extract( [to => '/output/path'] )
 225
 226 Extracts the archive represented by the C<Archive::Extract> object to
 227 the path of your choice as specified by the C<to> argument. Defaults to
 228 C<cwd()>.
 229
 230 Since C<.gz> files never hold a directory, but only a single file; if
 231 the C<to> argument is an existing directory, the file is extracted
 232 there, with it's C<.gz> suffix stripped.
 233 If the C<to> argument is not an existing directory, the C<to> argument
 234 is understood to be a filename, if the archive type is C<gz>.
 235 In the case that you did not specify a C<to> argument, the output
 236 file will be the name of the archive file, stripped from it's C<.gz>
 237 suffix, in the current working directory.
 238
 239 C<extract> will try a pure perl solution first, and then fall back to
 240 commandline tools if they are available. See the C<GLOBAL VARIABLES>
 241 section below on how to alter this behaviour.
 242
 243 It will return true on success, and false on failure.
 244
 245 On success, it will also set the follow attributes in the object:
 246
 247 =over 4
 248
 249 =item $ae->extract_path
 250
 251 This is the directory that the files where extracted to.
 252
 253 =item $ae->files
 254
 255 This is an array ref with the paths of all the files in the archive,
 256 relative to the C<to> argument you specified.
 257 To get the full path to an extracted file, you would use:
 258
 259     File::Spec->catfile( $to, $ae->files->[0] );
 260
 261 Note that all files from a tar archive will be in unix format, as per
 262 the tar specification.
 263
 264 =back
 265
 266 =cut
 267
 268 sub extract {
 269     my $self = shift;
 270     my %hash = @_;
 271
 272     my $to;
 273     my $tmpl = {
 274         to  => { default => '.', store => \$to }
 275     };
 276
 277     check( $tmpl, \%hash ) or return;
 278
 279     ### so 'to' could be a file or a dir, depending on whether it's a .gz
 280     ### file, or basically anything else.
 281     ### so, check that, then act accordingly.
 282     ### set an accessor specifically so _gunzip can know what file to extract
 283     ### to.
 284     my $dir;
 285     {   ### a foo.gz file
 286         if( $self->is_gz or $self->is_bz2 or $self->is_Z) {
 287
 288             my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2?|Z)$//i;
 289
 290             ### to is a dir?
 291             if ( -d $to ) {
 292                 $dir = $to;
 293                 $self->_gunzip_to( basename($cp) );
 294
 295             ### then it's a filename
 296             } else {
 297                 $dir = dirname($to);
 298                 $self->_gunzip_to( basename($to) );
 299             }
 300
 301         ### not a foo.gz file
 302         } else {
 303             $dir = $to;
 304         }
 305     }
 306
 307     ### make the dir if it doesn't exist ###
 308     unless( -d $dir ) {
 309         eval { mkpath( $dir ) };
 310
 311         return $self->_error(loc("Could not create path '%1': %2", $dir, $@))
 312             if $@;
 313     }
 314
 315     ### get the current dir, to restore later ###
 316     my $cwd = cwd();
 317
 318     my $ok = 1;
 319     EXTRACT: {
 320
 321         ### chdir to the target dir ###
 322         unless( chdir $dir ) {
 323             $self->_error(loc("Could not chdir to '%1': %2", $dir, $!));
 324             $ok = 0; last EXTRACT;
 325         }
 326
 327         ### set files to an empty array ref, so there's always an array
 328         ### ref IN the accessor, to avoid errors like:
 329         ### Can't use an undefined value as an ARRAY reference at
 330         ### ../lib/Archive/Extract.pm line 742. (rt #19815)
 331         $self->files( [] );
 332
 333         ### find what extractor method to use ###
 334         while( my($type,$method) = each %$Mapping ) {
 335
 336             ### call the corresponding method if the type is OK ###
 337             if( $self->$type) {
 338                 $ok = $self->$method();
 339             }
 340         }
 341
 342         ### warn something went wrong if we didn't get an OK ###
 343         $self->_error(loc("Extract failed, no extractor found"))
 344             unless $ok;
 345
 346     }
 347
 348     ### and chdir back ###
 349     unless( chdir $cwd ) {
 350         $self->_error(loc("Could not chdir back to start dir '%1': %2'",
 351                             $cwd, $!));
 352     }
 353
 354     return $ok;
 355 }
 356
 357 =pod
 358
 359 =head1 ACCESSORS
 360
 361 =head2 $ae->error([BOOL])
 362
 363 Returns the last encountered error as string.
 364 Pass it a true value to get the C<Carp::longmess()> output instead.
 365
 366 =head2 $ae->extract_path
 367
 368 This is the directory the archive got extracted to.
 369 See C<extract()> for details.
 370
 371 =head2 $ae->files
 372
 373 This is an array ref holding all the paths from the archive.
 374 See C<extract()> for details.
 375
 376 =head2 $ae->archive
 377
 378 This is the full path to the archive file represented by this
 379 C<Archive::Extract> object.
 380
 381 =head2 $ae->type
 382
 383 This is the type of archive represented by this C<Archive::Extract>
 384 object. See accessors below for an easier way to use this.
 385 See the C<new()> method for details.
 386
 387 =head2 $ae->types
 388
 389 Returns a list of all known C<types> for C<Archive::Extract>'s
 390 C<new> method.
 391
 392 =cut
 393
 394 sub types { return @Types }
 395
 396 =head2 $ae->is_tgz
 397
 398 Returns true if the file is of type C<.tar.gz>.
 399 See the C<new()> method for details.
 400
 401 =head2 $ae->is_tar
 402
 403 Returns true if the file is of type C<.tar>.
 404 See the C<new()> method for details.
 405
 406 =head2 $ae->is_gz
 407
 408 Returns true if the file is of type C<.gz>.
 409 See the C<new()> method for details.
 410
 411 =head2 $ae->is_Z
 412
 413 Returns true if the file is of type C<.Z>.
 414 See the C<new()> method for details.
 415
 416 =head2 $ae->is_zip
 417
 418 Returns true if the file is of type C<.zip>.
 419 See the C<new()> method for details.
 420
 421 =cut
 422
 423 ### quick check methods ###
 424 sub is_tgz  { return $_[0]->type eq TGZ }
 425 sub is_tar  { return $_[0]->type eq TAR }
 426 sub is_gz   { return $_[0]->type eq GZ  }
 427 sub is_zip  { return $_[0]->type eq ZIP }
 428 sub is_tbz  { return $_[0]->type eq TBZ }
 429 sub is_bz2  { return $_[0]->type eq BZ2 }
 430 sub is_Z    { return $_[0]->type eq Z   }
 431
 432 =pod
 433
 434 =head2 $ae->bin_tar
 435
 436 Returns the full path to your tar binary, if found.
 437
 438 =head2 $ae->bin_gzip
 439
 440 Returns the full path to your gzip binary, if found
 441
 442 =head2 $ae->bin_unzip
 443
 444 Returns the full path to your unzip binary, if found
 445
 446 =cut
 447
 448 ### paths to commandline tools ###
 449 sub bin_gzip        { return $PROGRAMS->{'gzip'}    if $PROGRAMS->{'gzip'}  }
 450 sub bin_unzip       { return $PROGRAMS->{'unzip'}   if $PROGRAMS->{'unzip'} }
 451 sub bin_tar         { return $PROGRAMS->{'tar'}     if $PROGRAMS->{'tar'}   }
 452 sub bin_bunzip2     { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} }
 453 sub bin_uncompress  { return $PROGRAMS->{'uncompress'}
 454                                                  if $PROGRAMS->{'uncompress'} }
 455 =head2 $bool = $ae->have_old_bunzip2
 456
 457 Older versions of C</bin/bunzip2>, from before the C<bunzip2 1.0> release,
 458 require all archive names to end in C<.bz2> or it will not extract
 459 them. This method checks if you have a recent version of C<bunzip2>
 460 that allows any extension, or an older one that doesn't.
 461
 462 =cut
 463
 464 sub have_old_bunzip2 {
 465     my $self = shift;
 466
 467     ### no bunzip2? no old bunzip2 either :)
 468     return unless $self->bin_bunzip2;
 469
 470     ### if we can't run this, we can't be sure if it's too old or not
 471     ### XXX stupid stupid stupid bunzip2 doesn't understand --version
 472     ### is not a request to extract data:
 473     ### $ bunzip2 --version
 474     ### bzip2, a block-sorting file compressor.  Version 1.0.2, 30-Dec-2001.
 475     ### [...]
 476     ### bunzip2: I won't read compressed data from a terminal.
 477     ### bunzip2: For help, type: `bunzip2 --help'.
 478     ### $ echo $?
 479     ### 1
 480     ### HATEFUL!
 481     my $buffer;
 482     scalar run( command => [$self->bin_bunzip2, '--version'],
 483          verbose => 0,
 484          buffer  => \$buffer
 485     );
 486
 487     ### no output
 488     return unless $buffer;
 489
 490     my ($version) = $buffer =~ /version \s+ (\d+)/ix;
 491
 492     return 1 if $version < 1;
 493     return;
 494 }
 495
 496 #################################
 497 #
 498 # Untar code
 499 #
 500 #################################
 501
 502
 503 ### untar wrapper... goes to either Archive::Tar or /bin/tar
 504 ### depending on $PREFER_BIN
 505 sub _untar {
 506     my $self = shift;
 507
 508     ### bzip2 support in A::T via IO::Uncompress::Bzip2
 509     my   @methods = qw[_untar_at _untar_bin];
 510          @methods = reverse @methods if $PREFER_BIN;
 511
 512     for my $method (@methods) {
 513         $self->_extractor($method) && return 1 if $self->$method();
 514     }
 515
 516     return $self->_error(loc("Unable to untar file '%1'", $self->archive));
 517 }
 518
 519 ### use /bin/tar to extract ###
 520 sub _untar_bin {
 521     my $self = shift;
 522
 523     ### check for /bin/tar ###
 524     return $self->_error(loc("No '%1' program found", '/bin/tar'))
 525         unless $self->bin_tar;
 526
 527     ### check for /bin/gzip if we need it ###
 528     return $self->_error(loc("No '%1' program found", '/bin/gzip'))
 529         if $self->is_tgz && !$self->bin_gzip;
 530
 531     return $self->_error(loc("No '%1' program found", '/bin/bunzip2'))
 532         if $self->is_tbz && !$self->bin_bunzip2;
 533
 534     ### XXX figure out how to make IPC::Run do this in one call --
 535     ### currently i don't know how to get output of a command after a pipe
 536     ### trapped in a scalar. Mailed barries about this 5th of june 2004.
 537
 538
 539
 540     ### see what command we should run, based on whether
 541     ### it's a .tgz or .tar
 542
 543     ### XXX solaris tar and bsdtar are having different outputs
 544     ### depending whether you run with -x or -t
 545     ### compensate for this insanity by running -t first, then -x
 546     {    my $cmd =
 547             $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
 548                              $self->bin_tar, '-tf', '-'] :
 549             $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
 550                              $self->bin_tar, '-tf', '-'] :
 551             [$self->bin_tar, '-tf', $self->archive];
 552
 553         ### run the command ###
 554         my $buffer = '';
 555         unless( scalar run( command => $cmd,
 556                             buffer  => \$buffer,
 557                             verbose => $DEBUG )
 558         ) {
 559             return $self->_error(loc(
 560                             "Error listing contents of archive '%1': %2",
 561                             $self->archive, $buffer ));
 562         }
 563
 564         ### no buffers available?
 565         if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
 566             $self->_error( $self->_no_buffer_files( $self->archive ) );
 567
 568         } else {
 569             ### if we're on solaris we /might/ be using /bin/tar, which has
 570             ### a weird output format... we might also be using
 571             ### /usr/local/bin/tar, which is gnu tar, which is perfectly
 572             ### fine... so we have to do some guessing here =/
 573             my @files = map { chomp;
 574                           !ON_SOLARIS ? $_
 575                                       : (m|^ x \s+  # 'xtract' -- sigh
 576                                             (.+?),  # the actual file name
 577                                             \s+ [\d,.]+ \s bytes,
 578                                             \s+ [\d,.]+ \s tape \s blocks
 579                                         |x ? $1 : $_);
 580
 581                     } split $/, $buffer;
 582
 583             ### store the files that are in the archive ###
 584             $self->files(\@files);
 585         }
 586     }
 587
 588     ### now actually extract it ###
 589     {   my $cmd =
 590             $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
 591                              $self->bin_tar, '-xf', '-'] :
 592             $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
 593                              $self->bin_tar, '-xf', '-'] :
 594             [$self->bin_tar, '-xf', $self->archive];
 595
 596         my $buffer = '';
 597         unless( scalar run( command => $cmd,
 598                             buffer  => \$buffer,
 599                             verbose => $DEBUG )
 600         ) {
 601             return $self->_error(loc("Error extracting archive '%1': %2",
 602                             $self->archive, $buffer ));
 603         }
 604
 605         ### we might not have them, due to lack of buffers
 606         if( $self->files ) {
 607             ### now that we've extracted, figure out where we extracted to
 608             my $dir = $self->__get_extract_dir( $self->files );
 609
 610             ### store the extraction dir ###
 611             $self->extract_path( $dir );
 612         }
 613     }
 614
 615     ### we got here, no error happened
 616     return 1;
 617 }
 618
 619 ### use archive::tar to extract ###
 620 sub _untar_at {
 621     my $self = shift;
 622
 623     ### we definitely need A::T, so load that first
 624     {   my $use_list = { 'Archive::Tar' => '0.0' };
 625
 626         unless( can_load( modules => $use_list ) ) {
 627
 628             return $self->_error(loc("You do not have '%1' installed - " .
 629                                  "Please install it as soon as possible.",
 630                                  'Archive::Tar'));
 631         }
 632     }
 633
 634     ### we might pass it a filehandle if it's a .tbz file..
 635     my $fh_to_read = $self->archive;
 636
 637     ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib
 638     ### if A::T's version is 0.99 or higher
 639     if( $self->is_tgz ) {
 640         my $use_list = { 'Compress::Zlib' => '0.0' };
 641            $use_list->{ 'IO::Zlib' } = '0.0'
 642                 if $Archive::Tar::VERSION >= '0.99';
 643
 644         unless( can_load( modules => $use_list ) ) {
 645             my $which = join '/', sort keys %$use_list;
 646
 647             return $self->_error(loc(
 648                                 "You do not have '%1' installed - Please ".
 649                                 "install it as soon as possible.", $which));
 650
 651         }
 652     } elsif ( $self->is_tbz ) {
 653         my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
 654         unless( can_load( modules => $use_list ) ) {
 655             return $self->_error(loc(
 656                     "You do not have '%1' installed - Please " .
 657                     "install it as soon as possible.",
 658                      'IO::Uncompress::Bunzip2'));
 659         }
 660
 661         my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
 662             return $self->_error(loc("Unable to open '%1': %2",
 663                             $self->archive,
 664                             $IO::Uncompress::Bunzip2::Bunzip2Error));
 665
 666         $fh_to_read = $bz;
 667     }
 668
 669     my $tar = Archive::Tar->new();
 670
 671     ### only tell it it's compressed if it's a .tgz, as we give it a file
 672     ### handle if it's a .tbz
 673     unless( $tar->read( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) ) ) {
 674         return $self->_error(loc("Unable to read '%1': %2", $self->archive,
 675                                     $Archive::Tar::error));
 676     }
 677
 678     ### workaround to prevent Archive::Tar from setting uid, which
 679     ### is a potential security hole. -autrijus
 680     ### have to do it here, since A::T needs to be /loaded/ first ###
 681     {   no strict 'refs'; local $^W;
 682
 683         ### older versions of archive::tar <= 0.23
 684         *Archive::Tar::chown = sub {};
 685     }
 686
 687     ### for version of archive::tar > 1.04
 688     local $Archive::Tar::Constant::CHOWN = 0;
 689
 690     {   local $^W;  # quell 'splice() offset past end of array' warnings
 691                     # on older versions of A::T
 692
 693         ### older archive::tar always returns $self, return value slightly
 694         ### fux0r3d because of it.
 695         $tar->extract()
 696             or return $self->_error(loc("Unable to extract '%1': %2",
 697                                     $self->archive, $Archive::Tar::error ));
 698     }
 699
 700     my @files   = $tar->list_files;
 701     my $dir     = $self->__get_extract_dir( \@files );
 702
 703     ### store the files that are in the archive ###
 704     $self->files(\@files);
 705
 706     ### store the extraction dir ###
 707     $self->extract_path( $dir );
 708
 709     ### check if the dir actually appeared ###
 710     return 1 if -d $self->extract_path;
 711
 712     ### no dir, we failed ###
 713     return $self->_error(loc("Unable to extract '%1': %2",
 714                                 $self->archive, $Archive::Tar::error ));
 715 }
 716
 717 #################################
 718 #
 719 # Gunzip code
 720 #
 721 #################################
 722
 723 ### gunzip wrapper... goes to either Compress::Zlib or /bin/gzip
 724 ### depending on $PREFER_BIN
 725 sub _gunzip {
 726     my $self = shift;
 727
 728     my @methods = qw[_gunzip_cz _gunzip_bin];
 729        @methods = reverse @methods if $PREFER_BIN;
 730
 731     for my $method (@methods) {
 732         $self->_extractor($method) && return 1 if $self->$method();
 733     }
 734
 735     return $self->_error(loc("Unable to gunzip file '%1'", $self->archive));
 736 }
 737
 738 sub _gunzip_bin {
 739     my $self = shift;
 740
 741     ### check for /bin/gzip -- we need it ###
 742     return $self->_error(loc("No '%1' program found", '/bin/gzip'))
 743         unless $self->bin_gzip;
 744
 745
 746     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
 747         return $self->_error(loc("Could not open '%1' for writing: %2",
 748                             $self->_gunzip_to, $! ));
 749
 750     my $cmd = [ $self->bin_gzip, '-cdf', $self->archive ];
 751
 752     my $buffer;
 753     unless( scalar run( command => $cmd,
 754                         verbose => $DEBUG,
 755                         buffer  => \$buffer )
 756     ) {
 757         return $self->_error(loc("Unable to gunzip '%1': %2",
 758                                     $self->archive, $buffer));
 759     }
 760
 761     ### no buffers available?
 762     if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
 763         $self->_error( $self->_no_buffer_content( $self->archive ) );
 764     }
 765
 766     print $fh $buffer if defined $buffer;
 767
 768     close $fh;
 769
 770     ### set what files where extract, and where they went ###
 771     $self->files( [$self->_gunzip_to] );
 772     $self->extract_path( File::Spec->rel2abs(cwd()) );
 773
 774     return 1;
 775 }
 776
 777 sub _gunzip_cz {
 778     my $self = shift;
 779
 780     my $use_list = { 'Compress::Zlib' => '0.0' };
 781     unless( can_load( modules => $use_list ) ) {
 782         return $self->_error(loc("You do not have '%1' installed - Please " .
 783                         "install it as soon as possible.", 'Compress::Zlib'));
 784     }
 785
 786     my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or
 787                 return $self->_error(loc("Unable to open '%1': %2",
 788                             $self->archive, $Compress::Zlib::gzerrno));
 789
 790     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
 791         return $self->_error(loc("Could not open '%1' for writing: %2",
 792                             $self->_gunzip_to, $! ));
 793
 794     my $buffer;
 795     $fh->print($buffer) while $gz->gzread($buffer) > 0;
 796     $fh->close;
 797
 798     ### set what files where extract, and where they went ###
 799     $self->files( [$self->_gunzip_to] );
 800     $self->extract_path( File::Spec->rel2abs(cwd()) );
 801
 802     return 1;
 803 }
 804
 805 #################################
 806 #
 807 # Uncompress code
 808 #
 809 #################################
 810
 811
 812 ### untar wrapper... goes to either Archive::Tar or /bin/tar
 813 ### depending on $PREFER_BIN
 814 sub _uncompress {
 815     my $self = shift;
 816
 817     my   @methods = qw[_gunzip_cz _uncompress_bin];
 818          @methods = reverse @methods if $PREFER_BIN;
 819
 820     for my $method (@methods) {
 821         $self->_extractor($method) && return 1 if $self->$method();
 822     }
 823
 824     return $self->_error(loc("Unable to untar file '%1'", $self->archive));
 825 }
 826
 827 sub _uncompress_bin {
 828     my $self = shift;
 829
 830     ### check for /bin/gzip -- we need it ###
 831     return $self->_error(loc("No '%1' program found", '/bin/uncompress'))
 832         unless $self->bin_uncompress;
 833
 834
 835     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
 836         return $self->_error(loc("Could not open '%1' for writing: %2",
 837                             $self->_gunzip_to, $! ));
 838
 839     my $cmd = [ $self->bin_uncompress, '-c', $self->archive ];
 840
 841     my $buffer;
 842     unless( scalar run( command => $cmd,
 843                         verbose => $DEBUG,
 844                         buffer  => \$buffer )
 845     ) {
 846         return $self->_error(loc("Unable to uncompress '%1': %2",
 847                                     $self->archive, $buffer));
 848     }
 849
 850     ### no buffers available?
 851     if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
 852         $self->_error( $self->_no_buffer_content( $self->archive ) );
 853     }
 854
 855     print $fh $buffer if defined $buffer;
 856
 857     close $fh;
 858
 859     ### set what files where extract, and where they went ###
 860     $self->files( [$self->_gunzip_to] );
 861     $self->extract_path( File::Spec->rel2abs(cwd()) );
 862
 863     return 1;
 864 }
 865
 866
 867 #################################
 868 #
 869 # Unzip code
 870 #
 871 #################################
 872
 873 ### unzip wrapper... goes to either Archive::Zip or /bin/unzip
 874 ### depending on $PREFER_BIN
 875 sub _unzip {
 876     my $self = shift;
 877
 878     my @methods = qw[_unzip_az _unzip_bin];
 879        @methods = reverse @methods if $PREFER_BIN;
 880
 881     for my $method (@methods) {
 882         $self->_extractor($method) && return 1 if $self->$method();
 883     }
 884
 885     return $self->_error(loc("Unable to gunzip file '%1'", $self->archive));
 886 }
 887
 888 sub _unzip_bin {
 889     my $self = shift;
 890
 891     ### check for /bin/gzip if we need it ###
 892     return $self->_error(loc("No '%1' program found", '/bin/unzip'))
 893         unless $self->bin_unzip;
 894
 895
 896     ### first, get the files.. it must be 2 different commands with 'unzip' :(
 897     {   ### on VMS, capital letter options have to be quoted. This is
 898         ### peported by John Malmberg on P5P Tue 21 Aug 2007 05:05:11
 899         ### Subject: [patch@31735]Archive Extract fix on VMS.
 900         my $opt = ON_VMS ? '"-Z"' : '-Z';
 901         my $cmd = [ $self->bin_unzip, $opt, '-1', $self->archive ];
 902
 903         my $buffer = '';
 904         unless( scalar run( command => $cmd,
 905                             verbose => $DEBUG,
 906                             buffer  => \$buffer )
 907         ) {
 908             return $self->_error(loc("Unable to unzip '%1': %2",
 909                                         $self->archive, $buffer));
 910         }
 911
 912         ### no buffers available?
 913         if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
 914             $self->_error( $self->_no_buffer_files( $self->archive ) );
 915
 916         } else {
 917             $self->files( [split $/, $buffer] );
 918         }
 919     }
 920
 921     ### now, extract the archive ###
 922     {   my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ];
 923
 924         my $buffer;
 925         unless( scalar run( command => $cmd,
 926                             verbose => $DEBUG,
 927                             buffer  => \$buffer )
 928         ) {
 929             return $self->_error(loc("Unable to unzip '%1': %2",
 930                                         $self->archive, $buffer));
 931         }
 932
 933         if( scalar @{$self->files} ) {
 934             my $files   = $self->files;
 935             my $dir     = $self->__get_extract_dir( $files );
 936
 937             $self->extract_path( $dir );
 938         }
 939     }
 940
 941     return 1;
 942 }
 943
 944 sub _unzip_az {
 945     my $self = shift;
 946
 947     my $use_list = { 'Archive::Zip' => '0.0' };
 948     unless( can_load( modules => $use_list ) ) {
 949         return $self->_error(loc("You do not have '%1' installed - Please " .
 950                         "install it as soon as possible.", 'Archive::Zip'));
 951     }
 952
 953     my $zip = Archive::Zip->new();
 954
 955     unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) {
 956         return $self->_error(loc("Unable to read '%1'", $self->archive));
 957     }
 958
 959     my @files;
 960     ### have to extract every memeber individually ###
 961     for my $member ($zip->members) {
 962         push @files, $member->{fileName};
 963
 964         unless( $zip->extractMember($member) == &Archive::Zip::AZ_OK ) {
 965             return $self->_error(loc("Extraction of '%1' from '%2' failed",
 966                         $member->{fileName}, $self->archive ));
 967         }
 968     }
 969
 970     my $dir = $self->__get_extract_dir( \@files );
 971
 972     ### set what files where extract, and where they went ###
 973     $self->files( \@files );
 974     $self->extract_path( File::Spec->rel2abs($dir) );
 975
 976     return 1;
 977 }
 978
 979 sub __get_extract_dir {
 980     my $self    = shift;
 981     my $files   = shift || [];
 982
 983     return unless scalar @$files;
 984
 985     my($dir1, $dir2);
 986     for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
 987         my($dir,$pos) = @$aref;
 988
 989         ### add a catdir(), so that any trailing slashes get
 990         ### take care of (removed)
 991         ### also, a catdir() normalises './dir/foo' to 'dir/foo';
 992         ### which was the problem in bug #23999
 993         my $res = -d $files->[$pos]
 994                     ? File::Spec->catdir( $files->[$pos], '' )
 995                     : File::Spec->catdir( dirname( $files->[$pos] ) );
 996
 997         $$dir = $res;
 998     }
 999
1000     ### if the first and last dir don't match, make sure the
1001     ### dirname is not set wrongly
1002     my $dir;
1003
1004     ### dirs are the same, so we know for sure what the extract dir is
1005     if( $dir1 eq $dir2 ) {
1006         $dir = $dir1;
1007
1008     ### dirs are different.. do they share the base dir?
1009     ### if so, use that, if not, fall back to '.'
1010     } else {
1011         my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
1012         my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
1013
1014         $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
1015     }
1016
1017     return File::Spec->rel2abs( $dir );
1018 }
1019
1020 #################################
1021 #
1022 # Bunzip2 code
1023 #
1024 #################################
1025
1026 ### bunzip2 wrapper...
1027 sub _bunzip2 {
1028     my $self = shift;
1029
1030     my @methods = qw[_bunzip2_cz2 _bunzip2_bin];
1031        @methods = reverse @methods if $PREFER_BIN;
1032
1033     for my $method (@methods) {
1034         $self->_extractor($method) && return 1 if $self->$method();
1035     }
1036
1037     return $self->_error(loc("Unable to bunzip2 file '%1'", $self->archive));
1038 }
1039
1040 sub _bunzip2_bin {
1041     my $self = shift;
1042
1043     ### check for /bin/gzip -- we need it ###
1044     return $self->_error(loc("No '%1' program found", '/bin/bunzip2'))
1045         unless $self->bin_bunzip2;
1046
1047
1048     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1049         return $self->_error(loc("Could not open '%1' for writing: %2",
1050                             $self->_gunzip_to, $! ));
1051
1052     ### guard against broken bunzip2. See ->have_old_bunzip2()
1053     ### for details
1054     if( $self->have_old_bunzip2 and $self->archive !~ /\.bz2$/i ) {
1055         return $self->_error(loc("Your bunzip2 version is too old and ".
1056                                  "can only extract files ending in '%1'",
1057                                  '.bz2'));
1058     }
1059
1060     my $cmd = [ $self->bin_bunzip2, '-cd', $self->archive ];
1061
1062     my $buffer;
1063     unless( scalar run( command => $cmd,
1064                         verbose => $DEBUG,
1065                         buffer  => \$buffer )
1066     ) {
1067         return $self->_error(loc("Unable to bunzip2 '%1': %2",
1068                                     $self->archive, $buffer));
1069     }
1070
1071     ### no buffers available?
1072     if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1073         $self->_error( $self->_no_buffer_content( $self->archive ) );
1074     }
1075
1076     print $fh $buffer if defined $buffer;
1077
1078     close $fh;
1079
1080     ### set what files where extract, and where they went ###
1081     $self->files( [$self->_gunzip_to] );
1082     $self->extract_path( File::Spec->rel2abs(cwd()) );
1083
1084     return 1;
1085 }
1086
1087 ### using cz2, the compact versions... this we use mainly in archive::tar
1088 ### extractor..
1089 # sub _bunzip2_cz1 {
1090 #     my $self = shift;
1091 #
1092 #     my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1093 #     unless( can_load( modules => $use_list ) ) {
1094 #         return $self->_error(loc("You do not have '%1' installed - Please " .
1095 #                         "install it as soon as possible.",
1096 #                         'IO::Uncompress::Bunzip2'));
1097 #     }
1098 #
1099 #     my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
1100 #                 return $self->_error(loc("Unable to open '%1': %2",
1101 #                             $self->archive,
1102 #                             $IO::Uncompress::Bunzip2::Bunzip2Error));
1103 #
1104 #     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1105 #         return $self->_error(loc("Could not open '%1' for writing: %2",
1106 #                             $self->_gunzip_to, $! ));
1107 #
1108 #     my $buffer;
1109 #     $fh->print($buffer) while $bz->read($buffer) > 0;
1110 #     $fh->close;
1111 #
1112 #     ### set what files where extract, and where they went ###
1113 #     $self->files( [$self->_gunzip_to] );
1114 #     $self->extract_path( File::Spec->rel2abs(cwd()) );
1115 #
1116 #     return 1;
1117 # }
1118
1119 sub _bunzip2_cz2 {
1120     my $self = shift;
1121
1122     my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1123     unless( can_load( modules => $use_list ) ) {
1124         return $self->_error(loc("You do not have '%1' installed - Please " .
1125                         "install it as soon as possible.",
1126                         'IO::Uncompress::Bunzip2'));
1127     }
1128
1129     IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to)
1130         or return $self->_error(loc("Unable to uncompress '%1': %2",
1131                             $self->archive,
1132                             $IO::Uncompress::Bunzip2::Bunzip2Error));
1133
1134     ### set what files where extract, and where they went ###
1135     $self->files( [$self->_gunzip_to] );
1136     $self->extract_path( File::Spec->rel2abs(cwd()) );
1137
1138     return 1;
1139 }
1140
1141
1142 #################################
1143 #
1144 # Error code
1145 #
1146 #################################
1147
1148 sub _error {
1149     my $self    = shift;
1150     my $error   = shift;
1151
1152     $self->_error_msg( $error );
1153     $self->_error_msg_long( Carp::longmess($error) );
1154
1155     ### set $Archive::Extract::WARN to 0 to disable printing
1156     ### of errors
1157     if( $WARN ) {
1158         carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1159     }
1160
1161     return;
1162 }
1163
1164 sub error {
1165     my $self = shift;
1166     return shift() ? $self->_error_msg_long : $self->_error_msg;
1167 }
1168
1169 sub _no_buffer_files {
1170     my $self = shift;
1171     my $file = shift or return;
1172     return loc("No buffer captured, unable to tell ".
1173                "extracted files or extraction dir for '%1'", $file);
1174 }
1175
1176 sub _no_buffer_content {
1177     my $self = shift;
1178     my $file = shift or return;
1179     return loc("No buffer captured, unable to get content for '%1'", $file);
1180 }
1181 1;
1182
1183 =pod
1184
1185 =head1 HOW IT WORKS
1186
1187 C<Archive::Extract> tries first to determine what type of archive you
1188 are passing it, by inspecting its suffix. It does not do this by using
1189 Mime magic, or something related. See C<CAVEATS> below.
1190
1191 Once it has determined the file type, it knows which extraction methods
1192 it can use on the archive. It will try a perl solution first, then fall
1193 back to a commandline tool if that fails. If that also fails, it will
1194 return false, indicating it was unable to extract the archive.
1195 See the section on C<GLOBAL VARIABLES> to see how to alter this order.
1196
1197 =head1 CAVEATS
1198
1199 =head2 File Extensions
1200
1201 C<Archive::Extract> trusts on the extension of the archive to determine
1202 what type it is, and what extractor methods therefore can be used. If
1203 your archives do not have any of the extensions as described in the
1204 C<new()> method, you will have to specify the type explicitly, or
1205 C<Archive::Extract> will not be able to extract the archive for you.
1206
1207 =head2 Supporting Very Large Files
1208
1209 C<Archive::Extract> can use either pure perl modules or command line
1210 programs under the hood. Some of the pure perl modules (like
1211 C<Archive::Tar> take the entire contents of the archive into memory,
1212 which may not be feasible on your system. Consider setting the global
1213 variable C<$Archive::Extract::PREFER_BIN> to C<1>, which will prefer
1214 the use of command line programs and won't consume so much memory.
1215
1216 See the C<GLOBAL VARIABLES> section below for details.
1217
1218 =head2 Bunzip2 support of arbitrary extensions.
1219
1220 Older versions of C</bin/bunzip2> do not support arbitrary file
1221 extensions and insist on a C<.bz2> suffix. Although we do our best
1222 to guard against this, if you experience a bunzip2 error, it may
1223 be related to this. For details, please see the C<have_old_bunzip2>
1224 method.
1225
1226 =head1 GLOBAL VARIABLES
1227
1228 =head2 $Archive::Extract::DEBUG
1229
1230 Set this variable to C<true> to have all calls to command line tools
1231 be printed out, including all their output.
1232 This also enables C<Carp::longmess> errors, instead of the regular
1233 C<carp> errors.
1234
1235 Good for tracking down why things don't work with your particular
1236 setup.
1237
1238 Defaults to C<false>.
1239
1240 =head2 $Archive::Extract::WARN
1241
1242 This variable controls whether errors encountered internally by
1243 C<Archive::Extract> should be C<carp>'d or not.
1244
1245 Set to false to silence warnings. Inspect the output of the C<error()>
1246 method manually to see what went wrong.
1247
1248 Defaults to C<true>.
1249
1250 =head2 $Archive::Extract::PREFER_BIN
1251
1252 This variables controls whether C<Archive::Extract> should prefer the
1253 use of perl modules, or commandline tools to extract archives.
1254
1255 Set to C<true> to have C<Archive::Extract> prefer commandline tools.
1256
1257 Defaults to C<false>.
1258
1259 =head1 TODO
1260
1261 =over 4
1262
1263 =item Mime magic support
1264
1265 Maybe this module should use something like C<File::Type> to determine
1266 the type, rather than blindly trust the suffix.
1267
1268 =back
1269
1270 =head1 BUG REPORTS
1271
1272 Please report bugs or other issues to E<lt>bug-archive-extract@rt.cpan.org<gt>.
1273
1274 =head1 AUTHOR
1275
1276 This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1277
1278 =head1 COPYRIGHT
1279
1280 This library is free software; you may redistribute and/or modify it
1281 under the same terms as Perl itself.
1282
1283 =cut
1284
1285 # Local variables:
1286 # c-indentation-style: bsd
1287 # c-basic-offset: 4
1288 # indent-tabs-mode: nil
1289 # End:
1290 # vim: expandtab shiftwidth=4:
1291