lib/Archive/Extract.pm

   1 package Archive::Extract;
   2
   3 use strict;
   4
   5 use Cwd                         qw[cwd];
   6 use Carp                        qw[carp];
   7 use IPC::Cmd                    qw[run can_run];
   8 use FileHandle;
   9 use File::Path                  qw[mkpath];
  10 use File::Spec;
  11 use File::Basename              qw[dirname basename];
  12 use Params::Check               qw[check];
  13 use Module::Load::Conditional   qw[can_load check_install];
  14 use Locale::Maketext::Simple    Style => 'gettext';
  15
  16 ### solaris has silly /bin/tar output ###
  17 use constant ON_SOLARIS     => $^O eq 'solaris' ? 1 : 0;
  18 use constant FILE_EXISTS    => sub { -e $_[0] ? 1 : 0 };
  19
  20 ### VMS may require quoting upper case command options
  21 use constant ON_VMS         => $^O eq 'VMS' ? 1 : 0;
  22
  23 ### Windows needs special treatment of Tar options
  24 use constant ON_WIN32       => $^O eq 'MSWin32' ? 1 : 0;
  25
  26 ### we can't use this extraction method, because of missing
  27 ### modules/binaries:
  28 use constant METHOD_NA      => [];
  29
  30 ### If these are changed, update @TYPES and the new() POD
  31 use constant TGZ            => 'tgz';
  32 use constant TAR            => 'tar';
  33 use constant GZ             => 'gz';
  34 use constant ZIP            => 'zip';
  35 use constant BZ2            => 'bz2';
  36 use constant TBZ            => 'tbz';
  37 use constant Z              => 'Z';
  38 use constant LZMA           => 'lzma';
  39
  40 use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG
  41             $_ALLOW_BIN $_ALLOW_PURE_PERL
  42          ];
  43
  44 $VERSION            = '0.31_02';
  45 $PREFER_BIN         = 0;
  46 $WARN               = 1;
  47 $DEBUG              = 0;
  48 $_ALLOW_PURE_PERL   = 1;    # allow pure perl extractors
  49 $_ALLOW_BIN         = 1;    # allow binary extractors
  50
  51 # same as all constants
  52 my @Types           = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ, Z, LZMA );
  53
  54 local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1;
  55
  56 =pod
  57
  58 =head1 NAME
  59
  60 Archive::Extract - A generic archive extracting mechanism
  61
  62 =head1 SYNOPSIS
  63
  64     use Archive::Extract;
  65
  66     ### build an Archive::Extract object ###
  67     my $ae = Archive::Extract->new( archive => 'foo.tgz' );
  68
  69     ### extract to cwd() ###
  70     my $ok = $ae->extract;
  71
  72     ### extract to /tmp ###
  73     my $ok = $ae->extract( to => '/tmp' );
  74
  75     ### what if something went wrong?
  76     my $ok = $ae->extract or die $ae->error;
  77
  78     ### files from the archive ###
  79     my $files   = $ae->files;
  80
  81     ### dir that was extracted to ###
  82     my $outdir  = $ae->extract_path;
  83
  84
  85     ### quick check methods ###
  86     $ae->is_tar     # is it a .tar file?
  87     $ae->is_tgz     # is it a .tar.gz or .tgz file?
  88     $ae->is_gz;     # is it a .gz file?
  89     $ae->is_zip;    # is it a .zip file?
  90     $ae->is_bz2;    # is it a .bz2 file?
  91     $ae->is_tbz;    # is it a .tar.bz2 or .tbz file?
  92     $ae->is_lzma;   # is it a .lzma file?
  93
  94     ### absolute path to the archive you provided ###
  95     $ae->archive;
  96
  97     ### commandline tools, if found ###
  98     $ae->bin_tar     # path to /bin/tar, if found
  99     $ae->bin_gzip    # path to /bin/gzip, if found
 100     $ae->bin_unzip   # path to /bin/unzip, if found
 101     $ae->bin_bunzip2 # path to /bin/bunzip2 if found
 102     $ae->bin_unlzma  # path to /bin/unlzma if found
 103
 104 =head1 DESCRIPTION
 105
 106 Archive::Extract is a generic archive extraction mechanism.
 107
 108 It allows you to extract any archive file of the type .tar, .tar.gz,
 109 .gz, .Z, tar.bz2, .tbz, .bz2, .zip or .lzma without having to worry how it
 110 does so, or use different interfaces for each type by using either
 111 perl modules, or commandline tools on your system.
 112
 113 See the C<HOW IT WORKS> section further down for details.
 114
 115 =cut
 116
 117
 118 ### see what /bin/programs are available ###
 119 $PROGRAMS = {};
 120 for my $pgm (qw[tar unzip gzip bunzip2 uncompress unlzma]) {
 121     $PROGRAMS->{$pgm} = can_run($pgm);
 122 }
 123
 124 ### mapping from types to extractor methods ###
 125 my $Mapping = {  # binary program           # pure perl module
 126     is_tgz  => { bin => '_untar_bin',       pp => '_untar_at'   },
 127     is_tar  => { bin => '_untar_bin',       pp => '_untar_at'   },
 128     is_gz   => { bin => '_gunzip_bin',      pp => '_gunzip_cz'  },
 129     is_zip  => { bin => '_unzip_bin',       pp => '_unzip_az'   },
 130     is_tbz  => { bin => '_untar_bin',       pp => '_untar_at'   },
 131     is_bz2  => { bin => '_bunzip2_bin',     pp => '_bunzip2_bz2'},
 132     is_Z    => { bin => '_uncompress_bin',  pp => '_gunzip_cz'  },
 133     is_lzma => { bin => '_unlzma_bin',      pp => '_unlzma_cz'  },
 134 };
 135
 136 {   ### use subs so we re-generate array refs etc for the no-overide flags
 137     ### if we don't, then we reuse the same arrayref, meaning objects store
 138     ### previous errors
 139     my $tmpl = {
 140         archive         => sub { { required => 1, allow => FILE_EXISTS }    },
 141         type            => sub { { default => '', allow => [ @Types ] }     },
 142         _error_msg      => sub { { no_override => 1, default => [] }        },
 143         _error_msg_long => sub { { no_override => 1, default => [] }        },
 144     };
 145
 146     ### build accesssors ###
 147     for my $method( keys %$tmpl,
 148                     qw[_extractor _gunzip_to files extract_path],
 149     ) {
 150         no strict 'refs';
 151         *$method = sub {
 152                         my $self = shift;
 153                         $self->{$method} = $_[0] if @_;
 154                         return $self->{$method};
 155                     }
 156     }
 157
 158 =head1 METHODS
 159
 160 =head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE])
 161
 162 Creates a new C<Archive::Extract> object based on the archive file you
 163 passed it. Automatically determines the type of archive based on the
 164 extension, but you can override that by explicitly providing the
 165 C<type> argument.
 166
 167 Valid values for C<type> are:
 168
 169 =over 4
 170
 171 =item tar
 172
 173 Standard tar files, as produced by, for example, C</bin/tar>.
 174 Corresponds to a C<.tar> suffix.
 175
 176 =item tgz
 177
 178 Gzip compressed tar files, as produced by, for example C</bin/tar -z>.
 179 Corresponds to a C<.tgz> or C<.tar.gz> suffix.
 180
 181 =item gz
 182
 183 Gzip compressed file, as produced by, for example C</bin/gzip>.
 184 Corresponds to a C<.gz> suffix.
 185
 186 =item Z
 187
 188 Lempel-Ziv compressed file, as produced by, for example C</bin/compress>.
 189 Corresponds to a C<.Z> suffix.
 190
 191 =item zip
 192
 193 Zip compressed file, as produced by, for example C</bin/zip>.
 194 Corresponds to a C<.zip>, C<.jar> or C<.par> suffix.
 195
 196 =item bz2
 197
 198 Bzip2 compressed file, as produced by, for example, C</bin/bzip2>.
 199 Corresponds to a C<.bz2> suffix.
 200
 201 =item tbz
 202
 203 Bzip2 compressed tar file, as produced by, for exmample C</bin/tar -j>.
 204 Corresponds to a C<.tbz> or C<.tar.bz2> suffix.
 205
 206 =item lzma
 207
 208 Lzma compressed file, as produced by C</bin/lzma>.
 209 Corresponds to a C<.lzma> suffix.
 210
 211 =back
 212
 213 Returns a C<Archive::Extract> object on success, or false on failure.
 214
 215 =cut
 216
 217     ### constructor ###
 218     sub new {
 219         my $class   = shift;
 220         my %hash    = @_;
 221
 222         ### see above why we use subs here and generate the template;
 223         ### it's basically to not re-use arrayrefs
 224         my %utmpl   = map { $_ => $tmpl->{$_}->() } keys %$tmpl;
 225
 226         my $parsed = check( \%utmpl, \%hash ) or return;
 227
 228         ### make sure we have an absolute path ###
 229         my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} );
 230
 231         ### figure out the type, if it wasn't already specified ###
 232         unless ( $parsed->{type} ) {
 233             $parsed->{type} =
 234                 $ar =~ /.+?\.(?:tar\.gz|tgz)$/i     ? TGZ   :
 235                 $ar =~ /.+?\.gz$/i                  ? GZ    :
 236                 $ar =~ /.+?\.tar$/i                 ? TAR   :
 237                 $ar =~ /.+?\.(zip|jar|par)$/i       ? ZIP   :
 238                 $ar =~ /.+?\.(?:tbz2?|tar\.bz2?)$/i ? TBZ   :
 239                 $ar =~ /.+?\.bz2$/i                 ? BZ2   :
 240                 $ar =~ /.+?\.Z$/                    ? Z     :
 241                 $ar =~ /.+?\.lzma$/                 ? LZMA  :
 242                 '';
 243
 244         }
 245
 246         bless $parsed, $class;
 247
 248         ### don't know what type of file it is
 249         ### XXX this *has* to be an object call, not a package call
 250         return $parsed->_error(loc("Cannot determine file type for '%1'",
 251                                 $parsed->{archive} )) unless $parsed->{type};
 252         return $parsed;
 253     }
 254 }
 255
 256 =head2 $ae->extract( [to => '/output/path'] )
 257
 258 Extracts the archive represented by the C<Archive::Extract> object to
 259 the path of your choice as specified by the C<to> argument. Defaults to
 260 C<cwd()>.
 261
 262 Since C<.gz> files never hold a directory, but only a single file; if
 263 the C<to> argument is an existing directory, the file is extracted
 264 there, with its C<.gz> suffix stripped.
 265 If the C<to> argument is not an existing directory, the C<to> argument
 266 is understood to be a filename, if the archive type is C<gz>.
 267 In the case that you did not specify a C<to> argument, the output
 268 file will be the name of the archive file, stripped from its C<.gz>
 269 suffix, in the current working directory.
 270
 271 C<extract> will try a pure perl solution first, and then fall back to
 272 commandline tools if they are available. See the C<GLOBAL VARIABLES>
 273 section below on how to alter this behaviour.
 274
 275 It will return true on success, and false on failure.
 276
 277 On success, it will also set the follow attributes in the object:
 278
 279 =over 4
 280
 281 =item $ae->extract_path
 282
 283 This is the directory that the files where extracted to.
 284
 285 =item $ae->files
 286
 287 This is an array ref with the paths of all the files in the archive,
 288 relative to the C<to> argument you specified.
 289 To get the full path to an extracted file, you would use:
 290
 291     File::Spec->catfile( $to, $ae->files->[0] );
 292
 293 Note that all files from a tar archive will be in unix format, as per
 294 the tar specification.
 295
 296 =back
 297
 298 =cut
 299
 300 sub extract {
 301     my $self = shift;
 302     my %hash = @_;
 303
 304     ### reset error messages
 305     $self->_error_msg( [] );
 306     $self->_error_msg_long( [] );
 307
 308     my $to;
 309     my $tmpl = {
 310         to  => { default => '.', store => \$to }
 311     };
 312
 313     check( $tmpl, \%hash ) or return;
 314
 315     ### so 'to' could be a file or a dir, depending on whether it's a .gz
 316     ### file, or basically anything else.
 317     ### so, check that, then act accordingly.
 318     ### set an accessor specifically so _gunzip can know what file to extract
 319     ### to.
 320     my $dir;
 321     {   ### a foo.gz file
 322         if( $self->is_gz or $self->is_bz2 or $self->is_Z or $self->is_lzma ) {
 323
 324             my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2?|Z|lzma)$//i;
 325
 326             ### to is a dir?
 327             if ( -d $to ) {
 328                 $dir = $to;
 329                 $self->_gunzip_to( basename($cp) );
 330
 331             ### then it's a filename
 332             } else {
 333                 $dir = dirname($to);
 334                 $self->_gunzip_to( basename($to) );
 335             }
 336
 337         ### not a foo.gz file
 338         } else {
 339             $dir = $to;
 340         }
 341     }
 342
 343     ### make the dir if it doesn't exist ###
 344     unless( -d $dir ) {
 345         eval { mkpath( $dir ) };
 346
 347         return $self->_error(loc("Could not create path '%1': %2", $dir, $@))
 348             if $@;
 349     }
 350
 351     ### get the current dir, to restore later ###
 352     my $cwd = cwd();
 353
 354     my $ok = 1;
 355     EXTRACT: {
 356
 357         ### chdir to the target dir ###
 358         unless( chdir $dir ) {
 359             $self->_error(loc("Could not chdir to '%1': %2", $dir, $!));
 360             $ok = 0; last EXTRACT;
 361         }
 362
 363         ### set files to an empty array ref, so there's always an array
 364         ### ref IN the accessor, to avoid errors like:
 365         ### Can't use an undefined value as an ARRAY reference at
 366         ### ../lib/Archive/Extract.pm line 742. (rt #19815)
 367         $self->files( [] );
 368
 369         ### find out the dispatch methods needed for this type of
 370         ### archive. Do a $self->is_XXX to figure out the type, then
 371         ### get the hashref with bin + pure perl dispatchers.
 372         my ($map) = map { $Mapping->{$_} } grep { $self->$_ } keys %$Mapping;
 373
 374         ### add pure perl extractor if allowed & add bin extractor if allowed
 375         my @methods;
 376         push @methods, $map->{'pp'}  if $_ALLOW_PURE_PERL;
 377         push @methods, $map->{'bin'} if $_ALLOW_BIN;
 378
 379         ### reverse it if we prefer bin extractors
 380         @methods = reverse @methods if $PREFER_BIN;
 381
 382         my($na, $fail);
 383         for my $method (@methods) {
 384             print "# Extracting with ->$method\n" if $DEBUG;
 385
 386             my $rv = $self->$method;
 387
 388             ### a positive extraction
 389             if( $rv and $rv ne METHOD_NA ) {
 390                 print "# Extraction succeeded\n" if $DEBUG;
 391                 $self->_extractor($method);
 392                 last;
 393
 394             ### method is not available
 395             } elsif ( $rv and $rv eq METHOD_NA ) {
 396                 print "# Extraction method not available\n" if $DEBUG;
 397                 $na++;
 398             } else {
 399                 print "# Extraction method failed\n" if $DEBUG;
 400                 $fail++;
 401             }
 402         }
 403
 404         ### warn something went wrong if we didn't get an extractor
 405         unless( $self->_extractor ) {
 406             my $diag = $fail ? loc("Extract failed due to errors") :
 407                        $na   ? loc("Extract failed; no extractors available") :
 408                        '';
 409
 410             $self->_error($diag);
 411             $ok = 0;
 412         }
 413     }
 414
 415     ### and chdir back ###
 416     unless( chdir $cwd ) {
 417         $self->_error(loc("Could not chdir back to start dir '%1': %2'",
 418                             $cwd, $!));
 419     }
 420
 421     return $ok;
 422 }
 423
 424 =pod
 425
 426 =head1 ACCESSORS
 427
 428 =head2 $ae->error([BOOL])
 429
 430 Returns the last encountered error as string.
 431 Pass it a true value to get the C<Carp::longmess()> output instead.
 432
 433 =head2 $ae->extract_path
 434
 435 This is the directory the archive got extracted to.
 436 See C<extract()> for details.
 437
 438 =head2 $ae->files
 439
 440 This is an array ref holding all the paths from the archive.
 441 See C<extract()> for details.
 442
 443 =head2 $ae->archive
 444
 445 This is the full path to the archive file represented by this
 446 C<Archive::Extract> object.
 447
 448 =head2 $ae->type
 449
 450 This is the type of archive represented by this C<Archive::Extract>
 451 object. See accessors below for an easier way to use this.
 452 See the C<new()> method for details.
 453
 454 =head2 $ae->types
 455
 456 Returns a list of all known C<types> for C<Archive::Extract>'s
 457 C<new> method.
 458
 459 =cut
 460
 461 sub types { return @Types }
 462
 463 =head2 $ae->is_tgz
 464
 465 Returns true if the file is of type C<.tar.gz>.
 466 See the C<new()> method for details.
 467
 468 =head2 $ae->is_tar
 469
 470 Returns true if the file is of type C<.tar>.
 471 See the C<new()> method for details.
 472
 473 =head2 $ae->is_gz
 474
 475 Returns true if the file is of type C<.gz>.
 476 See the C<new()> method for details.
 477
 478 =head2 $ae->is_Z
 479
 480 Returns true if the file is of type C<.Z>.
 481 See the C<new()> method for details.
 482
 483 =head2 $ae->is_zip
 484
 485 Returns true if the file is of type C<.zip>.
 486 See the C<new()> method for details.
 487
 488 =head2 $ae->is_lzma
 489
 490 Returns true if the file is of type C<.lzma>.
 491 See the C<new()> method for details.
 492
 493 =cut
 494
 495 ### quick check methods ###
 496 sub is_tgz  { return $_[0]->type eq TGZ }
 497 sub is_tar  { return $_[0]->type eq TAR }
 498 sub is_gz   { return $_[0]->type eq GZ  }
 499 sub is_zip  { return $_[0]->type eq ZIP }
 500 sub is_tbz  { return $_[0]->type eq TBZ }
 501 sub is_bz2  { return $_[0]->type eq BZ2 }
 502 sub is_Z    { return $_[0]->type eq Z   }
 503 sub is_lzma { return $_[0]->type eq LZMA }
 504
 505 =pod
 506
 507 =head2 $ae->bin_tar
 508
 509 Returns the full path to your tar binary, if found.
 510
 511 =head2 $ae->bin_gzip
 512
 513 Returns the full path to your gzip binary, if found
 514
 515 =head2 $ae->bin_unzip
 516
 517 Returns the full path to your unzip binary, if found
 518
 519 =head2 $ae->bin_unlzma
 520
 521 Returns the full path to your unlzma binary, if found
 522
 523 =cut
 524
 525 ### paths to commandline tools ###
 526 sub bin_gzip        { return $PROGRAMS->{'gzip'}    if $PROGRAMS->{'gzip'}  }
 527 sub bin_unzip       { return $PROGRAMS->{'unzip'}   if $PROGRAMS->{'unzip'} }
 528 sub bin_tar         { return $PROGRAMS->{'tar'}     if $PROGRAMS->{'tar'}   }
 529 sub bin_bunzip2     { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} }
 530 sub bin_uncompress  { return $PROGRAMS->{'uncompress'}
 531                                                  if $PROGRAMS->{'uncompress'} }
 532 sub bin_unlzma      { return $PROGRAMS->{'unlzma'}  if $PROGRAMS->{'unlzma'} }
 533
 534 =head2 $bool = $ae->have_old_bunzip2
 535
 536 Older versions of C</bin/bunzip2>, from before the C<bunzip2 1.0> release,
 537 require all archive names to end in C<.bz2> or it will not extract
 538 them. This method checks if you have a recent version of C<bunzip2>
 539 that allows any extension, or an older one that doesn't.
 540
 541 =cut
 542
 543 sub have_old_bunzip2 {
 544     my $self = shift;
 545
 546     ### no bunzip2? no old bunzip2 either :)
 547     return unless $self->bin_bunzip2;
 548
 549     ### if we can't run this, we can't be sure if it's too old or not
 550     ### XXX stupid stupid stupid bunzip2 doesn't understand --version
 551     ### is not a request to extract data:
 552     ### $ bunzip2 --version
 553     ### bzip2, a block-sorting file compressor.  Version 1.0.2, 30-Dec-2001.
 554     ### [...]
 555     ### bunzip2: I won't read compressed data from a terminal.
 556     ### bunzip2: For help, type: `bunzip2 --help'.
 557     ### $ echo $?
 558     ### 1
 559     ### HATEFUL!
 560
 561     ### double hateful: bunzip2 --version also hangs if input is a pipe
 562     ### See #32370: Archive::Extract will hang if stdin is a pipe [+PATCH]
 563     ### So, we have to provide *another* argument which is a fake filename,
 564     ### just so it wont try to read from stdin to print its version..
 565     ### *sigh*
 566     ### Even if the file exists, it won't clobber or change it.
 567     my $buffer;
 568     scalar run(
 569          command => [$self->bin_bunzip2, '--version', 'NoSuchFile'],
 570          verbose => 0,
 571          buffer  => \$buffer
 572     );
 573
 574     ### no output
 575     return unless $buffer;
 576
 577     my ($version) = $buffer =~ /version \s+ (\d+)/ix;
 578
 579     return 1 if $version < 1;
 580     return;
 581 }
 582
 583 #################################
 584 #
 585 # Untar code
 586 #
 587 #################################
 588
 589 ### annoying issue with (gnu) tar on win32, as illustrated by this
 590 ### bug: https://rt.cpan.org/Ticket/Display.html?id=40138
 591 ### which shows that (gnu) tar will interpret a file name with a :
 592 ### in it as a remote file name, so C:\tmp\foo.txt is interpreted
 593 ### as a remote shell, and the extract fails.
 594 {   my @ExtraTarFlags;
 595     if( ON_WIN32 and my $cmd = __PACKAGE__->bin_tar ) {
 596
 597         ### if this is gnu tar we are running, we need to use --force-local
 598         push @ExtraTarFlags, '--force-local' if `$cmd --version` =~ /gnu tar/i;
 599     }
 600
 601
 602     ### use /bin/tar to extract ###
 603     sub _untar_bin {
 604         my $self = shift;
 605
 606         ### check for /bin/tar ###
 607         ### check for /bin/gzip if we need it ###
 608         ### if any of the binaries are not available, return NA
 609         {   my $diag =  not $self->bin_tar ?
 610                             loc("No '%1' program found", '/bin/tar') :
 611                         $self->is_tgz && !$self->bin_gzip ?
 612                             loc("No '%1' program found", '/bin/gzip') :
 613                         $self->is_tbz && !$self->bin_bunzip2 ?
 614                             loc("No '%1' program found", '/bin/bunzip2') :
 615                         '';
 616
 617             if( $diag ) {
 618                 $self->_error( $diag );
 619                 return METHOD_NA;
 620             }
 621         }
 622
 623         ### XXX figure out how to make IPC::Run do this in one call --
 624         ### currently i don't know how to get output of a command after a pipe
 625         ### trapped in a scalar. Mailed barries about this 5th of june 2004.
 626
 627         ### see what command we should run, based on whether
 628         ### it's a .tgz or .tar
 629
 630         ### XXX solaris tar and bsdtar are having different outputs
 631         ### depending whether you run with -x or -t
 632         ### compensate for this insanity by running -t first, then -x
 633         {    my $cmd =
 634                 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
 635                                  $self->bin_tar, '-tf', '-'] :
 636                 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
 637                                  $self->bin_tar, '-tf', '-'] :
 638                 [$self->bin_tar, @ExtraTarFlags, '-tf', $self->archive];
 639
 640             ### run the command ###
 641             my $buffer = '';
 642             unless( scalar run( command => $cmd,
 643                                 buffer  => \$buffer,
 644                                 verbose => $DEBUG )
 645             ) {
 646                 return $self->_error(loc(
 647                                 "Error listing contents of archive '%1': %2",
 648                                 $self->archive, $buffer ));
 649             }
 650
 651             ### no buffers available?
 652             if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
 653                 $self->_error( $self->_no_buffer_files( $self->archive ) );
 654
 655             } else {
 656                 ### if we're on solaris we /might/ be using /bin/tar, which has
 657                 ### a weird output format... we might also be using
 658                 ### /usr/local/bin/tar, which is gnu tar, which is perfectly
 659                 ### fine... so we have to do some guessing here =/
 660                 my @files = map { chomp;
 661                               !ON_SOLARIS ? $_
 662                                           : (m|^ x \s+  # 'xtract' -- sigh
 663                                                 (.+?),  # the actual file name
 664                                                 \s+ [\d,.]+ \s bytes,
 665                                                 \s+ [\d,.]+ \s tape \s blocks
 666                                             |x ? $1 : $_);
 667
 668                         } split $/, $buffer;
 669
 670                 ### store the files that are in the archive ###
 671                 $self->files(\@files);
 672             }
 673         }
 674
 675         ### now actually extract it ###
 676         {   my $cmd =
 677                 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
 678                                  $self->bin_tar, '-xf', '-'] :
 679                 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
 680                                  $self->bin_tar, '-xf', '-'] :
 681                 [$self->bin_tar, @ExtraTarFlags, '-xf', $self->archive];
 682
 683             my $buffer = '';
 684             unless( scalar run( command => $cmd,
 685                                 buffer  => \$buffer,
 686                                 verbose => $DEBUG )
 687             ) {
 688                 return $self->_error(loc("Error extracting archive '%1': %2",
 689                                 $self->archive, $buffer ));
 690             }
 691
 692             ### we might not have them, due to lack of buffers
 693             if( $self->files ) {
 694                 ### now that we've extracted, figure out where we extracted to
 695                 my $dir = $self->__get_extract_dir( $self->files );
 696
 697                 ### store the extraction dir ###
 698                 $self->extract_path( $dir );
 699             }
 700         }
 701
 702         ### we got here, no error happened
 703         return 1;
 704     }
 705 }
 706
 707
 708 ### use archive::tar to extract ###
 709 sub _untar_at {
 710     my $self = shift;
 711
 712     ### Loading Archive::Tar is going to set it to 1, so make it local
 713     ### within this block, starting with its initial value. Whatever
 714     ### Achive::Tar does will be undone when we return.
 715     ###
 716     ### Also, later, set $Archive::Tar::WARN to $Archive::Extract::WARN
 717     ### so users don't have to even think about this variable. If they
 718     ### do, they still get their set value outside of this call.
 719     local $Archive::Tar::WARN = $Archive::Tar::WARN;
 720
 721     ### we definitely need Archive::Tar, so load that first
 722     {   my $use_list = { 'Archive::Tar' => '0.0' };
 723
 724         unless( can_load( modules => $use_list ) ) {
 725
 726             $self->_error(loc("You do not have '%1' installed - " .
 727                               "Please install it as soon as possible.",
 728                               'Archive::Tar'));
 729
 730             return METHOD_NA;
 731         }
 732     }
 733
 734     ### we might pass it a filehandle if it's a .tbz file..
 735     my $fh_to_read = $self->archive;
 736
 737     ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib
 738     ### if A::T's version is 0.99 or higher
 739     if( $self->is_tgz ) {
 740         my $use_list = { 'Compress::Zlib' => '0.0' };
 741            $use_list->{ 'IO::Zlib' } = '0.0'
 742                 if $Archive::Tar::VERSION >= '0.99';
 743
 744         unless( can_load( modules => $use_list ) ) {
 745             my $which = join '/', sort keys %$use_list;
 746
 747             $self->_error(loc(
 748                 "You do not have '%1' installed - Please ".
 749                 "install it as soon as possible.", $which)
 750             );
 751
 752             return METHOD_NA;
 753         }
 754
 755     } elsif ( $self->is_tbz ) {
 756         my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
 757         unless( can_load( modules => $use_list ) ) {
 758             $self->_error(loc(
 759                 "You do not have '%1' installed - Please " .
 760                 "install it as soon as possible.",
 761                 'IO::Uncompress::Bunzip2')
 762             );
 763
 764             return METHOD_NA;
 765         }
 766
 767         my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
 768             return $self->_error(loc("Unable to open '%1': %2",
 769                             $self->archive,
 770                             $IO::Uncompress::Bunzip2::Bunzip2Error));
 771
 772         $fh_to_read = $bz;
 773     }
 774
 775     ### $Archive::Tar::WARN is 1 by default in Archive::Tar, but we've
 776     ### localized $Archive::Tar::WARN already.
 777     $Archive::Tar::WARN = $Archive::Extract::WARN;
 778
 779     my $tar = Archive::Tar->new();
 780
 781     ### only tell it it's compressed if it's a .tgz, as we give it a file
 782     ### handle if it's a .tbz
 783     unless( $tar->read( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) ) ) {
 784         return $self->_error(loc("Unable to read '%1': %2", $self->archive,
 785                                     $Archive::Tar::error));
 786     }
 787
 788     ### workaround to prevent Archive::Tar from setting uid, which
 789     ### is a potential security hole. -autrijus
 790     ### have to do it here, since A::T needs to be /loaded/ first ###
 791     {   no strict 'refs'; local $^W;
 792
 793         ### older versions of archive::tar <= 0.23
 794         *Archive::Tar::chown = sub {};
 795     }
 796
 797     ### for version of Archive::Tar > 1.04
 798     local $Archive::Tar::CHOWN = 0;
 799
 800     {   local $^W;  # quell 'splice() offset past end of array' warnings
 801                     # on older versions of A::T
 802
 803         ### older archive::tar always returns $self, return value slightly
 804         ### fux0r3d because of it.
 805         $tar->extract()
 806             or return $self->_error(loc("Unable to extract '%1': %2",
 807                                     $self->archive, $Archive::Tar::error ));
 808     }
 809
 810     my @files   = $tar->list_files;
 811     my $dir     = $self->__get_extract_dir( \@files );
 812
 813     ### store the files that are in the archive ###
 814     $self->files(\@files);
 815
 816     ### store the extraction dir ###
 817     $self->extract_path( $dir );
 818
 819     ### check if the dir actually appeared ###
 820     return 1 if -d $self->extract_path;
 821
 822     ### no dir, we failed ###
 823     return $self->_error(loc("Unable to extract '%1': %2",
 824                                 $self->archive, $Archive::Tar::error ));
 825 }
 826
 827 #################################
 828 #
 829 # Gunzip code
 830 #
 831 #################################
 832
 833 sub _gunzip_bin {
 834     my $self = shift;
 835
 836     ### check for /bin/gzip -- we need it ###
 837     unless( $self->bin_gzip ) {
 838         $self->_error(loc("No '%1' program found", '/bin/gzip'));
 839         return METHOD_NA;
 840     }
 841
 842     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
 843         return $self->_error(loc("Could not open '%1' for writing: %2",
 844                             $self->_gunzip_to, $! ));
 845
 846     my $cmd = [ $self->bin_gzip, '-cdf', $self->archive ];
 847
 848     my $buffer;
 849     unless( scalar run( command => $cmd,
 850                         verbose => $DEBUG,
 851                         buffer  => \$buffer )
 852     ) {
 853         return $self->_error(loc("Unable to gunzip '%1': %2",
 854                                     $self->archive, $buffer));
 855     }
 856
 857     ### no buffers available?
 858     if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
 859         $self->_error( $self->_no_buffer_content( $self->archive ) );
 860     }
 861
 862     print $fh $buffer if defined $buffer;
 863
 864     close $fh;
 865
 866     ### set what files where extract, and where they went ###
 867     $self->files( [$self->_gunzip_to] );
 868     $self->extract_path( File::Spec->rel2abs(cwd()) );
 869
 870     return 1;
 871 }
 872
 873 sub _gunzip_cz {
 874     my $self = shift;
 875
 876     my $use_list = { 'Compress::Zlib' => '0.0' };
 877     unless( can_load( modules => $use_list ) ) {
 878         $self->_error(loc("You do not have '%1' installed - Please " .
 879                     "install it as soon as possible.", 'Compress::Zlib'));
 880         return METHOD_NA;
 881     }
 882
 883     my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or
 884                 return $self->_error(loc("Unable to open '%1': %2",
 885                             $self->archive, $Compress::Zlib::gzerrno));
 886
 887     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
 888         return $self->_error(loc("Could not open '%1' for writing: %2",
 889                             $self->_gunzip_to, $! ));
 890
 891     my $buffer;
 892     $fh->print($buffer) while $gz->gzread($buffer) > 0;
 893     $fh->close;
 894
 895     ### set what files where extract, and where they went ###
 896     $self->files( [$self->_gunzip_to] );
 897     $self->extract_path( File::Spec->rel2abs(cwd()) );
 898
 899     return 1;
 900 }
 901
 902 #################################
 903 #
 904 # Uncompress code
 905 #
 906 #################################
 907
 908 sub _uncompress_bin {
 909     my $self = shift;
 910
 911     ### check for /bin/gzip -- we need it ###
 912     unless( $self->bin_uncompress ) {
 913         $self->_error(loc("No '%1' program found", '/bin/uncompress'));
 914         return METHOD_NA;
 915     }
 916
 917     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
 918         return $self->_error(loc("Could not open '%1' for writing: %2",
 919                             $self->_gunzip_to, $! ));
 920
 921     my $cmd = [ $self->bin_uncompress, '-c', $self->archive ];
 922
 923     my $buffer;
 924     unless( scalar run( command => $cmd,
 925                         verbose => $DEBUG,
 926                         buffer  => \$buffer )
 927     ) {
 928         return $self->_error(loc("Unable to uncompress '%1': %2",
 929                                     $self->archive, $buffer));
 930     }
 931
 932     ### no buffers available?
 933     if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
 934         $self->_error( $self->_no_buffer_content( $self->archive ) );
 935     }
 936
 937     print $fh $buffer if defined $buffer;
 938
 939     close $fh;
 940
 941     ### set what files where extract, and where they went ###
 942     $self->files( [$self->_gunzip_to] );
 943     $self->extract_path( File::Spec->rel2abs(cwd()) );
 944
 945     return 1;
 946 }
 947
 948
 949 #################################
 950 #
 951 # Unzip code
 952 #
 953 #################################
 954
 955
 956 sub _unzip_bin {
 957     my $self = shift;
 958
 959     ### check for /bin/gzip if we need it ###
 960     unless( $self->bin_unzip ) {
 961         $self->_error(loc("No '%1' program found", '/bin/unzip'));
 962         return METHOD_NA;
 963     }
 964
 965     ### first, get the files.. it must be 2 different commands with 'unzip' :(
 966     {   ### on VMS, capital letter options have to be quoted. This is
 967         ### peported by John Malmberg on P5P Tue 21 Aug 2007 05:05:11
 968         ### Subject: [patch@31735]Archive Extract fix on VMS.
 969         my $opt = ON_VMS ? '"-Z"' : '-Z';
 970         my $cmd = [ $self->bin_unzip, $opt, '-1', $self->archive ];
 971
 972         my $buffer = '';
 973         unless( scalar run( command => $cmd,
 974                             verbose => $DEBUG,
 975                             buffer  => \$buffer )
 976         ) {
 977             return $self->_error(loc("Unable to unzip '%1': %2",
 978                                         $self->archive, $buffer));
 979         }
 980
 981         ### no buffers available?
 982         if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
 983             $self->_error( $self->_no_buffer_files( $self->archive ) );
 984
 985         } else {
 986             $self->files( [split $/, $buffer] );
 987         }
 988     }
 989
 990     ### now, extract the archive ###
 991     {   my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ];
 992
 993         my $buffer;
 994         unless( scalar run( command => $cmd,
 995                             verbose => $DEBUG,
 996                             buffer  => \$buffer )
 997         ) {
 998             return $self->_error(loc("Unable to unzip '%1': %2",
 999                                         $self->archive, $buffer));
1000         }
1001
1002         if( scalar @{$self->files} ) {
1003             my $files   = $self->files;
1004             my $dir     = $self->__get_extract_dir( $files );
1005
1006             $self->extract_path( $dir );
1007         }
1008     }
1009
1010     return 1;
1011 }
1012
1013 sub _unzip_az {
1014     my $self = shift;
1015
1016     my $use_list = { 'Archive::Zip' => '0.0' };
1017     unless( can_load( modules => $use_list ) ) {
1018         $self->_error(loc("You do not have '%1' installed - Please " .
1019                       "install it as soon as possible.", 'Archive::Zip'));
1020         return METHOD_NA;
1021     }
1022
1023     my $zip = Archive::Zip->new();
1024
1025     unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) {
1026         return $self->_error(loc("Unable to read '%1'", $self->archive));
1027     }
1028
1029     my @files;
1030
1031
1032     ### Address: #43278: Explicitly tell Archive::Zip where to put the files:
1033     ### "In my BackPAN indexing, Archive::Zip was extracting things
1034     ### in my script's directory instead of the current working directory.
1035     ### I traced this back through Archive::Zip::_asLocalName which
1036     ### eventually calls File::Spec::Win32::rel2abs which on Windows might
1037     ### call Cwd::getdcwd. getdcwd returns the wrong directory in my
1038     ### case, even though I think I'm on the same drive.
1039     ###
1040     ### To fix this, I pass the optional second argument to
1041     ### extractMember using the cwd from Archive::Extract." --bdfoy
1042
1043     ## store cwd() before looping; calls to cwd() can be expensive, and
1044     ### it won't change during the loop
1045     my $extract_dir = cwd();
1046
1047     ### have to extract every member individually ###
1048     for my $member ($zip->members) {
1049         push @files, $member->{fileName};
1050
1051         ### file to extact to, to avoid the above problem
1052         my $to = File::Spec->catfile( $extract_dir, $member->{fileName} );
1053
1054         unless( $zip->extractMember($member, $to) == &Archive::Zip::AZ_OK ) {
1055             return $self->_error(loc("Extraction of '%1' from '%2' failed",
1056                         $member->{fileName}, $self->archive ));
1057         }
1058     }
1059
1060     my $dir = $self->__get_extract_dir( \@files );
1061
1062     ### set what files where extract, and where they went ###
1063     $self->files( \@files );
1064     $self->extract_path( File::Spec->rel2abs($dir) );
1065
1066     return 1;
1067 }
1068
1069 sub __get_extract_dir {
1070     my $self    = shift;
1071     my $files   = shift || [];
1072
1073     return unless scalar @$files;
1074
1075     my($dir1, $dir2);
1076     for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
1077         my($dir,$pos) = @$aref;
1078
1079         ### add a catdir(), so that any trailing slashes get
1080         ### take care of (removed)
1081         ### also, a catdir() normalises './dir/foo' to 'dir/foo';
1082         ### which was the problem in bug #23999
1083         my $res = -d $files->[$pos]
1084                     ? File::Spec->catdir( $files->[$pos], '' )
1085                     : File::Spec->catdir( dirname( $files->[$pos] ) );
1086
1087         $$dir = $res;
1088     }
1089
1090     ### if the first and last dir don't match, make sure the
1091     ### dirname is not set wrongly
1092     my $dir;
1093
1094     ### dirs are the same, so we know for sure what the extract dir is
1095     if( $dir1 eq $dir2 ) {
1096         $dir = $dir1;
1097
1098     ### dirs are different.. do they share the base dir?
1099     ### if so, use that, if not, fall back to '.'
1100     } else {
1101         my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
1102         my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
1103
1104         $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
1105     }
1106
1107     return File::Spec->rel2abs( $dir );
1108 }
1109
1110 #################################
1111 #
1112 # Bunzip2 code
1113 #
1114 #################################
1115
1116 sub _bunzip2_bin {
1117     my $self = shift;
1118
1119     ### check for /bin/gzip -- we need it ###
1120     unless( $self->bin_bunzip2 ) {
1121         $self->_error(loc("No '%1' program found", '/bin/bunzip2'));
1122         return METHOD_NA;
1123     }
1124
1125     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1126         return $self->_error(loc("Could not open '%1' for writing: %2",
1127                             $self->_gunzip_to, $! ));
1128
1129     ### guard against broken bunzip2. See ->have_old_bunzip2()
1130     ### for details
1131     if( $self->have_old_bunzip2 and $self->archive !~ /\.bz2$/i ) {
1132         return $self->_error(loc("Your bunzip2 version is too old and ".
1133                                  "can only extract files ending in '%1'",
1134                                  '.bz2'));
1135     }
1136
1137     my $cmd = [ $self->bin_bunzip2, '-cd', $self->archive ];
1138
1139     my $buffer;
1140     unless( scalar run( command => $cmd,
1141                         verbose => $DEBUG,
1142                         buffer  => \$buffer )
1143     ) {
1144         return $self->_error(loc("Unable to bunzip2 '%1': %2",
1145                                     $self->archive, $buffer));
1146     }
1147
1148     ### no buffers available?
1149     if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1150         $self->_error( $self->_no_buffer_content( $self->archive ) );
1151     }
1152
1153     print $fh $buffer if defined $buffer;
1154
1155     close $fh;
1156
1157     ### set what files where extract, and where they went ###
1158     $self->files( [$self->_gunzip_to] );
1159     $self->extract_path( File::Spec->rel2abs(cwd()) );
1160
1161     return 1;
1162 }
1163
1164 ### using cz2, the compact versions... this we use mainly in archive::tar
1165 ### extractor..
1166 # sub _bunzip2_cz1 {
1167 #     my $self = shift;
1168 #
1169 #     my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1170 #     unless( can_load( modules => $use_list ) ) {
1171 #         return $self->_error(loc("You do not have '%1' installed - Please " .
1172 #                         "install it as soon as possible.",
1173 #                         'IO::Uncompress::Bunzip2'));
1174 #     }
1175 #
1176 #     my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
1177 #                 return $self->_error(loc("Unable to open '%1': %2",
1178 #                             $self->archive,
1179 #                             $IO::Uncompress::Bunzip2::Bunzip2Error));
1180 #
1181 #     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1182 #         return $self->_error(loc("Could not open '%1' for writing: %2",
1183 #                             $self->_gunzip_to, $! ));
1184 #
1185 #     my $buffer;
1186 #     $fh->print($buffer) while $bz->read($buffer) > 0;
1187 #     $fh->close;
1188 #
1189 #     ### set what files where extract, and where they went ###
1190 #     $self->files( [$self->_gunzip_to] );
1191 #     $self->extract_path( File::Spec->rel2abs(cwd()) );
1192 #
1193 #     return 1;
1194 # }
1195
1196 sub _bunzip2_bz2 {
1197     my $self = shift;
1198
1199     my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1200     unless( can_load( modules => $use_list ) ) {
1201         $self->_error(loc("You do not have '%1' installed - Please " .
1202                           "install it as soon as possible.",
1203                           'IO::Uncompress::Bunzip2'));
1204         return METHOD_NA;
1205     }
1206
1207     IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to)
1208         or return $self->_error(loc("Unable to uncompress '%1': %2",
1209                             $self->archive,
1210                             $IO::Uncompress::Bunzip2::Bunzip2Error));
1211
1212     ### set what files where extract, and where they went ###
1213     $self->files( [$self->_gunzip_to] );
1214     $self->extract_path( File::Spec->rel2abs(cwd()) );
1215
1216     return 1;
1217 }
1218
1219
1220 #################################
1221 #
1222 # unlzma code
1223 #
1224 #################################
1225
1226 sub _unlzma_bin {
1227     my $self = shift;
1228
1229     ### check for /bin/unlzma -- we need it ###
1230     unless( $self->bin_unlzma ) {
1231         $self->_error(loc("No '%1' program found", '/bin/unlzma'));
1232         return METHOD_NA;
1233     }
1234
1235     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1236         return $self->_error(loc("Could not open '%1' for writing: %2",
1237                             $self->_gunzip_to, $! ));
1238
1239     my $cmd = [ $self->bin_unlzma, '-c', $self->archive ];
1240
1241     my $buffer;
1242     unless( scalar run( command => $cmd,
1243                         verbose => $DEBUG,
1244                         buffer  => \$buffer )
1245     ) {
1246         return $self->_error(loc("Unable to unlzma '%1': %2",
1247                                     $self->archive, $buffer));
1248     }
1249
1250     ### no buffers available?
1251     if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1252         $self->_error( $self->_no_buffer_content( $self->archive ) );
1253     }
1254
1255     print $fh $buffer if defined $buffer;
1256
1257     close $fh;
1258
1259     ### set what files where extract, and where they went ###
1260     $self->files( [$self->_gunzip_to] );
1261     $self->extract_path( File::Spec->rel2abs(cwd()) );
1262
1263     return 1;
1264 }
1265
1266 sub _unlzma_cz {
1267     my $self = shift;
1268
1269     my $use_list = { 'Compress::unLZMA' => '0.0' };
1270     unless( can_load( modules => $use_list ) ) {
1271         $self->_error(loc("You do not have '%1' installed - Please " .
1272                     "install it as soon as possible.", 'Compress::unLZMA'));
1273         return METHOD_NA;
1274     }
1275
1276     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1277         return $self->_error(loc("Could not open '%1' for writing: %2",
1278                             $self->_gunzip_to, $! ));
1279
1280     my $buffer;
1281     $buffer = Compress::unLZMA::uncompressfile( $self->archive );
1282     unless ( defined $buffer ) {
1283         return $self->_error(loc("Could not unlzma '%1': %2",
1284                                     $self->archive, $@));
1285     }
1286
1287     print $fh $buffer if defined $buffer;
1288
1289     close $fh;
1290
1291     ### set what files where extract, and where they went ###
1292     $self->files( [$self->_gunzip_to] );
1293     $self->extract_path( File::Spec->rel2abs(cwd()) );
1294
1295     return 1;
1296 }
1297
1298 #################################
1299 #
1300 # Error code
1301 #
1302 #################################
1303
1304 sub _error {
1305     my $self    = shift;
1306     my $error   = shift;
1307     my $lerror  = Carp::longmess($error);
1308
1309     push @{$self->_error_msg},      $error;
1310     push @{$self->_error_msg_long}, $lerror;
1311
1312     ### set $Archive::Extract::WARN to 0 to disable printing
1313     ### of errors
1314     if( $WARN ) {
1315         carp $DEBUG ? $lerror : $error;
1316     }
1317
1318     return;
1319 }
1320
1321 sub error {
1322     my $self = shift;
1323
1324     ### make sure we have a fallback aref
1325     my $aref = do {
1326         shift()
1327             ? $self->_error_msg_long
1328             : $self->_error_msg
1329     } || [];
1330
1331     return join $/, @$aref;
1332 }
1333
1334 sub _no_buffer_files {
1335     my $self = shift;
1336     my $file = shift or return;
1337     return loc("No buffer captured, unable to tell ".
1338                "extracted files or extraction dir for '%1'", $file);
1339 }
1340
1341 sub _no_buffer_content {
1342     my $self = shift;
1343     my $file = shift or return;
1344     return loc("No buffer captured, unable to get content for '%1'", $file);
1345 }
1346 1;
1347
1348 =pod
1349
1350 =head1 HOW IT WORKS
1351
1352 C<Archive::Extract> tries first to determine what type of archive you
1353 are passing it, by inspecting its suffix. It does not do this by using
1354 Mime magic, or something related. See C<CAVEATS> below.
1355
1356 Once it has determined the file type, it knows which extraction methods
1357 it can use on the archive. It will try a perl solution first, then fall
1358 back to a commandline tool if that fails. If that also fails, it will
1359 return false, indicating it was unable to extract the archive.
1360 See the section on C<GLOBAL VARIABLES> to see how to alter this order.
1361
1362 =head1 CAVEATS
1363
1364 =head2 File Extensions
1365
1366 C<Archive::Extract> trusts on the extension of the archive to determine
1367 what type it is, and what extractor methods therefore can be used. If
1368 your archives do not have any of the extensions as described in the
1369 C<new()> method, you will have to specify the type explicitly, or
1370 C<Archive::Extract> will not be able to extract the archive for you.
1371
1372 =head2 Supporting Very Large Files
1373
1374 C<Archive::Extract> can use either pure perl modules or command line
1375 programs under the hood. Some of the pure perl modules (like
1376 C<Archive::Tar> and Compress::unLZMA) take the entire contents of the archive into memory,
1377 which may not be feasible on your system. Consider setting the global
1378 variable C<$Archive::Extract::PREFER_BIN> to C<1>, which will prefer
1379 the use of command line programs and won't consume so much memory.
1380
1381 See the C<GLOBAL VARIABLES> section below for details.
1382
1383 =head2 Bunzip2 support of arbitrary extensions.
1384
1385 Older versions of C</bin/bunzip2> do not support arbitrary file
1386 extensions and insist on a C<.bz2> suffix. Although we do our best
1387 to guard against this, if you experience a bunzip2 error, it may
1388 be related to this. For details, please see the C<have_old_bunzip2>
1389 method.
1390
1391 =head1 GLOBAL VARIABLES
1392
1393 =head2 $Archive::Extract::DEBUG
1394
1395 Set this variable to C<true> to have all calls to command line tools
1396 be printed out, including all their output.
1397 This also enables C<Carp::longmess> errors, instead of the regular
1398 C<carp> errors.
1399
1400 Good for tracking down why things don't work with your particular
1401 setup.
1402
1403 Defaults to C<false>.
1404
1405 =head2 $Archive::Extract::WARN
1406
1407 This variable controls whether errors encountered internally by
1408 C<Archive::Extract> should be C<carp>'d or not.
1409
1410 Set to false to silence warnings. Inspect the output of the C<error()>
1411 method manually to see what went wrong.
1412
1413 Defaults to C<true>.
1414
1415 =head2 $Archive::Extract::PREFER_BIN
1416
1417 This variables controls whether C<Archive::Extract> should prefer the
1418 use of perl modules, or commandline tools to extract archives.
1419
1420 Set to C<true> to have C<Archive::Extract> prefer commandline tools.
1421
1422 Defaults to C<false>.
1423
1424 =head1 TODO
1425
1426 =over 4
1427
1428 =item Mime magic support
1429
1430 Maybe this module should use something like C<File::Type> to determine
1431 the type, rather than blindly trust the suffix.
1432
1433 =back
1434
1435 =head1 BUG REPORTS
1436
1437 Please report bugs or other issues to E<lt>bug-archive-extract@rt.cpan.org<gt>.
1438
1439 =head1 AUTHOR
1440
1441 This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1442
1443 =head1 COPYRIGHT
1444
1445 This library is free software; you may redistribute and/or modify it
1446 under the same terms as Perl itself.
1447
1448 =cut
1449
1450 # Local variables:
1451 # c-indentation-style: bsd
1452 # c-basic-offset: 4
1453 # indent-tabs-mode: nil
1454 # End:
1455 # vim: expandtab shiftwidth=4:
1456