lib/Archive/Extract.pm

   1 package Archive::Extract;
   2
   3 use strict;
   4
   5 use Cwd                         qw[cwd];
   6 use Carp                        qw[carp];
   7 use IPC::Cmd                    qw[run can_run];
   8 use FileHandle;
   9 use File::Path                  qw[mkpath];
  10 use File::Spec;
  11 use File::Basename              qw[dirname basename];
  12 use Params::Check               qw[check];
  13 use Module::Load::Conditional   qw[can_load check_install];
  14 use Locale::Maketext::Simple    Style => 'gettext';
  15
  16 ### solaris has silly /bin/tar output ###
  17 use constant ON_SOLARIS     => $^O eq 'solaris' ? 1 : 0;
  18 use constant FILE_EXISTS    => sub { -e $_[0] ? 1 : 0 };
  19
  20 ### If these are changed, update @TYPES and the new() POD
  21 use constant TGZ            => 'tgz';
  22 use constant TAR            => 'tar';
  23 use constant GZ             => 'gz';
  24 use constant ZIP            => 'zip';
  25 use constant BZ2            => 'bz2';
  26 use constant TBZ            => 'tbz';
  27
  28 use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG];
  29
  30 $VERSION        = '0.18';
  31 $PREFER_BIN     = 0;
  32 $WARN           = 1;
  33 $DEBUG          = 0;
  34 my @Types       = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ ); # same as all constants
  35
  36 local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1;
  37
  38 =pod
  39
  40 =head1 NAME
  41
  42 Archive::Extract - A generic archive extracting mechanism
  43
  44 =head1 SYNOPSIS
  45
  46     use Archive::Extract;
  47
  48     ### build an Archive::Extract object ###
  49     my $ae = Archive::Extract->new( archive => 'foo.tgz' );
  50
  51     ### extract to cwd() ###
  52     my $ok = $ae->extract;
  53
  54     ### extract to /tmp ###
  55     my $ok = $ae->extract( to => '/tmp' );
  56
  57     ### what if something went wrong?
  58     my $ok = $ae->extract or die $ae->error;
  59
  60     ### files from the archive ###
  61     my $files   = $ae->files;
  62
  63     ### dir that was extracted to ###
  64     my $outdir  = $ae->extract_path;
  65
  66
  67     ### quick check methods ###
  68     $ae->is_tar     # is it a .tar file?
  69     $ae->is_tgz     # is it a .tar.gz or .tgz file?
  70     $ae->is_gz;     # is it a .gz file?
  71     $ae->is_zip;    # is it a .zip file?
  72     $ae->is_bz2;    # is it a .bz2 file?
  73     $ae->is_tbz;    # is it a .tar.bz2 or .tbz file?
  74
  75     ### absolute path to the archive you provided ###
  76     $ae->archive;
  77
  78     ### commandline tools, if found ###
  79     $ae->bin_tar     # path to /bin/tar, if found
  80     $ae->bin_gzip    # path to /bin/gzip, if found
  81     $ae->bin_unzip   # path to /bin/unzip, if found
  82     $ae->bin_bunzip2 # path to /bin/bunzip2 if found
  83
  84 =head1 DESCRIPTION
  85
  86 Archive::Extract is a generic archive extraction mechanism.
  87
  88 It allows you to extract any archive file of the type .tar, .tar.gz,
  89 .gz, tar.bz2, .tbz, .bz2 or .zip without having to worry how it does
  90 so, or use different interfaces for each type by using either perl
  91 modules, or commandline tools on your system.
  92
  93 See the C<HOW IT WORKS> section further down for details.
  94
  95 =cut
  96
  97
  98 ### see what /bin/programs are available ###
  99 $PROGRAMS = {};
 100 for my $pgm (qw[tar unzip gzip bunzip2]) {
 101     $PROGRAMS->{$pgm} = can_run($pgm);
 102 }
 103
 104 ### mapping from types to extractor methods ###
 105 my $Mapping = {
 106     is_tgz  => '_untar',
 107     is_tar  => '_untar',
 108     is_gz   => '_gunzip',
 109     is_zip  => '_unzip',
 110     is_tbz  => '_untar',
 111     is_bz2  => '_bunzip2',
 112 };
 113
 114 {
 115     my $tmpl = {
 116         archive => { required => 1, allow => FILE_EXISTS },
 117         type    => { default => '', allow => [ @Types ] },
 118     };
 119
 120     ### build accesssors ###
 121     for my $method( keys %$tmpl,
 122                     qw[_extractor _gunzip_to files extract_path],
 123                     qw[_error_msg _error_msg_long]
 124     ) {
 125         no strict 'refs';
 126         *$method = sub {
 127                         my $self = shift;
 128                         $self->{$method} = $_[0] if @_;
 129                         return $self->{$method};
 130                     }
 131     }
 132
 133 =head1 METHODS
 134
 135 =head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE])
 136
 137 Creates a new C<Archive::Extract> object based on the archive file you
 138 passed it. Automatically determines the type of archive based on the
 139 extension, but you can override that by explicitly providing the
 140 C<type> argument.
 141
 142 Valid values for C<type> are:
 143
 144 =over 4
 145
 146 =item tar
 147
 148 Standard tar files, as produced by, for example, C</bin/tar>.
 149 Corresponds to a C<.tar> suffix.
 150
 151 =item tgz
 152
 153 Gzip compressed tar files, as produced by, for example C</bin/tar -z>.
 154 Corresponds to a C<.tgz> or C<.tar.gz> suffix.
 155
 156 =item gz
 157
 158 Gzip compressed file, as produced by, for example C</bin/gzip>.
 159 Corresponds to a C<.gz> suffix.
 160
 161 =item zip
 162
 163 Zip compressed file, as produced by, for example C</bin/zip>.
 164 Corresponds to a C<.zip>, C<.jar> or C<.par> suffix.
 165
 166 =item bz2
 167
 168 Bzip2 compressed file, as produced by, for example, C</bin/bzip2>.
 169 Corresponds to a C<.bz2> suffix.
 170
 171 =item tbz
 172
 173 Bzip2 compressed tar file, as produced by, for exmample C</bin/tar -j>.
 174 Corresponds to a C<.tbz> or C<.tar.bz2> suffix.
 175
 176 =back
 177
 178 Returns a C<Archive::Extract> object on success, or false on failure.
 179
 180 =cut
 181
 182     ### constructor ###
 183     sub new {
 184         my $class   = shift;
 185         my %hash    = @_;
 186
 187         my $parsed = check( $tmpl, \%hash ) or return;
 188
 189         ### make sure we have an absolute path ###
 190         my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} );
 191
 192         ### figure out the type, if it wasn't already specified ###
 193         unless ( $parsed->{type} ) {
 194             $parsed->{type} =
 195                 $ar =~ /.+?\.(?:tar\.gz|tgz)$/i     ? TGZ   :
 196                 $ar =~ /.+?\.gz$/i                  ? GZ    :
 197                 $ar =~ /.+?\.tar$/i                 ? TAR   :
 198                 $ar =~ /.+?\.(zip|jar|par)$/i       ? ZIP   :
 199                 $ar =~ /.+?\.(?:tbz|tar\.bz2?)$/i   ? TBZ   :
 200                 $ar =~ /.+?\.bz2$/i                 ? BZ2   :
 201                 '';
 202
 203         }
 204
 205         ### don't know what type of file it is ###
 206         return __PACKAGE__->_error(loc("Cannot determine file type for '%1'",
 207                                 $parsed->{archive} )) unless $parsed->{type};
 208
 209         return bless $parsed, $class;
 210     }
 211 }
 212
 213 =head2 $ae->extract( [to => '/output/path'] )
 214
 215 Extracts the archive represented by the C<Archive::Extract> object to
 216 the path of your choice as specified by the C<to> argument. Defaults to
 217 C<cwd()>.
 218
 219 Since C<.gz> files never hold a directory, but only a single file; if
 220 the C<to> argument is an existing directory, the file is extracted
 221 there, with it's C<.gz> suffix stripped.
 222 If the C<to> argument is not an existing directory, the C<to> argument
 223 is understood to be a filename, if the archive type is C<gz>.
 224 In the case that you did not specify a C<to> argument, the output
 225 file will be the name of the archive file, stripped from it's C<.gz>
 226 suffix, in the current working directory.
 227
 228 C<extract> will try a pure perl solution first, and then fall back to
 229 commandline tools if they are available. See the C<GLOBAL VARIABLES>
 230 section below on how to alter this behaviour.
 231
 232 It will return true on success, and false on failure.
 233
 234 On success, it will also set the follow attributes in the object:
 235
 236 =over 4
 237
 238 =item $ae->extract_path
 239
 240 This is the directory that the files where extracted to.
 241
 242 =item $ae->files
 243
 244 This is an array ref with the paths of all the files in the archive,
 245 relative to the C<to> argument you specified.
 246 To get the full path to an extracted file, you would use:
 247
 248     File::Spec->catfile( $to, $ae->files->[0] );
 249
 250 Note that all files from a tar archive will be in unix format, as per
 251 the tar specification.
 252
 253 =back
 254
 255 =cut
 256
 257 sub extract {
 258     my $self = shift;
 259     my %hash = @_;
 260
 261     my $to;
 262     my $tmpl = {
 263         to  => { default => '.', store => \$to }
 264     };
 265
 266     check( $tmpl, \%hash ) or return;
 267
 268     ### so 'to' could be a file or a dir, depending on whether it's a .gz
 269     ### file, or basically anything else.
 270     ### so, check that, then act accordingly.
 271     ### set an accessor specifically so _gunzip can know what file to extract
 272     ### to.
 273     my $dir;
 274     {   ### a foo.gz file
 275         if( $self->is_gz or $self->is_bz2 ) {
 276
 277             my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2)$//i;
 278
 279             ### to is a dir?
 280             if ( -d $to ) {
 281                 $dir = $to;
 282                 $self->_gunzip_to( basename($cp) );
 283
 284             ### then it's a filename
 285             } else {
 286                 $dir = dirname($to);
 287                 $self->_gunzip_to( basename($to) );
 288             }
 289
 290         ### not a foo.gz file
 291         } else {
 292             $dir = $to;
 293         }
 294     }
 295
 296     ### make the dir if it doesn't exist ###
 297     unless( -d $dir ) {
 298         eval { mkpath( $dir ) };
 299
 300         return $self->_error(loc("Could not create path '%1': %2", $dir, $@))
 301             if $@;
 302     }
 303
 304     ### get the current dir, to restore later ###
 305     my $cwd = cwd();
 306
 307     my $ok = 1;
 308     EXTRACT: {
 309
 310         ### chdir to the target dir ###
 311         unless( chdir $dir ) {
 312             $self->_error(loc("Could not chdir to '%1': %2", $dir, $!));
 313             $ok = 0; last EXTRACT;
 314         }
 315
 316         ### set files to an empty array ref, so there's always an array
 317         ### ref IN the accessor, to avoid errors like:
 318         ### Can't use an undefined value as an ARRAY reference at
 319         ### ../lib/Archive/Extract.pm line 742. (rt #19815)
 320         $self->files( [] );
 321
 322         ### find what extractor method to use ###
 323         while( my($type,$method) = each %$Mapping ) {
 324
 325             ### call the corresponding method if the type is OK ###
 326             if( $self->$type) {
 327                 $ok = $self->$method();
 328             }
 329         }
 330
 331         ### warn something went wrong if we didn't get an OK ###
 332         $self->_error(loc("Extract failed, no extractor found"))
 333             unless $ok;
 334
 335     }
 336
 337     ### and chdir back ###
 338     unless( chdir $cwd ) {
 339         $self->_error(loc("Could not chdir back to start dir '%1': %2'",
 340                             $cwd, $!));
 341     }
 342
 343     return $ok;
 344 }
 345
 346 =pod
 347
 348 =head1 ACCESSORS
 349
 350 =head2 $ae->error([BOOL])
 351
 352 Returns the last encountered error as string.
 353 Pass it a true value to get the C<Carp::longmess()> output instead.
 354
 355 =head2 $ae->extract_path
 356
 357 This is the directory the archive got extracted to.
 358 See C<extract()> for details.
 359
 360 =head2 $ae->files
 361
 362 This is an array ref holding all the paths from the archive.
 363 See C<extract()> for details.
 364
 365 =head2 $ae->archive
 366
 367 This is the full path to the archive file represented by this
 368 C<Archive::Extract> object.
 369
 370 =head2 $ae->type
 371
 372 This is the type of archive represented by this C<Archive::Extract>
 373 object. See accessors below for an easier way to use this.
 374 See the C<new()> method for details.
 375
 376 =head2 $ae->types
 377
 378 Returns a list of all known C<types> for C<Archive::Extract>'s
 379 C<new> method.
 380
 381 =cut
 382
 383 sub types { return @Types }
 384
 385 =head2 $ae->is_tgz
 386
 387 Returns true if the file is of type C<.tar.gz>.
 388 See the C<new()> method for details.
 389
 390 =head2 $ae->is_tar
 391
 392 Returns true if the file is of type C<.tar>.
 393 See the C<new()> method for details.
 394
 395 =head2 $ae->is_gz
 396
 397 Returns true if the file is of type C<.gz>.
 398 See the C<new()> method for details.
 399
 400 =head2 $ae->is_zip
 401
 402 Returns true if the file is of type C<.zip>.
 403 See the C<new()> method for details.
 404
 405 =cut
 406
 407 ### quick check methods ###
 408 sub is_tgz  { return $_[0]->type eq TGZ }
 409 sub is_tar  { return $_[0]->type eq TAR }
 410 sub is_gz   { return $_[0]->type eq GZ  }
 411 sub is_zip  { return $_[0]->type eq ZIP }
 412 sub is_tbz  { return $_[0]->type eq TBZ }
 413 sub is_bz2  { return $_[0]->type eq BZ2 }
 414
 415 =pod
 416
 417 =head2 $ae->bin_tar
 418
 419 Returns the full path to your tar binary, if found.
 420
 421 =head2 $ae->bin_gzip
 422
 423 Returns the full path to your gzip binary, if found
 424
 425 =head2 $ae->bin_unzip
 426
 427 Returns the full path to your unzip binary, if found
 428
 429 =cut
 430
 431 ### paths to commandline tools ###
 432 sub bin_gzip    { return $PROGRAMS->{'gzip'}    if $PROGRAMS->{'gzip'}  }
 433 sub bin_unzip   { return $PROGRAMS->{'unzip'}   if $PROGRAMS->{'unzip'} }
 434 sub bin_tar     { return $PROGRAMS->{'tar'}     if $PROGRAMS->{'tar'}   }
 435 sub bin_bunzip2 { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} }
 436
 437 #################################
 438 #
 439 # Untar code
 440 #
 441 #################################
 442
 443
 444 ### untar wrapper... goes to either Archive::Tar or /bin/tar
 445 ### depending on $PREFER_BIN
 446 sub _untar {
 447     my $self = shift;
 448
 449     ### bzip2 support in A::T via IO::Uncompress::Bzip2
 450     my   @methods = qw[_untar_at _untar_bin];
 451          @methods = reverse @methods if $PREFER_BIN;
 452
 453     for my $method (@methods) {
 454         $self->_extractor($method) && return 1 if $self->$method();
 455     }
 456
 457     return $self->_error(loc("Unable to untar file '%1'", $self->archive));
 458 }
 459
 460 ### use /bin/tar to extract ###
 461 sub _untar_bin {
 462     my $self = shift;
 463
 464     ### check for /bin/tar ###
 465     return $self->_error(loc("No '%1' program found", '/bin/tar'))
 466         unless $self->bin_tar;
 467
 468     ### check for /bin/gzip if we need it ###
 469     return $self->_error(loc("No '%1' program found", '/bin/gzip'))
 470         if $self->is_tgz && !$self->bin_gzip;
 471
 472     return $self->_error(loc("No '%1' program found", '/bin/bunzip2'))
 473         if $self->is_tbz && !$self->bin_bunzip2;
 474
 475     ### XXX figure out how to make IPC::Run do this in one call --
 476     ### currently i don't know how to get output of a command after a pipe
 477     ### trapped in a scalar. Mailed barries about this 5th of june 2004.
 478
 479
 480
 481     ### see what command we should run, based on whether
 482     ### it's a .tgz or .tar
 483
 484     ### XXX solaris tar and bsdtar are having different outputs
 485     ### depending whether you run with -x or -t
 486     ### compensate for this insanity by running -t first, then -x
 487     {    my $cmd =
 488             $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
 489                              $self->bin_tar, '-tf', '-'] :
 490             $self->is_tbz ? [$self->bin_bunzip2, '-c', $self->archive, '|',
 491                              $self->bin_tar, '-tf', '-'] :
 492             [$self->bin_tar, '-tf', $self->archive];
 493
 494         ### run the command ###
 495         my $buffer = '';
 496         unless( scalar run( command => $cmd,
 497                             buffer  => \$buffer,
 498                             verbose => $DEBUG )
 499         ) {
 500             return $self->_error(loc(
 501                             "Error listing contents of archive '%1': %2",
 502                             $self->archive, $buffer ));
 503         }
 504
 505         ### no buffers available?
 506         if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
 507             $self->_error( $self->_no_buffer_files( $self->archive ) );
 508
 509         } else {
 510             ### if we're on solaris we /might/ be using /bin/tar, which has
 511             ### a weird output format... we might also be using
 512             ### /usr/local/bin/tar, which is gnu tar, which is perfectly
 513             ### fine... so we have to do some guessing here =/
 514             my @files = map { chomp;
 515                           !ON_SOLARIS ? $_
 516                                       : (m|^ x \s+  # 'xtract' -- sigh
 517                                             (.+?),  # the actual file name
 518                                             \s+ [\d,.]+ \s bytes,
 519                                             \s+ [\d,.]+ \s tape \s blocks
 520                                         |x ? $1 : $_);
 521
 522                     } split $/, $buffer;
 523
 524             ### store the files that are in the archive ###
 525             $self->files(\@files);
 526         }
 527     }
 528
 529     ### now actually extract it ###
 530     {   my $cmd =
 531             $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
 532                              $self->bin_tar, '-xf', '-'] :
 533             $self->is_tbz ? [$self->bin_bunzip2, '-c', $self->archive, '|',
 534                              $self->bin_tar, '-xf', '-'] :
 535             [$self->bin_tar, '-xf', $self->archive];
 536
 537         my $buffer = '';
 538         unless( scalar run( command => $cmd,
 539                             buffer  => \$buffer,
 540                             verbose => $DEBUG )
 541         ) {
 542             return $self->_error(loc("Error extracting archive '%1': %2",
 543                             $self->archive, $buffer ));
 544         }
 545
 546         ### we might not have them, due to lack of buffers
 547         if( $self->files ) {
 548             ### now that we've extracted, figure out where we extracted to
 549             my $dir = $self->__get_extract_dir( $self->files );
 550
 551             ### store the extraction dir ###
 552             $self->extract_path( $dir );
 553         }
 554     }
 555
 556     ### we got here, no error happened
 557     return 1;
 558 }
 559
 560 ### use archive::tar to extract ###
 561 sub _untar_at {
 562     my $self = shift;
 563
 564     ### we definitely need A::T, so load that first
 565     {   my $use_list = { 'Archive::Tar' => '0.0' };
 566
 567         unless( can_load( modules => $use_list ) ) {
 568
 569             return $self->_error(loc("You do not have '%1' installed - " .
 570                                  "Please install it as soon as possible.",
 571                                  'Archive::Tar'));
 572         }
 573     }
 574
 575     ### we might pass it a filehandle if it's a .tbz file..
 576     my $fh_to_read = $self->archive;
 577
 578     ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib
 579     ### if A::T's version is 0.99 or higher
 580     if( $self->is_tgz ) {
 581         my $use_list = { 'Compress::Zlib' => '0.0' };
 582            $use_list->{ 'IO::Zlib' } = '0.0'
 583                 if $Archive::Tar::VERSION >= '0.99';
 584
 585         unless( can_load( modules => $use_list ) ) {
 586             my $which = join '/', sort keys %$use_list;
 587
 588             return $self->_error(loc(
 589                                 "You do not have '%1' installed - Please ".
 590                                 "install it as soon as possible.", $which));
 591
 592         }
 593     } elsif ( $self->is_tbz ) {
 594         my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
 595         unless( can_load( modules => $use_list ) ) {
 596             return $self->_error(loc(
 597                     "You do not have '%1' installed - Please " .
 598                     "install it as soon as possible.",
 599                      'IO::Uncompress::Bunzip2'));
 600         }
 601
 602         my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
 603             return $self->_error(loc("Unable to open '%1': %2",
 604                             $self->archive,
 605                             $IO::Uncompress::Bunzip2::Bunzip2Error));
 606
 607         $fh_to_read = $bz;
 608     }
 609
 610     my $tar = Archive::Tar->new();
 611
 612     ### only tell it it's compressed if it's a .tgz, as we give it a file
 613     ### handle if it's a .tbz
 614     unless( $tar->read( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) ) ) {
 615         return $self->_error(loc("Unable to read '%1': %2", $self->archive,
 616                                     $Archive::Tar::error));
 617     }
 618
 619     ### workaround to prevent Archive::Tar from setting uid, which
 620     ### is a potential security hole. -autrijus
 621     ### have to do it here, since A::T needs to be /loaded/ first ###
 622     {   no strict 'refs'; local $^W;
 623
 624         ### older versions of archive::tar <= 0.23
 625         *Archive::Tar::chown = sub {};
 626     }
 627
 628     ### for version of archive::tar > 1.04
 629     local $Archive::Tar::Constant::CHOWN = 0;
 630
 631     {   local $^W;  # quell 'splice() offset past end of array' warnings
 632                     # on older versions of A::T
 633
 634         ### older archive::tar always returns $self, return value slightly
 635         ### fux0r3d because of it.
 636         $tar->extract()
 637             or return $self->_error(loc("Unable to extract '%1': %2",
 638                                     $self->archive, $Archive::Tar::error ));
 639     }
 640
 641     my @files   = $tar->list_files;
 642     my $dir     = $self->__get_extract_dir( \@files );
 643
 644     ### store the files that are in the archive ###
 645     $self->files(\@files);
 646
 647     ### store the extraction dir ###
 648     $self->extract_path( $dir );
 649
 650     ### check if the dir actually appeared ###
 651     return 1 if -d $self->extract_path;
 652
 653     ### no dir, we failed ###
 654     return $self->_error(loc("Unable to extract '%1': %2",
 655                                 $self->archive, $Archive::Tar::error ));
 656 }
 657
 658 #################################
 659 #
 660 # Gunzip code
 661 #
 662 #################################
 663
 664 ### gunzip wrapper... goes to either Compress::Zlib or /bin/gzip
 665 ### depending on $PREFER_BIN
 666 sub _gunzip {
 667     my $self = shift;
 668
 669     my @methods = qw[_gunzip_cz _gunzip_bin];
 670        @methods = reverse @methods if $PREFER_BIN;
 671
 672     for my $method (@methods) {
 673         $self->_extractor($method) && return 1 if $self->$method();
 674     }
 675
 676     return $self->_error(loc("Unable to gunzip file '%1'", $self->archive));
 677 }
 678
 679 sub _gunzip_bin {
 680     my $self = shift;
 681
 682     ### check for /bin/gzip -- we need it ###
 683     return $self->_error(loc("No '%1' program found", '/bin/gzip'))
 684         unless $self->bin_gzip;
 685
 686
 687     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
 688         return $self->_error(loc("Could not open '%1' for writing: %2",
 689                             $self->_gunzip_to, $! ));
 690
 691     my $cmd = [ $self->bin_gzip, '-cdf', $self->archive ];
 692
 693     my $buffer;
 694     unless( scalar run( command => $cmd,
 695                         verbose => $DEBUG,
 696                         buffer  => \$buffer )
 697     ) {
 698         return $self->_error(loc("Unable to gunzip '%1': %2",
 699                                     $self->archive, $buffer));
 700     }
 701
 702     ### no buffers available?
 703     if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
 704         $self->_error( $self->_no_buffer_content( $self->archive ) );
 705     }
 706
 707     print $fh $buffer if defined $buffer;
 708
 709     close $fh;
 710
 711     ### set what files where extract, and where they went ###
 712     $self->files( [$self->_gunzip_to] );
 713     $self->extract_path( File::Spec->rel2abs(cwd()) );
 714
 715     return 1;
 716 }
 717
 718 sub _gunzip_cz {
 719     my $self = shift;
 720
 721     my $use_list = { 'Compress::Zlib' => '0.0' };
 722     unless( can_load( modules => $use_list ) ) {
 723         return $self->_error(loc("You do not have '%1' installed - Please " .
 724                         "install it as soon as possible.", 'Compress::Zlib'));
 725     }
 726
 727     my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or
 728                 return $self->_error(loc("Unable to open '%1': %2",
 729                             $self->archive, $Compress::Zlib::gzerrno));
 730
 731     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
 732         return $self->_error(loc("Could not open '%1' for writing: %2",
 733                             $self->_gunzip_to, $! ));
 734
 735     my $buffer;
 736     $fh->print($buffer) while $gz->gzread($buffer) > 0;
 737     $fh->close;
 738
 739     ### set what files where extract, and where they went ###
 740     $self->files( [$self->_gunzip_to] );
 741     $self->extract_path( File::Spec->rel2abs(cwd()) );
 742
 743     return 1;
 744 }
 745
 746 #################################
 747 #
 748 # Unzip code
 749 #
 750 #################################
 751
 752 ### unzip wrapper... goes to either Archive::Zip or /bin/unzip
 753 ### depending on $PREFER_BIN
 754 sub _unzip {
 755     my $self = shift;
 756
 757     my @methods = qw[_unzip_az _unzip_bin];
 758        @methods = reverse @methods if $PREFER_BIN;
 759
 760     for my $method (@methods) {
 761         $self->_extractor($method) && return 1 if $self->$method();
 762     }
 763
 764     return $self->_error(loc("Unable to gunzip file '%1'", $self->archive));
 765 }
 766
 767 sub _unzip_bin {
 768     my $self = shift;
 769
 770     ### check for /bin/gzip if we need it ###
 771     return $self->_error(loc("No '%1' program found", '/bin/unzip'))
 772         unless $self->bin_unzip;
 773
 774
 775     ### first, get the files.. it must be 2 different commands with 'unzip' :(
 776     {   my $cmd = [ $self->bin_unzip, '-Z', '-1', $self->archive ];
 777
 778         my $buffer = '';
 779         unless( scalar run( command => $cmd,
 780                             verbose => $DEBUG,
 781                             buffer  => \$buffer )
 782         ) {
 783             return $self->_error(loc("Unable to unzip '%1': %2",
 784                                         $self->archive, $buffer));
 785         }
 786
 787         ### no buffers available?
 788         if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
 789             $self->_error( $self->_no_buffer_files( $self->archive ) );
 790
 791         } else {
 792             $self->files( [split $/, $buffer] );
 793         }
 794     }
 795
 796     ### now, extract the archive ###
 797     {   my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ];
 798
 799         my $buffer;
 800         unless( scalar run( command => $cmd,
 801                             verbose => $DEBUG,
 802                             buffer  => \$buffer )
 803         ) {
 804             return $self->_error(loc("Unable to unzip '%1': %2",
 805                                         $self->archive, $buffer));
 806         }
 807
 808         if( scalar @{$self->files} ) {
 809             my $files   = $self->files;
 810             my $dir     = $self->__get_extract_dir( $files );
 811
 812             $self->extract_path( $dir );
 813         }
 814     }
 815
 816     return 1;
 817 }
 818
 819 sub _unzip_az {
 820     my $self = shift;
 821
 822     my $use_list = { 'Archive::Zip' => '0.0' };
 823     unless( can_load( modules => $use_list ) ) {
 824         return $self->_error(loc("You do not have '%1' installed - Please " .
 825                         "install it as soon as possible.", 'Archive::Zip'));
 826     }
 827
 828     my $zip = Archive::Zip->new();
 829
 830     unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) {
 831         return $self->_error(loc("Unable to read '%1'", $self->archive));
 832     }
 833
 834     my @files;
 835     ### have to extract every memeber individually ###
 836     for my $member ($zip->members) {
 837         push @files, $member->{fileName};
 838
 839         unless( $zip->extractMember($member) == &Archive::Zip::AZ_OK ) {
 840             return $self->_error(loc("Extraction of '%1' from '%2' failed",
 841                         $member->{fileName}, $self->archive ));
 842         }
 843     }
 844
 845     my $dir = $self->__get_extract_dir( \@files );
 846
 847     ### set what files where extract, and where they went ###
 848     $self->files( \@files );
 849     $self->extract_path( File::Spec->rel2abs($dir) );
 850
 851     return 1;
 852 }
 853
 854 sub __get_extract_dir {
 855     my $self    = shift;
 856     my $files   = shift || [];
 857
 858     return unless scalar @$files;
 859
 860     my($dir1, $dir2);
 861     for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
 862         my($dir,$pos) = @$aref;
 863
 864         ### add a catdir(), so that any trailing slashes get
 865         ### take care of (removed)
 866         ### also, a catdir() normalises './dir/foo' to 'dir/foo';
 867         ### which was the problem in bug #23999
 868         my $res = -d $files->[$pos]
 869                     ? File::Spec->catdir( $files->[$pos], '' )
 870                     : File::Spec->catdir( dirname( $files->[$pos] ) );
 871
 872         $$dir = $res;
 873     }
 874
 875     ### if the first and last dir don't match, make sure the
 876     ### dirname is not set wrongly
 877     my $dir;
 878
 879     ### dirs are the same, so we know for sure what the extract dir is
 880     if( $dir1 eq $dir2 ) {
 881         $dir = $dir1;
 882
 883     ### dirs are different.. do they share the base dir?
 884     ### if so, use that, if not, fall back to '.'
 885     } else {
 886         my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
 887         my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
 888
 889         $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
 890     }
 891
 892     return File::Spec->rel2abs( $dir );
 893 }
 894
 895 #################################
 896 #
 897 # Bunzip2 code
 898 #
 899 #################################
 900
 901 ### bunzip2 wrapper...
 902 sub _bunzip2 {
 903     my $self = shift;
 904
 905     my @methods = qw[_bunzip2_cz2 _bunzip2_bin];
 906        @methods = reverse @methods if $PREFER_BIN;
 907
 908     for my $method (@methods) {
 909         $self->_extractor($method) && return 1 if $self->$method();
 910     }
 911
 912     return $self->_error(loc("Unable to bunzip2 file '%1'", $self->archive));
 913 }
 914
 915 sub _bunzip2_bin {
 916     my $self = shift;
 917
 918     ### check for /bin/gzip -- we need it ###
 919     return $self->_error(loc("No '%1' program found", '/bin/bunzip2'))
 920         unless $self->bin_bunzip2;
 921
 922
 923     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
 924         return $self->_error(loc("Could not open '%1' for writing: %2",
 925                             $self->_gunzip_to, $! ));
 926
 927     my $cmd = [ $self->bin_bunzip2, '-c', $self->archive ];
 928
 929     my $buffer;
 930     unless( scalar run( command => $cmd,
 931                         verbose => $DEBUG,
 932                         buffer  => \$buffer )
 933     ) {
 934         return $self->_error(loc("Unable to bunzip2 '%1': %2",
 935                                     $self->archive, $buffer));
 936     }
 937
 938     ### no buffers available?
 939     if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
 940         $self->_error( $self->_no_buffer_content( $self->archive ) );
 941     }
 942
 943     print $fh $buffer if defined $buffer;
 944
 945     close $fh;
 946
 947     ### set what files where extract, and where they went ###
 948     $self->files( [$self->_gunzip_to] );
 949     $self->extract_path( File::Spec->rel2abs(cwd()) );
 950
 951     return 1;
 952 }
 953
 954 ### using cz2, the compact versions... this we use mainly in archive::tar
 955 ### extractor..
 956 # sub _bunzip2_cz1 {
 957 #     my $self = shift;
 958 #
 959 #     my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
 960 #     unless( can_load( modules => $use_list ) ) {
 961 #         return $self->_error(loc("You do not have '%1' installed - Please " .
 962 #                         "install it as soon as possible.",
 963 #                         'IO::Uncompress::Bunzip2'));
 964 #     }
 965 #
 966 #     my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
 967 #                 return $self->_error(loc("Unable to open '%1': %2",
 968 #                             $self->archive,
 969 #                             $IO::Uncompress::Bunzip2::Bunzip2Error));
 970 #
 971 #     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
 972 #         return $self->_error(loc("Could not open '%1' for writing: %2",
 973 #                             $self->_gunzip_to, $! ));
 974 #
 975 #     my $buffer;
 976 #     $fh->print($buffer) while $bz->read($buffer) > 0;
 977 #     $fh->close;
 978 #
 979 #     ### set what files where extract, and where they went ###
 980 #     $self->files( [$self->_gunzip_to] );
 981 #     $self->extract_path( File::Spec->rel2abs(cwd()) );
 982 #
 983 #     return 1;
 984 # }
 985
 986 sub _bunzip2_cz2 {
 987     my $self = shift;
 988
 989     my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
 990     unless( can_load( modules => $use_list ) ) {
 991         return $self->_error(loc("You do not have '%1' installed - Please " .
 992                         "install it as soon as possible.",
 993                         'IO::Uncompress::Bunzip2'));
 994     }
 995
 996     IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to)
 997         or return $self->_error(loc("Unable to uncompress '%1': %2",
 998                             $self->archive,
 999                             $IO::Uncompress::Bunzip2::Bunzip2Error));
1000
1001     ### set what files where extract, and where they went ###
1002     $self->files( [$self->_gunzip_to] );
1003     $self->extract_path( File::Spec->rel2abs(cwd()) );
1004
1005     return 1;
1006 }
1007
1008
1009 #################################
1010 #
1011 # Error code
1012 #
1013 #################################
1014
1015 sub _error {
1016     my $self    = shift;
1017     my $error   = shift;
1018
1019     $self->_error_msg( $error );
1020     $self->_error_msg_long( Carp::longmess($error) );
1021
1022     ### set $Archive::Extract::WARN to 0 to disable printing
1023     ### of errors
1024     if( $WARN ) {
1025         carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1026     }
1027
1028     return;
1029 }
1030
1031 sub error {
1032     my $self = shift;
1033     return shift() ? $self->_error_msg_long : $self->_error_msg;
1034 }
1035
1036 sub _no_buffer_files {
1037     my $self = shift;
1038     my $file = shift or return;
1039     return loc("No buffer captured, unable to tell ".
1040                "extracted files or extraction dir for '%1'", $file);
1041 }
1042
1043 sub _no_buffer_content {
1044     my $self = shift;
1045     my $file = shift or return;
1046     return loc("No buffer captured, unable to get content for '%1'", $file);
1047 }
1048 1;
1049
1050 =pod
1051
1052 =head1 HOW IT WORKS
1053
1054 C<Archive::Extract> tries first to determine what type of archive you
1055 are passing it, by inspecting its suffix. It does not do this by using
1056 Mime magic, or something related. See C<CAVEATS> below.
1057
1058 Once it has determined the file type, it knows which extraction methods
1059 it can use on the archive. It will try a perl solution first, then fall
1060 back to a commandline tool if that fails. If that also fails, it will
1061 return false, indicating it was unable to extract the archive.
1062 See the section on C<GLOBAL VARIABLES> to see how to alter this order.
1063
1064 =head1 CAVEATS
1065
1066 =head2 File Extensions
1067
1068 C<Archive::Extract> trusts on the extension of the archive to determine
1069 what type it is, and what extractor methods therefore can be used. If
1070 your archives do not have any of the extensions as described in the
1071 C<new()> method, you will have to specify the type explicitly, or
1072 C<Archive::Extract> will not be able to extract the archive for you.
1073
1074 =head2 Bzip2 Support
1075
1076 There's currently no very reliable pure perl Bzip2 implementation
1077 available, so C<Archive::Extract> can only extract C<bzip2>
1078 compressed archives if you have a C</bin/bunzip2> program.
1079
1080 =head1 GLOBAL VARIABLES
1081
1082 =head2 $Archive::Extract::DEBUG
1083
1084 Set this variable to C<true> to have all calls to command line tools
1085 be printed out, including all their output.
1086 This also enables C<Carp::longmess> errors, instead of the regular
1087 C<carp> errors.
1088
1089 Good for tracking down why things don't work with your particular
1090 setup.
1091
1092 Defaults to C<false>.
1093
1094 =head2 $Archive::Extract::WARN
1095
1096 This variable controls whether errors encountered internally by
1097 C<Archive::Extract> should be C<carp>'d or not.
1098
1099 Set to false to silence warnings. Inspect the output of the C<error()>
1100 method manually to see what went wrong.
1101
1102 Defaults to C<true>.
1103
1104 =head2 $Archive::Extract::PREFER_BIN
1105
1106 This variables controls whether C<Archive::Extract> should prefer the
1107 use of perl modules, or commandline tools to extract archives.
1108
1109 Set to C<true> to have C<Archive::Extract> prefer commandline tools.
1110
1111 Defaults to C<false>.
1112
1113 =head1 TODO
1114
1115 =over 4
1116
1117 =item Mime magic support
1118
1119 Maybe this module should use something like C<File::Type> to determine
1120 the type, rather than blindly trust the suffix.
1121
1122 =head1 BUG REPORTS
1123
1124 Please report bugs or other issues to E<lt>bug-archive-extract@rt.cpan.org<gt>.
1125
1126 =head1 AUTHOR
1127
1128 This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1129
1130 =head1 COPYRIGHT
1131
1132 This library is free software; you may redistribute and/or modify it
1133 under the same terms as Perl itself.
1134
1135 =cut
1136
1137 # Local variables:
1138 # c-indentation-style: bsd
1139 # c-basic-offset: 4
1140 # indent-tabs-mode: nil
1141 # End:
1142 # vim: expandtab shiftwidth=4:
1143