'make manisort'
[p5sagit/p5-mst-13.2.git] / lib / Archive / Extract.pm
CommitLineData
520c99e2 1package Archive::Extract;
2
3use strict;
4
5use Cwd qw[cwd];
6use Carp qw[carp];
7use IPC::Cmd qw[run can_run];
8use FileHandle;
9use File::Path qw[mkpath];
10use File::Spec;
11use File::Basename qw[dirname basename];
12use Params::Check qw[check];
13use Module::Load::Conditional qw[can_load check_install];
14use Locale::Maketext::Simple Style => 'gettext';
15
16### solaris has silly /bin/tar output ###
17use constant ON_SOLARIS => $^O eq 'solaris' ? 1 : 0;
18use constant FILE_EXISTS => sub { -e $_[0] ? 1 : 0 };
19
4f3b9739 20### VMS may require quoting upper case command options
21use constant ON_VMS => $^O eq 'VMS' ? 1 : 0;
22
520c99e2 23### If these are changed, update @TYPES and the new() POD
24use constant TGZ => 'tgz';
25use constant TAR => 'tar';
26use constant GZ => 'gz';
27use constant ZIP => 'zip';
28use constant BZ2 => 'bz2';
29use constant TBZ => 'tbz';
1dae2fb5 30use constant Z => 'Z';
8d2ac73b 31use constant LZMA => 'lzma';
520c99e2 32
33use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG];
34
8d2ac73b 35$VERSION = '0.26';
520c99e2 36$PREFER_BIN = 0;
37$WARN = 1;
38$DEBUG = 0;
8d2ac73b 39my @Types = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ, Z, LZMA ); # same as all constants
520c99e2 40
41local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1;
42
43=pod
44
45=head1 NAME
46
47Archive::Extract - A generic archive extracting mechanism
48
49=head1 SYNOPSIS
50
51 use Archive::Extract;
52
53 ### build an Archive::Extract object ###
54 my $ae = Archive::Extract->new( archive => 'foo.tgz' );
55
56 ### extract to cwd() ###
57 my $ok = $ae->extract;
58
59 ### extract to /tmp ###
60 my $ok = $ae->extract( to => '/tmp' );
61
62 ### what if something went wrong?
63 my $ok = $ae->extract or die $ae->error;
64
65 ### files from the archive ###
66 my $files = $ae->files;
67
68 ### dir that was extracted to ###
69 my $outdir = $ae->extract_path;
70
71
72 ### quick check methods ###
73 $ae->is_tar # is it a .tar file?
74 $ae->is_tgz # is it a .tar.gz or .tgz file?
75 $ae->is_gz; # is it a .gz file?
76 $ae->is_zip; # is it a .zip file?
77 $ae->is_bz2; # is it a .bz2 file?
78 $ae->is_tbz; # is it a .tar.bz2 or .tbz file?
8d2ac73b 79 $ae->is_lzma; # is it a .lzma file?
520c99e2 80
81 ### absolute path to the archive you provided ###
82 $ae->archive;
83
84 ### commandline tools, if found ###
85 $ae->bin_tar # path to /bin/tar, if found
86 $ae->bin_gzip # path to /bin/gzip, if found
87 $ae->bin_unzip # path to /bin/unzip, if found
88 $ae->bin_bunzip2 # path to /bin/bunzip2 if found
8d2ac73b 89 $ae->bin_unlzma # path to /bin/unlzma if found
520c99e2 90
91=head1 DESCRIPTION
92
93Archive::Extract is a generic archive extraction mechanism.
94
95It allows you to extract any archive file of the type .tar, .tar.gz,
8d2ac73b 96.gz, .Z, tar.bz2, .tbz, .bz2, .zip or .lzma without having to worry how it
1dae2fb5 97does so, or use different interfaces for each type by using either
98perl modules, or commandline tools on your system.
520c99e2 99
100See the C<HOW IT WORKS> section further down for details.
101
102=cut
103
104
105### see what /bin/programs are available ###
106$PROGRAMS = {};
8d2ac73b 107for my $pgm (qw[tar unzip gzip bunzip2 uncompress unlzma]) {
520c99e2 108 $PROGRAMS->{$pgm} = can_run($pgm);
109}
110
111### mapping from types to extractor methods ###
112my $Mapping = {
113 is_tgz => '_untar',
114 is_tar => '_untar',
115 is_gz => '_gunzip',
116 is_zip => '_unzip',
117 is_tbz => '_untar',
118 is_bz2 => '_bunzip2',
1dae2fb5 119 is_Z => '_uncompress',
8d2ac73b 120 is_lzma => '_unlzma',
520c99e2 121};
122
123{
124 my $tmpl = {
125 archive => { required => 1, allow => FILE_EXISTS },
126 type => { default => '', allow => [ @Types ] },
127 };
128
129 ### build accesssors ###
130 for my $method( keys %$tmpl,
131 qw[_extractor _gunzip_to files extract_path],
132 qw[_error_msg _error_msg_long]
133 ) {
134 no strict 'refs';
135 *$method = sub {
136 my $self = shift;
137 $self->{$method} = $_[0] if @_;
138 return $self->{$method};
139 }
140 }
141
142=head1 METHODS
143
144=head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE])
145
146Creates a new C<Archive::Extract> object based on the archive file you
147passed it. Automatically determines the type of archive based on the
148extension, but you can override that by explicitly providing the
149C<type> argument.
150
151Valid values for C<type> are:
152
153=over 4
154
155=item tar
156
157Standard tar files, as produced by, for example, C</bin/tar>.
158Corresponds to a C<.tar> suffix.
159
160=item tgz
161
162Gzip compressed tar files, as produced by, for example C</bin/tar -z>.
163Corresponds to a C<.tgz> or C<.tar.gz> suffix.
164
165=item gz
166
167Gzip compressed file, as produced by, for example C</bin/gzip>.
168Corresponds to a C<.gz> suffix.
169
1dae2fb5 170=item Z
171
172Lempel-Ziv compressed file, as produced by, for example C</bin/compress>.
173Corresponds to a C<.Z> suffix.
174
520c99e2 175=item zip
176
177Zip compressed file, as produced by, for example C</bin/zip>.
178Corresponds to a C<.zip>, C<.jar> or C<.par> suffix.
179
180=item bz2
181
182Bzip2 compressed file, as produced by, for example, C</bin/bzip2>.
183Corresponds to a C<.bz2> suffix.
184
185=item tbz
186
187Bzip2 compressed tar file, as produced by, for exmample C</bin/tar -j>.
188Corresponds to a C<.tbz> or C<.tar.bz2> suffix.
189
8d2ac73b 190=item lzma
191
192Lzma compressed file, as produced by C</bin/lzma>.
193Corresponds to a C<.lzma> suffix.
194
520c99e2 195=back
196
197Returns a C<Archive::Extract> object on success, or false on failure.
198
199=cut
200
201 ### constructor ###
202 sub new {
203 my $class = shift;
204 my %hash = @_;
205
206 my $parsed = check( $tmpl, \%hash ) or return;
207
208 ### make sure we have an absolute path ###
209 my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} );
210
211 ### figure out the type, if it wasn't already specified ###
212 unless ( $parsed->{type} ) {
213 $parsed->{type} =
574b415d 214 $ar =~ /.+?\.(?:tar\.gz|tgz)$/i ? TGZ :
520c99e2 215 $ar =~ /.+?\.gz$/i ? GZ :
216 $ar =~ /.+?\.tar$/i ? TAR :
217 $ar =~ /.+?\.(zip|jar|par)$/i ? ZIP :
9e5a0ef9 218 $ar =~ /.+?\.(?:tbz2?|tar\.bz2?)$/i ? TBZ :
520c99e2 219 $ar =~ /.+?\.bz2$/i ? BZ2 :
1dae2fb5 220 $ar =~ /.+?\.Z$/ ? Z :
8d2ac73b 221 $ar =~ /.+?\.lzma$/ ? LZMA :
520c99e2 222 '';
223
224 }
225
226 ### don't know what type of file it is ###
227 return __PACKAGE__->_error(loc("Cannot determine file type for '%1'",
228 $parsed->{archive} )) unless $parsed->{type};
229
230 return bless $parsed, $class;
231 }
232}
233
234=head2 $ae->extract( [to => '/output/path'] )
235
236Extracts the archive represented by the C<Archive::Extract> object to
237the path of your choice as specified by the C<to> argument. Defaults to
238C<cwd()>.
239
240Since C<.gz> files never hold a directory, but only a single file; if
241the C<to> argument is an existing directory, the file is extracted
242there, with it's C<.gz> suffix stripped.
243If the C<to> argument is not an existing directory, the C<to> argument
244is understood to be a filename, if the archive type is C<gz>.
245In the case that you did not specify a C<to> argument, the output
246file will be the name of the archive file, stripped from it's C<.gz>
247suffix, in the current working directory.
248
249C<extract> will try a pure perl solution first, and then fall back to
250commandline tools if they are available. See the C<GLOBAL VARIABLES>
251section below on how to alter this behaviour.
252
253It will return true on success, and false on failure.
254
255On success, it will also set the follow attributes in the object:
256
257=over 4
258
259=item $ae->extract_path
260
261This is the directory that the files where extracted to.
262
263=item $ae->files
264
265This is an array ref with the paths of all the files in the archive,
266relative to the C<to> argument you specified.
267To get the full path to an extracted file, you would use:
268
269 File::Spec->catfile( $to, $ae->files->[0] );
270
271Note that all files from a tar archive will be in unix format, as per
272the tar specification.
273
274=back
275
276=cut
277
278sub extract {
279 my $self = shift;
280 my %hash = @_;
281
282 my $to;
283 my $tmpl = {
284 to => { default => '.', store => \$to }
285 };
286
287 check( $tmpl, \%hash ) or return;
288
289 ### so 'to' could be a file or a dir, depending on whether it's a .gz
290 ### file, or basically anything else.
291 ### so, check that, then act accordingly.
292 ### set an accessor specifically so _gunzip can know what file to extract
293 ### to.
294 my $dir;
295 { ### a foo.gz file
8d2ac73b 296 if( $self->is_gz or $self->is_bz2 or $self->is_Z or $self->is_lzma ) {
520c99e2 297
8d2ac73b 298 my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2?|Z|lzma)$//i;
520c99e2 299
300 ### to is a dir?
301 if ( -d $to ) {
302 $dir = $to;
303 $self->_gunzip_to( basename($cp) );
304
305 ### then it's a filename
306 } else {
307 $dir = dirname($to);
308 $self->_gunzip_to( basename($to) );
309 }
310
311 ### not a foo.gz file
312 } else {
313 $dir = $to;
314 }
315 }
316
317 ### make the dir if it doesn't exist ###
318 unless( -d $dir ) {
319 eval { mkpath( $dir ) };
320
321 return $self->_error(loc("Could not create path '%1': %2", $dir, $@))
322 if $@;
323 }
324
325 ### get the current dir, to restore later ###
326 my $cwd = cwd();
327
328 my $ok = 1;
329 EXTRACT: {
330
331 ### chdir to the target dir ###
332 unless( chdir $dir ) {
333 $self->_error(loc("Could not chdir to '%1': %2", $dir, $!));
334 $ok = 0; last EXTRACT;
335 }
336
337 ### set files to an empty array ref, so there's always an array
338 ### ref IN the accessor, to avoid errors like:
339 ### Can't use an undefined value as an ARRAY reference at
340 ### ../lib/Archive/Extract.pm line 742. (rt #19815)
341 $self->files( [] );
342
343 ### find what extractor method to use ###
344 while( my($type,$method) = each %$Mapping ) {
345
346 ### call the corresponding method if the type is OK ###
347 if( $self->$type) {
348 $ok = $self->$method();
349 }
350 }
351
352 ### warn something went wrong if we didn't get an OK ###
353 $self->_error(loc("Extract failed, no extractor found"))
354 unless $ok;
355
356 }
357
358 ### and chdir back ###
359 unless( chdir $cwd ) {
360 $self->_error(loc("Could not chdir back to start dir '%1': %2'",
361 $cwd, $!));
362 }
363
364 return $ok;
365}
366
367=pod
368
369=head1 ACCESSORS
370
371=head2 $ae->error([BOOL])
372
373Returns the last encountered error as string.
374Pass it a true value to get the C<Carp::longmess()> output instead.
375
376=head2 $ae->extract_path
377
378This is the directory the archive got extracted to.
379See C<extract()> for details.
380
381=head2 $ae->files
382
383This is an array ref holding all the paths from the archive.
384See C<extract()> for details.
385
386=head2 $ae->archive
387
388This is the full path to the archive file represented by this
389C<Archive::Extract> object.
390
391=head2 $ae->type
392
393This is the type of archive represented by this C<Archive::Extract>
394object. See accessors below for an easier way to use this.
395See the C<new()> method for details.
396
397=head2 $ae->types
398
399Returns a list of all known C<types> for C<Archive::Extract>'s
400C<new> method.
401
402=cut
403
404sub types { return @Types }
405
406=head2 $ae->is_tgz
407
408Returns true if the file is of type C<.tar.gz>.
409See the C<new()> method for details.
410
411=head2 $ae->is_tar
412
413Returns true if the file is of type C<.tar>.
414See the C<new()> method for details.
415
416=head2 $ae->is_gz
417
418Returns true if the file is of type C<.gz>.
419See the C<new()> method for details.
420
1dae2fb5 421=head2 $ae->is_Z
422
423Returns true if the file is of type C<.Z>.
424See the C<new()> method for details.
425
520c99e2 426=head2 $ae->is_zip
427
428Returns true if the file is of type C<.zip>.
429See the C<new()> method for details.
430
8d2ac73b 431=head2 $ae->is_lzma
432
433Returns true if the file is of type C<.lzma>.
434See the C<new()> method for details.
435
520c99e2 436=cut
437
438### quick check methods ###
439sub is_tgz { return $_[0]->type eq TGZ }
440sub is_tar { return $_[0]->type eq TAR }
441sub is_gz { return $_[0]->type eq GZ }
442sub is_zip { return $_[0]->type eq ZIP }
443sub is_tbz { return $_[0]->type eq TBZ }
444sub is_bz2 { return $_[0]->type eq BZ2 }
1dae2fb5 445sub is_Z { return $_[0]->type eq Z }
8d2ac73b 446sub is_lzma { return $_[0]->type eq LZMA }
520c99e2 447
448=pod
449
450=head2 $ae->bin_tar
451
452Returns the full path to your tar binary, if found.
453
454=head2 $ae->bin_gzip
455
456Returns the full path to your gzip binary, if found
457
458=head2 $ae->bin_unzip
459
460Returns the full path to your unzip binary, if found
461
8d2ac73b 462=head2 $ae->bin_unlzma
463
464Returns the full path to your unlzma binary, if found
465
520c99e2 466=cut
467
468### paths to commandline tools ###
1dae2fb5 469sub bin_gzip { return $PROGRAMS->{'gzip'} if $PROGRAMS->{'gzip'} }
470sub bin_unzip { return $PROGRAMS->{'unzip'} if $PROGRAMS->{'unzip'} }
471sub bin_tar { return $PROGRAMS->{'tar'} if $PROGRAMS->{'tar'} }
472sub bin_bunzip2 { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} }
473sub bin_uncompress { return $PROGRAMS->{'uncompress'}
474 if $PROGRAMS->{'uncompress'} }
8d2ac73b 475sub bin_unlzma { return $PROGRAMS->{'unlzma'} if $PROGRAMS->{'unlzma'} }
476
9e5a0ef9 477=head2 $bool = $ae->have_old_bunzip2
478
479Older versions of C</bin/bunzip2>, from before the C<bunzip2 1.0> release,
480require all archive names to end in C<.bz2> or it will not extract
481them. This method checks if you have a recent version of C<bunzip2>
482that allows any extension, or an older one that doesn't.
483
484=cut
485
486sub have_old_bunzip2 {
487 my $self = shift;
488
489 ### no bunzip2? no old bunzip2 either :)
490 return unless $self->bin_bunzip2;
491
492 ### if we can't run this, we can't be sure if it's too old or not
493 ### XXX stupid stupid stupid bunzip2 doesn't understand --version
494 ### is not a request to extract data:
495 ### $ bunzip2 --version
496 ### bzip2, a block-sorting file compressor. Version 1.0.2, 30-Dec-2001.
497 ### [...]
498 ### bunzip2: I won't read compressed data from a terminal.
499 ### bunzip2: For help, type: `bunzip2 --help'.
500 ### $ echo $?
501 ### 1
502 ### HATEFUL!
8d2ac73b 503
504 ### double hateful: bunzip2 --version also hangs if input is a pipe
505 ### See #32370: Archive::Extract will hang if stdin is a pipe [+PATCH]
506 ### So, we have to provide *another* argument which is a fake filename,
507 ### just so it wont try to read from stdin to print it's version..
508 ### *sigh*
509 ### Even if the file exists, it won't clobber or change it.
9e5a0ef9 510 my $buffer;
8d2ac73b 511 scalar run(
512 command => [$self->bin_bunzip2, '--version', 'NoSuchFile'],
9e5a0ef9 513 verbose => 0,
514 buffer => \$buffer
515 );
516
517 ### no output
518 return unless $buffer;
519
520 my ($version) = $buffer =~ /version \s+ (\d+)/ix;
521
522 return 1 if $version < 1;
523 return;
524}
520c99e2 525
526#################################
527#
528# Untar code
529#
530#################################
531
520c99e2 532### untar wrapper... goes to either Archive::Tar or /bin/tar
533### depending on $PREFER_BIN
534sub _untar {
535 my $self = shift;
536
537 ### bzip2 support in A::T via IO::Uncompress::Bzip2
538 my @methods = qw[_untar_at _untar_bin];
574b415d 539 @methods = reverse @methods if $PREFER_BIN;
520c99e2 540
541 for my $method (@methods) {
542 $self->_extractor($method) && return 1 if $self->$method();
543 }
544
545 return $self->_error(loc("Unable to untar file '%1'", $self->archive));
546}
547
548### use /bin/tar to extract ###
549sub _untar_bin {
550 my $self = shift;
551
552 ### check for /bin/tar ###
553 return $self->_error(loc("No '%1' program found", '/bin/tar'))
554 unless $self->bin_tar;
555
556 ### check for /bin/gzip if we need it ###
557 return $self->_error(loc("No '%1' program found", '/bin/gzip'))
558 if $self->is_tgz && !$self->bin_gzip;
559
560 return $self->_error(loc("No '%1' program found", '/bin/bunzip2'))
561 if $self->is_tbz && !$self->bin_bunzip2;
562
563 ### XXX figure out how to make IPC::Run do this in one call --
564 ### currently i don't know how to get output of a command after a pipe
565 ### trapped in a scalar. Mailed barries about this 5th of june 2004.
566
567
568
569 ### see what command we should run, based on whether
570 ### it's a .tgz or .tar
571
572 ### XXX solaris tar and bsdtar are having different outputs
573 ### depending whether you run with -x or -t
574 ### compensate for this insanity by running -t first, then -x
575 { my $cmd =
576 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
577 $self->bin_tar, '-tf', '-'] :
9e5a0ef9 578 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
520c99e2 579 $self->bin_tar, '-tf', '-'] :
580 [$self->bin_tar, '-tf', $self->archive];
581
582 ### run the command ###
583 my $buffer = '';
584 unless( scalar run( command => $cmd,
585 buffer => \$buffer,
586 verbose => $DEBUG )
587 ) {
588 return $self->_error(loc(
589 "Error listing contents of archive '%1': %2",
590 $self->archive, $buffer ));
591 }
592
593 ### no buffers available?
594 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
595 $self->_error( $self->_no_buffer_files( $self->archive ) );
596
597 } else {
598 ### if we're on solaris we /might/ be using /bin/tar, which has
599 ### a weird output format... we might also be using
600 ### /usr/local/bin/tar, which is gnu tar, which is perfectly
601 ### fine... so we have to do some guessing here =/
602 my @files = map { chomp;
603 !ON_SOLARIS ? $_
604 : (m|^ x \s+ # 'xtract' -- sigh
605 (.+?), # the actual file name
606 \s+ [\d,.]+ \s bytes,
607 \s+ [\d,.]+ \s tape \s blocks
608 |x ? $1 : $_);
609
610 } split $/, $buffer;
611
612 ### store the files that are in the archive ###
613 $self->files(\@files);
614 }
615 }
616
617 ### now actually extract it ###
618 { my $cmd =
619 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
620 $self->bin_tar, '-xf', '-'] :
9e5a0ef9 621 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
520c99e2 622 $self->bin_tar, '-xf', '-'] :
623 [$self->bin_tar, '-xf', $self->archive];
624
625 my $buffer = '';
626 unless( scalar run( command => $cmd,
627 buffer => \$buffer,
628 verbose => $DEBUG )
629 ) {
630 return $self->_error(loc("Error extracting archive '%1': %2",
631 $self->archive, $buffer ));
632 }
633
634 ### we might not have them, due to lack of buffers
635 if( $self->files ) {
636 ### now that we've extracted, figure out where we extracted to
637 my $dir = $self->__get_extract_dir( $self->files );
638
639 ### store the extraction dir ###
640 $self->extract_path( $dir );
641 }
642 }
643
644 ### we got here, no error happened
645 return 1;
646}
647
648### use archive::tar to extract ###
649sub _untar_at {
650 my $self = shift;
651
652 ### we definitely need A::T, so load that first
653 { my $use_list = { 'Archive::Tar' => '0.0' };
654
655 unless( can_load( modules => $use_list ) ) {
656
657 return $self->_error(loc("You do not have '%1' installed - " .
658 "Please install it as soon as possible.",
659 'Archive::Tar'));
660 }
661 }
662
663 ### we might pass it a filehandle if it's a .tbz file..
664 my $fh_to_read = $self->archive;
665
666 ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib
667 ### if A::T's version is 0.99 or higher
668 if( $self->is_tgz ) {
669 my $use_list = { 'Compress::Zlib' => '0.0' };
670 $use_list->{ 'IO::Zlib' } = '0.0'
671 if $Archive::Tar::VERSION >= '0.99';
672
673 unless( can_load( modules => $use_list ) ) {
674 my $which = join '/', sort keys %$use_list;
675
676 return $self->_error(loc(
677 "You do not have '%1' installed - Please ".
678 "install it as soon as possible.", $which));
679
680 }
681 } elsif ( $self->is_tbz ) {
682 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
683 unless( can_load( modules => $use_list ) ) {
684 return $self->_error(loc(
685 "You do not have '%1' installed - Please " .
686 "install it as soon as possible.",
687 'IO::Uncompress::Bunzip2'));
688 }
689
690 my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
691 return $self->_error(loc("Unable to open '%1': %2",
692 $self->archive,
693 $IO::Uncompress::Bunzip2::Bunzip2Error));
694
695 $fh_to_read = $bz;
696 }
697
698 my $tar = Archive::Tar->new();
699
700 ### only tell it it's compressed if it's a .tgz, as we give it a file
701 ### handle if it's a .tbz
702 unless( $tar->read( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) ) ) {
703 return $self->_error(loc("Unable to read '%1': %2", $self->archive,
704 $Archive::Tar::error));
705 }
706
707 ### workaround to prevent Archive::Tar from setting uid, which
708 ### is a potential security hole. -autrijus
709 ### have to do it here, since A::T needs to be /loaded/ first ###
710 { no strict 'refs'; local $^W;
711
712 ### older versions of archive::tar <= 0.23
713 *Archive::Tar::chown = sub {};
714 }
715
716 ### for version of archive::tar > 1.04
717 local $Archive::Tar::Constant::CHOWN = 0;
718
719 { local $^W; # quell 'splice() offset past end of array' warnings
720 # on older versions of A::T
721
722 ### older archive::tar always returns $self, return value slightly
723 ### fux0r3d because of it.
724 $tar->extract()
725 or return $self->_error(loc("Unable to extract '%1': %2",
726 $self->archive, $Archive::Tar::error ));
727 }
728
729 my @files = $tar->list_files;
730 my $dir = $self->__get_extract_dir( \@files );
731
732 ### store the files that are in the archive ###
733 $self->files(\@files);
734
735 ### store the extraction dir ###
736 $self->extract_path( $dir );
737
738 ### check if the dir actually appeared ###
739 return 1 if -d $self->extract_path;
740
741 ### no dir, we failed ###
742 return $self->_error(loc("Unable to extract '%1': %2",
743 $self->archive, $Archive::Tar::error ));
744}
745
746#################################
747#
748# Gunzip code
749#
750#################################
751
752### gunzip wrapper... goes to either Compress::Zlib or /bin/gzip
753### depending on $PREFER_BIN
754sub _gunzip {
755 my $self = shift;
756
757 my @methods = qw[_gunzip_cz _gunzip_bin];
758 @methods = reverse @methods if $PREFER_BIN;
759
760 for my $method (@methods) {
761 $self->_extractor($method) && return 1 if $self->$method();
762 }
763
764 return $self->_error(loc("Unable to gunzip file '%1'", $self->archive));
765}
766
767sub _gunzip_bin {
768 my $self = shift;
769
770 ### check for /bin/gzip -- we need it ###
771 return $self->_error(loc("No '%1' program found", '/bin/gzip'))
772 unless $self->bin_gzip;
773
774
775 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
776 return $self->_error(loc("Could not open '%1' for writing: %2",
777 $self->_gunzip_to, $! ));
778
779 my $cmd = [ $self->bin_gzip, '-cdf', $self->archive ];
780
781 my $buffer;
782 unless( scalar run( command => $cmd,
783 verbose => $DEBUG,
784 buffer => \$buffer )
785 ) {
786 return $self->_error(loc("Unable to gunzip '%1': %2",
787 $self->archive, $buffer));
788 }
789
790 ### no buffers available?
791 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
792 $self->_error( $self->_no_buffer_content( $self->archive ) );
793 }
794
795 print $fh $buffer if defined $buffer;
796
797 close $fh;
798
799 ### set what files where extract, and where they went ###
800 $self->files( [$self->_gunzip_to] );
801 $self->extract_path( File::Spec->rel2abs(cwd()) );
802
803 return 1;
804}
805
806sub _gunzip_cz {
807 my $self = shift;
808
809 my $use_list = { 'Compress::Zlib' => '0.0' };
810 unless( can_load( modules => $use_list ) ) {
811 return $self->_error(loc("You do not have '%1' installed - Please " .
812 "install it as soon as possible.", 'Compress::Zlib'));
813 }
814
815 my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or
816 return $self->_error(loc("Unable to open '%1': %2",
817 $self->archive, $Compress::Zlib::gzerrno));
818
819 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
820 return $self->_error(loc("Could not open '%1' for writing: %2",
821 $self->_gunzip_to, $! ));
822
823 my $buffer;
824 $fh->print($buffer) while $gz->gzread($buffer) > 0;
825 $fh->close;
826
827 ### set what files where extract, and where they went ###
828 $self->files( [$self->_gunzip_to] );
829 $self->extract_path( File::Spec->rel2abs(cwd()) );
830
831 return 1;
832}
833
834#################################
835#
1dae2fb5 836# Uncompress code
837#
838#################################
839
840
841### untar wrapper... goes to either Archive::Tar or /bin/tar
842### depending on $PREFER_BIN
843sub _uncompress {
844 my $self = shift;
845
846 my @methods = qw[_gunzip_cz _uncompress_bin];
847 @methods = reverse @methods if $PREFER_BIN;
848
849 for my $method (@methods) {
850 $self->_extractor($method) && return 1 if $self->$method();
851 }
852
853 return $self->_error(loc("Unable to untar file '%1'", $self->archive));
854}
855
856sub _uncompress_bin {
857 my $self = shift;
858
859 ### check for /bin/gzip -- we need it ###
860 return $self->_error(loc("No '%1' program found", '/bin/uncompress'))
861 unless $self->bin_uncompress;
862
863
864 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
865 return $self->_error(loc("Could not open '%1' for writing: %2",
866 $self->_gunzip_to, $! ));
867
868 my $cmd = [ $self->bin_uncompress, '-c', $self->archive ];
869
870 my $buffer;
871 unless( scalar run( command => $cmd,
872 verbose => $DEBUG,
873 buffer => \$buffer )
874 ) {
875 return $self->_error(loc("Unable to uncompress '%1': %2",
876 $self->archive, $buffer));
877 }
878
879 ### no buffers available?
880 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
881 $self->_error( $self->_no_buffer_content( $self->archive ) );
882 }
883
884 print $fh $buffer if defined $buffer;
885
886 close $fh;
887
888 ### set what files where extract, and where they went ###
889 $self->files( [$self->_gunzip_to] );
890 $self->extract_path( File::Spec->rel2abs(cwd()) );
891
892 return 1;
893}
894
895
896#################################
897#
520c99e2 898# Unzip code
899#
900#################################
901
902### unzip wrapper... goes to either Archive::Zip or /bin/unzip
903### depending on $PREFER_BIN
904sub _unzip {
905 my $self = shift;
906
907 my @methods = qw[_unzip_az _unzip_bin];
908 @methods = reverse @methods if $PREFER_BIN;
909
910 for my $method (@methods) {
911 $self->_extractor($method) && return 1 if $self->$method();
912 }
913
914 return $self->_error(loc("Unable to gunzip file '%1'", $self->archive));
915}
916
917sub _unzip_bin {
918 my $self = shift;
919
920 ### check for /bin/gzip if we need it ###
921 return $self->_error(loc("No '%1' program found", '/bin/unzip'))
922 unless $self->bin_unzip;
923
924
925 ### first, get the files.. it must be 2 different commands with 'unzip' :(
9e5a0ef9 926 { ### on VMS, capital letter options have to be quoted. This is
927 ### peported by John Malmberg on P5P Tue 21 Aug 2007 05:05:11
928 ### Subject: [patch@31735]Archive Extract fix on VMS.
929 my $opt = ON_VMS ? '"-Z"' : '-Z';
930 my $cmd = [ $self->bin_unzip, $opt, '-1', $self->archive ];
931
520c99e2 932 my $buffer = '';
933 unless( scalar run( command => $cmd,
934 verbose => $DEBUG,
935 buffer => \$buffer )
936 ) {
937 return $self->_error(loc("Unable to unzip '%1': %2",
938 $self->archive, $buffer));
939 }
940
941 ### no buffers available?
942 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
943 $self->_error( $self->_no_buffer_files( $self->archive ) );
944
945 } else {
946 $self->files( [split $/, $buffer] );
947 }
948 }
949
950 ### now, extract the archive ###
951 { my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ];
952
953 my $buffer;
954 unless( scalar run( command => $cmd,
955 verbose => $DEBUG,
956 buffer => \$buffer )
957 ) {
958 return $self->_error(loc("Unable to unzip '%1': %2",
959 $self->archive, $buffer));
960 }
961
962 if( scalar @{$self->files} ) {
963 my $files = $self->files;
964 my $dir = $self->__get_extract_dir( $files );
965
966 $self->extract_path( $dir );
967 }
968 }
969
970 return 1;
971}
972
973sub _unzip_az {
974 my $self = shift;
975
976 my $use_list = { 'Archive::Zip' => '0.0' };
977 unless( can_load( modules => $use_list ) ) {
978 return $self->_error(loc("You do not have '%1' installed - Please " .
979 "install it as soon as possible.", 'Archive::Zip'));
980 }
981
982 my $zip = Archive::Zip->new();
983
984 unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) {
985 return $self->_error(loc("Unable to read '%1'", $self->archive));
986 }
987
988 my @files;
989 ### have to extract every memeber individually ###
990 for my $member ($zip->members) {
991 push @files, $member->{fileName};
992
993 unless( $zip->extractMember($member) == &Archive::Zip::AZ_OK ) {
994 return $self->_error(loc("Extraction of '%1' from '%2' failed",
995 $member->{fileName}, $self->archive ));
996 }
997 }
998
999 my $dir = $self->__get_extract_dir( \@files );
1000
1001 ### set what files where extract, and where they went ###
1002 $self->files( \@files );
1003 $self->extract_path( File::Spec->rel2abs($dir) );
1004
1005 return 1;
1006}
1007
1008sub __get_extract_dir {
1009 my $self = shift;
1010 my $files = shift || [];
1011
1012 return unless scalar @$files;
1013
1014 my($dir1, $dir2);
1015 for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
1016 my($dir,$pos) = @$aref;
1017
1018 ### add a catdir(), so that any trailing slashes get
1019 ### take care of (removed)
1020 ### also, a catdir() normalises './dir/foo' to 'dir/foo';
1021 ### which was the problem in bug #23999
1022 my $res = -d $files->[$pos]
1023 ? File::Spec->catdir( $files->[$pos], '' )
1024 : File::Spec->catdir( dirname( $files->[$pos] ) );
1025
1026 $$dir = $res;
1027 }
1028
1029 ### if the first and last dir don't match, make sure the
1030 ### dirname is not set wrongly
1031 my $dir;
1032
1033 ### dirs are the same, so we know for sure what the extract dir is
1034 if( $dir1 eq $dir2 ) {
1035 $dir = $dir1;
1036
1037 ### dirs are different.. do they share the base dir?
1038 ### if so, use that, if not, fall back to '.'
1039 } else {
1040 my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
1041 my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
1042
1043 $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
1044 }
1045
1046 return File::Spec->rel2abs( $dir );
1047}
1048
1049#################################
1050#
1051# Bunzip2 code
1052#
1053#################################
1054
1055### bunzip2 wrapper...
1056sub _bunzip2 {
1057 my $self = shift;
1058
1059 my @methods = qw[_bunzip2_cz2 _bunzip2_bin];
1060 @methods = reverse @methods if $PREFER_BIN;
1061
1062 for my $method (@methods) {
1063 $self->_extractor($method) && return 1 if $self->$method();
1064 }
1065
1066 return $self->_error(loc("Unable to bunzip2 file '%1'", $self->archive));
1067}
1068
1069sub _bunzip2_bin {
1070 my $self = shift;
1071
1072 ### check for /bin/gzip -- we need it ###
1073 return $self->_error(loc("No '%1' program found", '/bin/bunzip2'))
1074 unless $self->bin_bunzip2;
1075
1076
1077 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1078 return $self->_error(loc("Could not open '%1' for writing: %2",
1079 $self->_gunzip_to, $! ));
9e5a0ef9 1080
1081 ### guard against broken bunzip2. See ->have_old_bunzip2()
1082 ### for details
1083 if( $self->have_old_bunzip2 and $self->archive !~ /\.bz2$/i ) {
1084 return $self->_error(loc("Your bunzip2 version is too old and ".
1085 "can only extract files ending in '%1'",
1086 '.bz2'));
1087 }
520c99e2 1088
9e5a0ef9 1089 my $cmd = [ $self->bin_bunzip2, '-cd', $self->archive ];
520c99e2 1090
1091 my $buffer;
1092 unless( scalar run( command => $cmd,
1093 verbose => $DEBUG,
1094 buffer => \$buffer )
1095 ) {
1096 return $self->_error(loc("Unable to bunzip2 '%1': %2",
1097 $self->archive, $buffer));
1098 }
1099
1100 ### no buffers available?
1101 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1102 $self->_error( $self->_no_buffer_content( $self->archive ) );
1103 }
1104
1105 print $fh $buffer if defined $buffer;
1106
1107 close $fh;
1108
1109 ### set what files where extract, and where they went ###
1110 $self->files( [$self->_gunzip_to] );
1111 $self->extract_path( File::Spec->rel2abs(cwd()) );
1112
1113 return 1;
1114}
1115
1116### using cz2, the compact versions... this we use mainly in archive::tar
1117### extractor..
1118# sub _bunzip2_cz1 {
1119# my $self = shift;
1120#
1121# my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1122# unless( can_load( modules => $use_list ) ) {
1123# return $self->_error(loc("You do not have '%1' installed - Please " .
1124# "install it as soon as possible.",
1125# 'IO::Uncompress::Bunzip2'));
1126# }
1127#
1128# my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
1129# return $self->_error(loc("Unable to open '%1': %2",
1130# $self->archive,
1131# $IO::Uncompress::Bunzip2::Bunzip2Error));
1132#
1133# my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1134# return $self->_error(loc("Could not open '%1' for writing: %2",
1135# $self->_gunzip_to, $! ));
1136#
1137# my $buffer;
1138# $fh->print($buffer) while $bz->read($buffer) > 0;
1139# $fh->close;
1140#
1141# ### set what files where extract, and where they went ###
1142# $self->files( [$self->_gunzip_to] );
1143# $self->extract_path( File::Spec->rel2abs(cwd()) );
1144#
1145# return 1;
1146# }
1147
1148sub _bunzip2_cz2 {
1149 my $self = shift;
1150
1151 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1152 unless( can_load( modules => $use_list ) ) {
1153 return $self->_error(loc("You do not have '%1' installed - Please " .
1154 "install it as soon as possible.",
1155 'IO::Uncompress::Bunzip2'));
1156 }
1157
1158 IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to)
1159 or return $self->_error(loc("Unable to uncompress '%1': %2",
1160 $self->archive,
1161 $IO::Uncompress::Bunzip2::Bunzip2Error));
1162
1163 ### set what files where extract, and where they went ###
1164 $self->files( [$self->_gunzip_to] );
1165 $self->extract_path( File::Spec->rel2abs(cwd()) );
1166
1167 return 1;
1168}
1169
1170
1171#################################
1172#
8d2ac73b 1173# unlzma code
1174#
1175#################################
1176
1177### unlzma wrapper... goes to either Compress::unLZMA or /bin/unlzma
1178### depending on $PREFER_BIN
1179sub _unlzma {
1180 my $self = shift;
1181
1182 my @methods = qw[_unlzma_cz _unlzma_bin];
1183 @methods = reverse @methods if $PREFER_BIN;
1184
1185 for my $method (@methods) {
1186 $self->_extractor($method) && return 1 if $self->$method();
1187 }
1188
1189 return $self->_error(loc("Unable to unlzma file '%1'", $self->archive));
1190}
1191
1192sub _unlzma_bin {
1193 my $self = shift;
1194
1195 ### check for /bin/unlzma -- we need it ###
1196 return $self->_error(loc("No '%1' program found", '/bin/unlzma'))
1197 unless $self->bin_unlzma;
1198
1199 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1200 return $self->_error(loc("Could not open '%1' for writing: %2",
1201 $self->_gunzip_to, $! ));
1202
1203 my $cmd = [ $self->bin_unlzma, '-c', $self->archive ];
1204
1205 my $buffer;
1206 unless( scalar run( command => $cmd,
1207 verbose => $DEBUG,
1208 buffer => \$buffer )
1209 ) {
1210 return $self->_error(loc("Unable to unlzma '%1': %2",
1211 $self->archive, $buffer));
1212 }
1213
1214 ### no buffers available?
1215 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1216 $self->_error( $self->_no_buffer_content( $self->archive ) );
1217 }
1218
1219 print $fh $buffer if defined $buffer;
1220
1221 close $fh;
1222
1223 ### set what files where extract, and where they went ###
1224 $self->files( [$self->_gunzip_to] );
1225 $self->extract_path( File::Spec->rel2abs(cwd()) );
1226
1227 return 1;
1228}
1229
1230sub _unlzma_cz {
1231 my $self = shift;
1232
1233 my $use_list = { 'Compress::unLZMA' => '0.0' };
1234 unless( can_load( modules => $use_list ) ) {
1235 return $self->_error(loc("You do not have '%1' installed - Please " .
1236 "install it as soon as possible.", 'Compress::unLZMA'));
1237 }
1238
1239 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1240 return $self->_error(loc("Could not open '%1' for writing: %2",
1241 $self->_gunzip_to, $! ));
1242
1243 my $buffer;
1244 $buffer = Compress::unLZMA::uncompressfile( $self->archive );
1245 unless ( defined $buffer ) {
1246 return $self->_error(loc("Could not unlzma '%1': %2",
1247 $self->archive, $@));
1248 }
1249
1250 print $fh $buffer if defined $buffer;
1251
1252 close $fh;
1253
1254 ### set what files where extract, and where they went ###
1255 $self->files( [$self->_gunzip_to] );
1256 $self->extract_path( File::Spec->rel2abs(cwd()) );
1257
1258 return 1;
1259}
1260
1261#################################
1262#
520c99e2 1263# Error code
1264#
1265#################################
1266
1267sub _error {
1268 my $self = shift;
1269 my $error = shift;
1270
1271 $self->_error_msg( $error );
1272 $self->_error_msg_long( Carp::longmess($error) );
1273
1274 ### set $Archive::Extract::WARN to 0 to disable printing
1275 ### of errors
1276 if( $WARN ) {
1277 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1278 }
1279
1280 return;
1281}
1282
1283sub error {
1284 my $self = shift;
1285 return shift() ? $self->_error_msg_long : $self->_error_msg;
1286}
1287
1288sub _no_buffer_files {
1289 my $self = shift;
1290 my $file = shift or return;
1291 return loc("No buffer captured, unable to tell ".
1292 "extracted files or extraction dir for '%1'", $file);
1293}
1294
1295sub _no_buffer_content {
1296 my $self = shift;
1297 my $file = shift or return;
1298 return loc("No buffer captured, unable to get content for '%1'", $file);
1299}
13001;
1301
1302=pod
1303
1304=head1 HOW IT WORKS
1305
1306C<Archive::Extract> tries first to determine what type of archive you
1307are passing it, by inspecting its suffix. It does not do this by using
1308Mime magic, or something related. See C<CAVEATS> below.
1309
1310Once it has determined the file type, it knows which extraction methods
1311it can use on the archive. It will try a perl solution first, then fall
1312back to a commandline tool if that fails. If that also fails, it will
1313return false, indicating it was unable to extract the archive.
1314See the section on C<GLOBAL VARIABLES> to see how to alter this order.
1315
1316=head1 CAVEATS
1317
1318=head2 File Extensions
1319
1320C<Archive::Extract> trusts on the extension of the archive to determine
1321what type it is, and what extractor methods therefore can be used. If
1322your archives do not have any of the extensions as described in the
1323C<new()> method, you will have to specify the type explicitly, or
1324C<Archive::Extract> will not be able to extract the archive for you.
1325
9e5a0ef9 1326=head2 Supporting Very Large Files
1327
1328C<Archive::Extract> can use either pure perl modules or command line
1329programs under the hood. Some of the pure perl modules (like
8d2ac73b 1330C<Archive::Tar> and Compress::unLZMA) take the entire contents of the archive into memory,
9e5a0ef9 1331which may not be feasible on your system. Consider setting the global
1332variable C<$Archive::Extract::PREFER_BIN> to C<1>, which will prefer
1333the use of command line programs and won't consume so much memory.
1334
1335See the C<GLOBAL VARIABLES> section below for details.
1336
1337=head2 Bunzip2 support of arbitrary extensions.
1338
1339Older versions of C</bin/bunzip2> do not support arbitrary file
1340extensions and insist on a C<.bz2> suffix. Although we do our best
1341to guard against this, if you experience a bunzip2 error, it may
1342be related to this. For details, please see the C<have_old_bunzip2>
1343method.
1344
520c99e2 1345=head1 GLOBAL VARIABLES
1346
1347=head2 $Archive::Extract::DEBUG
1348
1349Set this variable to C<true> to have all calls to command line tools
1350be printed out, including all their output.
1351This also enables C<Carp::longmess> errors, instead of the regular
1352C<carp> errors.
1353
1354Good for tracking down why things don't work with your particular
1355setup.
1356
1357Defaults to C<false>.
1358
1359=head2 $Archive::Extract::WARN
1360
1361This variable controls whether errors encountered internally by
1362C<Archive::Extract> should be C<carp>'d or not.
1363
1364Set to false to silence warnings. Inspect the output of the C<error()>
1365method manually to see what went wrong.
1366
1367Defaults to C<true>.
1368
1369=head2 $Archive::Extract::PREFER_BIN
1370
1371This variables controls whether C<Archive::Extract> should prefer the
1372use of perl modules, or commandline tools to extract archives.
1373
1374Set to C<true> to have C<Archive::Extract> prefer commandline tools.
1375
1376Defaults to C<false>.
1377
1378=head1 TODO
1379
1380=over 4
1381
1382=item Mime magic support
1383
1384Maybe this module should use something like C<File::Type> to determine
1385the type, rather than blindly trust the suffix.
1386
1dae2fb5 1387=back
1388
574b415d 1389=head1 BUG REPORTS
520c99e2 1390
574b415d 1391Please report bugs or other issues to E<lt>bug-archive-extract@rt.cpan.org<gt>.
520c99e2 1392
574b415d 1393=head1 AUTHOR
1394
1395This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
520c99e2 1396
574b415d 1397=head1 COPYRIGHT
520c99e2 1398
574b415d 1399This library is free software; you may redistribute and/or modify it
1400under the same terms as Perl itself.
520c99e2 1401
1402=cut
1403
1404# Local variables:
1405# c-indentation-style: bsd
1406# c-basic-offset: 4
1407# indent-tabs-mode: nil
1408# End:
1409# vim: expandtab shiftwidth=4:
1410