Update to Archive::Extract 0.20, and re-apply patch #31158
[p5sagit/p5-mst-13.2.git] / lib / Archive / Extract.pm
CommitLineData
520c99e2 1package Archive::Extract;
2
3use strict;
4
5use Cwd qw[cwd];
6use Carp qw[carp];
7use IPC::Cmd qw[run can_run];
8use FileHandle;
9use File::Path qw[mkpath];
10use File::Spec;
11use File::Basename qw[dirname basename];
12use Params::Check qw[check];
13use Module::Load::Conditional qw[can_load check_install];
14use Locale::Maketext::Simple Style => 'gettext';
15
16### solaris has silly /bin/tar output ###
17use constant ON_SOLARIS => $^O eq 'solaris' ? 1 : 0;
18use constant FILE_EXISTS => sub { -e $_[0] ? 1 : 0 };
19
20### If these are changed, update @TYPES and the new() POD
21use constant TGZ => 'tgz';
22use constant TAR => 'tar';
23use constant GZ => 'gz';
24use constant ZIP => 'zip';
25use constant BZ2 => 'bz2';
26use constant TBZ => 'tbz';
1dae2fb5 27use constant Z => 'Z';
520c99e2 28
29use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG];
30
1dae2fb5 31$VERSION = '0.20';
520c99e2 32$PREFER_BIN = 0;
33$WARN = 1;
34$DEBUG = 0;
1dae2fb5 35my @Types = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ, Z ); # same as all constants
520c99e2 36
37local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1;
38
39=pod
40
41=head1 NAME
42
43Archive::Extract - A generic archive extracting mechanism
44
45=head1 SYNOPSIS
46
47 use Archive::Extract;
48
49 ### build an Archive::Extract object ###
50 my $ae = Archive::Extract->new( archive => 'foo.tgz' );
51
52 ### extract to cwd() ###
53 my $ok = $ae->extract;
54
55 ### extract to /tmp ###
56 my $ok = $ae->extract( to => '/tmp' );
57
58 ### what if something went wrong?
59 my $ok = $ae->extract or die $ae->error;
60
61 ### files from the archive ###
62 my $files = $ae->files;
63
64 ### dir that was extracted to ###
65 my $outdir = $ae->extract_path;
66
67
68 ### quick check methods ###
69 $ae->is_tar # is it a .tar file?
70 $ae->is_tgz # is it a .tar.gz or .tgz file?
71 $ae->is_gz; # is it a .gz file?
72 $ae->is_zip; # is it a .zip file?
73 $ae->is_bz2; # is it a .bz2 file?
74 $ae->is_tbz; # is it a .tar.bz2 or .tbz file?
75
76 ### absolute path to the archive you provided ###
77 $ae->archive;
78
79 ### commandline tools, if found ###
80 $ae->bin_tar # path to /bin/tar, if found
81 $ae->bin_gzip # path to /bin/gzip, if found
82 $ae->bin_unzip # path to /bin/unzip, if found
83 $ae->bin_bunzip2 # path to /bin/bunzip2 if found
84
85=head1 DESCRIPTION
86
87Archive::Extract is a generic archive extraction mechanism.
88
89It allows you to extract any archive file of the type .tar, .tar.gz,
1dae2fb5 90.gz, .Z, tar.bz2, .tbz, .bz2 or .zip without having to worry how it
91does so, or use different interfaces for each type by using either
92perl modules, or commandline tools on your system.
520c99e2 93
94See the C<HOW IT WORKS> section further down for details.
95
96=cut
97
98
99### see what /bin/programs are available ###
100$PROGRAMS = {};
1dae2fb5 101for my $pgm (qw[tar unzip gzip bunzip2 uncompress]) {
520c99e2 102 $PROGRAMS->{$pgm} = can_run($pgm);
103}
104
105### mapping from types to extractor methods ###
106my $Mapping = {
107 is_tgz => '_untar',
108 is_tar => '_untar',
109 is_gz => '_gunzip',
110 is_zip => '_unzip',
111 is_tbz => '_untar',
112 is_bz2 => '_bunzip2',
1dae2fb5 113 is_Z => '_uncompress',
520c99e2 114};
115
116{
117 my $tmpl = {
118 archive => { required => 1, allow => FILE_EXISTS },
119 type => { default => '', allow => [ @Types ] },
120 };
121
122 ### build accesssors ###
123 for my $method( keys %$tmpl,
124 qw[_extractor _gunzip_to files extract_path],
125 qw[_error_msg _error_msg_long]
126 ) {
127 no strict 'refs';
128 *$method = sub {
129 my $self = shift;
130 $self->{$method} = $_[0] if @_;
131 return $self->{$method};
132 }
133 }
134
135=head1 METHODS
136
137=head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE])
138
139Creates a new C<Archive::Extract> object based on the archive file you
140passed it. Automatically determines the type of archive based on the
141extension, but you can override that by explicitly providing the
142C<type> argument.
143
144Valid values for C<type> are:
145
146=over 4
147
148=item tar
149
150Standard tar files, as produced by, for example, C</bin/tar>.
151Corresponds to a C<.tar> suffix.
152
153=item tgz
154
155Gzip compressed tar files, as produced by, for example C</bin/tar -z>.
156Corresponds to a C<.tgz> or C<.tar.gz> suffix.
157
158=item gz
159
160Gzip compressed file, as produced by, for example C</bin/gzip>.
161Corresponds to a C<.gz> suffix.
162
1dae2fb5 163=item Z
164
165Lempel-Ziv compressed file, as produced by, for example C</bin/compress>.
166Corresponds to a C<.Z> suffix.
167
520c99e2 168=item zip
169
170Zip compressed file, as produced by, for example C</bin/zip>.
171Corresponds to a C<.zip>, C<.jar> or C<.par> suffix.
172
173=item bz2
174
175Bzip2 compressed file, as produced by, for example, C</bin/bzip2>.
176Corresponds to a C<.bz2> suffix.
177
178=item tbz
179
180Bzip2 compressed tar file, as produced by, for exmample C</bin/tar -j>.
181Corresponds to a C<.tbz> or C<.tar.bz2> suffix.
182
183=back
184
185Returns a C<Archive::Extract> object on success, or false on failure.
186
187=cut
188
189 ### constructor ###
190 sub new {
191 my $class = shift;
192 my %hash = @_;
193
194 my $parsed = check( $tmpl, \%hash ) or return;
195
196 ### make sure we have an absolute path ###
197 my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} );
198
199 ### figure out the type, if it wasn't already specified ###
200 unless ( $parsed->{type} ) {
201 $parsed->{type} =
574b415d 202 $ar =~ /.+?\.(?:tar\.gz|tgz)$/i ? TGZ :
520c99e2 203 $ar =~ /.+?\.gz$/i ? GZ :
204 $ar =~ /.+?\.tar$/i ? TAR :
205 $ar =~ /.+?\.(zip|jar|par)$/i ? ZIP :
206 $ar =~ /.+?\.(?:tbz|tar\.bz2?)$/i ? TBZ :
207 $ar =~ /.+?\.bz2$/i ? BZ2 :
1dae2fb5 208 $ar =~ /.+?\.Z$/ ? Z :
520c99e2 209 '';
210
211 }
212
213 ### don't know what type of file it is ###
214 return __PACKAGE__->_error(loc("Cannot determine file type for '%1'",
215 $parsed->{archive} )) unless $parsed->{type};
216
217 return bless $parsed, $class;
218 }
219}
220
221=head2 $ae->extract( [to => '/output/path'] )
222
223Extracts the archive represented by the C<Archive::Extract> object to
224the path of your choice as specified by the C<to> argument. Defaults to
225C<cwd()>.
226
227Since C<.gz> files never hold a directory, but only a single file; if
228the C<to> argument is an existing directory, the file is extracted
229there, with it's C<.gz> suffix stripped.
230If the C<to> argument is not an existing directory, the C<to> argument
231is understood to be a filename, if the archive type is C<gz>.
232In the case that you did not specify a C<to> argument, the output
233file will be the name of the archive file, stripped from it's C<.gz>
234suffix, in the current working directory.
235
236C<extract> will try a pure perl solution first, and then fall back to
237commandline tools if they are available. See the C<GLOBAL VARIABLES>
238section below on how to alter this behaviour.
239
240It will return true on success, and false on failure.
241
242On success, it will also set the follow attributes in the object:
243
244=over 4
245
246=item $ae->extract_path
247
248This is the directory that the files where extracted to.
249
250=item $ae->files
251
252This is an array ref with the paths of all the files in the archive,
253relative to the C<to> argument you specified.
254To get the full path to an extracted file, you would use:
255
256 File::Spec->catfile( $to, $ae->files->[0] );
257
258Note that all files from a tar archive will be in unix format, as per
259the tar specification.
260
261=back
262
263=cut
264
265sub extract {
266 my $self = shift;
267 my %hash = @_;
268
269 my $to;
270 my $tmpl = {
271 to => { default => '.', store => \$to }
272 };
273
274 check( $tmpl, \%hash ) or return;
275
276 ### so 'to' could be a file or a dir, depending on whether it's a .gz
277 ### file, or basically anything else.
278 ### so, check that, then act accordingly.
279 ### set an accessor specifically so _gunzip can know what file to extract
280 ### to.
281 my $dir;
282 { ### a foo.gz file
1dae2fb5 283 if( $self->is_gz or $self->is_bz2 or $self->is_Z) {
520c99e2 284
1dae2fb5 285 my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2|Z)$//i;
520c99e2 286
287 ### to is a dir?
288 if ( -d $to ) {
289 $dir = $to;
290 $self->_gunzip_to( basename($cp) );
291
292 ### then it's a filename
293 } else {
294 $dir = dirname($to);
295 $self->_gunzip_to( basename($to) );
296 }
297
298 ### not a foo.gz file
299 } else {
300 $dir = $to;
301 }
302 }
303
304 ### make the dir if it doesn't exist ###
305 unless( -d $dir ) {
306 eval { mkpath( $dir ) };
307
308 return $self->_error(loc("Could not create path '%1': %2", $dir, $@))
309 if $@;
310 }
311
312 ### get the current dir, to restore later ###
313 my $cwd = cwd();
314
315 my $ok = 1;
316 EXTRACT: {
317
318 ### chdir to the target dir ###
319 unless( chdir $dir ) {
320 $self->_error(loc("Could not chdir to '%1': %2", $dir, $!));
321 $ok = 0; last EXTRACT;
322 }
323
324 ### set files to an empty array ref, so there's always an array
325 ### ref IN the accessor, to avoid errors like:
326 ### Can't use an undefined value as an ARRAY reference at
327 ### ../lib/Archive/Extract.pm line 742. (rt #19815)
328 $self->files( [] );
329
330 ### find what extractor method to use ###
331 while( my($type,$method) = each %$Mapping ) {
332
333 ### call the corresponding method if the type is OK ###
334 if( $self->$type) {
335 $ok = $self->$method();
336 }
337 }
338
339 ### warn something went wrong if we didn't get an OK ###
340 $self->_error(loc("Extract failed, no extractor found"))
341 unless $ok;
342
343 }
344
345 ### and chdir back ###
346 unless( chdir $cwd ) {
347 $self->_error(loc("Could not chdir back to start dir '%1': %2'",
348 $cwd, $!));
349 }
350
351 return $ok;
352}
353
354=pod
355
356=head1 ACCESSORS
357
358=head2 $ae->error([BOOL])
359
360Returns the last encountered error as string.
361Pass it a true value to get the C<Carp::longmess()> output instead.
362
363=head2 $ae->extract_path
364
365This is the directory the archive got extracted to.
366See C<extract()> for details.
367
368=head2 $ae->files
369
370This is an array ref holding all the paths from the archive.
371See C<extract()> for details.
372
373=head2 $ae->archive
374
375This is the full path to the archive file represented by this
376C<Archive::Extract> object.
377
378=head2 $ae->type
379
380This is the type of archive represented by this C<Archive::Extract>
381object. See accessors below for an easier way to use this.
382See the C<new()> method for details.
383
384=head2 $ae->types
385
386Returns a list of all known C<types> for C<Archive::Extract>'s
387C<new> method.
388
389=cut
390
391sub types { return @Types }
392
393=head2 $ae->is_tgz
394
395Returns true if the file is of type C<.tar.gz>.
396See the C<new()> method for details.
397
398=head2 $ae->is_tar
399
400Returns true if the file is of type C<.tar>.
401See the C<new()> method for details.
402
403=head2 $ae->is_gz
404
405Returns true if the file is of type C<.gz>.
406See the C<new()> method for details.
407
1dae2fb5 408=head2 $ae->is_Z
409
410Returns true if the file is of type C<.Z>.
411See the C<new()> method for details.
412
520c99e2 413=head2 $ae->is_zip
414
415Returns true if the file is of type C<.zip>.
416See the C<new()> method for details.
417
418=cut
419
420### quick check methods ###
421sub is_tgz { return $_[0]->type eq TGZ }
422sub is_tar { return $_[0]->type eq TAR }
423sub is_gz { return $_[0]->type eq GZ }
424sub is_zip { return $_[0]->type eq ZIP }
425sub is_tbz { return $_[0]->type eq TBZ }
426sub is_bz2 { return $_[0]->type eq BZ2 }
1dae2fb5 427sub is_Z { return $_[0]->type eq Z }
520c99e2 428
429=pod
430
431=head2 $ae->bin_tar
432
433Returns the full path to your tar binary, if found.
434
435=head2 $ae->bin_gzip
436
437Returns the full path to your gzip binary, if found
438
439=head2 $ae->bin_unzip
440
441Returns the full path to your unzip binary, if found
442
443=cut
444
445### paths to commandline tools ###
1dae2fb5 446sub bin_gzip { return $PROGRAMS->{'gzip'} if $PROGRAMS->{'gzip'} }
447sub bin_unzip { return $PROGRAMS->{'unzip'} if $PROGRAMS->{'unzip'} }
448sub bin_tar { return $PROGRAMS->{'tar'} if $PROGRAMS->{'tar'} }
449sub bin_bunzip2 { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} }
450sub bin_uncompress { return $PROGRAMS->{'uncompress'}
451 if $PROGRAMS->{'uncompress'} }
520c99e2 452
453#################################
454#
455# Untar code
456#
457#################################
458
459
460### untar wrapper... goes to either Archive::Tar or /bin/tar
461### depending on $PREFER_BIN
462sub _untar {
463 my $self = shift;
464
465 ### bzip2 support in A::T via IO::Uncompress::Bzip2
466 my @methods = qw[_untar_at _untar_bin];
574b415d 467 @methods = reverse @methods if $PREFER_BIN;
520c99e2 468
469 for my $method (@methods) {
470 $self->_extractor($method) && return 1 if $self->$method();
471 }
472
473 return $self->_error(loc("Unable to untar file '%1'", $self->archive));
474}
475
476### use /bin/tar to extract ###
477sub _untar_bin {
478 my $self = shift;
479
480 ### check for /bin/tar ###
481 return $self->_error(loc("No '%1' program found", '/bin/tar'))
482 unless $self->bin_tar;
483
484 ### check for /bin/gzip if we need it ###
485 return $self->_error(loc("No '%1' program found", '/bin/gzip'))
486 if $self->is_tgz && !$self->bin_gzip;
487
488 return $self->_error(loc("No '%1' program found", '/bin/bunzip2'))
489 if $self->is_tbz && !$self->bin_bunzip2;
490
491 ### XXX figure out how to make IPC::Run do this in one call --
492 ### currently i don't know how to get output of a command after a pipe
493 ### trapped in a scalar. Mailed barries about this 5th of june 2004.
494
495
496
497 ### see what command we should run, based on whether
498 ### it's a .tgz or .tar
499
500 ### XXX solaris tar and bsdtar are having different outputs
501 ### depending whether you run with -x or -t
502 ### compensate for this insanity by running -t first, then -x
503 { my $cmd =
504 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
505 $self->bin_tar, '-tf', '-'] :
506 $self->is_tbz ? [$self->bin_bunzip2, '-c', $self->archive, '|',
507 $self->bin_tar, '-tf', '-'] :
508 [$self->bin_tar, '-tf', $self->archive];
509
510 ### run the command ###
511 my $buffer = '';
512 unless( scalar run( command => $cmd,
513 buffer => \$buffer,
514 verbose => $DEBUG )
515 ) {
516 return $self->_error(loc(
517 "Error listing contents of archive '%1': %2",
518 $self->archive, $buffer ));
519 }
520
521 ### no buffers available?
522 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
523 $self->_error( $self->_no_buffer_files( $self->archive ) );
524
525 } else {
526 ### if we're on solaris we /might/ be using /bin/tar, which has
527 ### a weird output format... we might also be using
528 ### /usr/local/bin/tar, which is gnu tar, which is perfectly
529 ### fine... so we have to do some guessing here =/
530 my @files = map { chomp;
531 !ON_SOLARIS ? $_
532 : (m|^ x \s+ # 'xtract' -- sigh
533 (.+?), # the actual file name
534 \s+ [\d,.]+ \s bytes,
535 \s+ [\d,.]+ \s tape \s blocks
536 |x ? $1 : $_);
537
538 } split $/, $buffer;
539
540 ### store the files that are in the archive ###
541 $self->files(\@files);
542 }
543 }
544
545 ### now actually extract it ###
546 { my $cmd =
547 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
548 $self->bin_tar, '-xf', '-'] :
549 $self->is_tbz ? [$self->bin_bunzip2, '-c', $self->archive, '|',
550 $self->bin_tar, '-xf', '-'] :
551 [$self->bin_tar, '-xf', $self->archive];
552
553 my $buffer = '';
554 unless( scalar run( command => $cmd,
555 buffer => \$buffer,
556 verbose => $DEBUG )
557 ) {
558 return $self->_error(loc("Error extracting archive '%1': %2",
559 $self->archive, $buffer ));
560 }
561
562 ### we might not have them, due to lack of buffers
563 if( $self->files ) {
564 ### now that we've extracted, figure out where we extracted to
565 my $dir = $self->__get_extract_dir( $self->files );
566
567 ### store the extraction dir ###
568 $self->extract_path( $dir );
569 }
570 }
571
572 ### we got here, no error happened
573 return 1;
574}
575
576### use archive::tar to extract ###
577sub _untar_at {
578 my $self = shift;
579
580 ### we definitely need A::T, so load that first
581 { my $use_list = { 'Archive::Tar' => '0.0' };
582
583 unless( can_load( modules => $use_list ) ) {
584
585 return $self->_error(loc("You do not have '%1' installed - " .
586 "Please install it as soon as possible.",
587 'Archive::Tar'));
588 }
589 }
590
591 ### we might pass it a filehandle if it's a .tbz file..
592 my $fh_to_read = $self->archive;
593
594 ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib
595 ### if A::T's version is 0.99 or higher
596 if( $self->is_tgz ) {
597 my $use_list = { 'Compress::Zlib' => '0.0' };
598 $use_list->{ 'IO::Zlib' } = '0.0'
599 if $Archive::Tar::VERSION >= '0.99';
600
601 unless( can_load( modules => $use_list ) ) {
602 my $which = join '/', sort keys %$use_list;
603
604 return $self->_error(loc(
605 "You do not have '%1' installed - Please ".
606 "install it as soon as possible.", $which));
607
608 }
609 } elsif ( $self->is_tbz ) {
610 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
611 unless( can_load( modules => $use_list ) ) {
612 return $self->_error(loc(
613 "You do not have '%1' installed - Please " .
614 "install it as soon as possible.",
615 'IO::Uncompress::Bunzip2'));
616 }
617
618 my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
619 return $self->_error(loc("Unable to open '%1': %2",
620 $self->archive,
621 $IO::Uncompress::Bunzip2::Bunzip2Error));
622
623 $fh_to_read = $bz;
624 }
625
626 my $tar = Archive::Tar->new();
627
628 ### only tell it it's compressed if it's a .tgz, as we give it a file
629 ### handle if it's a .tbz
630 unless( $tar->read( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) ) ) {
631 return $self->_error(loc("Unable to read '%1': %2", $self->archive,
632 $Archive::Tar::error));
633 }
634
635 ### workaround to prevent Archive::Tar from setting uid, which
636 ### is a potential security hole. -autrijus
637 ### have to do it here, since A::T needs to be /loaded/ first ###
638 { no strict 'refs'; local $^W;
639
640 ### older versions of archive::tar <= 0.23
641 *Archive::Tar::chown = sub {};
642 }
643
644 ### for version of archive::tar > 1.04
645 local $Archive::Tar::Constant::CHOWN = 0;
646
647 { local $^W; # quell 'splice() offset past end of array' warnings
648 # on older versions of A::T
649
650 ### older archive::tar always returns $self, return value slightly
651 ### fux0r3d because of it.
652 $tar->extract()
653 or return $self->_error(loc("Unable to extract '%1': %2",
654 $self->archive, $Archive::Tar::error ));
655 }
656
657 my @files = $tar->list_files;
658 my $dir = $self->__get_extract_dir( \@files );
659
660 ### store the files that are in the archive ###
661 $self->files(\@files);
662
663 ### store the extraction dir ###
664 $self->extract_path( $dir );
665
666 ### check if the dir actually appeared ###
667 return 1 if -d $self->extract_path;
668
669 ### no dir, we failed ###
670 return $self->_error(loc("Unable to extract '%1': %2",
671 $self->archive, $Archive::Tar::error ));
672}
673
674#################################
675#
676# Gunzip code
677#
678#################################
679
680### gunzip wrapper... goes to either Compress::Zlib or /bin/gzip
681### depending on $PREFER_BIN
682sub _gunzip {
683 my $self = shift;
684
685 my @methods = qw[_gunzip_cz _gunzip_bin];
686 @methods = reverse @methods if $PREFER_BIN;
687
688 for my $method (@methods) {
689 $self->_extractor($method) && return 1 if $self->$method();
690 }
691
692 return $self->_error(loc("Unable to gunzip file '%1'", $self->archive));
693}
694
695sub _gunzip_bin {
696 my $self = shift;
697
698 ### check for /bin/gzip -- we need it ###
699 return $self->_error(loc("No '%1' program found", '/bin/gzip'))
700 unless $self->bin_gzip;
701
702
703 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
704 return $self->_error(loc("Could not open '%1' for writing: %2",
705 $self->_gunzip_to, $! ));
706
707 my $cmd = [ $self->bin_gzip, '-cdf', $self->archive ];
708
709 my $buffer;
710 unless( scalar run( command => $cmd,
711 verbose => $DEBUG,
712 buffer => \$buffer )
713 ) {
714 return $self->_error(loc("Unable to gunzip '%1': %2",
715 $self->archive, $buffer));
716 }
717
718 ### no buffers available?
719 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
720 $self->_error( $self->_no_buffer_content( $self->archive ) );
721 }
722
723 print $fh $buffer if defined $buffer;
724
725 close $fh;
726
727 ### set what files where extract, and where they went ###
728 $self->files( [$self->_gunzip_to] );
729 $self->extract_path( File::Spec->rel2abs(cwd()) );
730
731 return 1;
732}
733
734sub _gunzip_cz {
735 my $self = shift;
736
737 my $use_list = { 'Compress::Zlib' => '0.0' };
738 unless( can_load( modules => $use_list ) ) {
739 return $self->_error(loc("You do not have '%1' installed - Please " .
740 "install it as soon as possible.", 'Compress::Zlib'));
741 }
742
743 my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or
744 return $self->_error(loc("Unable to open '%1': %2",
745 $self->archive, $Compress::Zlib::gzerrno));
746
747 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
748 return $self->_error(loc("Could not open '%1' for writing: %2",
749 $self->_gunzip_to, $! ));
750
751 my $buffer;
752 $fh->print($buffer) while $gz->gzread($buffer) > 0;
753 $fh->close;
754
755 ### set what files where extract, and where they went ###
756 $self->files( [$self->_gunzip_to] );
757 $self->extract_path( File::Spec->rel2abs(cwd()) );
758
759 return 1;
760}
761
762#################################
763#
1dae2fb5 764# Uncompress code
765#
766#################################
767
768
769### untar wrapper... goes to either Archive::Tar or /bin/tar
770### depending on $PREFER_BIN
771sub _uncompress {
772 my $self = shift;
773
774 my @methods = qw[_gunzip_cz _uncompress_bin];
775 @methods = reverse @methods if $PREFER_BIN;
776
777 for my $method (@methods) {
778 $self->_extractor($method) && return 1 if $self->$method();
779 }
780
781 return $self->_error(loc("Unable to untar file '%1'", $self->archive));
782}
783
784sub _uncompress_bin {
785 my $self = shift;
786
787 ### check for /bin/gzip -- we need it ###
788 return $self->_error(loc("No '%1' program found", '/bin/uncompress'))
789 unless $self->bin_uncompress;
790
791
792 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
793 return $self->_error(loc("Could not open '%1' for writing: %2",
794 $self->_gunzip_to, $! ));
795
796 my $cmd = [ $self->bin_uncompress, '-c', $self->archive ];
797
798 my $buffer;
799 unless( scalar run( command => $cmd,
800 verbose => $DEBUG,
801 buffer => \$buffer )
802 ) {
803 return $self->_error(loc("Unable to uncompress '%1': %2",
804 $self->archive, $buffer));
805 }
806
807 ### no buffers available?
808 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
809 $self->_error( $self->_no_buffer_content( $self->archive ) );
810 }
811
812 print $fh $buffer if defined $buffer;
813
814 close $fh;
815
816 ### set what files where extract, and where they went ###
817 $self->files( [$self->_gunzip_to] );
818 $self->extract_path( File::Spec->rel2abs(cwd()) );
819
820 return 1;
821}
822
823
824#################################
825#
520c99e2 826# Unzip code
827#
828#################################
829
830### unzip wrapper... goes to either Archive::Zip or /bin/unzip
831### depending on $PREFER_BIN
832sub _unzip {
833 my $self = shift;
834
835 my @methods = qw[_unzip_az _unzip_bin];
836 @methods = reverse @methods if $PREFER_BIN;
837
838 for my $method (@methods) {
839 $self->_extractor($method) && return 1 if $self->$method();
840 }
841
842 return $self->_error(loc("Unable to gunzip file '%1'", $self->archive));
843}
844
845sub _unzip_bin {
846 my $self = shift;
847
848 ### check for /bin/gzip if we need it ###
849 return $self->_error(loc("No '%1' program found", '/bin/unzip'))
850 unless $self->bin_unzip;
851
852
853 ### first, get the files.. it must be 2 different commands with 'unzip' :(
854 { my $cmd = [ $self->bin_unzip, '-Z', '-1', $self->archive ];
855
856 my $buffer = '';
857 unless( scalar run( command => $cmd,
858 verbose => $DEBUG,
859 buffer => \$buffer )
860 ) {
861 return $self->_error(loc("Unable to unzip '%1': %2",
862 $self->archive, $buffer));
863 }
864
865 ### no buffers available?
866 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
867 $self->_error( $self->_no_buffer_files( $self->archive ) );
868
869 } else {
870 $self->files( [split $/, $buffer] );
871 }
872 }
873
874 ### now, extract the archive ###
875 { my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ];
876
877 my $buffer;
878 unless( scalar run( command => $cmd,
879 verbose => $DEBUG,
880 buffer => \$buffer )
881 ) {
882 return $self->_error(loc("Unable to unzip '%1': %2",
883 $self->archive, $buffer));
884 }
885
886 if( scalar @{$self->files} ) {
887 my $files = $self->files;
888 my $dir = $self->__get_extract_dir( $files );
889
890 $self->extract_path( $dir );
891 }
892 }
893
894 return 1;
895}
896
897sub _unzip_az {
898 my $self = shift;
899
900 my $use_list = { 'Archive::Zip' => '0.0' };
901 unless( can_load( modules => $use_list ) ) {
902 return $self->_error(loc("You do not have '%1' installed - Please " .
903 "install it as soon as possible.", 'Archive::Zip'));
904 }
905
906 my $zip = Archive::Zip->new();
907
908 unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) {
909 return $self->_error(loc("Unable to read '%1'", $self->archive));
910 }
911
912 my @files;
913 ### have to extract every memeber individually ###
914 for my $member ($zip->members) {
915 push @files, $member->{fileName};
916
917 unless( $zip->extractMember($member) == &Archive::Zip::AZ_OK ) {
918 return $self->_error(loc("Extraction of '%1' from '%2' failed",
919 $member->{fileName}, $self->archive ));
920 }
921 }
922
923 my $dir = $self->__get_extract_dir( \@files );
924
925 ### set what files where extract, and where they went ###
926 $self->files( \@files );
927 $self->extract_path( File::Spec->rel2abs($dir) );
928
929 return 1;
930}
931
932sub __get_extract_dir {
933 my $self = shift;
934 my $files = shift || [];
935
936 return unless scalar @$files;
937
938 my($dir1, $dir2);
939 for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
940 my($dir,$pos) = @$aref;
941
942 ### add a catdir(), so that any trailing slashes get
943 ### take care of (removed)
944 ### also, a catdir() normalises './dir/foo' to 'dir/foo';
945 ### which was the problem in bug #23999
946 my $res = -d $files->[$pos]
947 ? File::Spec->catdir( $files->[$pos], '' )
948 : File::Spec->catdir( dirname( $files->[$pos] ) );
949
950 $$dir = $res;
951 }
952
953 ### if the first and last dir don't match, make sure the
954 ### dirname is not set wrongly
955 my $dir;
956
957 ### dirs are the same, so we know for sure what the extract dir is
958 if( $dir1 eq $dir2 ) {
959 $dir = $dir1;
960
961 ### dirs are different.. do they share the base dir?
962 ### if so, use that, if not, fall back to '.'
963 } else {
964 my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
965 my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
966
967 $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
968 }
969
970 return File::Spec->rel2abs( $dir );
971}
972
973#################################
974#
975# Bunzip2 code
976#
977#################################
978
979### bunzip2 wrapper...
980sub _bunzip2 {
981 my $self = shift;
982
983 my @methods = qw[_bunzip2_cz2 _bunzip2_bin];
984 @methods = reverse @methods if $PREFER_BIN;
985
986 for my $method (@methods) {
987 $self->_extractor($method) && return 1 if $self->$method();
988 }
989
990 return $self->_error(loc("Unable to bunzip2 file '%1'", $self->archive));
991}
992
993sub _bunzip2_bin {
994 my $self = shift;
995
996 ### check for /bin/gzip -- we need it ###
997 return $self->_error(loc("No '%1' program found", '/bin/bunzip2'))
998 unless $self->bin_bunzip2;
999
1000
1001 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1002 return $self->_error(loc("Could not open '%1' for writing: %2",
1003 $self->_gunzip_to, $! ));
1004
1005 my $cmd = [ $self->bin_bunzip2, '-c', $self->archive ];
1006
1007 my $buffer;
1008 unless( scalar run( command => $cmd,
1009 verbose => $DEBUG,
1010 buffer => \$buffer )
1011 ) {
1012 return $self->_error(loc("Unable to bunzip2 '%1': %2",
1013 $self->archive, $buffer));
1014 }
1015
1016 ### no buffers available?
1017 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1018 $self->_error( $self->_no_buffer_content( $self->archive ) );
1019 }
1020
1021 print $fh $buffer if defined $buffer;
1022
1023 close $fh;
1024
1025 ### set what files where extract, and where they went ###
1026 $self->files( [$self->_gunzip_to] );
1027 $self->extract_path( File::Spec->rel2abs(cwd()) );
1028
1029 return 1;
1030}
1031
1032### using cz2, the compact versions... this we use mainly in archive::tar
1033### extractor..
1034# sub _bunzip2_cz1 {
1035# my $self = shift;
1036#
1037# my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1038# unless( can_load( modules => $use_list ) ) {
1039# return $self->_error(loc("You do not have '%1' installed - Please " .
1040# "install it as soon as possible.",
1041# 'IO::Uncompress::Bunzip2'));
1042# }
1043#
1044# my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
1045# return $self->_error(loc("Unable to open '%1': %2",
1046# $self->archive,
1047# $IO::Uncompress::Bunzip2::Bunzip2Error));
1048#
1049# my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1050# return $self->_error(loc("Could not open '%1' for writing: %2",
1051# $self->_gunzip_to, $! ));
1052#
1053# my $buffer;
1054# $fh->print($buffer) while $bz->read($buffer) > 0;
1055# $fh->close;
1056#
1057# ### set what files where extract, and where they went ###
1058# $self->files( [$self->_gunzip_to] );
1059# $self->extract_path( File::Spec->rel2abs(cwd()) );
1060#
1061# return 1;
1062# }
1063
1064sub _bunzip2_cz2 {
1065 my $self = shift;
1066
1067 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1068 unless( can_load( modules => $use_list ) ) {
1069 return $self->_error(loc("You do not have '%1' installed - Please " .
1070 "install it as soon as possible.",
1071 'IO::Uncompress::Bunzip2'));
1072 }
1073
1074 IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to)
1075 or return $self->_error(loc("Unable to uncompress '%1': %2",
1076 $self->archive,
1077 $IO::Uncompress::Bunzip2::Bunzip2Error));
1078
1079 ### set what files where extract, and where they went ###
1080 $self->files( [$self->_gunzip_to] );
1081 $self->extract_path( File::Spec->rel2abs(cwd()) );
1082
1083 return 1;
1084}
1085
1086
1087#################################
1088#
1089# Error code
1090#
1091#################################
1092
1093sub _error {
1094 my $self = shift;
1095 my $error = shift;
1096
1097 $self->_error_msg( $error );
1098 $self->_error_msg_long( Carp::longmess($error) );
1099
1100 ### set $Archive::Extract::WARN to 0 to disable printing
1101 ### of errors
1102 if( $WARN ) {
1103 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1104 }
1105
1106 return;
1107}
1108
1109sub error {
1110 my $self = shift;
1111 return shift() ? $self->_error_msg_long : $self->_error_msg;
1112}
1113
1114sub _no_buffer_files {
1115 my $self = shift;
1116 my $file = shift or return;
1117 return loc("No buffer captured, unable to tell ".
1118 "extracted files or extraction dir for '%1'", $file);
1119}
1120
1121sub _no_buffer_content {
1122 my $self = shift;
1123 my $file = shift or return;
1124 return loc("No buffer captured, unable to get content for '%1'", $file);
1125}
11261;
1127
1128=pod
1129
1130=head1 HOW IT WORKS
1131
1132C<Archive::Extract> tries first to determine what type of archive you
1133are passing it, by inspecting its suffix. It does not do this by using
1134Mime magic, or something related. See C<CAVEATS> below.
1135
1136Once it has determined the file type, it knows which extraction methods
1137it can use on the archive. It will try a perl solution first, then fall
1138back to a commandline tool if that fails. If that also fails, it will
1139return false, indicating it was unable to extract the archive.
1140See the section on C<GLOBAL VARIABLES> to see how to alter this order.
1141
1142=head1 CAVEATS
1143
1144=head2 File Extensions
1145
1146C<Archive::Extract> trusts on the extension of the archive to determine
1147what type it is, and what extractor methods therefore can be used. If
1148your archives do not have any of the extensions as described in the
1149C<new()> method, you will have to specify the type explicitly, or
1150C<Archive::Extract> will not be able to extract the archive for you.
1151
1152=head2 Bzip2 Support
1153
1154There's currently no very reliable pure perl Bzip2 implementation
1155available, so C<Archive::Extract> can only extract C<bzip2>
1156compressed archives if you have a C</bin/bunzip2> program.
1157
1158=head1 GLOBAL VARIABLES
1159
1160=head2 $Archive::Extract::DEBUG
1161
1162Set this variable to C<true> to have all calls to command line tools
1163be printed out, including all their output.
1164This also enables C<Carp::longmess> errors, instead of the regular
1165C<carp> errors.
1166
1167Good for tracking down why things don't work with your particular
1168setup.
1169
1170Defaults to C<false>.
1171
1172=head2 $Archive::Extract::WARN
1173
1174This variable controls whether errors encountered internally by
1175C<Archive::Extract> should be C<carp>'d or not.
1176
1177Set to false to silence warnings. Inspect the output of the C<error()>
1178method manually to see what went wrong.
1179
1180Defaults to C<true>.
1181
1182=head2 $Archive::Extract::PREFER_BIN
1183
1184This variables controls whether C<Archive::Extract> should prefer the
1185use of perl modules, or commandline tools to extract archives.
1186
1187Set to C<true> to have C<Archive::Extract> prefer commandline tools.
1188
1189Defaults to C<false>.
1190
1191=head1 TODO
1192
1193=over 4
1194
1195=item Mime magic support
1196
1197Maybe this module should use something like C<File::Type> to determine
1198the type, rather than blindly trust the suffix.
1199
1dae2fb5 1200=back
1201
574b415d 1202=head1 BUG REPORTS
520c99e2 1203
574b415d 1204Please report bugs or other issues to E<lt>bug-archive-extract@rt.cpan.org<gt>.
520c99e2 1205
574b415d 1206=head1 AUTHOR
1207
1208This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
520c99e2 1209
574b415d 1210=head1 COPYRIGHT
520c99e2 1211
574b415d 1212This library is free software; you may redistribute and/or modify it
1213under the same terms as Perl itself.
520c99e2 1214
1215=cut
1216
1217# Local variables:
1218# c-indentation-style: bsd
1219# c-basic-offset: 4
1220# indent-tabs-mode: nil
1221# End:
1222# vim: expandtab shiftwidth=4:
1223