Skip the POSIX::strftime() time test with a 60sec parameter on Vista:
[p5sagit/p5-mst-13.2.git] / lib / Archive / Tar.pm
CommitLineData
39713df4 1### the gnu tar specification:
f38c1908 2### http://www.gnu.org/software/tar/manual/tar.html
39713df4 3###
4### and the pax format spec, which tar derives from:
5### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
6
7package Archive::Tar;
8require 5.005_03;
9
10use strict;
11use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
12 $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING];
13
14$DEBUG = 0;
15$WARN = 1;
16$FOLLOW_SYMLINK = 0;
97a504ba 17$VERSION = "1.32";
39713df4 18$CHOWN = 1;
19$CHMOD = 1;
20$DO_NOT_USE_PREFIX = 0;
21
22BEGIN {
23 use Config;
24 $HAS_PERLIO = $Config::Config{useperlio};
25
26 ### try and load IO::String anyway, so you can dynamically
27 ### switch between perlio and IO::String
28 eval {
29 require IO::String;
30 import IO::String;
31 };
32 $HAS_IO_STRING = $@ ? 0 : 1;
33
34}
35
36use Cwd;
37use IO::File;
38use Carp qw(carp croak);
39use File::Spec ();
40use File::Spec::Unix ();
41use File::Path ();
42
43use Archive::Tar::File;
44use Archive::Tar::Constant;
45
46=head1 NAME
47
48Archive::Tar - module for manipulations of tar archives
49
50=head1 SYNOPSIS
51
52 use Archive::Tar;
53 my $tar = Archive::Tar->new;
54
55 $tar->read('origin.tgz',1);
56 $tar->extract();
57
58 $tar->add_files('file/foo.pl', 'docs/README');
59 $tar->add_data('file/baz.txt', 'This is the contents now');
60
61 $tar->rename('oldname', 'new/file/name');
62
63 $tar->write('files.tar');
64
65=head1 DESCRIPTION
66
67Archive::Tar provides an object oriented mechanism for handling tar
68files. It provides class methods for quick and easy files handling
69while also allowing for the creation of tar file objects for custom
70manipulation. If you have the IO::Zlib module installed,
71Archive::Tar will also support compressed or gzipped tar files.
72
73An object of class Archive::Tar represents a .tar(.gz) archive full
74of files and things.
75
76=head1 Object Methods
77
78=head2 Archive::Tar->new( [$file, $compressed] )
79
80Returns a new Tar object. If given any arguments, C<new()> calls the
81C<read()> method automatically, passing on the arguments provided to
82the C<read()> method.
83
84If C<new()> is invoked with arguments and the C<read()> method fails
85for any reason, C<new()> returns undef.
86
87=cut
88
89my $tmpl = {
90 _data => [ ],
91 _file => 'Unknown',
92};
93
94### install get/set accessors for this object.
95for my $key ( keys %$tmpl ) {
96 no strict 'refs';
97 *{__PACKAGE__."::$key"} = sub {
98 my $self = shift;
99 $self->{$key} = $_[0] if @_;
100 return $self->{$key};
101 }
102}
103
104sub new {
105 my $class = shift;
106 $class = ref $class if ref $class;
107
108 ### copying $tmpl here since a shallow copy makes it use the
109 ### same aref, causing for files to remain in memory always.
110 my $obj = bless { _data => [ ], _file => 'Unknown' }, $class;
111
112 if (@_) {
81a5970e 113 unless ( $obj->read( @_ ) ) {
114 $obj->_error(qq[No data could be read from file]);
115 return;
116 }
39713df4 117 }
118
119 return $obj;
120}
121
122=head2 $tar->read ( $filename|$handle, $compressed, {opt => 'val'} )
123
124Read the given tar file into memory.
125The first argument can either be the name of a file or a reference to
126an already open filehandle (or an IO::Zlib object if it's compressed)
127The second argument indicates whether the file referenced by the first
128argument is compressed.
129
130The C<read> will I<replace> any previous content in C<$tar>!
131
132The second argument may be considered optional if IO::Zlib is
133installed, since it will transparently Do The Right Thing.
134Archive::Tar will warn if you try to pass a compressed file if
135IO::Zlib is not available and simply return.
136
b3200c5d 137Note that you can currently B<not> pass a C<gzip> compressed
138filehandle, which is not opened with C<IO::Zlib>, nor a string
139containing the full archive information (either compressed or
140uncompressed). These are worth while features, but not currently
141implemented. See the C<TODO> section.
142
39713df4 143The third argument can be a hash reference with options. Note that
144all options are case-sensitive.
145
146=over 4
147
148=item limit
149
150Do not read more than C<limit> files. This is useful if you have
151very big archives, and are only interested in the first few files.
152
153=item extract
154
155If set to true, immediately extract entries when reading them. This
156gives you the same memory break as the C<extract_archive> function.
157Note however that entries will not be read into memory, but written
158straight to disk.
159
160=back
161
162All files are stored internally as C<Archive::Tar::File> objects.
163Please consult the L<Archive::Tar::File> documentation for details.
164
165Returns the number of files read in scalar context, and a list of
166C<Archive::Tar::File> objects in list context.
167
168=cut
169
170sub read {
171 my $self = shift;
172 my $file = shift;
173 my $gzip = shift || 0;
174 my $opts = shift || {};
175
176 unless( defined $file ) {
177 $self->_error( qq[No file to read from!] );
178 return;
179 } else {
180 $self->_file( $file );
181 }
182
183 my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) )
184 or return;
185
186 my $data = $self->_read_tar( $handle, $opts ) or return;
187
188 $self->_data( $data );
189
190 return wantarray ? @$data : scalar @$data;
191}
192
193sub _get_handle {
194 my $self = shift;
195 my $file = shift; return unless defined $file;
196 return $file if ref $file;
197
198 my $gzip = shift || 0;
199 my $mode = shift || READ_ONLY->( ZLIB ); # default to read only
200
201 my $fh; my $bin;
202
203 ### only default to ZLIB if we're not trying to /write/ to a handle ###
204 if( ZLIB and $gzip || MODE_READ->( $mode ) ) {
205
206 ### IO::Zlib will Do The Right Thing, even when passed
207 ### a plain file ###
208 $fh = new IO::Zlib;
209
210 } else {
211 if( $gzip ) {
212 $self->_error(qq[Compression not available - Install IO::Zlib!]);
213 return;
214
215 } else {
216 $fh = new IO::File;
217 $bin++;
218 }
219 }
220
221 unless( $fh->open( $file, $mode ) ) {
222 $self->_error( qq[Could not create filehandle for '$file': $!!] );
223 return;
224 }
225
226 binmode $fh if $bin;
227
228 return $fh;
229}
230
231sub _read_tar {
232 my $self = shift;
233 my $handle = shift or return;
234 my $opts = shift || {};
235
236 my $count = $opts->{limit} || 0;
237 my $extract = $opts->{extract} || 0;
238
239 ### set a cap on the amount of files to extract ###
240 my $limit = 0;
241 $limit = 1 if $count > 0;
242
243 my $tarfile = [ ];
244 my $chunk;
245 my $read = 0;
246 my $real_name; # to set the name of a file when
247 # we're encountering @longlink
248 my $data;
249
250 LOOP:
251 while( $handle->read( $chunk, HEAD ) ) {
252 ### IO::Zlib doesn't support this yet
253 my $offset = eval { tell $handle } || 'unknown';
254
255 unless( $read++ ) {
256 my $gzip = GZIP_MAGIC_NUM;
257 if( $chunk =~ /$gzip/ ) {
258 $self->_error( qq[Cannot read compressed format in tar-mode] );
259 return;
260 }
261 }
262
263 ### if we can't read in all bytes... ###
264 last if length $chunk != HEAD;
265
266 ### Apparently this should really be two blocks of 512 zeroes,
267 ### but GNU tar sometimes gets it wrong. See comment in the
268 ### source code (tar.c) to GNU cpio.
269 next if $chunk eq TAR_END;
270
b30bcf62 271 ### according to the posix spec, the last 12 bytes of the header are
272 ### null bytes, to pad it to a 512 byte block. That means if these
273 ### bytes are NOT null bytes, it's a corrrupt header. See:
274 ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx
275 ### line 111
276 { my $nulls = join '', "\0" x 12;
277 unless( $nulls eq substr( $chunk, 500, 12 ) ) {
278 $self->_error( qq[Invalid header block at offset $offset] );
279 next LOOP;
280 }
281 }
282
81a5970e 283 ### pass the realname, so we can set it 'proper' right away
284 ### some of the heuristics are done on the name, so important
285 ### to set it ASAP
39713df4 286 my $entry;
81a5970e 287 { my %extra_args = ();
288 $extra_args{'name'} = $$real_name if defined $real_name;
289
290 unless( $entry = Archive::Tar::File->new( chunk => $chunk,
291 %extra_args )
292 ) {
293 $self->_error( qq[Couldn't read chunk at offset $offset] );
b30bcf62 294 next LOOP;
81a5970e 295 }
39713df4 296 }
297
298 ### ignore labels:
299 ### http://www.gnu.org/manual/tar/html_node/tar_139.html
300 next if $entry->is_label;
301
302 if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) {
303
304 if ( $entry->is_file && !$entry->validate ) {
305 ### sometimes the chunk is rather fux0r3d and a whole 512
c3745331 306 ### bytes ends up in the ->name area.
39713df4 307 ### clean it up, if need be
308 my $name = $entry->name;
309 $name = substr($name, 0, 100) if length $name > 100;
310 $name =~ s/\n/ /g;
311
312 $self->_error( $name . qq[: checksum error] );
313 next LOOP;
314 }
315
316 my $block = BLOCK_SIZE->( $entry->size );
317
318 $data = $entry->get_content_by_ref;
319
320 ### just read everything into memory
321 ### can't do lazy loading since IO::Zlib doesn't support 'seek'
322 ### this is because Compress::Zlib doesn't support it =/
323 ### this reads in the whole data in one read() call.
324 if( $handle->read( $$data, $block ) < $block ) {
325 $self->_error( qq[Read error on tarfile (missing data) '].
326 $entry->full_path ."' at offset $offset" );
b30bcf62 327 next LOOP;
39713df4 328 }
329
330 ### throw away trailing garbage ###
376cc5ea 331 substr ($$data, $entry->size) = "" if defined $$data;
39713df4 332
333 ### part II of the @LongLink munging -- need to do /after/
334 ### the checksum check.
335 if( $entry->is_longlink ) {
336 ### weird thing in tarfiles -- if the file is actually a
337 ### @LongLink, the data part seems to have a trailing ^@
338 ### (unprintable) char. to display, pipe output through less.
339 ### but that doesn't *always* happen.. so check if the last
340 ### character is a control character, and if so remove it
341 ### at any rate, we better remove that character here, or tests
342 ### like 'eq' and hashlook ups based on names will SO not work
343 ### remove it by calculating the proper size, and then
344 ### tossing out everything that's longer than that size.
345
346 ### count number of nulls
347 my $nulls = $$data =~ tr/\0/\0/;
348
349 ### cut data + size by that many bytes
350 $entry->size( $entry->size - $nulls );
351 substr ($$data, $entry->size) = "";
352 }
353 }
354
355 ### clean up of the entries.. posix tar /apparently/ has some
356 ### weird 'feature' that allows for filenames > 255 characters
357 ### they'll put a header in with as name '././@LongLink' and the
358 ### contents will be the name of the /next/ file in the archive
359 ### pretty crappy and kludgy if you ask me
360
361 ### set the name for the next entry if this is a @LongLink;
362 ### this is one ugly hack =/ but needed for direct extraction
363 if( $entry->is_longlink ) {
364 $real_name = $data;
b30bcf62 365 next LOOP;
39713df4 366 } elsif ( defined $real_name ) {
367 $entry->name( $$real_name );
368 $entry->prefix('');
369 undef $real_name;
370 }
371
372 $self->_extract_file( $entry ) if $extract
373 && !$entry->is_longlink
374 && !$entry->is_unknown
375 && !$entry->is_label;
376
377 ### Guard against tarfiles with garbage at the end
378 last LOOP if $entry->name eq '';
379
380 ### push only the name on the rv if we're extracting
381 ### -- for extract_archive
382 push @$tarfile, ($extract ? $entry->name : $entry);
383
384 if( $limit ) {
385 $count-- unless $entry->is_longlink || $entry->is_dir;
386 last LOOP unless $count;
387 }
388 } continue {
389 undef $data;
390 }
391
392 return $tarfile;
393}
394
395=head2 $tar->contains_file( $filename )
396
397Check if the archive contains a certain file.
398It will return true if the file is in the archive, false otherwise.
399
400Note however, that this function does an exact match using C<eq>
401on the full path. So it cannot compensate for case-insensitive file-
402systems or compare 2 paths to see if they would point to the same
403underlying file.
404
405=cut
406
407sub contains_file {
408 my $self = shift;
409 my $full = shift or return;
410
c3745331 411 ### don't warn if the entry isn't there.. that's what this function
412 ### is for after all.
413 local $WARN = 0;
39713df4 414 return 1 if $self->_find_entry($full);
415 return;
416}
417
418=head2 $tar->extract( [@filenames] )
419
420Write files whose names are equivalent to any of the names in
421C<@filenames> to disk, creating subdirectories as necessary. This
422might not work too well under VMS.
423Under MacPerl, the file's modification time will be converted to the
424MacOS zero of time, and appropriate conversions will be done to the
425path. However, the length of each element of the path is not
426inspected to see whether it's longer than MacOS currently allows (32
427characters).
428
429If C<extract> is called without a list of file names, the entire
430contents of the archive are extracted.
431
432Returns a list of filenames extracted.
433
434=cut
435
436sub extract {
437 my $self = shift;
b30bcf62 438 my @args = @_;
39713df4 439 my @files;
440
f38c1908 441 # use the speed optimization for all extracted files
442 local($self->{cwd}) = cwd() unless $self->{cwd};
443
39713df4 444 ### you requested the extraction of only certian files
b30bcf62 445 if( @args ) {
446 for my $file ( @args ) {
447
448 ### it's already an object?
449 if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) {
450 push @files, $file;
451 next;
39713df4 452
b30bcf62 453 ### go find it then
454 } else {
455
456 my $found;
457 for my $entry ( @{$self->_data} ) {
458 next unless $file eq $entry->full_path;
459
460 ### we found the file you're looking for
461 push @files, $entry;
462 $found++;
463 }
464
465 unless( $found ) {
466 return $self->_error(
467 qq[Could not find '$file' in archive] );
468 }
39713df4 469 }
470 }
471
472 ### just grab all the file items
473 } else {
474 @files = $self->get_files;
475 }
476
477 ### nothing found? that's an error
478 unless( scalar @files ) {
479 $self->_error( qq[No files found for ] . $self->_file );
480 return;
481 }
482
483 ### now extract them
484 for my $entry ( @files ) {
485 unless( $self->_extract_file( $entry ) ) {
486 $self->_error(q[Could not extract ']. $entry->full_path .q['] );
487 return;
488 }
489 }
490
491 return @files;
492}
493
494=head2 $tar->extract_file( $file, [$extract_path] )
495
496Write an entry, whose name is equivalent to the file name provided to
497disk. Optionally takes a second parameter, which is the full (unix)
498path (including filename) the entry will be written to.
499
500For example:
501
502 $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' );
503
b30bcf62 504 $tar->extract_file( $at_file_object, 'name/i/want/to/give/it' );
505
39713df4 506Returns true on success, false on failure.
507
508=cut
509
510sub extract_file {
511 my $self = shift;
512 my $file = shift or return;
513 my $alt = shift;
514
515 my $entry = $self->_find_entry( $file )
516 or $self->_error( qq[Could not find an entry for '$file'] ), return;
517
518 return $self->_extract_file( $entry, $alt );
519}
520
521sub _extract_file {
522 my $self = shift;
523 my $entry = shift or return;
524 my $alt = shift;
39713df4 525
526 ### you wanted an alternate extraction location ###
527 my $name = defined $alt ? $alt : $entry->full_path;
528
529 ### splitpath takes a bool at the end to indicate
530 ### that it's splitting a dir
7f10f74b 531 my ($vol,$dirs,$file);
532 if ( defined $alt ) { # It's a local-OS path
533 ($vol,$dirs,$file) = File::Spec->splitpath( $alt,
534 $entry->is_dir );
535 } else {
536 ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name,
537 $entry->is_dir );
538 }
539
39713df4 540 my $dir;
541 ### is $name an absolute path? ###
542 if( File::Spec->file_name_is_absolute( $dirs ) ) {
543 $dir = $dirs;
544
545 ### it's a relative path ###
546 } else {
f38c1908 547 my $cwd = (defined $self->{cwd} ? $self->{cwd} : cwd());
39713df4 548 my @dirs = File::Spec::Unix->splitdir( $dirs );
549 my @cwd = File::Spec->splitdir( $cwd );
81a5970e 550 $dir = File::Spec->catdir( @cwd, @dirs );
551
552 # catdir() returns undef if the path is longer than 255 chars on VMS
553 unless ( defined $dir ) {
554 $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] );
555 return;
556 }
557
39713df4 558 }
559
560 if( -e $dir && !-d _ ) {
561 $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] );
562 return;
563 }
564
565 unless ( -d _ ) {
566 eval { File::Path::mkpath( $dir, 0, 0777 ) };
567 if( $@ ) {
568 $self->_error( qq[Could not create directory '$dir': $@] );
569 return;
570 }
c3745331 571
572 ### XXX chown here? that might not be the same as in the archive
573 ### as we're only chown'ing to the owner of the file we're extracting
574 ### not to the owner of the directory itself, which may or may not
575 ### be another entry in the archive
576 ### Answer: no, gnu tar doesn't do it either, it'd be the wrong
577 ### way to go.
578 #if( $CHOWN && CAN_CHOWN ) {
579 # chown $entry->uid, $entry->gid, $dir or
580 # $self->_error( qq[Could not set uid/gid on '$dir'] );
581 #}
39713df4 582 }
583
584 ### we're done if we just needed to create a dir ###
585 return 1 if $entry->is_dir;
586
587 my $full = File::Spec->catfile( $dir, $file );
588
589 if( $entry->is_unknown ) {
590 $self->_error( qq[Unknown file type for file '$full'] );
591 return;
592 }
593
594 if( length $entry->type && $entry->is_file ) {
595 my $fh = IO::File->new;
596 $fh->open( '>' . $full ) or (
597 $self->_error( qq[Could not open file '$full': $!] ),
598 return
599 );
600
601 if( $entry->size ) {
602 binmode $fh;
603 syswrite $fh, $entry->data or (
604 $self->_error( qq[Could not write data to '$full'] ),
605 return
606 );
607 }
608
609 close $fh or (
610 $self->_error( qq[Could not close file '$full'] ),
611 return
612 );
613
614 } else {
615 $self->_make_special_file( $entry, $full ) or return;
616 }
617
618 utime time, $entry->mtime - TIME_OFFSET, $full or
619 $self->_error( qq[Could not update timestamp] );
620
621 if( $CHOWN && CAN_CHOWN ) {
622 chown $entry->uid, $entry->gid, $full or
623 $self->_error( qq[Could not set uid/gid on '$full'] );
624 }
625
626 ### only chmod if we're allowed to, but never chmod symlinks, since they'll
627 ### change the perms on the file they're linking too...
628 if( $CHMOD and not -l $full ) {
629 chmod $entry->mode, $full or
630 $self->_error( qq[Could not chown '$full' to ] . $entry->mode );
631 }
632
633 return 1;
634}
635
636sub _make_special_file {
637 my $self = shift;
638 my $entry = shift or return;
639 my $file = shift; return unless defined $file;
640
641 my $err;
642
643 if( $entry->is_symlink ) {
644 my $fail;
645 if( ON_UNIX ) {
646 symlink( $entry->linkname, $file ) or $fail++;
647
648 } else {
649 $self->_extract_special_file_as_plain_file( $entry, $file )
650 or $fail++;
651 }
652
653 $err = qq[Making symbolink link from '] . $entry->linkname .
654 qq[' to '$file' failed] if $fail;
655
656 } elsif ( $entry->is_hardlink ) {
657 my $fail;
658 if( ON_UNIX ) {
659 link( $entry->linkname, $file ) or $fail++;
660
661 } else {
662 $self->_extract_special_file_as_plain_file( $entry, $file )
663 or $fail++;
664 }
665
666 $err = qq[Making hard link from '] . $entry->linkname .
667 qq[' to '$file' failed] if $fail;
668
669 } elsif ( $entry->is_fifo ) {
670 ON_UNIX && !system('mknod', $file, 'p') or
671 $err = qq[Making fifo ']. $entry->name .qq[' failed];
672
673 } elsif ( $entry->is_blockdev or $entry->is_chardev ) {
674 my $mode = $entry->is_blockdev ? 'b' : 'c';
675
676 ON_UNIX && !system('mknod', $file, $mode,
677 $entry->devmajor, $entry->devminor) or
678 $err = qq[Making block device ']. $entry->name .qq[' (maj=] .
679 $entry->devmajor . qq[ min=] . $entry->devminor .
680 qq[) failed.];
681
682 } elsif ( $entry->is_socket ) {
683 ### the original doesn't do anything special for sockets.... ###
684 1;
685 }
686
687 return $err ? $self->_error( $err ) : 1;
688}
689
690### don't know how to make symlinks, let's just extract the file as
691### a plain file
692sub _extract_special_file_as_plain_file {
693 my $self = shift;
694 my $entry = shift or return;
695 my $file = shift; return unless defined $file;
696
697 my $err;
698 TRY: {
699 my $orig = $self->_find_entry( $entry->linkname );
700
701 unless( $orig ) {
702 $err = qq[Could not find file '] . $entry->linkname .
703 qq[' in memory.];
704 last TRY;
705 }
706
707 ### clone the entry, make it appear as a normal file ###
708 my $clone = $entry->clone;
709 $clone->_downgrade_to_plainfile;
710 $self->_extract_file( $clone, $file ) or last TRY;
711
712 return 1;
713 }
714
715 return $self->_error($err);
716}
717
718=head2 $tar->list_files( [\@properties] )
719
720Returns a list of the names of all the files in the archive.
721
722If C<list_files()> is passed an array reference as its first argument
723it returns a list of hash references containing the requested
724properties of each file. The following list of properties is
725supported: name, size, mtime (last modified date), mode, uid, gid,
726linkname, uname, gname, devmajor, devminor, prefix.
727
728Passing an array reference containing only one element, 'name', is
729special cased to return a list of names rather than a list of hash
730references, making it equivalent to calling C<list_files> without
731arguments.
732
733=cut
734
735sub list_files {
736 my $self = shift;
737 my $aref = shift || [ ];
738
739 unless( $self->_data ) {
740 $self->read() or return;
741 }
742
743 if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) {
744 return map { $_->full_path } @{$self->_data};
745 } else {
746
747 #my @rv;
748 #for my $obj ( @{$self->_data} ) {
749 # push @rv, { map { $_ => $obj->$_() } @$aref };
750 #}
751 #return @rv;
752
753 ### this does the same as the above.. just needs a +{ }
754 ### to make sure perl doesn't confuse it for a block
755 return map { my $o=$_;
756 +{ map { $_ => $o->$_() } @$aref }
757 } @{$self->_data};
758 }
759}
760
761sub _find_entry {
762 my $self = shift;
763 my $file = shift;
764
765 unless( defined $file ) {
766 $self->_error( qq[No file specified] );
767 return;
768 }
769
b30bcf62 770 ### it's an object already
771 return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' );
772
39713df4 773 for my $entry ( @{$self->_data} ) {
774 my $path = $entry->full_path;
775 return $entry if $path eq $file;
776 }
777
778 $self->_error( qq[No such file in archive: '$file'] );
779 return;
780}
781
782=head2 $tar->get_files( [@filenames] )
783
784Returns the C<Archive::Tar::File> objects matching the filenames
785provided. If no filename list was passed, all C<Archive::Tar::File>
786objects in the current Tar object are returned.
787
788Please refer to the C<Archive::Tar::File> documentation on how to
789handle these objects.
790
791=cut
792
793sub get_files {
794 my $self = shift;
795
796 return @{ $self->_data } unless @_;
797
798 my @list;
799 for my $file ( @_ ) {
800 push @list, grep { defined } $self->_find_entry( $file );
801 }
802
803 return @list;
804}
805
806=head2 $tar->get_content( $file )
807
808Return the content of the named file.
809
810=cut
811
812sub get_content {
813 my $self = shift;
814 my $entry = $self->_find_entry( shift ) or return;
815
816 return $entry->data;
817}
818
819=head2 $tar->replace_content( $file, $content )
820
821Make the string $content be the content for the file named $file.
822
823=cut
824
825sub replace_content {
826 my $self = shift;
827 my $entry = $self->_find_entry( shift ) or return;
828
829 return $entry->replace_content( shift );
830}
831
832=head2 $tar->rename( $file, $new_name )
833
834Rename the file of the in-memory archive to $new_name.
835
836Note that you must specify a Unix path for $new_name, since per tar
837standard, all files in the archive must be Unix paths.
838
839Returns true on success and false on failure.
840
841=cut
842
843sub rename {
844 my $self = shift;
845 my $file = shift; return unless defined $file;
846 my $new = shift; return unless defined $new;
847
848 my $entry = $self->_find_entry( $file ) or return;
849
850 return $entry->rename( $new );
851}
852
853=head2 $tar->remove (@filenamelist)
854
855Removes any entries with names matching any of the given filenames
856from the in-memory archive. Returns a list of C<Archive::Tar::File>
857objects that remain.
858
859=cut
860
861sub remove {
862 my $self = shift;
863 my @list = @_;
864
865 my %seen = map { $_->full_path => $_ } @{$self->_data};
866 delete $seen{ $_ } for @list;
867
868 $self->_data( [values %seen] );
869
870 return values %seen;
871}
872
873=head2 $tar->clear
874
875C<clear> clears the current in-memory archive. This effectively gives
876you a 'blank' object, ready to be filled again. Note that C<clear>
877only has effect on the object, not the underlying tarfile.
878
879=cut
880
881sub clear {
882 my $self = shift or return;
883
884 $self->_data( [] );
885 $self->_file( '' );
886
887 return 1;
888}
889
890
891=head2 $tar->write ( [$file, $compressed, $prefix] )
892
893Write the in-memory archive to disk. The first argument can either
894be the name of a file or a reference to an already open filehandle (a
895GLOB reference). If the second argument is true, the module will use
896IO::Zlib to write the file in a compressed format. If IO::Zlib is
897not available, the C<write> method will fail and return.
898
899Note that when you pass in a filehandle, the compression argument
900is ignored, as all files are printed verbatim to your filehandle.
901If you wish to enable compression with filehandles, use an
902C<IO::Zlib> filehandle instead.
903
904Specific levels of compression can be chosen by passing the values 2
905through 9 as the second parameter.
906
907The third argument is an optional prefix. All files will be tucked
908away in the directory you specify as prefix. So if you have files
909'a' and 'b' in your archive, and you specify 'foo' as prefix, they
910will be written to the archive as 'foo/a' and 'foo/b'.
911
912If no arguments are given, C<write> returns the entire formatted
913archive as a string, which could be useful if you'd like to stuff the
914archive into a socket or a pipe to gzip or something.
915
916=cut
917
918sub write {
919 my $self = shift;
920 my $file = shift; $file = '' unless defined $file;
921 my $gzip = shift || 0;
922 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix;
923 my $dummy = '';
924
925 ### only need a handle if we have a file to print to ###
926 my $handle = length($file)
927 ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) )
928 or return )
929 : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h }
930 : $HAS_IO_STRING ? IO::String->new
931 : __PACKAGE__->no_string_support();
932
933
934
935 for my $entry ( @{$self->_data} ) {
936 ### entries to be written to the tarfile ###
937 my @write_me;
938
939 ### only now will we change the object to reflect the current state
940 ### of the name and prefix fields -- this needs to be limited to
941 ### write() only!
942 my $clone = $entry->clone;
943
944
945 ### so, if you don't want use to use the prefix, we'll stuff
946 ### everything in the name field instead
947 if( $DO_NOT_USE_PREFIX ) {
948
949 ### you might have an extended prefix, if so, set it in the clone
950 ### XXX is ::Unix right?
951 $clone->name( length $ext_prefix
952 ? File::Spec::Unix->catdir( $ext_prefix,
953 $clone->full_path)
954 : $clone->full_path );
955 $clone->prefix( '' );
956
957 ### otherwise, we'll have to set it properly -- prefix part in the
958 ### prefix and name part in the name field.
959 } else {
960
961 ### split them here, not before!
962 my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path );
963
964 ### you might have an extended prefix, if so, set it in the clone
965 ### XXX is ::Unix right?
966 $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix )
967 if length $ext_prefix;
968
969 $clone->prefix( $prefix );
970 $clone->name( $name );
971 }
972
973 ### names are too long, and will get truncated if we don't add a
974 ### '@LongLink' file...
975 my $make_longlink = ( length($clone->name) > NAME_LENGTH or
976 length($clone->prefix) > PREFIX_LENGTH
977 ) || 0;
978
979 ### perhaps we need to make a longlink file?
980 if( $make_longlink ) {
981 my $longlink = Archive::Tar::File->new(
982 data => LONGLINK_NAME,
983 $clone->full_path,
984 { type => LONGLINK }
985 );
986
987 unless( $longlink ) {
988 $self->_error( qq[Could not create 'LongLink' entry for ] .
989 qq[oversize file '] . $clone->full_path ."'" );
990 return;
991 };
992
993 push @write_me, $longlink;
994 }
995
996 push @write_me, $clone;
997
998 ### write the one, optionally 2 a::t::file objects to the handle
999 for my $clone (@write_me) {
1000
1001 ### if the file is a symlink, there are 2 options:
1002 ### either we leave the symlink intact, but then we don't write any
1003 ### data OR we follow the symlink, which means we actually make a
1004 ### copy. if we do the latter, we have to change the TYPE of the
1005 ### clone to 'FILE'
1006 my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK;
1007 my $data_ok = !$clone->is_symlink && $clone->has_content;
1008
1009 ### downgrade to a 'normal' file if it's a symlink we're going to
1010 ### treat as a regular file
1011 $clone->_downgrade_to_plainfile if $link_ok;
1012
1013 ### get the header for this block
1014 my $header = $self->_format_tar_entry( $clone );
1015 unless( $header ) {
1016 $self->_error(q[Could not format header for: ] .
1017 $clone->full_path );
1018 return;
1019 }
1020
1021 unless( print $handle $header ) {
1022 $self->_error(q[Could not write header for: ] .
1023 $clone->full_path);
1024 return;
1025 }
1026
1027 if( $link_ok or $data_ok ) {
1028 unless( print $handle $clone->data ) {
1029 $self->_error(q[Could not write data for: ] .
1030 $clone->full_path);
1031 return;
1032 }
1033
1034 ### pad the end of the clone if required ###
1035 print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK
1036 }
1037
1038 } ### done writing these entries
1039 }
1040
1041 ### write the end markers ###
1042 print $handle TAR_END x 2 or
1043 return $self->_error( qq[Could not write tar end markers] );
b30bcf62 1044
39713df4 1045 ### did you want it written to a file, or returned as a string? ###
b30bcf62 1046 my $rv = length($file) ? 1
39713df4 1047 : $HAS_PERLIO ? $dummy
b30bcf62 1048 : do { seek $handle, 0, 0; local $/; <$handle> };
1049
1050 ### make sure to close the handle;
1051 close $handle;
1052
1053 return $rv;
39713df4 1054}
1055
1056sub _format_tar_entry {
1057 my $self = shift;
1058 my $entry = shift or return;
1059 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix;
1060 my $no_prefix = shift || 0;
1061
1062 my $file = $entry->name;
1063 my $prefix = $entry->prefix; $prefix = '' unless defined $prefix;
1064
1065 ### remove the prefix from the file name
1066 ### not sure if this is still neeeded --kane
1067 ### no it's not -- Archive::Tar::File->_new_from_file will take care of
1068 ### this for us. Even worse, this would break if we tried to add a file
1069 ### like x/x.
1070 #if( length $prefix ) {
1071 # $file =~ s/^$match//;
1072 #}
1073
1074 $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix)
1075 if length $ext_prefix;
1076
1077 ### not sure why this is... ###
1078 my $l = PREFIX_LENGTH; # is ambiguous otherwise...
1079 substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH;
1080
1081 my $f1 = "%06o"; my $f2 = "%11o";
1082
1083 ### this might be optimizable with a 'changed' flag in the file objects ###
1084 my $tar = pack (
1085 PACK,
1086 $file,
1087
1088 (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]),
1089 (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]),
1090
1091 "", # checksum field - space padded a bit down
1092
1093 (map { $entry->$_() } qw[type linkname magic]),
1094
1095 $entry->version || TAR_VERSION,
1096
1097 (map { $entry->$_() } qw[uname gname]),
1098 (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]),
1099
1100 ($no_prefix ? '' : $prefix)
1101 );
1102
1103 ### add the checksum ###
1104 substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar));
1105
1106 return $tar;
1107}
1108
1109=head2 $tar->add_files( @filenamelist )
1110
1111Takes a list of filenames and adds them to the in-memory archive.
1112
1113The path to the file is automatically converted to a Unix like
1114equivalent for use in the archive, and, if on MacOS, the file's
1115modification time is converted from the MacOS epoch to the Unix epoch.
1116So tar archives created on MacOS with B<Archive::Tar> can be read
1117both with I<tar> on Unix and applications like I<suntar> or
1118I<Stuffit Expander> on MacOS.
1119
1120Be aware that the file's type/creator and resource fork will be lost,
1121which is usually what you want in cross-platform archives.
1122
1123Returns a list of C<Archive::Tar::File> objects that were just added.
1124
1125=cut
1126
1127sub add_files {
1128 my $self = shift;
1129 my @files = @_ or return;
1130
1131 my @rv;
1132 for my $file ( @files ) {
c3745331 1133 unless( -e $file || -l $file ) {
39713df4 1134 $self->_error( qq[No such file: '$file'] );
1135 next;
1136 }
1137
1138 my $obj = Archive::Tar::File->new( file => $file );
1139 unless( $obj ) {
1140 $self->_error( qq[Unable to add file: '$file'] );
1141 next;
1142 }
1143
1144 push @rv, $obj;
1145 }
1146
1147 push @{$self->{_data}}, @rv;
1148
1149 return @rv;
1150}
1151
1152=head2 $tar->add_data ( $filename, $data, [$opthashref] )
1153
1154Takes a filename, a scalar full of data and optionally a reference to
1155a hash with specific options.
1156
1157Will add a file to the in-memory archive, with name C<$filename> and
1158content C<$data>. Specific properties can be set using C<$opthashref>.
1159The following list of properties is supported: name, size, mtime
1160(last modified date), mode, uid, gid, linkname, uname, gname,
b3200c5d 1161devmajor, devminor, prefix, type. (On MacOS, the file's path and
39713df4 1162modification times are converted to Unix equivalents.)
1163
b3200c5d 1164Valid values for the file type are the following constants defined in
1165Archive::Tar::Constants:
1166
1167=over 4
1168
1169=item FILE
1170
1171Regular file.
1172
1173=item HARDLINK
1174
1175=item SYMLINK
1176
1177Hard and symbolic ("soft") links; linkname should specify target.
1178
1179=item CHARDEV
1180
1181=item BLOCKDEV
1182
1183Character and block devices. devmajor and devminor should specify the major
1184and minor device numbers.
1185
1186=item DIR
1187
1188Directory.
1189
1190=item FIFO
1191
1192FIFO (named pipe).
1193
1194=item SOCKET
1195
1196Socket.
1197
1198=back
1199
39713df4 1200Returns the C<Archive::Tar::File> object that was just added, or
1201C<undef> on failure.
1202
1203=cut
1204
1205sub add_data {
1206 my $self = shift;
1207 my ($file, $data, $opt) = @_;
1208
1209 my $obj = Archive::Tar::File->new( data => $file, $data, $opt );
1210 unless( $obj ) {
1211 $self->_error( qq[Unable to add file: '$file'] );
1212 return;
1213 }
1214
1215 push @{$self->{_data}}, $obj;
1216
1217 return $obj;
1218}
1219
1220=head2 $tar->error( [$BOOL] )
1221
1222Returns the current errorstring (usually, the last error reported).
1223If a true value was specified, it will give the C<Carp::longmess>
1224equivalent of the error, in effect giving you a stacktrace.
1225
1226For backwards compatibility, this error is also available as
1227C<$Archive::Tar::error> although it is much recommended you use the
1228method call instead.
1229
1230=cut
1231
1232{
1233 $error = '';
1234 my $longmess;
1235
1236 sub _error {
1237 my $self = shift;
1238 my $msg = $error = shift;
1239 $longmess = Carp::longmess($error);
1240
1241 ### set Archive::Tar::WARN to 0 to disable printing
1242 ### of errors
1243 if( $WARN ) {
1244 carp $DEBUG ? $longmess : $msg;
1245 }
1246
1247 return;
1248 }
1249
1250 sub error {
1251 my $self = shift;
1252 return shift() ? $longmess : $error;
1253 }
1254}
1255
f38c1908 1256=head2 $tar->setcwd( $cwd );
1257
1258C<Archive::Tar> needs to know the current directory, and it will run
1259C<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the
1260tarfile and saves it in the file system. (As of version 1.30, however,
1261C<Archive::Tar> will use the speed optimization described below
1262automatically, so it's only relevant if you're using C<extract_file()>).
1263
1264Since C<Archive::Tar> doesn't change the current directory internally
1265while it is extracting the items in a tarball, all calls to C<Cwd::cwd()>
1266can be avoided if we can guarantee that the current directory doesn't
1267get changed externally.
1268
1269To use this performance boost, set the current directory via
1270
1271 use Cwd;
1272 $tar->setcwd( cwd() );
1273
1274once before calling a function like C<extract_file> and
1275C<Archive::Tar> will use the current directory setting from then on
1276and won't call C<Cwd::cwd()> internally.
1277
1278To switch back to the default behaviour, use
1279
1280 $tar->setcwd( undef );
1281
1282and C<Archive::Tar> will call C<Cwd::cwd()> internally again.
1283
1284If you're using C<Archive::Tar>'s C<exract()> method, C<setcwd()> will
1285be called for you.
1286
1287=cut
1288
1289sub setcwd {
1290 my $self = shift;
1291 my $cwd = shift;
1292
1293 $self->{cwd} = $cwd;
1294}
39713df4 1295
1296=head2 $bool = $tar->has_io_string
1297
1298Returns true if we currently have C<IO::String> support loaded.
1299
1300Either C<IO::String> or C<perlio> support is needed to support writing
3c4b39be 1301stringified archives. Currently, C<perlio> is the preferred method, if
39713df4 1302available.
1303
1304See the C<GLOBAL VARIABLES> section to see how to change this preference.
1305
1306=cut
1307
1308sub has_io_string { return $HAS_IO_STRING; }
1309
1310=head2 $bool = $tar->has_perlio
1311
1312Returns true if we currently have C<perlio> support loaded.
1313
1314This requires C<perl-5.8> or higher, compiled with C<perlio>
1315
1316Either C<IO::String> or C<perlio> support is needed to support writing
3c4b39be 1317stringified archives. Currently, C<perlio> is the preferred method, if
39713df4 1318available.
1319
1320See the C<GLOBAL VARIABLES> section to see how to change this preference.
1321
1322=cut
1323
1324sub has_perlio { return $HAS_PERLIO; }
1325
1326
1327=head1 Class Methods
1328
1329=head2 Archive::Tar->create_archive($file, $compression, @filelist)
1330
1331Creates a tar file from the list of files provided. The first
1332argument can either be the name of the tar file to create or a
1333reference to an open file handle (e.g. a GLOB reference).
1334
1335The second argument specifies the level of compression to be used, if
1336any. Compression of tar files requires the installation of the
1337IO::Zlib module. Specific levels of compression may be
1338requested by passing a value between 2 and 9 as the second argument.
1339Any other value evaluating as true will result in the default
1340compression level being used.
1341
1342Note that when you pass in a filehandle, the compression argument
1343is ignored, as all files are printed verbatim to your filehandle.
1344If you wish to enable compression with filehandles, use an
1345C<IO::Zlib> filehandle instead.
1346
1347The remaining arguments list the files to be included in the tar file.
1348These files must all exist. Any files which don't exist or can't be
1349read are silently ignored.
1350
1351If the archive creation fails for any reason, C<create_archive> will
1352return false. Please use the C<error> method to find the cause of the
1353failure.
1354
1355Note that this method does not write C<on the fly> as it were; it
1356still reads all the files into memory before writing out the archive.
1357Consult the FAQ below if this is a problem.
1358
1359=cut
1360
1361sub create_archive {
1362 my $class = shift;
1363
1364 my $file = shift; return unless defined $file;
1365 my $gzip = shift || 0;
1366 my @files = @_;
1367
1368 unless( @files ) {
1369 return $class->_error( qq[Cowardly refusing to create empty archive!] );
1370 }
1371
1372 my $tar = $class->new;
1373 $tar->add_files( @files );
1374 return $tar->write( $file, $gzip );
1375}
1376
1377=head2 Archive::Tar->list_archive ($file, $compressed, [\@properties])
1378
1379Returns a list of the names of all the files in the archive. The
1380first argument can either be the name of the tar file to list or a
1381reference to an open file handle (e.g. a GLOB reference).
1382
1383If C<list_archive()> is passed an array reference as its third
1384argument it returns a list of hash references containing the requested
1385properties of each file. The following list of properties is
b3200c5d 1386supported: full_path, name, size, mtime (last modified date), mode,
1387uid, gid, linkname, uname, gname, devmajor, devminor, prefix.
1388
1389See C<Archive::Tar::File> for details about supported properties.
39713df4 1390
1391Passing an array reference containing only one element, 'name', is
1392special cased to return a list of names rather than a list of hash
1393references.
1394
1395=cut
1396
1397sub list_archive {
1398 my $class = shift;
1399 my $file = shift; return unless defined $file;
1400 my $gzip = shift || 0;
1401
1402 my $tar = $class->new($file, $gzip);
1403 return unless $tar;
1404
1405 return $tar->list_files( @_ );
1406}
1407
1408=head2 Archive::Tar->extract_archive ($file, $gzip)
1409
1410Extracts the contents of the tar file. The first argument can either
1411be the name of the tar file to create or a reference to an open file
1412handle (e.g. a GLOB reference). All relative paths in the tar file will
1413be created underneath the current working directory.
1414
1415C<extract_archive> will return a list of files it extracted.
1416If the archive extraction fails for any reason, C<extract_archive>
1417will return false. Please use the C<error> method to find the cause
1418of the failure.
1419
1420=cut
1421
1422sub extract_archive {
1423 my $class = shift;
1424 my $file = shift; return unless defined $file;
1425 my $gzip = shift || 0;
1426
1427 my $tar = $class->new( ) or return;
1428
1429 return $tar->read( $file, $gzip, { extract => 1 } );
1430}
1431
1432=head2 Archive::Tar->can_handle_compressed_files
1433
1434A simple checking routine, which will return true if C<Archive::Tar>
1435is able to uncompress compressed archives on the fly with C<IO::Zlib>,
1436or false if C<IO::Zlib> is not installed.
1437
1438You can use this as a shortcut to determine whether C<Archive::Tar>
1439will do what you think before passing compressed archives to its
1440C<read> method.
1441
1442=cut
1443
1444sub can_handle_compressed_files { return ZLIB ? 1 : 0 }
1445
1446sub no_string_support {
1447 croak("You have to install IO::String to support writing archives to strings");
1448}
1449
14501;
1451
1452__END__
1453
1454=head1 GLOBAL VARIABLES
1455
1456=head2 $Archive::Tar::FOLLOW_SYMLINK
1457
1458Set this variable to C<1> to make C<Archive::Tar> effectively make a
1459copy of the file when extracting. Default is C<0>, which
1460means the symlink stays intact. Of course, you will have to pack the
1461file linked to as well.
1462
1463This option is checked when you write out the tarfile using C<write>
1464or C<create_archive>.
1465
1466This works just like C</bin/tar>'s C<-h> option.
1467
1468=head2 $Archive::Tar::CHOWN
1469
1470By default, C<Archive::Tar> will try to C<chown> your files if it is
1471able to. In some cases, this may not be desired. In that case, set
1472this variable to C<0> to disable C<chown>-ing, even if it were
1473possible.
1474
1475The default is C<1>.
1476
1477=head2 $Archive::Tar::CHMOD
1478
1479By default, C<Archive::Tar> will try to C<chmod> your files to
1480whatever mode was specified for the particular file in the archive.
1481In some cases, this may not be desired. In that case, set this
1482variable to C<0> to disable C<chmod>-ing.
1483
1484The default is C<1>.
1485
1486=head2 $Archive::Tar::DO_NOT_USE_PREFIX
1487
f38c1908 1488By default, C<Archive::Tar> will try to put paths that are over
1489100 characters in the C<prefix> field of your tar header, as
1490defined per POSIX-standard. However, some (older) tar programs
1491do not implement this spec. To retain compatibility with these older
1492or non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX>
1493variable to a true value, and C<Archive::Tar> will use an alternate
1494way of dealing with paths over 100 characters by using the
1495C<GNU Extended Header> feature.
1496
1497Note that clients who do not support the C<GNU Extended Header>
1498feature will not be able to read these archives. Such clients include
1499tars on C<Solaris>, C<Irix> and C<AIX>.
39713df4 1500
1501The default is C<0>.
1502
1503=head2 $Archive::Tar::DEBUG
1504
1505Set this variable to C<1> to always get the C<Carp::longmess> output
1506of the warnings, instead of the regular C<carp>. This is the same
1507message you would get by doing:
1508
1509 $tar->error(1);
1510
1511Defaults to C<0>.
1512
1513=head2 $Archive::Tar::WARN
1514
1515Set this variable to C<0> if you do not want any warnings printed.
1516Personally I recommend against doing this, but people asked for the
1517option. Also, be advised that this is of course not threadsafe.
1518
1519Defaults to C<1>.
1520
1521=head2 $Archive::Tar::error
1522
1523Holds the last reported error. Kept for historical reasons, but its
1524use is very much discouraged. Use the C<error()> method instead:
1525
1526 warn $tar->error unless $tar->extract;
1527
1528=head2 $Archive::Tar::HAS_PERLIO
1529
1530This variable holds a boolean indicating if we currently have
1531C<perlio> support loaded. This will be enabled for any perl
1532greater than C<5.8> compiled with C<perlio>.
1533
1534If you feel strongly about disabling it, set this variable to
1535C<false>. Note that you will then need C<IO::String> installed
1536to support writing stringified archives.
1537
1538Don't change this variable unless you B<really> know what you're
1539doing.
1540
1541=head2 $Archive::Tar::HAS_IO_STRING
1542
1543This variable holds a boolean indicating if we currently have
1544C<IO::String> support loaded. This will be enabled for any perl
1545that has a loadable C<IO::String> module.
1546
1547If you feel strongly about disabling it, set this variable to
1548C<false>. Note that you will then need C<perlio> support from
1549your perl to be able to write stringified archives.
1550
1551Don't change this variable unless you B<really> know what you're
1552doing.
1553
1554=head1 FAQ
1555
1556=over 4
1557
1558=item What's the minimum perl version required to run Archive::Tar?
1559
1560You will need perl version 5.005_03 or newer.
1561
1562=item Isn't Archive::Tar slow?
1563
1564Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar>
1565However, it's very portable. If speed is an issue, consider using
1566C</bin/tar> instead.
1567
1568=item Isn't Archive::Tar heavier on memory than /bin/tar?
1569
1570Yes it is, see previous answer. Since C<Compress::Zlib> and therefore
1571C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little
1572choice but to read the archive into memory.
1573This is ok if you want to do in-memory manipulation of the archive.
1574If you just want to extract, use the C<extract_archive> class method
1575instead. It will optimize and write to disk immediately.
1576
1577=item Can't you lazy-load data instead?
1578
1579No, not easily. See previous question.
1580
1581=item How much memory will an X kb tar file need?
1582
1583Probably more than X kb, since it will all be read into memory. If
1584this is a problem, and you don't need to do in memory manipulation
1585of the archive, consider using C</bin/tar> instead.
1586
1587=item What do you do with unsupported filetypes in an archive?
1588
1589C<Unix> has a few filetypes that aren't supported on other platforms,
1590like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just
1591try to make a copy of the original file, rather than throwing an error.
1592
1593This does require you to read the entire archive in to memory first,
1594since otherwise we wouldn't know what data to fill the copy with.
1595(This means that you cannot use the class methods on archives that
1596have incompatible filetypes and still expect things to work).
1597
1598For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that
1599the extraction of this particular item didn't work.
1600
f38c1908 1601=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly!
1602
1603By default, C<Archive::Tar> is in a completely POSIX-compatible
1604mode, which uses the POSIX-specification of C<tar> to store files.
1605For paths greather than 100 characters, this is done using the
1606C<POSIX header prefix>. Non-POSIX-compatible clients may not support
1607this part of the specification, and may only support the C<GNU Extended
1608Header> functionality. To facilitate those clients, you can set the
1609C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the
1610C<GLOBAL VARIABLES> section for details on this variable.
1611
c3745331 1612Note that GNU tar earlier than version 1.14 does not cope well with
1613the C<POSIX header prefix>. If you use such a version, consider setting
1614the C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>.
1615
b30bcf62 1616=item How do I extract only files that have property X from an archive?
1617
1618Sometimes, you might not wish to extract a complete archive, just
1619the files that are relevant to you, based on some criteria.
1620
1621You can do this by filtering a list of C<Archive::Tar::File> objects
1622based on your criteria. For example, to extract only files that have
1623the string C<foo> in their title, you would use:
1624
1625 $tar->extract(
1626 grep { $_->full_path =~ /foo/ } $tar->get_files
1627 );
1628
1629This way, you can filter on any attribute of the files in the archive.
1630Consult the C<Archive::Tar::File> documentation on how to use these
1631objects.
1632
81a5970e 1633=item How do I access .tar.Z files?
1634
1635The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via
1636the C<IO::Zlib> module) to access tar files that have been compressed
1637with C<gzip>. Unfortunately tar files compressed with the Unix C<compress>
1638utility cannot be read by C<Compress::Zlib> and so cannot be directly
1639accesses by C<Archive::Tar>.
1640
1641If the C<uncompress> or C<gunzip> programs are available, you can use
1642one of these workarounds to read C<.tar.Z> files from C<Archive::Tar>
1643
1644Firstly with C<uncompress>
1645
1646 use Archive::Tar;
1647
1648 open F, "uncompress -c $filename |";
1649 my $tar = Archive::Tar->new(*F);
1650 ...
1651
1652and this with C<gunzip>
1653
1654 use Archive::Tar;
1655
1656 open F, "gunzip -c $filename |";
1657 my $tar = Archive::Tar->new(*F);
1658 ...
1659
1660Similarly, if the C<compress> program is available, you can use this to
1661write a C<.tar.Z> file
1662
1663 use Archive::Tar;
1664 use IO::File;
1665
1666 my $fh = new IO::File "| compress -c >$filename";
1667 my $tar = Archive::Tar->new();
1668 ...
1669 $tar->write($fh);
1670 $fh->close ;
1671
1672
39713df4 1673=back
1674
1675=head1 TODO
1676
1677=over 4
1678
1679=item Check if passed in handles are open for read/write
1680
1681Currently I don't know of any portable pure perl way to do this.
1682Suggestions welcome.
1683
b3200c5d 1684=item Allow archives to be passed in as string
1685
1686Currently, we only allow opened filehandles or filenames, but
1687not strings. The internals would need some reworking to facilitate
1688stringified archives.
1689
1690=item Facilitate processing an opened filehandle of a compressed archive
1691
1692Currently, we only support this if the filehandle is an IO::Zlib object.
1693Environments, like apache, will present you with an opened filehandle
1694to an uploaded file, which might be a compressed archive.
1695
39713df4 1696=back
1697
f38c1908 1698=head1 SEE ALSO
1699
1700=over 4
1701
1702=item The GNU tar specification
1703
1704C<http://www.gnu.org/software/tar/manual/tar.html>
1705
1706=item The PAX format specication
1707
1708The specifcation which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html>
1709
1710=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html>
1711
1712=item GNU tar intends to switch to POSIX compatibility
1713
1714GNU Tar authors have expressed their intention to become completely
1715POSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html>
1716
1717=item A Comparison between various tar implementations
1718
1719Lists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs>
1720
1721=back
1722
39713df4 1723=head1 AUTHOR
1724
c3745331 1725This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1726
1727Please reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>.
39713df4 1728
1729=head1 ACKNOWLEDGEMENTS
1730
1731Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney and
1732especially Andrew Savige for their help and suggestions.
1733
1734=head1 COPYRIGHT
1735
c3745331 1736This module is copyright (c) 2002 - 2007 Jos Boumans
1737E<lt>kane@cpan.orgE<gt>. All rights reserved.
39713df4 1738
c3745331 1739This library is free software; you may redistribute and/or modify
1740it under the same terms as Perl itself.
39713df4 1741
1742=cut