perlsub's docs on state assignment are busted
[p5sagit/p5-mst-13.2.git] / lib / Archive / Tar.pm
CommitLineData
39713df4 1### the gnu tar specification:
f38c1908 2### http://www.gnu.org/software/tar/manual/tar.html
39713df4 3###
4### and the pax format spec, which tar derives from:
5### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
6
7package Archive::Tar;
8require 5.005_03;
9
10use strict;
11use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
12 $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING];
13
14$DEBUG = 0;
15$WARN = 1;
16$FOLLOW_SYMLINK = 0;
f5afd28d 17$VERSION = "1.36";
39713df4 18$CHOWN = 1;
19$CHMOD = 1;
20$DO_NOT_USE_PREFIX = 0;
21
22BEGIN {
23 use Config;
24 $HAS_PERLIO = $Config::Config{useperlio};
25
26 ### try and load IO::String anyway, so you can dynamically
27 ### switch between perlio and IO::String
28 eval {
29 require IO::String;
30 import IO::String;
31 };
32 $HAS_IO_STRING = $@ ? 0 : 1;
33
34}
35
36use Cwd;
37use IO::File;
38use Carp qw(carp croak);
39use File::Spec ();
40use File::Spec::Unix ();
41use File::Path ();
42
43use Archive::Tar::File;
44use Archive::Tar::Constant;
45
46=head1 NAME
47
48Archive::Tar - module for manipulations of tar archives
49
50=head1 SYNOPSIS
51
52 use Archive::Tar;
53 my $tar = Archive::Tar->new;
54
55 $tar->read('origin.tgz',1);
56 $tar->extract();
57
58 $tar->add_files('file/foo.pl', 'docs/README');
59 $tar->add_data('file/baz.txt', 'This is the contents now');
60
61 $tar->rename('oldname', 'new/file/name');
62
63 $tar->write('files.tar');
64
65=head1 DESCRIPTION
66
67Archive::Tar provides an object oriented mechanism for handling tar
68files. It provides class methods for quick and easy files handling
69while also allowing for the creation of tar file objects for custom
70manipulation. If you have the IO::Zlib module installed,
71Archive::Tar will also support compressed or gzipped tar files.
72
73An object of class Archive::Tar represents a .tar(.gz) archive full
74of files and things.
75
76=head1 Object Methods
77
78=head2 Archive::Tar->new( [$file, $compressed] )
79
80Returns a new Tar object. If given any arguments, C<new()> calls the
81C<read()> method automatically, passing on the arguments provided to
82the C<read()> method.
83
84If C<new()> is invoked with arguments and the C<read()> method fails
85for any reason, C<new()> returns undef.
86
87=cut
88
89my $tmpl = {
90 _data => [ ],
91 _file => 'Unknown',
92};
93
94### install get/set accessors for this object.
95for my $key ( keys %$tmpl ) {
96 no strict 'refs';
97 *{__PACKAGE__."::$key"} = sub {
98 my $self = shift;
99 $self->{$key} = $_[0] if @_;
100 return $self->{$key};
101 }
102}
103
104sub new {
105 my $class = shift;
106 $class = ref $class if ref $class;
107
108 ### copying $tmpl here since a shallow copy makes it use the
109 ### same aref, causing for files to remain in memory always.
110 my $obj = bless { _data => [ ], _file => 'Unknown' }, $class;
111
112 if (@_) {
81a5970e 113 unless ( $obj->read( @_ ) ) {
114 $obj->_error(qq[No data could be read from file]);
115 return;
116 }
39713df4 117 }
118
119 return $obj;
120}
121
122=head2 $tar->read ( $filename|$handle, $compressed, {opt => 'val'} )
123
124Read the given tar file into memory.
125The first argument can either be the name of a file or a reference to
126an already open filehandle (or an IO::Zlib object if it's compressed)
127The second argument indicates whether the file referenced by the first
128argument is compressed.
129
130The C<read> will I<replace> any previous content in C<$tar>!
131
132The second argument may be considered optional if IO::Zlib is
133installed, since it will transparently Do The Right Thing.
134Archive::Tar will warn if you try to pass a compressed file if
135IO::Zlib is not available and simply return.
136
b3200c5d 137Note that you can currently B<not> pass a C<gzip> compressed
138filehandle, which is not opened with C<IO::Zlib>, nor a string
139containing the full archive information (either compressed or
140uncompressed). These are worth while features, but not currently
141implemented. See the C<TODO> section.
142
39713df4 143The third argument can be a hash reference with options. Note that
144all options are case-sensitive.
145
146=over 4
147
148=item limit
149
150Do not read more than C<limit> files. This is useful if you have
151very big archives, and are only interested in the first few files.
152
153=item extract
154
155If set to true, immediately extract entries when reading them. This
156gives you the same memory break as the C<extract_archive> function.
157Note however that entries will not be read into memory, but written
158straight to disk.
159
160=back
161
162All files are stored internally as C<Archive::Tar::File> objects.
163Please consult the L<Archive::Tar::File> documentation for details.
164
165Returns the number of files read in scalar context, and a list of
166C<Archive::Tar::File> objects in list context.
167
168=cut
169
170sub read {
171 my $self = shift;
172 my $file = shift;
173 my $gzip = shift || 0;
174 my $opts = shift || {};
175
176 unless( defined $file ) {
177 $self->_error( qq[No file to read from!] );
178 return;
179 } else {
180 $self->_file( $file );
181 }
182
183 my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) )
184 or return;
185
186 my $data = $self->_read_tar( $handle, $opts ) or return;
187
188 $self->_data( $data );
189
190 return wantarray ? @$data : scalar @$data;
191}
192
193sub _get_handle {
194 my $self = shift;
195 my $file = shift; return unless defined $file;
196 return $file if ref $file;
197
198 my $gzip = shift || 0;
199 my $mode = shift || READ_ONLY->( ZLIB ); # default to read only
200
201 my $fh; my $bin;
202
203 ### only default to ZLIB if we're not trying to /write/ to a handle ###
204 if( ZLIB and $gzip || MODE_READ->( $mode ) ) {
205
206 ### IO::Zlib will Do The Right Thing, even when passed
207 ### a plain file ###
208 $fh = new IO::Zlib;
209
210 } else {
211 if( $gzip ) {
212 $self->_error(qq[Compression not available - Install IO::Zlib!]);
213 return;
214
215 } else {
216 $fh = new IO::File;
217 $bin++;
218 }
219 }
220
221 unless( $fh->open( $file, $mode ) ) {
222 $self->_error( qq[Could not create filehandle for '$file': $!!] );
223 return;
224 }
225
226 binmode $fh if $bin;
227
228 return $fh;
229}
230
231sub _read_tar {
232 my $self = shift;
233 my $handle = shift or return;
234 my $opts = shift || {};
235
236 my $count = $opts->{limit} || 0;
237 my $extract = $opts->{extract} || 0;
238
239 ### set a cap on the amount of files to extract ###
240 my $limit = 0;
241 $limit = 1 if $count > 0;
242
243 my $tarfile = [ ];
244 my $chunk;
245 my $read = 0;
246 my $real_name; # to set the name of a file when
247 # we're encountering @longlink
248 my $data;
249
250 LOOP:
251 while( $handle->read( $chunk, HEAD ) ) {
252 ### IO::Zlib doesn't support this yet
253 my $offset = eval { tell $handle } || 'unknown';
254
255 unless( $read++ ) {
256 my $gzip = GZIP_MAGIC_NUM;
257 if( $chunk =~ /$gzip/ ) {
258 $self->_error( qq[Cannot read compressed format in tar-mode] );
259 return;
260 }
261 }
262
263 ### if we can't read in all bytes... ###
264 last if length $chunk != HEAD;
265
266 ### Apparently this should really be two blocks of 512 zeroes,
267 ### but GNU tar sometimes gets it wrong. See comment in the
268 ### source code (tar.c) to GNU cpio.
269 next if $chunk eq TAR_END;
270
b30bcf62 271 ### according to the posix spec, the last 12 bytes of the header are
272 ### null bytes, to pad it to a 512 byte block. That means if these
273 ### bytes are NOT null bytes, it's a corrrupt header. See:
274 ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx
275 ### line 111
276 { my $nulls = join '', "\0" x 12;
277 unless( $nulls eq substr( $chunk, 500, 12 ) ) {
278 $self->_error( qq[Invalid header block at offset $offset] );
279 next LOOP;
280 }
281 }
282
81a5970e 283 ### pass the realname, so we can set it 'proper' right away
284 ### some of the heuristics are done on the name, so important
285 ### to set it ASAP
39713df4 286 my $entry;
81a5970e 287 { my %extra_args = ();
288 $extra_args{'name'} = $$real_name if defined $real_name;
289
290 unless( $entry = Archive::Tar::File->new( chunk => $chunk,
291 %extra_args )
292 ) {
293 $self->_error( qq[Couldn't read chunk at offset $offset] );
b30bcf62 294 next LOOP;
81a5970e 295 }
39713df4 296 }
297
298 ### ignore labels:
299 ### http://www.gnu.org/manual/tar/html_node/tar_139.html
300 next if $entry->is_label;
301
302 if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) {
303
304 if ( $entry->is_file && !$entry->validate ) {
305 ### sometimes the chunk is rather fux0r3d and a whole 512
c3745331 306 ### bytes ends up in the ->name area.
39713df4 307 ### clean it up, if need be
308 my $name = $entry->name;
309 $name = substr($name, 0, 100) if length $name > 100;
310 $name =~ s/\n/ /g;
311
312 $self->_error( $name . qq[: checksum error] );
313 next LOOP;
314 }
315
316 my $block = BLOCK_SIZE->( $entry->size );
317
318 $data = $entry->get_content_by_ref;
319
320 ### just read everything into memory
321 ### can't do lazy loading since IO::Zlib doesn't support 'seek'
322 ### this is because Compress::Zlib doesn't support it =/
323 ### this reads in the whole data in one read() call.
324 if( $handle->read( $$data, $block ) < $block ) {
325 $self->_error( qq[Read error on tarfile (missing data) '].
326 $entry->full_path ."' at offset $offset" );
b30bcf62 327 next LOOP;
39713df4 328 }
329
330 ### throw away trailing garbage ###
376cc5ea 331 substr ($$data, $entry->size) = "" if defined $$data;
39713df4 332
333 ### part II of the @LongLink munging -- need to do /after/
334 ### the checksum check.
335 if( $entry->is_longlink ) {
336 ### weird thing in tarfiles -- if the file is actually a
337 ### @LongLink, the data part seems to have a trailing ^@
338 ### (unprintable) char. to display, pipe output through less.
339 ### but that doesn't *always* happen.. so check if the last
340 ### character is a control character, and if so remove it
341 ### at any rate, we better remove that character here, or tests
342 ### like 'eq' and hashlook ups based on names will SO not work
343 ### remove it by calculating the proper size, and then
344 ### tossing out everything that's longer than that size.
345
346 ### count number of nulls
347 my $nulls = $$data =~ tr/\0/\0/;
348
349 ### cut data + size by that many bytes
350 $entry->size( $entry->size - $nulls );
351 substr ($$data, $entry->size) = "";
352 }
353 }
354
355 ### clean up of the entries.. posix tar /apparently/ has some
356 ### weird 'feature' that allows for filenames > 255 characters
357 ### they'll put a header in with as name '././@LongLink' and the
358 ### contents will be the name of the /next/ file in the archive
359 ### pretty crappy and kludgy if you ask me
360
361 ### set the name for the next entry if this is a @LongLink;
362 ### this is one ugly hack =/ but needed for direct extraction
363 if( $entry->is_longlink ) {
364 $real_name = $data;
b30bcf62 365 next LOOP;
39713df4 366 } elsif ( defined $real_name ) {
367 $entry->name( $$real_name );
368 $entry->prefix('');
369 undef $real_name;
370 }
371
372 $self->_extract_file( $entry ) if $extract
373 && !$entry->is_longlink
374 && !$entry->is_unknown
375 && !$entry->is_label;
376
377 ### Guard against tarfiles with garbage at the end
378 last LOOP if $entry->name eq '';
379
380 ### push only the name on the rv if we're extracting
381 ### -- for extract_archive
382 push @$tarfile, ($extract ? $entry->name : $entry);
383
384 if( $limit ) {
385 $count-- unless $entry->is_longlink || $entry->is_dir;
386 last LOOP unless $count;
387 }
388 } continue {
389 undef $data;
390 }
391
392 return $tarfile;
393}
394
395=head2 $tar->contains_file( $filename )
396
397Check if the archive contains a certain file.
398It will return true if the file is in the archive, false otherwise.
399
400Note however, that this function does an exact match using C<eq>
401on the full path. So it cannot compensate for case-insensitive file-
402systems or compare 2 paths to see if they would point to the same
403underlying file.
404
405=cut
406
407sub contains_file {
408 my $self = shift;
01d11a1c 409 my $full = shift;
410
411 return unless defined $full;
39713df4 412
c3745331 413 ### don't warn if the entry isn't there.. that's what this function
414 ### is for after all.
415 local $WARN = 0;
39713df4 416 return 1 if $self->_find_entry($full);
417 return;
418}
419
420=head2 $tar->extract( [@filenames] )
421
422Write files whose names are equivalent to any of the names in
423C<@filenames> to disk, creating subdirectories as necessary. This
424might not work too well under VMS.
425Under MacPerl, the file's modification time will be converted to the
426MacOS zero of time, and appropriate conversions will be done to the
427path. However, the length of each element of the path is not
428inspected to see whether it's longer than MacOS currently allows (32
429characters).
430
431If C<extract> is called without a list of file names, the entire
432contents of the archive are extracted.
433
434Returns a list of filenames extracted.
435
436=cut
437
438sub extract {
439 my $self = shift;
b30bcf62 440 my @args = @_;
39713df4 441 my @files;
442
f38c1908 443 # use the speed optimization for all extracted files
444 local($self->{cwd}) = cwd() unless $self->{cwd};
445
39713df4 446 ### you requested the extraction of only certian files
b30bcf62 447 if( @args ) {
448 for my $file ( @args ) {
449
450 ### it's already an object?
451 if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) {
452 push @files, $file;
453 next;
39713df4 454
b30bcf62 455 ### go find it then
456 } else {
457
458 my $found;
459 for my $entry ( @{$self->_data} ) {
460 next unless $file eq $entry->full_path;
461
462 ### we found the file you're looking for
463 push @files, $entry;
464 $found++;
465 }
466
467 unless( $found ) {
468 return $self->_error(
469 qq[Could not find '$file' in archive] );
470 }
39713df4 471 }
472 }
473
474 ### just grab all the file items
475 } else {
476 @files = $self->get_files;
477 }
478
479 ### nothing found? that's an error
480 unless( scalar @files ) {
481 $self->_error( qq[No files found for ] . $self->_file );
482 return;
483 }
484
485 ### now extract them
486 for my $entry ( @files ) {
487 unless( $self->_extract_file( $entry ) ) {
488 $self->_error(q[Could not extract ']. $entry->full_path .q['] );
489 return;
490 }
491 }
492
493 return @files;
494}
495
496=head2 $tar->extract_file( $file, [$extract_path] )
497
498Write an entry, whose name is equivalent to the file name provided to
48e76d2d 499disk. Optionally takes a second parameter, which is the full native
39713df4 500path (including filename) the entry will be written to.
501
502For example:
503
504 $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' );
505
b30bcf62 506 $tar->extract_file( $at_file_object, 'name/i/want/to/give/it' );
507
39713df4 508Returns true on success, false on failure.
509
510=cut
511
512sub extract_file {
513 my $self = shift;
01d11a1c 514 my $file = shift; return unless defined $file;
39713df4 515 my $alt = shift;
516
517 my $entry = $self->_find_entry( $file )
518 or $self->_error( qq[Could not find an entry for '$file'] ), return;
519
520 return $self->_extract_file( $entry, $alt );
521}
522
523sub _extract_file {
524 my $self = shift;
525 my $entry = shift or return;
526 my $alt = shift;
39713df4 527
528 ### you wanted an alternate extraction location ###
529 my $name = defined $alt ? $alt : $entry->full_path;
530
531 ### splitpath takes a bool at the end to indicate
532 ### that it's splitting a dir
7f10f74b 533 my ($vol,$dirs,$file);
534 if ( defined $alt ) { # It's a local-OS path
535 ($vol,$dirs,$file) = File::Spec->splitpath( $alt,
536 $entry->is_dir );
537 } else {
538 ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name,
539 $entry->is_dir );
540 }
541
39713df4 542 my $dir;
543 ### is $name an absolute path? ###
544 if( File::Spec->file_name_is_absolute( $dirs ) ) {
545 $dir = $dirs;
546
547 ### it's a relative path ###
548 } else {
f38c1908 549 my $cwd = (defined $self->{cwd} ? $self->{cwd} : cwd());
f5afd28d 550
551
552
553 my @dirs = defined $alt
554 ? File::Spec->splitdir( $dirs ) # It's a local-OS path
555 : File::Spec::Unix->splitdir( $dirs ); # it's UNIX-style, likely
556 # straight from the tarball
557
558 ### '.' is the directory delimiter, of which the first one has to
559 ### be escaped/changed.
560 map tr/\./_/, @dirs if ON_VMS;
561
48e76d2d 562 my ($cwd_vol,$cwd_dir,$cwd_file)
563 = File::Spec->splitpath( $cwd );
564 my @cwd = File::Spec->splitdir( $cwd_dir );
565 push @cwd, $cwd_file if length $cwd_file;
81a5970e 566
f5afd28d 567 ### We need to pass '' as the last elemant to catpath. Craig Berry
568 ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>):
569 ### The root problem is that splitpath on UNIX always returns the
570 ### final path element as a file even if it is a directory, and of
571 ### course there is no way it can know the difference without checking
572 ### against the filesystem, which it is documented as not doing. When
573 ### you turn around and call catpath, on VMS you have to know which bits
574 ### are directory bits and which bits are file bits. In this case we
575 ### know the result should be a directory. I had thought you could omit
576 ### the file argument to catpath in such a case, but apparently on UNIX
577 ### you can't.
578 $dir = File::Spec->catpath(
579 $cwd_vol, File::Spec->catdir( @cwd, @dirs ), ''
580 );
581
582 ### catdir() returns undef if the path is longer than 255 chars on VMS
81a5970e 583 unless ( defined $dir ) {
584 $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] );
585 return;
586 }
587
39713df4 588 }
589
590 if( -e $dir && !-d _ ) {
591 $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] );
592 return;
593 }
594
595 unless ( -d _ ) {
596 eval { File::Path::mkpath( $dir, 0, 0777 ) };
597 if( $@ ) {
598 $self->_error( qq[Could not create directory '$dir': $@] );
599 return;
600 }
c3745331 601
602 ### XXX chown here? that might not be the same as in the archive
603 ### as we're only chown'ing to the owner of the file we're extracting
604 ### not to the owner of the directory itself, which may or may not
605 ### be another entry in the archive
606 ### Answer: no, gnu tar doesn't do it either, it'd be the wrong
607 ### way to go.
608 #if( $CHOWN && CAN_CHOWN ) {
609 # chown $entry->uid, $entry->gid, $dir or
610 # $self->_error( qq[Could not set uid/gid on '$dir'] );
611 #}
39713df4 612 }
613
614 ### we're done if we just needed to create a dir ###
615 return 1 if $entry->is_dir;
616
617 my $full = File::Spec->catfile( $dir, $file );
618
619 if( $entry->is_unknown ) {
620 $self->_error( qq[Unknown file type for file '$full'] );
621 return;
622 }
623
624 if( length $entry->type && $entry->is_file ) {
625 my $fh = IO::File->new;
626 $fh->open( '>' . $full ) or (
627 $self->_error( qq[Could not open file '$full': $!] ),
628 return
629 );
630
631 if( $entry->size ) {
632 binmode $fh;
633 syswrite $fh, $entry->data or (
634 $self->_error( qq[Could not write data to '$full'] ),
635 return
636 );
637 }
638
639 close $fh or (
640 $self->_error( qq[Could not close file '$full'] ),
641 return
642 );
643
644 } else {
645 $self->_make_special_file( $entry, $full ) or return;
646 }
647
648 utime time, $entry->mtime - TIME_OFFSET, $full or
649 $self->_error( qq[Could not update timestamp] );
650
651 if( $CHOWN && CAN_CHOWN ) {
652 chown $entry->uid, $entry->gid, $full or
653 $self->_error( qq[Could not set uid/gid on '$full'] );
654 }
655
656 ### only chmod if we're allowed to, but never chmod symlinks, since they'll
657 ### change the perms on the file they're linking too...
658 if( $CHMOD and not -l $full ) {
659 chmod $entry->mode, $full or
660 $self->_error( qq[Could not chown '$full' to ] . $entry->mode );
661 }
662
663 return 1;
664}
665
666sub _make_special_file {
667 my $self = shift;
668 my $entry = shift or return;
669 my $file = shift; return unless defined $file;
670
671 my $err;
672
673 if( $entry->is_symlink ) {
674 my $fail;
675 if( ON_UNIX ) {
676 symlink( $entry->linkname, $file ) or $fail++;
677
678 } else {
679 $self->_extract_special_file_as_plain_file( $entry, $file )
680 or $fail++;
681 }
682
683 $err = qq[Making symbolink link from '] . $entry->linkname .
684 qq[' to '$file' failed] if $fail;
685
686 } elsif ( $entry->is_hardlink ) {
687 my $fail;
688 if( ON_UNIX ) {
689 link( $entry->linkname, $file ) or $fail++;
690
691 } else {
692 $self->_extract_special_file_as_plain_file( $entry, $file )
693 or $fail++;
694 }
695
696 $err = qq[Making hard link from '] . $entry->linkname .
697 qq[' to '$file' failed] if $fail;
698
699 } elsif ( $entry->is_fifo ) {
700 ON_UNIX && !system('mknod', $file, 'p') or
701 $err = qq[Making fifo ']. $entry->name .qq[' failed];
702
703 } elsif ( $entry->is_blockdev or $entry->is_chardev ) {
704 my $mode = $entry->is_blockdev ? 'b' : 'c';
705
706 ON_UNIX && !system('mknod', $file, $mode,
707 $entry->devmajor, $entry->devminor) or
708 $err = qq[Making block device ']. $entry->name .qq[' (maj=] .
709 $entry->devmajor . qq[ min=] . $entry->devminor .
710 qq[) failed.];
711
712 } elsif ( $entry->is_socket ) {
713 ### the original doesn't do anything special for sockets.... ###
714 1;
715 }
716
717 return $err ? $self->_error( $err ) : 1;
718}
719
720### don't know how to make symlinks, let's just extract the file as
721### a plain file
722sub _extract_special_file_as_plain_file {
723 my $self = shift;
724 my $entry = shift or return;
725 my $file = shift; return unless defined $file;
726
727 my $err;
728 TRY: {
729 my $orig = $self->_find_entry( $entry->linkname );
730
731 unless( $orig ) {
732 $err = qq[Could not find file '] . $entry->linkname .
733 qq[' in memory.];
734 last TRY;
735 }
736
737 ### clone the entry, make it appear as a normal file ###
738 my $clone = $entry->clone;
739 $clone->_downgrade_to_plainfile;
740 $self->_extract_file( $clone, $file ) or last TRY;
741
742 return 1;
743 }
744
745 return $self->_error($err);
746}
747
748=head2 $tar->list_files( [\@properties] )
749
750Returns a list of the names of all the files in the archive.
751
752If C<list_files()> is passed an array reference as its first argument
753it returns a list of hash references containing the requested
754properties of each file. The following list of properties is
755supported: name, size, mtime (last modified date), mode, uid, gid,
756linkname, uname, gname, devmajor, devminor, prefix.
757
758Passing an array reference containing only one element, 'name', is
759special cased to return a list of names rather than a list of hash
760references, making it equivalent to calling C<list_files> without
761arguments.
762
763=cut
764
765sub list_files {
766 my $self = shift;
767 my $aref = shift || [ ];
768
769 unless( $self->_data ) {
770 $self->read() or return;
771 }
772
773 if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) {
774 return map { $_->full_path } @{$self->_data};
775 } else {
776
777 #my @rv;
778 #for my $obj ( @{$self->_data} ) {
779 # push @rv, { map { $_ => $obj->$_() } @$aref };
780 #}
781 #return @rv;
782
783 ### this does the same as the above.. just needs a +{ }
784 ### to make sure perl doesn't confuse it for a block
785 return map { my $o=$_;
786 +{ map { $_ => $o->$_() } @$aref }
787 } @{$self->_data};
788 }
789}
790
791sub _find_entry {
792 my $self = shift;
793 my $file = shift;
794
795 unless( defined $file ) {
796 $self->_error( qq[No file specified] );
797 return;
798 }
799
b30bcf62 800 ### it's an object already
801 return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' );
802
39713df4 803 for my $entry ( @{$self->_data} ) {
804 my $path = $entry->full_path;
805 return $entry if $path eq $file;
806 }
807
808 $self->_error( qq[No such file in archive: '$file'] );
809 return;
810}
811
812=head2 $tar->get_files( [@filenames] )
813
814Returns the C<Archive::Tar::File> objects matching the filenames
815provided. If no filename list was passed, all C<Archive::Tar::File>
816objects in the current Tar object are returned.
817
818Please refer to the C<Archive::Tar::File> documentation on how to
819handle these objects.
820
821=cut
822
823sub get_files {
824 my $self = shift;
825
826 return @{ $self->_data } unless @_;
827
828 my @list;
829 for my $file ( @_ ) {
830 push @list, grep { defined } $self->_find_entry( $file );
831 }
832
833 return @list;
834}
835
836=head2 $tar->get_content( $file )
837
838Return the content of the named file.
839
840=cut
841
842sub get_content {
843 my $self = shift;
844 my $entry = $self->_find_entry( shift ) or return;
845
846 return $entry->data;
847}
848
849=head2 $tar->replace_content( $file, $content )
850
851Make the string $content be the content for the file named $file.
852
853=cut
854
855sub replace_content {
856 my $self = shift;
857 my $entry = $self->_find_entry( shift ) or return;
858
859 return $entry->replace_content( shift );
860}
861
862=head2 $tar->rename( $file, $new_name )
863
864Rename the file of the in-memory archive to $new_name.
865
866Note that you must specify a Unix path for $new_name, since per tar
867standard, all files in the archive must be Unix paths.
868
869Returns true on success and false on failure.
870
871=cut
872
873sub rename {
874 my $self = shift;
875 my $file = shift; return unless defined $file;
876 my $new = shift; return unless defined $new;
877
878 my $entry = $self->_find_entry( $file ) or return;
879
880 return $entry->rename( $new );
881}
882
883=head2 $tar->remove (@filenamelist)
884
885Removes any entries with names matching any of the given filenames
886from the in-memory archive. Returns a list of C<Archive::Tar::File>
887objects that remain.
888
889=cut
890
891sub remove {
892 my $self = shift;
893 my @list = @_;
894
895 my %seen = map { $_->full_path => $_ } @{$self->_data};
896 delete $seen{ $_ } for @list;
897
898 $self->_data( [values %seen] );
899
900 return values %seen;
901}
902
903=head2 $tar->clear
904
905C<clear> clears the current in-memory archive. This effectively gives
906you a 'blank' object, ready to be filled again. Note that C<clear>
907only has effect on the object, not the underlying tarfile.
908
909=cut
910
911sub clear {
912 my $self = shift or return;
913
914 $self->_data( [] );
915 $self->_file( '' );
916
917 return 1;
918}
919
920
921=head2 $tar->write ( [$file, $compressed, $prefix] )
922
923Write the in-memory archive to disk. The first argument can either
924be the name of a file or a reference to an already open filehandle (a
925GLOB reference). If the second argument is true, the module will use
926IO::Zlib to write the file in a compressed format. If IO::Zlib is
927not available, the C<write> method will fail and return.
928
929Note that when you pass in a filehandle, the compression argument
930is ignored, as all files are printed verbatim to your filehandle.
931If you wish to enable compression with filehandles, use an
932C<IO::Zlib> filehandle instead.
933
934Specific levels of compression can be chosen by passing the values 2
935through 9 as the second parameter.
936
937The third argument is an optional prefix. All files will be tucked
938away in the directory you specify as prefix. So if you have files
939'a' and 'b' in your archive, and you specify 'foo' as prefix, they
940will be written to the archive as 'foo/a' and 'foo/b'.
941
942If no arguments are given, C<write> returns the entire formatted
943archive as a string, which could be useful if you'd like to stuff the
944archive into a socket or a pipe to gzip or something.
945
946=cut
947
948sub write {
949 my $self = shift;
950 my $file = shift; $file = '' unless defined $file;
951 my $gzip = shift || 0;
952 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix;
953 my $dummy = '';
954
955 ### only need a handle if we have a file to print to ###
956 my $handle = length($file)
957 ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) )
958 or return )
959 : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h }
960 : $HAS_IO_STRING ? IO::String->new
961 : __PACKAGE__->no_string_support();
962
963
964
965 for my $entry ( @{$self->_data} ) {
966 ### entries to be written to the tarfile ###
967 my @write_me;
968
969 ### only now will we change the object to reflect the current state
970 ### of the name and prefix fields -- this needs to be limited to
971 ### write() only!
972 my $clone = $entry->clone;
973
974
975 ### so, if you don't want use to use the prefix, we'll stuff
976 ### everything in the name field instead
977 if( $DO_NOT_USE_PREFIX ) {
978
979 ### you might have an extended prefix, if so, set it in the clone
980 ### XXX is ::Unix right?
981 $clone->name( length $ext_prefix
982 ? File::Spec::Unix->catdir( $ext_prefix,
983 $clone->full_path)
984 : $clone->full_path );
985 $clone->prefix( '' );
986
987 ### otherwise, we'll have to set it properly -- prefix part in the
988 ### prefix and name part in the name field.
989 } else {
990
991 ### split them here, not before!
992 my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path );
993
994 ### you might have an extended prefix, if so, set it in the clone
995 ### XXX is ::Unix right?
996 $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix )
997 if length $ext_prefix;
998
999 $clone->prefix( $prefix );
1000 $clone->name( $name );
1001 }
1002
1003 ### names are too long, and will get truncated if we don't add a
1004 ### '@LongLink' file...
1005 my $make_longlink = ( length($clone->name) > NAME_LENGTH or
1006 length($clone->prefix) > PREFIX_LENGTH
1007 ) || 0;
1008
1009 ### perhaps we need to make a longlink file?
1010 if( $make_longlink ) {
1011 my $longlink = Archive::Tar::File->new(
1012 data => LONGLINK_NAME,
1013 $clone->full_path,
1014 { type => LONGLINK }
1015 );
1016
1017 unless( $longlink ) {
1018 $self->_error( qq[Could not create 'LongLink' entry for ] .
1019 qq[oversize file '] . $clone->full_path ."'" );
1020 return;
1021 };
1022
1023 push @write_me, $longlink;
1024 }
1025
1026 push @write_me, $clone;
1027
1028 ### write the one, optionally 2 a::t::file objects to the handle
1029 for my $clone (@write_me) {
1030
1031 ### if the file is a symlink, there are 2 options:
1032 ### either we leave the symlink intact, but then we don't write any
1033 ### data OR we follow the symlink, which means we actually make a
1034 ### copy. if we do the latter, we have to change the TYPE of the
1035 ### clone to 'FILE'
1036 my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK;
1037 my $data_ok = !$clone->is_symlink && $clone->has_content;
1038
1039 ### downgrade to a 'normal' file if it's a symlink we're going to
1040 ### treat as a regular file
1041 $clone->_downgrade_to_plainfile if $link_ok;
1042
1043 ### get the header for this block
1044 my $header = $self->_format_tar_entry( $clone );
1045 unless( $header ) {
1046 $self->_error(q[Could not format header for: ] .
1047 $clone->full_path );
1048 return;
1049 }
1050
1051 unless( print $handle $header ) {
1052 $self->_error(q[Could not write header for: ] .
1053 $clone->full_path);
1054 return;
1055 }
1056
1057 if( $link_ok or $data_ok ) {
1058 unless( print $handle $clone->data ) {
1059 $self->_error(q[Could not write data for: ] .
1060 $clone->full_path);
1061 return;
1062 }
1063
1064 ### pad the end of the clone if required ###
1065 print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK
1066 }
1067
1068 } ### done writing these entries
1069 }
1070
1071 ### write the end markers ###
1072 print $handle TAR_END x 2 or
1073 return $self->_error( qq[Could not write tar end markers] );
b30bcf62 1074
39713df4 1075 ### did you want it written to a file, or returned as a string? ###
b30bcf62 1076 my $rv = length($file) ? 1
39713df4 1077 : $HAS_PERLIO ? $dummy
b30bcf62 1078 : do { seek $handle, 0, 0; local $/; <$handle> };
1079
1080 ### make sure to close the handle;
1081 close $handle;
1082
1083 return $rv;
39713df4 1084}
1085
1086sub _format_tar_entry {
1087 my $self = shift;
1088 my $entry = shift or return;
1089 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix;
1090 my $no_prefix = shift || 0;
1091
1092 my $file = $entry->name;
1093 my $prefix = $entry->prefix; $prefix = '' unless defined $prefix;
1094
1095 ### remove the prefix from the file name
1096 ### not sure if this is still neeeded --kane
1097 ### no it's not -- Archive::Tar::File->_new_from_file will take care of
1098 ### this for us. Even worse, this would break if we tried to add a file
1099 ### like x/x.
1100 #if( length $prefix ) {
1101 # $file =~ s/^$match//;
1102 #}
1103
1104 $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix)
1105 if length $ext_prefix;
1106
1107 ### not sure why this is... ###
1108 my $l = PREFIX_LENGTH; # is ambiguous otherwise...
1109 substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH;
1110
1111 my $f1 = "%06o"; my $f2 = "%11o";
1112
1113 ### this might be optimizable with a 'changed' flag in the file objects ###
1114 my $tar = pack (
1115 PACK,
1116 $file,
1117
1118 (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]),
1119 (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]),
1120
1121 "", # checksum field - space padded a bit down
1122
1123 (map { $entry->$_() } qw[type linkname magic]),
1124
1125 $entry->version || TAR_VERSION,
1126
1127 (map { $entry->$_() } qw[uname gname]),
1128 (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]),
1129
1130 ($no_prefix ? '' : $prefix)
1131 );
1132
1133 ### add the checksum ###
1134 substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar));
1135
1136 return $tar;
1137}
1138
1139=head2 $tar->add_files( @filenamelist )
1140
1141Takes a list of filenames and adds them to the in-memory archive.
1142
1143The path to the file is automatically converted to a Unix like
1144equivalent for use in the archive, and, if on MacOS, the file's
1145modification time is converted from the MacOS epoch to the Unix epoch.
1146So tar archives created on MacOS with B<Archive::Tar> can be read
1147both with I<tar> on Unix and applications like I<suntar> or
1148I<Stuffit Expander> on MacOS.
1149
1150Be aware that the file's type/creator and resource fork will be lost,
1151which is usually what you want in cross-platform archives.
1152
1153Returns a list of C<Archive::Tar::File> objects that were just added.
1154
1155=cut
1156
1157sub add_files {
1158 my $self = shift;
1159 my @files = @_ or return;
1160
1161 my @rv;
1162 for my $file ( @files ) {
c3745331 1163 unless( -e $file || -l $file ) {
39713df4 1164 $self->_error( qq[No such file: '$file'] );
1165 next;
1166 }
1167
1168 my $obj = Archive::Tar::File->new( file => $file );
1169 unless( $obj ) {
1170 $self->_error( qq[Unable to add file: '$file'] );
1171 next;
1172 }
1173
1174 push @rv, $obj;
1175 }
1176
1177 push @{$self->{_data}}, @rv;
1178
1179 return @rv;
1180}
1181
1182=head2 $tar->add_data ( $filename, $data, [$opthashref] )
1183
1184Takes a filename, a scalar full of data and optionally a reference to
1185a hash with specific options.
1186
1187Will add a file to the in-memory archive, with name C<$filename> and
1188content C<$data>. Specific properties can be set using C<$opthashref>.
1189The following list of properties is supported: name, size, mtime
1190(last modified date), mode, uid, gid, linkname, uname, gname,
b3200c5d 1191devmajor, devminor, prefix, type. (On MacOS, the file's path and
39713df4 1192modification times are converted to Unix equivalents.)
1193
b3200c5d 1194Valid values for the file type are the following constants defined in
1195Archive::Tar::Constants:
1196
1197=over 4
1198
1199=item FILE
1200
1201Regular file.
1202
1203=item HARDLINK
1204
1205=item SYMLINK
1206
1207Hard and symbolic ("soft") links; linkname should specify target.
1208
1209=item CHARDEV
1210
1211=item BLOCKDEV
1212
1213Character and block devices. devmajor and devminor should specify the major
1214and minor device numbers.
1215
1216=item DIR
1217
1218Directory.
1219
1220=item FIFO
1221
1222FIFO (named pipe).
1223
1224=item SOCKET
1225
1226Socket.
1227
1228=back
1229
39713df4 1230Returns the C<Archive::Tar::File> object that was just added, or
1231C<undef> on failure.
1232
1233=cut
1234
1235sub add_data {
1236 my $self = shift;
1237 my ($file, $data, $opt) = @_;
1238
1239 my $obj = Archive::Tar::File->new( data => $file, $data, $opt );
1240 unless( $obj ) {
1241 $self->_error( qq[Unable to add file: '$file'] );
1242 return;
1243 }
1244
1245 push @{$self->{_data}}, $obj;
1246
1247 return $obj;
1248}
1249
1250=head2 $tar->error( [$BOOL] )
1251
1252Returns the current errorstring (usually, the last error reported).
1253If a true value was specified, it will give the C<Carp::longmess>
1254equivalent of the error, in effect giving you a stacktrace.
1255
1256For backwards compatibility, this error is also available as
1257C<$Archive::Tar::error> although it is much recommended you use the
1258method call instead.
1259
1260=cut
1261
1262{
1263 $error = '';
1264 my $longmess;
1265
1266 sub _error {
1267 my $self = shift;
1268 my $msg = $error = shift;
1269 $longmess = Carp::longmess($error);
1270
1271 ### set Archive::Tar::WARN to 0 to disable printing
1272 ### of errors
1273 if( $WARN ) {
1274 carp $DEBUG ? $longmess : $msg;
1275 }
1276
1277 return;
1278 }
1279
1280 sub error {
1281 my $self = shift;
1282 return shift() ? $longmess : $error;
1283 }
1284}
1285
f38c1908 1286=head2 $tar->setcwd( $cwd );
1287
1288C<Archive::Tar> needs to know the current directory, and it will run
1289C<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the
1290tarfile and saves it in the file system. (As of version 1.30, however,
1291C<Archive::Tar> will use the speed optimization described below
1292automatically, so it's only relevant if you're using C<extract_file()>).
1293
1294Since C<Archive::Tar> doesn't change the current directory internally
1295while it is extracting the items in a tarball, all calls to C<Cwd::cwd()>
1296can be avoided if we can guarantee that the current directory doesn't
1297get changed externally.
1298
1299To use this performance boost, set the current directory via
1300
1301 use Cwd;
1302 $tar->setcwd( cwd() );
1303
1304once before calling a function like C<extract_file> and
1305C<Archive::Tar> will use the current directory setting from then on
1306and won't call C<Cwd::cwd()> internally.
1307
1308To switch back to the default behaviour, use
1309
1310 $tar->setcwd( undef );
1311
1312and C<Archive::Tar> will call C<Cwd::cwd()> internally again.
1313
1314If you're using C<Archive::Tar>'s C<exract()> method, C<setcwd()> will
1315be called for you.
1316
1317=cut
1318
1319sub setcwd {
1320 my $self = shift;
1321 my $cwd = shift;
1322
1323 $self->{cwd} = $cwd;
1324}
39713df4 1325
1326=head2 $bool = $tar->has_io_string
1327
1328Returns true if we currently have C<IO::String> support loaded.
1329
1330Either C<IO::String> or C<perlio> support is needed to support writing
3c4b39be 1331stringified archives. Currently, C<perlio> is the preferred method, if
39713df4 1332available.
1333
1334See the C<GLOBAL VARIABLES> section to see how to change this preference.
1335
1336=cut
1337
1338sub has_io_string { return $HAS_IO_STRING; }
1339
1340=head2 $bool = $tar->has_perlio
1341
1342Returns true if we currently have C<perlio> support loaded.
1343
1344This requires C<perl-5.8> or higher, compiled with C<perlio>
1345
1346Either C<IO::String> or C<perlio> support is needed to support writing
3c4b39be 1347stringified archives. Currently, C<perlio> is the preferred method, if
39713df4 1348available.
1349
1350See the C<GLOBAL VARIABLES> section to see how to change this preference.
1351
1352=cut
1353
1354sub has_perlio { return $HAS_PERLIO; }
1355
1356
1357=head1 Class Methods
1358
1359=head2 Archive::Tar->create_archive($file, $compression, @filelist)
1360
1361Creates a tar file from the list of files provided. The first
1362argument can either be the name of the tar file to create or a
1363reference to an open file handle (e.g. a GLOB reference).
1364
1365The second argument specifies the level of compression to be used, if
1366any. Compression of tar files requires the installation of the
1367IO::Zlib module. Specific levels of compression may be
1368requested by passing a value between 2 and 9 as the second argument.
1369Any other value evaluating as true will result in the default
1370compression level being used.
1371
1372Note that when you pass in a filehandle, the compression argument
1373is ignored, as all files are printed verbatim to your filehandle.
1374If you wish to enable compression with filehandles, use an
1375C<IO::Zlib> filehandle instead.
1376
1377The remaining arguments list the files to be included in the tar file.
1378These files must all exist. Any files which don't exist or can't be
1379read are silently ignored.
1380
1381If the archive creation fails for any reason, C<create_archive> will
1382return false. Please use the C<error> method to find the cause of the
1383failure.
1384
1385Note that this method does not write C<on the fly> as it were; it
1386still reads all the files into memory before writing out the archive.
1387Consult the FAQ below if this is a problem.
1388
1389=cut
1390
1391sub create_archive {
1392 my $class = shift;
1393
1394 my $file = shift; return unless defined $file;
1395 my $gzip = shift || 0;
1396 my @files = @_;
1397
1398 unless( @files ) {
1399 return $class->_error( qq[Cowardly refusing to create empty archive!] );
1400 }
1401
1402 my $tar = $class->new;
1403 $tar->add_files( @files );
1404 return $tar->write( $file, $gzip );
1405}
1406
1407=head2 Archive::Tar->list_archive ($file, $compressed, [\@properties])
1408
1409Returns a list of the names of all the files in the archive. The
1410first argument can either be the name of the tar file to list or a
1411reference to an open file handle (e.g. a GLOB reference).
1412
1413If C<list_archive()> is passed an array reference as its third
1414argument it returns a list of hash references containing the requested
1415properties of each file. The following list of properties is
b3200c5d 1416supported: full_path, name, size, mtime (last modified date), mode,
1417uid, gid, linkname, uname, gname, devmajor, devminor, prefix.
1418
1419See C<Archive::Tar::File> for details about supported properties.
39713df4 1420
1421Passing an array reference containing only one element, 'name', is
1422special cased to return a list of names rather than a list of hash
1423references.
1424
1425=cut
1426
1427sub list_archive {
1428 my $class = shift;
1429 my $file = shift; return unless defined $file;
1430 my $gzip = shift || 0;
1431
1432 my $tar = $class->new($file, $gzip);
1433 return unless $tar;
1434
1435 return $tar->list_files( @_ );
1436}
1437
1438=head2 Archive::Tar->extract_archive ($file, $gzip)
1439
1440Extracts the contents of the tar file. The first argument can either
1441be the name of the tar file to create or a reference to an open file
1442handle (e.g. a GLOB reference). All relative paths in the tar file will
1443be created underneath the current working directory.
1444
1445C<extract_archive> will return a list of files it extracted.
1446If the archive extraction fails for any reason, C<extract_archive>
1447will return false. Please use the C<error> method to find the cause
1448of the failure.
1449
1450=cut
1451
1452sub extract_archive {
1453 my $class = shift;
1454 my $file = shift; return unless defined $file;
1455 my $gzip = shift || 0;
1456
1457 my $tar = $class->new( ) or return;
1458
1459 return $tar->read( $file, $gzip, { extract => 1 } );
1460}
1461
1462=head2 Archive::Tar->can_handle_compressed_files
1463
1464A simple checking routine, which will return true if C<Archive::Tar>
1465is able to uncompress compressed archives on the fly with C<IO::Zlib>,
1466or false if C<IO::Zlib> is not installed.
1467
1468You can use this as a shortcut to determine whether C<Archive::Tar>
1469will do what you think before passing compressed archives to its
1470C<read> method.
1471
1472=cut
1473
1474sub can_handle_compressed_files { return ZLIB ? 1 : 0 }
1475
1476sub no_string_support {
1477 croak("You have to install IO::String to support writing archives to strings");
1478}
1479
14801;
1481
1482__END__
1483
1484=head1 GLOBAL VARIABLES
1485
1486=head2 $Archive::Tar::FOLLOW_SYMLINK
1487
1488Set this variable to C<1> to make C<Archive::Tar> effectively make a
1489copy of the file when extracting. Default is C<0>, which
1490means the symlink stays intact. Of course, you will have to pack the
1491file linked to as well.
1492
1493This option is checked when you write out the tarfile using C<write>
1494or C<create_archive>.
1495
1496This works just like C</bin/tar>'s C<-h> option.
1497
1498=head2 $Archive::Tar::CHOWN
1499
1500By default, C<Archive::Tar> will try to C<chown> your files if it is
1501able to. In some cases, this may not be desired. In that case, set
1502this variable to C<0> to disable C<chown>-ing, even if it were
1503possible.
1504
1505The default is C<1>.
1506
1507=head2 $Archive::Tar::CHMOD
1508
1509By default, C<Archive::Tar> will try to C<chmod> your files to
1510whatever mode was specified for the particular file in the archive.
1511In some cases, this may not be desired. In that case, set this
1512variable to C<0> to disable C<chmod>-ing.
1513
1514The default is C<1>.
1515
1516=head2 $Archive::Tar::DO_NOT_USE_PREFIX
1517
f38c1908 1518By default, C<Archive::Tar> will try to put paths that are over
1519100 characters in the C<prefix> field of your tar header, as
1520defined per POSIX-standard. However, some (older) tar programs
1521do not implement this spec. To retain compatibility with these older
1522or non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX>
1523variable to a true value, and C<Archive::Tar> will use an alternate
1524way of dealing with paths over 100 characters by using the
1525C<GNU Extended Header> feature.
1526
1527Note that clients who do not support the C<GNU Extended Header>
1528feature will not be able to read these archives. Such clients include
1529tars on C<Solaris>, C<Irix> and C<AIX>.
39713df4 1530
1531The default is C<0>.
1532
1533=head2 $Archive::Tar::DEBUG
1534
1535Set this variable to C<1> to always get the C<Carp::longmess> output
1536of the warnings, instead of the regular C<carp>. This is the same
1537message you would get by doing:
1538
1539 $tar->error(1);
1540
1541Defaults to C<0>.
1542
1543=head2 $Archive::Tar::WARN
1544
1545Set this variable to C<0> if you do not want any warnings printed.
1546Personally I recommend against doing this, but people asked for the
1547option. Also, be advised that this is of course not threadsafe.
1548
1549Defaults to C<1>.
1550
1551=head2 $Archive::Tar::error
1552
1553Holds the last reported error. Kept for historical reasons, but its
1554use is very much discouraged. Use the C<error()> method instead:
1555
1556 warn $tar->error unless $tar->extract;
1557
1558=head2 $Archive::Tar::HAS_PERLIO
1559
1560This variable holds a boolean indicating if we currently have
1561C<perlio> support loaded. This will be enabled for any perl
1562greater than C<5.8> compiled with C<perlio>.
1563
1564If you feel strongly about disabling it, set this variable to
1565C<false>. Note that you will then need C<IO::String> installed
1566to support writing stringified archives.
1567
1568Don't change this variable unless you B<really> know what you're
1569doing.
1570
1571=head2 $Archive::Tar::HAS_IO_STRING
1572
1573This variable holds a boolean indicating if we currently have
1574C<IO::String> support loaded. This will be enabled for any perl
1575that has a loadable C<IO::String> module.
1576
1577If you feel strongly about disabling it, set this variable to
1578C<false>. Note that you will then need C<perlio> support from
1579your perl to be able to write stringified archives.
1580
1581Don't change this variable unless you B<really> know what you're
1582doing.
1583
1584=head1 FAQ
1585
1586=over 4
1587
1588=item What's the minimum perl version required to run Archive::Tar?
1589
1590You will need perl version 5.005_03 or newer.
1591
1592=item Isn't Archive::Tar slow?
1593
1594Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar>
1595However, it's very portable. If speed is an issue, consider using
1596C</bin/tar> instead.
1597
1598=item Isn't Archive::Tar heavier on memory than /bin/tar?
1599
1600Yes it is, see previous answer. Since C<Compress::Zlib> and therefore
1601C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little
1602choice but to read the archive into memory.
1603This is ok if you want to do in-memory manipulation of the archive.
1604If you just want to extract, use the C<extract_archive> class method
1605instead. It will optimize and write to disk immediately.
1606
1607=item Can't you lazy-load data instead?
1608
1609No, not easily. See previous question.
1610
1611=item How much memory will an X kb tar file need?
1612
1613Probably more than X kb, since it will all be read into memory. If
1614this is a problem, and you don't need to do in memory manipulation
1615of the archive, consider using C</bin/tar> instead.
1616
1617=item What do you do with unsupported filetypes in an archive?
1618
1619C<Unix> has a few filetypes that aren't supported on other platforms,
1620like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just
1621try to make a copy of the original file, rather than throwing an error.
1622
1623This does require you to read the entire archive in to memory first,
1624since otherwise we wouldn't know what data to fill the copy with.
1625(This means that you cannot use the class methods on archives that
1626have incompatible filetypes and still expect things to work).
1627
1628For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that
1629the extraction of this particular item didn't work.
1630
f38c1908 1631=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly!
1632
1633By default, C<Archive::Tar> is in a completely POSIX-compatible
1634mode, which uses the POSIX-specification of C<tar> to store files.
1635For paths greather than 100 characters, this is done using the
1636C<POSIX header prefix>. Non-POSIX-compatible clients may not support
1637this part of the specification, and may only support the C<GNU Extended
1638Header> functionality. To facilitate those clients, you can set the
1639C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the
1640C<GLOBAL VARIABLES> section for details on this variable.
1641
c3745331 1642Note that GNU tar earlier than version 1.14 does not cope well with
1643the C<POSIX header prefix>. If you use such a version, consider setting
1644the C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>.
1645
b30bcf62 1646=item How do I extract only files that have property X from an archive?
1647
1648Sometimes, you might not wish to extract a complete archive, just
1649the files that are relevant to you, based on some criteria.
1650
1651You can do this by filtering a list of C<Archive::Tar::File> objects
1652based on your criteria. For example, to extract only files that have
1653the string C<foo> in their title, you would use:
1654
1655 $tar->extract(
1656 grep { $_->full_path =~ /foo/ } $tar->get_files
1657 );
1658
1659This way, you can filter on any attribute of the files in the archive.
1660Consult the C<Archive::Tar::File> documentation on how to use these
1661objects.
1662
81a5970e 1663=item How do I access .tar.Z files?
1664
1665The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via
1666the C<IO::Zlib> module) to access tar files that have been compressed
1667with C<gzip>. Unfortunately tar files compressed with the Unix C<compress>
1668utility cannot be read by C<Compress::Zlib> and so cannot be directly
1669accesses by C<Archive::Tar>.
1670
1671If the C<uncompress> or C<gunzip> programs are available, you can use
1672one of these workarounds to read C<.tar.Z> files from C<Archive::Tar>
1673
1674Firstly with C<uncompress>
1675
1676 use Archive::Tar;
1677
1678 open F, "uncompress -c $filename |";
1679 my $tar = Archive::Tar->new(*F);
1680 ...
1681
1682and this with C<gunzip>
1683
1684 use Archive::Tar;
1685
1686 open F, "gunzip -c $filename |";
1687 my $tar = Archive::Tar->new(*F);
1688 ...
1689
1690Similarly, if the C<compress> program is available, you can use this to
1691write a C<.tar.Z> file
1692
1693 use Archive::Tar;
1694 use IO::File;
1695
1696 my $fh = new IO::File "| compress -c >$filename";
1697 my $tar = Archive::Tar->new();
1698 ...
1699 $tar->write($fh);
1700 $fh->close ;
1701
01d11a1c 1702=item How do I handle Unicode strings?
1703
1704C<Archive::Tar> uses byte semantics for any files it reads from or writes
1705to disk. This is not a problem if you only deal with files and never
1706look at their content or work solely with byte strings. But if you use
1707Unicode strings with character semantics, some additional steps need
1708to be taken.
1709
1710For example, if you add a Unicode string like
1711
1712 # Problem
1713 $tar->add_data('file.txt', "Euro: \x{20AC}");
1714
1715then there will be a problem later when the tarfile gets written out
1716to disk via C<$tar->write()>:
1717
1718 Wide character in print at .../Archive/Tar.pm line 1014.
1719
1720The data was added as a Unicode string and when writing it out to disk,
1721the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl
1722tried to convert the string to ISO-8859 and failed. The written file
1723now contains garbage.
1724
1725For this reason, Unicode strings need to be converted to UTF-8-encoded
1726bytestrings before they are handed off to C<add_data()>:
1727
1728 use Encode;
1729 my $data = "Accented character: \x{20AC}";
1730 $data = encode('utf8', $data);
1731
1732 $tar->add_data('file.txt', $data);
1733
1734A opposite problem occurs if you extract a UTF8-encoded file from a
1735tarball. Using C<get_content()> on the C<Archive::Tar::File> object
1736will return its content as a bytestring, not as a Unicode string.
1737
1738If you want it to be a Unicode string (because you want character
1739semantics with operations like regular expression matching), you need
1740to decode the UTF8-encoded content and have Perl convert it into
1741a Unicode string:
1742
1743 use Encode;
1744 my $data = $tar->get_content();
1745
1746 # Make it a Unicode string
1747 $data = decode('utf8', $data);
1748
1749There is no easy way to provide this functionality in C<Archive::Tar>,
1750because a tarball can contain many files, and each of which could be
1751encoded in a different way.
81a5970e 1752
39713df4 1753=back
1754
1755=head1 TODO
1756
1757=over 4
1758
1759=item Check if passed in handles are open for read/write
1760
1761Currently I don't know of any portable pure perl way to do this.
1762Suggestions welcome.
1763
b3200c5d 1764=item Allow archives to be passed in as string
1765
1766Currently, we only allow opened filehandles or filenames, but
1767not strings. The internals would need some reworking to facilitate
1768stringified archives.
1769
1770=item Facilitate processing an opened filehandle of a compressed archive
1771
1772Currently, we only support this if the filehandle is an IO::Zlib object.
1773Environments, like apache, will present you with an opened filehandle
1774to an uploaded file, which might be a compressed archive.
1775
39713df4 1776=back
1777
f38c1908 1778=head1 SEE ALSO
1779
1780=over 4
1781
1782=item The GNU tar specification
1783
1784C<http://www.gnu.org/software/tar/manual/tar.html>
1785
1786=item The PAX format specication
1787
1788The specifcation which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html>
1789
1790=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html>
1791
1792=item GNU tar intends to switch to POSIX compatibility
1793
1794GNU Tar authors have expressed their intention to become completely
1795POSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html>
1796
1797=item A Comparison between various tar implementations
1798
1799Lists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs>
1800
1801=back
1802
39713df4 1803=head1 AUTHOR
1804
c3745331 1805This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1806
1807Please reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>.
39713df4 1808
1809=head1 ACKNOWLEDGEMENTS
1810
1811Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney and
1812especially Andrew Savige for their help and suggestions.
1813
1814=head1 COPYRIGHT
1815
c3745331 1816This module is copyright (c) 2002 - 2007 Jos Boumans
1817E<lt>kane@cpan.orgE<gt>. All rights reserved.
39713df4 1818
c3745331 1819This library is free software; you may redistribute and/or modify
1820it under the same terms as Perl itself.
39713df4 1821
1822=cut