Tweaks to get Test::Builder::Tester's tests to work in the core.
[p5sagit/p5-mst-13.2.git] / lib / Archive / Tar.pm
CommitLineData
39713df4 1### the gnu tar specification:
2### http://www.gnu.org/software/tar/manual/html_mono/tar.html
3###
4### and the pax format spec, which tar derives from:
5### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
6
7package Archive::Tar;
8require 5.005_03;
9
10use strict;
11use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
12 $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING];
13
### Package-wide defaults. Elsewhere in this file:
###   $WARN / $DEBUG       control whether and how _error() carps
###   $CHOWN / $CHMOD      gate ownership/permission changes on extraction
###   $FOLLOW_SYMLINK      makes write() store symlinks as regular files
###   $DO_NOT_USE_PREFIX   makes write() put the whole path in the name field
$VERSION = "1.26_01";

( $DEBUG, $FOLLOW_SYMLINK, $DO_NOT_USE_PREFIX ) = ( 0, 0, 0 );
( $WARN,  $CHOWN,          $CHMOD             ) = ( 1, 1, 1 );

BEGIN {
    use Config;

    ### was this perl built with perlio support?
    $HAS_PERLIO = $Config::Config{useperlio};

    ### try and load IO::String anyway, so you can dynamically
    ### switch between perlio and IO::String
    eval {
        require IO::String;
        IO::String->import;
    };

    ### a non-empty $@ means the load above failed
    $HAS_IO_STRING = $@ ? 0 : 1;
}
35
36use Cwd;
37use IO::File;
38use Carp qw(carp croak);
39use File::Spec ();
40use File::Spec::Unix ();
41use File::Path ();
42
43use Archive::Tar::File;
44use Archive::Tar::Constant;
45
46=head1 NAME
47
48Archive::Tar - module for manipulations of tar archives
49
50=head1 SYNOPSIS
51
52 use Archive::Tar;
53 my $tar = Archive::Tar->new;
54
55 $tar->read('origin.tgz',1);
56 $tar->extract();
57
58 $tar->add_files('file/foo.pl', 'docs/README');
59 $tar->add_data('file/baz.txt', 'This is the contents now');
60
61 $tar->rename('oldname', 'new/file/name');
62
63 $tar->write('files.tar');
64
65=head1 DESCRIPTION
66
67Archive::Tar provides an object oriented mechanism for handling tar
68files. It provides class methods for quick and easy files handling
69while also allowing for the creation of tar file objects for custom
70manipulation. If you have the IO::Zlib module installed,
71Archive::Tar will also support compressed or gzipped tar files.
72
73An object of class Archive::Tar represents a .tar(.gz) archive full
74of files and things.
75
76=head1 Object Methods
77
78=head2 Archive::Tar->new( [$file, $compressed] )
79
80Returns a new Tar object. If given any arguments, C<new()> calls the
81C<read()> method automatically, passing on the arguments provided to
82the C<read()> method.
83
84If C<new()> is invoked with arguments and the C<read()> method fails
85for any reason, C<new()> returns undef.
86
87=cut
88
### Field template for an Archive::Tar object; one combined getter/setter
### is generated in this package for every key listed here.
my $tmpl = {
    _data   => [ ],
    _file   => 'Unknown',
};

### install get/set accessors for this object.
foreach my $field ( keys %{ $tmpl } ) {

    ### called with an argument it stores it; it always returns the
    ### (possibly just updated) value of the field
    my $accessor = sub {
        my $obj = shift;
        if( @_ ) {
            $obj->{$field} = shift;
        }
        return $obj->{$field};
    };

    no strict 'refs';
    *{ __PACKAGE__ . "::$field" } = $accessor;
}
103
### Constructor. May be called on the class or on an existing object.
### Any further arguments are handed to read(); if that read fails an
### error is recorded and nothing is returned.
sub new {
    my ($proto, @read_args) = @_;
    my $class = ref($proto) || $proto;

    ### copying $tmpl here since a shallow copy makes it use the
    ### same aref, causing for files to remain in memory always.
    my $tar = bless { _data => [ ], _file => 'Unknown' }, $class;

    ### nothing to read: hand back the empty object
    return $tar unless @read_args;

    return $tar if $tar->read( @read_args );

    $tar->_error(qq[No data could be read from file]);
    return;
}
121
122=head2 $tar->read ( $filename|$handle, $compressed, {opt => 'val'} )
123
124Read the given tar file into memory.
The first argument can either be the name of a file or a reference to
an already open filehandle (or an IO::Zlib object if it's compressed).
The second argument indicates whether the file referenced by the first
argument is compressed.
129
130The C<read> will I<replace> any previous content in C<$tar>!
131
132The second argument may be considered optional if IO::Zlib is
133installed, since it will transparently Do The Right Thing.
134Archive::Tar will warn if you try to pass a compressed file if
135IO::Zlib is not available and simply return.
136
137The third argument can be a hash reference with options. Note that
138all options are case-sensitive.
139
140=over 4
141
142=item limit
143
144Do not read more than C<limit> files. This is useful if you have
145very big archives, and are only interested in the first few files.
146
147=item extract
148
149If set to true, immediately extract entries when reading them. This
150gives you the same memory break as the C<extract_archive> function.
151Note however that entries will not be read into memory, but written
152straight to disk.
153
154=back
155
156All files are stored internally as C<Archive::Tar::File> objects.
157Please consult the L<Archive::Tar::File> documentation for details.
158
159Returns the number of files read in scalar context, and a list of
160C<Archive::Tar::File> objects in list context.
161
162=cut
163
### Read a tar file (name or open handle) into this object, replacing
### whatever entries it held.
###   $file - file name or handle reference (required)
###   $gzip - true if the input is compressed (defaults to 0)
###   $opts - hash reference of options handed on to _read_tar()
### Returns the entries in list context, their count in scalar context,
### and nothing at all on failure.
sub read {
    my ($self, $file, $gzip, $opts) = @_;
    $gzip ||= 0;
    $opts ||= {};

    unless( defined $file ) {
        $self->_error( qq[No file to read from!] );
        return;
    }

    ### remember what we are reading from
    $self->_file( $file );

    my $handle = $self->_get_handle( $file, $gzip, READ_ONLY->( ZLIB ) )
                    or return;

    my $entries = $self->_read_tar( $handle, $opts ) or return;

    $self->_data( $entries );

    return wantarray ? @$entries : scalar @$entries;
}
186
### Turn a file name into an open filehandle.
###   $file - file name, or an already open handle (any reference), which
###           is returned untouched
###   $gzip - true if compression was requested (defaults to 0)
###   $mode - open mode; defaults to READ_ONLY->( ZLIB )
### Returns the handle, or nothing after recording an error.
sub _get_handle {
    my $self = shift;
    my $file = shift;   return unless defined $file;
                        return $file if ref $file;

    my $gzip = shift || 0;
    my $mode = shift || READ_ONLY->( ZLIB ); # default to read only

    my $fh; my $bin;

    ### only default to ZLIB if we're not trying to /write/ to a handle ###
    if( ZLIB and $gzip || MODE_READ->( $mode ) ) {

        ### IO::Zlib will Do The Right Thing, even when passed
        ### a plain file ###
        ### NOTE: direct method call rather than 'new IO::Zlib'; this
        ### package has its own new(), which makes the indirect object
        ### form depend on perl's parsing heuristics.
        $fh = IO::Zlib->new;

    } else {
        if( $gzip ) {
            $self->_error(qq[Compression not available - Install IO::Zlib!]);
            return;

        } else {
            $fh = IO::File->new;
            $bin++;
        }
    }

    unless( $fh->open( $file, $mode ) ) {
        $self->_error( qq[Could not create filehandle for '$file': $!!] );
        return;
    }

    ### plain IO::File handles need binmode; the IO::Zlib branch never
    ### sets $bin
    binmode $fh if $bin;

    return $fh;
}
224
### Parse a tar stream from an open handle.
###   $handle - object supporting ->read( $buffer, $length )
###   $opts   - hash reference; recognised keys:
###               limit   - stop after this many entries (directories and
###                         @LongLink helper entries are not counted)
###               extract - if true, write each entry to disk right away
###                         and collect entry names instead of objects
### Returns an array reference of Archive::Tar::File objects (or names,
### when extracting), or nothing if the stream turns out to be compressed.
sub _read_tar {
    my $self    = shift;
    my $handle  = shift or return;
    my $opts    = shift || {};

    my $remaining   = $opts->{limit}    || 0;
    my $extract     = $opts->{extract}  || 0;

    ### set a cap on the amount of files to extract ###
    my $limited = $remaining > 0 ? 1 : 0;

    my $entries      = [ ];
    my $header;
    my $headers_seen = 0;
    my $long_name;      # reference to the name announced by a preceding
                        # @LongLink entry
    my $content;

    LOOP:
    while( $handle->read( $header, HEAD ) ) {
        ### IO::Zlib doesn't support this yet
        my $offset = eval { tell $handle } || 'unknown';

        ### only the very first header is checked for the gzip magic
        if( $headers_seen++ == 0 ) {
            my $gzip_re = GZIP_MAGIC_NUM;
            if( $header =~ /$gzip_re/ ) {
                $self->_error( qq[Cannot read compressed format in tar-mode] );
                return;
            }
        }

        ### if we can't read in all bytes... ###
        last if length($header) != HEAD;

        ### Apparently this should really be two blocks of 512 zeroes,
        ### but GNU tar sometimes gets it wrong. See comment in the
        ### source code (tar.c) to GNU cpio.
        next if $header eq TAR_END;

        ### pass the realname, so we can set it 'proper' right away
        ### some of the heuristics are done on the name, so important
        ### to set it ASAP
        my %extra_args = defined $long_name ? ( name => $$long_name ) : ();
        my $entry = Archive::Tar::File->new( chunk => $header, %extra_args );

        unless( $entry ) {
            $self->_error( qq[Couldn't read chunk at offset $offset] );
            next;
        }

        ### ignore labels:
        ### http://www.gnu.org/manual/tar/html_node/tar_139.html
        next if $entry->is_label;

        if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) {

            if( $entry->is_file && !$entry->validate ) {
                ### sometimes the chunk is rather fux0r3d and a whole 512
                ### bytes ends up in the ->name area.
                ### clean it up, if need be
                my $shown = $entry->name;
                $shown = substr($shown, 0, 100) if length($shown) > 100;
                $shown =~ s/\n/ /g;

                $self->_error( $shown . qq[: checksum error] );
                next LOOP;
            }

            my $block = BLOCK_SIZE->( $entry->size );

            $content = $entry->get_content_by_ref;

            ### just read everything into memory
            ### can't do lazy loading since IO::Zlib doesn't support 'seek'
            ### this is because Compress::Zlib doesn't support it =/
            ### this reads in the whole data in one read() call.
            if( $handle->read( $$content, $block ) < $block ) {
                $self->_error( qq[Read error on tarfile (missing data) '].
                                    $entry->full_path ."' at offset $offset" );
                next;
            }

            ### throw away trailing garbage ###
            substr( $$content, $entry->size ) = "";

            ### part II of the @LongLink munging -- need to do /after/
            ### the checksum check.
            if( $entry->is_longlink ) {
                ### the data of a @LongLink entry may carry trailing NUL
                ### characters; they would break 'eq' tests and hash
                ### lookups on the name, so shrink the recorded size by
                ### the number of NULs and cut the data to that size.

                ### count number of nulls (tr without replacement only counts)
                my $nulls = ( $$content =~ tr/\0// );

                ### cut data + size by that many bytes
                $entry->size( $entry->size - $nulls );
                substr( $$content, $entry->size ) = "";
            }
        }

        ### clean up of the entries.. posix tar /apparently/ has some
        ### weird 'feature' that allows for filenames > 255 characters
        ### they'll put a header in with as name '././@LongLink' and the
        ### contents will be the name of the /next/ file in the archive
        ### pretty crappy and kludgy if you ask me

        ### set the name for the next entry if this is a @LongLink;
        ### this is one ugly hack =/ but needed for direct extraction
        if( $entry->is_longlink ) {
            $long_name = $content;
            next;
        }

        if( defined $long_name ) {
            $entry->name( $$long_name );
            $entry->prefix('');
            undef $long_name;
        }

        if( $extract and not (     $entry->is_longlink
                                or $entry->is_unknown
                                or $entry->is_label )
        ) {
            $self->_extract_file( $entry );
        }

        ### Guard against tarfiles with garbage at the end
        last LOOP if $entry->name eq '';

        ### push only the name on the rv if we're extracting
        ### -- for extract_archive
        push @$entries, ( $extract ? $entry->name : $entry );

        if( $limited ) {
            $remaining-- unless $entry->is_longlink || $entry->is_dir;
            last LOOP unless $remaining;
        }

    } continue {
        ### runs after every 'next' as well, so a content reference never
        ### leaks into the following iteration
        undef $content;
    }

    return $entries;
}
376
377=head2 $tar->contains_file( $filename )
378
379Check if the archive contains a certain file.
380It will return true if the file is in the archive, false otherwise.
381
382Note however, that this function does an exact match using C<eq>
383on the full path. So it cannot compensate for case-insensitive file-
384systems or compare 2 paths to see if they would point to the same
385underlying file.
386
387=cut
388
### Report whether the archive holds an entry whose full path is exactly
### $full. Returns 1 if so, nothing otherwise (including when no name or
### an empty name is given).
sub contains_file {
    my $self = shift;
    my $full = shift;

    ### test for definedness and length rather than truth, so that an
    ### entry literally named '0' can be looked up
    return unless defined $full and length $full;

    return 1 if $self->_find_entry($full);
    return;
}
396
397=head2 $tar->extract( [@filenames] )
398
399Write files whose names are equivalent to any of the names in
400C<@filenames> to disk, creating subdirectories as necessary. This
401might not work too well under VMS.
402Under MacPerl, the file's modification time will be converted to the
403MacOS zero of time, and appropriate conversions will be done to the
404path. However, the length of each element of the path is not
405inspected to see whether it's longer than MacOS currently allows (32
406characters).
407
408If C<extract> is called without a list of file names, the entire
409contents of the archive are extracted.
410
Returns a list of the C<Archive::Tar::File> objects that were extracted.
412
413=cut
414
### Extract entries to disk. With arguments, only entries whose full path
### equals one of the given names are extracted (every entry sharing that
### path is included); without arguments, everything is extracted.
### Returns the list of entry objects that were extracted, or nothing as
### soon as a name cannot be found or an entry fails to extract.
sub extract {
    my ($self, @wanted) = @_;
    my @entries;

    ### you requested the extraction of only certain files
    if( @wanted ) {
        for my $name ( @wanted ) {
            my @hits = grep { $name eq $_->full_path } @{ $self->_data };

            return $self->_error( qq[Could not find '$name' in archive] )
                unless @hits;

            ### we found the file(s) you're looking for
            push @entries, @hits;
        }

    ### just grab all the file items
    } else {
        @entries = $self->get_files;
    }

    ### nothing found? that's an error
    if( !@entries ) {
        $self->_error( qq[No files found for ] . $self->_file );
        return;
    }

    ### now extract them
    for my $entry ( @entries ) {
        next if $self->_extract_file( $entry );

        $self->_error(q[Could not extract ']. $entry->full_path .q['] );
        return;
    }

    return @entries;
}
457
458=head2 $tar->extract_file( $file, [$extract_path] )
459
460Write an entry, whose name is equivalent to the file name provided to
461disk. Optionally takes a second parameter, which is the full (unix)
462path (including filename) the entry will be written to.
463
464For example:
465
466 $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' );
467
468Returns true on success, false on failure.
469
470=cut
471
### Extract the single entry named $file, optionally to the alternative
### path $alt. Returns whatever _extract_file() returns, or nothing if no
### name was given or no such entry exists.
sub extract_file {
    my $self = shift;
    my $file = shift;

    ### definedness/length test (as rename() does) instead of a truth
    ### test, so an entry literally named '0' can be extracted
    return unless defined $file and length $file;

    my $alt  = shift;

    my $entry = $self->_find_entry( $file );
    unless( $entry ) {
        $self->_error( qq[Could not find an entry for '$file'] );
        return;
    }

    return $self->_extract_file( $entry, $alt );
}
482
### Write one archive entry to disk.
###   $entry - the entry object to extract (required)
###   $alt   - optional destination path in local-OS syntax; without it
###            the entry's own full_path (Unix syntax) is used
### Missing directories are created. Regular files get their content
### written; other types are delegated to _make_special_file(). After
### that the timestamp, ownership and mode are applied; failures there
### are recorded via _error() but do not fail the extraction.
### Returns 1 on success and nothing on failure.
sub _extract_file {
    my $self    = shift;
    my $entry   = shift or return;
    my $alt     = shift;
    my $cwd     = cwd();

    ### you wanted an alternate extraction location ###
    my $name = defined $alt ? $alt : $entry->full_path;

    ### splitpath takes a bool at the end to indicate
    ### that it's splitting a dir
    my ($vol,$dirs,$file);
    if ( defined $alt ) { # It's a local-OS path
        ($vol,$dirs,$file) = File::Spec->splitpath(       $alt,
                                                          $entry->is_dir );
    } else {
        ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name,
                                                          $entry->is_dir );
    }

    my $dir;
    ### is $name an absolute path? ###
    if( File::Spec->file_name_is_absolute( $dirs ) ) {
        $dir = $dirs;

    ### it's a relative path ###
    } else {
        my @dirs    = File::Spec::Unix->splitdir( $dirs );
        my @cwd     = File::Spec->splitdir( $cwd );
        $dir        = File::Spec->catdir( @cwd, @dirs );

        # catdir() returns undef if the path is longer than 255 chars on VMS
        unless ( defined $dir ) {
            $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] );
            return;
        }

    }

    if( -e $dir && !-d _ ) {
        $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] );
        return;
    }

    ### '_' reuses the result of the -e test above
    unless ( -d _ ) {
        eval { File::Path::mkpath( $dir, 0, 0777 ) };
        if( $@ ) {
            $self->_error( qq[Could not create directory '$dir': $@] );
            return;
        }
    }

    ### we're done if we just needed to create a dir ###
    return 1 if $entry->is_dir;

    my $full = File::Spec->catfile( $dir, $file );

    if( $entry->is_unknown ) {
        $self->_error( qq[Unknown file type for file '$full'] );
        return;
    }

    if( length $entry->type && $entry->is_file ) {
        my $fh = IO::File->new;

        ### pass the mode separately instead of gluing '>' onto the name:
        ### a two-argument style open would strip trailing whitespace
        ### from the file name
        $fh->open( $full, '>' ) or (
            $self->_error( qq[Could not open file '$full': $!] ),
            return
        );

        if( $entry->size ) {
            binmode $fh;
            syswrite $fh, $entry->data or (
                $self->_error( qq[Could not write data to '$full'] ),
                return
            );
        }

        close $fh or (
            $self->_error( qq[Could not close file '$full'] ),
            return
        );

    } else {
        $self->_make_special_file( $entry, $full ) or return;
    }

    utime time, $entry->mtime - TIME_OFFSET, $full or
        $self->_error( qq[Could not update timestamp] );

    if( $CHOWN && CAN_CHOWN ) {
        chown $entry->uid, $entry->gid, $full or
            $self->_error( qq[Could not set uid/gid on '$full'] );
    }

    ### only chmod if we're allowed to, but never chmod symlinks, since they'll
    ### change the perms on the file they're linking too...
    if( $CHMOD and not -l $full ) {
        chmod $entry->mode, $full or
            $self->_error( qq[Could not chmod '$full' to ] . $entry->mode );
    }

    return 1;
}
586
### Create a non-regular entry (symlink, hard link, fifo, block or
### character device) at path $file.
### When ON_UNIX is false, links are extracted as plain files instead,
### and fifo/device creation is reported as failed.
### Sockets, and any type not listed above, are silently accepted
### without creating anything.
### Returns 1 on success, or the (empty) result of _error() on failure.
sub _make_special_file {
    my $self    = shift;
    my $entry   = shift or return;
    my $file    = shift; return unless defined $file;

    my $err;

    if( $entry->is_symlink ) {
        my $fail;
        if( ON_UNIX ) {
            symlink( $entry->linkname, $file ) or $fail++;

        } else {
            $self->_extract_special_file_as_plain_file( $entry, $file )
                or $fail++;
        }

        $err =  qq[Making symbolic link from '] . $entry->linkname .
                qq[' to '$file' failed] if $fail;

    } elsif ( $entry->is_hardlink ) {
        my $fail;
        if( ON_UNIX ) {
            link( $entry->linkname, $file ) or $fail++;

        } else {
            $self->_extract_special_file_as_plain_file( $entry, $file )
                or $fail++;
        }

        $err =  qq[Making hard link from '] . $entry->linkname .
                qq[' to '$file' failed] if $fail;

    } elsif ( $entry->is_fifo ) {
        ON_UNIX && !system('mknod', $file, 'p') or
            $err = qq[Making fifo ']. $entry->name .qq[' failed];

    } elsif ( $entry->is_blockdev or $entry->is_chardev ) {
        my $is_block = $entry->is_blockdev;
        my $mode     = $is_block ? 'b' : 'c';

        ### name the right kind of device in the error message
        my $kind     = $is_block ? 'block' : 'character';

        ON_UNIX && !system('mknod', $file, $mode,
                            $entry->devmajor, $entry->devminor) or
            $err =  qq[Making $kind device ']. $entry->name .qq[' (maj=] .
                    $entry->devmajor . qq[ min=] . $entry->devminor .
                    qq[) failed.];

    } elsif ( $entry->is_socket ) {
        ### the original doesn't do anything special for sockets.... ###
        1;
    }

    return $err ? $self->_error( $err ) : 1;
}
640
### don't know how to make symlinks, let's just extract the file as
### a plain file
###   $entry - the link entry that cannot be created as a real link
###   $file  - the path to write to
### The entry named by the link's linkname is looked up in the in-memory
### archive and a plain-file copy of it is written to $file.
### Returns 1 on success and nothing on failure.
sub _extract_special_file_as_plain_file {
    my $self    = shift;
    my $entry   = shift or return;
    my $file    = shift; return unless defined $file;

    my $orig = $self->_find_entry( $entry->linkname );

    unless( $orig ) {
        return $self->_error( qq[Could not find file '] . $entry->linkname .
                              qq[' in memory.] );
    }

    ### clone the link /target/ (not the link itself, which carries none
    ### of the target's content) and make it appear as a normal file ###
    my $clone = $orig->clone;
    $clone->_downgrade_to_plainfile;

    ### on failure just return: calling _error() again with no message
    ### would overwrite whatever error the extraction recorded
    $self->_extract_file( $clone, $file ) or return;

    return 1;
}
668
669=head2 $tar->list_files( [\@properties] )
670
671Returns a list of the names of all the files in the archive.
672
673If C<list_files()> is passed an array reference as its first argument
674it returns a list of hash references containing the requested
675properties of each file. The following list of properties is
676supported: name, size, mtime (last modified date), mode, uid, gid,
677linkname, uname, gname, devmajor, devminor, prefix.
678
679Passing an array reference containing only one element, 'name', is
680special cased to return a list of names rather than a list of hash
681references, making it equivalent to calling C<list_files> without
682arguments.
683
684=cut
685
### List the archive's entries.
###   $props - optional array reference of property names
### With no properties, or only 'name', the full paths are returned;
### otherwise one hash reference per entry is returned, mapping each
### requested property to the result of calling that method on the entry.
sub list_files {
    my ($self, $props) = @_;
    $props ||= [ ];

    unless( $self->_data ) {
        $self->read() or return;
    }

    my $names_only = !@$props
                        || ( @$props == 1 && $props->[0] eq 'name' );

    return map { $_->full_path } @{ $self->_data } if $names_only;

    ### one property hash per entry; the +{ } makes sure perl sees a
    ### hash constructor rather than a block
    my @rows;
    for my $entry ( @{ $self->_data } ) {
        push @rows, +{ map { $_ => $entry->$_() } @$props };
    }

    return @rows;
}
711
### Look up the first in-memory entry whose full path is exactly $file.
### Returns the entry object; records an error and returns nothing when
### no name was given or nothing matches.
sub _find_entry {
    my ($self, $file) = @_;

    if( defined $file ) {
        foreach my $candidate ( @{ $self->_data } ) {
            return $candidate if $candidate->full_path eq $file;
        }

        $self->_error( qq[No such file in archive: '$file'] );

    } else {
        $self->_error( qq[No file specified] );
    }

    return;
}
729
730=head2 $tar->get_files( [@filenames] )
731
732Returns the C<Archive::Tar::File> objects matching the filenames
733provided. If no filename list was passed, all C<Archive::Tar::File>
734objects in the current Tar object are returned.
735
736Please refer to the C<Archive::Tar::File> documentation on how to
737handle these objects.
738
739=cut
740
### Return entry objects. Without arguments all entries are returned;
### otherwise the first match for each given name, in the order asked,
### with names that cannot be found simply left out.
sub get_files {
    my ($self, @names) = @_;

    return @{ $self->_data } unless @names;

    my @found = map {
        my $name = $_;
        grep { defined } $self->_find_entry( $name );
    } @names;

    return @found;
}
753
754=head2 $tar->get_content( $file )
755
756Return the content of the named file.
757
758=cut
759
### Return the data of the entry named $file, or nothing if there is no
### such entry.
sub get_content {
    my ($self, $file) = @_;

    my $entry = $self->_find_entry( $file );
    return unless $entry;

    return $entry->data;
}
766
767=head2 $tar->replace_content( $file, $content )
768
769Make the string $content be the content for the file named $file.
770
771=cut
772
### Replace the data of the entry named $file with $content. Returns what
### the entry's own replace_content() returns, or nothing if there is no
### such entry.
sub replace_content {
    my ($self, $file, $content) = @_;

    my $entry = $self->_find_entry( $file );
    return unless $entry;

    return $entry->replace_content( $content );
}
779
780=head2 $tar->rename( $file, $new_name )
781
782Rename the file of the in-memory archive to $new_name.
783
784Note that you must specify a Unix path for $new_name, since per tar
785standard, all files in the archive must be Unix paths.
786
787Returns true on success and false on failure.
788
789=cut
790
### Rename the in-memory entry $file to $new. Returns what the entry's
### own rename() returns, or nothing when either name is undefined or
### the entry cannot be found.
sub rename {
    my ($self, $file, $new) = @_;

    return unless defined $file and defined $new;

    my $entry = $self->_find_entry( $file );
    return unless $entry;

    return $entry->rename( $new );
}
800
801=head2 $tar->remove (@filenamelist)
802
803Removes any entries with names matching any of the given filenames
804from the in-memory archive. Returns a list of C<Archive::Tar::File>
805objects that remain.
806
807=cut
808
### Remove every entry whose full path equals one of the given names
### from the in-memory archive.
### The remaining entries keep their original archive order, and entries
### that share a full path with each other are all retained; filtering
### through a path-keyed hash would scramble the order and collapse such
### duplicates.
### Returns the list of entries that remain.
sub remove {
    my $self = shift;
    my %doomed = map { $_ => 1 } @_;

    my @keep = grep { !$doomed{ $_->full_path } } @{ $self->_data };

    $self->_data( \@keep );

    return @keep;
}
820
821=head2 $tar->clear
822
823C<clear> clears the current in-memory archive. This effectively gives
824you a 'blank' object, ready to be filled again. Note that C<clear>
825only has effect on the object, not the underlying tarfile.
826
827=cut
828
### Reset the object to an empty archive: no entries and an empty file
### name. Only the object is affected. Returns 1.
sub clear {
    my ($self) = @_;
    return unless $self;

    $self->_data( [] );
    $self->_file( '' );

    return 1;
}
837
838
839=head2 $tar->write ( [$file, $compressed, $prefix] )
840
841Write the in-memory archive to disk. The first argument can either
842be the name of a file or a reference to an already open filehandle (a
843GLOB reference). If the second argument is true, the module will use
844IO::Zlib to write the file in a compressed format. If IO::Zlib is
845not available, the C<write> method will fail and return.
846
847Note that when you pass in a filehandle, the compression argument
848is ignored, as all files are printed verbatim to your filehandle.
849If you wish to enable compression with filehandles, use an
850C<IO::Zlib> filehandle instead.
851
852Specific levels of compression can be chosen by passing the values 2
853through 9 as the second parameter.
854
855The third argument is an optional prefix. All files will be tucked
856away in the directory you specify as prefix. So if you have files
857'a' and 'b' in your archive, and you specify 'foo' as prefix, they
858will be written to the archive as 'foo/a' and 'foo/b'.
859
860If no arguments are given, C<write> returns the entire formatted
861archive as a string, which could be useful if you'd like to stuff the
862archive into a socket or a pipe to gzip or something.
863
864=cut
865
### Serialise the in-memory archive.
###   $file       - file name or open handle to write to; when omitted
###                 (or empty) the archive is returned as a string
###   $gzip       - compression flag/level, passed on to _get_handle()
###   $ext_prefix - optional directory to prepend to every entry's path
### Entries whose name or prefix exceed NAME_LENGTH / PREFIX_LENGTH are
### preceded by a '@LongLink' entry carrying the full path.
### A handle that this method opened itself is closed before returning
### and a failing close is reported as an error; caller-supplied handles
### are left open.
### Returns 1 when writing to a file/handle, the archive string otherwise,
### and nothing on failure.
sub write {
    my $self        = shift;
    my $file        = shift; $file = '' unless defined $file;
    my $gzip        = shift || 0;
    my $ext_prefix  = shift; $ext_prefix = '' unless defined $ext_prefix;
    my $dummy       = '';

    ### _get_handle() returns references untouched, so only a plain file
    ### name results in a handle that is ours to close
    my $we_opened   = ( length($file) && !ref $file ) ? 1 : 0;

    ### only need a handle if we have a file to print to ###
    my $handle = length($file)
                    ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) )
                        or return )
                    : $HAS_PERLIO    ? do { open my $h, '>', \$dummy; $h }
                    : $HAS_IO_STRING ? IO::String->new
                    : __PACKAGE__->no_string_support();

    for my $entry ( @{$self->_data} ) {
        ### entries to be written to the tarfile ###
        my @write_me;

        ### only now will we change the object to reflect the current state
        ### of the name and prefix fields -- this needs to be limited to
        ### write() only!
        my $clone = $entry->clone;

        ### so, if you don't want use to use the prefix, we'll stuff
        ### everything in the name field instead
        if( $DO_NOT_USE_PREFIX ) {

            ### you might have an extended prefix, if so, set it in the clone
            ### XXX is ::Unix right?
            $clone->name( length $ext_prefix
                            ? File::Spec::Unix->catdir( $ext_prefix,
                                                        $clone->full_path)
                            : $clone->full_path );
            $clone->prefix( '' );

        ### otherwise, we'll have to set it properly -- prefix part in the
        ### prefix and name part in the name field.
        } else {

            ### split them here, not before!
            my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path );

            ### you might have an extended prefix, if so, set it in the clone
            ### XXX is ::Unix right?
            $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix )
                if length $ext_prefix;

            $clone->prefix( $prefix );
            $clone->name( $name );
        }

        ### names are too long, and will get truncated if we don't add a
        ### '@LongLink' file...
        my $make_longlink = (   length($clone->name)    > NAME_LENGTH or
                                length($clone->prefix)  > PREFIX_LENGTH
                            ) || 0;

        ### perhaps we need to make a longlink file?
        if( $make_longlink ) {
            my $longlink = Archive::Tar::File->new(
                            data => LONGLINK_NAME,
                            $clone->full_path,
                            { type => LONGLINK }
                        );

            unless( $longlink ) {
                $self->_error(  qq[Could not create 'LongLink' entry for ] .
                                qq[oversize file '] . $clone->full_path ."'" );
                return;
            };

            push @write_me, $longlink;
        }

        push @write_me, $clone;

        ### write the one, optionally 2 a::t::file objects to the handle
        for my $item (@write_me) {

            ### if the file is a symlink, there are 2 options:
            ### either we leave the symlink intact, but then we don't write any
            ### data OR we follow the symlink, which means we actually make a
            ### copy. if we do the latter, we have to change the TYPE of the
            ### clone to 'FILE'
            my $link_ok =  $item->is_symlink && $Archive::Tar::FOLLOW_SYMLINK;
            my $data_ok = !$item->is_symlink && $item->has_content;

            ### downgrade to a 'normal' file if it's a symlink we're going to
            ### treat as a regular file
            $item->_downgrade_to_plainfile if $link_ok;

            ### get the header for this block
            my $header = $self->_format_tar_entry( $item );
            unless( $header ) {
                $self->_error(q[Could not format header for: ] .
                                    $item->full_path );
                return;
            }

            unless( print $handle $header ) {
                $self->_error(q[Could not write header for: ] .
                                    $item->full_path);
                return;
            }

            if( $link_ok or $data_ok ) {
                unless( print $handle $item->data ) {
                    $self->_error(q[Could not write data for: ] .
                                    $item->full_path);
                    return;
                }

                ### pad the end of the clone if required ###
                print $handle TAR_PAD->( $item->size ) if $item->size % BLOCK;
            }

        } ### done writing these entries
    }

    ### write the end markers ###
    print $handle TAR_END x 2 or
            return $self->_error( qq[Could not write tar end markers] );

    ### make sure to close the handle if we created it, so that errors
    ### that only surface when the output is flushed are not lost
    if( $we_opened ) {
        unless( close $handle ) {
            $self->_error( qq[Could not close '$file' after writing] );
            return;
        }
    }

    ### did you want it written to a file, or returned as a string? ###
    return length($file) ? 1
                         : $HAS_PERLIO ? $dummy
                         : do { seek $handle, 0, 0; local $/; <$handle> }
}
997
### Build the packed tar header for one entry.
###   $entry      - the entry object to describe (required)
###   $ext_prefix - optional directory to prepend to the entry's prefix
###   $no_prefix  - if true, the prefix field is written empty
### Returns the header string with its checksum filled in.
sub _format_tar_entry {
    my ($self, $entry, $ext_prefix, $no_prefix) = @_;
    return unless $entry;

    $ext_prefix = '' unless defined $ext_prefix;
    $no_prefix ||= 0;

    my $name   = $entry->name;
    my $prefix = $entry->prefix;
    $prefix    = '' unless defined $prefix;

    ### the name is used as-is; Archive::Tar::File->_new_from_file already
    ### takes care of separating prefix and name, and stripping the prefix
    ### here would break for a file like x/x.

    $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix )
        if length $ext_prefix;

    ### not sure why this is... ###
    ### keep only the trailing PREFIX_LENGTH characters of the prefix
    my $max_prefix = PREFIX_LENGTH;
    $prefix = substr( $prefix, -$max_prefix )
        if length($prefix) >= $max_prefix;

    my $short_octal = "%06o";
    my $long_octal  = "%11o";

    ### collect the header fields in the order the PACK template expects
    ### this might be optimizable with a 'changed' flag in the file objects ###
    my @fields = ( $name );
    push @fields, map { sprintf( $short_octal, $entry->$_() ) } qw[mode uid gid];
    push @fields, map { sprintf( $long_octal,  $entry->$_() ) } qw[size mtime];
    push @fields, "";       # checksum field - space padded a bit down
    push @fields, map { $entry->$_() } qw[type linkname magic];
    push @fields, $entry->version || TAR_VERSION;
    push @fields, map { $entry->$_() } qw[uname gname];
    push @fields, map { sprintf( $short_octal, $entry->$_() ) } qw[devmajor devminor];
    push @fields, ( $no_prefix ? '' : $prefix );

    my $header = pack( PACK, @fields );

    ### add the checksum ###
    my $checksum = sprintf( "%6o\0", unpack( "%16C*", $header ) );
    substr( $header, 148, 7 ) = $checksum;

    return $header;
}
1050
1051=head2 $tar->add_files( @filenamelist )
1052
1053Takes a list of filenames and adds them to the in-memory archive.
1054
1055The path to the file is automatically converted to a Unix like
1056equivalent for use in the archive, and, if on MacOS, the file's
1057modification time is converted from the MacOS epoch to the Unix epoch.
1058So tar archives created on MacOS with B<Archive::Tar> can be read
1059both with I<tar> on Unix and applications like I<suntar> or
1060I<Stuffit Expander> on MacOS.
1061
1062Be aware that the file's type/creator and resource fork will be lost,
1063which is usually what you want in cross-platform archives.
1064
1065Returns a list of C<Archive::Tar::File> objects that were just added.
1066
1067=cut
1068
### Add files from disk to the in-memory archive. Names that do not exist
### or cannot be turned into an entry are reported via _error() and
### skipped. Returns the list of entry objects that were added, or
### nothing when called without file names.
sub add_files {
    my ($self, @files) = @_;
    return unless @files;

    my @added;
    foreach my $path ( @files ) {
        if( !-e $path ) {
            $self->_error( qq[No such file: '$path'] );
            next;
        }

        my $obj = Archive::Tar::File->new( file => $path );

        if( $obj ) {
            push @added, $obj;
        } else {
            $self->_error( qq[Unable to add file: '$path'] );
        }
    }

    push @{ $self->{_data} }, @added;

    return @added;
}
1093
1094=head2 $tar->add_data ( $filename, $data, [$opthashref] )
1095
1096Takes a filename, a scalar full of data and optionally a reference to
1097a hash with specific options.
1098
1099Will add a file to the in-memory archive, with name C<$filename> and
1100content C<$data>. Specific properties can be set using C<$opthashref>.
1101The following list of properties is supported: name, size, mtime
1102(last modified date), mode, uid, gid, linkname, uname, gname,
1103devmajor, devminor, prefix. (On MacOS, the file's path and
1104modification times are converted to Unix equivalents.)
1105
1106Returns the C<Archive::Tar::File> object that was just added, or
1107C<undef> on failure.
1108
1109=cut
1110
### Add an entry built from in-memory data.
###   $file - name of the entry inside the archive
###   $data - its content
###   $opt  - optional hash reference of entry properties
### Returns the new entry object, or nothing (after recording an error)
### if it could not be created.
sub add_data {
    my ($self, $file, $data, $opt) = @_;

    my $obj = Archive::Tar::File->new( data => $file, $data, $opt );

    if( $obj ) {
        push @{ $self->{_data} }, $obj;
        return $obj;
    }

    $self->_error( qq[Unable to add file: '$file'] );
    return;
}
1125
1126=head2 $tar->error( [$BOOL] )
1127
1128Returns the current errorstring (usually, the last error reported).
1129If a true value was specified, it will give the C<Carp::longmess>
1130equivalent of the error, in effect giving you a stacktrace.
1131
1132For backwards compatibility, this error is also available as
1133C<$Archive::Tar::error> although it is much recommended you use the
1134method call instead.
1135
1136=cut
1137
{
    ### state shared by _error() and error(): the last message lives in
    ### the package variable $error, its stack trace in the private
    ### lexical $longmess
    $error = '';
    my $longmess;

    ### record $msg as the current error and, unless warnings are
    ### switched off, print it; always returns nothing
    sub _error {
        my ($self, $msg) = @_;

        $error    = $msg;
        $longmess = Carp::longmess($msg);

        ### set Archive::Tar::WARN to 0 to disable printing of errors;
        ### with Archive::Tar::DEBUG set, the stack trace is printed
        carp( $DEBUG ? $longmess : $msg ) if $WARN;

        return;
    }

    ### return the last error, or its stack trace when called with
    ### a true argument
    sub error {
        my ($self, $want_trace) = @_;

        return $longmess if $want_trace;
        return $error;
    }
}
1161
1162
1163=head2 $bool = $tar->has_io_string
1164
1165Returns true if we currently have C<IO::String> support loaded.
1166
1167Either C<IO::String> or C<perlio> support is needed to support writing
stringified archives. Currently, C<perlio> is the preferred method, if
available.
1170
1171See the C<GLOBAL VARIABLES> section to see how to change this preference.
1172
1173=cut
1174
sub has_io_string {
    ### flag initialised at load time from the attempt to require IO::String
    return $HAS_IO_STRING;
}
1176
1177=head2 $bool = $tar->has_perlio
1178
1179Returns true if we currently have C<perlio> support loaded.
1180
This requires C<perl-5.8> or higher, compiled with C<perlio>.
1182
1183Either C<IO::String> or C<perlio> support is needed to support writing
stringified archives. Currently, C<perlio> is the preferred method, if
available.
1186
1187See the C<GLOBAL VARIABLES> section to see how to change this preference.
1188
1189=cut
1190
sub has_perlio {
    ### flag initialised at load time from $Config{useperlio}
    return $HAS_PERLIO;
}
1192
1193
1194=head1 Class Methods
1195
1196=head2 Archive::Tar->create_archive($file, $compression, @filelist)
1197
1198Creates a tar file from the list of files provided. The first
1199argument can either be the name of the tar file to create or a
1200reference to an open file handle (e.g. a GLOB reference).
1201
1202The second argument specifies the level of compression to be used, if
1203any. Compression of tar files requires the installation of the
1204IO::Zlib module. Specific levels of compression may be
1205requested by passing a value between 2 and 9 as the second argument.
1206Any other value evaluating as true will result in the default
1207compression level being used.
1208
1209Note that when you pass in a filehandle, the compression argument
1210is ignored, as all files are printed verbatim to your filehandle.
1211If you wish to enable compression with filehandles, use an
1212C<IO::Zlib> filehandle instead.
1213
The remaining arguments list the files to be included in the tar file.
These files should all exist. Any file which doesn't exist or can't be
added is skipped; the failure is recorded for the C<error> method and,
unless C<$Archive::Tar::WARN> is false, reported as a warning.
1217
1218If the archive creation fails for any reason, C<create_archive> will
1219return false. Please use the C<error> method to find the cause of the
1220failure.
1221
1222Note that this method does not write C<on the fly> as it were; it
1223still reads all the files into memory before writing out the archive.
1224Consult the FAQ below if this is a problem.
1225
1226=cut
1227
sub create_archive {
    my ($class, $file, $gzip, @files) = @_;

    ### without a target there is nothing we can do
    return unless defined $file;

    ### a false compression argument means: no compression
    $gzip ||= 0;

    ### refuse to write out an archive without any members
    return $class->_error( qq[Cowardly refusing to create empty archive!] )
        unless @files;

    ### build the archive in memory, then write it out in one go
    my $tar = $class->new;
    $tar->add_files( @files );

    return $tar->write( $file, $gzip );
}
1243
1244=head2 Archive::Tar->list_archive ($file, $compressed, [\@properties])
1245
1246Returns a list of the names of all the files in the archive. The
1247first argument can either be the name of the tar file to list or a
1248reference to an open file handle (e.g. a GLOB reference).
1249
1250If C<list_archive()> is passed an array reference as its third
1251argument it returns a list of hash references containing the requested
1252properties of each file. The following list of properties is
1253supported: name, size, mtime (last modified date), mode, uid, gid,
1254linkname, uname, gname, devmajor, devminor, prefix.
1255
1256Passing an array reference containing only one element, 'name', is
1257special cased to return a list of names rather than a list of hash
1258references.
1259
1260=cut
1261
sub list_archive {
    my $class = shift;

    ### take the archive name and compression flag off the argument
    ### list; whatever remains is handed to list_files() untouched
    my ($file, $gzip) = splice @_, 0, 2;
    return unless defined $file;
    $gzip ||= 0;

    ### reading the archive may fail, in which case we return nothing
    my $tar = $class->new( $file, $gzip ) or return;

    return $tar->list_files( @_ );
}
1272
1273=head2 Archive::Tar->extract_archive ($file, $gzip)
1274
Extracts the contents of the tar file. The first argument can either
be the name of the tar file to extract or a reference to an open file
handle (e.g. a GLOB reference). All relative paths in the tar file will
be created underneath the current working directory.
1279
1280C<extract_archive> will return a list of files it extracted.
1281If the archive extraction fails for any reason, C<extract_archive>
1282will return false. Please use the C<error> method to find the cause
1283of the failure.
1284
1285=cut
1286
sub extract_archive {
    my ($class, $file, $gzip) = @_;

    ### without an archive there is nothing to extract
    return unless defined $file;

    my $tar = $class->new();
    return unless $tar;

    ### ask read() to extract the entries while it reads the archive
    return $tar->read( $file, $gzip || 0, { extract => 1 } );
}
1296
1297=head2 Archive::Tar->can_handle_compressed_files
1298
1299A simple checking routine, which will return true if C<Archive::Tar>
1300is able to uncompress compressed archives on the fly with C<IO::Zlib>,
1301or false if C<IO::Zlib> is not installed.
1302
1303You can use this as a shortcut to determine whether C<Archive::Tar>
1304will do what you think before passing compressed archives to its
1305C<read> method.
1306
1307=cut
1308
sub can_handle_compressed_files {
    ### normalise the ZLIB constant to a plain 1 or 0
    return 1 if ZLIB;
    return 0;
}
1310
sub no_string_support {
    ### always fatal: die from the caller's perspective
    my $msg = "You have to install IO::String to support writing archives to strings";
    croak( $msg );
}
1314
13151;
1316
1317__END__
1318
1319=head1 GLOBAL VARIABLES
1320
1321=head2 $Archive::Tar::FOLLOW_SYMLINK
1322
1323Set this variable to C<1> to make C<Archive::Tar> effectively make a
1324copy of the file when extracting. Default is C<0>, which
1325means the symlink stays intact. Of course, you will have to pack the
1326file linked to as well.
1327
1328This option is checked when you write out the tarfile using C<write>
1329or C<create_archive>.
1330
1331This works just like C</bin/tar>'s C<-h> option.
1332
1333=head2 $Archive::Tar::CHOWN
1334
1335By default, C<Archive::Tar> will try to C<chown> your files if it is
1336able to. In some cases, this may not be desired. In that case, set
1337this variable to C<0> to disable C<chown>-ing, even if it were
1338possible.
1339
1340The default is C<1>.
1341
1342=head2 $Archive::Tar::CHMOD
1343
1344By default, C<Archive::Tar> will try to C<chmod> your files to
1345whatever mode was specified for the particular file in the archive.
1346In some cases, this may not be desired. In that case, set this
1347variable to C<0> to disable C<chmod>-ing.
1348
1349The default is C<1>.
1350
1351=head2 $Archive::Tar::DO_NOT_USE_PREFIX
1352
1353By default, C<Archive::Tar> will try to put paths that are over
1354100 characters in the C<prefix> field of your tar header. However,
1355some older tar programs do not implement this spec. To retain
1356compatibility with these older versions, you can set the
1357C<$DO_NOT_USE_PREFIX> variable to a true value, and C<Archive::Tar>
1358will use an alternate way of dealing with paths over 100 characters
1359by using the C<GNU Extended Header> feature.
1360
1361The default is C<0>.
1362
1363=head2 $Archive::Tar::DEBUG
1364
1365Set this variable to C<1> to always get the C<Carp::longmess> output
1366of the warnings, instead of the regular C<carp>. This is the same
1367message you would get by doing:
1368
1369 $tar->error(1);
1370
1371Defaults to C<0>.
1372
1373=head2 $Archive::Tar::WARN
1374
1375Set this variable to C<0> if you do not want any warnings printed.
1376Personally I recommend against doing this, but people asked for the
1377option. Also, be advised that this is of course not threadsafe.
1378
1379Defaults to C<1>.
1380
1381=head2 $Archive::Tar::error
1382
1383Holds the last reported error. Kept for historical reasons, but its
1384use is very much discouraged. Use the C<error()> method instead:
1385
1386 warn $tar->error unless $tar->extract;
1387
1388=head2 $Archive::Tar::HAS_PERLIO
1389
1390This variable holds a boolean indicating if we currently have
1391C<perlio> support loaded. This will be enabled for any perl
1392greater than C<5.8> compiled with C<perlio>.
1393
1394If you feel strongly about disabling it, set this variable to
1395C<false>. Note that you will then need C<IO::String> installed
1396to support writing stringified archives.
1397
1398Don't change this variable unless you B<really> know what you're
1399doing.
1400
1401=head2 $Archive::Tar::HAS_IO_STRING
1402
1403This variable holds a boolean indicating if we currently have
1404C<IO::String> support loaded. This will be enabled for any perl
1405that has a loadable C<IO::String> module.
1406
1407If you feel strongly about disabling it, set this variable to
1408C<false>. Note that you will then need C<perlio> support from
1409your perl to be able to write stringified archives.
1410
1411Don't change this variable unless you B<really> know what you're
1412doing.
1413
1414=head1 FAQ
1415
1416=over 4
1417
1418=item What's the minimum perl version required to run Archive::Tar?
1419
1420You will need perl version 5.005_03 or newer.
1421
1422=item Isn't Archive::Tar slow?
1423
Yes it is. It's pure perl, so it's a lot slower than your C</bin/tar>.
1425However, it's very portable. If speed is an issue, consider using
1426C</bin/tar> instead.
1427
1428=item Isn't Archive::Tar heavier on memory than /bin/tar?
1429
Yes it is, see previous answer. Since C<Compress::Zlib> and therefore
C<IO::Zlib> don't support C<seek> on their filehandles, there is little
choice but to read the archive into memory.
1433This is ok if you want to do in-memory manipulation of the archive.
1434If you just want to extract, use the C<extract_archive> class method
1435instead. It will optimize and write to disk immediately.
1436
1437=item Can't you lazy-load data instead?
1438
1439No, not easily. See previous question.
1440
1441=item How much memory will an X kb tar file need?
1442
1443Probably more than X kb, since it will all be read into memory. If
1444this is a problem, and you don't need to do in memory manipulation
1445of the archive, consider using C</bin/tar> instead.
1446
1447=item What do you do with unsupported filetypes in an archive?
1448
1449C<Unix> has a few filetypes that aren't supported on other platforms,
1450like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just
1451try to make a copy of the original file, rather than throwing an error.
1452
1453This does require you to read the entire archive in to memory first,
1454since otherwise we wouldn't know what data to fill the copy with.
1455(This means that you cannot use the class methods on archives that
1456have incompatible filetypes and still expect things to work).
1457
1458For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that
1459the extraction of this particular item didn't work.
1460
=item How do I access .tar.Z files?
1462
1463The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via
1464the C<IO::Zlib> module) to access tar files that have been compressed
1465with C<gzip>. Unfortunately tar files compressed with the Unix C<compress>
1466utility cannot be read by C<Compress::Zlib> and so cannot be directly
accessed by C<Archive::Tar>.
1468
1469If the C<uncompress> or C<gunzip> programs are available, you can use
1470one of these workarounds to read C<.tar.Z> files from C<Archive::Tar>
1471
1472Firstly with C<uncompress>
1473
1474 use Archive::Tar;
1475
1476 open F, "uncompress -c $filename |";
1477 my $tar = Archive::Tar->new(*F);
1478 ...
1479
1480and this with C<gunzip>
1481
1482 use Archive::Tar;
1483
1484 open F, "gunzip -c $filename |";
1485 my $tar = Archive::Tar->new(*F);
1486 ...
1487
1488Similarly, if the C<compress> program is available, you can use this to
1489write a C<.tar.Z> file
1490
1491 use Archive::Tar;
1492 use IO::File;
1493
1494 my $fh = new IO::File "| compress -c >$filename";
1495 my $tar = Archive::Tar->new();
1496 ...
1497 $tar->write($fh);
1498 $fh->close ;
1499
1500
=back
1502
1503=head1 TODO
1504
1505=over 4
1506
1507=item Check if passed in handles are open for read/write
1508
1509Currently I don't know of any portable pure perl way to do this.
1510Suggestions welcome.
1511
1512=back
1513
1514=head1 AUTHOR
1515
1516This module by
1517Jos Boumans E<lt>kane@cpan.orgE<gt>.
1518
1519=head1 ACKNOWLEDGEMENTS
1520
1521Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney and
1522especially Andrew Savige for their help and suggestions.
1523
1524=head1 COPYRIGHT
1525
1526This module is
1527copyright (c) 2002 Jos Boumans E<lt>kane@cpan.orgE<gt>.
1528All rights reserved.
1529
1530This library is free software;
1531you may redistribute and/or modify it under the same
1532terms as Perl itself.
1533
1534=cut