Redo generation of change #24898
[p5sagit/p5-mst-13.2.git] / lib / Archive / Tar.pm
CommitLineData
39713df4 1### the gnu tar specification:
2### http://www.gnu.org/software/tar/manual/html_mono/tar.html
3###
4### and the pax format spec, which tar derives from:
5### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
6
7package Archive::Tar;
8require 5.005_03;
9
10use strict;
11use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
12 $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING];
13
14$DEBUG = 0;
15$WARN = 1;
16$FOLLOW_SYMLINK = 0;
17$VERSION = "1.24";
18$CHOWN = 1;
19$CHMOD = 1;
20$DO_NOT_USE_PREFIX = 0;
21
22BEGIN {
23 use Config;
24 $HAS_PERLIO = $Config::Config{useperlio};
25
26 ### try and load IO::String anyway, so you can dynamically
27 ### switch between perlio and IO::String
28 eval {
29 require IO::String;
30 import IO::String;
31 };
32 $HAS_IO_STRING = $@ ? 0 : 1;
33
34}
35
36use Cwd;
37use IO::File;
38use Carp qw(carp croak);
39use File::Spec ();
40use File::Spec::Unix ();
41use File::Path ();
42
43use Archive::Tar::File;
44use Archive::Tar::Constant;
45
46=head1 NAME
47
48Archive::Tar - module for manipulations of tar archives
49
50=head1 SYNOPSIS
51
52 use Archive::Tar;
53 my $tar = Archive::Tar->new;
54
55 $tar->read('origin.tgz',1);
56 $tar->extract();
57
58 $tar->add_files('file/foo.pl', 'docs/README');
59 $tar->add_data('file/baz.txt', 'This is the contents now');
60
61 $tar->rename('oldname', 'new/file/name');
62
63 $tar->write('files.tar');
64
65=head1 DESCRIPTION
66
67Archive::Tar provides an object oriented mechanism for handling tar
68files. It provides class methods for quick and easy files handling
69while also allowing for the creation of tar file objects for custom
70manipulation. If you have the IO::Zlib module installed,
71Archive::Tar will also support compressed or gzipped tar files.
72
73An object of class Archive::Tar represents a .tar(.gz) archive full
74of files and things.
75
76=head1 Object Methods
77
78=head2 Archive::Tar->new( [$file, $compressed] )
79
80Returns a new Tar object. If given any arguments, C<new()> calls the
81C<read()> method automatically, passing on the arguments provided to
82the C<read()> method.
83
84If C<new()> is invoked with arguments and the C<read()> method fails
85for any reason, C<new()> returns undef.
86
87=cut
88
89my $tmpl = {
90 _data => [ ],
91 _file => 'Unknown',
92};
93
94### install get/set accessors for this object.
95for my $key ( keys %$tmpl ) {
96 no strict 'refs';
97 *{__PACKAGE__."::$key"} = sub {
98 my $self = shift;
99 $self->{$key} = $_[0] if @_;
100 return $self->{$key};
101 }
102}
103
104sub new {
105 my $class = shift;
106 $class = ref $class if ref $class;
107
108 ### copying $tmpl here since a shallow copy makes it use the
109 ### same aref, causing for files to remain in memory always.
110 my $obj = bless { _data => [ ], _file => 'Unknown' }, $class;
111
112 if (@_) {
113 return unless $obj->read( @_ );
114 }
115
116 return $obj;
117}
118
119=head2 $tar->read ( $filename|$handle, $compressed, {opt => 'val'} )
120
121Read the given tar file into memory.
122The first argument can either be the name of a file or a reference to
123an already open filehandle (or an IO::Zlib object if it's compressed)
124The second argument indicates whether the file referenced by the first
125argument is compressed.
126
127The C<read> will I<replace> any previous content in C<$tar>!
128
129The second argument may be considered optional if IO::Zlib is
130installed, since it will transparently Do The Right Thing.
131Archive::Tar will warn if you try to pass a compressed file if
132IO::Zlib is not available and simply return.
133
134The third argument can be a hash reference with options. Note that
135all options are case-sensitive.
136
137=over 4
138
139=item limit
140
141Do not read more than C<limit> files. This is useful if you have
142very big archives, and are only interested in the first few files.
143
144=item extract
145
146If set to true, immediately extract entries when reading them. This
147gives you the same memory break as the C<extract_archive> function.
148Note however that entries will not be read into memory, but written
149straight to disk.
150
151=back
152
153All files are stored internally as C<Archive::Tar::File> objects.
154Please consult the L<Archive::Tar::File> documentation for details.
155
156Returns the number of files read in scalar context, and a list of
157C<Archive::Tar::File> objects in list context.
158
159=cut
160
161sub read {
162 my $self = shift;
163 my $file = shift;
164 my $gzip = shift || 0;
165 my $opts = shift || {};
166
167 unless( defined $file ) {
168 $self->_error( qq[No file to read from!] );
169 return;
170 } else {
171 $self->_file( $file );
172 }
173
174 my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) )
175 or return;
176
177 my $data = $self->_read_tar( $handle, $opts ) or return;
178
179 $self->_data( $data );
180
181 return wantarray ? @$data : scalar @$data;
182}
183
184sub _get_handle {
185 my $self = shift;
186 my $file = shift; return unless defined $file;
187 return $file if ref $file;
188
189 my $gzip = shift || 0;
190 my $mode = shift || READ_ONLY->( ZLIB ); # default to read only
191
192 my $fh; my $bin;
193
194 ### only default to ZLIB if we're not trying to /write/ to a handle ###
195 if( ZLIB and $gzip || MODE_READ->( $mode ) ) {
196
197 ### IO::Zlib will Do The Right Thing, even when passed
198 ### a plain file ###
199 $fh = new IO::Zlib;
200
201 } else {
202 if( $gzip ) {
203 $self->_error(qq[Compression not available - Install IO::Zlib!]);
204 return;
205
206 } else {
207 $fh = new IO::File;
208 $bin++;
209 }
210 }
211
212 unless( $fh->open( $file, $mode ) ) {
213 $self->_error( qq[Could not create filehandle for '$file': $!!] );
214 return;
215 }
216
217 binmode $fh if $bin;
218
219 return $fh;
220}
221
222sub _read_tar {
223 my $self = shift;
224 my $handle = shift or return;
225 my $opts = shift || {};
226
227 my $count = $opts->{limit} || 0;
228 my $extract = $opts->{extract} || 0;
229
230 ### set a cap on the amount of files to extract ###
231 my $limit = 0;
232 $limit = 1 if $count > 0;
233
234 my $tarfile = [ ];
235 my $chunk;
236 my $read = 0;
237 my $real_name; # to set the name of a file when
238 # we're encountering @longlink
239 my $data;
240
241 LOOP:
242 while( $handle->read( $chunk, HEAD ) ) {
243 ### IO::Zlib doesn't support this yet
244 my $offset = eval { tell $handle } || 'unknown';
245
246 unless( $read++ ) {
247 my $gzip = GZIP_MAGIC_NUM;
248 if( $chunk =~ /$gzip/ ) {
249 $self->_error( qq[Cannot read compressed format in tar-mode] );
250 return;
251 }
252 }
253
254 ### if we can't read in all bytes... ###
255 last if length $chunk != HEAD;
256
257 ### Apparently this should really be two blocks of 512 zeroes,
258 ### but GNU tar sometimes gets it wrong. See comment in the
259 ### source code (tar.c) to GNU cpio.
260 next if $chunk eq TAR_END;
261
262 my $entry;
263 unless( $entry = Archive::Tar::File->new( chunk => $chunk ) ) {
264 $self->_error( qq[Couldn't read chunk at offset $offset] );
265 next;
266 }
267
268 ### ignore labels:
269 ### http://www.gnu.org/manual/tar/html_node/tar_139.html
270 next if $entry->is_label;
271
272 if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) {
273
274 if ( $entry->is_file && !$entry->validate ) {
275 ### sometimes the chunk is rather fux0r3d and a whole 512
276 ### bytes ends p in the ->name area.
277 ### clean it up, if need be
278 my $name = $entry->name;
279 $name = substr($name, 0, 100) if length $name > 100;
280 $name =~ s/\n/ /g;
281
282 $self->_error( $name . qq[: checksum error] );
283 next LOOP;
284 }
285
286 my $block = BLOCK_SIZE->( $entry->size );
287
288 $data = $entry->get_content_by_ref;
289
290 ### just read everything into memory
291 ### can't do lazy loading since IO::Zlib doesn't support 'seek'
292 ### this is because Compress::Zlib doesn't support it =/
293 ### this reads in the whole data in one read() call.
294 if( $handle->read( $$data, $block ) < $block ) {
295 $self->_error( qq[Read error on tarfile (missing data) '].
296 $entry->full_path ."' at offset $offset" );
297 next;
298 }
299
300 ### throw away trailing garbage ###
301 substr ($$data, $entry->size) = "";
302
303 ### part II of the @LongLink munging -- need to do /after/
304 ### the checksum check.
305 if( $entry->is_longlink ) {
306 ### weird thing in tarfiles -- if the file is actually a
307 ### @LongLink, the data part seems to have a trailing ^@
308 ### (unprintable) char. to display, pipe output through less.
309 ### but that doesn't *always* happen.. so check if the last
310 ### character is a control character, and if so remove it
311 ### at any rate, we better remove that character here, or tests
312 ### like 'eq' and hashlook ups based on names will SO not work
313 ### remove it by calculating the proper size, and then
314 ### tossing out everything that's longer than that size.
315
316 ### count number of nulls
317 my $nulls = $$data =~ tr/\0/\0/;
318
319 ### cut data + size by that many bytes
320 $entry->size( $entry->size - $nulls );
321 substr ($$data, $entry->size) = "";
322 }
323 }
324
325 ### clean up of the entries.. posix tar /apparently/ has some
326 ### weird 'feature' that allows for filenames > 255 characters
327 ### they'll put a header in with as name '././@LongLink' and the
328 ### contents will be the name of the /next/ file in the archive
329 ### pretty crappy and kludgy if you ask me
330
331 ### set the name for the next entry if this is a @LongLink;
332 ### this is one ugly hack =/ but needed for direct extraction
333 if( $entry->is_longlink ) {
334 $real_name = $data;
335 next;
336 } elsif ( defined $real_name ) {
337 $entry->name( $$real_name );
338 $entry->prefix('');
339 undef $real_name;
340 }
341
342 $self->_extract_file( $entry ) if $extract
343 && !$entry->is_longlink
344 && !$entry->is_unknown
345 && !$entry->is_label;
346
347 ### Guard against tarfiles with garbage at the end
348 last LOOP if $entry->name eq '';
349
350 ### push only the name on the rv if we're extracting
351 ### -- for extract_archive
352 push @$tarfile, ($extract ? $entry->name : $entry);
353
354 if( $limit ) {
355 $count-- unless $entry->is_longlink || $entry->is_dir;
356 last LOOP unless $count;
357 }
358 } continue {
359 undef $data;
360 }
361
362 return $tarfile;
363}
364
365=head2 $tar->contains_file( $filename )
366
367Check if the archive contains a certain file.
368It will return true if the file is in the archive, false otherwise.
369
370Note however, that this function does an exact match using C<eq>
371on the full path. So it cannot compensate for case-insensitive file-
372systems or compare 2 paths to see if they would point to the same
373underlying file.
374
375=cut
376
377sub contains_file {
378 my $self = shift;
379 my $full = shift or return;
380
381 return 1 if $self->_find_entry($full);
382 return;
383}
384
385=head2 $tar->extract( [@filenames] )
386
387Write files whose names are equivalent to any of the names in
388C<@filenames> to disk, creating subdirectories as necessary. This
389might not work too well under VMS.
390Under MacPerl, the file's modification time will be converted to the
391MacOS zero of time, and appropriate conversions will be done to the
392path. However, the length of each element of the path is not
393inspected to see whether it's longer than MacOS currently allows (32
394characters).
395
396If C<extract> is called without a list of file names, the entire
397contents of the archive are extracted.
398
399Returns a list of filenames extracted.
400
401=cut
402
403sub extract {
404 my $self = shift;
405 my @files;
406
407 ### you requested the extraction of only certian files
408 if( @_ ) {
409 for my $file (@_) {
410 my $found;
411 for my $entry ( @{$self->_data} ) {
412 next unless $file eq $entry->full_path;
413
414 ### we found the file you're looking for
415 push @files, $entry;
416 $found++;
417 }
418
419 unless( $found ) {
420 return $self->_error( qq[Could not find '$file' in archive] );
421 }
422 }
423
424 ### just grab all the file items
425 } else {
426 @files = $self->get_files;
427 }
428
429 ### nothing found? that's an error
430 unless( scalar @files ) {
431 $self->_error( qq[No files found for ] . $self->_file );
432 return;
433 }
434
435 ### now extract them
436 for my $entry ( @files ) {
437 unless( $self->_extract_file( $entry ) ) {
438 $self->_error(q[Could not extract ']. $entry->full_path .q['] );
439 return;
440 }
441 }
442
443 return @files;
444}
445
446=head2 $tar->extract_file( $file, [$extract_path] )
447
448Write an entry, whose name is equivalent to the file name provided to
449disk. Optionally takes a second parameter, which is the full (unix)
450path (including filename) the entry will be written to.
451
452For example:
453
454 $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' );
455
456Returns true on success, false on failure.
457
458=cut
459
460sub extract_file {
461 my $self = shift;
462 my $file = shift or return;
463 my $alt = shift;
464
465 my $entry = $self->_find_entry( $file )
466 or $self->_error( qq[Could not find an entry for '$file'] ), return;
467
468 return $self->_extract_file( $entry, $alt );
469}
470
471sub _extract_file {
472 my $self = shift;
473 my $entry = shift or return;
474 my $alt = shift;
475 my $cwd = cwd();
476
477 ### you wanted an alternate extraction location ###
478 my $name = defined $alt ? $alt : $entry->full_path;
479
480 ### splitpath takes a bool at the end to indicate
481 ### that it's splitting a dir
482 my ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name,
483 $entry->is_dir );
484 my $dir;
485 ### is $name an absolute path? ###
486 if( File::Spec->file_name_is_absolute( $dirs ) ) {
487 $dir = $dirs;
488
489 ### it's a relative path ###
490 } else {
491 my @dirs = File::Spec::Unix->splitdir( $dirs );
492 my @cwd = File::Spec->splitdir( $cwd );
493 $dir = File::Spec->catdir(@cwd, @dirs);
494 }
495
496 if( -e $dir && !-d _ ) {
497 $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] );
498 return;
499 }
500
501 unless ( -d _ ) {
502 eval { File::Path::mkpath( $dir, 0, 0777 ) };
503 if( $@ ) {
504 $self->_error( qq[Could not create directory '$dir': $@] );
505 return;
506 }
507 }
508
509 ### we're done if we just needed to create a dir ###
510 return 1 if $entry->is_dir;
511
512 my $full = File::Spec->catfile( $dir, $file );
513
514 if( $entry->is_unknown ) {
515 $self->_error( qq[Unknown file type for file '$full'] );
516 return;
517 }
518
519 if( length $entry->type && $entry->is_file ) {
520 my $fh = IO::File->new;
521 $fh->open( '>' . $full ) or (
522 $self->_error( qq[Could not open file '$full': $!] ),
523 return
524 );
525
526 if( $entry->size ) {
527 binmode $fh;
528 syswrite $fh, $entry->data or (
529 $self->_error( qq[Could not write data to '$full'] ),
530 return
531 );
532 }
533
534 close $fh or (
535 $self->_error( qq[Could not close file '$full'] ),
536 return
537 );
538
539 } else {
540 $self->_make_special_file( $entry, $full ) or return;
541 }
542
543 utime time, $entry->mtime - TIME_OFFSET, $full or
544 $self->_error( qq[Could not update timestamp] );
545
546 if( $CHOWN && CAN_CHOWN ) {
547 chown $entry->uid, $entry->gid, $full or
548 $self->_error( qq[Could not set uid/gid on '$full'] );
549 }
550
551 ### only chmod if we're allowed to, but never chmod symlinks, since they'll
552 ### change the perms on the file they're linking too...
553 if( $CHMOD and not -l $full ) {
554 chmod $entry->mode, $full or
555 $self->_error( qq[Could not chown '$full' to ] . $entry->mode );
556 }
557
558 return 1;
559}
560
561sub _make_special_file {
562 my $self = shift;
563 my $entry = shift or return;
564 my $file = shift; return unless defined $file;
565
566 my $err;
567
568 if( $entry->is_symlink ) {
569 my $fail;
570 if( ON_UNIX ) {
571 symlink( $entry->linkname, $file ) or $fail++;
572
573 } else {
574 $self->_extract_special_file_as_plain_file( $entry, $file )
575 or $fail++;
576 }
577
578 $err = qq[Making symbolink link from '] . $entry->linkname .
579 qq[' to '$file' failed] if $fail;
580
581 } elsif ( $entry->is_hardlink ) {
582 my $fail;
583 if( ON_UNIX ) {
584 link( $entry->linkname, $file ) or $fail++;
585
586 } else {
587 $self->_extract_special_file_as_plain_file( $entry, $file )
588 or $fail++;
589 }
590
591 $err = qq[Making hard link from '] . $entry->linkname .
592 qq[' to '$file' failed] if $fail;
593
594 } elsif ( $entry->is_fifo ) {
595 ON_UNIX && !system('mknod', $file, 'p') or
596 $err = qq[Making fifo ']. $entry->name .qq[' failed];
597
598 } elsif ( $entry->is_blockdev or $entry->is_chardev ) {
599 my $mode = $entry->is_blockdev ? 'b' : 'c';
600
601 ON_UNIX && !system('mknod', $file, $mode,
602 $entry->devmajor, $entry->devminor) or
603 $err = qq[Making block device ']. $entry->name .qq[' (maj=] .
604 $entry->devmajor . qq[ min=] . $entry->devminor .
605 qq[) failed.];
606
607 } elsif ( $entry->is_socket ) {
608 ### the original doesn't do anything special for sockets.... ###
609 1;
610 }
611
612 return $err ? $self->_error( $err ) : 1;
613}
614
615### don't know how to make symlinks, let's just extract the file as
616### a plain file
617sub _extract_special_file_as_plain_file {
618 my $self = shift;
619 my $entry = shift or return;
620 my $file = shift; return unless defined $file;
621
622 my $err;
623 TRY: {
624 my $orig = $self->_find_entry( $entry->linkname );
625
626 unless( $orig ) {
627 $err = qq[Could not find file '] . $entry->linkname .
628 qq[' in memory.];
629 last TRY;
630 }
631
632 ### clone the entry, make it appear as a normal file ###
633 my $clone = $entry->clone;
634 $clone->_downgrade_to_plainfile;
635 $self->_extract_file( $clone, $file ) or last TRY;
636
637 return 1;
638 }
639
640 return $self->_error($err);
641}
642
643=head2 $tar->list_files( [\@properties] )
644
645Returns a list of the names of all the files in the archive.
646
647If C<list_files()> is passed an array reference as its first argument
648it returns a list of hash references containing the requested
649properties of each file. The following list of properties is
650supported: name, size, mtime (last modified date), mode, uid, gid,
651linkname, uname, gname, devmajor, devminor, prefix.
652
653Passing an array reference containing only one element, 'name', is
654special cased to return a list of names rather than a list of hash
655references, making it equivalent to calling C<list_files> without
656arguments.
657
658=cut
659
660sub list_files {
661 my $self = shift;
662 my $aref = shift || [ ];
663
664 unless( $self->_data ) {
665 $self->read() or return;
666 }
667
668 if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) {
669 return map { $_->full_path } @{$self->_data};
670 } else {
671
672 #my @rv;
673 #for my $obj ( @{$self->_data} ) {
674 # push @rv, { map { $_ => $obj->$_() } @$aref };
675 #}
676 #return @rv;
677
678 ### this does the same as the above.. just needs a +{ }
679 ### to make sure perl doesn't confuse it for a block
680 return map { my $o=$_;
681 +{ map { $_ => $o->$_() } @$aref }
682 } @{$self->_data};
683 }
684}
685
686sub _find_entry {
687 my $self = shift;
688 my $file = shift;
689
690 unless( defined $file ) {
691 $self->_error( qq[No file specified] );
692 return;
693 }
694
695 for my $entry ( @{$self->_data} ) {
696 my $path = $entry->full_path;
697 return $entry if $path eq $file;
698 }
699
700 $self->_error( qq[No such file in archive: '$file'] );
701 return;
702}
703
704=head2 $tar->get_files( [@filenames] )
705
706Returns the C<Archive::Tar::File> objects matching the filenames
707provided. If no filename list was passed, all C<Archive::Tar::File>
708objects in the current Tar object are returned.
709
710Please refer to the C<Archive::Tar::File> documentation on how to
711handle these objects.
712
713=cut
714
715sub get_files {
716 my $self = shift;
717
718 return @{ $self->_data } unless @_;
719
720 my @list;
721 for my $file ( @_ ) {
722 push @list, grep { defined } $self->_find_entry( $file );
723 }
724
725 return @list;
726}
727
728=head2 $tar->get_content( $file )
729
730Return the content of the named file.
731
732=cut
733
734sub get_content {
735 my $self = shift;
736 my $entry = $self->_find_entry( shift ) or return;
737
738 return $entry->data;
739}
740
741=head2 $tar->replace_content( $file, $content )
742
743Make the string $content be the content for the file named $file.
744
745=cut
746
747sub replace_content {
748 my $self = shift;
749 my $entry = $self->_find_entry( shift ) or return;
750
751 return $entry->replace_content( shift );
752}
753
754=head2 $tar->rename( $file, $new_name )
755
756Rename the file of the in-memory archive to $new_name.
757
758Note that you must specify a Unix path for $new_name, since per tar
759standard, all files in the archive must be Unix paths.
760
761Returns true on success and false on failure.
762
763=cut
764
765sub rename {
766 my $self = shift;
767 my $file = shift; return unless defined $file;
768 my $new = shift; return unless defined $new;
769
770 my $entry = $self->_find_entry( $file ) or return;
771
772 return $entry->rename( $new );
773}
774
775=head2 $tar->remove (@filenamelist)
776
777Removes any entries with names matching any of the given filenames
778from the in-memory archive. Returns a list of C<Archive::Tar::File>
779objects that remain.
780
781=cut
782
783sub remove {
784 my $self = shift;
785 my @list = @_;
786
787 my %seen = map { $_->full_path => $_ } @{$self->_data};
788 delete $seen{ $_ } for @list;
789
790 $self->_data( [values %seen] );
791
792 return values %seen;
793}
794
795=head2 $tar->clear
796
797C<clear> clears the current in-memory archive. This effectively gives
798you a 'blank' object, ready to be filled again. Note that C<clear>
799only has effect on the object, not the underlying tarfile.
800
801=cut
802
803sub clear {
804 my $self = shift or return;
805
806 $self->_data( [] );
807 $self->_file( '' );
808
809 return 1;
810}
811
812
813=head2 $tar->write ( [$file, $compressed, $prefix] )
814
815Write the in-memory archive to disk. The first argument can either
816be the name of a file or a reference to an already open filehandle (a
817GLOB reference). If the second argument is true, the module will use
818IO::Zlib to write the file in a compressed format. If IO::Zlib is
819not available, the C<write> method will fail and return.
820
821Note that when you pass in a filehandle, the compression argument
822is ignored, as all files are printed verbatim to your filehandle.
823If you wish to enable compression with filehandles, use an
824C<IO::Zlib> filehandle instead.
825
826Specific levels of compression can be chosen by passing the values 2
827through 9 as the second parameter.
828
829The third argument is an optional prefix. All files will be tucked
830away in the directory you specify as prefix. So if you have files
831'a' and 'b' in your archive, and you specify 'foo' as prefix, they
832will be written to the archive as 'foo/a' and 'foo/b'.
833
834If no arguments are given, C<write> returns the entire formatted
835archive as a string, which could be useful if you'd like to stuff the
836archive into a socket or a pipe to gzip or something.
837
838=cut
839
840sub write {
841 my $self = shift;
842 my $file = shift; $file = '' unless defined $file;
843 my $gzip = shift || 0;
844 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix;
845 my $dummy = '';
846
847 ### only need a handle if we have a file to print to ###
848 my $handle = length($file)
849 ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) )
850 or return )
851 : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h }
852 : $HAS_IO_STRING ? IO::String->new
853 : __PACKAGE__->no_string_support();
854
855
856
857 for my $entry ( @{$self->_data} ) {
858 ### entries to be written to the tarfile ###
859 my @write_me;
860
861 ### only now will we change the object to reflect the current state
862 ### of the name and prefix fields -- this needs to be limited to
863 ### write() only!
864 my $clone = $entry->clone;
865
866
867 ### so, if you don't want use to use the prefix, we'll stuff
868 ### everything in the name field instead
869 if( $DO_NOT_USE_PREFIX ) {
870
871 ### you might have an extended prefix, if so, set it in the clone
872 ### XXX is ::Unix right?
873 $clone->name( length $ext_prefix
874 ? File::Spec::Unix->catdir( $ext_prefix,
875 $clone->full_path)
876 : $clone->full_path );
877 $clone->prefix( '' );
878
879 ### otherwise, we'll have to set it properly -- prefix part in the
880 ### prefix and name part in the name field.
881 } else {
882
883 ### split them here, not before!
884 my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path );
885
886 ### you might have an extended prefix, if so, set it in the clone
887 ### XXX is ::Unix right?
888 $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix )
889 if length $ext_prefix;
890
891 $clone->prefix( $prefix );
892 $clone->name( $name );
893 }
894
895 ### names are too long, and will get truncated if we don't add a
896 ### '@LongLink' file...
897 my $make_longlink = ( length($clone->name) > NAME_LENGTH or
898 length($clone->prefix) > PREFIX_LENGTH
899 ) || 0;
900
901 ### perhaps we need to make a longlink file?
902 if( $make_longlink ) {
903 my $longlink = Archive::Tar::File->new(
904 data => LONGLINK_NAME,
905 $clone->full_path,
906 { type => LONGLINK }
907 );
908
909 unless( $longlink ) {
910 $self->_error( qq[Could not create 'LongLink' entry for ] .
911 qq[oversize file '] . $clone->full_path ."'" );
912 return;
913 };
914
915 push @write_me, $longlink;
916 }
917
918 push @write_me, $clone;
919
920 ### write the one, optionally 2 a::t::file objects to the handle
921 for my $clone (@write_me) {
922
923 ### if the file is a symlink, there are 2 options:
924 ### either we leave the symlink intact, but then we don't write any
925 ### data OR we follow the symlink, which means we actually make a
926 ### copy. if we do the latter, we have to change the TYPE of the
927 ### clone to 'FILE'
928 my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK;
929 my $data_ok = !$clone->is_symlink && $clone->has_content;
930
931 ### downgrade to a 'normal' file if it's a symlink we're going to
932 ### treat as a regular file
933 $clone->_downgrade_to_plainfile if $link_ok;
934
935 ### get the header for this block
936 my $header = $self->_format_tar_entry( $clone );
937 unless( $header ) {
938 $self->_error(q[Could not format header for: ] .
939 $clone->full_path );
940 return;
941 }
942
943 unless( print $handle $header ) {
944 $self->_error(q[Could not write header for: ] .
945 $clone->full_path);
946 return;
947 }
948
949 if( $link_ok or $data_ok ) {
950 unless( print $handle $clone->data ) {
951 $self->_error(q[Could not write data for: ] .
952 $clone->full_path);
953 return;
954 }
955
956 ### pad the end of the clone if required ###
957 print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK
958 }
959
960 } ### done writing these entries
961 }
962
963 ### write the end markers ###
964 print $handle TAR_END x 2 or
965 return $self->_error( qq[Could not write tar end markers] );
966 ### did you want it written to a file, or returned as a string? ###
967 return length($file) ? 1
968 : $HAS_PERLIO ? $dummy
969 : do { seek $handle, 0, 0; local $/; <$handle> }
970}
971
972sub _format_tar_entry {
973 my $self = shift;
974 my $entry = shift or return;
975 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix;
976 my $no_prefix = shift || 0;
977
978 my $file = $entry->name;
979 my $prefix = $entry->prefix; $prefix = '' unless defined $prefix;
980
981 ### remove the prefix from the file name
982 ### not sure if this is still neeeded --kane
983 ### no it's not -- Archive::Tar::File->_new_from_file will take care of
984 ### this for us. Even worse, this would break if we tried to add a file
985 ### like x/x.
986 #if( length $prefix ) {
987 # $file =~ s/^$match//;
988 #}
989
990 $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix)
991 if length $ext_prefix;
992
993 ### not sure why this is... ###
994 my $l = PREFIX_LENGTH; # is ambiguous otherwise...
995 substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH;
996
997 my $f1 = "%06o"; my $f2 = "%11o";
998
999 ### this might be optimizable with a 'changed' flag in the file objects ###
1000 my $tar = pack (
1001 PACK,
1002 $file,
1003
1004 (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]),
1005 (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]),
1006
1007 "", # checksum field - space padded a bit down
1008
1009 (map { $entry->$_() } qw[type linkname magic]),
1010
1011 $entry->version || TAR_VERSION,
1012
1013 (map { $entry->$_() } qw[uname gname]),
1014 (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]),
1015
1016 ($no_prefix ? '' : $prefix)
1017 );
1018
1019 ### add the checksum ###
1020 substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar));
1021
1022 return $tar;
1023}
1024
1025=head2 $tar->add_files( @filenamelist )
1026
1027Takes a list of filenames and adds them to the in-memory archive.
1028
1029The path to the file is automatically converted to a Unix like
1030equivalent for use in the archive, and, if on MacOS, the file's
1031modification time is converted from the MacOS epoch to the Unix epoch.
1032So tar archives created on MacOS with B<Archive::Tar> can be read
1033both with I<tar> on Unix and applications like I<suntar> or
1034I<Stuffit Expander> on MacOS.
1035
1036Be aware that the file's type/creator and resource fork will be lost,
1037which is usually what you want in cross-platform archives.
1038
1039Returns a list of C<Archive::Tar::File> objects that were just added.
1040
1041=cut
1042
1043sub add_files {
1044 my $self = shift;
1045 my @files = @_ or return;
1046
1047 my @rv;
1048 for my $file ( @files ) {
1049 unless( -e $file ) {
1050 $self->_error( qq[No such file: '$file'] );
1051 next;
1052 }
1053
1054 my $obj = Archive::Tar::File->new( file => $file );
1055 unless( $obj ) {
1056 $self->_error( qq[Unable to add file: '$file'] );
1057 next;
1058 }
1059
1060 push @rv, $obj;
1061 }
1062
1063 push @{$self->{_data}}, @rv;
1064
1065 return @rv;
1066}
1067
1068=head2 $tar->add_data ( $filename, $data, [$opthashref] )
1069
1070Takes a filename, a scalar full of data and optionally a reference to
1071a hash with specific options.
1072
1073Will add a file to the in-memory archive, with name C<$filename> and
1074content C<$data>. Specific properties can be set using C<$opthashref>.
1075The following list of properties is supported: name, size, mtime
1076(last modified date), mode, uid, gid, linkname, uname, gname,
1077devmajor, devminor, prefix. (On MacOS, the file's path and
1078modification times are converted to Unix equivalents.)
1079
1080Returns the C<Archive::Tar::File> object that was just added, or
1081C<undef> on failure.
1082
1083=cut
1084
1085sub add_data {
1086 my $self = shift;
1087 my ($file, $data, $opt) = @_;
1088
1089 my $obj = Archive::Tar::File->new( data => $file, $data, $opt );
1090 unless( $obj ) {
1091 $self->_error( qq[Unable to add file: '$file'] );
1092 return;
1093 }
1094
1095 push @{$self->{_data}}, $obj;
1096
1097 return $obj;
1098}
1099
1100=head2 $tar->error( [$BOOL] )
1101
1102Returns the current errorstring (usually, the last error reported).
1103If a true value was specified, it will give the C<Carp::longmess>
1104equivalent of the error, in effect giving you a stacktrace.
1105
1106For backwards compatibility, this error is also available as
1107C<$Archive::Tar::error> although it is much recommended you use the
1108method call instead.
1109
1110=cut
1111
1112{
1113 $error = '';
1114 my $longmess;
1115
1116 sub _error {
1117 my $self = shift;
1118 my $msg = $error = shift;
1119 $longmess = Carp::longmess($error);
1120
1121 ### set Archive::Tar::WARN to 0 to disable printing
1122 ### of errors
1123 if( $WARN ) {
1124 carp $DEBUG ? $longmess : $msg;
1125 }
1126
1127 return;
1128 }
1129
1130 sub error {
1131 my $self = shift;
1132 return shift() ? $longmess : $error;
1133 }
1134}
1135
1136
1137=head2 $bool = $tar->has_io_string
1138
1139Returns true if we currently have C<IO::String> support loaded.
1140
1141Either C<IO::String> or C<perlio> support is needed to support writing
1142stringified archives. Currently, C<perlio> is the preffered method, if
1143available.
1144
1145See the C<GLOBAL VARIABLES> section to see how to change this preference.
1146
1147=cut
1148
1149sub has_io_string { return $HAS_IO_STRING; }
1150
1151=head2 $bool = $tar->has_perlio
1152
1153Returns true if we currently have C<perlio> support loaded.
1154
1155This requires C<perl-5.8> or higher, compiled with C<perlio>
1156
1157Either C<IO::String> or C<perlio> support is needed to support writing
1158stringified archives. Currently, C<perlio> is the preffered method, if
1159available.
1160
1161See the C<GLOBAL VARIABLES> section to see how to change this preference.
1162
1163=cut
1164
1165sub has_perlio { return $HAS_PERLIO; }
1166
1167
1168=head1 Class Methods
1169
1170=head2 Archive::Tar->create_archive($file, $compression, @filelist)
1171
1172Creates a tar file from the list of files provided. The first
1173argument can either be the name of the tar file to create or a
1174reference to an open file handle (e.g. a GLOB reference).
1175
1176The second argument specifies the level of compression to be used, if
1177any. Compression of tar files requires the installation of the
1178IO::Zlib module. Specific levels of compression may be
1179requested by passing a value between 2 and 9 as the second argument.
1180Any other value evaluating as true will result in the default
1181compression level being used.
1182
1183Note that when you pass in a filehandle, the compression argument
1184is ignored, as all files are printed verbatim to your filehandle.
1185If you wish to enable compression with filehandles, use an
1186C<IO::Zlib> filehandle instead.
1187
1188The remaining arguments list the files to be included in the tar file.
1189These files must all exist. Any files which don't exist or can't be
1190read are silently ignored.
1191
1192If the archive creation fails for any reason, C<create_archive> will
1193return false. Please use the C<error> method to find the cause of the
1194failure.
1195
1196Note that this method does not write C<on the fly> as it were; it
1197still reads all the files into memory before writing out the archive.
1198Consult the FAQ below if this is a problem.
1199
1200=cut
1201
1202sub create_archive {
1203 my $class = shift;
1204
1205 my $file = shift; return unless defined $file;
1206 my $gzip = shift || 0;
1207 my @files = @_;
1208
1209 unless( @files ) {
1210 return $class->_error( qq[Cowardly refusing to create empty archive!] );
1211 }
1212
1213 my $tar = $class->new;
1214 $tar->add_files( @files );
1215 return $tar->write( $file, $gzip );
1216}
1217
1218=head2 Archive::Tar->list_archive ($file, $compressed, [\@properties])
1219
1220Returns a list of the names of all the files in the archive. The
1221first argument can either be the name of the tar file to list or a
1222reference to an open file handle (e.g. a GLOB reference).
1223
1224If C<list_archive()> is passed an array reference as its third
1225argument it returns a list of hash references containing the requested
1226properties of each file. The following list of properties is
1227supported: name, size, mtime (last modified date), mode, uid, gid,
1228linkname, uname, gname, devmajor, devminor, prefix.
1229
1230Passing an array reference containing only one element, 'name', is
1231special cased to return a list of names rather than a list of hash
1232references.
1233
1234=cut
1235
1236sub list_archive {
1237 my $class = shift;
1238 my $file = shift; return unless defined $file;
1239 my $gzip = shift || 0;
1240
1241 my $tar = $class->new($file, $gzip);
1242 return unless $tar;
1243
1244 return $tar->list_files( @_ );
1245}
1246
1247=head2 Archive::Tar->extract_archive ($file, $gzip)
1248
1249Extracts the contents of the tar file. The first argument can either
1250be the name of the tar file to create or a reference to an open file
1251handle (e.g. a GLOB reference). All relative paths in the tar file will
1252be created underneath the current working directory.
1253
1254C<extract_archive> will return a list of files it extracted.
1255If the archive extraction fails for any reason, C<extract_archive>
1256will return false. Please use the C<error> method to find the cause
1257of the failure.
1258
1259=cut
1260
1261sub extract_archive {
1262 my $class = shift;
1263 my $file = shift; return unless defined $file;
1264 my $gzip = shift || 0;
1265
1266 my $tar = $class->new( ) or return;
1267
1268 return $tar->read( $file, $gzip, { extract => 1 } );
1269}
1270
1271=head2 Archive::Tar->can_handle_compressed_files
1272
1273A simple checking routine, which will return true if C<Archive::Tar>
1274is able to uncompress compressed archives on the fly with C<IO::Zlib>,
1275or false if C<IO::Zlib> is not installed.
1276
1277You can use this as a shortcut to determine whether C<Archive::Tar>
1278will do what you think before passing compressed archives to its
1279C<read> method.
1280
1281=cut
1282
1283sub can_handle_compressed_files { return ZLIB ? 1 : 0 }
1284
1285sub no_string_support {
1286 croak("You have to install IO::String to support writing archives to strings");
1287}
1288
12891;
1290
1291__END__
1292
1293=head1 GLOBAL VARIABLES
1294
1295=head2 $Archive::Tar::FOLLOW_SYMLINK
1296
1297Set this variable to C<1> to make C<Archive::Tar> effectively make a
1298copy of the file when extracting. Default is C<0>, which
1299means the symlink stays intact. Of course, you will have to pack the
1300file linked to as well.
1301
1302This option is checked when you write out the tarfile using C<write>
1303or C<create_archive>.
1304
1305This works just like C</bin/tar>'s C<-h> option.
1306
1307=head2 $Archive::Tar::CHOWN
1308
1309By default, C<Archive::Tar> will try to C<chown> your files if it is
1310able to. In some cases, this may not be desired. In that case, set
1311this variable to C<0> to disable C<chown>-ing, even if it were
1312possible.
1313
1314The default is C<1>.
1315
1316=head2 $Archive::Tar::CHMOD
1317
1318By default, C<Archive::Tar> will try to C<chmod> your files to
1319whatever mode was specified for the particular file in the archive.
1320In some cases, this may not be desired. In that case, set this
1321variable to C<0> to disable C<chmod>-ing.
1322
1323The default is C<1>.
1324
1325=head2 $Archive::Tar::DO_NOT_USE_PREFIX
1326
1327By default, C<Archive::Tar> will try to put paths that are over
1328100 characters in the C<prefix> field of your tar header. However,
1329some older tar programs do not implement this spec. To retain
1330compatibility with these older versions, you can set the
1331C<$DO_NOT_USE_PREFIX> variable to a true value, and C<Archive::Tar>
1332will use an alternate way of dealing with paths over 100 characters
1333by using the C<GNU Extended Header> feature.
1334
1335The default is C<0>.
1336
1337=head2 $Archive::Tar::DEBUG
1338
1339Set this variable to C<1> to always get the C<Carp::longmess> output
1340of the warnings, instead of the regular C<carp>. This is the same
1341message you would get by doing:
1342
1343 $tar->error(1);
1344
1345Defaults to C<0>.
1346
1347=head2 $Archive::Tar::WARN
1348
1349Set this variable to C<0> if you do not want any warnings printed.
1350Personally I recommend against doing this, but people asked for the
1351option. Also, be advised that this is of course not threadsafe.
1352
1353Defaults to C<1>.
1354
1355=head2 $Archive::Tar::error
1356
1357Holds the last reported error. Kept for historical reasons, but its
1358use is very much discouraged. Use the C<error()> method instead:
1359
1360 warn $tar->error unless $tar->extract;
1361
1362=head2 $Archive::Tar::HAS_PERLIO
1363
1364This variable holds a boolean indicating if we currently have
1365C<perlio> support loaded. This will be enabled for any perl
1366greater than C<5.8> compiled with C<perlio>.
1367
1368If you feel strongly about disabling it, set this variable to
1369C<false>. Note that you will then need C<IO::String> installed
1370to support writing stringified archives.
1371
1372Don't change this variable unless you B<really> know what you're
1373doing.
1374
1375=head2 $Archive::Tar::HAS_IO_STRING
1376
1377This variable holds a boolean indicating if we currently have
1378C<IO::String> support loaded. This will be enabled for any perl
1379that has a loadable C<IO::String> module.
1380
1381If you feel strongly about disabling it, set this variable to
1382C<false>. Note that you will then need C<perlio> support from
1383your perl to be able to write stringified archives.
1384
1385Don't change this variable unless you B<really> know what you're
1386doing.
1387
1388=head1 FAQ
1389
1390=over 4
1391
1392=item What's the minimum perl version required to run Archive::Tar?
1393
1394You will need perl version 5.005_03 or newer.
1395
1396=item Isn't Archive::Tar slow?
1397
1398Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar>
1399However, it's very portable. If speed is an issue, consider using
1400C</bin/tar> instead.
1401
1402=item Isn't Archive::Tar heavier on memory than /bin/tar?
1403
1404Yes it is, see previous answer. Since C<Compress::Zlib> and therefore
1405C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little
1406choice but to read the archive into memory.
1407This is ok if you want to do in-memory manipulation of the archive.
1408If you just want to extract, use the C<extract_archive> class method
1409instead. It will optimize and write to disk immediately.
1410
1411=item Can't you lazy-load data instead?
1412
1413No, not easily. See previous question.
1414
1415=item How much memory will an X kb tar file need?
1416
1417Probably more than X kb, since it will all be read into memory. If
1418this is a problem, and you don't need to do in memory manipulation
1419of the archive, consider using C</bin/tar> instead.
1420
1421=item What do you do with unsupported filetypes in an archive?
1422
1423C<Unix> has a few filetypes that aren't supported on other platforms,
1424like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just
1425try to make a copy of the original file, rather than throwing an error.
1426
1427This does require you to read the entire archive in to memory first,
1428since otherwise we wouldn't know what data to fill the copy with.
1429(This means that you cannot use the class methods on archives that
1430have incompatible filetypes and still expect things to work).
1431
1432For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that
1433the extraction of this particular item didn't work.
1434
1435=back
1436
1437=head1 TODO
1438
1439=over 4
1440
1441=item Check if passed in handles are open for read/write
1442
1443Currently I don't know of any portable pure perl way to do this.
1444Suggestions welcome.
1445
1446=back
1447
1448=head1 AUTHOR
1449
1450This module by
1451Jos Boumans E<lt>kane@cpan.orgE<gt>.
1452
1453=head1 ACKNOWLEDGEMENTS
1454
1455Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney and
1456especially Andrew Savige for their help and suggestions.
1457
1458=head1 COPYRIGHT
1459
1460This module is
1461copyright (c) 2002 Jos Boumans E<lt>kane@cpan.orgE<gt>.
1462All rights reserved.
1463
1464This library is free software;
1465you may redistribute and/or modify it under the same
1466terms as Perl itself.
1467
1468=cut