Add and remove files forgotten in change #27384
[p5sagit/p5-mst-13.2.git] / lib / Archive / Tar.pm
CommitLineData
39713df4 1### the gnu tar specification:
2### http://www.gnu.org/software/tar/manual/html_mono/tar.html
3###
4### and the pax format spec, which tar derives from:
5### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
6
7package Archive::Tar;
8require 5.005_03;
9
10use strict;
11use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
12 $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING];
13
14$DEBUG = 0;
15$WARN = 1;
16$FOLLOW_SYMLINK = 0;
b3200c5d 17$VERSION = "1.28";
39713df4 18$CHOWN = 1;
19$CHMOD = 1;
20$DO_NOT_USE_PREFIX = 0;
21
22BEGIN {
23 use Config;
24 $HAS_PERLIO = $Config::Config{useperlio};
25
26 ### try and load IO::String anyway, so you can dynamically
27 ### switch between perlio and IO::String
28 eval {
29 require IO::String;
30 import IO::String;
31 };
32 $HAS_IO_STRING = $@ ? 0 : 1;
33
34}
35
36use Cwd;
37use IO::File;
38use Carp qw(carp croak);
39use File::Spec ();
40use File::Spec::Unix ();
41use File::Path ();
42
43use Archive::Tar::File;
44use Archive::Tar::Constant;
45
46=head1 NAME
47
48Archive::Tar - module for manipulations of tar archives
49
50=head1 SYNOPSIS
51
52 use Archive::Tar;
53 my $tar = Archive::Tar->new;
54
55 $tar->read('origin.tgz',1);
56 $tar->extract();
57
58 $tar->add_files('file/foo.pl', 'docs/README');
59 $tar->add_data('file/baz.txt', 'This is the contents now');
60
61 $tar->rename('oldname', 'new/file/name');
62
63 $tar->write('files.tar');
64
65=head1 DESCRIPTION
66
67Archive::Tar provides an object oriented mechanism for handling tar
68files. It provides class methods for quick and easy files handling
69while also allowing for the creation of tar file objects for custom
70manipulation. If you have the IO::Zlib module installed,
71Archive::Tar will also support compressed or gzipped tar files.
72
73An object of class Archive::Tar represents a .tar(.gz) archive full
74of files and things.
75
76=head1 Object Methods
77
78=head2 Archive::Tar->new( [$file, $compressed] )
79
80Returns a new Tar object. If given any arguments, C<new()> calls the
81C<read()> method automatically, passing on the arguments provided to
82the C<read()> method.
83
84If C<new()> is invoked with arguments and the C<read()> method fails
85for any reason, C<new()> returns undef.
86
87=cut
88
89my $tmpl = {
90 _data => [ ],
91 _file => 'Unknown',
92};
93
94### install get/set accessors for this object.
95for my $key ( keys %$tmpl ) {
96 no strict 'refs';
97 *{__PACKAGE__."::$key"} = sub {
98 my $self = shift;
99 $self->{$key} = $_[0] if @_;
100 return $self->{$key};
101 }
102}
103
104sub new {
105 my $class = shift;
106 $class = ref $class if ref $class;
107
108 ### copying $tmpl here since a shallow copy makes it use the
109 ### same aref, causing for files to remain in memory always.
110 my $obj = bless { _data => [ ], _file => 'Unknown' }, $class;
111
112 if (@_) {
81a5970e 113 unless ( $obj->read( @_ ) ) {
114 $obj->_error(qq[No data could be read from file]);
115 return;
116 }
39713df4 117 }
118
119 return $obj;
120}
121
122=head2 $tar->read ( $filename|$handle, $compressed, {opt => 'val'} )
123
124Read the given tar file into memory.
125The first argument can either be the name of a file or a reference to
126an already open filehandle (or an IO::Zlib object if it's compressed)
127The second argument indicates whether the file referenced by the first
128argument is compressed.
129
130The C<read> will I<replace> any previous content in C<$tar>!
131
132The second argument may be considered optional if IO::Zlib is
133installed, since it will transparently Do The Right Thing.
134Archive::Tar will warn if you try to pass a compressed file if
135IO::Zlib is not available and simply return.
136
b3200c5d 137Note that you can currently B<not> pass a C<gzip> compressed
138filehandle, which is not opened with C<IO::Zlib>, nor a string
139containing the full archive information (either compressed or
140uncompressed). These are worth while features, but not currently
141implemented. See the C<TODO> section.
142
39713df4 143The third argument can be a hash reference with options. Note that
144all options are case-sensitive.
145
146=over 4
147
148=item limit
149
150Do not read more than C<limit> files. This is useful if you have
151very big archives, and are only interested in the first few files.
152
153=item extract
154
155If set to true, immediately extract entries when reading them. This
156gives you the same memory break as the C<extract_archive> function.
157Note however that entries will not be read into memory, but written
158straight to disk.
159
160=back
161
162All files are stored internally as C<Archive::Tar::File> objects.
163Please consult the L<Archive::Tar::File> documentation for details.
164
165Returns the number of files read in scalar context, and a list of
166C<Archive::Tar::File> objects in list context.
167
168=cut
169
170sub read {
171 my $self = shift;
172 my $file = shift;
173 my $gzip = shift || 0;
174 my $opts = shift || {};
175
176 unless( defined $file ) {
177 $self->_error( qq[No file to read from!] );
178 return;
179 } else {
180 $self->_file( $file );
181 }
182
183 my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) )
184 or return;
185
186 my $data = $self->_read_tar( $handle, $opts ) or return;
187
188 $self->_data( $data );
189
190 return wantarray ? @$data : scalar @$data;
191}
192
193sub _get_handle {
194 my $self = shift;
195 my $file = shift; return unless defined $file;
196 return $file if ref $file;
197
198 my $gzip = shift || 0;
199 my $mode = shift || READ_ONLY->( ZLIB ); # default to read only
200
201 my $fh; my $bin;
202
203 ### only default to ZLIB if we're not trying to /write/ to a handle ###
204 if( ZLIB and $gzip || MODE_READ->( $mode ) ) {
205
206 ### IO::Zlib will Do The Right Thing, even when passed
207 ### a plain file ###
208 $fh = new IO::Zlib;
209
210 } else {
211 if( $gzip ) {
212 $self->_error(qq[Compression not available - Install IO::Zlib!]);
213 return;
214
215 } else {
216 $fh = new IO::File;
217 $bin++;
218 }
219 }
220
221 unless( $fh->open( $file, $mode ) ) {
222 $self->_error( qq[Could not create filehandle for '$file': $!!] );
223 return;
224 }
225
226 binmode $fh if $bin;
227
228 return $fh;
229}
230
231sub _read_tar {
232 my $self = shift;
233 my $handle = shift or return;
234 my $opts = shift || {};
235
236 my $count = $opts->{limit} || 0;
237 my $extract = $opts->{extract} || 0;
238
239 ### set a cap on the amount of files to extract ###
240 my $limit = 0;
241 $limit = 1 if $count > 0;
242
243 my $tarfile = [ ];
244 my $chunk;
245 my $read = 0;
246 my $real_name; # to set the name of a file when
247 # we're encountering @longlink
248 my $data;
249
250 LOOP:
251 while( $handle->read( $chunk, HEAD ) ) {
252 ### IO::Zlib doesn't support this yet
253 my $offset = eval { tell $handle } || 'unknown';
254
255 unless( $read++ ) {
256 my $gzip = GZIP_MAGIC_NUM;
257 if( $chunk =~ /$gzip/ ) {
258 $self->_error( qq[Cannot read compressed format in tar-mode] );
259 return;
260 }
261 }
262
263 ### if we can't read in all bytes... ###
264 last if length $chunk != HEAD;
265
266 ### Apparently this should really be two blocks of 512 zeroes,
267 ### but GNU tar sometimes gets it wrong. See comment in the
268 ### source code (tar.c) to GNU cpio.
269 next if $chunk eq TAR_END;
270
81a5970e 271 ### pass the realname, so we can set it 'proper' right away
272 ### some of the heuristics are done on the name, so important
273 ### to set it ASAP
39713df4 274 my $entry;
81a5970e 275 { my %extra_args = ();
276 $extra_args{'name'} = $$real_name if defined $real_name;
277
278 unless( $entry = Archive::Tar::File->new( chunk => $chunk,
279 %extra_args )
280 ) {
281 $self->_error( qq[Couldn't read chunk at offset $offset] );
282 next;
283 }
39713df4 284 }
285
286 ### ignore labels:
287 ### http://www.gnu.org/manual/tar/html_node/tar_139.html
288 next if $entry->is_label;
289
290 if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) {
291
292 if ( $entry->is_file && !$entry->validate ) {
293 ### sometimes the chunk is rather fux0r3d and a whole 512
294 ### bytes ends p in the ->name area.
295 ### clean it up, if need be
296 my $name = $entry->name;
297 $name = substr($name, 0, 100) if length $name > 100;
298 $name =~ s/\n/ /g;
299
300 $self->_error( $name . qq[: checksum error] );
301 next LOOP;
302 }
303
304 my $block = BLOCK_SIZE->( $entry->size );
305
306 $data = $entry->get_content_by_ref;
307
308 ### just read everything into memory
309 ### can't do lazy loading since IO::Zlib doesn't support 'seek'
310 ### this is because Compress::Zlib doesn't support it =/
311 ### this reads in the whole data in one read() call.
312 if( $handle->read( $$data, $block ) < $block ) {
313 $self->_error( qq[Read error on tarfile (missing data) '].
314 $entry->full_path ."' at offset $offset" );
315 next;
316 }
317
318 ### throw away trailing garbage ###
319 substr ($$data, $entry->size) = "";
320
321 ### part II of the @LongLink munging -- need to do /after/
322 ### the checksum check.
323 if( $entry->is_longlink ) {
324 ### weird thing in tarfiles -- if the file is actually a
325 ### @LongLink, the data part seems to have a trailing ^@
326 ### (unprintable) char. to display, pipe output through less.
327 ### but that doesn't *always* happen.. so check if the last
328 ### character is a control character, and if so remove it
329 ### at any rate, we better remove that character here, or tests
330 ### like 'eq' and hashlook ups based on names will SO not work
331 ### remove it by calculating the proper size, and then
332 ### tossing out everything that's longer than that size.
333
334 ### count number of nulls
335 my $nulls = $$data =~ tr/\0/\0/;
336
337 ### cut data + size by that many bytes
338 $entry->size( $entry->size - $nulls );
339 substr ($$data, $entry->size) = "";
340 }
341 }
342
343 ### clean up of the entries.. posix tar /apparently/ has some
344 ### weird 'feature' that allows for filenames > 255 characters
345 ### they'll put a header in with as name '././@LongLink' and the
346 ### contents will be the name of the /next/ file in the archive
347 ### pretty crappy and kludgy if you ask me
348
349 ### set the name for the next entry if this is a @LongLink;
350 ### this is one ugly hack =/ but needed for direct extraction
351 if( $entry->is_longlink ) {
352 $real_name = $data;
353 next;
354 } elsif ( defined $real_name ) {
355 $entry->name( $$real_name );
356 $entry->prefix('');
357 undef $real_name;
358 }
359
360 $self->_extract_file( $entry ) if $extract
361 && !$entry->is_longlink
362 && !$entry->is_unknown
363 && !$entry->is_label;
364
365 ### Guard against tarfiles with garbage at the end
366 last LOOP if $entry->name eq '';
367
368 ### push only the name on the rv if we're extracting
369 ### -- for extract_archive
370 push @$tarfile, ($extract ? $entry->name : $entry);
371
372 if( $limit ) {
373 $count-- unless $entry->is_longlink || $entry->is_dir;
374 last LOOP unless $count;
375 }
376 } continue {
377 undef $data;
378 }
379
380 return $tarfile;
381}
382
383=head2 $tar->contains_file( $filename )
384
385Check if the archive contains a certain file.
386It will return true if the file is in the archive, false otherwise.
387
388Note however, that this function does an exact match using C<eq>
389on the full path. So it cannot compensate for case-insensitive file-
390systems or compare 2 paths to see if they would point to the same
391underlying file.
392
393=cut
394
395sub contains_file {
396 my $self = shift;
397 my $full = shift or return;
398
399 return 1 if $self->_find_entry($full);
400 return;
401}
402
403=head2 $tar->extract( [@filenames] )
404
405Write files whose names are equivalent to any of the names in
406C<@filenames> to disk, creating subdirectories as necessary. This
407might not work too well under VMS.
408Under MacPerl, the file's modification time will be converted to the
409MacOS zero of time, and appropriate conversions will be done to the
410path. However, the length of each element of the path is not
411inspected to see whether it's longer than MacOS currently allows (32
412characters).
413
414If C<extract> is called without a list of file names, the entire
415contents of the archive are extracted.
416
417Returns a list of filenames extracted.
418
419=cut
420
421sub extract {
422 my $self = shift;
423 my @files;
424
425 ### you requested the extraction of only certian files
426 if( @_ ) {
427 for my $file (@_) {
428 my $found;
429 for my $entry ( @{$self->_data} ) {
430 next unless $file eq $entry->full_path;
431
432 ### we found the file you're looking for
433 push @files, $entry;
434 $found++;
435 }
436
437 unless( $found ) {
438 return $self->_error( qq[Could not find '$file' in archive] );
439 }
440 }
441
442 ### just grab all the file items
443 } else {
444 @files = $self->get_files;
445 }
446
447 ### nothing found? that's an error
448 unless( scalar @files ) {
449 $self->_error( qq[No files found for ] . $self->_file );
450 return;
451 }
452
453 ### now extract them
454 for my $entry ( @files ) {
455 unless( $self->_extract_file( $entry ) ) {
456 $self->_error(q[Could not extract ']. $entry->full_path .q['] );
457 return;
458 }
459 }
460
461 return @files;
462}
463
464=head2 $tar->extract_file( $file, [$extract_path] )
465
466Write an entry, whose name is equivalent to the file name provided to
467disk. Optionally takes a second parameter, which is the full (unix)
468path (including filename) the entry will be written to.
469
470For example:
471
472 $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' );
473
474Returns true on success, false on failure.
475
476=cut
477
478sub extract_file {
479 my $self = shift;
480 my $file = shift or return;
481 my $alt = shift;
482
483 my $entry = $self->_find_entry( $file )
484 or $self->_error( qq[Could not find an entry for '$file'] ), return;
485
486 return $self->_extract_file( $entry, $alt );
487}
488
489sub _extract_file {
490 my $self = shift;
491 my $entry = shift or return;
492 my $alt = shift;
493 my $cwd = cwd();
494
495 ### you wanted an alternate extraction location ###
496 my $name = defined $alt ? $alt : $entry->full_path;
497
498 ### splitpath takes a bool at the end to indicate
499 ### that it's splitting a dir
7f10f74b 500 my ($vol,$dirs,$file);
501 if ( defined $alt ) { # It's a local-OS path
502 ($vol,$dirs,$file) = File::Spec->splitpath( $alt,
503 $entry->is_dir );
504 } else {
505 ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name,
506 $entry->is_dir );
507 }
508
39713df4 509 my $dir;
510 ### is $name an absolute path? ###
511 if( File::Spec->file_name_is_absolute( $dirs ) ) {
512 $dir = $dirs;
513
514 ### it's a relative path ###
515 } else {
516 my @dirs = File::Spec::Unix->splitdir( $dirs );
517 my @cwd = File::Spec->splitdir( $cwd );
81a5970e 518 $dir = File::Spec->catdir( @cwd, @dirs );
519
520 # catdir() returns undef if the path is longer than 255 chars on VMS
521 unless ( defined $dir ) {
522 $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] );
523 return;
524 }
525
39713df4 526 }
527
528 if( -e $dir && !-d _ ) {
529 $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] );
530 return;
531 }
532
533 unless ( -d _ ) {
534 eval { File::Path::mkpath( $dir, 0, 0777 ) };
535 if( $@ ) {
536 $self->_error( qq[Could not create directory '$dir': $@] );
537 return;
538 }
539 }
540
541 ### we're done if we just needed to create a dir ###
542 return 1 if $entry->is_dir;
543
544 my $full = File::Spec->catfile( $dir, $file );
545
546 if( $entry->is_unknown ) {
547 $self->_error( qq[Unknown file type for file '$full'] );
548 return;
549 }
550
551 if( length $entry->type && $entry->is_file ) {
552 my $fh = IO::File->new;
553 $fh->open( '>' . $full ) or (
554 $self->_error( qq[Could not open file '$full': $!] ),
555 return
556 );
557
558 if( $entry->size ) {
559 binmode $fh;
560 syswrite $fh, $entry->data or (
561 $self->_error( qq[Could not write data to '$full'] ),
562 return
563 );
564 }
565
566 close $fh or (
567 $self->_error( qq[Could not close file '$full'] ),
568 return
569 );
570
571 } else {
572 $self->_make_special_file( $entry, $full ) or return;
573 }
574
575 utime time, $entry->mtime - TIME_OFFSET, $full or
576 $self->_error( qq[Could not update timestamp] );
577
578 if( $CHOWN && CAN_CHOWN ) {
579 chown $entry->uid, $entry->gid, $full or
580 $self->_error( qq[Could not set uid/gid on '$full'] );
581 }
582
583 ### only chmod if we're allowed to, but never chmod symlinks, since they'll
584 ### change the perms on the file they're linking too...
585 if( $CHMOD and not -l $full ) {
586 chmod $entry->mode, $full or
587 $self->_error( qq[Could not chown '$full' to ] . $entry->mode );
588 }
589
590 return 1;
591}
592
593sub _make_special_file {
594 my $self = shift;
595 my $entry = shift or return;
596 my $file = shift; return unless defined $file;
597
598 my $err;
599
600 if( $entry->is_symlink ) {
601 my $fail;
602 if( ON_UNIX ) {
603 symlink( $entry->linkname, $file ) or $fail++;
604
605 } else {
606 $self->_extract_special_file_as_plain_file( $entry, $file )
607 or $fail++;
608 }
609
610 $err = qq[Making symbolink link from '] . $entry->linkname .
611 qq[' to '$file' failed] if $fail;
612
613 } elsif ( $entry->is_hardlink ) {
614 my $fail;
615 if( ON_UNIX ) {
616 link( $entry->linkname, $file ) or $fail++;
617
618 } else {
619 $self->_extract_special_file_as_plain_file( $entry, $file )
620 or $fail++;
621 }
622
623 $err = qq[Making hard link from '] . $entry->linkname .
624 qq[' to '$file' failed] if $fail;
625
626 } elsif ( $entry->is_fifo ) {
627 ON_UNIX && !system('mknod', $file, 'p') or
628 $err = qq[Making fifo ']. $entry->name .qq[' failed];
629
630 } elsif ( $entry->is_blockdev or $entry->is_chardev ) {
631 my $mode = $entry->is_blockdev ? 'b' : 'c';
632
633 ON_UNIX && !system('mknod', $file, $mode,
634 $entry->devmajor, $entry->devminor) or
635 $err = qq[Making block device ']. $entry->name .qq[' (maj=] .
636 $entry->devmajor . qq[ min=] . $entry->devminor .
637 qq[) failed.];
638
639 } elsif ( $entry->is_socket ) {
640 ### the original doesn't do anything special for sockets.... ###
641 1;
642 }
643
644 return $err ? $self->_error( $err ) : 1;
645}
646
647### don't know how to make symlinks, let's just extract the file as
648### a plain file
649sub _extract_special_file_as_plain_file {
650 my $self = shift;
651 my $entry = shift or return;
652 my $file = shift; return unless defined $file;
653
654 my $err;
655 TRY: {
656 my $orig = $self->_find_entry( $entry->linkname );
657
658 unless( $orig ) {
659 $err = qq[Could not find file '] . $entry->linkname .
660 qq[' in memory.];
661 last TRY;
662 }
663
664 ### clone the entry, make it appear as a normal file ###
665 my $clone = $entry->clone;
666 $clone->_downgrade_to_plainfile;
667 $self->_extract_file( $clone, $file ) or last TRY;
668
669 return 1;
670 }
671
672 return $self->_error($err);
673}
674
675=head2 $tar->list_files( [\@properties] )
676
677Returns a list of the names of all the files in the archive.
678
679If C<list_files()> is passed an array reference as its first argument
680it returns a list of hash references containing the requested
681properties of each file. The following list of properties is
682supported: name, size, mtime (last modified date), mode, uid, gid,
683linkname, uname, gname, devmajor, devminor, prefix.
684
685Passing an array reference containing only one element, 'name', is
686special cased to return a list of names rather than a list of hash
687references, making it equivalent to calling C<list_files> without
688arguments.
689
690=cut
691
692sub list_files {
693 my $self = shift;
694 my $aref = shift || [ ];
695
696 unless( $self->_data ) {
697 $self->read() or return;
698 }
699
700 if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) {
701 return map { $_->full_path } @{$self->_data};
702 } else {
703
704 #my @rv;
705 #for my $obj ( @{$self->_data} ) {
706 # push @rv, { map { $_ => $obj->$_() } @$aref };
707 #}
708 #return @rv;
709
710 ### this does the same as the above.. just needs a +{ }
711 ### to make sure perl doesn't confuse it for a block
712 return map { my $o=$_;
713 +{ map { $_ => $o->$_() } @$aref }
714 } @{$self->_data};
715 }
716}
717
718sub _find_entry {
719 my $self = shift;
720 my $file = shift;
721
722 unless( defined $file ) {
723 $self->_error( qq[No file specified] );
724 return;
725 }
726
727 for my $entry ( @{$self->_data} ) {
728 my $path = $entry->full_path;
729 return $entry if $path eq $file;
730 }
731
732 $self->_error( qq[No such file in archive: '$file'] );
733 return;
734}
735
736=head2 $tar->get_files( [@filenames] )
737
738Returns the C<Archive::Tar::File> objects matching the filenames
739provided. If no filename list was passed, all C<Archive::Tar::File>
740objects in the current Tar object are returned.
741
742Please refer to the C<Archive::Tar::File> documentation on how to
743handle these objects.
744
745=cut
746
747sub get_files {
748 my $self = shift;
749
750 return @{ $self->_data } unless @_;
751
752 my @list;
753 for my $file ( @_ ) {
754 push @list, grep { defined } $self->_find_entry( $file );
755 }
756
757 return @list;
758}
759
760=head2 $tar->get_content( $file )
761
762Return the content of the named file.
763
764=cut
765
766sub get_content {
767 my $self = shift;
768 my $entry = $self->_find_entry( shift ) or return;
769
770 return $entry->data;
771}
772
773=head2 $tar->replace_content( $file, $content )
774
775Make the string $content be the content for the file named $file.
776
777=cut
778
779sub replace_content {
780 my $self = shift;
781 my $entry = $self->_find_entry( shift ) or return;
782
783 return $entry->replace_content( shift );
784}
785
786=head2 $tar->rename( $file, $new_name )
787
788Rename the file of the in-memory archive to $new_name.
789
790Note that you must specify a Unix path for $new_name, since per tar
791standard, all files in the archive must be Unix paths.
792
793Returns true on success and false on failure.
794
795=cut
796
797sub rename {
798 my $self = shift;
799 my $file = shift; return unless defined $file;
800 my $new = shift; return unless defined $new;
801
802 my $entry = $self->_find_entry( $file ) or return;
803
804 return $entry->rename( $new );
805}
806
807=head2 $tar->remove (@filenamelist)
808
809Removes any entries with names matching any of the given filenames
810from the in-memory archive. Returns a list of C<Archive::Tar::File>
811objects that remain.
812
813=cut
814
815sub remove {
816 my $self = shift;
817 my @list = @_;
818
819 my %seen = map { $_->full_path => $_ } @{$self->_data};
820 delete $seen{ $_ } for @list;
821
822 $self->_data( [values %seen] );
823
824 return values %seen;
825}
826
827=head2 $tar->clear
828
829C<clear> clears the current in-memory archive. This effectively gives
830you a 'blank' object, ready to be filled again. Note that C<clear>
831only has effect on the object, not the underlying tarfile.
832
833=cut
834
835sub clear {
836 my $self = shift or return;
837
838 $self->_data( [] );
839 $self->_file( '' );
840
841 return 1;
842}
843
844
845=head2 $tar->write ( [$file, $compressed, $prefix] )
846
847Write the in-memory archive to disk. The first argument can either
848be the name of a file or a reference to an already open filehandle (a
849GLOB reference). If the second argument is true, the module will use
850IO::Zlib to write the file in a compressed format. If IO::Zlib is
851not available, the C<write> method will fail and return.
852
853Note that when you pass in a filehandle, the compression argument
854is ignored, as all files are printed verbatim to your filehandle.
855If you wish to enable compression with filehandles, use an
856C<IO::Zlib> filehandle instead.
857
858Specific levels of compression can be chosen by passing the values 2
859through 9 as the second parameter.
860
861The third argument is an optional prefix. All files will be tucked
862away in the directory you specify as prefix. So if you have files
863'a' and 'b' in your archive, and you specify 'foo' as prefix, they
864will be written to the archive as 'foo/a' and 'foo/b'.
865
866If no arguments are given, C<write> returns the entire formatted
867archive as a string, which could be useful if you'd like to stuff the
868archive into a socket or a pipe to gzip or something.
869
870=cut
871
872sub write {
873 my $self = shift;
874 my $file = shift; $file = '' unless defined $file;
875 my $gzip = shift || 0;
876 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix;
877 my $dummy = '';
878
879 ### only need a handle if we have a file to print to ###
880 my $handle = length($file)
881 ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) )
882 or return )
883 : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h }
884 : $HAS_IO_STRING ? IO::String->new
885 : __PACKAGE__->no_string_support();
886
887
888
889 for my $entry ( @{$self->_data} ) {
890 ### entries to be written to the tarfile ###
891 my @write_me;
892
893 ### only now will we change the object to reflect the current state
894 ### of the name and prefix fields -- this needs to be limited to
895 ### write() only!
896 my $clone = $entry->clone;
897
898
899 ### so, if you don't want use to use the prefix, we'll stuff
900 ### everything in the name field instead
901 if( $DO_NOT_USE_PREFIX ) {
902
903 ### you might have an extended prefix, if so, set it in the clone
904 ### XXX is ::Unix right?
905 $clone->name( length $ext_prefix
906 ? File::Spec::Unix->catdir( $ext_prefix,
907 $clone->full_path)
908 : $clone->full_path );
909 $clone->prefix( '' );
910
911 ### otherwise, we'll have to set it properly -- prefix part in the
912 ### prefix and name part in the name field.
913 } else {
914
915 ### split them here, not before!
916 my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path );
917
918 ### you might have an extended prefix, if so, set it in the clone
919 ### XXX is ::Unix right?
920 $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix )
921 if length $ext_prefix;
922
923 $clone->prefix( $prefix );
924 $clone->name( $name );
925 }
926
927 ### names are too long, and will get truncated if we don't add a
928 ### '@LongLink' file...
929 my $make_longlink = ( length($clone->name) > NAME_LENGTH or
930 length($clone->prefix) > PREFIX_LENGTH
931 ) || 0;
932
933 ### perhaps we need to make a longlink file?
934 if( $make_longlink ) {
935 my $longlink = Archive::Tar::File->new(
936 data => LONGLINK_NAME,
937 $clone->full_path,
938 { type => LONGLINK }
939 );
940
941 unless( $longlink ) {
942 $self->_error( qq[Could not create 'LongLink' entry for ] .
943 qq[oversize file '] . $clone->full_path ."'" );
944 return;
945 };
946
947 push @write_me, $longlink;
948 }
949
950 push @write_me, $clone;
951
952 ### write the one, optionally 2 a::t::file objects to the handle
953 for my $clone (@write_me) {
954
955 ### if the file is a symlink, there are 2 options:
956 ### either we leave the symlink intact, but then we don't write any
957 ### data OR we follow the symlink, which means we actually make a
958 ### copy. if we do the latter, we have to change the TYPE of the
959 ### clone to 'FILE'
960 my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK;
961 my $data_ok = !$clone->is_symlink && $clone->has_content;
962
963 ### downgrade to a 'normal' file if it's a symlink we're going to
964 ### treat as a regular file
965 $clone->_downgrade_to_plainfile if $link_ok;
966
967 ### get the header for this block
968 my $header = $self->_format_tar_entry( $clone );
969 unless( $header ) {
970 $self->_error(q[Could not format header for: ] .
971 $clone->full_path );
972 return;
973 }
974
975 unless( print $handle $header ) {
976 $self->_error(q[Could not write header for: ] .
977 $clone->full_path);
978 return;
979 }
980
981 if( $link_ok or $data_ok ) {
982 unless( print $handle $clone->data ) {
983 $self->_error(q[Could not write data for: ] .
984 $clone->full_path);
985 return;
986 }
987
988 ### pad the end of the clone if required ###
989 print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK
990 }
991
992 } ### done writing these entries
993 }
994
995 ### write the end markers ###
996 print $handle TAR_END x 2 or
997 return $self->_error( qq[Could not write tar end markers] );
998 ### did you want it written to a file, or returned as a string? ###
999 return length($file) ? 1
1000 : $HAS_PERLIO ? $dummy
1001 : do { seek $handle, 0, 0; local $/; <$handle> }
1002}
1003
1004sub _format_tar_entry {
1005 my $self = shift;
1006 my $entry = shift or return;
1007 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix;
1008 my $no_prefix = shift || 0;
1009
1010 my $file = $entry->name;
1011 my $prefix = $entry->prefix; $prefix = '' unless defined $prefix;
1012
1013 ### remove the prefix from the file name
1014 ### not sure if this is still neeeded --kane
1015 ### no it's not -- Archive::Tar::File->_new_from_file will take care of
1016 ### this for us. Even worse, this would break if we tried to add a file
1017 ### like x/x.
1018 #if( length $prefix ) {
1019 # $file =~ s/^$match//;
1020 #}
1021
1022 $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix)
1023 if length $ext_prefix;
1024
1025 ### not sure why this is... ###
1026 my $l = PREFIX_LENGTH; # is ambiguous otherwise...
1027 substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH;
1028
1029 my $f1 = "%06o"; my $f2 = "%11o";
1030
1031 ### this might be optimizable with a 'changed' flag in the file objects ###
1032 my $tar = pack (
1033 PACK,
1034 $file,
1035
1036 (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]),
1037 (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]),
1038
1039 "", # checksum field - space padded a bit down
1040
1041 (map { $entry->$_() } qw[type linkname magic]),
1042
1043 $entry->version || TAR_VERSION,
1044
1045 (map { $entry->$_() } qw[uname gname]),
1046 (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]),
1047
1048 ($no_prefix ? '' : $prefix)
1049 );
1050
1051 ### add the checksum ###
1052 substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar));
1053
1054 return $tar;
1055}
1056
1057=head2 $tar->add_files( @filenamelist )
1058
1059Takes a list of filenames and adds them to the in-memory archive.
1060
1061The path to the file is automatically converted to a Unix like
1062equivalent for use in the archive, and, if on MacOS, the file's
1063modification time is converted from the MacOS epoch to the Unix epoch.
1064So tar archives created on MacOS with B<Archive::Tar> can be read
1065both with I<tar> on Unix and applications like I<suntar> or
1066I<Stuffit Expander> on MacOS.
1067
1068Be aware that the file's type/creator and resource fork will be lost,
1069which is usually what you want in cross-platform archives.
1070
1071Returns a list of C<Archive::Tar::File> objects that were just added.
1072
1073=cut
1074
1075sub add_files {
1076 my $self = shift;
1077 my @files = @_ or return;
1078
1079 my @rv;
1080 for my $file ( @files ) {
1081 unless( -e $file ) {
1082 $self->_error( qq[No such file: '$file'] );
1083 next;
1084 }
1085
1086 my $obj = Archive::Tar::File->new( file => $file );
1087 unless( $obj ) {
1088 $self->_error( qq[Unable to add file: '$file'] );
1089 next;
1090 }
1091
1092 push @rv, $obj;
1093 }
1094
1095 push @{$self->{_data}}, @rv;
1096
1097 return @rv;
1098}
1099
1100=head2 $tar->add_data ( $filename, $data, [$opthashref] )
1101
1102Takes a filename, a scalar full of data and optionally a reference to
1103a hash with specific options.
1104
1105Will add a file to the in-memory archive, with name C<$filename> and
1106content C<$data>. Specific properties can be set using C<$opthashref>.
1107The following list of properties is supported: name, size, mtime
1108(last modified date), mode, uid, gid, linkname, uname, gname,
b3200c5d 1109devmajor, devminor, prefix, type. (On MacOS, the file's path and
39713df4 1110modification times are converted to Unix equivalents.)
1111
b3200c5d 1112Valid values for the file type are the following constants defined in
1113Archive::Tar::Constants:
1114
1115=over 4
1116
1117=item FILE
1118
1119Regular file.
1120
1121=item HARDLINK
1122
1123=item SYMLINK
1124
1125Hard and symbolic ("soft") links; linkname should specify target.
1126
1127=item CHARDEV
1128
1129=item BLOCKDEV
1130
1131Character and block devices. devmajor and devminor should specify the major
1132and minor device numbers.
1133
1134=item DIR
1135
1136Directory.
1137
1138=item FIFO
1139
1140FIFO (named pipe).
1141
1142=item SOCKET
1143
1144Socket.
1145
1146=back
1147
39713df4 1148Returns the C<Archive::Tar::File> object that was just added, or
1149C<undef> on failure.
1150
1151=cut
1152
1153sub add_data {
1154 my $self = shift;
1155 my ($file, $data, $opt) = @_;
1156
1157 my $obj = Archive::Tar::File->new( data => $file, $data, $opt );
1158 unless( $obj ) {
1159 $self->_error( qq[Unable to add file: '$file'] );
1160 return;
1161 }
1162
1163 push @{$self->{_data}}, $obj;
1164
1165 return $obj;
1166}
1167
1168=head2 $tar->error( [$BOOL] )
1169
1170Returns the current errorstring (usually, the last error reported).
1171If a true value was specified, it will give the C<Carp::longmess>
1172equivalent of the error, in effect giving you a stacktrace.
1173
1174For backwards compatibility, this error is also available as
1175C<$Archive::Tar::error> although it is much recommended you use the
1176method call instead.
1177
1178=cut
1179
1180{
1181 $error = '';
1182 my $longmess;
1183
1184 sub _error {
1185 my $self = shift;
1186 my $msg = $error = shift;
1187 $longmess = Carp::longmess($error);
1188
1189 ### set Archive::Tar::WARN to 0 to disable printing
1190 ### of errors
1191 if( $WARN ) {
1192 carp $DEBUG ? $longmess : $msg;
1193 }
1194
1195 return;
1196 }
1197
1198 sub error {
1199 my $self = shift;
1200 return shift() ? $longmess : $error;
1201 }
1202}
1203
1204
1205=head2 $bool = $tar->has_io_string
1206
1207Returns true if we currently have C<IO::String> support loaded.
1208
1209Either C<IO::String> or C<perlio> support is needed to support writing
3c4b39be 1210stringified archives. Currently, C<perlio> is the preferred method, if
39713df4 1211available.
1212
1213See the C<GLOBAL VARIABLES> section to see how to change this preference.
1214
1215=cut
1216
1217sub has_io_string { return $HAS_IO_STRING; }
1218
1219=head2 $bool = $tar->has_perlio
1220
1221Returns true if we currently have C<perlio> support loaded.
1222
1223This requires C<perl-5.8> or higher, compiled with C<perlio>
1224
1225Either C<IO::String> or C<perlio> support is needed to support writing
3c4b39be 1226stringified archives. Currently, C<perlio> is the preferred method, if
39713df4 1227available.
1228
1229See the C<GLOBAL VARIABLES> section to see how to change this preference.
1230
1231=cut
1232
1233sub has_perlio { return $HAS_PERLIO; }
1234
1235
1236=head1 Class Methods
1237
1238=head2 Archive::Tar->create_archive($file, $compression, @filelist)
1239
1240Creates a tar file from the list of files provided. The first
1241argument can either be the name of the tar file to create or a
1242reference to an open file handle (e.g. a GLOB reference).
1243
1244The second argument specifies the level of compression to be used, if
1245any. Compression of tar files requires the installation of the
1246IO::Zlib module. Specific levels of compression may be
1247requested by passing a value between 2 and 9 as the second argument.
1248Any other value evaluating as true will result in the default
1249compression level being used.
1250
1251Note that when you pass in a filehandle, the compression argument
1252is ignored, as all files are printed verbatim to your filehandle.
1253If you wish to enable compression with filehandles, use an
1254C<IO::Zlib> filehandle instead.
1255
1256The remaining arguments list the files to be included in the tar file.
1257These files must all exist. Any files which don't exist or can't be
1258read are silently ignored.
1259
1260If the archive creation fails for any reason, C<create_archive> will
1261return false. Please use the C<error> method to find the cause of the
1262failure.
1263
1264Note that this method does not write C<on the fly> as it were; it
1265still reads all the files into memory before writing out the archive.
1266Consult the FAQ below if this is a problem.
1267
1268=cut
1269
1270sub create_archive {
1271 my $class = shift;
1272
1273 my $file = shift; return unless defined $file;
1274 my $gzip = shift || 0;
1275 my @files = @_;
1276
1277 unless( @files ) {
1278 return $class->_error( qq[Cowardly refusing to create empty archive!] );
1279 }
1280
1281 my $tar = $class->new;
1282 $tar->add_files( @files );
1283 return $tar->write( $file, $gzip );
1284}
1285
1286=head2 Archive::Tar->list_archive ($file, $compressed, [\@properties])
1287
1288Returns a list of the names of all the files in the archive. The
1289first argument can either be the name of the tar file to list or a
1290reference to an open file handle (e.g. a GLOB reference).
1291
1292If C<list_archive()> is passed an array reference as its third
1293argument it returns a list of hash references containing the requested
1294properties of each file. The following list of properties is
b3200c5d 1295supported: full_path, name, size, mtime (last modified date), mode,
1296uid, gid, linkname, uname, gname, devmajor, devminor, prefix.
1297
1298See C<Archive::Tar::File> for details about supported properties.
39713df4 1299
1300Passing an array reference containing only one element, 'name', is
1301special cased to return a list of names rather than a list of hash
1302references.
1303
1304=cut
1305
1306sub list_archive {
1307 my $class = shift;
1308 my $file = shift; return unless defined $file;
1309 my $gzip = shift || 0;
1310
1311 my $tar = $class->new($file, $gzip);
1312 return unless $tar;
1313
1314 return $tar->list_files( @_ );
1315}
1316
1317=head2 Archive::Tar->extract_archive ($file, $gzip)
1318
1319Extracts the contents of the tar file. The first argument can either
1320be the name of the tar file to create or a reference to an open file
1321handle (e.g. a GLOB reference). All relative paths in the tar file will
1322be created underneath the current working directory.
1323
1324C<extract_archive> will return a list of files it extracted.
1325If the archive extraction fails for any reason, C<extract_archive>
1326will return false. Please use the C<error> method to find the cause
1327of the failure.
1328
1329=cut
1330
1331sub extract_archive {
1332 my $class = shift;
1333 my $file = shift; return unless defined $file;
1334 my $gzip = shift || 0;
1335
1336 my $tar = $class->new( ) or return;
1337
1338 return $tar->read( $file, $gzip, { extract => 1 } );
1339}
1340
1341=head2 Archive::Tar->can_handle_compressed_files
1342
1343A simple checking routine, which will return true if C<Archive::Tar>
1344is able to uncompress compressed archives on the fly with C<IO::Zlib>,
1345or false if C<IO::Zlib> is not installed.
1346
1347You can use this as a shortcut to determine whether C<Archive::Tar>
1348will do what you think before passing compressed archives to its
1349C<read> method.
1350
1351=cut
1352
1353sub can_handle_compressed_files { return ZLIB ? 1 : 0 }
1354
1355sub no_string_support {
1356 croak("You have to install IO::String to support writing archives to strings");
1357}
1358
13591;
1360
1361__END__
1362
1363=head1 GLOBAL VARIABLES
1364
1365=head2 $Archive::Tar::FOLLOW_SYMLINK
1366
1367Set this variable to C<1> to make C<Archive::Tar> effectively make a
1368copy of the file when extracting. Default is C<0>, which
1369means the symlink stays intact. Of course, you will have to pack the
1370file linked to as well.
1371
1372This option is checked when you write out the tarfile using C<write>
1373or C<create_archive>.
1374
1375This works just like C</bin/tar>'s C<-h> option.
1376
1377=head2 $Archive::Tar::CHOWN
1378
1379By default, C<Archive::Tar> will try to C<chown> your files if it is
1380able to. In some cases, this may not be desired. In that case, set
1381this variable to C<0> to disable C<chown>-ing, even if it were
1382possible.
1383
1384The default is C<1>.
1385
1386=head2 $Archive::Tar::CHMOD
1387
1388By default, C<Archive::Tar> will try to C<chmod> your files to
1389whatever mode was specified for the particular file in the archive.
1390In some cases, this may not be desired. In that case, set this
1391variable to C<0> to disable C<chmod>-ing.
1392
1393The default is C<1>.
1394
1395=head2 $Archive::Tar::DO_NOT_USE_PREFIX
1396
1397By default, C<Archive::Tar> will try to put paths that are over
1398100 characters in the C<prefix> field of your tar header. However,
1399some older tar programs do not implement this spec. To retain
1400compatibility with these older versions, you can set the
1401C<$DO_NOT_USE_PREFIX> variable to a true value, and C<Archive::Tar>
1402will use an alternate way of dealing with paths over 100 characters
1403by using the C<GNU Extended Header> feature.
1404
1405The default is C<0>.
1406
1407=head2 $Archive::Tar::DEBUG
1408
1409Set this variable to C<1> to always get the C<Carp::longmess> output
1410of the warnings, instead of the regular C<carp>. This is the same
1411message you would get by doing:
1412
1413 $tar->error(1);
1414
1415Defaults to C<0>.
1416
1417=head2 $Archive::Tar::WARN
1418
1419Set this variable to C<0> if you do not want any warnings printed.
1420Personally I recommend against doing this, but people asked for the
1421option. Also, be advised that this is of course not threadsafe.
1422
1423Defaults to C<1>.
1424
1425=head2 $Archive::Tar::error
1426
1427Holds the last reported error. Kept for historical reasons, but its
1428use is very much discouraged. Use the C<error()> method instead:
1429
1430 warn $tar->error unless $tar->extract;
1431
1432=head2 $Archive::Tar::HAS_PERLIO
1433
1434This variable holds a boolean indicating if we currently have
1435C<perlio> support loaded. This will be enabled for any perl
1436greater than C<5.8> compiled with C<perlio>.
1437
1438If you feel strongly about disabling it, set this variable to
1439C<false>. Note that you will then need C<IO::String> installed
1440to support writing stringified archives.
1441
1442Don't change this variable unless you B<really> know what you're
1443doing.
1444
1445=head2 $Archive::Tar::HAS_IO_STRING
1446
1447This variable holds a boolean indicating if we currently have
1448C<IO::String> support loaded. This will be enabled for any perl
1449that has a loadable C<IO::String> module.
1450
1451If you feel strongly about disabling it, set this variable to
1452C<false>. Note that you will then need C<perlio> support from
1453your perl to be able to write stringified archives.
1454
1455Don't change this variable unless you B<really> know what you're
1456doing.
1457
1458=head1 FAQ
1459
1460=over 4
1461
1462=item What's the minimum perl version required to run Archive::Tar?
1463
1464You will need perl version 5.005_03 or newer.
1465
1466=item Isn't Archive::Tar slow?
1467
1468Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar>
1469However, it's very portable. If speed is an issue, consider using
1470C</bin/tar> instead.
1471
1472=item Isn't Archive::Tar heavier on memory than /bin/tar?
1473
1474Yes it is, see previous answer. Since C<Compress::Zlib> and therefore
1475C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little
1476choice but to read the archive into memory.
1477This is ok if you want to do in-memory manipulation of the archive.
1478If you just want to extract, use the C<extract_archive> class method
1479instead. It will optimize and write to disk immediately.
1480
1481=item Can't you lazy-load data instead?
1482
1483No, not easily. See previous question.
1484
1485=item How much memory will an X kb tar file need?
1486
1487Probably more than X kb, since it will all be read into memory. If
1488this is a problem, and you don't need to do in memory manipulation
1489of the archive, consider using C</bin/tar> instead.
1490
1491=item What do you do with unsupported filetypes in an archive?
1492
1493C<Unix> has a few filetypes that aren't supported on other platforms,
1494like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just
1495try to make a copy of the original file, rather than throwing an error.
1496
1497This does require you to read the entire archive in to memory first,
1498since otherwise we wouldn't know what data to fill the copy with.
1499(This means that you cannot use the class methods on archives that
1500have incompatible filetypes and still expect things to work).
1501
1502For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that
1503the extraction of this particular item didn't work.
1504
81a5970e 1505=item How do I access .tar.Z files?
1506
1507The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via
1508the C<IO::Zlib> module) to access tar files that have been compressed
1509with C<gzip>. Unfortunately tar files compressed with the Unix C<compress>
1510utility cannot be read by C<Compress::Zlib> and so cannot be directly
1511accesses by C<Archive::Tar>.
1512
1513If the C<uncompress> or C<gunzip> programs are available, you can use
1514one of these workarounds to read C<.tar.Z> files from C<Archive::Tar>
1515
1516Firstly with C<uncompress>
1517
1518 use Archive::Tar;
1519
1520 open F, "uncompress -c $filename |";
1521 my $tar = Archive::Tar->new(*F);
1522 ...
1523
1524and this with C<gunzip>
1525
1526 use Archive::Tar;
1527
1528 open F, "gunzip -c $filename |";
1529 my $tar = Archive::Tar->new(*F);
1530 ...
1531
1532Similarly, if the C<compress> program is available, you can use this to
1533write a C<.tar.Z> file
1534
1535 use Archive::Tar;
1536 use IO::File;
1537
1538 my $fh = new IO::File "| compress -c >$filename";
1539 my $tar = Archive::Tar->new();
1540 ...
1541 $tar->write($fh);
1542 $fh->close ;
1543
1544
39713df4 1545=back
1546
1547=head1 TODO
1548
1549=over 4
1550
1551=item Check if passed in handles are open for read/write
1552
1553Currently I don't know of any portable pure perl way to do this.
1554Suggestions welcome.
1555
b3200c5d 1556=item Allow archives to be passed in as string
1557
1558Currently, we only allow opened filehandles or filenames, but
1559not strings. The internals would need some reworking to facilitate
1560stringified archives.
1561
1562=item Facilitate processing an opened filehandle of a compressed archive
1563
1564Currently, we only support this if the filehandle is an IO::Zlib object.
1565Environments, like apache, will present you with an opened filehandle
1566to an uploaded file, which might be a compressed archive.
1567
39713df4 1568=back
1569
1570=head1 AUTHOR
1571
1572This module by
1573Jos Boumans E<lt>kane@cpan.orgE<gt>.
1574
1575=head1 ACKNOWLEDGEMENTS
1576
1577Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney and
1578especially Andrew Savige for their help and suggestions.
1579
1580=head1 COPYRIGHT
1581
1582This module is
1583copyright (c) 2002 Jos Boumans E<lt>kane@cpan.orgE<gt>.
1584All rights reserved.
1585
1586This library is free software;
1587you may redistribute and/or modify it under the same
1588terms as Perl itself.
1589
1590=cut