1 package Archive::Tar::File;
5 use File::Spec::Unix ();
9 use Archive::Tar::Constant;
11 use vars qw[@ISA $VERSION];
12 @ISA = qw[Archive::Tar];
15 ### set value to 1 to oct() it during the unpack ###
24 type => 0, # character
25 linkname => 0, # string
27 version => 0, # 2 bytes
30 devmajor => 1, # octal
31 devminor => 1, # octal
34 ### end UNPACK items ###
35 raw => 0, # the raw data chunk
36 data => 0, # the data associated with the file --
37 # This might be very memory intensive
40 ### install get/set accessors for this object.
41 for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) {
42 my $key = $tmpl->[$i];
44 *{__PACKAGE__."::$key"} = sub {
46 $self->{$key} = $_[0] if @_;
48 ### just in case the key is not there or undef or something ###
57 Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
61 my @items = $tar->get_files;
63 print $_->name, ' ', $_->size, "\n" for @items;
65 print $object->get_content;
66 $object->replace_content('new content');
68 $object->rename( 'new/full/path/to/file.c' );
72 Archive::Tar::Files provides a neat little object layer for in-memory
73 extracted files. It's mostly used internally in Archive::Tar to tidy
74 up the code, but there's no reason users shouldn't use this API as
79 A lot of the methods in this package are accessors to the various
80 fields in the tar header:
94 The user id owning the file
98 The group id owning the file
106 Modification time. Adjusted to mac-time on MacOS if required
110 Checksum field for the tar header
114 File type -- numeric, but comparable to exported constants -- see
115 Archive::Tar's documentation
119 If the file is a symlink, the file it's pointing to
123 Tar magic string -- not useful for most users
127 Tar version string -- not useful for most users
131 The user name that owns the file
135 The group name that owns the file
139 Device major number in case of a special file
143 Device minor number in case of a special file
147 Any directory to prefix to the extraction path, if any
151 Raw tar header -- not useful for most users
157 =head2 new( file => $path )
159 Returns a new Archive::Tar::File object from an existing file.
161 Returns undef on failure.
163 =head2 new( data => $path, $data, $opt )
165 Returns a new Archive::Tar::File object from data.
167 C<$path> defines the file name (which need not exist), C<$data> the
168 file contents, and C<$opt> is a reference to a hash of attributes
169 which may be used to override the default attributes (fields in the
170 tar header), which are described above in the Accessors section.
172 Returns undef on failure.
174 =head2 new( chunk => $chunk )
176 Returns a new Archive::Tar::File object from a raw 512-byte tar
179 Returns undef on failure.
187 my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
188 ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
189 ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
195 ### copies the data, creates a clone ###
198 return bless { %$self }, ref $self;
201 sub _new_from_chunk {
203 my $chunk = shift or return; # 512 bytes of tar header
206 ### filter any arguments on defined-ness of values.
207 ### this allows overriding from what the tar-header is saying
208 ### about this tar-entry. Particularly useful for @LongLink files
209 my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
211 ### makes it start at 0 actually... :) ###
214 $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_
215 } map { /^([^\0]*)/ } unpack( UNPACK, $chunk );
217 my $obj = bless { %entry, %args }, $class;
219 ### magic is a filetype string.. it should have something like 'ustar' or
220 ### something similar... if the chunk is garbage, skip it
221 return unless $obj->magic !~ /\W/;
223 ### store the original chunk ###
226 $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
227 $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
238 ### path has to at least exist
239 return unless defined $path;
241 my $type = __PACKAGE__->_filetype($path);
245 unless ($type == DIR ) {
246 my $fh = IO::File->new;
248 unless( $fh->open($path) ) {
249 ### dangling symlinks are fine, stop reading but continue
250 ### creating the object
251 last READ if $type == SYMLINK;
253 ### otherwise, return from this function --
254 ### anything that's *not* a symlink should be
259 ### binmode needed to read files properly on win32 ###
261 $data = do { local $/; <$fh> };
266 my @items = qw[mode uid gid size mtime];
267 my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
269 ### you *must* set size == 0 on symlinks, or the next entry will be
270 ### though of as the contents of the symlink, which is wrong.
271 ### this fixes bug #7937
272 $hash{size} = 0 if ($type == DIR or $type == SYMLINK);
273 $hash{mtime} -= TIME_OFFSET;
275 ### strip the high bits off the mode, which we don't need to store
276 $hash{mode} = STRIP_MODE->( $hash{mode} );
279 ### probably requires some file path munging here ... ###
280 ### name and prefix are set later
286 linkname => ($type == SYMLINK and CAN_READLINK)
290 version => TAR_VERSION,
291 uname => UNAME->( $hash{uid} ),
292 gname => GNAME->( $hash{gid} ),
293 devmajor => 0, # not handled
294 devminor => 0, # not handled
301 ### fix up the prefix and file from the path
302 my($prefix,$file) = $obj->_prefix_and_file( $path );
303 $obj->prefix( $prefix );
311 my $path = shift; return unless defined $path;
312 my $data = shift; return unless defined $data;
321 size => length $data,
322 mtime => time - TIME_OFFSET,
327 version => TAR_VERSION,
328 uname => UNAME->( UID ),
329 gname => GNAME->( GID ),
335 ### overwrite with user options, if provided ###
336 if( $opt and ref $opt eq 'HASH' ) {
337 for my $key ( keys %$opt ) {
339 ### don't write bogus options ###
340 next unless exists $obj->{$key};
341 $obj->{$key} = $opt->{$key};
347 ### fix up the prefix and file from the path
348 my($prefix,$file) = $obj->_prefix_and_file( $path );
349 $obj->prefix( $prefix );
355 sub _prefix_and_file {
359 my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
360 my @dirs = File::Spec->splitdir( $dirs );
362 ### so sometimes the last element is '' -- probably when trailing
363 ### dir slashes are encountered... this is is of course pointless,
365 pop @dirs while @dirs and not length $dirs[-1];
367 ### if it's a directory, then $file might be empty
368 $file = pop @dirs if $self->is_dir and not length $file;
370 my $prefix = File::Spec::Unix->catdir(
371 grep { length } $vol, @dirs
373 return( $prefix, $file );
380 return unless defined $file;
382 return SYMLINK if (-l $file); # Symlink
384 return FILE if (-f _); # Plain file
386 return DIR if (-d _); # Directory
388 return FIFO if (-p _); # Named pipe
390 return SOCKET if (-S _); # Socket
392 return BLOCKDEV if (-b _); # Block special
394 return CHARDEV if (-c _); # Character special
396 ### shouldn't happen, this is when making archives, not reading ###
397 return LONGLINK if ( $file eq LONGLINK_NAME );
399 return UNKNOWN; # Something else (like what?)
403 ### this method 'downgrades' a file to plain file -- this is used for
404 ### symlinks when FOLLOW_SYMLINKS is true.
405 sub _downgrade_to_plainfile {
407 $entry->type( FILE );
408 $entry->mode( MODE );
409 $entry->linkname('');
416 Returns the full path from the tar header; this is basically a
417 concatenation of the C<prefix> and C<name> fields.
424 ### if prefix field is emtpy
425 return $self->name unless defined $self->prefix and length $self->prefix;
427 ### or otherwise, catfile'd
428 return File::Spec::Unix->catfile( $self->prefix, $self->name );
434 Done by Archive::Tar internally when reading the tar file:
435 validate the header against the checksum to ensure integer tar file.
437 Returns true on success, false on failure
444 my $raw = $self->raw;
446 ### don't know why this one is different from the one we /write/ ###
447 substr ($raw, 148, 8) = " ";
448 return unpack ("%16C*", $raw) == $self->chksum ? 1 : 0;
453 Returns a boolean to indicate whether the current object has content.
454 Some special files like directories and so on never will have any
455 content. This method is mainly to make sure you don't get warnings
456 for using uninitialized values when looking at an object's content.
462 return defined $self->data() && length $self->data() ? 1 : 0;
467 Returns the current content for the in-memory file
476 =head2 get_content_by_ref
478 Returns the current content for the in-memory file as a scalar
479 reference. Normal users won't need this, but it will save memory if
480 you are dealing with very large data files in your tar archive, since
481 it will pass the contents by reference, rather than make a copy of it
486 sub get_content_by_ref {
489 return \$self->{data};
492 =head2 replace_content( $content )
494 Replace the current content of the file with the new content. This
495 only affects the in-memory archive, not the on-disk version until
498 Returns true on success, false on failure.
502 sub replace_content {
504 my $data = shift || '';
506 $self->data( $data );
507 $self->size( length $data );
511 =head2 rename( $new_name )
513 Rename the current file to $new_name.
515 Note that you must specify a Unix path for $new_name, since per tar
516 standard, all files in the archive must be Unix paths.
518 Returns true on success and false on failure.
526 return unless defined $path;
528 my ($prefix,$file) = $self->_prefix_and_file( $path );
530 $self->name( $file );
531 $self->prefix( $prefix );
536 =head1 Convenience methods
538 To quickly check the type of a C<Archive::Tar::File> object, you can
539 use the following methods:
545 Returns true if the file is of type C<file>
549 Returns true if the file is of type C<dir>
553 Returns true if the file is of type C<hardlink>
557 Returns true if the file is of type C<symlink>
561 Returns true if the file is of type C<chardev>
565 Returns true if the file is of type C<blockdev>
569 Returns true if the file is of type C<fifo>
573 Returns true if the file is of type C<socket>
577 Returns true if the file is of type C<LongLink>.
578 Should not happen after a successful C<read>.
582 Returns true if the file is of type C<Label>.
583 Should not happen after a successful C<read>.
587 Returns true if the file type is C<unknown>
593 #stupid perl5.5.3 needs to warn if it's not numeric
594 sub is_file { local $^W; FILE == $_[0]->type }
595 sub is_dir { local $^W; DIR == $_[0]->type }
596 sub is_hardlink { local $^W; HARDLINK == $_[0]->type }
597 sub is_symlink { local $^W; SYMLINK == $_[0]->type }
598 sub is_chardev { local $^W; CHARDEV == $_[0]->type }
599 sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type }
600 sub is_fifo { local $^W; FIFO == $_[0]->type }
601 sub is_socket { local $^W; SOCKET == $_[0]->type }
602 sub is_unknown { local $^W; UNKNOWN == $_[0]->type }
603 sub is_longlink { local $^W; LONGLINK eq $_[0]->type }
604 sub is_label { local $^W; LABEL eq $_[0]->type }