Upgrade to Archive-Tar-1.39_04
[p5sagit/p5-mst-13.2.git] / lib / Archive / Tar / File.pm
CommitLineData
39713df4 1package Archive::Tar::File;
2use strict;
3
642eb381 4use Carp ();
39713df4 5use IO::File;
81a5970e 6use File::Spec::Unix ();
7use File::Spec ();
8use File::Basename ();
9
642eb381 10### avoid circular use, so only require;
11require Archive::Tar;
39713df4 12use Archive::Tar::Constant;
13
14use vars qw[@ISA $VERSION];
642eb381 15#@ISA = qw[Archive::Tar];
39713df4 16$VERSION = '0.02';
17
18### set value to 1 to oct() it during the unpack ###
19my $tmpl = [
20 name => 0, # string
21 mode => 1, # octal
22 uid => 1, # octal
23 gid => 1, # octal
24 size => 1, # octal
25 mtime => 1, # octal
26 chksum => 1, # octal
27 type => 0, # character
28 linkname => 0, # string
29 magic => 0, # string
30 version => 0, # 2 bytes
31 uname => 0, # string
32 gname => 0, # string
33 devmajor => 1, # octal
34 devminor => 1, # octal
35 prefix => 0,
36
37### end UNPACK items ###
38 raw => 0, # the raw data chunk
39 data => 0, # the data associated with the file --
40 # This might be very memory intensive
41];
42
43### install get/set accessors for this object.
44for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) {
45 my $key = $tmpl->[$i];
46 no strict 'refs';
47 *{__PACKAGE__."::$key"} = sub {
48 my $self = shift;
49 $self->{$key} = $_[0] if @_;
50
51 ### just in case the key is not there or undef or something ###
52 { local $^W = 0;
53 return $self->{$key};
54 }
55 }
56}
57
58=head1 NAME
59
60Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
61
62=head1 SYNOPSIS
63
64 my @items = $tar->get_files;
65
66 print $_->name, ' ', $_->size, "\n" for @items;
67
68 print $object->get_content;
69 $object->replace_content('new content');
70
71 $object->rename( 'new/full/path/to/file.c' );
72
73=head1 DESCRIPTION
74
75Archive::Tar::Files provides a neat little object layer for in-memory
76extracted files. It's mostly used internally in Archive::Tar to tidy
77up the code, but there's no reason users shouldn't use this API as
78well.
79
80=head2 Accessors
81
82A lot of the methods in this package are accessors to the various
83fields in the tar header:
84
85=over 4
86
87=item name
88
89The file's name
90
91=item mode
92
93The file's mode
94
95=item uid
96
97The user id owning the file
98
99=item gid
100
101The group id owning the file
102
103=item size
104
105File size in bytes
106
107=item mtime
108
109Modification time. Adjusted to mac-time on MacOS if required
110
111=item chksum
112
113Checksum field for the tar header
114
115=item type
116
117File type -- numeric, but comparable to exported constants -- see
118Archive::Tar's documentation
119
120=item linkname
121
122If the file is a symlink, the file it's pointing to
123
124=item magic
125
126Tar magic string -- not useful for most users
127
128=item version
129
130Tar version string -- not useful for most users
131
132=item uname
133
134The user name that owns the file
135
136=item gname
137
138The group name that owns the file
139
140=item devmajor
141
142Device major number in case of a special file
143
144=item devminor
145
146Device minor number in case of a special file
147
148=item prefix
149
150Any directory to prefix to the extraction path, if any
151
152=item raw
153
154Raw tar header -- not useful for most users
155
156=back
157
158=head1 Methods
159
642eb381 160=head2 Archive::Tar::File->new( file => $path )
39713df4 161
162Returns a new Archive::Tar::File object from an existing file.
163
164Returns undef on failure.
165
642eb381 166=head2 Archive::Tar::File->new( data => $path, $data, $opt )
39713df4 167
168Returns a new Archive::Tar::File object from data.
169
170C<$path> defines the file name (which need not exist), C<$data> the
171file contents, and C<$opt> is a reference to a hash of attributes
172which may be used to override the default attributes (fields in the
173tar header), which are described above in the Accessors section.
174
175Returns undef on failure.
176
642eb381 177=head2 Archive::Tar::File->new( chunk => $chunk )
39713df4 178
179Returns a new Archive::Tar::File object from a raw 512-byte tar
180archive chunk.
181
182Returns undef on failure.
183
184=cut
185
186sub new {
187 my $class = shift;
188 my $what = shift;
189
190 my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
191 ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
192 ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
193 undef;
194
195 return $obj;
196}
197
198### copies the data, creates a clone ###
199sub clone {
200 my $self = shift;
201 return bless { %$self }, ref $self;
202}
203
204sub _new_from_chunk {
205 my $class = shift;
01d11a1c 206 my $chunk = shift or return; # 512 bytes of tar header
81a5970e 207 my %hash = @_;
208
209 ### filter any arguments on defined-ness of values.
210 ### this allows overriding from what the tar-header is saying
211 ### about this tar-entry. Particularly useful for @LongLink files
212 my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
39713df4 213
214 ### makes it start at 0 actually... :) ###
215 my $i = -1;
216 my %entry = map {
217 $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_
218 } map { /^([^\0]*)/ } unpack( UNPACK, $chunk );
219
81a5970e 220 my $obj = bless { %entry, %args }, $class;
39713df4 221
222 ### magic is a filetype string.. it should have something like 'ustar' or
223 ### something similar... if the chunk is garbage, skip it
224 return unless $obj->magic !~ /\W/;
225
226 ### store the original chunk ###
227 $obj->raw( $chunk );
228
229 $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
230 $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
231
232
233 return $obj;
234
235}
236
237sub _new_from_file {
238 my $class = shift;
01d11a1c 239 my $path = shift;
240
241 ### path has to at least exist
242 return unless defined $path;
243
39713df4 244 my $type = __PACKAGE__->_filetype($path);
245 my $data = '';
246
97a504ba 247 READ: {
248 unless ($type == DIR ) {
249 my $fh = IO::File->new;
250
251 unless( $fh->open($path) ) {
252 ### dangling symlinks are fine, stop reading but continue
253 ### creating the object
254 last READ if $type == SYMLINK;
255
256 ### otherwise, return from this function --
257 ### anything that's *not* a symlink should be
258 ### resolvable
259 return;
260 }
261
262 ### binmode needed to read files properly on win32 ###
263 binmode $fh;
264 $data = do { local $/; <$fh> };
265 close $fh;
266 }
39713df4 267 }
268
269 my @items = qw[mode uid gid size mtime];
270 my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
271
642eb381 272 if (ON_VMS) {
273 ### VMS has two UID modes, traditional and POSIX. Normally POSIX is
274 ### not used. We currently do not have an easy way to see if we are in
275 ### POSIX mode. In traditional mode, the UID is actually the VMS UIC.
276 ### The VMS UIC has the upper 16 bits is the GID, which in many cases
277 ### the VMS UIC will be larger than 209715, the largest that TAR can
278 ### handle. So for now, assume it is traditional if the UID is larger
279 ### than 0x10000.
280
281 if ($hash{uid} > 0x10000) {
282 $hash{uid} = $hash{uid} & 0xFFFF;
283 }
284
285 ### The file length from stat() is the physical length of the file
286 ### However the amount of data read in may be more for some file types.
287 ### Fixed length files are read past the logical EOF to end of the block
288 ### containing. Other file types get expanded on read because record
289 ### delimiters are added.
290
291 my $data_len = length $data;
292 $hash{size} = $data_len if $hash{size} < $data_len;
293
294 }
39713df4 295 ### you *must* set size == 0 on symlinks, or the next entry will be
296 ### though of as the contents of the symlink, which is wrong.
297 ### this fixes bug #7937
298 $hash{size} = 0 if ($type == DIR or $type == SYMLINK);
299 $hash{mtime} -= TIME_OFFSET;
300
301 ### strip the high bits off the mode, which we don't need to store
302 $hash{mode} = STRIP_MODE->( $hash{mode} );
303
304
305 ### probably requires some file path munging here ... ###
306 ### name and prefix are set later
307 my $obj = {
308 %hash,
309 name => '',
310 chksum => CHECK_SUM,
311 type => $type,
312 linkname => ($type == SYMLINK and CAN_READLINK)
313 ? readlink $path
314 : '',
315 magic => MAGIC,
316 version => TAR_VERSION,
317 uname => UNAME->( $hash{uid} ),
318 gname => GNAME->( $hash{gid} ),
319 devmajor => 0, # not handled
320 devminor => 0, # not handled
321 prefix => '',
322 data => $data,
323 };
324
325 bless $obj, $class;
326
327 ### fix up the prefix and file from the path
328 my($prefix,$file) = $obj->_prefix_and_file( $path );
329 $obj->prefix( $prefix );
330 $obj->name( $file );
331
332 return $obj;
333}
334
335sub _new_from_data {
336 my $class = shift;
01d11a1c 337 my $path = shift; return unless defined $path;
39713df4 338 my $data = shift; return unless defined $data;
339 my $opt = shift;
340
341 my $obj = {
342 data => $data,
343 name => '',
344 mode => MODE,
345 uid => UID,
346 gid => GID,
347 size => length $data,
348 mtime => time - TIME_OFFSET,
349 chksum => CHECK_SUM,
350 type => FILE,
351 linkname => '',
352 magic => MAGIC,
353 version => TAR_VERSION,
354 uname => UNAME->( UID ),
355 gname => GNAME->( GID ),
356 devminor => 0,
357 devmajor => 0,
358 prefix => '',
359 };
360
361 ### overwrite with user options, if provided ###
362 if( $opt and ref $opt eq 'HASH' ) {
363 for my $key ( keys %$opt ) {
364
365 ### don't write bogus options ###
366 next unless exists $obj->{$key};
367 $obj->{$key} = $opt->{$key};
368 }
369 }
370
371 bless $obj, $class;
372
373 ### fix up the prefix and file from the path
374 my($prefix,$file) = $obj->_prefix_and_file( $path );
375 $obj->prefix( $prefix );
376 $obj->name( $file );
377
378 return $obj;
379}
380
381sub _prefix_and_file {
382 my $self = shift;
383 my $path = shift;
384
385 my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
386 my @dirs = File::Spec->splitdir( $dirs );
387
388 ### so sometimes the last element is '' -- probably when trailing
389 ### dir slashes are encountered... this is is of course pointless,
390 ### so remove it
391 pop @dirs while @dirs and not length $dirs[-1];
392
393 ### if it's a directory, then $file might be empty
394 $file = pop @dirs if $self->is_dir and not length $file;
395
396 my $prefix = File::Spec::Unix->catdir(
397 grep { length } $vol, @dirs
398 );
399 return( $prefix, $file );
400}
401
402sub _filetype {
403 my $self = shift;
01d11a1c 404 my $file = shift;
405
406 return unless defined $file;
39713df4 407
408 return SYMLINK if (-l $file); # Symlink
409
410 return FILE if (-f _); # Plain file
411
412 return DIR if (-d _); # Directory
413
414 return FIFO if (-p _); # Named pipe
415
416 return SOCKET if (-S _); # Socket
417
418 return BLOCKDEV if (-b _); # Block special
419
420 return CHARDEV if (-c _); # Character special
421
422 ### shouldn't happen, this is when making archives, not reading ###
423 return LONGLINK if ( $file eq LONGLINK_NAME );
424
425 return UNKNOWN; # Something else (like what?)
426
427}
428
429### this method 'downgrades' a file to plain file -- this is used for
430### symlinks when FOLLOW_SYMLINKS is true.
431sub _downgrade_to_plainfile {
432 my $entry = shift;
433 $entry->type( FILE );
434 $entry->mode( MODE );
435 $entry->linkname('');
436
437 return 1;
438}
439
642eb381 440=head2 $bool = $file->extract( [ $alternative_name ] )
441
442Extract this object, optionally to an alternative name.
443
444See C<< Archive::Tar->extract_file >> for details.
445
446Returns true on success and false on failure.
447
448=cut
449
450sub extract {
451 my $self = shift;
452
453 local $Carp::CarpLevel += 1;
454
455 return Archive::Tar->_extract_file( $self, @_ );
456}
457
458=head2 $path = $file->full_path
39713df4 459
460Returns the full path from the tar header; this is basically a
461concatenation of the C<prefix> and C<name> fields.
462
463=cut
464
465sub full_path {
466 my $self = shift;
467
468 ### if prefix field is emtpy
469 return $self->name unless defined $self->prefix and length $self->prefix;
470
471 ### or otherwise, catfile'd
472 return File::Spec::Unix->catfile( $self->prefix, $self->name );
473}
474
475
642eb381 476=head2 $bool = $file->validate
39713df4 477
478Done by Archive::Tar internally when reading the tar file:
479validate the header against the checksum to ensure integer tar file.
480
481Returns true on success, false on failure
482
483=cut
484
485sub validate {
486 my $self = shift;
487
488 my $raw = $self->raw;
489
490 ### don't know why this one is different from the one we /write/ ###
491 substr ($raw, 148, 8) = " ";
492 return unpack ("%16C*", $raw) == $self->chksum ? 1 : 0;
493}
494
642eb381 495=head2 $bool = $file->has_content
39713df4 496
497Returns a boolean to indicate whether the current object has content.
498Some special files like directories and so on never will have any
499content. This method is mainly to make sure you don't get warnings
500for using uninitialized values when looking at an object's content.
501
502=cut
503
504sub has_content {
505 my $self = shift;
506 return defined $self->data() && length $self->data() ? 1 : 0;
507}
508
642eb381 509=head2 $content = $file->get_content
39713df4 510
511Returns the current content for the in-memory file
512
513=cut
514
515sub get_content {
516 my $self = shift;
517 $self->data( );
518}
519
642eb381 520=head2 $cref = $file->get_content_by_ref
39713df4 521
522Returns the current content for the in-memory file as a scalar
523reference. Normal users won't need this, but it will save memory if
524you are dealing with very large data files in your tar archive, since
525it will pass the contents by reference, rather than make a copy of it
526first.
527
528=cut
529
530sub get_content_by_ref {
531 my $self = shift;
532
533 return \$self->{data};
534}
535
642eb381 536=head2 $bool = $file->replace_content( $content )
39713df4 537
538Replace the current content of the file with the new content. This
539only affects the in-memory archive, not the on-disk version until
540you write it.
541
542Returns true on success, false on failure.
543
544=cut
545
546sub replace_content {
547 my $self = shift;
548 my $data = shift || '';
549
550 $self->data( $data );
551 $self->size( length $data );
552 return 1;
553}
554
642eb381 555=head2 $bool = $file->rename( $new_name )
39713df4 556
557Rename the current file to $new_name.
558
559Note that you must specify a Unix path for $new_name, since per tar
560standard, all files in the archive must be Unix paths.
561
562Returns true on success and false on failure.
563
564=cut
565
566sub rename {
567 my $self = shift;
01d11a1c 568 my $path = shift;
569
570 return unless defined $path;
39713df4 571
572 my ($prefix,$file) = $self->_prefix_and_file( $path );
573
574 $self->name( $file );
575 $self->prefix( $prefix );
576
577 return 1;
578}
579
580=head1 Convenience methods
581
582To quickly check the type of a C<Archive::Tar::File> object, you can
583use the following methods:
584
585=over 4
586
642eb381 587=item $file->is_file
39713df4 588
589Returns true if the file is of type C<file>
590
642eb381 591=item $file->is_dir
39713df4 592
593Returns true if the file is of type C<dir>
594
642eb381 595=item $file->is_hardlink
39713df4 596
597Returns true if the file is of type C<hardlink>
598
642eb381 599=item $file->is_symlink
39713df4 600
601Returns true if the file is of type C<symlink>
602
642eb381 603=item $file->is_chardev
39713df4 604
605Returns true if the file is of type C<chardev>
606
642eb381 607=item $file->is_blockdev
39713df4 608
609Returns true if the file is of type C<blockdev>
610
642eb381 611=item $file->is_fifo
39713df4 612
613Returns true if the file is of type C<fifo>
614
642eb381 615=item $file->is_socket
39713df4 616
617Returns true if the file is of type C<socket>
618
642eb381 619=item $file->is_longlink
39713df4 620
621Returns true if the file is of type C<LongLink>.
622Should not happen after a successful C<read>.
623
642eb381 624=item $file->is_label
39713df4 625
626Returns true if the file is of type C<Label>.
627Should not happen after a successful C<read>.
628
642eb381 629=item $file->is_unknown
39713df4 630
631Returns true if the file type is C<unknown>
632
633=back
634
635=cut
636
637#stupid perl5.5.3 needs to warn if it's not numeric
638sub is_file { local $^W; FILE == $_[0]->type }
639sub is_dir { local $^W; DIR == $_[0]->type }
640sub is_hardlink { local $^W; HARDLINK == $_[0]->type }
641sub is_symlink { local $^W; SYMLINK == $_[0]->type }
642sub is_chardev { local $^W; CHARDEV == $_[0]->type }
643sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type }
644sub is_fifo { local $^W; FIFO == $_[0]->type }
645sub is_socket { local $^W; SOCKET == $_[0]->type }
646sub is_unknown { local $^W; UNKNOWN == $_[0]->type }
647sub is_longlink { local $^W; LONGLINK eq $_[0]->type }
648sub is_label { local $^W; LABEL eq $_[0]->type }
649
6501;