Skip the POSIX::strftime() time test with a 60sec parameter on Vista:
[p5sagit/p5-mst-13.2.git] / lib / Archive / Tar / File.pm
CommitLineData
39713df4 1package Archive::Tar::File;
2use strict;
3
4use IO::File;
81a5970e 5use File::Spec::Unix ();
6use File::Spec ();
7use File::Basename ();
8
39713df4 9use Archive::Tar::Constant;
10
11use vars qw[@ISA $VERSION];
12@ISA = qw[Archive::Tar];
13$VERSION = '0.02';
14
15### set value to 1 to oct() it during the unpack ###
16my $tmpl = [
17 name => 0, # string
18 mode => 1, # octal
19 uid => 1, # octal
20 gid => 1, # octal
21 size => 1, # octal
22 mtime => 1, # octal
23 chksum => 1, # octal
24 type => 0, # character
25 linkname => 0, # string
26 magic => 0, # string
27 version => 0, # 2 bytes
28 uname => 0, # string
29 gname => 0, # string
30 devmajor => 1, # octal
31 devminor => 1, # octal
32 prefix => 0,
33
34### end UNPACK items ###
35 raw => 0, # the raw data chunk
36 data => 0, # the data associated with the file --
37 # This might be very memory intensive
38];
39
40### install get/set accessors for this object.
41for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) {
42 my $key = $tmpl->[$i];
43 no strict 'refs';
44 *{__PACKAGE__."::$key"} = sub {
45 my $self = shift;
46 $self->{$key} = $_[0] if @_;
47
48 ### just in case the key is not there or undef or something ###
49 { local $^W = 0;
50 return $self->{$key};
51 }
52 }
53}
54
55=head1 NAME
56
57Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
58
59=head1 SYNOPSIS
60
61 my @items = $tar->get_files;
62
63 print $_->name, ' ', $_->size, "\n" for @items;
64
65 print $object->get_content;
66 $object->replace_content('new content');
67
68 $object->rename( 'new/full/path/to/file.c' );
69
70=head1 DESCRIPTION
71
72Archive::Tar::Files provides a neat little object layer for in-memory
73extracted files. It's mostly used internally in Archive::Tar to tidy
74up the code, but there's no reason users shouldn't use this API as
75well.
76
77=head2 Accessors
78
79A lot of the methods in this package are accessors to the various
80fields in the tar header:
81
82=over 4
83
84=item name
85
86The file's name
87
88=item mode
89
90The file's mode
91
92=item uid
93
94The user id owning the file
95
96=item gid
97
98The group id owning the file
99
100=item size
101
102File size in bytes
103
104=item mtime
105
106Modification time. Adjusted to mac-time on MacOS if required
107
108=item chksum
109
110Checksum field for the tar header
111
112=item type
113
114File type -- numeric, but comparable to exported constants -- see
115Archive::Tar's documentation
116
117=item linkname
118
119If the file is a symlink, the file it's pointing to
120
121=item magic
122
123Tar magic string -- not useful for most users
124
125=item version
126
127Tar version string -- not useful for most users
128
129=item uname
130
131The user name that owns the file
132
133=item gname
134
135The group name that owns the file
136
137=item devmajor
138
139Device major number in case of a special file
140
141=item devminor
142
143Device minor number in case of a special file
144
145=item prefix
146
147Any directory to prefix to the extraction path, if any
148
149=item raw
150
151Raw tar header -- not useful for most users
152
153=back
154
155=head1 Methods
156
157=head2 new( file => $path )
158
159Returns a new Archive::Tar::File object from an existing file.
160
161Returns undef on failure.
162
163=head2 new( data => $path, $data, $opt )
164
165Returns a new Archive::Tar::File object from data.
166
167C<$path> defines the file name (which need not exist), C<$data> the
168file contents, and C<$opt> is a reference to a hash of attributes
169which may be used to override the default attributes (fields in the
170tar header), which are described above in the Accessors section.
171
172Returns undef on failure.
173
174=head2 new( chunk => $chunk )
175
176Returns a new Archive::Tar::File object from a raw 512-byte tar
177archive chunk.
178
179Returns undef on failure.
180
181=cut
182
183sub new {
184 my $class = shift;
185 my $what = shift;
186
187 my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
188 ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
189 ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
190 undef;
191
192 return $obj;
193}
194
195### copies the data, creates a clone ###
196sub clone {
197 my $self = shift;
198 return bless { %$self }, ref $self;
199}
200
201sub _new_from_chunk {
202 my $class = shift;
203 my $chunk = shift or return;
81a5970e 204 my %hash = @_;
205
206 ### filter any arguments on defined-ness of values.
207 ### this allows overriding from what the tar-header is saying
208 ### about this tar-entry. Particularly useful for @LongLink files
209 my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
39713df4 210
211 ### makes it start at 0 actually... :) ###
212 my $i = -1;
213 my %entry = map {
214 $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_
215 } map { /^([^\0]*)/ } unpack( UNPACK, $chunk );
216
81a5970e 217 my $obj = bless { %entry, %args }, $class;
39713df4 218
219 ### magic is a filetype string.. it should have something like 'ustar' or
220 ### something similar... if the chunk is garbage, skip it
221 return unless $obj->magic !~ /\W/;
222
223 ### store the original chunk ###
224 $obj->raw( $chunk );
225
226 $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
227 $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
228
229
230 return $obj;
231
232}
233
234sub _new_from_file {
235 my $class = shift;
236 my $path = shift or return;
237 my $type = __PACKAGE__->_filetype($path);
238 my $data = '';
239
97a504ba 240 READ: {
241 unless ($type == DIR ) {
242 my $fh = IO::File->new;
243
244 unless( $fh->open($path) ) {
245 ### dangling symlinks are fine, stop reading but continue
246 ### creating the object
247 last READ if $type == SYMLINK;
248
249 ### otherwise, return from this function --
250 ### anything that's *not* a symlink should be
251 ### resolvable
252 return;
253 }
254
255 ### binmode needed to read files properly on win32 ###
256 binmode $fh;
257 $data = do { local $/; <$fh> };
258 close $fh;
259 }
39713df4 260 }
261
262 my @items = qw[mode uid gid size mtime];
263 my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
264
265 ### you *must* set size == 0 on symlinks, or the next entry will be
266 ### though of as the contents of the symlink, which is wrong.
267 ### this fixes bug #7937
268 $hash{size} = 0 if ($type == DIR or $type == SYMLINK);
269 $hash{mtime} -= TIME_OFFSET;
270
271 ### strip the high bits off the mode, which we don't need to store
272 $hash{mode} = STRIP_MODE->( $hash{mode} );
273
274
275 ### probably requires some file path munging here ... ###
276 ### name and prefix are set later
277 my $obj = {
278 %hash,
279 name => '',
280 chksum => CHECK_SUM,
281 type => $type,
282 linkname => ($type == SYMLINK and CAN_READLINK)
283 ? readlink $path
284 : '',
285 magic => MAGIC,
286 version => TAR_VERSION,
287 uname => UNAME->( $hash{uid} ),
288 gname => GNAME->( $hash{gid} ),
289 devmajor => 0, # not handled
290 devminor => 0, # not handled
291 prefix => '',
292 data => $data,
293 };
294
295 bless $obj, $class;
296
297 ### fix up the prefix and file from the path
298 my($prefix,$file) = $obj->_prefix_and_file( $path );
299 $obj->prefix( $prefix );
300 $obj->name( $file );
301
302 return $obj;
303}
304
305sub _new_from_data {
306 my $class = shift;
307 my $path = shift or return;
308 my $data = shift; return unless defined $data;
309 my $opt = shift;
310
311 my $obj = {
312 data => $data,
313 name => '',
314 mode => MODE,
315 uid => UID,
316 gid => GID,
317 size => length $data,
318 mtime => time - TIME_OFFSET,
319 chksum => CHECK_SUM,
320 type => FILE,
321 linkname => '',
322 magic => MAGIC,
323 version => TAR_VERSION,
324 uname => UNAME->( UID ),
325 gname => GNAME->( GID ),
326 devminor => 0,
327 devmajor => 0,
328 prefix => '',
329 };
330
331 ### overwrite with user options, if provided ###
332 if( $opt and ref $opt eq 'HASH' ) {
333 for my $key ( keys %$opt ) {
334
335 ### don't write bogus options ###
336 next unless exists $obj->{$key};
337 $obj->{$key} = $opt->{$key};
338 }
339 }
340
341 bless $obj, $class;
342
343 ### fix up the prefix and file from the path
344 my($prefix,$file) = $obj->_prefix_and_file( $path );
345 $obj->prefix( $prefix );
346 $obj->name( $file );
347
348 return $obj;
349}
350
351sub _prefix_and_file {
352 my $self = shift;
353 my $path = shift;
354
355 my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
356 my @dirs = File::Spec->splitdir( $dirs );
357
358 ### so sometimes the last element is '' -- probably when trailing
359 ### dir slashes are encountered... this is is of course pointless,
360 ### so remove it
361 pop @dirs while @dirs and not length $dirs[-1];
362
363 ### if it's a directory, then $file might be empty
364 $file = pop @dirs if $self->is_dir and not length $file;
365
366 my $prefix = File::Spec::Unix->catdir(
367 grep { length } $vol, @dirs
368 );
369 return( $prefix, $file );
370}
371
372sub _filetype {
373 my $self = shift;
374 my $file = shift or return;
375
376 return SYMLINK if (-l $file); # Symlink
377
378 return FILE if (-f _); # Plain file
379
380 return DIR if (-d _); # Directory
381
382 return FIFO if (-p _); # Named pipe
383
384 return SOCKET if (-S _); # Socket
385
386 return BLOCKDEV if (-b _); # Block special
387
388 return CHARDEV if (-c _); # Character special
389
390 ### shouldn't happen, this is when making archives, not reading ###
391 return LONGLINK if ( $file eq LONGLINK_NAME );
392
393 return UNKNOWN; # Something else (like what?)
394
395}
396
397### this method 'downgrades' a file to plain file -- this is used for
398### symlinks when FOLLOW_SYMLINKS is true.
399sub _downgrade_to_plainfile {
400 my $entry = shift;
401 $entry->type( FILE );
402 $entry->mode( MODE );
403 $entry->linkname('');
404
405 return 1;
406}
407
408=head2 full_path
409
410Returns the full path from the tar header; this is basically a
411concatenation of the C<prefix> and C<name> fields.
412
413=cut
414
415sub full_path {
416 my $self = shift;
417
418 ### if prefix field is emtpy
419 return $self->name unless defined $self->prefix and length $self->prefix;
420
421 ### or otherwise, catfile'd
422 return File::Spec::Unix->catfile( $self->prefix, $self->name );
423}
424
425
426=head2 validate
427
428Done by Archive::Tar internally when reading the tar file:
429validate the header against the checksum to ensure integer tar file.
430
431Returns true on success, false on failure
432
433=cut
434
435sub validate {
436 my $self = shift;
437
438 my $raw = $self->raw;
439
440 ### don't know why this one is different from the one we /write/ ###
441 substr ($raw, 148, 8) = " ";
442 return unpack ("%16C*", $raw) == $self->chksum ? 1 : 0;
443}
444
445=head2 has_content
446
447Returns a boolean to indicate whether the current object has content.
448Some special files like directories and so on never will have any
449content. This method is mainly to make sure you don't get warnings
450for using uninitialized values when looking at an object's content.
451
452=cut
453
454sub has_content {
455 my $self = shift;
456 return defined $self->data() && length $self->data() ? 1 : 0;
457}
458
459=head2 get_content
460
461Returns the current content for the in-memory file
462
463=cut
464
465sub get_content {
466 my $self = shift;
467 $self->data( );
468}
469
470=head2 get_content_by_ref
471
472Returns the current content for the in-memory file as a scalar
473reference. Normal users won't need this, but it will save memory if
474you are dealing with very large data files in your tar archive, since
475it will pass the contents by reference, rather than make a copy of it
476first.
477
478=cut
479
480sub get_content_by_ref {
481 my $self = shift;
482
483 return \$self->{data};
484}
485
486=head2 replace_content( $content )
487
488Replace the current content of the file with the new content. This
489only affects the in-memory archive, not the on-disk version until
490you write it.
491
492Returns true on success, false on failure.
493
494=cut
495
496sub replace_content {
497 my $self = shift;
498 my $data = shift || '';
499
500 $self->data( $data );
501 $self->size( length $data );
502 return 1;
503}
504
505=head2 rename( $new_name )
506
507Rename the current file to $new_name.
508
509Note that you must specify a Unix path for $new_name, since per tar
510standard, all files in the archive must be Unix paths.
511
512Returns true on success and false on failure.
513
514=cut
515
516sub rename {
517 my $self = shift;
518 my $path = shift or return;
519
520 my ($prefix,$file) = $self->_prefix_and_file( $path );
521
522 $self->name( $file );
523 $self->prefix( $prefix );
524
525 return 1;
526}
527
528=head1 Convenience methods
529
530To quickly check the type of a C<Archive::Tar::File> object, you can
531use the following methods:
532
533=over 4
534
535=item is_file
536
537Returns true if the file is of type C<file>
538
539=item is_dir
540
541Returns true if the file is of type C<dir>
542
543=item is_hardlink
544
545Returns true if the file is of type C<hardlink>
546
547=item is_symlink
548
549Returns true if the file is of type C<symlink>
550
551=item is_chardev
552
553Returns true if the file is of type C<chardev>
554
555=item is_blockdev
556
557Returns true if the file is of type C<blockdev>
558
559=item is_fifo
560
561Returns true if the file is of type C<fifo>
562
563=item is_socket
564
565Returns true if the file is of type C<socket>
566
567=item is_longlink
568
569Returns true if the file is of type C<LongLink>.
570Should not happen after a successful C<read>.
571
572=item is_label
573
574Returns true if the file is of type C<Label>.
575Should not happen after a successful C<read>.
576
577=item is_unknown
578
579Returns true if the file type is C<unknown>
580
581=back
582
583=cut
584
585#stupid perl5.5.3 needs to warn if it's not numeric
586sub is_file { local $^W; FILE == $_[0]->type }
587sub is_dir { local $^W; DIR == $_[0]->type }
588sub is_hardlink { local $^W; HARDLINK == $_[0]->type }
589sub is_symlink { local $^W; SYMLINK == $_[0]->type }
590sub is_chardev { local $^W; CHARDEV == $_[0]->type }
591sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type }
592sub is_fifo { local $^W; FIFO == $_[0]->type }
593sub is_socket { local $^W; SOCKET == $_[0]->type }
594sub is_unknown { local $^W; UNKNOWN == $_[0]->type }
595sub is_longlink { local $^W; LONGLINK eq $_[0]->type }
596sub is_label { local $^W; LABEL eq $_[0]->type }
597
5981;