Re: [perl #46011] [RESOLVED] overload "0+" doesn't handle integer results
[p5sagit/p5-mst-13.2.git] / lib / Archive / Tar / File.pm
CommitLineData
39713df4 1package Archive::Tar::File;
2use strict;
3
4use IO::File;
81a5970e 5use File::Spec::Unix ();
6use File::Spec ();
7use File::Basename ();
8
39713df4 9use Archive::Tar::Constant;
10
11use vars qw[@ISA $VERSION];
12@ISA = qw[Archive::Tar];
13$VERSION = '0.02';
14
15### set value to 1 to oct() it during the unpack ###
16my $tmpl = [
17 name => 0, # string
18 mode => 1, # octal
19 uid => 1, # octal
20 gid => 1, # octal
21 size => 1, # octal
22 mtime => 1, # octal
23 chksum => 1, # octal
24 type => 0, # character
25 linkname => 0, # string
26 magic => 0, # string
27 version => 0, # 2 bytes
28 uname => 0, # string
29 gname => 0, # string
30 devmajor => 1, # octal
31 devminor => 1, # octal
32 prefix => 0,
33
34### end UNPACK items ###
35 raw => 0, # the raw data chunk
36 data => 0, # the data associated with the file --
37 # This might be very memory intensive
38];
39
40### install get/set accessors for this object.
41for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) {
42 my $key = $tmpl->[$i];
43 no strict 'refs';
44 *{__PACKAGE__."::$key"} = sub {
45 my $self = shift;
46 $self->{$key} = $_[0] if @_;
47
48 ### just in case the key is not there or undef or something ###
49 { local $^W = 0;
50 return $self->{$key};
51 }
52 }
53}
54
55=head1 NAME
56
57Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
58
59=head1 SYNOPSIS
60
61 my @items = $tar->get_files;
62
63 print $_->name, ' ', $_->size, "\n" for @items;
64
65 print $object->get_content;
66 $object->replace_content('new content');
67
68 $object->rename( 'new/full/path/to/file.c' );
69
70=head1 DESCRIPTION
71
72Archive::Tar::Files provides a neat little object layer for in-memory
73extracted files. It's mostly used internally in Archive::Tar to tidy
74up the code, but there's no reason users shouldn't use this API as
75well.
76
77=head2 Accessors
78
79A lot of the methods in this package are accessors to the various
80fields in the tar header:
81
82=over 4
83
84=item name
85
86The file's name
87
88=item mode
89
90The file's mode
91
92=item uid
93
94The user id owning the file
95
96=item gid
97
98The group id owning the file
99
100=item size
101
102File size in bytes
103
104=item mtime
105
106Modification time. Adjusted to mac-time on MacOS if required
107
108=item chksum
109
110Checksum field for the tar header
111
112=item type
113
114File type -- numeric, but comparable to exported constants -- see
115Archive::Tar's documentation
116
117=item linkname
118
119If the file is a symlink, the file it's pointing to
120
121=item magic
122
123Tar magic string -- not useful for most users
124
125=item version
126
127Tar version string -- not useful for most users
128
129=item uname
130
131The user name that owns the file
132
133=item gname
134
135The group name that owns the file
136
137=item devmajor
138
139Device major number in case of a special file
140
141=item devminor
142
143Device minor number in case of a special file
144
145=item prefix
146
147Any directory to prefix to the extraction path, if any
148
149=item raw
150
151Raw tar header -- not useful for most users
152
153=back
154
155=head1 Methods
156
157=head2 new( file => $path )
158
159Returns a new Archive::Tar::File object from an existing file.
160
161Returns undef on failure.
162
163=head2 new( data => $path, $data, $opt )
164
165Returns a new Archive::Tar::File object from data.
166
167C<$path> defines the file name (which need not exist), C<$data> the
168file contents, and C<$opt> is a reference to a hash of attributes
169which may be used to override the default attributes (fields in the
170tar header), which are described above in the Accessors section.
171
172Returns undef on failure.
173
174=head2 new( chunk => $chunk )
175
176Returns a new Archive::Tar::File object from a raw 512-byte tar
177archive chunk.
178
179Returns undef on failure.
180
181=cut
182
183sub new {
184 my $class = shift;
185 my $what = shift;
186
187 my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
188 ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
189 ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
190 undef;
191
192 return $obj;
193}
194
195### copies the data, creates a clone ###
196sub clone {
197 my $self = shift;
198 return bless { %$self }, ref $self;
199}
200
201sub _new_from_chunk {
202 my $class = shift;
01d11a1c 203 my $chunk = shift or return; # 512 bytes of tar header
81a5970e 204 my %hash = @_;
205
206 ### filter any arguments on defined-ness of values.
207 ### this allows overriding from what the tar-header is saying
208 ### about this tar-entry. Particularly useful for @LongLink files
209 my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
39713df4 210
211 ### makes it start at 0 actually... :) ###
212 my $i = -1;
213 my %entry = map {
214 $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_
215 } map { /^([^\0]*)/ } unpack( UNPACK, $chunk );
216
81a5970e 217 my $obj = bless { %entry, %args }, $class;
39713df4 218
219 ### magic is a filetype string.. it should have something like 'ustar' or
220 ### something similar... if the chunk is garbage, skip it
221 return unless $obj->magic !~ /\W/;
222
223 ### store the original chunk ###
224 $obj->raw( $chunk );
225
226 $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
227 $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
228
229
230 return $obj;
231
232}
233
234sub _new_from_file {
235 my $class = shift;
01d11a1c 236 my $path = shift;
237
238 ### path has to at least exist
239 return unless defined $path;
240
39713df4 241 my $type = __PACKAGE__->_filetype($path);
242 my $data = '';
243
97a504ba 244 READ: {
245 unless ($type == DIR ) {
246 my $fh = IO::File->new;
247
248 unless( $fh->open($path) ) {
249 ### dangling symlinks are fine, stop reading but continue
250 ### creating the object
251 last READ if $type == SYMLINK;
252
253 ### otherwise, return from this function --
254 ### anything that's *not* a symlink should be
255 ### resolvable
256 return;
257 }
258
259 ### binmode needed to read files properly on win32 ###
260 binmode $fh;
261 $data = do { local $/; <$fh> };
262 close $fh;
263 }
39713df4 264 }
265
266 my @items = qw[mode uid gid size mtime];
267 my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
268
269 ### you *must* set size == 0 on symlinks, or the next entry will be
270 ### though of as the contents of the symlink, which is wrong.
271 ### this fixes bug #7937
272 $hash{size} = 0 if ($type == DIR or $type == SYMLINK);
273 $hash{mtime} -= TIME_OFFSET;
274
275 ### strip the high bits off the mode, which we don't need to store
276 $hash{mode} = STRIP_MODE->( $hash{mode} );
277
278
279 ### probably requires some file path munging here ... ###
280 ### name and prefix are set later
281 my $obj = {
282 %hash,
283 name => '',
284 chksum => CHECK_SUM,
285 type => $type,
286 linkname => ($type == SYMLINK and CAN_READLINK)
287 ? readlink $path
288 : '',
289 magic => MAGIC,
290 version => TAR_VERSION,
291 uname => UNAME->( $hash{uid} ),
292 gname => GNAME->( $hash{gid} ),
293 devmajor => 0, # not handled
294 devminor => 0, # not handled
295 prefix => '',
296 data => $data,
297 };
298
299 bless $obj, $class;
300
301 ### fix up the prefix and file from the path
302 my($prefix,$file) = $obj->_prefix_and_file( $path );
303 $obj->prefix( $prefix );
304 $obj->name( $file );
305
306 return $obj;
307}
308
309sub _new_from_data {
310 my $class = shift;
01d11a1c 311 my $path = shift; return unless defined $path;
39713df4 312 my $data = shift; return unless defined $data;
313 my $opt = shift;
314
315 my $obj = {
316 data => $data,
317 name => '',
318 mode => MODE,
319 uid => UID,
320 gid => GID,
321 size => length $data,
322 mtime => time - TIME_OFFSET,
323 chksum => CHECK_SUM,
324 type => FILE,
325 linkname => '',
326 magic => MAGIC,
327 version => TAR_VERSION,
328 uname => UNAME->( UID ),
329 gname => GNAME->( GID ),
330 devminor => 0,
331 devmajor => 0,
332 prefix => '',
333 };
334
335 ### overwrite with user options, if provided ###
336 if( $opt and ref $opt eq 'HASH' ) {
337 for my $key ( keys %$opt ) {
338
339 ### don't write bogus options ###
340 next unless exists $obj->{$key};
341 $obj->{$key} = $opt->{$key};
342 }
343 }
344
345 bless $obj, $class;
346
347 ### fix up the prefix and file from the path
348 my($prefix,$file) = $obj->_prefix_and_file( $path );
349 $obj->prefix( $prefix );
350 $obj->name( $file );
351
352 return $obj;
353}
354
355sub _prefix_and_file {
356 my $self = shift;
357 my $path = shift;
358
359 my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
360 my @dirs = File::Spec->splitdir( $dirs );
361
362 ### so sometimes the last element is '' -- probably when trailing
363 ### dir slashes are encountered... this is is of course pointless,
364 ### so remove it
365 pop @dirs while @dirs and not length $dirs[-1];
366
367 ### if it's a directory, then $file might be empty
368 $file = pop @dirs if $self->is_dir and not length $file;
369
370 my $prefix = File::Spec::Unix->catdir(
371 grep { length } $vol, @dirs
372 );
373 return( $prefix, $file );
374}
375
376sub _filetype {
377 my $self = shift;
01d11a1c 378 my $file = shift;
379
380 return unless defined $file;
39713df4 381
382 return SYMLINK if (-l $file); # Symlink
383
384 return FILE if (-f _); # Plain file
385
386 return DIR if (-d _); # Directory
387
388 return FIFO if (-p _); # Named pipe
389
390 return SOCKET if (-S _); # Socket
391
392 return BLOCKDEV if (-b _); # Block special
393
394 return CHARDEV if (-c _); # Character special
395
396 ### shouldn't happen, this is when making archives, not reading ###
397 return LONGLINK if ( $file eq LONGLINK_NAME );
398
399 return UNKNOWN; # Something else (like what?)
400
401}
402
403### this method 'downgrades' a file to plain file -- this is used for
404### symlinks when FOLLOW_SYMLINKS is true.
405sub _downgrade_to_plainfile {
406 my $entry = shift;
407 $entry->type( FILE );
408 $entry->mode( MODE );
409 $entry->linkname('');
410
411 return 1;
412}
413
414=head2 full_path
415
416Returns the full path from the tar header; this is basically a
417concatenation of the C<prefix> and C<name> fields.
418
419=cut
420
421sub full_path {
422 my $self = shift;
423
424 ### if prefix field is emtpy
425 return $self->name unless defined $self->prefix and length $self->prefix;
426
427 ### or otherwise, catfile'd
428 return File::Spec::Unix->catfile( $self->prefix, $self->name );
429}
430
431
432=head2 validate
433
434Done by Archive::Tar internally when reading the tar file:
435validate the header against the checksum to ensure integer tar file.
436
437Returns true on success, false on failure
438
439=cut
440
441sub validate {
442 my $self = shift;
443
444 my $raw = $self->raw;
445
446 ### don't know why this one is different from the one we /write/ ###
447 substr ($raw, 148, 8) = " ";
448 return unpack ("%16C*", $raw) == $self->chksum ? 1 : 0;
449}
450
451=head2 has_content
452
453Returns a boolean to indicate whether the current object has content.
454Some special files like directories and so on never will have any
455content. This method is mainly to make sure you don't get warnings
456for using uninitialized values when looking at an object's content.
457
458=cut
459
460sub has_content {
461 my $self = shift;
462 return defined $self->data() && length $self->data() ? 1 : 0;
463}
464
465=head2 get_content
466
467Returns the current content for the in-memory file
468
469=cut
470
471sub get_content {
472 my $self = shift;
473 $self->data( );
474}
475
476=head2 get_content_by_ref
477
478Returns the current content for the in-memory file as a scalar
479reference. Normal users won't need this, but it will save memory if
480you are dealing with very large data files in your tar archive, since
481it will pass the contents by reference, rather than make a copy of it
482first.
483
484=cut
485
486sub get_content_by_ref {
487 my $self = shift;
488
489 return \$self->{data};
490}
491
492=head2 replace_content( $content )
493
494Replace the current content of the file with the new content. This
495only affects the in-memory archive, not the on-disk version until
496you write it.
497
498Returns true on success, false on failure.
499
500=cut
501
502sub replace_content {
503 my $self = shift;
504 my $data = shift || '';
505
506 $self->data( $data );
507 $self->size( length $data );
508 return 1;
509}
510
511=head2 rename( $new_name )
512
513Rename the current file to $new_name.
514
515Note that you must specify a Unix path for $new_name, since per tar
516standard, all files in the archive must be Unix paths.
517
518Returns true on success and false on failure.
519
520=cut
521
522sub rename {
523 my $self = shift;
01d11a1c 524 my $path = shift;
525
526 return unless defined $path;
39713df4 527
528 my ($prefix,$file) = $self->_prefix_and_file( $path );
529
530 $self->name( $file );
531 $self->prefix( $prefix );
532
533 return 1;
534}
535
536=head1 Convenience methods
537
538To quickly check the type of a C<Archive::Tar::File> object, you can
539use the following methods:
540
541=over 4
542
543=item is_file
544
545Returns true if the file is of type C<file>
546
547=item is_dir
548
549Returns true if the file is of type C<dir>
550
551=item is_hardlink
552
553Returns true if the file is of type C<hardlink>
554
555=item is_symlink
556
557Returns true if the file is of type C<symlink>
558
559=item is_chardev
560
561Returns true if the file is of type C<chardev>
562
563=item is_blockdev
564
565Returns true if the file is of type C<blockdev>
566
567=item is_fifo
568
569Returns true if the file is of type C<fifo>
570
571=item is_socket
572
573Returns true if the file is of type C<socket>
574
575=item is_longlink
576
577Returns true if the file is of type C<LongLink>.
578Should not happen after a successful C<read>.
579
580=item is_label
581
582Returns true if the file is of type C<Label>.
583Should not happen after a successful C<read>.
584
585=item is_unknown
586
587Returns true if the file type is C<unknown>
588
589=back
590
591=cut
592
593#stupid perl5.5.3 needs to warn if it's not numeric
594sub is_file { local $^W; FILE == $_[0]->type }
595sub is_dir { local $^W; DIR == $_[0]->type }
596sub is_hardlink { local $^W; HARDLINK == $_[0]->type }
597sub is_symlink { local $^W; SYMLINK == $_[0]->type }
598sub is_chardev { local $^W; CHARDEV == $_[0]->type }
599sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type }
600sub is_fifo { local $^W; FIFO == $_[0]->type }
601sub is_socket { local $^W; SOCKET == $_[0]->type }
602sub is_unknown { local $^W; UNKNOWN == $_[0]->type }
603sub is_longlink { local $^W; LONGLINK eq $_[0]->type }
604sub is_label { local $^W; LABEL eq $_[0]->type }
605
6061;