[patch@25305] t/op/arith.t fix for VMS with IEEE float
[p5sagit/p5-mst-13.2.git] / lib / Archive / Tar / File.pm
CommitLineData
39713df4 1package Archive::Tar::File;
2use strict;
3
4use IO::File;
5use File::Spec::Unix ();
6use File::Spec ();
7use File::Basename ();
8use Archive::Tar::Constant;
9
10use vars qw[@ISA $VERSION];
11@ISA = qw[Archive::Tar];
12$VERSION = '0.02';
13
14### set value to 1 to oct() it during the unpack ###
15my $tmpl = [
16 name => 0, # string
17 mode => 1, # octal
18 uid => 1, # octal
19 gid => 1, # octal
20 size => 1, # octal
21 mtime => 1, # octal
22 chksum => 1, # octal
23 type => 0, # character
24 linkname => 0, # string
25 magic => 0, # string
26 version => 0, # 2 bytes
27 uname => 0, # string
28 gname => 0, # string
29 devmajor => 1, # octal
30 devminor => 1, # octal
31 prefix => 0,
32
33### end UNPACK items ###
34 raw => 0, # the raw data chunk
35 data => 0, # the data associated with the file --
36 # This might be very memory intensive
37];
38
39### install get/set accessors for this object.
40for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) {
41 my $key = $tmpl->[$i];
42 no strict 'refs';
43 *{__PACKAGE__."::$key"} = sub {
44 my $self = shift;
45 $self->{$key} = $_[0] if @_;
46
47 ### just in case the key is not there or undef or something ###
48 { local $^W = 0;
49 return $self->{$key};
50 }
51 }
52}
53
54=head1 NAME
55
56Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
57
58=head1 SYNOPSIS
59
60 my @items = $tar->get_files;
61
62 print $_->name, ' ', $_->size, "\n" for @items;
63
64 print $object->get_content;
65 $object->replace_content('new content');
66
67 $object->rename( 'new/full/path/to/file.c' );
68
69=head1 DESCRIPTION
70
71Archive::Tar::Files provides a neat little object layer for in-memory
72extracted files. It's mostly used internally in Archive::Tar to tidy
73up the code, but there's no reason users shouldn't use this API as
74well.
75
76=head2 Accessors
77
78A lot of the methods in this package are accessors to the various
79fields in the tar header:
80
81=over 4
82
83=item name
84
85The file's name
86
87=item mode
88
89The file's mode
90
91=item uid
92
93The user id owning the file
94
95=item gid
96
97The group id owning the file
98
99=item size
100
101File size in bytes
102
103=item mtime
104
105Modification time. Adjusted to mac-time on MacOS if required
106
107=item chksum
108
109Checksum field for the tar header
110
111=item type
112
113File type -- numeric, but comparable to exported constants -- see
114Archive::Tar's documentation
115
116=item linkname
117
118If the file is a symlink, the file it's pointing to
119
120=item magic
121
122Tar magic string -- not useful for most users
123
124=item version
125
126Tar version string -- not useful for most users
127
128=item uname
129
130The user name that owns the file
131
132=item gname
133
134The group name that owns the file
135
136=item devmajor
137
138Device major number in case of a special file
139
140=item devminor
141
142Device minor number in case of a special file
143
144=item prefix
145
146Any directory to prefix to the extraction path, if any
147
148=item raw
149
150Raw tar header -- not useful for most users
151
152=back
153
154=head1 Methods
155
156=head2 new( file => $path )
157
158Returns a new Archive::Tar::File object from an existing file.
159
160Returns undef on failure.
161
162=head2 new( data => $path, $data, $opt )
163
164Returns a new Archive::Tar::File object from data.
165
166C<$path> defines the file name (which need not exist), C<$data> the
167file contents, and C<$opt> is a reference to a hash of attributes
168which may be used to override the default attributes (fields in the
169tar header), which are described above in the Accessors section.
170
171Returns undef on failure.
172
173=head2 new( chunk => $chunk )
174
175Returns a new Archive::Tar::File object from a raw 512-byte tar
176archive chunk.
177
178Returns undef on failure.
179
180=cut
181
182sub new {
183 my $class = shift;
184 my $what = shift;
185
186 my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
187 ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
188 ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
189 undef;
190
191 return $obj;
192}
193
194### copies the data, creates a clone ###
195sub clone {
196 my $self = shift;
197 return bless { %$self }, ref $self;
198}
199
200sub _new_from_chunk {
201 my $class = shift;
202 my $chunk = shift or return;
203
204 ### makes it start at 0 actually... :) ###
205 my $i = -1;
206 my %entry = map {
207 $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_
208 } map { /^([^\0]*)/ } unpack( UNPACK, $chunk );
209
210 my $obj = bless \%entry, $class;
211
212 ### magic is a filetype string.. it should have something like 'ustar' or
213 ### something similar... if the chunk is garbage, skip it
214 return unless $obj->magic !~ /\W/;
215
216 ### store the original chunk ###
217 $obj->raw( $chunk );
218
219 $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
220 $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
221
222
223 return $obj;
224
225}
226
227sub _new_from_file {
228 my $class = shift;
229 my $path = shift or return;
230 my $type = __PACKAGE__->_filetype($path);
231 my $data = '';
232
233 unless ($type == DIR) {
234 my $fh = IO::File->new;
235 $fh->open($path) or return;
236
237 ### binmode needed to read files properly on win32 ###
238 binmode $fh;
239 $data = do { local $/; <$fh> };
240 close $fh;
241 }
242
243 my @items = qw[mode uid gid size mtime];
244 my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
245
246 ### you *must* set size == 0 on symlinks, or the next entry will be
247 ### though of as the contents of the symlink, which is wrong.
248 ### this fixes bug #7937
249 $hash{size} = 0 if ($type == DIR or $type == SYMLINK);
250 $hash{mtime} -= TIME_OFFSET;
251
252 ### strip the high bits off the mode, which we don't need to store
253 $hash{mode} = STRIP_MODE->( $hash{mode} );
254
255
256 ### probably requires some file path munging here ... ###
257 ### name and prefix are set later
258 my $obj = {
259 %hash,
260 name => '',
261 chksum => CHECK_SUM,
262 type => $type,
263 linkname => ($type == SYMLINK and CAN_READLINK)
264 ? readlink $path
265 : '',
266 magic => MAGIC,
267 version => TAR_VERSION,
268 uname => UNAME->( $hash{uid} ),
269 gname => GNAME->( $hash{gid} ),
270 devmajor => 0, # not handled
271 devminor => 0, # not handled
272 prefix => '',
273 data => $data,
274 };
275
276 bless $obj, $class;
277
278 ### fix up the prefix and file from the path
279 my($prefix,$file) = $obj->_prefix_and_file( $path );
280 $obj->prefix( $prefix );
281 $obj->name( $file );
282
283 return $obj;
284}
285
286sub _new_from_data {
287 my $class = shift;
288 my $path = shift or return;
289 my $data = shift; return unless defined $data;
290 my $opt = shift;
291
292 my $obj = {
293 data => $data,
294 name => '',
295 mode => MODE,
296 uid => UID,
297 gid => GID,
298 size => length $data,
299 mtime => time - TIME_OFFSET,
300 chksum => CHECK_SUM,
301 type => FILE,
302 linkname => '',
303 magic => MAGIC,
304 version => TAR_VERSION,
305 uname => UNAME->( UID ),
306 gname => GNAME->( GID ),
307 devminor => 0,
308 devmajor => 0,
309 prefix => '',
310 };
311
312 ### overwrite with user options, if provided ###
313 if( $opt and ref $opt eq 'HASH' ) {
314 for my $key ( keys %$opt ) {
315
316 ### don't write bogus options ###
317 next unless exists $obj->{$key};
318 $obj->{$key} = $opt->{$key};
319 }
320 }
321
322 bless $obj, $class;
323
324 ### fix up the prefix and file from the path
325 my($prefix,$file) = $obj->_prefix_and_file( $path );
326 $obj->prefix( $prefix );
327 $obj->name( $file );
328
329 return $obj;
330}
331
332sub _prefix_and_file {
333 my $self = shift;
334 my $path = shift;
335
336 my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
337 my @dirs = File::Spec->splitdir( $dirs );
338
339 ### so sometimes the last element is '' -- probably when trailing
340 ### dir slashes are encountered... this is is of course pointless,
341 ### so remove it
342 pop @dirs while @dirs and not length $dirs[-1];
343
344 ### if it's a directory, then $file might be empty
345 $file = pop @dirs if $self->is_dir and not length $file;
346
347 my $prefix = File::Spec::Unix->catdir(
348 grep { length } $vol, @dirs
349 );
350 return( $prefix, $file );
351}
352
353sub _filetype {
354 my $self = shift;
355 my $file = shift or return;
356
357 return SYMLINK if (-l $file); # Symlink
358
359 return FILE if (-f _); # Plain file
360
361 return DIR if (-d _); # Directory
362
363 return FIFO if (-p _); # Named pipe
364
365 return SOCKET if (-S _); # Socket
366
367 return BLOCKDEV if (-b _); # Block special
368
369 return CHARDEV if (-c _); # Character special
370
371 ### shouldn't happen, this is when making archives, not reading ###
372 return LONGLINK if ( $file eq LONGLINK_NAME );
373
374 return UNKNOWN; # Something else (like what?)
375
376}
377
378### this method 'downgrades' a file to plain file -- this is used for
379### symlinks when FOLLOW_SYMLINKS is true.
380sub _downgrade_to_plainfile {
381 my $entry = shift;
382 $entry->type( FILE );
383 $entry->mode( MODE );
384 $entry->linkname('');
385
386 return 1;
387}
388
389=head2 full_path
390
391Returns the full path from the tar header; this is basically a
392concatenation of the C<prefix> and C<name> fields.
393
394=cut
395
396sub full_path {
397 my $self = shift;
398
399 ### if prefix field is emtpy
400 return $self->name unless defined $self->prefix and length $self->prefix;
401
402 ### or otherwise, catfile'd
403 return File::Spec::Unix->catfile( $self->prefix, $self->name );
404}
405
406
407=head2 validate
408
409Done by Archive::Tar internally when reading the tar file:
410validate the header against the checksum to ensure integer tar file.
411
412Returns true on success, false on failure
413
414=cut
415
416sub validate {
417 my $self = shift;
418
419 my $raw = $self->raw;
420
421 ### don't know why this one is different from the one we /write/ ###
422 substr ($raw, 148, 8) = " ";
423 return unpack ("%16C*", $raw) == $self->chksum ? 1 : 0;
424}
425
426=head2 has_content
427
428Returns a boolean to indicate whether the current object has content.
429Some special files like directories and so on never will have any
430content. This method is mainly to make sure you don't get warnings
431for using uninitialized values when looking at an object's content.
432
433=cut
434
435sub has_content {
436 my $self = shift;
437 return defined $self->data() && length $self->data() ? 1 : 0;
438}
439
440=head2 get_content
441
442Returns the current content for the in-memory file
443
444=cut
445
446sub get_content {
447 my $self = shift;
448 $self->data( );
449}
450
451=head2 get_content_by_ref
452
453Returns the current content for the in-memory file as a scalar
454reference. Normal users won't need this, but it will save memory if
455you are dealing with very large data files in your tar archive, since
456it will pass the contents by reference, rather than make a copy of it
457first.
458
459=cut
460
461sub get_content_by_ref {
462 my $self = shift;
463
464 return \$self->{data};
465}
466
467=head2 replace_content( $content )
468
469Replace the current content of the file with the new content. This
470only affects the in-memory archive, not the on-disk version until
471you write it.
472
473Returns true on success, false on failure.
474
475=cut
476
477sub replace_content {
478 my $self = shift;
479 my $data = shift || '';
480
481 $self->data( $data );
482 $self->size( length $data );
483 return 1;
484}
485
486=head2 rename( $new_name )
487
488Rename the current file to $new_name.
489
490Note that you must specify a Unix path for $new_name, since per tar
491standard, all files in the archive must be Unix paths.
492
493Returns true on success and false on failure.
494
495=cut
496
497sub rename {
498 my $self = shift;
499 my $path = shift or return;
500
501 my ($prefix,$file) = $self->_prefix_and_file( $path );
502
503 $self->name( $file );
504 $self->prefix( $prefix );
505
506 return 1;
507}
508
509=head1 Convenience methods
510
511To quickly check the type of a C<Archive::Tar::File> object, you can
512use the following methods:
513
514=over 4
515
516=item is_file
517
518Returns true if the file is of type C<file>
519
520=item is_dir
521
522Returns true if the file is of type C<dir>
523
524=item is_hardlink
525
526Returns true if the file is of type C<hardlink>
527
528=item is_symlink
529
530Returns true if the file is of type C<symlink>
531
532=item is_chardev
533
534Returns true if the file is of type C<chardev>
535
536=item is_blockdev
537
538Returns true if the file is of type C<blockdev>
539
540=item is_fifo
541
542Returns true if the file is of type C<fifo>
543
544=item is_socket
545
546Returns true if the file is of type C<socket>
547
548=item is_longlink
549
550Returns true if the file is of type C<LongLink>.
551Should not happen after a successful C<read>.
552
553=item is_label
554
555Returns true if the file is of type C<Label>.
556Should not happen after a successful C<read>.
557
558=item is_unknown
559
560Returns true if the file type is C<unknown>
561
562=back
563
564=cut
565
566#stupid perl5.5.3 needs to warn if it's not numeric
567sub is_file { local $^W; FILE == $_[0]->type }
568sub is_dir { local $^W; DIR == $_[0]->type }
569sub is_hardlink { local $^W; HARDLINK == $_[0]->type }
570sub is_symlink { local $^W; SYMLINK == $_[0]->type }
571sub is_chardev { local $^W; CHARDEV == $_[0]->type }
572sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type }
573sub is_fifo { local $^W; FIFO == $_[0]->type }
574sub is_socket { local $^W; SOCKET == $_[0]->type }
575sub is_unknown { local $^W; UNKNOWN == $_[0]->type }
576sub is_longlink { local $^W; LONGLINK eq $_[0]->type }
577sub is_label { local $^W; LABEL eq $_[0]->type }
578
5791;