Commit | Line | Data |
39713df4 |
1 | package Archive::Tar::File; |
2 | use strict; |
3 | |
642eb381 |
4 | use Carp (); |
39713df4 |
5 | use IO::File; |
81a5970e |
6 | use File::Spec::Unix (); |
7 | use File::Spec (); |
8 | use File::Basename (); |
9 | |
642eb381 |
10 | ### avoid circular use, so only require; |
11 | require Archive::Tar; |
39713df4 |
12 | use Archive::Tar::Constant; |
13 | |
14 | use vars qw[@ISA $VERSION]; |
642eb381 |
15 | #@ISA = qw[Archive::Tar]; |
39713df4 |
16 | $VERSION = '0.02'; |
17 | |
18 | ### set value to 1 to oct() it during the unpack ### |
19 | my $tmpl = [ |
20 | name => 0, # string |
21 | mode => 1, # octal |
22 | uid => 1, # octal |
23 | gid => 1, # octal |
24 | size => 1, # octal |
25 | mtime => 1, # octal |
26 | chksum => 1, # octal |
27 | type => 0, # character |
28 | linkname => 0, # string |
29 | magic => 0, # string |
30 | version => 0, # 2 bytes |
31 | uname => 0, # string |
32 | gname => 0, # string |
33 | devmajor => 1, # octal |
34 | devminor => 1, # octal |
35 | prefix => 0, |
36 | |
37 | ### end UNPACK items ### |
38 | raw => 0, # the raw data chunk |
39 | data => 0, # the data associated with the file -- |
40 | # This might be very memory intensive |
41 | ]; |
42 | |
43 | ### install get/set accessors for this object. |
44 | for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) { |
45 | my $key = $tmpl->[$i]; |
46 | no strict 'refs'; |
47 | *{__PACKAGE__."::$key"} = sub { |
48 | my $self = shift; |
49 | $self->{$key} = $_[0] if @_; |
50 | |
51 | ### just in case the key is not there or undef or something ### |
52 | { local $^W = 0; |
53 | return $self->{$key}; |
54 | } |
55 | } |
56 | } |
57 | |
58 | =head1 NAME |
59 | |
60 | Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar |
61 | |
62 | =head1 SYNOPSIS |
63 | |
64 | my @items = $tar->get_files; |
65 | |
66 | print $_->name, ' ', $_->size, "\n" for @items; |
67 | |
68 | print $object->get_content; |
69 | $object->replace_content('new content'); |
70 | |
71 | $object->rename( 'new/full/path/to/file.c' ); |
72 | |
73 | =head1 DESCRIPTION |
74 | |
75 | Archive::Tar::Files provides a neat little object layer for in-memory |
76 | extracted files. It's mostly used internally in Archive::Tar to tidy |
77 | up the code, but there's no reason users shouldn't use this API as |
78 | well. |
79 | |
80 | =head2 Accessors |
81 | |
82 | A lot of the methods in this package are accessors to the various |
83 | fields in the tar header: |
84 | |
85 | =over 4 |
86 | |
87 | =item name |
88 | |
89 | The file's name |
90 | |
91 | =item mode |
92 | |
93 | The file's mode |
94 | |
95 | =item uid |
96 | |
97 | The user id owning the file |
98 | |
99 | =item gid |
100 | |
101 | The group id owning the file |
102 | |
103 | =item size |
104 | |
105 | File size in bytes |
106 | |
107 | =item mtime |
108 | |
109 | Modification time. Adjusted to mac-time on MacOS if required |
110 | |
111 | =item chksum |
112 | |
113 | Checksum field for the tar header |
114 | |
115 | =item type |
116 | |
117 | File type -- numeric, but comparable to exported constants -- see |
118 | Archive::Tar's documentation |
119 | |
120 | =item linkname |
121 | |
122 | If the file is a symlink, the file it's pointing to |
123 | |
124 | =item magic |
125 | |
126 | Tar magic string -- not useful for most users |
127 | |
128 | =item version |
129 | |
130 | Tar version string -- not useful for most users |
131 | |
132 | =item uname |
133 | |
134 | The user name that owns the file |
135 | |
136 | =item gname |
137 | |
138 | The group name that owns the file |
139 | |
140 | =item devmajor |
141 | |
142 | Device major number in case of a special file |
143 | |
144 | =item devminor |
145 | |
146 | Device minor number in case of a special file |
147 | |
148 | =item prefix |
149 | |
150 | Any directory to prefix to the extraction path, if any |
151 | |
152 | =item raw |
153 | |
154 | Raw tar header -- not useful for most users |
155 | |
156 | =back |
157 | |
158 | =head1 Methods |
159 | |
642eb381 |
160 | =head2 Archive::Tar::File->new( file => $path ) |
39713df4 |
161 | |
162 | Returns a new Archive::Tar::File object from an existing file. |
163 | |
164 | Returns undef on failure. |
165 | |
642eb381 |
166 | =head2 Archive::Tar::File->new( data => $path, $data, $opt ) |
39713df4 |
167 | |
168 | Returns a new Archive::Tar::File object from data. |
169 | |
170 | C<$path> defines the file name (which need not exist), C<$data> the |
171 | file contents, and C<$opt> is a reference to a hash of attributes |
172 | which may be used to override the default attributes (fields in the |
173 | tar header), which are described above in the Accessors section. |
174 | |
175 | Returns undef on failure. |
176 | |
642eb381 |
177 | =head2 Archive::Tar::File->new( chunk => $chunk ) |
39713df4 |
178 | |
179 | Returns a new Archive::Tar::File object from a raw 512-byte tar |
180 | archive chunk. |
181 | |
182 | Returns undef on failure. |
183 | |
184 | =cut |
185 | |
186 | sub new { |
187 | my $class = shift; |
188 | my $what = shift; |
189 | |
190 | my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) : |
191 | ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) : |
192 | ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) : |
193 | undef; |
194 | |
195 | return $obj; |
196 | } |
197 | |
198 | ### copies the data, creates a clone ### |
199 | sub clone { |
200 | my $self = shift; |
201 | return bless { %$self }, ref $self; |
202 | } |
203 | |
204 | sub _new_from_chunk { |
205 | my $class = shift; |
01d11a1c |
206 | my $chunk = shift or return; # 512 bytes of tar header |
81a5970e |
207 | my %hash = @_; |
208 | |
209 | ### filter any arguments on defined-ness of values. |
210 | ### this allows overriding from what the tar-header is saying |
211 | ### about this tar-entry. Particularly useful for @LongLink files |
212 | my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash; |
39713df4 |
213 | |
214 | ### makes it start at 0 actually... :) ### |
215 | my $i = -1; |
216 | my %entry = map { |
217 | $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_ |
218 | } map { /^([^\0]*)/ } unpack( UNPACK, $chunk ); |
219 | |
81a5970e |
220 | my $obj = bless { %entry, %args }, $class; |
39713df4 |
221 | |
222 | ### magic is a filetype string.. it should have something like 'ustar' or |
223 | ### something similar... if the chunk is garbage, skip it |
224 | return unless $obj->magic !~ /\W/; |
225 | |
226 | ### store the original chunk ### |
227 | $obj->raw( $chunk ); |
228 | |
229 | $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) ); |
230 | $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) ); |
231 | |
232 | |
233 | return $obj; |
234 | |
235 | } |
236 | |
237 | sub _new_from_file { |
238 | my $class = shift; |
01d11a1c |
239 | my $path = shift; |
240 | |
241 | ### path has to at least exist |
242 | return unless defined $path; |
243 | |
39713df4 |
244 | my $type = __PACKAGE__->_filetype($path); |
245 | my $data = ''; |
246 | |
97a504ba |
247 | READ: { |
248 | unless ($type == DIR ) { |
249 | my $fh = IO::File->new; |
250 | |
251 | unless( $fh->open($path) ) { |
252 | ### dangling symlinks are fine, stop reading but continue |
253 | ### creating the object |
254 | last READ if $type == SYMLINK; |
255 | |
256 | ### otherwise, return from this function -- |
257 | ### anything that's *not* a symlink should be |
258 | ### resolvable |
259 | return; |
260 | } |
261 | |
262 | ### binmode needed to read files properly on win32 ### |
263 | binmode $fh; |
264 | $data = do { local $/; <$fh> }; |
265 | close $fh; |
266 | } |
39713df4 |
267 | } |
268 | |
269 | my @items = qw[mode uid gid size mtime]; |
270 | my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9]; |
271 | |
642eb381 |
272 | if (ON_VMS) { |
273 | ### VMS has two UID modes, traditional and POSIX. Normally POSIX is |
274 | ### not used. We currently do not have an easy way to see if we are in |
275 | ### POSIX mode. In traditional mode, the UID is actually the VMS UIC. |
276 | ### The VMS UIC has the upper 16 bits is the GID, which in many cases |
277 | ### the VMS UIC will be larger than 209715, the largest that TAR can |
278 | ### handle. So for now, assume it is traditional if the UID is larger |
279 | ### than 0x10000. |
280 | |
281 | if ($hash{uid} > 0x10000) { |
282 | $hash{uid} = $hash{uid} & 0xFFFF; |
283 | } |
284 | |
285 | ### The file length from stat() is the physical length of the file |
286 | ### However the amount of data read in may be more for some file types. |
287 | ### Fixed length files are read past the logical EOF to end of the block |
288 | ### containing. Other file types get expanded on read because record |
289 | ### delimiters are added. |
290 | |
291 | my $data_len = length $data; |
292 | $hash{size} = $data_len if $hash{size} < $data_len; |
293 | |
294 | } |
39713df4 |
295 | ### you *must* set size == 0 on symlinks, or the next entry will be |
296 | ### though of as the contents of the symlink, which is wrong. |
297 | ### this fixes bug #7937 |
298 | $hash{size} = 0 if ($type == DIR or $type == SYMLINK); |
299 | $hash{mtime} -= TIME_OFFSET; |
300 | |
301 | ### strip the high bits off the mode, which we don't need to store |
302 | $hash{mode} = STRIP_MODE->( $hash{mode} ); |
303 | |
304 | |
305 | ### probably requires some file path munging here ... ### |
306 | ### name and prefix are set later |
307 | my $obj = { |
308 | %hash, |
309 | name => '', |
310 | chksum => CHECK_SUM, |
311 | type => $type, |
312 | linkname => ($type == SYMLINK and CAN_READLINK) |
313 | ? readlink $path |
314 | : '', |
315 | magic => MAGIC, |
316 | version => TAR_VERSION, |
317 | uname => UNAME->( $hash{uid} ), |
318 | gname => GNAME->( $hash{gid} ), |
319 | devmajor => 0, # not handled |
320 | devminor => 0, # not handled |
321 | prefix => '', |
322 | data => $data, |
323 | }; |
324 | |
325 | bless $obj, $class; |
326 | |
327 | ### fix up the prefix and file from the path |
328 | my($prefix,$file) = $obj->_prefix_and_file( $path ); |
329 | $obj->prefix( $prefix ); |
330 | $obj->name( $file ); |
331 | |
332 | return $obj; |
333 | } |
334 | |
335 | sub _new_from_data { |
336 | my $class = shift; |
01d11a1c |
337 | my $path = shift; return unless defined $path; |
39713df4 |
338 | my $data = shift; return unless defined $data; |
339 | my $opt = shift; |
340 | |
341 | my $obj = { |
342 | data => $data, |
343 | name => '', |
344 | mode => MODE, |
345 | uid => UID, |
346 | gid => GID, |
347 | size => length $data, |
348 | mtime => time - TIME_OFFSET, |
349 | chksum => CHECK_SUM, |
350 | type => FILE, |
351 | linkname => '', |
352 | magic => MAGIC, |
353 | version => TAR_VERSION, |
354 | uname => UNAME->( UID ), |
355 | gname => GNAME->( GID ), |
356 | devminor => 0, |
357 | devmajor => 0, |
358 | prefix => '', |
359 | }; |
360 | |
361 | ### overwrite with user options, if provided ### |
362 | if( $opt and ref $opt eq 'HASH' ) { |
363 | for my $key ( keys %$opt ) { |
364 | |
365 | ### don't write bogus options ### |
366 | next unless exists $obj->{$key}; |
367 | $obj->{$key} = $opt->{$key}; |
368 | } |
369 | } |
370 | |
371 | bless $obj, $class; |
372 | |
373 | ### fix up the prefix and file from the path |
374 | my($prefix,$file) = $obj->_prefix_and_file( $path ); |
375 | $obj->prefix( $prefix ); |
376 | $obj->name( $file ); |
377 | |
378 | return $obj; |
379 | } |
380 | |
381 | sub _prefix_and_file { |
382 | my $self = shift; |
383 | my $path = shift; |
384 | |
385 | my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir ); |
386 | my @dirs = File::Spec->splitdir( $dirs ); |
387 | |
388 | ### so sometimes the last element is '' -- probably when trailing |
389 | ### dir slashes are encountered... this is is of course pointless, |
390 | ### so remove it |
391 | pop @dirs while @dirs and not length $dirs[-1]; |
392 | |
393 | ### if it's a directory, then $file might be empty |
394 | $file = pop @dirs if $self->is_dir and not length $file; |
395 | |
f5695358 |
396 | ### splitting ../ gives you the relative path in native syntax |
397 | map { $_ = '..' if $_ eq '-' } @dirs if ON_VMS; |
398 | |
39713df4 |
399 | my $prefix = File::Spec::Unix->catdir( |
400 | grep { length } $vol, @dirs |
401 | ); |
402 | return( $prefix, $file ); |
403 | } |
404 | |
405 | sub _filetype { |
406 | my $self = shift; |
01d11a1c |
407 | my $file = shift; |
408 | |
409 | return unless defined $file; |
39713df4 |
410 | |
411 | return SYMLINK if (-l $file); # Symlink |
412 | |
413 | return FILE if (-f _); # Plain file |
414 | |
415 | return DIR if (-d _); # Directory |
416 | |
417 | return FIFO if (-p _); # Named pipe |
418 | |
419 | return SOCKET if (-S _); # Socket |
420 | |
421 | return BLOCKDEV if (-b _); # Block special |
422 | |
423 | return CHARDEV if (-c _); # Character special |
424 | |
425 | ### shouldn't happen, this is when making archives, not reading ### |
426 | return LONGLINK if ( $file eq LONGLINK_NAME ); |
427 | |
428 | return UNKNOWN; # Something else (like what?) |
429 | |
430 | } |
431 | |
432 | ### this method 'downgrades' a file to plain file -- this is used for |
433 | ### symlinks when FOLLOW_SYMLINKS is true. |
434 | sub _downgrade_to_plainfile { |
435 | my $entry = shift; |
436 | $entry->type( FILE ); |
437 | $entry->mode( MODE ); |
438 | $entry->linkname(''); |
439 | |
440 | return 1; |
441 | } |
442 | |
642eb381 |
443 | =head2 $bool = $file->extract( [ $alternative_name ] ) |
444 | |
445 | Extract this object, optionally to an alternative name. |
446 | |
447 | See C<< Archive::Tar->extract_file >> for details. |
448 | |
449 | Returns true on success and false on failure. |
450 | |
451 | =cut |
452 | |
453 | sub extract { |
454 | my $self = shift; |
455 | |
456 | local $Carp::CarpLevel += 1; |
457 | |
458 | return Archive::Tar->_extract_file( $self, @_ ); |
459 | } |
460 | |
461 | =head2 $path = $file->full_path |
39713df4 |
462 | |
463 | Returns the full path from the tar header; this is basically a |
464 | concatenation of the C<prefix> and C<name> fields. |
465 | |
466 | =cut |
467 | |
468 | sub full_path { |
469 | my $self = shift; |
470 | |
471 | ### if prefix field is emtpy |
472 | return $self->name unless defined $self->prefix and length $self->prefix; |
473 | |
474 | ### or otherwise, catfile'd |
475 | return File::Spec::Unix->catfile( $self->prefix, $self->name ); |
476 | } |
477 | |
478 | |
642eb381 |
479 | =head2 $bool = $file->validate |
39713df4 |
480 | |
481 | Done by Archive::Tar internally when reading the tar file: |
482 | validate the header against the checksum to ensure integer tar file. |
483 | |
484 | Returns true on success, false on failure |
485 | |
486 | =cut |
487 | |
488 | sub validate { |
489 | my $self = shift; |
490 | |
491 | my $raw = $self->raw; |
492 | |
493 | ### don't know why this one is different from the one we /write/ ### |
494 | substr ($raw, 148, 8) = " "; |
bef46b70 |
495 | |
496 | ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar |
497 | ### like GNU tar does. See here for details: |
498 | ### http://www.gnu.org/software/tar/manual/tar.html#SEC139 |
499 | ### so we do both a signed AND unsigned validate. if one succeeds, that's |
500 | ### good enough |
501 | return ( (unpack ("%16C*", $raw) == $self->chksum) |
502 | or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0; |
39713df4 |
503 | } |
504 | |
642eb381 |
505 | =head2 $bool = $file->has_content |
39713df4 |
506 | |
507 | Returns a boolean to indicate whether the current object has content. |
508 | Some special files like directories and so on never will have any |
509 | content. This method is mainly to make sure you don't get warnings |
510 | for using uninitialized values when looking at an object's content. |
511 | |
512 | =cut |
513 | |
514 | sub has_content { |
515 | my $self = shift; |
516 | return defined $self->data() && length $self->data() ? 1 : 0; |
517 | } |
518 | |
642eb381 |
519 | =head2 $content = $file->get_content |
39713df4 |
520 | |
521 | Returns the current content for the in-memory file |
522 | |
523 | =cut |
524 | |
525 | sub get_content { |
526 | my $self = shift; |
527 | $self->data( ); |
528 | } |
529 | |
642eb381 |
530 | =head2 $cref = $file->get_content_by_ref |
39713df4 |
531 | |
532 | Returns the current content for the in-memory file as a scalar |
533 | reference. Normal users won't need this, but it will save memory if |
534 | you are dealing with very large data files in your tar archive, since |
535 | it will pass the contents by reference, rather than make a copy of it |
536 | first. |
537 | |
538 | =cut |
539 | |
540 | sub get_content_by_ref { |
541 | my $self = shift; |
542 | |
543 | return \$self->{data}; |
544 | } |
545 | |
642eb381 |
546 | =head2 $bool = $file->replace_content( $content ) |
39713df4 |
547 | |
548 | Replace the current content of the file with the new content. This |
549 | only affects the in-memory archive, not the on-disk version until |
550 | you write it. |
551 | |
552 | Returns true on success, false on failure. |
553 | |
554 | =cut |
555 | |
556 | sub replace_content { |
557 | my $self = shift; |
558 | my $data = shift || ''; |
559 | |
560 | $self->data( $data ); |
561 | $self->size( length $data ); |
562 | return 1; |
563 | } |
564 | |
642eb381 |
565 | =head2 $bool = $file->rename( $new_name ) |
39713df4 |
566 | |
567 | Rename the current file to $new_name. |
568 | |
569 | Note that you must specify a Unix path for $new_name, since per tar |
570 | standard, all files in the archive must be Unix paths. |
571 | |
572 | Returns true on success and false on failure. |
573 | |
574 | =cut |
575 | |
576 | sub rename { |
577 | my $self = shift; |
01d11a1c |
578 | my $path = shift; |
579 | |
580 | return unless defined $path; |
39713df4 |
581 | |
582 | my ($prefix,$file) = $self->_prefix_and_file( $path ); |
583 | |
584 | $self->name( $file ); |
585 | $self->prefix( $prefix ); |
586 | |
587 | return 1; |
588 | } |
589 | |
590 | =head1 Convenience methods |
591 | |
592 | To quickly check the type of a C<Archive::Tar::File> object, you can |
593 | use the following methods: |
594 | |
595 | =over 4 |
596 | |
642eb381 |
597 | =item $file->is_file |
39713df4 |
598 | |
599 | Returns true if the file is of type C<file> |
600 | |
642eb381 |
601 | =item $file->is_dir |
39713df4 |
602 | |
603 | Returns true if the file is of type C<dir> |
604 | |
642eb381 |
605 | =item $file->is_hardlink |
39713df4 |
606 | |
607 | Returns true if the file is of type C<hardlink> |
608 | |
642eb381 |
609 | =item $file->is_symlink |
39713df4 |
610 | |
611 | Returns true if the file is of type C<symlink> |
612 | |
642eb381 |
613 | =item $file->is_chardev |
39713df4 |
614 | |
615 | Returns true if the file is of type C<chardev> |
616 | |
642eb381 |
617 | =item $file->is_blockdev |
39713df4 |
618 | |
619 | Returns true if the file is of type C<blockdev> |
620 | |
642eb381 |
621 | =item $file->is_fifo |
39713df4 |
622 | |
623 | Returns true if the file is of type C<fifo> |
624 | |
642eb381 |
625 | =item $file->is_socket |
39713df4 |
626 | |
627 | Returns true if the file is of type C<socket> |
628 | |
642eb381 |
629 | =item $file->is_longlink |
39713df4 |
630 | |
631 | Returns true if the file is of type C<LongLink>. |
632 | Should not happen after a successful C<read>. |
633 | |
642eb381 |
634 | =item $file->is_label |
39713df4 |
635 | |
636 | Returns true if the file is of type C<Label>. |
637 | Should not happen after a successful C<read>. |
638 | |
642eb381 |
639 | =item $file->is_unknown |
39713df4 |
640 | |
641 | Returns true if the file type is C<unknown> |
642 | |
643 | =back |
644 | |
645 | =cut |
646 | |
647 | #stupid perl5.5.3 needs to warn if it's not numeric |
648 | sub is_file { local $^W; FILE == $_[0]->type } |
649 | sub is_dir { local $^W; DIR == $_[0]->type } |
650 | sub is_hardlink { local $^W; HARDLINK == $_[0]->type } |
651 | sub is_symlink { local $^W; SYMLINK == $_[0]->type } |
652 | sub is_chardev { local $^W; CHARDEV == $_[0]->type } |
653 | sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type } |
654 | sub is_fifo { local $^W; FIFO == $_[0]->type } |
655 | sub is_socket { local $^W; SOCKET == $_[0]->type } |
656 | sub is_unknown { local $^W; UNKNOWN == $_[0]->type } |
657 | sub is_longlink { local $^W; LONGLINK eq $_[0]->type } |
658 | sub is_label { local $^W; LABEL eq $_[0]->type } |
659 | |
660 | 1; |