Commit | Line | Data |
39713df4 |
1 | package Archive::Tar::File; |
2 | use strict; |
3 | |
4 | use IO::File; |
81a5970e |
5 | use File::Spec::Unix (); |
6 | use File::Spec (); |
7 | use File::Basename (); |
8 | |
39713df4 |
9 | use Archive::Tar::Constant; |
10 | |
11 | use vars qw[@ISA $VERSION]; |
12 | @ISA = qw[Archive::Tar]; |
13 | $VERSION = '0.02'; |
14 | |
15 | ### set value to 1 to oct() it during the unpack ### |
16 | my $tmpl = [ |
17 | name => 0, # string |
18 | mode => 1, # octal |
19 | uid => 1, # octal |
20 | gid => 1, # octal |
21 | size => 1, # octal |
22 | mtime => 1, # octal |
23 | chksum => 1, # octal |
24 | type => 0, # character |
25 | linkname => 0, # string |
26 | magic => 0, # string |
27 | version => 0, # 2 bytes |
28 | uname => 0, # string |
29 | gname => 0, # string |
30 | devmajor => 1, # octal |
31 | devminor => 1, # octal |
32 | prefix => 0, |
33 | |
34 | ### end UNPACK items ### |
35 | raw => 0, # the raw data chunk |
36 | data => 0, # the data associated with the file -- |
37 | # This might be very memory intensive |
38 | ]; |
39 | |
40 | ### install get/set accessors for this object. |
41 | for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) { |
42 | my $key = $tmpl->[$i]; |
43 | no strict 'refs'; |
44 | *{__PACKAGE__."::$key"} = sub { |
45 | my $self = shift; |
46 | $self->{$key} = $_[0] if @_; |
47 | |
48 | ### just in case the key is not there or undef or something ### |
49 | { local $^W = 0; |
50 | return $self->{$key}; |
51 | } |
52 | } |
53 | } |
54 | |
55 | =head1 NAME |
56 | |
57 | Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar |
58 | |
59 | =head1 SYNOPSIS |
60 | |
61 | my @items = $tar->get_files; |
62 | |
63 | print $_->name, ' ', $_->size, "\n" for @items; |
64 | |
65 | print $object->get_content; |
66 | $object->replace_content('new content'); |
67 | |
68 | $object->rename( 'new/full/path/to/file.c' ); |
69 | |
70 | =head1 DESCRIPTION |
71 | |
72 | Archive::Tar::Files provides a neat little object layer for in-memory |
73 | extracted files. It's mostly used internally in Archive::Tar to tidy |
74 | up the code, but there's no reason users shouldn't use this API as |
75 | well. |
76 | |
77 | =head2 Accessors |
78 | |
79 | A lot of the methods in this package are accessors to the various |
80 | fields in the tar header: |
81 | |
82 | =over 4 |
83 | |
84 | =item name |
85 | |
86 | The file's name |
87 | |
88 | =item mode |
89 | |
90 | The file's mode |
91 | |
92 | =item uid |
93 | |
94 | The user id owning the file |
95 | |
96 | =item gid |
97 | |
98 | The group id owning the file |
99 | |
100 | =item size |
101 | |
102 | File size in bytes |
103 | |
104 | =item mtime |
105 | |
106 | Modification time. Adjusted to mac-time on MacOS if required |
107 | |
108 | =item chksum |
109 | |
110 | Checksum field for the tar header |
111 | |
112 | =item type |
113 | |
114 | File type -- numeric, but comparable to exported constants -- see |
115 | Archive::Tar's documentation |
116 | |
117 | =item linkname |
118 | |
119 | If the file is a symlink, the file it's pointing to |
120 | |
121 | =item magic |
122 | |
123 | Tar magic string -- not useful for most users |
124 | |
125 | =item version |
126 | |
127 | Tar version string -- not useful for most users |
128 | |
129 | =item uname |
130 | |
131 | The user name that owns the file |
132 | |
133 | =item gname |
134 | |
135 | The group name that owns the file |
136 | |
137 | =item devmajor |
138 | |
139 | Device major number in case of a special file |
140 | |
141 | =item devminor |
142 | |
143 | Device minor number in case of a special file |
144 | |
145 | =item prefix |
146 | |
147 | Any directory to prefix to the extraction path, if any |
148 | |
149 | =item raw |
150 | |
151 | Raw tar header -- not useful for most users |
152 | |
153 | =back |
154 | |
155 | =head1 Methods |
156 | |
157 | =head2 new( file => $path ) |
158 | |
159 | Returns a new Archive::Tar::File object from an existing file. |
160 | |
161 | Returns undef on failure. |
162 | |
163 | =head2 new( data => $path, $data, $opt ) |
164 | |
165 | Returns a new Archive::Tar::File object from data. |
166 | |
167 | C<$path> defines the file name (which need not exist), C<$data> the |
168 | file contents, and C<$opt> is a reference to a hash of attributes |
169 | which may be used to override the default attributes (fields in the |
170 | tar header), which are described above in the Accessors section. |
171 | |
172 | Returns undef on failure. |
173 | |
174 | =head2 new( chunk => $chunk ) |
175 | |
176 | Returns a new Archive::Tar::File object from a raw 512-byte tar |
177 | archive chunk. |
178 | |
179 | Returns undef on failure. |
180 | |
181 | =cut |
182 | |
183 | sub new { |
184 | my $class = shift; |
185 | my $what = shift; |
186 | |
187 | my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) : |
188 | ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) : |
189 | ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) : |
190 | undef; |
191 | |
192 | return $obj; |
193 | } |
194 | |
195 | ### copies the data, creates a clone ### |
196 | sub clone { |
197 | my $self = shift; |
198 | return bless { %$self }, ref $self; |
199 | } |
200 | |
201 | sub _new_from_chunk { |
202 | my $class = shift; |
01d11a1c |
203 | my $chunk = shift or return; # 512 bytes of tar header |
81a5970e |
204 | my %hash = @_; |
205 | |
206 | ### filter any arguments on defined-ness of values. |
207 | ### this allows overriding from what the tar-header is saying |
208 | ### about this tar-entry. Particularly useful for @LongLink files |
209 | my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash; |
39713df4 |
210 | |
211 | ### makes it start at 0 actually... :) ### |
212 | my $i = -1; |
213 | my %entry = map { |
214 | $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_ |
215 | } map { /^([^\0]*)/ } unpack( UNPACK, $chunk ); |
216 | |
81a5970e |
217 | my $obj = bless { %entry, %args }, $class; |
39713df4 |
218 | |
219 | ### magic is a filetype string.. it should have something like 'ustar' or |
220 | ### something similar... if the chunk is garbage, skip it |
221 | return unless $obj->magic !~ /\W/; |
222 | |
223 | ### store the original chunk ### |
224 | $obj->raw( $chunk ); |
225 | |
226 | $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) ); |
227 | $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) ); |
228 | |
229 | |
230 | return $obj; |
231 | |
232 | } |
233 | |
234 | sub _new_from_file { |
235 | my $class = shift; |
01d11a1c |
236 | my $path = shift; |
237 | |
238 | ### path has to at least exist |
239 | return unless defined $path; |
240 | |
39713df4 |
241 | my $type = __PACKAGE__->_filetype($path); |
242 | my $data = ''; |
243 | |
97a504ba |
244 | READ: { |
245 | unless ($type == DIR ) { |
246 | my $fh = IO::File->new; |
247 | |
248 | unless( $fh->open($path) ) { |
249 | ### dangling symlinks are fine, stop reading but continue |
250 | ### creating the object |
251 | last READ if $type == SYMLINK; |
252 | |
253 | ### otherwise, return from this function -- |
254 | ### anything that's *not* a symlink should be |
255 | ### resolvable |
256 | return; |
257 | } |
258 | |
259 | ### binmode needed to read files properly on win32 ### |
260 | binmode $fh; |
261 | $data = do { local $/; <$fh> }; |
262 | close $fh; |
263 | } |
39713df4 |
264 | } |
265 | |
266 | my @items = qw[mode uid gid size mtime]; |
267 | my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9]; |
268 | |
269 | ### you *must* set size == 0 on symlinks, or the next entry will be |
270 | ### though of as the contents of the symlink, which is wrong. |
271 | ### this fixes bug #7937 |
272 | $hash{size} = 0 if ($type == DIR or $type == SYMLINK); |
273 | $hash{mtime} -= TIME_OFFSET; |
274 | |
275 | ### strip the high bits off the mode, which we don't need to store |
276 | $hash{mode} = STRIP_MODE->( $hash{mode} ); |
277 | |
278 | |
279 | ### probably requires some file path munging here ... ### |
280 | ### name and prefix are set later |
281 | my $obj = { |
282 | %hash, |
283 | name => '', |
284 | chksum => CHECK_SUM, |
285 | type => $type, |
286 | linkname => ($type == SYMLINK and CAN_READLINK) |
287 | ? readlink $path |
288 | : '', |
289 | magic => MAGIC, |
290 | version => TAR_VERSION, |
291 | uname => UNAME->( $hash{uid} ), |
292 | gname => GNAME->( $hash{gid} ), |
293 | devmajor => 0, # not handled |
294 | devminor => 0, # not handled |
295 | prefix => '', |
296 | data => $data, |
297 | }; |
298 | |
299 | bless $obj, $class; |
300 | |
301 | ### fix up the prefix and file from the path |
302 | my($prefix,$file) = $obj->_prefix_and_file( $path ); |
303 | $obj->prefix( $prefix ); |
304 | $obj->name( $file ); |
305 | |
306 | return $obj; |
307 | } |
308 | |
309 | sub _new_from_data { |
310 | my $class = shift; |
01d11a1c |
311 | my $path = shift; return unless defined $path; |
39713df4 |
312 | my $data = shift; return unless defined $data; |
313 | my $opt = shift; |
314 | |
315 | my $obj = { |
316 | data => $data, |
317 | name => '', |
318 | mode => MODE, |
319 | uid => UID, |
320 | gid => GID, |
321 | size => length $data, |
322 | mtime => time - TIME_OFFSET, |
323 | chksum => CHECK_SUM, |
324 | type => FILE, |
325 | linkname => '', |
326 | magic => MAGIC, |
327 | version => TAR_VERSION, |
328 | uname => UNAME->( UID ), |
329 | gname => GNAME->( GID ), |
330 | devminor => 0, |
331 | devmajor => 0, |
332 | prefix => '', |
333 | }; |
334 | |
335 | ### overwrite with user options, if provided ### |
336 | if( $opt and ref $opt eq 'HASH' ) { |
337 | for my $key ( keys %$opt ) { |
338 | |
339 | ### don't write bogus options ### |
340 | next unless exists $obj->{$key}; |
341 | $obj->{$key} = $opt->{$key}; |
342 | } |
343 | } |
344 | |
345 | bless $obj, $class; |
346 | |
347 | ### fix up the prefix and file from the path |
348 | my($prefix,$file) = $obj->_prefix_and_file( $path ); |
349 | $obj->prefix( $prefix ); |
350 | $obj->name( $file ); |
351 | |
352 | return $obj; |
353 | } |
354 | |
355 | sub _prefix_and_file { |
356 | my $self = shift; |
357 | my $path = shift; |
358 | |
359 | my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir ); |
360 | my @dirs = File::Spec->splitdir( $dirs ); |
361 | |
362 | ### so sometimes the last element is '' -- probably when trailing |
363 | ### dir slashes are encountered... this is is of course pointless, |
364 | ### so remove it |
365 | pop @dirs while @dirs and not length $dirs[-1]; |
366 | |
367 | ### if it's a directory, then $file might be empty |
368 | $file = pop @dirs if $self->is_dir and not length $file; |
369 | |
370 | my $prefix = File::Spec::Unix->catdir( |
371 | grep { length } $vol, @dirs |
372 | ); |
373 | return( $prefix, $file ); |
374 | } |
375 | |
376 | sub _filetype { |
377 | my $self = shift; |
01d11a1c |
378 | my $file = shift; |
379 | |
380 | return unless defined $file; |
39713df4 |
381 | |
382 | return SYMLINK if (-l $file); # Symlink |
383 | |
384 | return FILE if (-f _); # Plain file |
385 | |
386 | return DIR if (-d _); # Directory |
387 | |
388 | return FIFO if (-p _); # Named pipe |
389 | |
390 | return SOCKET if (-S _); # Socket |
391 | |
392 | return BLOCKDEV if (-b _); # Block special |
393 | |
394 | return CHARDEV if (-c _); # Character special |
395 | |
396 | ### shouldn't happen, this is when making archives, not reading ### |
397 | return LONGLINK if ( $file eq LONGLINK_NAME ); |
398 | |
399 | return UNKNOWN; # Something else (like what?) |
400 | |
401 | } |
402 | |
403 | ### this method 'downgrades' a file to plain file -- this is used for |
404 | ### symlinks when FOLLOW_SYMLINKS is true. |
405 | sub _downgrade_to_plainfile { |
406 | my $entry = shift; |
407 | $entry->type( FILE ); |
408 | $entry->mode( MODE ); |
409 | $entry->linkname(''); |
410 | |
411 | return 1; |
412 | } |
413 | |
414 | =head2 full_path |
415 | |
416 | Returns the full path from the tar header; this is basically a |
417 | concatenation of the C<prefix> and C<name> fields. |
418 | |
419 | =cut |
420 | |
421 | sub full_path { |
422 | my $self = shift; |
423 | |
424 | ### if prefix field is emtpy |
425 | return $self->name unless defined $self->prefix and length $self->prefix; |
426 | |
427 | ### or otherwise, catfile'd |
428 | return File::Spec::Unix->catfile( $self->prefix, $self->name ); |
429 | } |
430 | |
431 | |
432 | =head2 validate |
433 | |
434 | Done by Archive::Tar internally when reading the tar file: |
435 | validate the header against the checksum to ensure integer tar file. |
436 | |
437 | Returns true on success, false on failure |
438 | |
439 | =cut |
440 | |
441 | sub validate { |
442 | my $self = shift; |
443 | |
444 | my $raw = $self->raw; |
445 | |
446 | ### don't know why this one is different from the one we /write/ ### |
447 | substr ($raw, 148, 8) = " "; |
448 | return unpack ("%16C*", $raw) == $self->chksum ? 1 : 0; |
449 | } |
450 | |
451 | =head2 has_content |
452 | |
453 | Returns a boolean to indicate whether the current object has content. |
454 | Some special files like directories and so on never will have any |
455 | content. This method is mainly to make sure you don't get warnings |
456 | for using uninitialized values when looking at an object's content. |
457 | |
458 | =cut |
459 | |
460 | sub has_content { |
461 | my $self = shift; |
462 | return defined $self->data() && length $self->data() ? 1 : 0; |
463 | } |
464 | |
465 | =head2 get_content |
466 | |
467 | Returns the current content for the in-memory file |
468 | |
469 | =cut |
470 | |
471 | sub get_content { |
472 | my $self = shift; |
473 | $self->data( ); |
474 | } |
475 | |
476 | =head2 get_content_by_ref |
477 | |
478 | Returns the current content for the in-memory file as a scalar |
479 | reference. Normal users won't need this, but it will save memory if |
480 | you are dealing with very large data files in your tar archive, since |
481 | it will pass the contents by reference, rather than make a copy of it |
482 | first. |
483 | |
484 | =cut |
485 | |
486 | sub get_content_by_ref { |
487 | my $self = shift; |
488 | |
489 | return \$self->{data}; |
490 | } |
491 | |
492 | =head2 replace_content( $content ) |
493 | |
494 | Replace the current content of the file with the new content. This |
495 | only affects the in-memory archive, not the on-disk version until |
496 | you write it. |
497 | |
498 | Returns true on success, false on failure. |
499 | |
500 | =cut |
501 | |
502 | sub replace_content { |
503 | my $self = shift; |
504 | my $data = shift || ''; |
505 | |
506 | $self->data( $data ); |
507 | $self->size( length $data ); |
508 | return 1; |
509 | } |
510 | |
511 | =head2 rename( $new_name ) |
512 | |
513 | Rename the current file to $new_name. |
514 | |
515 | Note that you must specify a Unix path for $new_name, since per tar |
516 | standard, all files in the archive must be Unix paths. |
517 | |
518 | Returns true on success and false on failure. |
519 | |
520 | =cut |
521 | |
522 | sub rename { |
523 | my $self = shift; |
01d11a1c |
524 | my $path = shift; |
525 | |
526 | return unless defined $path; |
39713df4 |
527 | |
528 | my ($prefix,$file) = $self->_prefix_and_file( $path ); |
529 | |
530 | $self->name( $file ); |
531 | $self->prefix( $prefix ); |
532 | |
533 | return 1; |
534 | } |
535 | |
536 | =head1 Convenience methods |
537 | |
538 | To quickly check the type of a C<Archive::Tar::File> object, you can |
539 | use the following methods: |
540 | |
541 | =over 4 |
542 | |
543 | =item is_file |
544 | |
545 | Returns true if the file is of type C<file> |
546 | |
547 | =item is_dir |
548 | |
549 | Returns true if the file is of type C<dir> |
550 | |
551 | =item is_hardlink |
552 | |
553 | Returns true if the file is of type C<hardlink> |
554 | |
555 | =item is_symlink |
556 | |
557 | Returns true if the file is of type C<symlink> |
558 | |
559 | =item is_chardev |
560 | |
561 | Returns true if the file is of type C<chardev> |
562 | |
563 | =item is_blockdev |
564 | |
565 | Returns true if the file is of type C<blockdev> |
566 | |
567 | =item is_fifo |
568 | |
569 | Returns true if the file is of type C<fifo> |
570 | |
571 | =item is_socket |
572 | |
573 | Returns true if the file is of type C<socket> |
574 | |
575 | =item is_longlink |
576 | |
577 | Returns true if the file is of type C<LongLink>. |
578 | Should not happen after a successful C<read>. |
579 | |
580 | =item is_label |
581 | |
582 | Returns true if the file is of type C<Label>. |
583 | Should not happen after a successful C<read>. |
584 | |
585 | =item is_unknown |
586 | |
587 | Returns true if the file type is C<unknown> |
588 | |
589 | =back |
590 | |
591 | =cut |
592 | |
593 | #stupid perl5.5.3 needs to warn if it's not numeric |
594 | sub is_file { local $^W; FILE == $_[0]->type } |
595 | sub is_dir { local $^W; DIR == $_[0]->type } |
596 | sub is_hardlink { local $^W; HARDLINK == $_[0]->type } |
597 | sub is_symlink { local $^W; SYMLINK == $_[0]->type } |
598 | sub is_chardev { local $^W; CHARDEV == $_[0]->type } |
599 | sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type } |
600 | sub is_fifo { local $^W; FIFO == $_[0]->type } |
601 | sub is_socket { local $^W; SOCKET == $_[0]->type } |
602 | sub is_unknown { local $^W; UNKNOWN == $_[0]->type } |
603 | sub is_longlink { local $^W; LONGLINK eq $_[0]->type } |
604 | sub is_label { local $^W; LABEL eq $_[0]->type } |
605 | |
606 | 1; |