Commit | Line | Data |
39713df4 |
1 | package Archive::Tar::File; |
2 | use strict; |
3 | |
4 | use IO::File; |
81a5970e |
5 | use File::Spec::Unix (); |
6 | use File::Spec (); |
7 | use File::Basename (); |
8 | |
39713df4 |
9 | use Archive::Tar::Constant; |
10 | |
11 | use vars qw[@ISA $VERSION]; |
12 | @ISA = qw[Archive::Tar]; |
13 | $VERSION = '0.02'; |
14 | |
15 | ### set value to 1 to oct() it during the unpack ### |
16 | my $tmpl = [ |
17 | name => 0, # string |
18 | mode => 1, # octal |
19 | uid => 1, # octal |
20 | gid => 1, # octal |
21 | size => 1, # octal |
22 | mtime => 1, # octal |
23 | chksum => 1, # octal |
24 | type => 0, # character |
25 | linkname => 0, # string |
26 | magic => 0, # string |
27 | version => 0, # 2 bytes |
28 | uname => 0, # string |
29 | gname => 0, # string |
30 | devmajor => 1, # octal |
31 | devminor => 1, # octal |
32 | prefix => 0, |
33 | |
34 | ### end UNPACK items ### |
35 | raw => 0, # the raw data chunk |
36 | data => 0, # the data associated with the file -- |
37 | # This might be very memory intensive |
38 | ]; |
39 | |
40 | ### install get/set accessors for this object. |
41 | for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) { |
42 | my $key = $tmpl->[$i]; |
43 | no strict 'refs'; |
44 | *{__PACKAGE__."::$key"} = sub { |
45 | my $self = shift; |
46 | $self->{$key} = $_[0] if @_; |
47 | |
48 | ### just in case the key is not there or undef or something ### |
49 | { local $^W = 0; |
50 | return $self->{$key}; |
51 | } |
52 | } |
53 | } |
54 | |
55 | =head1 NAME |
56 | |
57 | Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar |
58 | |
59 | =head1 SYNOPSIS |
60 | |
61 | my @items = $tar->get_files; |
62 | |
63 | print $_->name, ' ', $_->size, "\n" for @items; |
64 | |
65 | print $object->get_content; |
66 | $object->replace_content('new content'); |
67 | |
68 | $object->rename( 'new/full/path/to/file.c' ); |
69 | |
70 | =head1 DESCRIPTION |
71 | |
72 | Archive::Tar::Files provides a neat little object layer for in-memory |
73 | extracted files. It's mostly used internally in Archive::Tar to tidy |
74 | up the code, but there's no reason users shouldn't use this API as |
75 | well. |
76 | |
77 | =head2 Accessors |
78 | |
79 | A lot of the methods in this package are accessors to the various |
80 | fields in the tar header: |
81 | |
82 | =over 4 |
83 | |
84 | =item name |
85 | |
86 | The file's name |
87 | |
88 | =item mode |
89 | |
90 | The file's mode |
91 | |
92 | =item uid |
93 | |
94 | The user id owning the file |
95 | |
96 | =item gid |
97 | |
98 | The group id owning the file |
99 | |
100 | =item size |
101 | |
102 | File size in bytes |
103 | |
104 | =item mtime |
105 | |
106 | Modification time. Adjusted to mac-time on MacOS if required |
107 | |
108 | =item chksum |
109 | |
110 | Checksum field for the tar header |
111 | |
112 | =item type |
113 | |
114 | File type -- numeric, but comparable to exported constants -- see |
115 | Archive::Tar's documentation |
116 | |
117 | =item linkname |
118 | |
119 | If the file is a symlink, the file it's pointing to |
120 | |
121 | =item magic |
122 | |
123 | Tar magic string -- not useful for most users |
124 | |
125 | =item version |
126 | |
127 | Tar version string -- not useful for most users |
128 | |
129 | =item uname |
130 | |
131 | The user name that owns the file |
132 | |
133 | =item gname |
134 | |
135 | The group name that owns the file |
136 | |
137 | =item devmajor |
138 | |
139 | Device major number in case of a special file |
140 | |
141 | =item devminor |
142 | |
143 | Device minor number in case of a special file |
144 | |
145 | =item prefix |
146 | |
147 | Any directory to prefix to the extraction path, if any |
148 | |
149 | =item raw |
150 | |
151 | Raw tar header -- not useful for most users |
152 | |
153 | =back |
154 | |
155 | =head1 Methods |
156 | |
157 | =head2 new( file => $path ) |
158 | |
159 | Returns a new Archive::Tar::File object from an existing file. |
160 | |
161 | Returns undef on failure. |
162 | |
163 | =head2 new( data => $path, $data, $opt ) |
164 | |
165 | Returns a new Archive::Tar::File object from data. |
166 | |
167 | C<$path> defines the file name (which need not exist), C<$data> the |
168 | file contents, and C<$opt> is a reference to a hash of attributes |
169 | which may be used to override the default attributes (fields in the |
170 | tar header), which are described above in the Accessors section. |
171 | |
172 | Returns undef on failure. |
173 | |
174 | =head2 new( chunk => $chunk ) |
175 | |
176 | Returns a new Archive::Tar::File object from a raw 512-byte tar |
177 | archive chunk. |
178 | |
179 | Returns undef on failure. |
180 | |
181 | =cut |
182 | |
183 | sub new { |
184 | my $class = shift; |
185 | my $what = shift; |
186 | |
187 | my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) : |
188 | ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) : |
189 | ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) : |
190 | undef; |
191 | |
192 | return $obj; |
193 | } |
194 | |
195 | ### copies the data, creates a clone ### |
196 | sub clone { |
197 | my $self = shift; |
198 | return bless { %$self }, ref $self; |
199 | } |
200 | |
201 | sub _new_from_chunk { |
202 | my $class = shift; |
203 | my $chunk = shift or return; |
81a5970e |
204 | my %hash = @_; |
205 | |
206 | ### filter any arguments on defined-ness of values. |
207 | ### this allows overriding from what the tar-header is saying |
208 | ### about this tar-entry. Particularly useful for @LongLink files |
209 | my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash; |
39713df4 |
210 | |
211 | ### makes it start at 0 actually... :) ### |
212 | my $i = -1; |
213 | my %entry = map { |
214 | $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_ |
215 | } map { /^([^\0]*)/ } unpack( UNPACK, $chunk ); |
216 | |
81a5970e |
217 | my $obj = bless { %entry, %args }, $class; |
39713df4 |
218 | |
219 | ### magic is a filetype string.. it should have something like 'ustar' or |
220 | ### something similar... if the chunk is garbage, skip it |
221 | return unless $obj->magic !~ /\W/; |
222 | |
223 | ### store the original chunk ### |
224 | $obj->raw( $chunk ); |
225 | |
226 | $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) ); |
227 | $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) ); |
228 | |
229 | |
230 | return $obj; |
231 | |
232 | } |
233 | |
234 | sub _new_from_file { |
235 | my $class = shift; |
236 | my $path = shift or return; |
237 | my $type = __PACKAGE__->_filetype($path); |
238 | my $data = ''; |
239 | |
97a504ba |
240 | READ: { |
241 | unless ($type == DIR ) { |
242 | my $fh = IO::File->new; |
243 | |
244 | unless( $fh->open($path) ) { |
245 | ### dangling symlinks are fine, stop reading but continue |
246 | ### creating the object |
247 | last READ if $type == SYMLINK; |
248 | |
249 | ### otherwise, return from this function -- |
250 | ### anything that's *not* a symlink should be |
251 | ### resolvable |
252 | return; |
253 | } |
254 | |
255 | ### binmode needed to read files properly on win32 ### |
256 | binmode $fh; |
257 | $data = do { local $/; <$fh> }; |
258 | close $fh; |
259 | } |
39713df4 |
260 | } |
261 | |
262 | my @items = qw[mode uid gid size mtime]; |
263 | my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9]; |
264 | |
265 | ### you *must* set size == 0 on symlinks, or the next entry will be |
266 | ### though of as the contents of the symlink, which is wrong. |
267 | ### this fixes bug #7937 |
268 | $hash{size} = 0 if ($type == DIR or $type == SYMLINK); |
269 | $hash{mtime} -= TIME_OFFSET; |
270 | |
271 | ### strip the high bits off the mode, which we don't need to store |
272 | $hash{mode} = STRIP_MODE->( $hash{mode} ); |
273 | |
274 | |
275 | ### probably requires some file path munging here ... ### |
276 | ### name and prefix are set later |
277 | my $obj = { |
278 | %hash, |
279 | name => '', |
280 | chksum => CHECK_SUM, |
281 | type => $type, |
282 | linkname => ($type == SYMLINK and CAN_READLINK) |
283 | ? readlink $path |
284 | : '', |
285 | magic => MAGIC, |
286 | version => TAR_VERSION, |
287 | uname => UNAME->( $hash{uid} ), |
288 | gname => GNAME->( $hash{gid} ), |
289 | devmajor => 0, # not handled |
290 | devminor => 0, # not handled |
291 | prefix => '', |
292 | data => $data, |
293 | }; |
294 | |
295 | bless $obj, $class; |
296 | |
297 | ### fix up the prefix and file from the path |
298 | my($prefix,$file) = $obj->_prefix_and_file( $path ); |
299 | $obj->prefix( $prefix ); |
300 | $obj->name( $file ); |
301 | |
302 | return $obj; |
303 | } |
304 | |
305 | sub _new_from_data { |
306 | my $class = shift; |
307 | my $path = shift or return; |
308 | my $data = shift; return unless defined $data; |
309 | my $opt = shift; |
310 | |
311 | my $obj = { |
312 | data => $data, |
313 | name => '', |
314 | mode => MODE, |
315 | uid => UID, |
316 | gid => GID, |
317 | size => length $data, |
318 | mtime => time - TIME_OFFSET, |
319 | chksum => CHECK_SUM, |
320 | type => FILE, |
321 | linkname => '', |
322 | magic => MAGIC, |
323 | version => TAR_VERSION, |
324 | uname => UNAME->( UID ), |
325 | gname => GNAME->( GID ), |
326 | devminor => 0, |
327 | devmajor => 0, |
328 | prefix => '', |
329 | }; |
330 | |
331 | ### overwrite with user options, if provided ### |
332 | if( $opt and ref $opt eq 'HASH' ) { |
333 | for my $key ( keys %$opt ) { |
334 | |
335 | ### don't write bogus options ### |
336 | next unless exists $obj->{$key}; |
337 | $obj->{$key} = $opt->{$key}; |
338 | } |
339 | } |
340 | |
341 | bless $obj, $class; |
342 | |
343 | ### fix up the prefix and file from the path |
344 | my($prefix,$file) = $obj->_prefix_and_file( $path ); |
345 | $obj->prefix( $prefix ); |
346 | $obj->name( $file ); |
347 | |
348 | return $obj; |
349 | } |
350 | |
351 | sub _prefix_and_file { |
352 | my $self = shift; |
353 | my $path = shift; |
354 | |
355 | my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir ); |
356 | my @dirs = File::Spec->splitdir( $dirs ); |
357 | |
358 | ### so sometimes the last element is '' -- probably when trailing |
359 | ### dir slashes are encountered... this is is of course pointless, |
360 | ### so remove it |
361 | pop @dirs while @dirs and not length $dirs[-1]; |
362 | |
363 | ### if it's a directory, then $file might be empty |
364 | $file = pop @dirs if $self->is_dir and not length $file; |
365 | |
366 | my $prefix = File::Spec::Unix->catdir( |
367 | grep { length } $vol, @dirs |
368 | ); |
369 | return( $prefix, $file ); |
370 | } |
371 | |
372 | sub _filetype { |
373 | my $self = shift; |
374 | my $file = shift or return; |
375 | |
376 | return SYMLINK if (-l $file); # Symlink |
377 | |
378 | return FILE if (-f _); # Plain file |
379 | |
380 | return DIR if (-d _); # Directory |
381 | |
382 | return FIFO if (-p _); # Named pipe |
383 | |
384 | return SOCKET if (-S _); # Socket |
385 | |
386 | return BLOCKDEV if (-b _); # Block special |
387 | |
388 | return CHARDEV if (-c _); # Character special |
389 | |
390 | ### shouldn't happen, this is when making archives, not reading ### |
391 | return LONGLINK if ( $file eq LONGLINK_NAME ); |
392 | |
393 | return UNKNOWN; # Something else (like what?) |
394 | |
395 | } |
396 | |
397 | ### this method 'downgrades' a file to plain file -- this is used for |
398 | ### symlinks when FOLLOW_SYMLINKS is true. |
399 | sub _downgrade_to_plainfile { |
400 | my $entry = shift; |
401 | $entry->type( FILE ); |
402 | $entry->mode( MODE ); |
403 | $entry->linkname(''); |
404 | |
405 | return 1; |
406 | } |
407 | |
408 | =head2 full_path |
409 | |
410 | Returns the full path from the tar header; this is basically a |
411 | concatenation of the C<prefix> and C<name> fields. |
412 | |
413 | =cut |
414 | |
415 | sub full_path { |
416 | my $self = shift; |
417 | |
418 | ### if prefix field is emtpy |
419 | return $self->name unless defined $self->prefix and length $self->prefix; |
420 | |
421 | ### or otherwise, catfile'd |
422 | return File::Spec::Unix->catfile( $self->prefix, $self->name ); |
423 | } |
424 | |
425 | |
426 | =head2 validate |
427 | |
428 | Done by Archive::Tar internally when reading the tar file: |
429 | validate the header against the checksum to ensure integer tar file. |
430 | |
431 | Returns true on success, false on failure |
432 | |
433 | =cut |
434 | |
435 | sub validate { |
436 | my $self = shift; |
437 | |
438 | my $raw = $self->raw; |
439 | |
440 | ### don't know why this one is different from the one we /write/ ### |
441 | substr ($raw, 148, 8) = " "; |
442 | return unpack ("%16C*", $raw) == $self->chksum ? 1 : 0; |
443 | } |
444 | |
445 | =head2 has_content |
446 | |
447 | Returns a boolean to indicate whether the current object has content. |
448 | Some special files like directories and so on never will have any |
449 | content. This method is mainly to make sure you don't get warnings |
450 | for using uninitialized values when looking at an object's content. |
451 | |
452 | =cut |
453 | |
454 | sub has_content { |
455 | my $self = shift; |
456 | return defined $self->data() && length $self->data() ? 1 : 0; |
457 | } |
458 | |
459 | =head2 get_content |
460 | |
461 | Returns the current content for the in-memory file |
462 | |
463 | =cut |
464 | |
465 | sub get_content { |
466 | my $self = shift; |
467 | $self->data( ); |
468 | } |
469 | |
470 | =head2 get_content_by_ref |
471 | |
472 | Returns the current content for the in-memory file as a scalar |
473 | reference. Normal users won't need this, but it will save memory if |
474 | you are dealing with very large data files in your tar archive, since |
475 | it will pass the contents by reference, rather than make a copy of it |
476 | first. |
477 | |
478 | =cut |
479 | |
480 | sub get_content_by_ref { |
481 | my $self = shift; |
482 | |
483 | return \$self->{data}; |
484 | } |
485 | |
486 | =head2 replace_content( $content ) |
487 | |
488 | Replace the current content of the file with the new content. This |
489 | only affects the in-memory archive, not the on-disk version until |
490 | you write it. |
491 | |
492 | Returns true on success, false on failure. |
493 | |
494 | =cut |
495 | |
496 | sub replace_content { |
497 | my $self = shift; |
498 | my $data = shift || ''; |
499 | |
500 | $self->data( $data ); |
501 | $self->size( length $data ); |
502 | return 1; |
503 | } |
504 | |
505 | =head2 rename( $new_name ) |
506 | |
507 | Rename the current file to $new_name. |
508 | |
509 | Note that you must specify a Unix path for $new_name, since per tar |
510 | standard, all files in the archive must be Unix paths. |
511 | |
512 | Returns true on success and false on failure. |
513 | |
514 | =cut |
515 | |
516 | sub rename { |
517 | my $self = shift; |
518 | my $path = shift or return; |
519 | |
520 | my ($prefix,$file) = $self->_prefix_and_file( $path ); |
521 | |
522 | $self->name( $file ); |
523 | $self->prefix( $prefix ); |
524 | |
525 | return 1; |
526 | } |
527 | |
528 | =head1 Convenience methods |
529 | |
530 | To quickly check the type of a C<Archive::Tar::File> object, you can |
531 | use the following methods: |
532 | |
533 | =over 4 |
534 | |
535 | =item is_file |
536 | |
537 | Returns true if the file is of type C<file> |
538 | |
539 | =item is_dir |
540 | |
541 | Returns true if the file is of type C<dir> |
542 | |
543 | =item is_hardlink |
544 | |
545 | Returns true if the file is of type C<hardlink> |
546 | |
547 | =item is_symlink |
548 | |
549 | Returns true if the file is of type C<symlink> |
550 | |
551 | =item is_chardev |
552 | |
553 | Returns true if the file is of type C<chardev> |
554 | |
555 | =item is_blockdev |
556 | |
557 | Returns true if the file is of type C<blockdev> |
558 | |
559 | =item is_fifo |
560 | |
561 | Returns true if the file is of type C<fifo> |
562 | |
563 | =item is_socket |
564 | |
565 | Returns true if the file is of type C<socket> |
566 | |
567 | =item is_longlink |
568 | |
569 | Returns true if the file is of type C<LongLink>. |
570 | Should not happen after a successful C<read>. |
571 | |
572 | =item is_label |
573 | |
574 | Returns true if the file is of type C<Label>. |
575 | Should not happen after a successful C<read>. |
576 | |
577 | =item is_unknown |
578 | |
579 | Returns true if the file type is C<unknown> |
580 | |
581 | =back |
582 | |
583 | =cut |
584 | |
585 | #stupid perl5.5.3 needs to warn if it's not numeric |
586 | sub is_file { local $^W; FILE == $_[0]->type } |
587 | sub is_dir { local $^W; DIR == $_[0]->type } |
588 | sub is_hardlink { local $^W; HARDLINK == $_[0]->type } |
589 | sub is_symlink { local $^W; SYMLINK == $_[0]->type } |
590 | sub is_chardev { local $^W; CHARDEV == $_[0]->type } |
591 | sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type } |
592 | sub is_fifo { local $^W; FIFO == $_[0]->type } |
593 | sub is_socket { local $^W; SOCKET == $_[0]->type } |
594 | sub is_unknown { local $^W; UNKNOWN == $_[0]->type } |
595 | sub is_longlink { local $^W; LONGLINK eq $_[0]->type } |
596 | sub is_label { local $^W; LABEL eq $_[0]->type } |
597 | |
598 | 1; |