Commit | Line | Data |
39713df4 |
1 | package Archive::Tar::File; |
2 | use strict; |
3 | |
4 | use IO::File; |
81a5970e |
5 | use File::Spec::Unix (); |
6 | use File::Spec (); |
7 | use File::Basename (); |
8 | |
39713df4 |
9 | use Archive::Tar::Constant; |
10 | |
11 | use vars qw[@ISA $VERSION]; |
12 | @ISA = qw[Archive::Tar]; |
13 | $VERSION = '0.02'; |
14 | |
15 | ### set value to 1 to oct() it during the unpack ### |
16 | my $tmpl = [ |
17 | name => 0, # string |
18 | mode => 1, # octal |
19 | uid => 1, # octal |
20 | gid => 1, # octal |
21 | size => 1, # octal |
22 | mtime => 1, # octal |
23 | chksum => 1, # octal |
24 | type => 0, # character |
25 | linkname => 0, # string |
26 | magic => 0, # string |
27 | version => 0, # 2 bytes |
28 | uname => 0, # string |
29 | gname => 0, # string |
30 | devmajor => 1, # octal |
31 | devminor => 1, # octal |
32 | prefix => 0, |
33 | |
34 | ### end UNPACK items ### |
35 | raw => 0, # the raw data chunk |
36 | data => 0, # the data associated with the file -- |
37 | # This might be very memory intensive |
38 | ]; |
39 | |
40 | ### install get/set accessors for this object. |
41 | for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) { |
42 | my $key = $tmpl->[$i]; |
43 | no strict 'refs'; |
44 | *{__PACKAGE__."::$key"} = sub { |
45 | my $self = shift; |
46 | $self->{$key} = $_[0] if @_; |
47 | |
48 | ### just in case the key is not there or undef or something ### |
49 | { local $^W = 0; |
50 | return $self->{$key}; |
51 | } |
52 | } |
53 | } |
54 | |
55 | =head1 NAME |
56 | |
57 | Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar |
58 | |
59 | =head1 SYNOPSIS |
60 | |
61 | my @items = $tar->get_files; |
62 | |
63 | print $_->name, ' ', $_->size, "\n" for @items; |
64 | |
65 | print $object->get_content; |
66 | $object->replace_content('new content'); |
67 | |
68 | $object->rename( 'new/full/path/to/file.c' ); |
69 | |
70 | =head1 DESCRIPTION |
71 | |
72 | Archive::Tar::Files provides a neat little object layer for in-memory |
73 | extracted files. It's mostly used internally in Archive::Tar to tidy |
74 | up the code, but there's no reason users shouldn't use this API as |
75 | well. |
76 | |
77 | =head2 Accessors |
78 | |
79 | A lot of the methods in this package are accessors to the various |
80 | fields in the tar header: |
81 | |
82 | =over 4 |
83 | |
84 | =item name |
85 | |
86 | The file's name |
87 | |
88 | =item mode |
89 | |
90 | The file's mode |
91 | |
92 | =item uid |
93 | |
94 | The user id owning the file |
95 | |
96 | =item gid |
97 | |
98 | The group id owning the file |
99 | |
100 | =item size |
101 | |
102 | File size in bytes |
103 | |
104 | =item mtime |
105 | |
106 | Modification time. Adjusted to mac-time on MacOS if required |
107 | |
108 | =item chksum |
109 | |
110 | Checksum field for the tar header |
111 | |
112 | =item type |
113 | |
114 | File type -- numeric, but comparable to exported constants -- see |
115 | Archive::Tar's documentation |
116 | |
117 | =item linkname |
118 | |
119 | If the file is a symlink, the file it's pointing to |
120 | |
121 | =item magic |
122 | |
123 | Tar magic string -- not useful for most users |
124 | |
125 | =item version |
126 | |
127 | Tar version string -- not useful for most users |
128 | |
129 | =item uname |
130 | |
131 | The user name that owns the file |
132 | |
133 | =item gname |
134 | |
135 | The group name that owns the file |
136 | |
137 | =item devmajor |
138 | |
139 | Device major number in case of a special file |
140 | |
141 | =item devminor |
142 | |
143 | Device minor number in case of a special file |
144 | |
145 | =item prefix |
146 | |
147 | Any directory to prefix to the extraction path, if any |
148 | |
149 | =item raw |
150 | |
151 | Raw tar header -- not useful for most users |
152 | |
153 | =back |
154 | |
155 | =head1 Methods |
156 | |
157 | =head2 new( file => $path ) |
158 | |
159 | Returns a new Archive::Tar::File object from an existing file. |
160 | |
161 | Returns undef on failure. |
162 | |
163 | =head2 new( data => $path, $data, $opt ) |
164 | |
165 | Returns a new Archive::Tar::File object from data. |
166 | |
167 | C<$path> defines the file name (which need not exist), C<$data> the |
168 | file contents, and C<$opt> is a reference to a hash of attributes |
169 | which may be used to override the default attributes (fields in the |
170 | tar header), which are described above in the Accessors section. |
171 | |
172 | Returns undef on failure. |
173 | |
174 | =head2 new( chunk => $chunk ) |
175 | |
176 | Returns a new Archive::Tar::File object from a raw 512-byte tar |
177 | archive chunk. |
178 | |
179 | Returns undef on failure. |
180 | |
181 | =cut |
182 | |
183 | sub new { |
184 | my $class = shift; |
185 | my $what = shift; |
186 | |
187 | my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) : |
188 | ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) : |
189 | ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) : |
190 | undef; |
191 | |
192 | return $obj; |
193 | } |
194 | |
195 | ### copies the data, creates a clone ### |
196 | sub clone { |
197 | my $self = shift; |
198 | return bless { %$self }, ref $self; |
199 | } |
200 | |
201 | sub _new_from_chunk { |
202 | my $class = shift; |
203 | my $chunk = shift or return; |
81a5970e |
204 | my %hash = @_; |
205 | |
206 | ### filter any arguments on defined-ness of values. |
207 | ### this allows overriding from what the tar-header is saying |
208 | ### about this tar-entry. Particularly useful for @LongLink files |
209 | my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash; |
39713df4 |
210 | |
211 | ### makes it start at 0 actually... :) ### |
212 | my $i = -1; |
213 | my %entry = map { |
214 | $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_ |
215 | } map { /^([^\0]*)/ } unpack( UNPACK, $chunk ); |
216 | |
81a5970e |
217 | my $obj = bless { %entry, %args }, $class; |
39713df4 |
218 | |
219 | ### magic is a filetype string.. it should have something like 'ustar' or |
220 | ### something similar... if the chunk is garbage, skip it |
221 | return unless $obj->magic !~ /\W/; |
222 | |
223 | ### store the original chunk ### |
224 | $obj->raw( $chunk ); |
225 | |
226 | $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) ); |
227 | $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) ); |
228 | |
229 | |
230 | return $obj; |
231 | |
232 | } |
233 | |
234 | sub _new_from_file { |
235 | my $class = shift; |
236 | my $path = shift or return; |
237 | my $type = __PACKAGE__->_filetype($path); |
238 | my $data = ''; |
239 | |
240 | unless ($type == DIR) { |
241 | my $fh = IO::File->new; |
242 | $fh->open($path) or return; |
243 | |
244 | ### binmode needed to read files properly on win32 ### |
245 | binmode $fh; |
246 | $data = do { local $/; <$fh> }; |
247 | close $fh; |
248 | } |
249 | |
250 | my @items = qw[mode uid gid size mtime]; |
251 | my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9]; |
252 | |
253 | ### you *must* set size == 0 on symlinks, or the next entry will be |
254 | ### though of as the contents of the symlink, which is wrong. |
255 | ### this fixes bug #7937 |
256 | $hash{size} = 0 if ($type == DIR or $type == SYMLINK); |
257 | $hash{mtime} -= TIME_OFFSET; |
258 | |
259 | ### strip the high bits off the mode, which we don't need to store |
260 | $hash{mode} = STRIP_MODE->( $hash{mode} ); |
261 | |
262 | |
263 | ### probably requires some file path munging here ... ### |
264 | ### name and prefix are set later |
265 | my $obj = { |
266 | %hash, |
267 | name => '', |
268 | chksum => CHECK_SUM, |
269 | type => $type, |
270 | linkname => ($type == SYMLINK and CAN_READLINK) |
271 | ? readlink $path |
272 | : '', |
273 | magic => MAGIC, |
274 | version => TAR_VERSION, |
275 | uname => UNAME->( $hash{uid} ), |
276 | gname => GNAME->( $hash{gid} ), |
277 | devmajor => 0, # not handled |
278 | devminor => 0, # not handled |
279 | prefix => '', |
280 | data => $data, |
281 | }; |
282 | |
283 | bless $obj, $class; |
284 | |
285 | ### fix up the prefix and file from the path |
286 | my($prefix,$file) = $obj->_prefix_and_file( $path ); |
287 | $obj->prefix( $prefix ); |
288 | $obj->name( $file ); |
289 | |
290 | return $obj; |
291 | } |
292 | |
293 | sub _new_from_data { |
294 | my $class = shift; |
295 | my $path = shift or return; |
296 | my $data = shift; return unless defined $data; |
297 | my $opt = shift; |
298 | |
299 | my $obj = { |
300 | data => $data, |
301 | name => '', |
302 | mode => MODE, |
303 | uid => UID, |
304 | gid => GID, |
305 | size => length $data, |
306 | mtime => time - TIME_OFFSET, |
307 | chksum => CHECK_SUM, |
308 | type => FILE, |
309 | linkname => '', |
310 | magic => MAGIC, |
311 | version => TAR_VERSION, |
312 | uname => UNAME->( UID ), |
313 | gname => GNAME->( GID ), |
314 | devminor => 0, |
315 | devmajor => 0, |
316 | prefix => '', |
317 | }; |
318 | |
319 | ### overwrite with user options, if provided ### |
320 | if( $opt and ref $opt eq 'HASH' ) { |
321 | for my $key ( keys %$opt ) { |
322 | |
323 | ### don't write bogus options ### |
324 | next unless exists $obj->{$key}; |
325 | $obj->{$key} = $opt->{$key}; |
326 | } |
327 | } |
328 | |
329 | bless $obj, $class; |
330 | |
331 | ### fix up the prefix and file from the path |
332 | my($prefix,$file) = $obj->_prefix_and_file( $path ); |
333 | $obj->prefix( $prefix ); |
334 | $obj->name( $file ); |
335 | |
336 | return $obj; |
337 | } |
338 | |
339 | sub _prefix_and_file { |
340 | my $self = shift; |
341 | my $path = shift; |
342 | |
343 | my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir ); |
344 | my @dirs = File::Spec->splitdir( $dirs ); |
345 | |
346 | ### so sometimes the last element is '' -- probably when trailing |
347 | ### dir slashes are encountered... this is is of course pointless, |
348 | ### so remove it |
349 | pop @dirs while @dirs and not length $dirs[-1]; |
350 | |
351 | ### if it's a directory, then $file might be empty |
352 | $file = pop @dirs if $self->is_dir and not length $file; |
353 | |
354 | my $prefix = File::Spec::Unix->catdir( |
355 | grep { length } $vol, @dirs |
356 | ); |
357 | return( $prefix, $file ); |
358 | } |
359 | |
360 | sub _filetype { |
361 | my $self = shift; |
362 | my $file = shift or return; |
363 | |
364 | return SYMLINK if (-l $file); # Symlink |
365 | |
366 | return FILE if (-f _); # Plain file |
367 | |
368 | return DIR if (-d _); # Directory |
369 | |
370 | return FIFO if (-p _); # Named pipe |
371 | |
372 | return SOCKET if (-S _); # Socket |
373 | |
374 | return BLOCKDEV if (-b _); # Block special |
375 | |
376 | return CHARDEV if (-c _); # Character special |
377 | |
378 | ### shouldn't happen, this is when making archives, not reading ### |
379 | return LONGLINK if ( $file eq LONGLINK_NAME ); |
380 | |
381 | return UNKNOWN; # Something else (like what?) |
382 | |
383 | } |
384 | |
385 | ### this method 'downgrades' a file to plain file -- this is used for |
386 | ### symlinks when FOLLOW_SYMLINKS is true. |
387 | sub _downgrade_to_plainfile { |
388 | my $entry = shift; |
389 | $entry->type( FILE ); |
390 | $entry->mode( MODE ); |
391 | $entry->linkname(''); |
392 | |
393 | return 1; |
394 | } |
395 | |
396 | =head2 full_path |
397 | |
398 | Returns the full path from the tar header; this is basically a |
399 | concatenation of the C<prefix> and C<name> fields. |
400 | |
401 | =cut |
402 | |
403 | sub full_path { |
404 | my $self = shift; |
405 | |
406 | ### if prefix field is emtpy |
407 | return $self->name unless defined $self->prefix and length $self->prefix; |
408 | |
409 | ### or otherwise, catfile'd |
410 | return File::Spec::Unix->catfile( $self->prefix, $self->name ); |
411 | } |
412 | |
413 | |
414 | =head2 validate |
415 | |
416 | Done by Archive::Tar internally when reading the tar file: |
417 | validate the header against the checksum to ensure integer tar file. |
418 | |
419 | Returns true on success, false on failure |
420 | |
421 | =cut |
422 | |
423 | sub validate { |
424 | my $self = shift; |
425 | |
426 | my $raw = $self->raw; |
427 | |
428 | ### don't know why this one is different from the one we /write/ ### |
429 | substr ($raw, 148, 8) = " "; |
430 | return unpack ("%16C*", $raw) == $self->chksum ? 1 : 0; |
431 | } |
432 | |
433 | =head2 has_content |
434 | |
435 | Returns a boolean to indicate whether the current object has content. |
436 | Some special files like directories and so on never will have any |
437 | content. This method is mainly to make sure you don't get warnings |
438 | for using uninitialized values when looking at an object's content. |
439 | |
440 | =cut |
441 | |
442 | sub has_content { |
443 | my $self = shift; |
444 | return defined $self->data() && length $self->data() ? 1 : 0; |
445 | } |
446 | |
447 | =head2 get_content |
448 | |
449 | Returns the current content for the in-memory file |
450 | |
451 | =cut |
452 | |
453 | sub get_content { |
454 | my $self = shift; |
455 | $self->data( ); |
456 | } |
457 | |
458 | =head2 get_content_by_ref |
459 | |
460 | Returns the current content for the in-memory file as a scalar |
461 | reference. Normal users won't need this, but it will save memory if |
462 | you are dealing with very large data files in your tar archive, since |
463 | it will pass the contents by reference, rather than make a copy of it |
464 | first. |
465 | |
466 | =cut |
467 | |
468 | sub get_content_by_ref { |
469 | my $self = shift; |
470 | |
471 | return \$self->{data}; |
472 | } |
473 | |
474 | =head2 replace_content( $content ) |
475 | |
476 | Replace the current content of the file with the new content. This |
477 | only affects the in-memory archive, not the on-disk version until |
478 | you write it. |
479 | |
480 | Returns true on success, false on failure. |
481 | |
482 | =cut |
483 | |
484 | sub replace_content { |
485 | my $self = shift; |
486 | my $data = shift || ''; |
487 | |
488 | $self->data( $data ); |
489 | $self->size( length $data ); |
490 | return 1; |
491 | } |
492 | |
493 | =head2 rename( $new_name ) |
494 | |
495 | Rename the current file to $new_name. |
496 | |
497 | Note that you must specify a Unix path for $new_name, since per tar |
498 | standard, all files in the archive must be Unix paths. |
499 | |
500 | Returns true on success and false on failure. |
501 | |
502 | =cut |
503 | |
504 | sub rename { |
505 | my $self = shift; |
506 | my $path = shift or return; |
507 | |
508 | my ($prefix,$file) = $self->_prefix_and_file( $path ); |
509 | |
510 | $self->name( $file ); |
511 | $self->prefix( $prefix ); |
512 | |
513 | return 1; |
514 | } |
515 | |
516 | =head1 Convenience methods |
517 | |
518 | To quickly check the type of a C<Archive::Tar::File> object, you can |
519 | use the following methods: |
520 | |
521 | =over 4 |
522 | |
523 | =item is_file |
524 | |
525 | Returns true if the file is of type C<file> |
526 | |
527 | =item is_dir |
528 | |
529 | Returns true if the file is of type C<dir> |
530 | |
531 | =item is_hardlink |
532 | |
533 | Returns true if the file is of type C<hardlink> |
534 | |
535 | =item is_symlink |
536 | |
537 | Returns true if the file is of type C<symlink> |
538 | |
539 | =item is_chardev |
540 | |
541 | Returns true if the file is of type C<chardev> |
542 | |
543 | =item is_blockdev |
544 | |
545 | Returns true if the file is of type C<blockdev> |
546 | |
547 | =item is_fifo |
548 | |
549 | Returns true if the file is of type C<fifo> |
550 | |
551 | =item is_socket |
552 | |
553 | Returns true if the file is of type C<socket> |
554 | |
555 | =item is_longlink |
556 | |
557 | Returns true if the file is of type C<LongLink>. |
558 | Should not happen after a successful C<read>. |
559 | |
560 | =item is_label |
561 | |
562 | Returns true if the file is of type C<Label>. |
563 | Should not happen after a successful C<read>. |
564 | |
565 | =item is_unknown |
566 | |
567 | Returns true if the file type is C<unknown> |
568 | |
569 | =back |
570 | |
571 | =cut |
572 | |
573 | #stupid perl5.5.3 needs to warn if it's not numeric |
574 | sub is_file { local $^W; FILE == $_[0]->type } |
575 | sub is_dir { local $^W; DIR == $_[0]->type } |
576 | sub is_hardlink { local $^W; HARDLINK == $_[0]->type } |
577 | sub is_symlink { local $^W; SYMLINK == $_[0]->type } |
578 | sub is_chardev { local $^W; CHARDEV == $_[0]->type } |
579 | sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type } |
580 | sub is_fifo { local $^W; FIFO == $_[0]->type } |
581 | sub is_socket { local $^W; SOCKET == $_[0]->type } |
582 | sub is_unknown { local $^W; UNKNOWN == $_[0]->type } |
583 | sub is_longlink { local $^W; LONGLINK eq $_[0]->type } |
584 | sub is_label { local $^W; LABEL eq $_[0]->type } |
585 | |
586 | 1; |