Re: perl pragma [PATCH]
[p5sagit/p5-mst-13.2.git] / lib / Archive / Tar / File.pm
CommitLineData
39713df4 1package Archive::Tar::File;
2use strict;
3
4use IO::File;
81a5970e 5use File::Spec::Unix ();
6use File::Spec ();
7use File::Basename ();
8
39713df4 9use Archive::Tar::Constant;
10
11use vars qw[@ISA $VERSION];
12@ISA = qw[Archive::Tar];
13$VERSION = '0.02';
14
15### set value to 1 to oct() it during the unpack ###
16my $tmpl = [
17 name => 0, # string
18 mode => 1, # octal
19 uid => 1, # octal
20 gid => 1, # octal
21 size => 1, # octal
22 mtime => 1, # octal
23 chksum => 1, # octal
24 type => 0, # character
25 linkname => 0, # string
26 magic => 0, # string
27 version => 0, # 2 bytes
28 uname => 0, # string
29 gname => 0, # string
30 devmajor => 1, # octal
31 devminor => 1, # octal
32 prefix => 0,
33
34### end UNPACK items ###
35 raw => 0, # the raw data chunk
36 data => 0, # the data associated with the file --
37 # This might be very memory intensive
38];
39
40### install get/set accessors for this object.
41for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) {
42 my $key = $tmpl->[$i];
43 no strict 'refs';
44 *{__PACKAGE__."::$key"} = sub {
45 my $self = shift;
46 $self->{$key} = $_[0] if @_;
47
48 ### just in case the key is not there or undef or something ###
49 { local $^W = 0;
50 return $self->{$key};
51 }
52 }
53}
54
55=head1 NAME
56
57Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
58
59=head1 SYNOPSIS
60
61 my @items = $tar->get_files;
62
63 print $_->name, ' ', $_->size, "\n" for @items;
64
65 print $object->get_content;
66 $object->replace_content('new content');
67
68 $object->rename( 'new/full/path/to/file.c' );
69
70=head1 DESCRIPTION
71
72Archive::Tar::Files provides a neat little object layer for in-memory
73extracted files. It's mostly used internally in Archive::Tar to tidy
74up the code, but there's no reason users shouldn't use this API as
75well.
76
77=head2 Accessors
78
79A lot of the methods in this package are accessors to the various
80fields in the tar header:
81
82=over 4
83
84=item name
85
86The file's name
87
88=item mode
89
90The file's mode
91
92=item uid
93
94The user id owning the file
95
96=item gid
97
98The group id owning the file
99
100=item size
101
102File size in bytes
103
104=item mtime
105
106Modification time. Adjusted to mac-time on MacOS if required
107
108=item chksum
109
110Checksum field for the tar header
111
112=item type
113
114File type -- numeric, but comparable to exported constants -- see
115Archive::Tar's documentation
116
117=item linkname
118
119If the file is a symlink, the file it's pointing to
120
121=item magic
122
123Tar magic string -- not useful for most users
124
125=item version
126
127Tar version string -- not useful for most users
128
129=item uname
130
131The user name that owns the file
132
133=item gname
134
135The group name that owns the file
136
137=item devmajor
138
139Device major number in case of a special file
140
141=item devminor
142
143Device minor number in case of a special file
144
145=item prefix
146
147Any directory to prefix to the extraction path, if any
148
149=item raw
150
151Raw tar header -- not useful for most users
152
153=back
154
155=head1 Methods
156
157=head2 new( file => $path )
158
159Returns a new Archive::Tar::File object from an existing file.
160
161Returns undef on failure.
162
163=head2 new( data => $path, $data, $opt )
164
165Returns a new Archive::Tar::File object from data.
166
167C<$path> defines the file name (which need not exist), C<$data> the
168file contents, and C<$opt> is a reference to a hash of attributes
169which may be used to override the default attributes (fields in the
170tar header), which are described above in the Accessors section.
171
172Returns undef on failure.
173
174=head2 new( chunk => $chunk )
175
176Returns a new Archive::Tar::File object from a raw 512-byte tar
177archive chunk.
178
179Returns undef on failure.
180
181=cut
182
183sub new {
184 my $class = shift;
185 my $what = shift;
186
187 my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
188 ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
189 ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
190 undef;
191
192 return $obj;
193}
194
195### copies the data, creates a clone ###
196sub clone {
197 my $self = shift;
198 return bless { %$self }, ref $self;
199}
200
201sub _new_from_chunk {
202 my $class = shift;
203 my $chunk = shift or return;
81a5970e 204 my %hash = @_;
205
206 ### filter any arguments on defined-ness of values.
207 ### this allows overriding from what the tar-header is saying
208 ### about this tar-entry. Particularly useful for @LongLink files
209 my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
39713df4 210
211 ### makes it start at 0 actually... :) ###
212 my $i = -1;
213 my %entry = map {
214 $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_
215 } map { /^([^\0]*)/ } unpack( UNPACK, $chunk );
216
81a5970e 217 my $obj = bless { %entry, %args }, $class;
39713df4 218
219 ### magic is a filetype string.. it should have something like 'ustar' or
220 ### something similar... if the chunk is garbage, skip it
221 return unless $obj->magic !~ /\W/;
222
223 ### store the original chunk ###
224 $obj->raw( $chunk );
225
226 $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
227 $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
228
229
230 return $obj;
231
232}
233
234sub _new_from_file {
235 my $class = shift;
236 my $path = shift or return;
237 my $type = __PACKAGE__->_filetype($path);
238 my $data = '';
239
c3745331 240 unless ($type == DIR || $type == SYMLINK) {
39713df4 241 my $fh = IO::File->new;
242 $fh->open($path) or return;
243
244 ### binmode needed to read files properly on win32 ###
245 binmode $fh;
246 $data = do { local $/; <$fh> };
247 close $fh;
248 }
249
250 my @items = qw[mode uid gid size mtime];
251 my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
252
253 ### you *must* set size == 0 on symlinks, or the next entry will be
254 ### though of as the contents of the symlink, which is wrong.
255 ### this fixes bug #7937
256 $hash{size} = 0 if ($type == DIR or $type == SYMLINK);
257 $hash{mtime} -= TIME_OFFSET;
258
259 ### strip the high bits off the mode, which we don't need to store
260 $hash{mode} = STRIP_MODE->( $hash{mode} );
261
262
263 ### probably requires some file path munging here ... ###
264 ### name and prefix are set later
265 my $obj = {
266 %hash,
267 name => '',
268 chksum => CHECK_SUM,
269 type => $type,
270 linkname => ($type == SYMLINK and CAN_READLINK)
271 ? readlink $path
272 : '',
273 magic => MAGIC,
274 version => TAR_VERSION,
275 uname => UNAME->( $hash{uid} ),
276 gname => GNAME->( $hash{gid} ),
277 devmajor => 0, # not handled
278 devminor => 0, # not handled
279 prefix => '',
280 data => $data,
281 };
282
283 bless $obj, $class;
284
285 ### fix up the prefix and file from the path
286 my($prefix,$file) = $obj->_prefix_and_file( $path );
287 $obj->prefix( $prefix );
288 $obj->name( $file );
289
290 return $obj;
291}
292
293sub _new_from_data {
294 my $class = shift;
295 my $path = shift or return;
296 my $data = shift; return unless defined $data;
297 my $opt = shift;
298
299 my $obj = {
300 data => $data,
301 name => '',
302 mode => MODE,
303 uid => UID,
304 gid => GID,
305 size => length $data,
306 mtime => time - TIME_OFFSET,
307 chksum => CHECK_SUM,
308 type => FILE,
309 linkname => '',
310 magic => MAGIC,
311 version => TAR_VERSION,
312 uname => UNAME->( UID ),
313 gname => GNAME->( GID ),
314 devminor => 0,
315 devmajor => 0,
316 prefix => '',
317 };
318
319 ### overwrite with user options, if provided ###
320 if( $opt and ref $opt eq 'HASH' ) {
321 for my $key ( keys %$opt ) {
322
323 ### don't write bogus options ###
324 next unless exists $obj->{$key};
325 $obj->{$key} = $opt->{$key};
326 }
327 }
328
329 bless $obj, $class;
330
331 ### fix up the prefix and file from the path
332 my($prefix,$file) = $obj->_prefix_and_file( $path );
333 $obj->prefix( $prefix );
334 $obj->name( $file );
335
336 return $obj;
337}
338
339sub _prefix_and_file {
340 my $self = shift;
341 my $path = shift;
342
343 my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
344 my @dirs = File::Spec->splitdir( $dirs );
345
346 ### so sometimes the last element is '' -- probably when trailing
347 ### dir slashes are encountered... this is is of course pointless,
348 ### so remove it
349 pop @dirs while @dirs and not length $dirs[-1];
350
351 ### if it's a directory, then $file might be empty
352 $file = pop @dirs if $self->is_dir and not length $file;
353
354 my $prefix = File::Spec::Unix->catdir(
355 grep { length } $vol, @dirs
356 );
357 return( $prefix, $file );
358}
359
360sub _filetype {
361 my $self = shift;
362 my $file = shift or return;
363
364 return SYMLINK if (-l $file); # Symlink
365
366 return FILE if (-f _); # Plain file
367
368 return DIR if (-d _); # Directory
369
370 return FIFO if (-p _); # Named pipe
371
372 return SOCKET if (-S _); # Socket
373
374 return BLOCKDEV if (-b _); # Block special
375
376 return CHARDEV if (-c _); # Character special
377
378 ### shouldn't happen, this is when making archives, not reading ###
379 return LONGLINK if ( $file eq LONGLINK_NAME );
380
381 return UNKNOWN; # Something else (like what?)
382
383}
384
385### this method 'downgrades' a file to plain file -- this is used for
386### symlinks when FOLLOW_SYMLINKS is true.
387sub _downgrade_to_plainfile {
388 my $entry = shift;
389 $entry->type( FILE );
390 $entry->mode( MODE );
391 $entry->linkname('');
392
393 return 1;
394}
395
396=head2 full_path
397
398Returns the full path from the tar header; this is basically a
399concatenation of the C<prefix> and C<name> fields.
400
401=cut
402
403sub full_path {
404 my $self = shift;
405
406 ### if prefix field is emtpy
407 return $self->name unless defined $self->prefix and length $self->prefix;
408
409 ### or otherwise, catfile'd
410 return File::Spec::Unix->catfile( $self->prefix, $self->name );
411}
412
413
414=head2 validate
415
416Done by Archive::Tar internally when reading the tar file:
417validate the header against the checksum to ensure integer tar file.
418
419Returns true on success, false on failure
420
421=cut
422
423sub validate {
424 my $self = shift;
425
426 my $raw = $self->raw;
427
428 ### don't know why this one is different from the one we /write/ ###
429 substr ($raw, 148, 8) = " ";
430 return unpack ("%16C*", $raw) == $self->chksum ? 1 : 0;
431}
432
433=head2 has_content
434
435Returns a boolean to indicate whether the current object has content.
436Some special files like directories and so on never will have any
437content. This method is mainly to make sure you don't get warnings
438for using uninitialized values when looking at an object's content.
439
440=cut
441
442sub has_content {
443 my $self = shift;
444 return defined $self->data() && length $self->data() ? 1 : 0;
445}
446
447=head2 get_content
448
449Returns the current content for the in-memory file
450
451=cut
452
453sub get_content {
454 my $self = shift;
455 $self->data( );
456}
457
458=head2 get_content_by_ref
459
460Returns the current content for the in-memory file as a scalar
461reference. Normal users won't need this, but it will save memory if
462you are dealing with very large data files in your tar archive, since
463it will pass the contents by reference, rather than make a copy of it
464first.
465
466=cut
467
468sub get_content_by_ref {
469 my $self = shift;
470
471 return \$self->{data};
472}
473
474=head2 replace_content( $content )
475
476Replace the current content of the file with the new content. This
477only affects the in-memory archive, not the on-disk version until
478you write it.
479
480Returns true on success, false on failure.
481
482=cut
483
484sub replace_content {
485 my $self = shift;
486 my $data = shift || '';
487
488 $self->data( $data );
489 $self->size( length $data );
490 return 1;
491}
492
493=head2 rename( $new_name )
494
495Rename the current file to $new_name.
496
497Note that you must specify a Unix path for $new_name, since per tar
498standard, all files in the archive must be Unix paths.
499
500Returns true on success and false on failure.
501
502=cut
503
504sub rename {
505 my $self = shift;
506 my $path = shift or return;
507
508 my ($prefix,$file) = $self->_prefix_and_file( $path );
509
510 $self->name( $file );
511 $self->prefix( $prefix );
512
513 return 1;
514}
515
516=head1 Convenience methods
517
518To quickly check the type of a C<Archive::Tar::File> object, you can
519use the following methods:
520
521=over 4
522
523=item is_file
524
525Returns true if the file is of type C<file>
526
527=item is_dir
528
529Returns true if the file is of type C<dir>
530
531=item is_hardlink
532
533Returns true if the file is of type C<hardlink>
534
535=item is_symlink
536
537Returns true if the file is of type C<symlink>
538
539=item is_chardev
540
541Returns true if the file is of type C<chardev>
542
543=item is_blockdev
544
545Returns true if the file is of type C<blockdev>
546
547=item is_fifo
548
549Returns true if the file is of type C<fifo>
550
551=item is_socket
552
553Returns true if the file is of type C<socket>
554
555=item is_longlink
556
557Returns true if the file is of type C<LongLink>.
558Should not happen after a successful C<read>.
559
560=item is_label
561
562Returns true if the file is of type C<Label>.
563Should not happen after a successful C<read>.
564
565=item is_unknown
566
567Returns true if the file type is C<unknown>
568
569=back
570
571=cut
572
573#stupid perl5.5.3 needs to warn if it's not numeric
574sub is_file { local $^W; FILE == $_[0]->type }
575sub is_dir { local $^W; DIR == $_[0]->type }
576sub is_hardlink { local $^W; HARDLINK == $_[0]->type }
577sub is_symlink { local $^W; SYMLINK == $_[0]->type }
578sub is_chardev { local $^W; CHARDEV == $_[0]->type }
579sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type }
580sub is_fifo { local $^W; FIFO == $_[0]->type }
581sub is_socket { local $^W; SOCKET == $_[0]->type }
582sub is_unknown { local $^W; UNKNOWN == $_[0]->type }
583sub is_longlink { local $^W; LONGLINK eq $_[0]->type }
584sub is_label { local $^W; LABEL eq $_[0]->type }
585
5861;