Commit | Line | Data |
39713df4 |
1 | ### the gnu tar specification: |
f38c1908 |
2 | ### http://www.gnu.org/software/tar/manual/tar.html |
39713df4 |
3 | ### |
4 | ### and the pax format spec, which tar derives from: |
5 | ### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html |
6 | |
7 | package Archive::Tar; |
8 | require 5.005_03; |
9 | |
642eb381 |
10 | use Cwd; |
11 | use IO::Zlib; |
12 | use IO::File; |
13 | use Carp qw(carp croak); |
14 | use File::Spec (); |
15 | use File::Spec::Unix (); |
16 | use File::Path (); |
17 | |
18 | use Archive::Tar::File; |
19 | use Archive::Tar::Constant; |
20 | |
21 | require Exporter; |
22 | |
39713df4 |
23 | use strict; |
24 | use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD |
1c82faa7 |
25 | $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS |
642eb381 |
26 | $INSECURE_EXTRACT_MODE @ISA @EXPORT |
178aef9a |
27 | ]; |
28 | |
642eb381 |
29 | @ISA = qw[Exporter]; |
bef46b70 |
30 | @EXPORT = qw[ COMPRESS_GZIP COMPRESS_BZIP ]; |
178aef9a |
31 | $DEBUG = 0; |
32 | $WARN = 1; |
33 | $FOLLOW_SYMLINK = 0; |
1c82faa7 |
34 | $VERSION = "1.50"; |
178aef9a |
35 | $CHOWN = 1; |
36 | $CHMOD = 1; |
1c82faa7 |
37 | $SAME_PERMISSIONS = $> == 0 ? 1 : 0; |
178aef9a |
38 | $DO_NOT_USE_PREFIX = 0; |
39 | $INSECURE_EXTRACT_MODE = 0; |
39713df4 |
40 | |
41 | BEGIN { |
42 | use Config; |
43 | $HAS_PERLIO = $Config::Config{useperlio}; |
44 | |
45 | ### try and load IO::String anyway, so you can dynamically |
46 | ### switch between perlio and IO::String |
642eb381 |
47 | $HAS_IO_STRING = eval { |
39713df4 |
48 | require IO::String; |
49 | import IO::String; |
642eb381 |
50 | 1; |
51 | } || 0; |
39713df4 |
52 | } |
53 | |
39713df4 |
54 | =head1 NAME |
55 | |
56 | Archive::Tar - module for manipulations of tar archives |
57 | |
58 | =head1 SYNOPSIS |
59 | |
60 | use Archive::Tar; |
61 | my $tar = Archive::Tar->new; |
62 | |
642eb381 |
63 | $tar->read('origin.tgz'); |
39713df4 |
64 | $tar->extract(); |
65 | |
66 | $tar->add_files('file/foo.pl', 'docs/README'); |
67 | $tar->add_data('file/baz.txt', 'This is the contents now'); |
68 | |
69 | $tar->rename('oldname', 'new/file/name'); |
70 | |
642eb381 |
71 | $tar->write('files.tar'); # plain tar |
bef46b70 |
72 | $tar->write('files.tgz', COMPRESS_GZIP); # gzip compressed |
73 | $tar->write('files.tbz', COMPRESS_BZIP); # bzip2 compressed |
39713df4 |
74 | |
75 | =head1 DESCRIPTION |
76 | |
77 | Archive::Tar provides an object oriented mechanism for handling tar |
78 | files. It provides class methods for quick and easy files handling |
79 | while also allowing for the creation of tar file objects for custom |
80 | manipulation. If you have the IO::Zlib module installed, |
81 | Archive::Tar will also support compressed or gzipped tar files. |
82 | |
83 | An object of class Archive::Tar represents a .tar(.gz) archive full |
84 | of files and things. |
85 | |
86 | =head1 Object Methods |
87 | |
88 | =head2 Archive::Tar->new( [$file, $compressed] ) |
89 | |
90 | Returns a new Tar object. If given any arguments, C<new()> calls the |
91 | C<read()> method automatically, passing on the arguments provided to |
92 | the C<read()> method. |
93 | |
94 | If C<new()> is invoked with arguments and the C<read()> method fails |
95 | for any reason, C<new()> returns undef. |
96 | |
97 | =cut |
98 | |
99 | my $tmpl = { |
100 | _data => [ ], |
101 | _file => 'Unknown', |
102 | }; |
103 | |
104 | ### install get/set accessors for this object. |
105 | for my $key ( keys %$tmpl ) { |
106 | no strict 'refs'; |
107 | *{__PACKAGE__."::$key"} = sub { |
108 | my $self = shift; |
109 | $self->{$key} = $_[0] if @_; |
110 | return $self->{$key}; |
111 | } |
112 | } |
113 | |
114 | sub new { |
115 | my $class = shift; |
116 | $class = ref $class if ref $class; |
117 | |
118 | ### copying $tmpl here since a shallow copy makes it use the |
119 | ### same aref, causing for files to remain in memory always. |
120 | my $obj = bless { _data => [ ], _file => 'Unknown' }, $class; |
121 | |
122 | if (@_) { |
81a5970e |
123 | unless ( $obj->read( @_ ) ) { |
124 | $obj->_error(qq[No data could be read from file]); |
125 | return; |
126 | } |
39713df4 |
127 | } |
128 | |
129 | return $obj; |
130 | } |
131 | |
642eb381 |
132 | =head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] ) |
39713df4 |
133 | |
134 | Read the given tar file into memory. |
135 | The first argument can either be the name of a file or a reference to |
136 | an already open filehandle (or an IO::Zlib object if it's compressed) |
39713df4 |
137 | |
138 | The C<read> will I<replace> any previous content in C<$tar>! |
139 | |
e0d68803 |
140 | The second argument may be considered optional, but remains for |
642eb381 |
141 | backwards compatibility. Archive::Tar now looks at the file |
142 | magic to determine what class should be used to open the file |
143 | and will transparently Do The Right Thing. |
144 | |
145 | Archive::Tar will warn if you try to pass a bzip2 compressed file and the |
146 | IO::Zlib / IO::Uncompress::Bunzip2 modules are not available and simply return. |
39713df4 |
147 | |
b3200c5d |
148 | Note that you can currently B<not> pass a C<gzip> compressed |
642eb381 |
149 | filehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed |
150 | filehandle, which is not opened with C<IO::Uncompress::Bunzip2>, nor a string |
b3200c5d |
151 | containing the full archive information (either compressed or |
152 | uncompressed). These are worth while features, but not currently |
153 | implemented. See the C<TODO> section. |
154 | |
39713df4 |
155 | The third argument can be a hash reference with options. Note that |
156 | all options are case-sensitive. |
157 | |
158 | =over 4 |
159 | |
160 | =item limit |
161 | |
162 | Do not read more than C<limit> files. This is useful if you have |
163 | very big archives, and are only interested in the first few files. |
164 | |
642eb381 |
165 | =item filter |
166 | |
167 | Can be set to a regular expression. Only files with names that match |
168 | the expression will be read. |
169 | |
39713df4 |
170 | =item extract |
171 | |
172 | If set to true, immediately extract entries when reading them. This |
173 | gives you the same memory break as the C<extract_archive> function. |
174 | Note however that entries will not be read into memory, but written |
e0d68803 |
175 | straight to disk. This means no C<Archive::Tar::File> objects are |
642eb381 |
176 | created for you to inspect. |
39713df4 |
177 | |
178 | =back |
179 | |
180 | All files are stored internally as C<Archive::Tar::File> objects. |
181 | Please consult the L<Archive::Tar::File> documentation for details. |
182 | |
183 | Returns the number of files read in scalar context, and a list of |
184 | C<Archive::Tar::File> objects in list context. |
185 | |
186 | =cut |
187 | |
188 | sub read { |
189 | my $self = shift; |
190 | my $file = shift; |
191 | my $gzip = shift || 0; |
192 | my $opts = shift || {}; |
193 | |
194 | unless( defined $file ) { |
195 | $self->_error( qq[No file to read from!] ); |
196 | return; |
197 | } else { |
198 | $self->_file( $file ); |
199 | } |
200 | |
201 | my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) ) |
202 | or return; |
203 | |
204 | my $data = $self->_read_tar( $handle, $opts ) or return; |
205 | |
206 | $self->_data( $data ); |
207 | |
208 | return wantarray ? @$data : scalar @$data; |
209 | } |
210 | |
211 | sub _get_handle { |
642eb381 |
212 | my $self = shift; |
213 | my $file = shift; return unless defined $file; |
214 | return $file if ref $file; |
215 | my $compress = shift || 0; |
216 | my $mode = shift || READ_ONLY->( ZLIB ); # default to read only |
217 | |
218 | |
219 | ### get a FH opened to the right class, so we can use it transparently |
220 | ### throughout the program |
221 | my $fh; |
222 | { ### reading magic only makes sense if we're opening a file for |
223 | ### reading. otherwise, just use what the user requested. |
224 | my $magic = ''; |
225 | if( MODE_READ->($mode) ) { |
226 | open my $tmp, $file or do { |
227 | $self->_error( qq[Could not open '$file' for reading: $!] ); |
228 | return; |
229 | }; |
e0d68803 |
230 | |
642eb381 |
231 | ### read the first 4 bites of the file to figure out which class to |
232 | ### use to open the file. |
e0d68803 |
233 | sysread( $tmp, $magic, 4 ); |
642eb381 |
234 | close $tmp; |
235 | } |
39713df4 |
236 | |
642eb381 |
237 | ### is it bzip? |
238 | ### if you asked specifically for bzip compression, or if we're in |
239 | ### read mode and the magic numbers add up, use bzip |
240 | if( BZIP and ( |
e0d68803 |
241 | ($compress eq COMPRESS_BZIP) or |
642eb381 |
242 | ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM ) |
243 | ) |
244 | ) { |
e0d68803 |
245 | |
642eb381 |
246 | ### different reader/writer modules, different error vars... sigh |
247 | if( MODE_READ->($mode) ) { |
248 | $fh = IO::Uncompress::Bunzip2->new( $file ) or do { |
249 | $self->_error( qq[Could not read '$file': ] . |
250 | $IO::Uncompress::Bunzip2::Bunzip2Error |
251 | ); |
252 | return; |
253 | }; |
e0d68803 |
254 | |
642eb381 |
255 | } else { |
256 | $fh = IO::Compress::Bzip2->new( $file ) or do { |
257 | $self->_error( qq[Could not write to '$file': ] . |
258 | $IO::Compress::Bzip2::Bzip2Error |
259 | ); |
260 | return; |
261 | }; |
262 | } |
e0d68803 |
263 | |
642eb381 |
264 | ### is it gzip? |
265 | ### if you asked for compression, if you wanted to read or the gzip |
266 | ### magic number is present (redundant with read) |
267 | } elsif( ZLIB and ( |
268 | $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM |
e0d68803 |
269 | ) |
642eb381 |
270 | ) { |
271 | $fh = IO::Zlib->new; |
39713df4 |
272 | |
642eb381 |
273 | unless( $fh->open( $file, $mode ) ) { |
274 | $self->_error(qq[Could not create filehandle for '$file': $!]); |
275 | return; |
276 | } |
e0d68803 |
277 | |
642eb381 |
278 | ### is it plain tar? |
39713df4 |
279 | } else { |
642eb381 |
280 | $fh = IO::File->new; |
39713df4 |
281 | |
642eb381 |
282 | unless( $fh->open( $file, $mode ) ) { |
283 | $self->_error(qq[Could not create filehandle for '$file': $!]); |
284 | return; |
285 | } |
39713df4 |
286 | |
642eb381 |
287 | ### enable bin mode on tar archives |
288 | binmode $fh; |
e0d68803 |
289 | } |
642eb381 |
290 | } |
39713df4 |
291 | |
292 | return $fh; |
293 | } |
294 | |
642eb381 |
295 | |
39713df4 |
296 | sub _read_tar { |
297 | my $self = shift; |
298 | my $handle = shift or return; |
299 | my $opts = shift || {}; |
300 | |
301 | my $count = $opts->{limit} || 0; |
642eb381 |
302 | my $filter = $opts->{filter}; |
39713df4 |
303 | my $extract = $opts->{extract} || 0; |
304 | |
305 | ### set a cap on the amount of files to extract ### |
306 | my $limit = 0; |
307 | $limit = 1 if $count > 0; |
308 | |
309 | my $tarfile = [ ]; |
310 | my $chunk; |
311 | my $read = 0; |
312 | my $real_name; # to set the name of a file when |
313 | # we're encountering @longlink |
314 | my $data; |
315 | |
316 | LOOP: |
317 | while( $handle->read( $chunk, HEAD ) ) { |
318 | ### IO::Zlib doesn't support this yet |
319 | my $offset = eval { tell $handle } || 'unknown'; |
320 | |
321 | unless( $read++ ) { |
322 | my $gzip = GZIP_MAGIC_NUM; |
323 | if( $chunk =~ /$gzip/ ) { |
324 | $self->_error( qq[Cannot read compressed format in tar-mode] ); |
325 | return; |
326 | } |
4feb3b72 |
327 | |
328 | ### size is < HEAD, which means a corrupted file, as the minimum |
329 | ### length is _at least_ HEAD |
330 | if (length $chunk != HEAD) { |
331 | $self->_error( qq[Cannot read enough bytes from the tarfile] ); |
332 | return; |
333 | } |
39713df4 |
334 | } |
335 | |
336 | ### if we can't read in all bytes... ### |
337 | last if length $chunk != HEAD; |
338 | |
339 | ### Apparently this should really be two blocks of 512 zeroes, |
340 | ### but GNU tar sometimes gets it wrong. See comment in the |
341 | ### source code (tar.c) to GNU cpio. |
342 | next if $chunk eq TAR_END; |
343 | |
b30bcf62 |
344 | ### according to the posix spec, the last 12 bytes of the header are |
345 | ### null bytes, to pad it to a 512 byte block. That means if these |
346 | ### bytes are NOT null bytes, it's a corrrupt header. See: |
347 | ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx |
348 | ### line 111 |
349 | { my $nulls = join '', "\0" x 12; |
350 | unless( $nulls eq substr( $chunk, 500, 12 ) ) { |
351 | $self->_error( qq[Invalid header block at offset $offset] ); |
352 | next LOOP; |
353 | } |
354 | } |
355 | |
81a5970e |
356 | ### pass the realname, so we can set it 'proper' right away |
357 | ### some of the heuristics are done on the name, so important |
358 | ### to set it ASAP |
39713df4 |
359 | my $entry; |
81a5970e |
360 | { my %extra_args = (); |
361 | $extra_args{'name'} = $$real_name if defined $real_name; |
e0d68803 |
362 | |
363 | unless( $entry = Archive::Tar::File->new( chunk => $chunk, |
364 | %extra_args ) |
81a5970e |
365 | ) { |
366 | $self->_error( qq[Couldn't read chunk at offset $offset] ); |
b30bcf62 |
367 | next LOOP; |
81a5970e |
368 | } |
39713df4 |
369 | } |
370 | |
371 | ### ignore labels: |
372 | ### http://www.gnu.org/manual/tar/html_node/tar_139.html |
373 | next if $entry->is_label; |
374 | |
375 | if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) { |
376 | |
377 | if ( $entry->is_file && !$entry->validate ) { |
378 | ### sometimes the chunk is rather fux0r3d and a whole 512 |
c3745331 |
379 | ### bytes ends up in the ->name area. |
39713df4 |
380 | ### clean it up, if need be |
381 | my $name = $entry->name; |
382 | $name = substr($name, 0, 100) if length $name > 100; |
383 | $name =~ s/\n/ /g; |
384 | |
385 | $self->_error( $name . qq[: checksum error] ); |
386 | next LOOP; |
387 | } |
388 | |
389 | my $block = BLOCK_SIZE->( $entry->size ); |
390 | |
391 | $data = $entry->get_content_by_ref; |
392 | |
393 | ### just read everything into memory |
394 | ### can't do lazy loading since IO::Zlib doesn't support 'seek' |
395 | ### this is because Compress::Zlib doesn't support it =/ |
396 | ### this reads in the whole data in one read() call. |
397 | if( $handle->read( $$data, $block ) < $block ) { |
398 | $self->_error( qq[Read error on tarfile (missing data) ']. |
399 | $entry->full_path ."' at offset $offset" ); |
b30bcf62 |
400 | next LOOP; |
39713df4 |
401 | } |
402 | |
403 | ### throw away trailing garbage ### |
376cc5ea |
404 | substr ($$data, $entry->size) = "" if defined $$data; |
39713df4 |
405 | |
406 | ### part II of the @LongLink munging -- need to do /after/ |
407 | ### the checksum check. |
408 | if( $entry->is_longlink ) { |
409 | ### weird thing in tarfiles -- if the file is actually a |
410 | ### @LongLink, the data part seems to have a trailing ^@ |
411 | ### (unprintable) char. to display, pipe output through less. |
412 | ### but that doesn't *always* happen.. so check if the last |
413 | ### character is a control character, and if so remove it |
414 | ### at any rate, we better remove that character here, or tests |
415 | ### like 'eq' and hashlook ups based on names will SO not work |
416 | ### remove it by calculating the proper size, and then |
417 | ### tossing out everything that's longer than that size. |
418 | |
419 | ### count number of nulls |
420 | my $nulls = $$data =~ tr/\0/\0/; |
421 | |
422 | ### cut data + size by that many bytes |
423 | $entry->size( $entry->size - $nulls ); |
424 | substr ($$data, $entry->size) = ""; |
425 | } |
426 | } |
427 | |
428 | ### clean up of the entries.. posix tar /apparently/ has some |
429 | ### weird 'feature' that allows for filenames > 255 characters |
430 | ### they'll put a header in with as name '././@LongLink' and the |
431 | ### contents will be the name of the /next/ file in the archive |
432 | ### pretty crappy and kludgy if you ask me |
433 | |
434 | ### set the name for the next entry if this is a @LongLink; |
435 | ### this is one ugly hack =/ but needed for direct extraction |
436 | if( $entry->is_longlink ) { |
437 | $real_name = $data; |
b30bcf62 |
438 | next LOOP; |
39713df4 |
439 | } elsif ( defined $real_name ) { |
440 | $entry->name( $$real_name ); |
441 | $entry->prefix(''); |
442 | undef $real_name; |
443 | } |
444 | |
642eb381 |
445 | ### skip this entry if we're filtering |
446 | if ($filter && $entry->name !~ $filter) { |
447 | next LOOP; |
e0d68803 |
448 | |
642eb381 |
449 | ### skip this entry if it's a pax header. This is a special file added |
450 | ### by, among others, git-generated tarballs. It holds comments and is |
e0d68803 |
451 | ### not meant for extracting. See #38932: pax_global_header extracted |
642eb381 |
452 | } elsif ( $entry->name eq PAX_HEADER ) { |
453 | next LOOP; |
454 | } |
e0d68803 |
455 | |
39713df4 |
456 | $self->_extract_file( $entry ) if $extract |
457 | && !$entry->is_longlink |
458 | && !$entry->is_unknown |
459 | && !$entry->is_label; |
460 | |
461 | ### Guard against tarfiles with garbage at the end |
462 | last LOOP if $entry->name eq ''; |
463 | |
464 | ### push only the name on the rv if we're extracting |
465 | ### -- for extract_archive |
466 | push @$tarfile, ($extract ? $entry->name : $entry); |
467 | |
468 | if( $limit ) { |
469 | $count-- unless $entry->is_longlink || $entry->is_dir; |
470 | last LOOP unless $count; |
471 | } |
472 | } continue { |
473 | undef $data; |
474 | } |
475 | |
476 | return $tarfile; |
477 | } |
478 | |
479 | =head2 $tar->contains_file( $filename ) |
480 | |
481 | Check if the archive contains a certain file. |
482 | It will return true if the file is in the archive, false otherwise. |
483 | |
484 | Note however, that this function does an exact match using C<eq> |
485 | on the full path. So it cannot compensate for case-insensitive file- |
486 | systems or compare 2 paths to see if they would point to the same |
487 | underlying file. |
488 | |
489 | =cut |
490 | |
491 | sub contains_file { |
492 | my $self = shift; |
01d11a1c |
493 | my $full = shift; |
e0d68803 |
494 | |
01d11a1c |
495 | return unless defined $full; |
39713df4 |
496 | |
c3745331 |
497 | ### don't warn if the entry isn't there.. that's what this function |
498 | ### is for after all. |
499 | local $WARN = 0; |
39713df4 |
500 | return 1 if $self->_find_entry($full); |
501 | return; |
502 | } |
503 | |
504 | =head2 $tar->extract( [@filenames] ) |
505 | |
506 | Write files whose names are equivalent to any of the names in |
507 | C<@filenames> to disk, creating subdirectories as necessary. This |
508 | might not work too well under VMS. |
509 | Under MacPerl, the file's modification time will be converted to the |
510 | MacOS zero of time, and appropriate conversions will be done to the |
511 | path. However, the length of each element of the path is not |
512 | inspected to see whether it's longer than MacOS currently allows (32 |
513 | characters). |
514 | |
515 | If C<extract> is called without a list of file names, the entire |
516 | contents of the archive are extracted. |
517 | |
518 | Returns a list of filenames extracted. |
519 | |
520 | =cut |
521 | |
522 | sub extract { |
523 | my $self = shift; |
b30bcf62 |
524 | my @args = @_; |
39713df4 |
525 | my @files; |
526 | |
f38c1908 |
527 | # use the speed optimization for all extracted files |
528 | local($self->{cwd}) = cwd() unless $self->{cwd}; |
529 | |
39713df4 |
530 | ### you requested the extraction of only certian files |
b30bcf62 |
531 | if( @args ) { |
532 | for my $file ( @args ) { |
e0d68803 |
533 | |
b30bcf62 |
534 | ### it's already an object? |
535 | if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) { |
536 | push @files, $file; |
537 | next; |
39713df4 |
538 | |
b30bcf62 |
539 | ### go find it then |
540 | } else { |
e0d68803 |
541 | |
b30bcf62 |
542 | my $found; |
543 | for my $entry ( @{$self->_data} ) { |
544 | next unless $file eq $entry->full_path; |
e0d68803 |
545 | |
b30bcf62 |
546 | ### we found the file you're looking for |
547 | push @files, $entry; |
548 | $found++; |
549 | } |
e0d68803 |
550 | |
b30bcf62 |
551 | unless( $found ) { |
e0d68803 |
552 | return $self->_error( |
b30bcf62 |
553 | qq[Could not find '$file' in archive] ); |
554 | } |
39713df4 |
555 | } |
556 | } |
557 | |
558 | ### just grab all the file items |
559 | } else { |
560 | @files = $self->get_files; |
561 | } |
562 | |
563 | ### nothing found? that's an error |
564 | unless( scalar @files ) { |
565 | $self->_error( qq[No files found for ] . $self->_file ); |
566 | return; |
567 | } |
568 | |
569 | ### now extract them |
570 | for my $entry ( @files ) { |
571 | unless( $self->_extract_file( $entry ) ) { |
572 | $self->_error(q[Could not extract ']. $entry->full_path .q['] ); |
573 | return; |
574 | } |
575 | } |
576 | |
577 | return @files; |
578 | } |
579 | |
580 | =head2 $tar->extract_file( $file, [$extract_path] ) |
581 | |
582 | Write an entry, whose name is equivalent to the file name provided to |
48e76d2d |
583 | disk. Optionally takes a second parameter, which is the full native |
39713df4 |
584 | path (including filename) the entry will be written to. |
585 | |
586 | For example: |
587 | |
588 | $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' ); |
589 | |
b30bcf62 |
590 | $tar->extract_file( $at_file_object, 'name/i/want/to/give/it' ); |
591 | |
39713df4 |
592 | Returns true on success, false on failure. |
593 | |
594 | =cut |
595 | |
596 | sub extract_file { |
597 | my $self = shift; |
01d11a1c |
598 | my $file = shift; return unless defined $file; |
39713df4 |
599 | my $alt = shift; |
600 | |
601 | my $entry = $self->_find_entry( $file ) |
602 | or $self->_error( qq[Could not find an entry for '$file'] ), return; |
603 | |
604 | return $self->_extract_file( $entry, $alt ); |
605 | } |
606 | |
607 | sub _extract_file { |
608 | my $self = shift; |
609 | my $entry = shift or return; |
610 | my $alt = shift; |
39713df4 |
611 | |
612 | ### you wanted an alternate extraction location ### |
613 | my $name = defined $alt ? $alt : $entry->full_path; |
614 | |
615 | ### splitpath takes a bool at the end to indicate |
616 | ### that it's splitting a dir |
7f10f74b |
617 | my ($vol,$dirs,$file); |
618 | if ( defined $alt ) { # It's a local-OS path |
619 | ($vol,$dirs,$file) = File::Spec->splitpath( $alt, |
620 | $entry->is_dir ); |
621 | } else { |
622 | ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name, |
623 | $entry->is_dir ); |
624 | } |
625 | |
39713df4 |
626 | my $dir; |
627 | ### is $name an absolute path? ### |
642eb381 |
628 | if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) { |
178aef9a |
629 | |
630 | ### absolute names are not allowed to be in tarballs under |
631 | ### strict mode, so only allow it if a user tells us to do it |
632 | if( not defined $alt and not $INSECURE_EXTRACT_MODE ) { |
e0d68803 |
633 | $self->_error( |
178aef9a |
634 | q[Entry ']. $entry->full_path .q[' is an absolute path. ]. |
635 | q[Not extracting absolute paths under SECURE EXTRACT MODE] |
e0d68803 |
636 | ); |
178aef9a |
637 | return; |
638 | } |
e0d68803 |
639 | |
178aef9a |
640 | ### user asked us to, it's fine. |
642eb381 |
641 | $dir = File::Spec->catpath( $vol, $dirs, "" ); |
39713df4 |
642 | |
643 | ### it's a relative path ### |
644 | } else { |
e0d68803 |
645 | my $cwd = (ref $self and defined $self->{cwd}) |
646 | ? $self->{cwd} |
642eb381 |
647 | : cwd(); |
f5afd28d |
648 | |
f5afd28d |
649 | my @dirs = defined $alt |
650 | ? File::Spec->splitdir( $dirs ) # It's a local-OS path |
651 | : File::Spec::Unix->splitdir( $dirs ); # it's UNIX-style, likely |
652 | # straight from the tarball |
178aef9a |
653 | |
e0d68803 |
654 | if( not defined $alt and |
655 | not $INSECURE_EXTRACT_MODE |
656 | ) { |
642eb381 |
657 | |
658 | ### paths that leave the current directory are not allowed under |
659 | ### strict mode, so only allow it if a user tells us to do this. |
660 | if( grep { $_ eq '..' } @dirs ) { |
e0d68803 |
661 | |
642eb381 |
662 | $self->_error( |
663 | q[Entry ']. $entry->full_path .q[' is attempting to leave ]. |
664 | q[the current working directory. Not extracting under ]. |
665 | q[SECURE EXTRACT MODE] |
666 | ); |
667 | return; |
e0d68803 |
668 | } |
669 | |
642eb381 |
670 | ### the archive may be asking us to extract into a symlink. This |
671 | ### is not sane and a possible security issue, as outlined here: |
672 | ### https://rt.cpan.org/Ticket/Display.html?id=30380 |
673 | ### https://bugzilla.redhat.com/show_bug.cgi?id=295021 |
674 | ### https://issues.rpath.com/browse/RPL-1716 |
675 | my $full_path = $cwd; |
676 | for my $d ( @dirs ) { |
677 | $full_path = File::Spec->catdir( $full_path, $d ); |
e0d68803 |
678 | |
642eb381 |
679 | ### we've already checked this one, and it's safe. Move on. |
680 | next if ref $self and $self->{_link_cache}->{$full_path}; |
681 | |
682 | if( -l $full_path ) { |
683 | my $to = readlink $full_path; |
684 | my $diag = "symlinked directory ($full_path => $to)"; |
685 | |
686 | $self->_error( |
687 | q[Entry ']. $entry->full_path .q[' is attempting to ]. |
688 | qq[extract to a $diag. This is considered a security ]. |
689 | q[vulnerability and not allowed under SECURE EXTRACT ]. |
690 | q[MODE] |
691 | ); |
692 | return; |
693 | } |
e0d68803 |
694 | |
642eb381 |
695 | ### XXX keep a cache if possible, so the stats become cheaper: |
696 | $self->{_link_cache}->{$full_path} = 1 if ref $self; |
697 | } |
698 | } |
699 | |
2610e7a4 |
700 | ### '.' is the directory delimiter on VMS, which has to be escaped |
701 | ### or changed to '_' on vms. vmsify is used, because older versions |
702 | ### of vmspath do not handle this properly. |
703 | ### Must not add a '/' to an empty directory though. |
e0d68803 |
704 | map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS; |
f5afd28d |
705 | |
e0d68803 |
706 | my ($cwd_vol,$cwd_dir,$cwd_file) |
48e76d2d |
707 | = File::Spec->splitpath( $cwd ); |
708 | my @cwd = File::Spec->splitdir( $cwd_dir ); |
709 | push @cwd, $cwd_file if length $cwd_file; |
81a5970e |
710 | |
f5afd28d |
711 | ### We need to pass '' as the last elemant to catpath. Craig Berry |
712 | ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>): |
e0d68803 |
713 | ### The root problem is that splitpath on UNIX always returns the |
f5afd28d |
714 | ### final path element as a file even if it is a directory, and of |
715 | ### course there is no way it can know the difference without checking |
716 | ### against the filesystem, which it is documented as not doing. When |
717 | ### you turn around and call catpath, on VMS you have to know which bits |
718 | ### are directory bits and which bits are file bits. In this case we |
719 | ### know the result should be a directory. I had thought you could omit |
720 | ### the file argument to catpath in such a case, but apparently on UNIX |
721 | ### you can't. |
e0d68803 |
722 | $dir = File::Spec->catpath( |
723 | $cwd_vol, File::Spec->catdir( @cwd, @dirs ), '' |
f5afd28d |
724 | ); |
725 | |
e0d68803 |
726 | ### catdir() returns undef if the path is longer than 255 chars on |
2610e7a4 |
727 | ### older VMS systems. |
81a5970e |
728 | unless ( defined $dir ) { |
729 | $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] ); |
730 | return; |
731 | } |
732 | |
39713df4 |
733 | } |
734 | |
735 | if( -e $dir && !-d _ ) { |
736 | $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] ); |
737 | return; |
738 | } |
739 | |
740 | unless ( -d _ ) { |
741 | eval { File::Path::mkpath( $dir, 0, 0777 ) }; |
742 | if( $@ ) { |
642eb381 |
743 | my $fp = $entry->full_path; |
744 | $self->_error(qq[Could not create directory '$dir' for '$fp': $@]); |
39713df4 |
745 | return; |
746 | } |
e0d68803 |
747 | |
c3745331 |
748 | ### XXX chown here? that might not be the same as in the archive |
749 | ### as we're only chown'ing to the owner of the file we're extracting |
750 | ### not to the owner of the directory itself, which may or may not |
751 | ### be another entry in the archive |
752 | ### Answer: no, gnu tar doesn't do it either, it'd be the wrong |
753 | ### way to go. |
754 | #if( $CHOWN && CAN_CHOWN ) { |
755 | # chown $entry->uid, $entry->gid, $dir or |
756 | # $self->_error( qq[Could not set uid/gid on '$dir'] ); |
757 | #} |
39713df4 |
758 | } |
759 | |
760 | ### we're done if we just needed to create a dir ### |
761 | return 1 if $entry->is_dir; |
762 | |
763 | my $full = File::Spec->catfile( $dir, $file ); |
764 | |
765 | if( $entry->is_unknown ) { |
766 | $self->_error( qq[Unknown file type for file '$full'] ); |
767 | return; |
768 | } |
769 | |
770 | if( length $entry->type && $entry->is_file ) { |
771 | my $fh = IO::File->new; |
772 | $fh->open( '>' . $full ) or ( |
773 | $self->_error( qq[Could not open file '$full': $!] ), |
774 | return |
775 | ); |
776 | |
777 | if( $entry->size ) { |
778 | binmode $fh; |
779 | syswrite $fh, $entry->data or ( |
780 | $self->_error( qq[Could not write data to '$full'] ), |
781 | return |
782 | ); |
783 | } |
784 | |
785 | close $fh or ( |
786 | $self->_error( qq[Could not close file '$full'] ), |
787 | return |
788 | ); |
789 | |
790 | } else { |
791 | $self->_make_special_file( $entry, $full ) or return; |
792 | } |
793 | |
642eb381 |
794 | ### only update the timestamp if it's not a symlink; that will change the |
795 | ### timestamp of the original. This addresses bug #33669: Could not update |
796 | ### timestamp warning on symlinks |
797 | if( not -l $full ) { |
798 | utime time, $entry->mtime - TIME_OFFSET, $full or |
799 | $self->_error( qq[Could not update timestamp] ); |
800 | } |
39713df4 |
801 | |
2610e7a4 |
802 | if( $CHOWN && CAN_CHOWN->() ) { |
39713df4 |
803 | chown $entry->uid, $entry->gid, $full or |
804 | $self->_error( qq[Could not set uid/gid on '$full'] ); |
805 | } |
806 | |
807 | ### only chmod if we're allowed to, but never chmod symlinks, since they'll |
808 | ### change the perms on the file they're linking too... |
809 | if( $CHMOD and not -l $full ) { |
1c82faa7 |
810 | my $mode = $entry->mode; |
811 | unless ($SAME_PERMISSIONS) { |
812 | $mode &= ~(oct(7000) | umask); |
813 | } |
814 | chmod $mode, $full or |
39713df4 |
815 | $self->_error( qq[Could not chown '$full' to ] . $entry->mode ); |
816 | } |
817 | |
818 | return 1; |
819 | } |
820 | |
821 | sub _make_special_file { |
822 | my $self = shift; |
823 | my $entry = shift or return; |
824 | my $file = shift; return unless defined $file; |
825 | |
826 | my $err; |
827 | |
828 | if( $entry->is_symlink ) { |
829 | my $fail; |
830 | if( ON_UNIX ) { |
831 | symlink( $entry->linkname, $file ) or $fail++; |
832 | |
833 | } else { |
834 | $self->_extract_special_file_as_plain_file( $entry, $file ) |
835 | or $fail++; |
836 | } |
837 | |
642eb381 |
838 | $err = qq[Making symbolic link '$file' to '] . |
839 | $entry->linkname .q[' failed] if $fail; |
39713df4 |
840 | |
841 | } elsif ( $entry->is_hardlink ) { |
842 | my $fail; |
843 | if( ON_UNIX ) { |
844 | link( $entry->linkname, $file ) or $fail++; |
845 | |
846 | } else { |
847 | $self->_extract_special_file_as_plain_file( $entry, $file ) |
848 | or $fail++; |
849 | } |
850 | |
851 | $err = qq[Making hard link from '] . $entry->linkname . |
852 | qq[' to '$file' failed] if $fail; |
853 | |
854 | } elsif ( $entry->is_fifo ) { |
855 | ON_UNIX && !system('mknod', $file, 'p') or |
856 | $err = qq[Making fifo ']. $entry->name .qq[' failed]; |
857 | |
858 | } elsif ( $entry->is_blockdev or $entry->is_chardev ) { |
859 | my $mode = $entry->is_blockdev ? 'b' : 'c'; |
860 | |
861 | ON_UNIX && !system('mknod', $file, $mode, |
862 | $entry->devmajor, $entry->devminor) or |
863 | $err = qq[Making block device ']. $entry->name .qq[' (maj=] . |
864 | $entry->devmajor . qq[ min=] . $entry->devminor . |
865 | qq[) failed.]; |
866 | |
867 | } elsif ( $entry->is_socket ) { |
868 | ### the original doesn't do anything special for sockets.... ### |
869 | 1; |
870 | } |
871 | |
872 | return $err ? $self->_error( $err ) : 1; |
873 | } |
874 | |
875 | ### don't know how to make symlinks, let's just extract the file as |
876 | ### a plain file |
877 | sub _extract_special_file_as_plain_file { |
878 | my $self = shift; |
879 | my $entry = shift or return; |
880 | my $file = shift; return unless defined $file; |
881 | |
882 | my $err; |
883 | TRY: { |
884 | my $orig = $self->_find_entry( $entry->linkname ); |
885 | |
886 | unless( $orig ) { |
887 | $err = qq[Could not find file '] . $entry->linkname . |
888 | qq[' in memory.]; |
889 | last TRY; |
890 | } |
891 | |
892 | ### clone the entry, make it appear as a normal file ### |
893 | my $clone = $entry->clone; |
894 | $clone->_downgrade_to_plainfile; |
895 | $self->_extract_file( $clone, $file ) or last TRY; |
896 | |
897 | return 1; |
898 | } |
899 | |
900 | return $self->_error($err); |
901 | } |
902 | |
903 | =head2 $tar->list_files( [\@properties] ) |
904 | |
905 | Returns a list of the names of all the files in the archive. |
906 | |
907 | If C<list_files()> is passed an array reference as its first argument |
908 | it returns a list of hash references containing the requested |
909 | properties of each file. The following list of properties is |
910 | supported: name, size, mtime (last modified date), mode, uid, gid, |
911 | linkname, uname, gname, devmajor, devminor, prefix. |
912 | |
913 | Passing an array reference containing only one element, 'name', is |
914 | special cased to return a list of names rather than a list of hash |
915 | references, making it equivalent to calling C<list_files> without |
916 | arguments. |
917 | |
918 | =cut |
919 | |
920 | sub list_files { |
921 | my $self = shift; |
922 | my $aref = shift || [ ]; |
923 | |
924 | unless( $self->_data ) { |
925 | $self->read() or return; |
926 | } |
927 | |
928 | if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) { |
929 | return map { $_->full_path } @{$self->_data}; |
930 | } else { |
931 | |
932 | #my @rv; |
933 | #for my $obj ( @{$self->_data} ) { |
934 | # push @rv, { map { $_ => $obj->$_() } @$aref }; |
935 | #} |
936 | #return @rv; |
937 | |
938 | ### this does the same as the above.. just needs a +{ } |
939 | ### to make sure perl doesn't confuse it for a block |
940 | return map { my $o=$_; |
941 | +{ map { $_ => $o->$_() } @$aref } |
942 | } @{$self->_data}; |
943 | } |
944 | } |
945 | |
946 | sub _find_entry { |
947 | my $self = shift; |
948 | my $file = shift; |
949 | |
950 | unless( defined $file ) { |
951 | $self->_error( qq[No file specified] ); |
952 | return; |
953 | } |
954 | |
b30bcf62 |
955 | ### it's an object already |
956 | return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' ); |
957 | |
39713df4 |
958 | for my $entry ( @{$self->_data} ) { |
959 | my $path = $entry->full_path; |
960 | return $entry if $path eq $file; |
961 | } |
962 | |
963 | $self->_error( qq[No such file in archive: '$file'] ); |
964 | return; |
965 | } |
966 | |
967 | =head2 $tar->get_files( [@filenames] ) |
968 | |
969 | Returns the C<Archive::Tar::File> objects matching the filenames |
970 | provided. If no filename list was passed, all C<Archive::Tar::File> |
971 | objects in the current Tar object are returned. |
972 | |
973 | Please refer to the C<Archive::Tar::File> documentation on how to |
974 | handle these objects. |
975 | |
976 | =cut |
977 | |
978 | sub get_files { |
979 | my $self = shift; |
980 | |
981 | return @{ $self->_data } unless @_; |
982 | |
983 | my @list; |
984 | for my $file ( @_ ) { |
985 | push @list, grep { defined } $self->_find_entry( $file ); |
986 | } |
987 | |
988 | return @list; |
989 | } |
990 | |
991 | =head2 $tar->get_content( $file ) |
992 | |
993 | Return the content of the named file. |
994 | |
995 | =cut |
996 | |
997 | sub get_content { |
998 | my $self = shift; |
999 | my $entry = $self->_find_entry( shift ) or return; |
1000 | |
1001 | return $entry->data; |
1002 | } |
1003 | |
1004 | =head2 $tar->replace_content( $file, $content ) |
1005 | |
1006 | Make the string $content be the content for the file named $file. |
1007 | |
1008 | =cut |
1009 | |
1010 | sub replace_content { |
1011 | my $self = shift; |
1012 | my $entry = $self->_find_entry( shift ) or return; |
1013 | |
1014 | return $entry->replace_content( shift ); |
1015 | } |
1016 | |
1017 | =head2 $tar->rename( $file, $new_name ) |
1018 | |
1019 | Rename the file of the in-memory archive to $new_name. |
1020 | |
1021 | Note that you must specify a Unix path for $new_name, since per tar |
1022 | standard, all files in the archive must be Unix paths. |
1023 | |
1024 | Returns true on success and false on failure. |
1025 | |
1026 | =cut |
1027 | |
1028 | sub rename { |
1029 | my $self = shift; |
1030 | my $file = shift; return unless defined $file; |
1031 | my $new = shift; return unless defined $new; |
1032 | |
1033 | my $entry = $self->_find_entry( $file ) or return; |
1034 | |
1035 | return $entry->rename( $new ); |
1036 | } |
1037 | |
1038 | =head2 $tar->remove (@filenamelist) |
1039 | |
1040 | Removes any entries with names matching any of the given filenames |
1041 | from the in-memory archive. Returns a list of C<Archive::Tar::File> |
1042 | objects that remain. |
1043 | |
1044 | =cut |
1045 | |
1046 | sub remove { |
1047 | my $self = shift; |
1048 | my @list = @_; |
1049 | |
1050 | my %seen = map { $_->full_path => $_ } @{$self->_data}; |
1051 | delete $seen{ $_ } for @list; |
1052 | |
1053 | $self->_data( [values %seen] ); |
1054 | |
1055 | return values %seen; |
1056 | } |
1057 | |
1058 | =head2 $tar->clear |
1059 | |
1060 | C<clear> clears the current in-memory archive. This effectively gives |
1061 | you a 'blank' object, ready to be filled again. Note that C<clear> |
1062 | only has effect on the object, not the underlying tarfile. |
1063 | |
1064 | =cut |
1065 | |
1066 | sub clear { |
1067 | my $self = shift or return; |
1068 | |
1069 | $self->_data( [] ); |
1070 | $self->_file( '' ); |
1071 | |
1072 | return 1; |
1073 | } |
1074 | |
1075 | |
1076 | =head2 $tar->write ( [$file, $compressed, $prefix] ) |
1077 | |
1078 | Write the in-memory archive to disk. The first argument can either |
1079 | be the name of a file or a reference to an already open filehandle (a |
e0d68803 |
1080 | GLOB reference). |
642eb381 |
1081 | |
e0d68803 |
1082 | The second argument is used to indicate compression. You can either |
642eb381 |
1083 | compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed |
e0d68803 |
1084 | to be the C<gzip> compression level (between 1 and 9), but the use of |
642eb381 |
1085 | constants is prefered: |
1086 | |
1087 | # write a gzip compressed file |
bef46b70 |
1088 | $tar->write( 'out.tgz', COMPRESS_GZIP ); |
642eb381 |
1089 | |
e0d68803 |
1090 | # write a bzip compressed file |
bef46b70 |
1091 | $tar->write( 'out.tbz', COMPRESS_BZIP ); |
39713df4 |
1092 | |
1093 | Note that when you pass in a filehandle, the compression argument |
1094 | is ignored, as all files are printed verbatim to your filehandle. |
1095 | If you wish to enable compression with filehandles, use an |
642eb381 |
1096 | C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead. |
39713df4 |
1097 | |
1098 | The third argument is an optional prefix. All files will be tucked |
1099 | away in the directory you specify as prefix. So if you have files |
1100 | 'a' and 'b' in your archive, and you specify 'foo' as prefix, they |
1101 | will be written to the archive as 'foo/a' and 'foo/b'. |
1102 | |
1103 | If no arguments are given, C<write> returns the entire formatted |
1104 | archive as a string, which could be useful if you'd like to stuff the |
1105 | archive into a socket or a pipe to gzip or something. |
1106 | |
642eb381 |
1107 | |
39713df4 |
1108 | =cut |
1109 | |
1110 | sub write { |
1111 | my $self = shift; |
1112 | my $file = shift; $file = '' unless defined $file; |
1113 | my $gzip = shift || 0; |
1114 | my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; |
1115 | my $dummy = ''; |
e0d68803 |
1116 | |
39713df4 |
1117 | ### only need a handle if we have a file to print to ### |
1118 | my $handle = length($file) |
1119 | ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) ) |
1120 | or return ) |
1121 | : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h } |
e0d68803 |
1122 | : $HAS_IO_STRING ? IO::String->new |
39713df4 |
1123 | : __PACKAGE__->no_string_support(); |
1124 | |
e0d68803 |
1125 | ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a |
1126 | ### corrupt TAR file. Must clear out $\ to make sure no garbage is |
1127 | ### printed to the archive |
1128 | local $\; |
39713df4 |
1129 | |
1130 | for my $entry ( @{$self->_data} ) { |
1131 | ### entries to be written to the tarfile ### |
1132 | my @write_me; |
1133 | |
1134 | ### only now will we change the object to reflect the current state |
1135 | ### of the name and prefix fields -- this needs to be limited to |
1136 | ### write() only! |
1137 | my $clone = $entry->clone; |
1138 | |
1139 | |
e0d68803 |
1140 | ### so, if you don't want use to use the prefix, we'll stuff |
39713df4 |
1141 | ### everything in the name field instead |
1142 | if( $DO_NOT_USE_PREFIX ) { |
1143 | |
1144 | ### you might have an extended prefix, if so, set it in the clone |
1145 | ### XXX is ::Unix right? |
1146 | $clone->name( length $ext_prefix |
1147 | ? File::Spec::Unix->catdir( $ext_prefix, |
1148 | $clone->full_path) |
1149 | : $clone->full_path ); |
1150 | $clone->prefix( '' ); |
1151 | |
1152 | ### otherwise, we'll have to set it properly -- prefix part in the |
1153 | ### prefix and name part in the name field. |
1154 | } else { |
1155 | |
1156 | ### split them here, not before! |
1157 | my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path ); |
1158 | |
1159 | ### you might have an extended prefix, if so, set it in the clone |
1160 | ### XXX is ::Unix right? |
1161 | $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix ) |
1162 | if length $ext_prefix; |
1163 | |
1164 | $clone->prefix( $prefix ); |
1165 | $clone->name( $name ); |
1166 | } |
1167 | |
1168 | ### names are too long, and will get truncated if we don't add a |
1169 | ### '@LongLink' file... |
1170 | my $make_longlink = ( length($clone->name) > NAME_LENGTH or |
1171 | length($clone->prefix) > PREFIX_LENGTH |
1172 | ) || 0; |
1173 | |
1174 | ### perhaps we need to make a longlink file? |
1175 | if( $make_longlink ) { |
1176 | my $longlink = Archive::Tar::File->new( |
1177 | data => LONGLINK_NAME, |
1178 | $clone->full_path, |
1179 | { type => LONGLINK } |
1180 | ); |
1181 | |
1182 | unless( $longlink ) { |
1183 | $self->_error( qq[Could not create 'LongLink' entry for ] . |
1184 | qq[oversize file '] . $clone->full_path ."'" ); |
1185 | return; |
1186 | }; |
1187 | |
1188 | push @write_me, $longlink; |
1189 | } |
1190 | |
1191 | push @write_me, $clone; |
1192 | |
1193 | ### write the one, optionally 2 a::t::file objects to the handle |
1194 | for my $clone (@write_me) { |
1195 | |
1196 | ### if the file is a symlink, there are 2 options: |
1197 | ### either we leave the symlink intact, but then we don't write any |
1198 | ### data OR we follow the symlink, which means we actually make a |
1199 | ### copy. if we do the latter, we have to change the TYPE of the |
1200 | ### clone to 'FILE' |
1201 | my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK; |
1202 | my $data_ok = !$clone->is_symlink && $clone->has_content; |
1203 | |
1204 | ### downgrade to a 'normal' file if it's a symlink we're going to |
1205 | ### treat as a regular file |
1206 | $clone->_downgrade_to_plainfile if $link_ok; |
1207 | |
1208 | ### get the header for this block |
1209 | my $header = $self->_format_tar_entry( $clone ); |
1210 | unless( $header ) { |
1211 | $self->_error(q[Could not format header for: ] . |
1212 | $clone->full_path ); |
1213 | return; |
1214 | } |
1215 | |
1216 | unless( print $handle $header ) { |
1217 | $self->_error(q[Could not write header for: ] . |
1218 | $clone->full_path); |
1219 | return; |
1220 | } |
1221 | |
1222 | if( $link_ok or $data_ok ) { |
1223 | unless( print $handle $clone->data ) { |
1224 | $self->_error(q[Could not write data for: ] . |
1225 | $clone->full_path); |
1226 | return; |
1227 | } |
1228 | |
1229 | ### pad the end of the clone if required ### |
1230 | print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK |
1231 | } |
1232 | |
1233 | } ### done writing these entries |
1234 | } |
1235 | |
1236 | ### write the end markers ### |
1237 | print $handle TAR_END x 2 or |
1238 | return $self->_error( qq[Could not write tar end markers] ); |
b30bcf62 |
1239 | |
39713df4 |
1240 | ### did you want it written to a file, or returned as a string? ### |
b30bcf62 |
1241 | my $rv = length($file) ? 1 |
39713df4 |
1242 | : $HAS_PERLIO ? $dummy |
b30bcf62 |
1243 | : do { seek $handle, 0, 0; local $/; <$handle> }; |
1244 | |
1245 | ### make sure to close the handle; |
1246 | close $handle; |
e0d68803 |
1247 | |
b30bcf62 |
1248 | return $rv; |
39713df4 |
1249 | } |
1250 | |
1251 | sub _format_tar_entry { |
1252 | my $self = shift; |
1253 | my $entry = shift or return; |
1254 | my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; |
1255 | my $no_prefix = shift || 0; |
1256 | |
1257 | my $file = $entry->name; |
1258 | my $prefix = $entry->prefix; $prefix = '' unless defined $prefix; |
1259 | |
1260 | ### remove the prefix from the file name |
1261 | ### not sure if this is still neeeded --kane |
1262 | ### no it's not -- Archive::Tar::File->_new_from_file will take care of |
1263 | ### this for us. Even worse, this would break if we tried to add a file |
1264 | ### like x/x. |
1265 | #if( length $prefix ) { |
1266 | # $file =~ s/^$match//; |
1267 | #} |
1268 | |
1269 | $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix) |
1270 | if length $ext_prefix; |
1271 | |
1272 | ### not sure why this is... ### |
1273 | my $l = PREFIX_LENGTH; # is ambiguous otherwise... |
1274 | substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH; |
1275 | |
1276 | my $f1 = "%06o"; my $f2 = "%11o"; |
1277 | |
1278 | ### this might be optimizable with a 'changed' flag in the file objects ### |
1279 | my $tar = pack ( |
1280 | PACK, |
1281 | $file, |
1282 | |
1283 | (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]), |
1284 | (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]), |
1285 | |
1286 | "", # checksum field - space padded a bit down |
1287 | |
1288 | (map { $entry->$_() } qw[type linkname magic]), |
1289 | |
1290 | $entry->version || TAR_VERSION, |
1291 | |
1292 | (map { $entry->$_() } qw[uname gname]), |
1293 | (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]), |
1294 | |
1295 | ($no_prefix ? '' : $prefix) |
1296 | ); |
1297 | |
1298 | ### add the checksum ### |
1299 | substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar)); |
1300 | |
1301 | return $tar; |
1302 | } |
1303 | |
1304 | =head2 $tar->add_files( @filenamelist ) |
1305 | |
1306 | Takes a list of filenames and adds them to the in-memory archive. |
1307 | |
1308 | The path to the file is automatically converted to a Unix like |
1309 | equivalent for use in the archive, and, if on MacOS, the file's |
1310 | modification time is converted from the MacOS epoch to the Unix epoch. |
1311 | So tar archives created on MacOS with B<Archive::Tar> can be read |
1312 | both with I<tar> on Unix and applications like I<suntar> or |
1313 | I<Stuffit Expander> on MacOS. |
1314 | |
1315 | Be aware that the file's type/creator and resource fork will be lost, |
1316 | which is usually what you want in cross-platform archives. |
1317 | |
2610e7a4 |
1318 | Instead of a filename, you can also pass it an existing C<Archive::Tar::File> |
1319 | object from, for example, another archive. The object will be clone, and |
1320 | effectively be a copy of the original, not an alias. |
1321 | |
39713df4 |
1322 | Returns a list of C<Archive::Tar::File> objects that were just added. |
1323 | |
1324 | =cut |
1325 | |
1326 | sub add_files { |
1327 | my $self = shift; |
1328 | my @files = @_ or return; |
1329 | |
1330 | my @rv; |
1331 | for my $file ( @files ) { |
2610e7a4 |
1332 | |
1333 | ### you passed an Archive::Tar::File object |
1334 | ### clone it so we don't accidentally have a reference to |
1335 | ### an object from another archive |
1336 | if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) { |
e0d68803 |
1337 | push @rv, $file->clone; |
2610e7a4 |
1338 | next; |
1339 | } |
e0d68803 |
1340 | |
c3745331 |
1341 | unless( -e $file || -l $file ) { |
39713df4 |
1342 | $self->_error( qq[No such file: '$file'] ); |
1343 | next; |
1344 | } |
1345 | |
1346 | my $obj = Archive::Tar::File->new( file => $file ); |
1347 | unless( $obj ) { |
1348 | $self->_error( qq[Unable to add file: '$file'] ); |
1349 | next; |
1350 | } |
1351 | |
1352 | push @rv, $obj; |
1353 | } |
1354 | |
1355 | push @{$self->{_data}}, @rv; |
1356 | |
1357 | return @rv; |
1358 | } |
1359 | |
1360 | =head2 $tar->add_data ( $filename, $data, [$opthashref] ) |
1361 | |
1362 | Takes a filename, a scalar full of data and optionally a reference to |
1363 | a hash with specific options. |
1364 | |
1365 | Will add a file to the in-memory archive, with name C<$filename> and |
1366 | content C<$data>. Specific properties can be set using C<$opthashref>. |
1367 | The following list of properties is supported: name, size, mtime |
1368 | (last modified date), mode, uid, gid, linkname, uname, gname, |
b3200c5d |
1369 | devmajor, devminor, prefix, type. (On MacOS, the file's path and |
39713df4 |
1370 | modification times are converted to Unix equivalents.) |
1371 | |
b3200c5d |
1372 | Valid values for the file type are the following constants defined in |
1373 | Archive::Tar::Constants: |
1374 | |
1375 | =over 4 |
1376 | |
1377 | =item FILE |
1378 | |
1379 | Regular file. |
1380 | |
1381 | =item HARDLINK |
1382 | |
1383 | =item SYMLINK |
1384 | |
1385 | Hard and symbolic ("soft") links; linkname should specify target. |
1386 | |
1387 | =item CHARDEV |
1388 | |
1389 | =item BLOCKDEV |
1390 | |
1391 | Character and block devices. devmajor and devminor should specify the major |
1392 | and minor device numbers. |
1393 | |
1394 | =item DIR |
1395 | |
1396 | Directory. |
1397 | |
1398 | =item FIFO |
1399 | |
1400 | FIFO (named pipe). |
1401 | |
1402 | =item SOCKET |
1403 | |
1404 | Socket. |
1405 | |
1406 | =back |
1407 | |
39713df4 |
1408 | Returns the C<Archive::Tar::File> object that was just added, or |
1409 | C<undef> on failure. |
1410 | |
1411 | =cut |
1412 | |
1413 | sub add_data { |
1414 | my $self = shift; |
1415 | my ($file, $data, $opt) = @_; |
1416 | |
1417 | my $obj = Archive::Tar::File->new( data => $file, $data, $opt ); |
1418 | unless( $obj ) { |
1419 | $self->_error( qq[Unable to add file: '$file'] ); |
1420 | return; |
1421 | } |
1422 | |
1423 | push @{$self->{_data}}, $obj; |
1424 | |
1425 | return $obj; |
1426 | } |
1427 | |
1428 | =head2 $tar->error( [$BOOL] ) |
1429 | |
1430 | Returns the current errorstring (usually, the last error reported). |
1431 | If a true value was specified, it will give the C<Carp::longmess> |
1432 | equivalent of the error, in effect giving you a stacktrace. |
1433 | |
1434 | For backwards compatibility, this error is also available as |
1435 | C<$Archive::Tar::error> although it is much recommended you use the |
1436 | method call instead. |
1437 | |
1438 | =cut |
1439 | |
1440 | { |
1441 | $error = ''; |
1442 | my $longmess; |
1443 | |
1444 | sub _error { |
1445 | my $self = shift; |
1446 | my $msg = $error = shift; |
1447 | $longmess = Carp::longmess($error); |
1448 | |
1449 | ### set Archive::Tar::WARN to 0 to disable printing |
1450 | ### of errors |
1451 | if( $WARN ) { |
1452 | carp $DEBUG ? $longmess : $msg; |
1453 | } |
1454 | |
1455 | return; |
1456 | } |
1457 | |
1458 | sub error { |
1459 | my $self = shift; |
1460 | return shift() ? $longmess : $error; |
1461 | } |
1462 | } |
1463 | |
f38c1908 |
1464 | =head2 $tar->setcwd( $cwd ); |
1465 | |
1466 | C<Archive::Tar> needs to know the current directory, and it will run |
e0d68803 |
1467 | C<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the |
f38c1908 |
1468 | tarfile and saves it in the file system. (As of version 1.30, however, |
e0d68803 |
1469 | C<Archive::Tar> will use the speed optimization described below |
f38c1908 |
1470 | automatically, so it's only relevant if you're using C<extract_file()>). |
1471 | |
1472 | Since C<Archive::Tar> doesn't change the current directory internally |
1473 | while it is extracting the items in a tarball, all calls to C<Cwd::cwd()> |
1474 | can be avoided if we can guarantee that the current directory doesn't |
1475 | get changed externally. |
1476 | |
1477 | To use this performance boost, set the current directory via |
1478 | |
1479 | use Cwd; |
1480 | $tar->setcwd( cwd() ); |
1481 | |
1482 | once before calling a function like C<extract_file> and |
1483 | C<Archive::Tar> will use the current directory setting from then on |
e0d68803 |
1484 | and won't call C<Cwd::cwd()> internally. |
f38c1908 |
1485 | |
1486 | To switch back to the default behaviour, use |
1487 | |
1488 | $tar->setcwd( undef ); |
1489 | |
1490 | and C<Archive::Tar> will call C<Cwd::cwd()> internally again. |
1491 | |
1492 | If you're using C<Archive::Tar>'s C<exract()> method, C<setcwd()> will |
1493 | be called for you. |
1494 | |
e0d68803 |
1495 | =cut |
f38c1908 |
1496 | |
1497 | sub setcwd { |
1498 | my $self = shift; |
1499 | my $cwd = shift; |
1500 | |
1501 | $self->{cwd} = $cwd; |
1502 | } |
39713df4 |
1503 | |
39713df4 |
1504 | =head1 Class Methods |
1505 | |
642eb381 |
1506 | =head2 Archive::Tar->create_archive($file, $compressed, @filelist) |
39713df4 |
1507 | |
1508 | Creates a tar file from the list of files provided. The first |
1509 | argument can either be the name of the tar file to create or a |
1510 | reference to an open file handle (e.g. a GLOB reference). |
1511 | |
e0d68803 |
1512 | The second argument is used to indicate compression. You can either |
642eb381 |
1513 | compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed |
e0d68803 |
1514 | to be the C<gzip> compression level (between 1 and 9), but the use of |
642eb381 |
1515 | constants is prefered: |
1516 | |
1517 | # write a gzip compressed file |
bef46b70 |
1518 | Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist ); |
642eb381 |
1519 | |
e0d68803 |
1520 | # write a bzip compressed file |
bef46b70 |
1521 | Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist ); |
39713df4 |
1522 | |
1523 | Note that when you pass in a filehandle, the compression argument |
1524 | is ignored, as all files are printed verbatim to your filehandle. |
1525 | If you wish to enable compression with filehandles, use an |
642eb381 |
1526 | C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead. |
39713df4 |
1527 | |
1528 | The remaining arguments list the files to be included in the tar file. |
1529 | These files must all exist. Any files which don't exist or can't be |
1530 | read are silently ignored. |
1531 | |
1532 | If the archive creation fails for any reason, C<create_archive> will |
1533 | return false. Please use the C<error> method to find the cause of the |
1534 | failure. |
1535 | |
1536 | Note that this method does not write C<on the fly> as it were; it |
1537 | still reads all the files into memory before writing out the archive. |
1538 | Consult the FAQ below if this is a problem. |
1539 | |
1540 | =cut |
1541 | |
1542 | sub create_archive { |
1543 | my $class = shift; |
1544 | |
1545 | my $file = shift; return unless defined $file; |
1546 | my $gzip = shift || 0; |
1547 | my @files = @_; |
1548 | |
1549 | unless( @files ) { |
1550 | return $class->_error( qq[Cowardly refusing to create empty archive!] ); |
1551 | } |
1552 | |
1553 | my $tar = $class->new; |
1554 | $tar->add_files( @files ); |
1555 | return $tar->write( $file, $gzip ); |
1556 | } |
1557 | |
642eb381 |
1558 | =head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] ) |
1559 | |
1560 | Returns an iterator function that reads the tar file without loading |
1561 | it all in memory. Each time the function is called it will return the |
1562 | next file in the tarball. The files are returned as |
1563 | C<Archive::Tar::File> objects. The iterator function returns the |
1564 | empty list once it has exhausted the the files contained. |
1565 | |
1566 | The second argument can be a hash reference with options, which are |
1567 | identical to the arguments passed to C<read()>. |
1568 | |
1569 | Example usage: |
1570 | |
1571 | my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} ); |
1572 | |
1573 | while( my $f = $next->() ) { |
1574 | print $f->name, "\n"; |
1575 | |
1576 | $f->extract or warn "Extraction failed"; |
e0d68803 |
1577 | |
642eb381 |
1578 | # .... |
1579 | } |
1580 | |
1581 | =cut |
1582 | |
1583 | |
1584 | sub iter { |
1585 | my $class = shift; |
1586 | my $filename = shift or return; |
1587 | my $compressed = shift or 0; |
1588 | my $opts = shift || {}; |
1589 | |
1590 | ### get a handle to read from. |
1591 | my $handle = $class->_get_handle( |
e0d68803 |
1592 | $filename, |
1593 | $compressed, |
642eb381 |
1594 | READ_ONLY->( ZLIB ) |
1595 | ) or return; |
1596 | |
1597 | my @data; |
1598 | return sub { |
1599 | return shift(@data) if @data; # more than one file returned? |
1600 | return unless $handle; # handle exhausted? |
1601 | |
1602 | ### read data, should only return file |
1603 | @data = @{ $class->_read_tar($handle, { %$opts, limit => 1 }) }; |
1604 | |
1605 | ### return one piece of data |
1606 | return shift(@data) if @data; |
e0d68803 |
1607 | |
642eb381 |
1608 | ### data is exhausted, free the filehandle |
1609 | undef $handle; |
1610 | return; |
1611 | }; |
1612 | } |
1613 | |
1614 | =head2 Archive::Tar->list_archive($file, $compressed, [\@properties]) |
39713df4 |
1615 | |
1616 | Returns a list of the names of all the files in the archive. The |
1617 | first argument can either be the name of the tar file to list or a |
1618 | reference to an open file handle (e.g. a GLOB reference). |
1619 | |
1620 | If C<list_archive()> is passed an array reference as its third |
1621 | argument it returns a list of hash references containing the requested |
1622 | properties of each file. The following list of properties is |
e0d68803 |
1623 | supported: full_path, name, size, mtime (last modified date), mode, |
b3200c5d |
1624 | uid, gid, linkname, uname, gname, devmajor, devminor, prefix. |
1625 | |
1626 | See C<Archive::Tar::File> for details about supported properties. |
39713df4 |
1627 | |
1628 | Passing an array reference containing only one element, 'name', is |
1629 | special cased to return a list of names rather than a list of hash |
1630 | references. |
1631 | |
1632 | =cut |
1633 | |
1634 | sub list_archive { |
1635 | my $class = shift; |
1636 | my $file = shift; return unless defined $file; |
1637 | my $gzip = shift || 0; |
1638 | |
1639 | my $tar = $class->new($file, $gzip); |
1640 | return unless $tar; |
1641 | |
1642 | return $tar->list_files( @_ ); |
1643 | } |
1644 | |
642eb381 |
1645 | =head2 Archive::Tar->extract_archive($file, $compressed) |
39713df4 |
1646 | |
1647 | Extracts the contents of the tar file. The first argument can either |
1648 | be the name of the tar file to create or a reference to an open file |
1649 | handle (e.g. a GLOB reference). All relative paths in the tar file will |
1650 | be created underneath the current working directory. |
1651 | |
1652 | C<extract_archive> will return a list of files it extracted. |
1653 | If the archive extraction fails for any reason, C<extract_archive> |
1654 | will return false. Please use the C<error> method to find the cause |
1655 | of the failure. |
1656 | |
1657 | =cut |
1658 | |
1659 | sub extract_archive { |
1660 | my $class = shift; |
1661 | my $file = shift; return unless defined $file; |
1662 | my $gzip = shift || 0; |
1663 | |
1664 | my $tar = $class->new( ) or return; |
1665 | |
1666 | return $tar->read( $file, $gzip, { extract => 1 } ); |
1667 | } |
1668 | |
f5695358 |
1669 | =head2 $bool = Archive::Tar->has_io_string |
1670 | |
1671 | Returns true if we currently have C<IO::String> support loaded. |
1672 | |
e0d68803 |
1673 | Either C<IO::String> or C<perlio> support is needed to support writing |
f5695358 |
1674 | stringified archives. Currently, C<perlio> is the preferred method, if |
1675 | available. |
1676 | |
1677 | See the C<GLOBAL VARIABLES> section to see how to change this preference. |
1678 | |
1679 | =cut |
1680 | |
1681 | sub has_io_string { return $HAS_IO_STRING; } |
1682 | |
1683 | =head2 $bool = Archive::Tar->has_perlio |
1684 | |
1685 | Returns true if we currently have C<perlio> support loaded. |
1686 | |
e0d68803 |
1687 | This requires C<perl-5.8> or higher, compiled with C<perlio> |
f5695358 |
1688 | |
e0d68803 |
1689 | Either C<IO::String> or C<perlio> support is needed to support writing |
f5695358 |
1690 | stringified archives. Currently, C<perlio> is the preferred method, if |
1691 | available. |
1692 | |
1693 | See the C<GLOBAL VARIABLES> section to see how to change this preference. |
1694 | |
1695 | =cut |
1696 | |
1697 | sub has_perlio { return $HAS_PERLIO; } |
1698 | |
1699 | =head2 $bool = Archive::Tar->has_zlib_support |
1700 | |
1701 | Returns true if C<Archive::Tar> can extract C<zlib> compressed archives |
1702 | |
1703 | =cut |
1704 | |
1705 | sub has_zlib_support { return ZLIB } |
1706 | |
1707 | =head2 $bool = Archive::Tar->has_bzip2_support |
1708 | |
1709 | Returns true if C<Archive::Tar> can extract C<bzip2> compressed archives |
1710 | |
1711 | =cut |
1712 | |
1713 | sub has_bzip2_support { return BZIP } |
1714 | |
39713df4 |
1715 | =head2 Archive::Tar->can_handle_compressed_files |
1716 | |
1717 | A simple checking routine, which will return true if C<Archive::Tar> |
642eb381 |
1718 | is able to uncompress compressed archives on the fly with C<IO::Zlib> |
1719 | and C<IO::Compress::Bzip2> or false if not both are installed. |
39713df4 |
1720 | |
1721 | You can use this as a shortcut to determine whether C<Archive::Tar> |
1722 | will do what you think before passing compressed archives to its |
1723 | C<read> method. |
1724 | |
1725 | =cut |
1726 | |
642eb381 |
1727 | sub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 } |
39713df4 |
1728 | |
1729 | sub no_string_support { |
1730 | croak("You have to install IO::String to support writing archives to strings"); |
1731 | } |
1732 | |
1733 | 1; |
1734 | |
1735 | __END__ |
1736 | |
1737 | =head1 GLOBAL VARIABLES |
1738 | |
1739 | =head2 $Archive::Tar::FOLLOW_SYMLINK |
1740 | |
1741 | Set this variable to C<1> to make C<Archive::Tar> effectively make a |
1742 | copy of the file when extracting. Default is C<0>, which |
1743 | means the symlink stays intact. Of course, you will have to pack the |
1744 | file linked to as well. |
1745 | |
1746 | This option is checked when you write out the tarfile using C<write> |
1747 | or C<create_archive>. |
1748 | |
1749 | This works just like C</bin/tar>'s C<-h> option. |
1750 | |
1751 | =head2 $Archive::Tar::CHOWN |
1752 | |
1753 | By default, C<Archive::Tar> will try to C<chown> your files if it is |
1754 | able to. In some cases, this may not be desired. In that case, set |
1755 | this variable to C<0> to disable C<chown>-ing, even if it were |
1756 | possible. |
1757 | |
1758 | The default is C<1>. |
1759 | |
1760 | =head2 $Archive::Tar::CHMOD |
1761 | |
1762 | By default, C<Archive::Tar> will try to C<chmod> your files to |
1763 | whatever mode was specified for the particular file in the archive. |
1764 | In some cases, this may not be desired. In that case, set this |
1765 | variable to C<0> to disable C<chmod>-ing. |
1766 | |
1767 | The default is C<1>. |
1768 | |
1c82faa7 |
1769 | =head2 $Archive::Tar::SAME_PERMISSIONS |
1770 | |
1771 | When, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether |
1772 | the permissions on files from the archive are used without modification |
1773 | of if they are filtered by removing any setid bits and applying the |
1774 | current umask. |
1775 | |
1776 | The default is C<1> for the root user and C<0> for normal users. |
1777 | |
39713df4 |
1778 | =head2 $Archive::Tar::DO_NOT_USE_PREFIX |
1779 | |
e0d68803 |
1780 | By default, C<Archive::Tar> will try to put paths that are over |
f38c1908 |
1781 | 100 characters in the C<prefix> field of your tar header, as |
e0d68803 |
1782 | defined per POSIX-standard. However, some (older) tar programs |
1783 | do not implement this spec. To retain compatibility with these older |
1784 | or non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX> |
1785 | variable to a true value, and C<Archive::Tar> will use an alternate |
1786 | way of dealing with paths over 100 characters by using the |
f38c1908 |
1787 | C<GNU Extended Header> feature. |
1788 | |
1789 | Note that clients who do not support the C<GNU Extended Header> |
1790 | feature will not be able to read these archives. Such clients include |
1791 | tars on C<Solaris>, C<Irix> and C<AIX>. |
39713df4 |
1792 | |
1793 | The default is C<0>. |
1794 | |
1795 | =head2 $Archive::Tar::DEBUG |
1796 | |
1797 | Set this variable to C<1> to always get the C<Carp::longmess> output |
1798 | of the warnings, instead of the regular C<carp>. This is the same |
1799 | message you would get by doing: |
1800 | |
1801 | $tar->error(1); |
1802 | |
1803 | Defaults to C<0>. |
1804 | |
1805 | =head2 $Archive::Tar::WARN |
1806 | |
1807 | Set this variable to C<0> if you do not want any warnings printed. |
1808 | Personally I recommend against doing this, but people asked for the |
1809 | option. Also, be advised that this is of course not threadsafe. |
1810 | |
1811 | Defaults to C<1>. |
1812 | |
1813 | =head2 $Archive::Tar::error |
1814 | |
1815 | Holds the last reported error. Kept for historical reasons, but its |
1816 | use is very much discouraged. Use the C<error()> method instead: |
1817 | |
1818 | warn $tar->error unless $tar->extract; |
1819 | |
178aef9a |
1820 | =head2 $Archive::Tar::INSECURE_EXTRACT_MODE |
1821 | |
1822 | This variable indicates whether C<Archive::Tar> should allow |
1823 | files to be extracted outside their current working directory. |
1824 | |
1825 | Allowing this could have security implications, as a malicious |
1826 | tar archive could alter or replace any file the extracting user |
e0d68803 |
1827 | has permissions to. Therefor, the default is to not allow |
1828 | insecure extractions. |
178aef9a |
1829 | |
e0d68803 |
1830 | If you trust the archive, or have other reasons to allow the |
1831 | archive to write files outside your current working directory, |
178aef9a |
1832 | set this variable to C<true>. |
1833 | |
1834 | Note that this is a backwards incompatible change from version |
1835 | C<1.36> and before. |
1836 | |
39713df4 |
1837 | =head2 $Archive::Tar::HAS_PERLIO |
1838 | |
e0d68803 |
1839 | This variable holds a boolean indicating if we currently have |
39713df4 |
1840 | C<perlio> support loaded. This will be enabled for any perl |
e0d68803 |
1841 | greater than C<5.8> compiled with C<perlio>. |
39713df4 |
1842 | |
1843 | If you feel strongly about disabling it, set this variable to |
1844 | C<false>. Note that you will then need C<IO::String> installed |
1845 | to support writing stringified archives. |
1846 | |
1847 | Don't change this variable unless you B<really> know what you're |
1848 | doing. |
1849 | |
1850 | =head2 $Archive::Tar::HAS_IO_STRING |
1851 | |
e0d68803 |
1852 | This variable holds a boolean indicating if we currently have |
39713df4 |
1853 | C<IO::String> support loaded. This will be enabled for any perl |
1854 | that has a loadable C<IO::String> module. |
1855 | |
1856 | If you feel strongly about disabling it, set this variable to |
1857 | C<false>. Note that you will then need C<perlio> support from |
1858 | your perl to be able to write stringified archives. |
1859 | |
1860 | Don't change this variable unless you B<really> know what you're |
1861 | doing. |
1862 | |
1863 | =head1 FAQ |
1864 | |
1865 | =over 4 |
1866 | |
1867 | =item What's the minimum perl version required to run Archive::Tar? |
1868 | |
1869 | You will need perl version 5.005_03 or newer. |
1870 | |
1871 | =item Isn't Archive::Tar slow? |
1872 | |
1873 | Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar> |
1874 | However, it's very portable. If speed is an issue, consider using |
1875 | C</bin/tar> instead. |
1876 | |
1877 | =item Isn't Archive::Tar heavier on memory than /bin/tar? |
1878 | |
1879 | Yes it is, see previous answer. Since C<Compress::Zlib> and therefore |
1880 | C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little |
1881 | choice but to read the archive into memory. |
1882 | This is ok if you want to do in-memory manipulation of the archive. |
642eb381 |
1883 | |
39713df4 |
1884 | If you just want to extract, use the C<extract_archive> class method |
1885 | instead. It will optimize and write to disk immediately. |
1886 | |
642eb381 |
1887 | Another option is to use the C<iter> class method to iterate over |
1888 | the files in the tarball without reading them all in memory at once. |
1889 | |
1890 | =item Can you lazy-load data instead? |
39713df4 |
1891 | |
642eb381 |
1892 | In some cases, yes. You can use the C<iter> class method to iterate |
1893 | over the files in the tarball without reading them all in memory at once. |
39713df4 |
1894 | |
1895 | =item How much memory will an X kb tar file need? |
1896 | |
1897 | Probably more than X kb, since it will all be read into memory. If |
1898 | this is a problem, and you don't need to do in memory manipulation |
e0d68803 |
1899 | of the archive, consider using the C<iter> class method, or C</bin/tar> |
642eb381 |
1900 | instead. |
39713df4 |
1901 | |
1902 | =item What do you do with unsupported filetypes in an archive? |
1903 | |
1904 | C<Unix> has a few filetypes that aren't supported on other platforms, |
1905 | like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just |
1906 | try to make a copy of the original file, rather than throwing an error. |
1907 | |
1908 | This does require you to read the entire archive in to memory first, |
1909 | since otherwise we wouldn't know what data to fill the copy with. |
e0d68803 |
1910 | (This means that you cannot use the class methods, including C<iter> |
1911 | on archives that have incompatible filetypes and still expect things |
642eb381 |
1912 | to work). |
39713df4 |
1913 | |
1914 | For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that |
1915 | the extraction of this particular item didn't work. |
1916 | |
f38c1908 |
1917 | =item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly! |
1918 | |
1919 | By default, C<Archive::Tar> is in a completely POSIX-compatible |
1920 | mode, which uses the POSIX-specification of C<tar> to store files. |
1921 | For paths greather than 100 characters, this is done using the |
1922 | C<POSIX header prefix>. Non-POSIX-compatible clients may not support |
1923 | this part of the specification, and may only support the C<GNU Extended |
1924 | Header> functionality. To facilitate those clients, you can set the |
e0d68803 |
1925 | C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the |
f38c1908 |
1926 | C<GLOBAL VARIABLES> section for details on this variable. |
1927 | |
c3745331 |
1928 | Note that GNU tar earlier than version 1.14 does not cope well with |
1929 | the C<POSIX header prefix>. If you use such a version, consider setting |
1930 | the C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. |
1931 | |
b30bcf62 |
1932 | =item How do I extract only files that have property X from an archive? |
1933 | |
1934 | Sometimes, you might not wish to extract a complete archive, just |
1935 | the files that are relevant to you, based on some criteria. |
1936 | |
1937 | You can do this by filtering a list of C<Archive::Tar::File> objects |
1938 | based on your criteria. For example, to extract only files that have |
1939 | the string C<foo> in their title, you would use: |
1940 | |
e0d68803 |
1941 | $tar->extract( |
b30bcf62 |
1942 | grep { $_->full_path =~ /foo/ } $tar->get_files |
e0d68803 |
1943 | ); |
b30bcf62 |
1944 | |
1945 | This way, you can filter on any attribute of the files in the archive. |
1946 | Consult the C<Archive::Tar::File> documentation on how to use these |
1947 | objects. |
1948 | |
81a5970e |
1949 | =item How do I access .tar.Z files? |
1950 | |
1951 | The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via |
1952 | the C<IO::Zlib> module) to access tar files that have been compressed |
1953 | with C<gzip>. Unfortunately tar files compressed with the Unix C<compress> |
1954 | utility cannot be read by C<Compress::Zlib> and so cannot be directly |
1955 | accesses by C<Archive::Tar>. |
1956 | |
1957 | If the C<uncompress> or C<gunzip> programs are available, you can use |
1958 | one of these workarounds to read C<.tar.Z> files from C<Archive::Tar> |
1959 | |
1960 | Firstly with C<uncompress> |
1961 | |
1962 | use Archive::Tar; |
1963 | |
1964 | open F, "uncompress -c $filename |"; |
1965 | my $tar = Archive::Tar->new(*F); |
1966 | ... |
1967 | |
1968 | and this with C<gunzip> |
1969 | |
1970 | use Archive::Tar; |
1971 | |
1972 | open F, "gunzip -c $filename |"; |
1973 | my $tar = Archive::Tar->new(*F); |
1974 | ... |
1975 | |
1976 | Similarly, if the C<compress> program is available, you can use this to |
1977 | write a C<.tar.Z> file |
1978 | |
1979 | use Archive::Tar; |
1980 | use IO::File; |
1981 | |
1982 | my $fh = new IO::File "| compress -c >$filename"; |
1983 | my $tar = Archive::Tar->new(); |
1984 | ... |
1985 | $tar->write($fh); |
1986 | $fh->close ; |
1987 | |
01d11a1c |
1988 | =item How do I handle Unicode strings? |
1989 | |
1990 | C<Archive::Tar> uses byte semantics for any files it reads from or writes |
1991 | to disk. This is not a problem if you only deal with files and never |
1992 | look at their content or work solely with byte strings. But if you use |
1993 | Unicode strings with character semantics, some additional steps need |
1994 | to be taken. |
1995 | |
1996 | For example, if you add a Unicode string like |
1997 | |
1998 | # Problem |
1999 | $tar->add_data('file.txt', "Euro: \x{20AC}"); |
2000 | |
2001 | then there will be a problem later when the tarfile gets written out |
2002 | to disk via C<$tar->write()>: |
2003 | |
2004 | Wide character in print at .../Archive/Tar.pm line 1014. |
2005 | |
2006 | The data was added as a Unicode string and when writing it out to disk, |
2007 | the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl |
2008 | tried to convert the string to ISO-8859 and failed. The written file |
2009 | now contains garbage. |
2010 | |
2011 | For this reason, Unicode strings need to be converted to UTF-8-encoded |
2012 | bytestrings before they are handed off to C<add_data()>: |
2013 | |
2014 | use Encode; |
2015 | my $data = "Accented character: \x{20AC}"; |
2016 | $data = encode('utf8', $data); |
2017 | |
2018 | $tar->add_data('file.txt', $data); |
2019 | |
e0d68803 |
2020 | A opposite problem occurs if you extract a UTF8-encoded file from a |
01d11a1c |
2021 | tarball. Using C<get_content()> on the C<Archive::Tar::File> object |
2022 | will return its content as a bytestring, not as a Unicode string. |
2023 | |
2024 | If you want it to be a Unicode string (because you want character |
2025 | semantics with operations like regular expression matching), you need |
e0d68803 |
2026 | to decode the UTF8-encoded content and have Perl convert it into |
01d11a1c |
2027 | a Unicode string: |
2028 | |
2029 | use Encode; |
2030 | my $data = $tar->get_content(); |
e0d68803 |
2031 | |
01d11a1c |
2032 | # Make it a Unicode string |
2033 | $data = decode('utf8', $data); |
2034 | |
e0d68803 |
2035 | There is no easy way to provide this functionality in C<Archive::Tar>, |
01d11a1c |
2036 | because a tarball can contain many files, and each of which could be |
2037 | encoded in a different way. |
81a5970e |
2038 | |
39713df4 |
2039 | =back |
2040 | |
2041 | =head1 TODO |
2042 | |
2043 | =over 4 |
2044 | |
2045 | =item Check if passed in handles are open for read/write |
2046 | |
2047 | Currently I don't know of any portable pure perl way to do this. |
2048 | Suggestions welcome. |
2049 | |
b3200c5d |
2050 | =item Allow archives to be passed in as string |
2051 | |
2052 | Currently, we only allow opened filehandles or filenames, but |
2053 | not strings. The internals would need some reworking to facilitate |
2054 | stringified archives. |
2055 | |
2056 | =item Facilitate processing an opened filehandle of a compressed archive |
2057 | |
2058 | Currently, we only support this if the filehandle is an IO::Zlib object. |
2059 | Environments, like apache, will present you with an opened filehandle |
2060 | to an uploaded file, which might be a compressed archive. |
2061 | |
39713df4 |
2062 | =back |
2063 | |
f38c1908 |
2064 | =head1 SEE ALSO |
2065 | |
2066 | =over 4 |
2067 | |
2068 | =item The GNU tar specification |
2069 | |
2070 | C<http://www.gnu.org/software/tar/manual/tar.html> |
2071 | |
2072 | =item The PAX format specication |
2073 | |
2074 | The specifcation which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html> |
2075 | |
2076 | =item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html> |
2077 | |
2078 | =item GNU tar intends to switch to POSIX compatibility |
2079 | |
2080 | GNU Tar authors have expressed their intention to become completely |
2081 | POSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html> |
2082 | |
2083 | =item A Comparison between various tar implementations |
2084 | |
2085 | Lists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs> |
2086 | |
2087 | =back |
2088 | |
39713df4 |
2089 | =head1 AUTHOR |
2090 | |
c3745331 |
2091 | This module by Jos Boumans E<lt>kane@cpan.orgE<gt>. |
2092 | |
2093 | Please reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>. |
39713df4 |
2094 | |
2095 | =head1 ACKNOWLEDGEMENTS |
2096 | |
642eb381 |
2097 | Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas |
2098 | and especially Andrew Savige for their help and suggestions. |
39713df4 |
2099 | |
2100 | =head1 COPYRIGHT |
2101 | |
e0d68803 |
2102 | This module is copyright (c) 2002 - 2008 Jos Boumans |
c3745331 |
2103 | E<lt>kane@cpan.orgE<gt>. All rights reserved. |
39713df4 |
2104 | |
e0d68803 |
2105 | This library is free software; you may redistribute and/or modify |
c3745331 |
2106 | it under the same terms as Perl itself. |
39713df4 |
2107 | |
2108 | =cut |