From: Steve Hay Date: Wed, 1 Oct 2008 16:53:57 +0000 (+0000) Subject: Upgrade to Archive-Tar-1.39_04 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=642eb38136f2ca16919538298be0521b16a2091e;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Archive-Tar-1.39_04 Local change 32352 remains in 04_resolved_issues.t p4raw-id: //depot/perl@34452 --- diff --git a/MANIFEST b/MANIFEST index 01f80e0..b78f137 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1620,11 +1620,17 @@ lib/Archive/Tar/t/01_use.t Archive::Tar tests lib/Archive/Tar/t/02_methods.t Archive::Tar tests lib/Archive/Tar/t/03_file.t Archive::Tar tests lib/Archive/Tar/t/04_resolved_issues.t Archive::Tar tests +lib/Archive/Tar/t/05_iter.t Archive::Tar tests +lib/Archive/Tar/t/90_symlink.t Archive::Tar tests +lib/Archive/Tar/t/src/linktest/linktest_missing_dir.tar.packed Archive::Tar tests +lib/Archive/Tar/t/src/linktest/linktest_with_dir.tar.packed Archive::Tar tests lib/Archive/Tar/t/src/long/b Archive::Tar tests lib/Archive/Tar/t/src/long/bar.tar.packed Archive::Tar tests +lib/Archive/Tar/t/src/long/foo.tbz.packed Archive::Tar tests lib/Archive/Tar/t/src/long/foo.tgz.packed Archive::Tar tests lib/Archive/Tar/t/src/short/b Archive::Tar tests lib/Archive/Tar/t/src/short/bar.tar.packed Archive::Tar tests +lib/Archive/Tar/t/src/short/foo.tbz.packed Archive::Tar tests lib/Archive/Tar/t/src/short/foo.tgz.packed Archive::Tar tests lib/assert.pl assertion and panic with stack trace lib/Attribute/Handlers/Changes Attribute::Handlers diff --git a/lib/Archive/Tar.pm b/lib/Archive/Tar.pm index 508bcfe..1590ec7 100644 --- a/lib/Archive/Tar.pm +++ b/lib/Archive/Tar.pm @@ -7,16 +7,31 @@ package Archive::Tar; require 5.005_03; +use Cwd; +use IO::Zlib; +use IO::File; +use Carp qw(carp croak); +use File::Spec (); +use File::Spec::Unix (); +use File::Path (); + +use Archive::Tar::File; +use Archive::Tar::Constant; + +require Exporter; + use strict; use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING - $INSECURE_EXTRACT_MODE + $INSECURE_EXTRACT_MODE @ISA @EXPORT ]; +@ISA = qw[Exporter]; +@EXPORT = ( COMPRESS_GZIP, COMPRESS_BZIP ); $DEBUG = 0; $WARN = 1; $FOLLOW_SYMLINK = 0; -$VERSION = "1.38"; +$VERSION = "1.39_04"; $CHOWN = 1; $CHMOD = 1; $DO_NOT_USE_PREFIX = 0; @@ -28,24 +43,13 @@ BEGIN { ### try and load IO::String anyway, so you can dynamically ### switch between perlio and IO::String - eval { + $HAS_IO_STRING = eval { require IO::String; import IO::String; - }; - $HAS_IO_STRING = $@ ? 0 : 1; - + 1; + } || 0; } -use Cwd; -use IO::File; -use Carp qw(carp croak); -use File::Spec (); -use File::Spec::Unix (); -use File::Path (); - -use Archive::Tar::File; -use Archive::Tar::Constant; - =head1 NAME Archive::Tar - module for manipulations of tar archives @@ -55,7 +59,7 @@ Archive::Tar - module for manipulations of tar archives use Archive::Tar; my $tar = Archive::Tar->new; - $tar->read('origin.tgz',1); + $tar->read('origin.tgz'); $tar->extract(); $tar->add_files('file/foo.pl', 'docs/README'); @@ -63,7 +67,9 @@ Archive::Tar - module for manipulations of tar archives $tar->rename('oldname', 'new/file/name'); - $tar->write('files.tar'); + $tar->write('files.tar'); # plain tar + $tar->write('files.tgz', COMPRESSED_GZIP); # gzip compressed + $tar->write('files.tbz', COMPRESSED_BZIP); # bzip2 compressed =head1 DESCRIPTION @@ -122,23 +128,25 @@ sub new { return $obj; } -=head2 $tar->read ( $filename|$handle, $compressed, {opt => 'val'} ) +=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] ) Read the given tar file into memory. The first argument can either be the name of a file or a reference to an already open filehandle (or an IO::Zlib object if it's compressed) -The second argument indicates whether the file referenced by the first -argument is compressed. The C will I any previous content in C<$tar>! -The second argument may be considered optional if IO::Zlib is -installed, since it will transparently Do The Right Thing. -Archive::Tar will warn if you try to pass a compressed file if -IO::Zlib is not available and simply return. +The second argument may be considered optional, but remains for +backwards compatibility. Archive::Tar now looks at the file +magic to determine what class should be used to open the file +and will transparently Do The Right Thing. + +Archive::Tar will warn if you try to pass a bzip2 compressed file and the +IO::Zlib / IO::Uncompress::Bunzip2 modules are not available and simply return. Note that you can currently B pass a C compressed -filehandle, which is not opened with C, nor a string +filehandle, which is not opened with C, a C compressed +filehandle, which is not opened with C, nor a string containing the full archive information (either compressed or uncompressed). These are worth while features, but not currently implemented. See the C section. @@ -153,12 +161,18 @@ all options are case-sensitive. Do not read more than C files. This is useful if you have very big archives, and are only interested in the first few files. +=item filter + +Can be set to a regular expression. Only files with names that match +the expression will be read. + =item extract If set to true, immediately extract entries when reading them. This gives you the same memory break as the C function. Note however that entries will not be read into memory, but written -straight to disk. +straight to disk. This means no C objects are +created for you to inspect. =back @@ -194,49 +208,97 @@ sub read { } sub _get_handle { - my $self = shift; - my $file = shift; return unless defined $file; - return $file if ref $file; - - my $gzip = shift || 0; - my $mode = shift || READ_ONLY->( ZLIB ); # default to read only - - my $fh; my $bin; - - ### only default to ZLIB if we're not trying to /write/ to a handle ### - if( ZLIB and $gzip || MODE_READ->( $mode ) ) { - - ### IO::Zlib will Do The Right Thing, even when passed - ### a plain file ### - $fh = new IO::Zlib; + my $self = shift; + my $file = shift; return unless defined $file; + return $file if ref $file; + my $compress = shift || 0; + my $mode = shift || READ_ONLY->( ZLIB ); # default to read only + + + ### get a FH opened to the right class, so we can use it transparently + ### throughout the program + my $fh; + { ### reading magic only makes sense if we're opening a file for + ### reading. otherwise, just use what the user requested. + my $magic = ''; + if( MODE_READ->($mode) ) { + open my $tmp, $file or do { + $self->_error( qq[Could not open '$file' for reading: $!] ); + return; + }; + + ### read the first 4 bites of the file to figure out which class to + ### use to open the file. + sysread( $tmp, $magic, 4 ); + close $tmp; + } - } else { - if( $gzip ) { - $self->_error(qq[Compression not available - Install IO::Zlib!]); - return; + ### is it bzip? + ### if you asked specifically for bzip compression, or if we're in + ### read mode and the magic numbers add up, use bzip + if( BZIP and ( + ($compress eq COMPRESS_BZIP) or + ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM ) + ) + ) { + + ### different reader/writer modules, different error vars... sigh + if( MODE_READ->($mode) ) { + $fh = IO::Uncompress::Bunzip2->new( $file ) or do { + $self->_error( qq[Could not read '$file': ] . + $IO::Uncompress::Bunzip2::Bunzip2Error + ); + return; + }; + + } else { + $fh = IO::Compress::Bzip2->new( $file ) or do { + $self->_error( qq[Could not write to '$file': ] . + $IO::Compress::Bzip2::Bzip2Error + ); + return; + }; + } + + ### is it gzip? + ### if you asked for compression, if you wanted to read or the gzip + ### magic number is present (redundant with read) + } elsif( ZLIB and ( + $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM + ) + ) { + $fh = IO::Zlib->new; + unless( $fh->open( $file, $mode ) ) { + $self->_error(qq[Could not create filehandle for '$file': $!]); + return; + } + + ### is it plain tar? } else { - $fh = new IO::File; - $bin++; - } - } + $fh = IO::File->new; - unless( $fh->open( $file, $mode ) ) { - $self->_error( qq[Could not create filehandle for '$file': $!!] ); - return; - } + unless( $fh->open( $file, $mode ) ) { + $self->_error(qq[Could not create filehandle for '$file': $!]); + return; + } - binmode $fh if $bin; + ### enable bin mode on tar archives + binmode $fh; + } + } return $fh; } + sub _read_tar { my $self = shift; my $handle = shift or return; my $opts = shift || {}; my $count = $opts->{limit} || 0; + my $filter = $opts->{filter}; my $extract = $opts->{extract} || 0; ### set a cap on the amount of files to extract ### @@ -372,6 +434,17 @@ sub _read_tar { undef $real_name; } + ### skip this entry if we're filtering + if ($filter && $entry->name !~ $filter) { + next LOOP; + + ### skip this entry if it's a pax header. This is a special file added + ### by, among others, git-generated tarballs. It holds comments and is + ### not meant for extracting. See #38932: pax_global_header extracted + } elsif ( $entry->name eq PAX_HEADER ) { + next LOOP; + } + $self->_extract_file( $entry ) if $extract && !$entry->is_longlink && !$entry->is_unknown @@ -544,7 +617,7 @@ sub _extract_file { my $dir; ### is $name an absolute path? ### - if( File::Spec->file_name_is_absolute( $dirs ) ) { + if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) { ### absolute names are not allowed to be in tarballs under ### strict mode, so only allow it if a user tells us to do it @@ -557,30 +630,65 @@ sub _extract_file { } ### user asked us to, it's fine. - $dir = $dirs; + $dir = File::Spec->catpath( $vol, $dirs, "" ); ### it's a relative path ### } else { - my $cwd = (defined $self->{cwd} ? $self->{cwd} : cwd()); + my $cwd = (ref $self and defined $self->{cwd}) + ? $self->{cwd} + : cwd(); my @dirs = defined $alt ? File::Spec->splitdir( $dirs ) # It's a local-OS path : File::Spec::Unix->splitdir( $dirs ); # it's UNIX-style, likely # straight from the tarball - ### paths that leave the current directory are not allowed under - ### strict mode, so only allow it if a user tells us to do this. if( not defined $alt and - not $INSECURE_EXTRACT_MODE and - grep { $_ eq '..' } @dirs - ) { - $self->_error( - q[Entry ']. $entry->full_path .q[' is attempting to leave the ]. - q[current working directory. Not extracting under SECURE ]. - q[EXTRACT MODE] - ); - return; - } + not $INSECURE_EXTRACT_MODE + ) { + + ### paths that leave the current directory are not allowed under + ### strict mode, so only allow it if a user tells us to do this. + if( grep { $_ eq '..' } @dirs ) { + + $self->_error( + q[Entry ']. $entry->full_path .q[' is attempting to leave ]. + q[the current working directory. Not extracting under ]. + q[SECURE EXTRACT MODE] + ); + return; + } + + ### the archive may be asking us to extract into a symlink. This + ### is not sane and a possible security issue, as outlined here: + ### https://rt.cpan.org/Ticket/Display.html?id=30380 + ### https://bugzilla.redhat.com/show_bug.cgi?id=295021 + ### https://issues.rpath.com/browse/RPL-1716 + my $full_path = $cwd; + for my $d ( @dirs ) { + $full_path = File::Spec->catdir( $full_path, $d ); + + ### we've already checked this one, and it's safe. Move on. + next if ref $self and $self->{_link_cache}->{$full_path}; + + if( -l $full_path ) { + my $to = readlink $full_path; + my $diag = "symlinked directory ($full_path => $to)"; + + $self->_error( + q[Entry ']. $entry->full_path .q[' is attempting to ]. + qq[extract to a $diag. This is considered a security ]. + q[vulnerability and not allowed under SECURE EXTRACT ]. + q[MODE] + ); + return; + } + + ### XXX keep a cache if possible, so the stats become cheaper: + $self->{_link_cache}->{$full_path} = 1 if ref $self; + } + } + ### '.' is the directory delimiter, of which the first one has to ### be escaped/changed. @@ -622,7 +730,8 @@ sub _extract_file { unless ( -d _ ) { eval { File::Path::mkpath( $dir, 0, 0777 ) }; if( $@ ) { - $self->_error( qq[Could not create directory '$dir': $@] ); + my $fp = $entry->full_path; + $self->_error(qq[Could not create directory '$dir' for '$fp': $@]); return; } @@ -672,8 +781,13 @@ sub _extract_file { $self->_make_special_file( $entry, $full ) or return; } - utime time, $entry->mtime - TIME_OFFSET, $full or - $self->_error( qq[Could not update timestamp] ); + ### only update the timestamp if it's not a symlink; that will change the + ### timestamp of the original. This addresses bug #33669: Could not update + ### timestamp warning on symlinks + if( not -l $full ) { + utime time, $entry->mtime - TIME_OFFSET, $full or + $self->_error( qq[Could not update timestamp] ); + } if( $CHOWN && CAN_CHOWN ) { chown $entry->uid, $entry->gid, $full or @@ -707,8 +821,8 @@ sub _make_special_file { or $fail++; } - $err = qq[Making symbolink link from '] . $entry->linkname . - qq[' to '$file' failed] if $fail; + $err = qq[Making symbolic link '$file' to '] . + $entry->linkname .q[' failed] if $fail; } elsif ( $entry->is_hardlink ) { my $fail; @@ -949,17 +1063,23 @@ sub clear { Write the in-memory archive to disk. The first argument can either be the name of a file or a reference to an already open filehandle (a -GLOB reference). If the second argument is true, the module will use -IO::Zlib to write the file in a compressed format. If IO::Zlib is -not available, the C method will fail and return. +GLOB reference). + +The second argument is used to indicate compression. You can either +compress using C or C. If you pass a digit, it's assumed +to be the C compression level (between 1 and 9), but the use of +constants is prefered: + + # write a gzip compressed file + $tar->write( 'out.tgz', COMPRESSION_GZIP ); + + # write a bzip compressed file + $tar->write( 'out.tbz', COMPRESSION_BZIP ); Note that when you pass in a filehandle, the compression argument is ignored, as all files are printed verbatim to your filehandle. If you wish to enable compression with filehandles, use an -C filehandle instead. - -Specific levels of compression can be chosen by passing the values 2 -through 9 as the second parameter. +C or C filehandle instead. The third argument is an optional prefix. All files will be tucked away in the directory you specify as prefix. So if you have files @@ -970,6 +1090,7 @@ If no arguments are given, C returns the entire formatted archive as a string, which could be useful if you'd like to stuff the archive into a socket or a pipe to gzip or something. + =cut sub write { @@ -1383,23 +1504,27 @@ sub has_perlio { return $HAS_PERLIO; } =head1 Class Methods -=head2 Archive::Tar->create_archive($file, $compression, @filelist) +=head2 Archive::Tar->create_archive($file, $compressed, @filelist) Creates a tar file from the list of files provided. The first argument can either be the name of the tar file to create or a reference to an open file handle (e.g. a GLOB reference). -The second argument specifies the level of compression to be used, if -any. Compression of tar files requires the installation of the -IO::Zlib module. Specific levels of compression may be -requested by passing a value between 2 and 9 as the second argument. -Any other value evaluating as true will result in the default -compression level being used. +The second argument is used to indicate compression. You can either +compress using C or C. If you pass a digit, it's assumed +to be the C compression level (between 1 and 9), but the use of +constants is prefered: + + # write a gzip compressed file + Archive::Tar->create_archive( 'out.tgz', COMPRESSION_GZIP, @filelist ); + + # write a bzip compressed file + Archive::Tar->create_archive( 'out.tbz', COMPRESSION_BZIP, @filelist ); Note that when you pass in a filehandle, the compression argument is ignored, as all files are printed verbatim to your filehandle. If you wish to enable compression with filehandles, use an -C filehandle instead. +C or C filehandle instead. The remaining arguments list the files to be included in the tar file. These files must all exist. Any files which don't exist or can't be @@ -1431,7 +1556,63 @@ sub create_archive { return $tar->write( $file, $gzip ); } -=head2 Archive::Tar->list_archive ($file, $compressed, [\@properties]) +=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] ) + +Returns an iterator function that reads the tar file without loading +it all in memory. Each time the function is called it will return the +next file in the tarball. The files are returned as +C objects. The iterator function returns the +empty list once it has exhausted the the files contained. + +The second argument can be a hash reference with options, which are +identical to the arguments passed to C. + +Example usage: + + my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} ); + + while( my $f = $next->() ) { + print $f->name, "\n"; + + $f->extract or warn "Extraction failed"; + + # .... + } + +=cut + + +sub iter { + my $class = shift; + my $filename = shift or return; + my $compressed = shift or 0; + my $opts = shift || {}; + + ### get a handle to read from. + my $handle = $class->_get_handle( + $filename, + $compressed, + READ_ONLY->( ZLIB ) + ) or return; + + my @data; + return sub { + return shift(@data) if @data; # more than one file returned? + return unless $handle; # handle exhausted? + + ### read data, should only return file + @data = @{ $class->_read_tar($handle, { %$opts, limit => 1 }) }; + + ### return one piece of data + return shift(@data) if @data; + + ### data is exhausted, free the filehandle + undef $handle; + return; + }; +} + +=head2 Archive::Tar->list_archive($file, $compressed, [\@properties]) Returns a list of the names of all the files in the archive. The first argument can either be the name of the tar file to list or a @@ -1462,7 +1643,7 @@ sub list_archive { return $tar->list_files( @_ ); } -=head2 Archive::Tar->extract_archive ($file, $gzip) +=head2 Archive::Tar->extract_archive($file, $compressed) Extracts the contents of the tar file. The first argument can either be the name of the tar file to create or a reference to an open file @@ -1489,8 +1670,8 @@ sub extract_archive { =head2 Archive::Tar->can_handle_compressed_files A simple checking routine, which will return true if C -is able to uncompress compressed archives on the fly with C, -or false if C is not installed. +is able to uncompress compressed archives on the fly with C +and C or false if not both are installed. You can use this as a shortcut to determine whether C will do what you think before passing compressed archives to its @@ -1498,7 +1679,7 @@ C method. =cut -sub can_handle_compressed_files { return ZLIB ? 1 : 0 } +sub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 } sub no_string_support { croak("You have to install IO::String to support writing archives to strings"); @@ -1645,18 +1826,24 @@ Yes it is, see previous answer. Since C and therefore C doesn't support C on their filehandles, there is little choice but to read the archive into memory. This is ok if you want to do in-memory manipulation of the archive. + If you just want to extract, use the C class method instead. It will optimize and write to disk immediately. -=item Can't you lazy-load data instead? +Another option is to use the C class method to iterate over +the files in the tarball without reading them all in memory at once. + +=item Can you lazy-load data instead? -No, not easily. See previous question. +In some cases, yes. You can use the C class method to iterate +over the files in the tarball without reading them all in memory at once. =item How much memory will an X kb tar file need? Probably more than X kb, since it will all be read into memory. If this is a problem, and you don't need to do in memory manipulation -of the archive, consider using C instead. +of the archive, consider using the C class method, or C +instead. =item What do you do with unsupported filetypes in an archive? @@ -1666,8 +1853,9 @@ try to make a copy of the original file, rather than throwing an error. This does require you to read the entire archive in to memory first, since otherwise we wouldn't know what data to fill the copy with. -(This means that you cannot use the class methods on archives that -have incompatible filetypes and still expect things to work). +(This means that you cannot use the class methods, including C +on archives that have incompatible filetypes and still expect things +to work). For other filetypes, like C and C we'll warn that the extraction of this particular item didn't work. @@ -1852,8 +2040,8 @@ Please reports bugs to Ebug-archive-tar@rt.cpan.orgE. =head1 ACKNOWLEDGEMENTS -Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney and -especially Andrew Savige for their help and suggestions. +Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas +and especially Andrew Savige for their help and suggestions. =head1 COPYRIGHT diff --git a/lib/Archive/Tar/Constant.pm b/lib/Archive/Tar/Constant.pm index 00416d5..699d985 100644 --- a/lib/Archive/Tar/Constant.pm +++ b/lib/Archive/Tar/Constant.pm @@ -2,20 +2,16 @@ package Archive::Tar::Constant; BEGIN { require Exporter; - $VERSION= '0.02'; - @ISA = qw[Exporter]; - @EXPORT = qw[ - FILE HARDLINK SYMLINK CHARDEV BLOCKDEV DIR FIFO SOCKET UNKNOWN - BUFFER HEAD READ_ONLY WRITE_ONLY UNPACK PACK TIME_OFFSET ZLIB - BLOCK_SIZE TAR_PAD TAR_END ON_UNIX BLOCK CAN_READLINK MAGIC - TAR_VERSION UNAME GNAME CAN_CHOWN MODE CHECK_SUM UID GID - GZIP_MAGIC_NUM MODE_READ LONGLINK LONGLINK_NAME PREFIX_LENGTH - LABEL NAME_LENGTH STRIP_MODE ON_VMS - ]; + + $VERSION = '0.02'; + @ISA = qw[Exporter]; require Time::Local if $^O eq "MacOS"; } +use Package::Constants; +@EXPORT = Package::Constants->list( __PACKAGE__ ); + use constant FILE => 0; use constant HARDLINK => 1; use constant SYMLINK => 2; @@ -32,6 +28,9 @@ use constant BUFFER => 4096; use constant HEAD => 512; use constant BLOCK => 512; +use constant COMPRESS_GZIP => 9; +use constant COMPRESS_BZIP => 'bzip2'; + use constant BLOCK_SIZE => sub { my $n = int($_[0]/BLOCK); $n++ if $_[0] % BLOCK; $n * BLOCK }; use constant TAR_PAD => sub { my $x = shift || return; return "\0" x (BLOCK - ($x % BLOCK) ) }; use constant TAR_END => "\0" x BLOCK; @@ -61,14 +60,23 @@ use constant TIME_OFFSET => ($^O eq "MacOS") ? Time::Local::timelocal(0,0,0,1 use constant MAGIC => "ustar"; use constant TAR_VERSION => "00"; use constant LONGLINK_NAME => '././@LongLink'; +use constant PAX_HEADER => 'pax_global_header'; - ### allow ZLIB to be turned off using ENV - ### DEBUG only + ### allow ZLIB to be turned off using ENV: DEBUG only use constant ZLIB => do { !$ENV{'PERL5_AT_NO_ZLIB'} and eval { require IO::Zlib }; - $ENV{'PERL5_AT_NO_ZLIB'} || $@ ? 0 : 1 }; - + $ENV{'PERL5_AT_NO_ZLIB'} || $@ ? 0 : 1 + }; + + ### allow BZIP to be turned off using ENV: DEBUG only +use constant BZIP => do { !$ENV{'PERL5_AT_NO_BZIP'} and + eval { require IO::Uncompress::Bunzip2; + require IO::Compress::Bzip2; }; + $ENV{'PERL5_AT_NO_BZIP'} || $@ ? 0 : 1 + }; + use constant GZIP_MAGIC_NUM => qr/^(?:\037\213|\037\235)/; +use constant BZIP_MAGIC_NUM => qr/^BZh\d/; use constant CAN_CHOWN => do { ($> == 0 and $^O ne "MacOS" and $^O ne "MSWin32") }; use constant CAN_READLINK => ($^O ne 'MSWin32' and $^O !~ /RISC(?:[ _])?OS/i and $^O ne 'VMS'); diff --git a/lib/Archive/Tar/File.pm b/lib/Archive/Tar/File.pm index 8c96577..d5c2fee 100644 --- a/lib/Archive/Tar/File.pm +++ b/lib/Archive/Tar/File.pm @@ -1,15 +1,18 @@ package Archive::Tar::File; use strict; +use Carp (); use IO::File; use File::Spec::Unix (); use File::Spec (); use File::Basename (); +### avoid circular use, so only require; +require Archive::Tar; use Archive::Tar::Constant; use vars qw[@ISA $VERSION]; -@ISA = qw[Archive::Tar]; +#@ISA = qw[Archive::Tar]; $VERSION = '0.02'; ### set value to 1 to oct() it during the unpack ### @@ -154,13 +157,13 @@ Raw tar header -- not useful for most users =head1 Methods -=head2 new( file => $path ) +=head2 Archive::Tar::File->new( file => $path ) Returns a new Archive::Tar::File object from an existing file. Returns undef on failure. -=head2 new( data => $path, $data, $opt ) +=head2 Archive::Tar::File->new( data => $path, $data, $opt ) Returns a new Archive::Tar::File object from data. @@ -171,7 +174,7 @@ tar header), which are described above in the Accessors section. Returns undef on failure. -=head2 new( chunk => $chunk ) +=head2 Archive::Tar::File->new( chunk => $chunk ) Returns a new Archive::Tar::File object from a raw 512-byte tar archive chunk. @@ -266,6 +269,29 @@ sub _new_from_file { my @items = qw[mode uid gid size mtime]; my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9]; + if (ON_VMS) { + ### VMS has two UID modes, traditional and POSIX. Normally POSIX is + ### not used. We currently do not have an easy way to see if we are in + ### POSIX mode. In traditional mode, the UID is actually the VMS UIC. + ### The VMS UIC has the upper 16 bits is the GID, which in many cases + ### the VMS UIC will be larger than 209715, the largest that TAR can + ### handle. So for now, assume it is traditional if the UID is larger + ### than 0x10000. + + if ($hash{uid} > 0x10000) { + $hash{uid} = $hash{uid} & 0xFFFF; + } + + ### The file length from stat() is the physical length of the file + ### However the amount of data read in may be more for some file types. + ### Fixed length files are read past the logical EOF to end of the block + ### containing. Other file types get expanded on read because record + ### delimiters are added. + + my $data_len = length $data; + $hash{size} = $data_len if $hash{size} < $data_len; + + } ### you *must* set size == 0 on symlinks, or the next entry will be ### though of as the contents of the symlink, which is wrong. ### this fixes bug #7937 @@ -411,7 +437,25 @@ sub _downgrade_to_plainfile { return 1; } -=head2 full_path +=head2 $bool = $file->extract( [ $alternative_name ] ) + +Extract this object, optionally to an alternative name. + +See C<< Archive::Tar->extract_file >> for details. + +Returns true on success and false on failure. + +=cut + +sub extract { + my $self = shift; + + local $Carp::CarpLevel += 1; + + return Archive::Tar->_extract_file( $self, @_ ); +} + +=head2 $path = $file->full_path Returns the full path from the tar header; this is basically a concatenation of the C and C fields. @@ -429,7 +473,7 @@ sub full_path { } -=head2 validate +=head2 $bool = $file->validate Done by Archive::Tar internally when reading the tar file: validate the header against the checksum to ensure integer tar file. @@ -448,7 +492,7 @@ sub validate { return unpack ("%16C*", $raw) == $self->chksum ? 1 : 0; } -=head2 has_content +=head2 $bool = $file->has_content Returns a boolean to indicate whether the current object has content. Some special files like directories and so on never will have any @@ -462,7 +506,7 @@ sub has_content { return defined $self->data() && length $self->data() ? 1 : 0; } -=head2 get_content +=head2 $content = $file->get_content Returns the current content for the in-memory file @@ -473,7 +517,7 @@ sub get_content { $self->data( ); } -=head2 get_content_by_ref +=head2 $cref = $file->get_content_by_ref Returns the current content for the in-memory file as a scalar reference. Normal users won't need this, but it will save memory if @@ -489,7 +533,7 @@ sub get_content_by_ref { return \$self->{data}; } -=head2 replace_content( $content ) +=head2 $bool = $file->replace_content( $content ) Replace the current content of the file with the new content. This only affects the in-memory archive, not the on-disk version until @@ -508,7 +552,7 @@ sub replace_content { return 1; } -=head2 rename( $new_name ) +=head2 $bool = $file->rename( $new_name ) Rename the current file to $new_name. @@ -540,49 +584,49 @@ use the following methods: =over 4 -=item is_file +=item $file->is_file Returns true if the file is of type C -=item is_dir +=item $file->is_dir Returns true if the file is of type C -=item is_hardlink +=item $file->is_hardlink Returns true if the file is of type C -=item is_symlink +=item $file->is_symlink Returns true if the file is of type C -=item is_chardev +=item $file->is_chardev Returns true if the file is of type C -=item is_blockdev +=item $file->is_blockdev Returns true if the file is of type C -=item is_fifo +=item $file->is_fifo Returns true if the file is of type C -=item is_socket +=item $file->is_socket Returns true if the file is of type C -=item is_longlink +=item $file->is_longlink Returns true if the file is of type C. Should not happen after a successful C. -=item is_label +=item $file->is_label Returns true if the file is of type C