[p5sagit/p5-mst-13.2.git] / ext / Archive-Tar / lib / Archive / Tar / File.pm

package Archive::Tar::File;
use strict;

use Carp                ();
use IO::File;
use File::Spec::Unix    ();
use File::Spec          ();
use File::Basename      ();

### avoid circular use, so only require;
require Archive::Tar;
use Archive::Tar::Constant;

use vars qw[@ISA $VERSION];
#@ISA        = qw[Archive::Tar];
$VERSION    = '0.02';

### set value to 1 to oct() it during the unpack ###
my $tmpl = [
        name        => 0,   # string
        mode        => 1,   # octal
        uid         => 1,   # octal
        gid         => 1,   # octal
        size        => 1,   # octal
        mtime       => 1,   # octal
        chksum      => 1,   # octal
        type        => 0,   # character
        linkname    => 0,   # string
        magic       => 0,   # string
        version     => 0,   # 2 bytes
        uname       => 0,   # string
        gname       => 0,   # string
        devmajor    => 1,   # octal
        devminor    => 1,   # octal
        prefix      => 0,

### end UNPACK items ###
        raw         => 0,   # the raw data chunk
        data        => 0,   # the data associated with the file --
                            # This  might be very memory intensive
];

### install get/set accessors for this object.
for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) {
    my $key = $tmpl->[$i];
    no strict 'refs';
    *{__PACKAGE__."::$key"} = sub {
        my $self = shift;
        $self->{$key} = $_[0] if @_;

        ### just in case the key is not there or undef or something ###
        {   local $^W = 0;
            return $self->{$key};
        }
    }
}

=head1 NAME

Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar

=head1 SYNOPSIS

    my @items = $tar->get_files;

    print $_->name, ' ', $_->size, "\n" for @items;

    print $object->get_content;
    $object->replace_content('new content');

    $object->rename( 'new/full/path/to/file.c' );

=head1 DESCRIPTION

Archive::Tar::Files provides a neat little object layer for in-memory
extracted files. It's mostly used internally in Archive::Tar to tidy
up the code, but there's no reason users shouldn't use this API as
well.

=head2 Accessors

A lot of the methods in this package are accessors to the various
fields in the tar header:

=over 4

=item name

The file's name

=item mode

The file's mode

=item uid

The user id owning the file

=item gid

The group id owning the file

=item size

File size in bytes

=item mtime

Modification time. Adjusted to mac-time on MacOS if required

=item chksum

Checksum field for the tar header

=item type

File type -- numeric, but comparable to exported constants -- see
Archive::Tar's documentation

=item linkname

If the file is a symlink, the file it's pointing to

=item magic

Tar magic string -- not useful for most users

=item version

Tar version string -- not useful for most users

=item uname

The user name that owns the file

=item gname

The group name that owns the file

=item devmajor

Device major number in case of a special file

=item devminor

Device minor number in case of a special file

=item prefix

Any directory to prefix to the extraction path, if any

=item raw

Raw tar header -- not useful for most users

=back

=head1 Methods

=head2 Archive::Tar::File->new( file => $path )

Returns a new Archive::Tar::File object from an existing file.

Returns undef on failure.

=head2 Archive::Tar::File->new( data => $path, $data, $opt )

Returns a new Archive::Tar::File object from data.

C<$path> defines the file name (which need not exist), C<$data> the
file contents, and C<$opt> is a reference to a hash of attributes
which may be used to override the default attributes (fields in the
tar header), which are described above in the Accessors section.

Returns undef on failure.

=head2 Archive::Tar::File->new( chunk => $chunk )

Returns a new Archive::Tar::File object from a raw 512-byte tar
archive chunk.

Returns undef on failure.

=cut

sub new {
    my $class   = shift;
    my $what    = shift;

    my $obj =   ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
                ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
                ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
                undef;

    return $obj;
}

### copies the data, creates a clone ###
sub clone {
    my $self = shift;
    return bless { %$self }, ref $self;
}

sub _new_from_chunk {
    my $class = shift;
    my $chunk = shift or return;    # 512 bytes of tar header
    my %hash  = @_;

    ### filter any arguments on defined-ness of values.
    ### this allows overriding from what the tar-header is saying
    ### about this tar-entry. Particularly useful for @LongLink files
    my %args  = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;

    ### makes it start at 0 actually... :) ###
    my $i = -1;
    my %entry = map {
        $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_
    } map { /^([^\0]*)/ } unpack( UNPACK, $chunk );

    my $obj = bless { %entry, %args }, $class;

	### magic is a filetype string.. it should have something like 'ustar' or
	### something similar... if the chunk is garbage, skip it
	return unless $obj->magic !~ /\W/;

    ### store the original chunk ###
    $obj->raw( $chunk );

    $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
    $obj->type(DIR)  if ( ($obj->is_file) && ($obj->name =~ m|/$|) );


    return $obj;

}

sub _new_from_file {
    my $class       = shift;
    my $path        = shift;        
    
    ### path has to at least exist
    return unless defined $path;
    
    my $type        = __PACKAGE__->_filetype($path);
    my $data        = '';

    READ: { 
        unless ($type == DIR ) {
            my $fh = IO::File->new;
        
            unless( $fh->open($path) ) {
                ### dangling symlinks are fine, stop reading but continue
                ### creating the object
                last READ if $type == SYMLINK;
                
                ### otherwise, return from this function --
                ### anything that's *not* a symlink should be
                ### resolvable
                return;
            }

            ### binmode needed to read files properly on win32 ###
            binmode $fh;
            $data = do { local $/; <$fh> };
            close $fh;
        }
    }

    my @items       = qw[mode uid gid size mtime];
    my %hash        = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];

    if (ON_VMS) {
        ### VMS has two UID modes, traditional and POSIX.  Normally POSIX is
        ### not used.  We currently do not have an easy way to see if we are in
        ### POSIX mode.  In traditional mode, the UID is actually the VMS UIC.
        ### The VMS UIC has the upper 16 bits is the GID, which in many cases
        ### the VMS UIC will be larger than 209715, the largest that TAR can
        ### handle.  So for now, assume it is traditional if the UID is larger
        ### than 0x10000.

        if ($hash{uid} > 0x10000) {
            $hash{uid} = $hash{uid} & 0xFFFF;
        }

        ### The file length from stat() is the physical length of the file
        ### However the amount of data read in may be more for some file types.
        ### Fixed length files are read past the logical EOF to end of the block
        ### containing.  Other file types get expanded on read because record
        ### delimiters are added.

        my $data_len = length $data;
        $hash{size} = $data_len if $hash{size} < $data_len;

    }
    ### you *must* set size == 0 on symlinks, or the next entry will be
    ### though of as the contents of the symlink, which is wrong.
    ### this fixes bug #7937
    $hash{size}     = 0 if ($type == DIR or $type == SYMLINK);
    $hash{mtime}    -= TIME_OFFSET;

    ### strip the high bits off the mode, which we don't need to store
    $hash{mode}     = STRIP_MODE->( $hash{mode} );


    ### probably requires some file path munging here ... ###
    ### name and prefix are set later
    my $obj = {
        %hash,
        name        => '',
        chksum      => CHECK_SUM,
        type        => $type,
        linkname    => ($type == SYMLINK and CAN_READLINK)
                            ? readlink $path
                            : '',
        magic       => MAGIC,
        version     => TAR_VERSION,
        uname       => UNAME->( $hash{uid} ),
        gname       => GNAME->( $hash{gid} ),
        devmajor    => 0,   # not handled
        devminor    => 0,   # not handled
        prefix      => '',
        data        => $data,
    };

    bless $obj, $class;

    ### fix up the prefix and file from the path
    my($prefix,$file) = $obj->_prefix_and_file( $path );
    $obj->prefix( $prefix );
    $obj->name( $file );

    return $obj;
}

sub _new_from_data {
    my $class   = shift;
    my $path    = shift;    return unless defined $path;
    my $data    = shift;    return unless defined $data;
    my $opt     = shift;

    my $obj = {
        data        => $data,
        name        => '',
        mode        => MODE,
        uid         => UID,
        gid         => GID,
        size        => length $data,
        mtime       => time - TIME_OFFSET,
        chksum      => CHECK_SUM,
        type        => FILE,
        linkname    => '',
        magic       => MAGIC,
        version     => TAR_VERSION,
        uname       => UNAME->( UID ),
        gname       => GNAME->( GID ),
        devminor    => 0,
        devmajor    => 0,
        prefix      => '',
    };

    ### overwrite with user options, if provided ###
    if( $opt and ref $opt eq 'HASH' ) {
        for my $key ( keys %$opt ) {

            ### don't write bogus options ###
            next unless exists $obj->{$key};
            $obj->{$key} = $opt->{$key};
        }
    }

    bless $obj, $class;

    ### fix up the prefix and file from the path
    my($prefix,$file) = $obj->_prefix_and_file( $path );
    $obj->prefix( $prefix );
    $obj->name( $file );

    return $obj;
}

sub _prefix_and_file {
    my $self = shift;
    my $path = shift;

    my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
    my @dirs = File::Spec->splitdir( $dirs );

    ### so sometimes the last element is '' -- probably when trailing
    ### dir slashes are encountered... this is is of course pointless,
    ### so remove it
    pop @dirs while @dirs and not length $dirs[-1];

    ### if it's a directory, then $file might be empty
    $file = pop @dirs if $self->is_dir and not length $file;

    ### splitting ../ gives you the relative path in native syntax
    map { $_ = '..' if $_  eq '-' } @dirs if ON_VMS;

    my $prefix = File::Spec::Unix->catdir(
                        grep { length } $vol, @dirs
                    );
    return( $prefix, $file );
}

sub _filetype {
    my $self = shift;
    my $file = shift;
    
    return unless defined $file;

    return SYMLINK  if (-l $file);	# Symlink

    return FILE     if (-f _);		# Plain file

    return DIR      if (-d _);		# Directory

    return FIFO     if (-p _);		# Named pipe

    return SOCKET   if (-S _);		# Socket

    return BLOCKDEV if (-b _);		# Block special

    return CHARDEV  if (-c _);		# Character special

    ### shouldn't happen, this is when making archives, not reading ###
    return LONGLINK if ( $file eq LONGLINK_NAME );

    return UNKNOWN;		            # Something else (like what?)

}

### this method 'downgrades' a file to plain file -- this is used for
### symlinks when FOLLOW_SYMLINKS is true.
sub _downgrade_to_plainfile {
    my $entry = shift;
    $entry->type( FILE );
    $entry->mode( MODE );
    $entry->linkname('');

    return 1;
}

=head2 $bool = $file->extract( [ $alternative_name ] )

Extract this object, optionally to an alternative name. 

See C<< Archive::Tar->extract_file >> for details.

Returns true on success and false on failure.

=cut

sub extract {
    my $self = shift;
    
    local $Carp::CarpLevel += 1;
    
    return Archive::Tar->_extract_file( $self, @_ );
}

=head2 $path = $file->full_path

Returns the full path from the tar header; this is basically a
concatenation of the C<prefix> and C<name> fields.

=cut

sub full_path {
    my $self = shift;

    ### if prefix field is emtpy
    return $self->name unless defined $self->prefix and length $self->prefix;

    ### or otherwise, catfile'd
    return File::Spec::Unix->catfile( $self->prefix, $self->name );
}


=head2 $bool = $file->validate

Done by Archive::Tar internally when reading the tar file:
validate the header against the checksum to ensure integer tar file.

Returns true on success, false on failure

=cut

sub validate {
    my $self = shift;

    my $raw = $self->raw;

    ### don't know why this one is different from the one we /write/ ###
    substr ($raw, 148, 8) = "        ";

    ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar
    ### like GNU tar does. See here for details:
    ### http://www.gnu.org/software/tar/manual/tar.html#SEC139
    ### so we do both a signed AND unsigned validate. if one succeeds, that's
    ### good enough
	return (   (unpack ("%16C*", $raw) == $self->chksum)
	        or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0;
}

=head2 $bool = $file->has_content

Returns a boolean to indicate whether the current object has content.
Some special files like directories and so on never will have any
content. This method is mainly to make sure you don't get warnings
for using uninitialized values when looking at an object's content.

=cut

sub has_content {
    my $self = shift;
    return defined $self->data() && length $self->data() ? 1 : 0;
}

=head2 $content = $file->get_content

Returns the current content for the in-memory file

=cut

sub get_content {
    my $self = shift;
    $self->data( );
}

=head2 $cref = $file->get_content_by_ref

Returns the current content for the in-memory file as a scalar
reference. Normal users won't need this, but it will save memory if
you are dealing with very large data files in your tar archive, since
it will pass the contents by reference, rather than make a copy of it
first.

=cut

sub get_content_by_ref {
    my $self = shift;

    return \$self->{data};
}

=head2 $bool = $file->replace_content( $content )

Replace the current content of the file with the new content. This
only affects the in-memory archive, not the on-disk version until
you write it.

Returns true on success, false on failure.

=cut

sub replace_content {
    my $self = shift;
    my $data = shift || '';

    $self->data( $data );
    $self->size( length $data );
    return 1;
}

=head2 $bool = $file->rename( $new_name )

Rename the current file to $new_name.

Note that you must specify a Unix path for $new_name, since per tar
standard, all files in the archive must be Unix paths.

Returns true on success and false on failure.

=cut

sub rename {
    my $self = shift;
    my $path = shift;
    
    return unless defined $path;

    my ($prefix,$file) = $self->_prefix_and_file( $path );

    $self->name( $file );
    $self->prefix( $prefix );

	return 1;
}

=head1 Convenience methods

To quickly check the type of a C<Archive::Tar::File> object, you can
use the following methods:

=over 4

=item $file->is_file

Returns true if the file is of type C<file>

=item $file->is_dir

Returns true if the file is of type C<dir>

=item $file->is_hardlink

Returns true if the file is of type C<hardlink>

=item $file->is_symlink

Returns true if the file is of type C<symlink>

=item $file->is_chardev

Returns true if the file is of type C<chardev>

=item $file->is_blockdev

Returns true if the file is of type C<blockdev>

=item $file->is_fifo

Returns true if the file is of type C<fifo>

=item $file->is_socket

Returns true if the file is of type C<socket>

=item $file->is_longlink

Returns true if the file is of type C<LongLink>.
Should not happen after a successful C<read>.

=item $file->is_label

Returns true if the file is of type C<Label>.
Should not happen after a successful C<read>.

=item $file->is_unknown

Returns true if the file type is C<unknown>

=back

=cut

#stupid perl5.5.3 needs to warn if it's not numeric
sub is_file     { local $^W;    FILE      == $_[0]->type }
sub is_dir      { local $^W;    DIR       == $_[0]->type }
sub is_hardlink { local $^W;    HARDLINK  == $_[0]->type }
sub is_symlink  { local $^W;    SYMLINK   == $_[0]->type }
sub is_chardev  { local $^W;    CHARDEV   == $_[0]->type }
sub is_blockdev { local $^W;    BLOCKDEV  == $_[0]->type }
sub is_fifo     { local $^W;    FIFO      == $_[0]->type }
sub is_socket   { local $^W;    SOCKET    == $_[0]->type }
sub is_unknown  { local $^W;    UNKNOWN   == $_[0]->type }
sub is_longlink { local $^W;    LONGLINK  eq $_[0]->type }
sub is_label    { local $^W;    LABEL     eq $_[0]->type }

1;
Commit	Line	Data
39713df4	1	package Archive::Tar::File;
	2	use strict;
	3
642eb381	4	use Carp ();
39713df4	5	use IO::File;
81a5970e	6	use File::Spec::Unix ();
	7	use File::Spec ();
	8	use File::Basename ();
	9
642eb381	10	### avoid circular use, so only require;
642eb381	11	require Archive::Tar;
39713df4	12	use Archive::Tar::Constant;
	13
	14	use vars qw[@ISA $VERSION];
642eb381	15	#@ISA = qw[Archive::Tar];
39713df4	16	$VERSION = '0.02';
	17
	18	### set value to 1 to oct() it during the unpack ###
	19	my $tmpl = [
	20	name => 0, # string
	21	mode => 1, # octal
	22	uid => 1, # octal
	23	gid => 1, # octal
	24	size => 1, # octal
	25	mtime => 1, # octal
	26	chksum => 1, # octal
	27	type => 0, # character
	28	linkname => 0, # string
	29	magic => 0, # string
	30	version => 0, # 2 bytes
	31	uname => 0, # string
	32	gname => 0, # string
	33	devmajor => 1, # octal
	34	devminor => 1, # octal
	35	prefix => 0,
	36
	37	### end UNPACK items ###
	38	raw => 0, # the raw data chunk
	39	data => 0, # the data associated with the file --
	40	# This might be very memory intensive
	41	];
	42
	43	### install get/set accessors for this object.
	44	for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) {
	45	my $key = $tmpl->[$i];
	46	no strict 'refs';
	47	*{__PACKAGE__."::$key"} = sub {
	48	my $self = shift;
	49	$self->{$key} = $_[0] if @_;
	50
	51	### just in case the key is not there or undef or something ###
	52	{ local $^W = 0;
	53	return $self->{$key};
	54	}
	55	}
	56	}
	57
	58	=head1 NAME
	59
	60	Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
	61
	62	=head1 SYNOPSIS
	63
	64	my @items = $tar->get_files;
	65
	66	print $_->name, ' ', $_->size, "\n" for @items;
	67
	68	print $object->get_content;
	69	$object->replace_content('new content');
	70
	71	$object->rename( 'new/full/path/to/file.c' );
	72
	73	=head1 DESCRIPTION
	74
	75	Archive::Tar::Files provides a neat little object layer for in-memory
	76	extracted files. It's mostly used internally in Archive::Tar to tidy
	77	up the code, but there's no reason users shouldn't use this API as
	78	well.
	79
80	=head2 Accessors
81
82	A lot of the methods in this package are accessors to the various
83	fields in the tar header:
84
85	=over 4
86
87	=item name
88
89	The file's name
90
91	=item mode
92
93	The file's mode
94
95	=item uid
96
97	The user id owning the file
98
99	=item gid
100
101	The group id owning the file
102
103	=item size
104
105	File size in bytes
106
107	=item mtime
108
109	Modification time. Adjusted to mac-time on MacOS if required
110
111	=item chksum
112
113	Checksum field for the tar header
114
115	=item type
116
117	File type -- numeric, but comparable to exported constants -- see
118	Archive::Tar's documentation
119
120	=item linkname
121
122	If the file is a symlink, the file it's pointing to
123
124	=item magic
125
126	Tar magic string -- not useful for most users
127
128	=item version
129
130	Tar version string -- not useful for most users
131
132	=item uname
133
134	The user name that owns the file
135
136	=item gname
137
138	The group name that owns the file
139
140	=item devmajor
141
142	Device major number in case of a special file
143
144	=item devminor
145
146	Device minor number in case of a special file
147
148	=item prefix
149
150	Any directory to prefix to the extraction path, if any
151
152	=item raw
153
154	Raw tar header -- not useful for most users
155
156	=back
157
158	=head1 Methods
159
642eb381	160	=head2 Archive::Tar::File->new( file => $path )
39713df4	161
	162	Returns a new Archive::Tar::File object from an existing file.
	163
	164	Returns undef on failure.
	165
642eb381	166	=head2 Archive::Tar::File->new( data => $path, $data, $opt )
39713df4	167
	168	Returns a new Archive::Tar::File object from data.
	169
	170	C<$path> defines the file name (which need not exist), C<$data> the
	171	file contents, and C<$opt> is a reference to a hash of attributes
	172	which may be used to override the default attributes (fields in the
	173	tar header), which are described above in the Accessors section.
	174
	175	Returns undef on failure.
	176
642eb381	177	=head2 Archive::Tar::File->new( chunk => $chunk )
39713df4	178
	179	Returns a new Archive::Tar::File object from a raw 512-byte tar
	180	archive chunk.
	181
	182	Returns undef on failure.
	183
	184	=cut
	185
	186	sub new {
	187	my $class = shift;
	188	my $what = shift;
	189
	190	my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
	191	($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
	192	($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
	193	undef;
	194
	195	return $obj;
	196	}
	197
	198	### copies the data, creates a clone ###
	199	sub clone {
	200	my $self = shift;
	201	return bless { %$self }, ref $self;
	202	}
	203
	204	sub _new_from_chunk {
	205	my $class = shift;
01d11a1c	206	my $chunk = shift or return; # 512 bytes of tar header
81a5970e	207	my %hash = @_;
	208
	209	### filter any arguments on defined-ness of values.
	210	### this allows overriding from what the tar-header is saying
	211	### about this tar-entry. Particularly useful for @LongLink files
	212	my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
39713df4	213
	214	### makes it start at 0 actually... :) ###
	215	my $i = -1;
	216	my %entry = map {
	217	$tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_
	218	} map { /^([^\0]*)/ } unpack( UNPACK, $chunk );
	219
81a5970e	220	my $obj = bless { %entry, %args }, $class;
39713df4	221
	222	### magic is a filetype string.. it should have something like 'ustar' or
	223	### something similar... if the chunk is garbage, skip it
	224	return unless $obj->magic !~ /\W/;
	225
	226	### store the original chunk ###
	227	$obj->raw( $chunk );
	228
	229	$obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
	230	$obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m\|/$\|) );
	231
	232
	233	return $obj;
	234
	235	}
	236
	237	sub _new_from_file {
	238	my $class = shift;
01d11a1c	239	my $path = shift;
	240
	241	### path has to at least exist
	242	return unless defined $path;
	243
39713df4	244	my $type = __PACKAGE__->_filetype($path);
	245	my $data = '';
	246
97a504ba	247	READ: {
	248	unless ($type == DIR ) {
	249	my $fh = IO::File->new;
	250
	251	unless( $fh->open($path) ) {
	252	### dangling symlinks are fine, stop reading but continue
	253	### creating the object
	254	last READ if $type == SYMLINK;
	255
	256	### otherwise, return from this function --
	257	### anything that's not a symlink should be
	258	### resolvable
	259	return;
	260	}
	261
	262	### binmode needed to read files properly on win32 ###
	263	binmode $fh;
	264	$data = do { local $/; <$fh> };
	265	close $fh;
	266	}
39713df4	267	}
	268
	269	my @items = qw[mode uid gid size mtime];
	270	my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
	271
642eb381	272	if (ON_VMS) {
	273	### VMS has two UID modes, traditional and POSIX. Normally POSIX is
	274	### not used. We currently do not have an easy way to see if we are in
	275	### POSIX mode. In traditional mode, the UID is actually the VMS UIC.
	276	### The VMS UIC has the upper 16 bits is the GID, which in many cases
	277	### the VMS UIC will be larger than 209715, the largest that TAR can
	278	### handle. So for now, assume it is traditional if the UID is larger
	279	### than 0x10000.
	280
	281	if ($hash{uid} > 0x10000) {
	282	$hash{uid} = $hash{uid} & 0xFFFF;
	283	}
	284
	285	### The file length from stat() is the physical length of the file
	286	### However the amount of data read in may be more for some file types.
	287	### Fixed length files are read past the logical EOF to end of the block
	288	### containing. Other file types get expanded on read because record
	289	### delimiters are added.
	290
	291	my $data_len = length $data;
	292	$hash{size} = $data_len if $hash{size} < $data_len;
	293
	294	}
39713df4	295	### you must set size == 0 on symlinks, or the next entry will be
	296	### though of as the contents of the symlink, which is wrong.
	297	### this fixes bug #7937
	298	$hash{size} = 0 if ($type == DIR or $type == SYMLINK);
	299	$hash{mtime} -= TIME_OFFSET;
	300
	301	### strip the high bits off the mode, which we don't need to store
	302	$hash{mode} = STRIP_MODE->( $hash{mode} );
	303
	304
	305	### probably requires some file path munging here ... ###
	306	### name and prefix are set later
	307	my $obj = {
	308	%hash,
	309	name => '',
	310	chksum => CHECK_SUM,
	311	type => $type,
	312	linkname => ($type == SYMLINK and CAN_READLINK)
	313	? readlink $path
	314	: '',
	315	magic => MAGIC,
	316	version => TAR_VERSION,
	317	uname => UNAME->( $hash{uid} ),
	318	gname => GNAME->( $hash{gid} ),
	319	devmajor => 0, # not handled
	320	devminor => 0, # not handled
	321	prefix => '',
	322	data => $data,
	323	};
	324
	325	bless $obj, $class;
	326
	327	### fix up the prefix and file from the path
	328	my($prefix,$file) = $obj->_prefix_and_file( $path );
	329	$obj->prefix( $prefix );
	330	$obj->name( $file );
	331
	332	return $obj;
	333	}
	334
	335	sub _new_from_data {
	336	my $class = shift;
01d11a1c	337	my $path = shift; return unless defined $path;
39713df4	338	my $data = shift; return unless defined $data;
	339	my $opt = shift;
	340
	341	my $obj = {
	342	data => $data,
	343	name => '',
	344	mode => MODE,
	345	uid => UID,
	346	gid => GID,
	347	size => length $data,
	348	mtime => time - TIME_OFFSET,
	349	chksum => CHECK_SUM,
	350	type => FILE,
	351	linkname => '',
	352	magic => MAGIC,
	353	version => TAR_VERSION,
	354	uname => UNAME->( UID ),
	355	gname => GNAME->( GID ),
	356	devminor => 0,
	357	devmajor => 0,
	358	prefix => '',
	359	};
	360
	361	### overwrite with user options, if provided ###
	362	if( $opt and ref $opt eq 'HASH' ) {
	363	for my $key ( keys %$opt ) {
	364
	365	### don't write bogus options ###
	366	next unless exists $obj->{$key};
	367	$obj->{$key} = $opt->{$key};
	368	}
	369	}
	370
	371	bless $obj, $class;
	372
	373	### fix up the prefix and file from the path
	374	my($prefix,$file) = $obj->_prefix_and_file( $path );
	375	$obj->prefix( $prefix );
	376	$obj->name( $file );
	377
	378	return $obj;
	379	}
	380
	381	sub _prefix_and_file {
	382	my $self = shift;
	383	my $path = shift;
	384
	385	my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
	386	my @dirs = File::Spec->splitdir( $dirs );
	387
	388	### so sometimes the last element is '' -- probably when trailing
	389	### dir slashes are encountered... this is is of course pointless,
	390	### so remove it
	391	pop @dirs while @dirs and not length $dirs[-1];
	392
	393	### if it's a directory, then $file might be empty
	394	$file = pop @dirs if $self->is_dir and not length $file;
	395
f5695358	396	### splitting ../ gives you the relative path in native syntax
	397	map { $_ = '..' if $_ eq '-' } @dirs if ON_VMS;
	398
39713df4	399	my $prefix = File::Spec::Unix->catdir(
	400	grep { length } $vol, @dirs
	401	);
	402	return( $prefix, $file );
	403	}
	404
	405	sub _filetype {
	406	my $self = shift;
01d11a1c	407	my $file = shift;
	408
	409	return unless defined $file;
39713df4	410
	411	return SYMLINK if (-l $file); # Symlink
	412
	413	return FILE if (-f _); # Plain file
	414
	415	return DIR if (-d _); # Directory
	416
	417	return FIFO if (-p _); # Named pipe
	418
	419	return SOCKET if (-S _); # Socket
	420
	421	return BLOCKDEV if (-b _); # Block special
	422
	423	return CHARDEV if (-c _); # Character special
	424
	425	### shouldn't happen, this is when making archives, not reading ###
	426	return LONGLINK if ( $file eq LONGLINK_NAME );
	427
	428	return UNKNOWN; # Something else (like what?)
	429
	430	}
	431
	432	### this method 'downgrades' a file to plain file -- this is used for
	433	### symlinks when FOLLOW_SYMLINKS is true.
	434	sub _downgrade_to_plainfile {
	435	my $entry = shift;
	436	$entry->type( FILE );
	437	$entry->mode( MODE );
	438	$entry->linkname('');
	439
	440	return 1;
	441	}
	442
642eb381	443	=head2 $bool = $file->extract( [ $alternative_name ] )
	444
	445	Extract this object, optionally to an alternative name.
	446
	447	See C<< Archive::Tar->extract_file >> for details.
	448
	449	Returns true on success and false on failure.
	450
	451	=cut
	452
	453	sub extract {
	454	my $self = shift;
	455
	456	local $Carp::CarpLevel += 1;
	457
	458	return Archive::Tar->_extract_file( $self, @_ );
	459	}
	460
	461	=head2 $path = $file->full_path
39713df4	462
	463	Returns the full path from the tar header; this is basically a
	464	concatenation of the C<prefix> and C<name> fields.
	465
	466	=cut
	467
	468	sub full_path {
	469	my $self = shift;
	470
	471	### if prefix field is emtpy
	472	return $self->name unless defined $self->prefix and length $self->prefix;
	473
	474	### or otherwise, catfile'd
	475	return File::Spec::Unix->catfile( $self->prefix, $self->name );
	476	}
	477
	478
642eb381	479	=head2 $bool = $file->validate
39713df4	480
	481	Done by Archive::Tar internally when reading the tar file:
	482	validate the header against the checksum to ensure integer tar file.
	483
	484	Returns true on success, false on failure
	485
	486	=cut
	487
	488	sub validate {
	489	my $self = shift;
	490
	491	my $raw = $self->raw;
	492
	493	### don't know why this one is different from the one we /write/ ###
	494	substr ($raw, 148, 8) = " ";
bef46b70	495
	496	### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar
	497	### like GNU tar does. See here for details:
	498	### http://www.gnu.org/software/tar/manual/tar.html#SEC139
	499	### so we do both a signed AND unsigned validate. if one succeeds, that's
	500	### good enough
	501	return ( (unpack ("%16C*", $raw) == $self->chksum)
	502	or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0;
39713df4	503	}
39713df4	504
642eb381	505	=head2 $bool = $file->has_content
39713df4	506
	507	Returns a boolean to indicate whether the current object has content.
	508	Some special files like directories and so on never will have any
	509	content. This method is mainly to make sure you don't get warnings
	510	for using uninitialized values when looking at an object's content.
	511
	512	=cut
	513
	514	sub has_content {
	515	my $self = shift;
	516	return defined $self->data() && length $self->data() ? 1 : 0;
	517	}
	518
642eb381	519	=head2 $content = $file->get_content
39713df4	520
	521	Returns the current content for the in-memory file
	522
	523	=cut
	524
	525	sub get_content {
	526	my $self = shift;
	527	$self->data( );
	528	}
	529
642eb381	530	=head2 $cref = $file->get_content_by_ref
39713df4	531
	532	Returns the current content for the in-memory file as a scalar
	533	reference. Normal users won't need this, but it will save memory if
	534	you are dealing with very large data files in your tar archive, since
	535	it will pass the contents by reference, rather than make a copy of it
	536	first.
	537
	538	=cut
	539
	540	sub get_content_by_ref {
	541	my $self = shift;
	542
	543	return \$self->{data};
	544	}
	545
642eb381	546	=head2 $bool = $file->replace_content( $content )
39713df4	547
	548	Replace the current content of the file with the new content. This
	549	only affects the in-memory archive, not the on-disk version until
	550	you write it.
	551
	552	Returns true on success, false on failure.
	553
	554	=cut
	555
	556	sub replace_content {
	557	my $self = shift;
	558	my $data = shift \|\| '';
	559
	560	$self->data( $data );
	561	$self->size( length $data );
	562	return 1;
	563	}
	564
642eb381	565	=head2 $bool = $file->rename( $new_name )
39713df4	566
	567	Rename the current file to $new_name.
	568
	569	Note that you must specify a Unix path for $new_name, since per tar
	570	standard, all files in the archive must be Unix paths.
	571
	572	Returns true on success and false on failure.
	573
	574	=cut
	575
	576	sub rename {
	577	my $self = shift;
01d11a1c	578	my $path = shift;
	579
	580	return unless defined $path;
39713df4	581
	582	my ($prefix,$file) = $self->_prefix_and_file( $path );
	583
	584	$self->name( $file );
	585	$self->prefix( $prefix );
	586
	587	return 1;
	588	}
	589
	590	=head1 Convenience methods
	591
	592	To quickly check the type of a C<Archive::Tar::File> object, you can
	593	use the following methods:
	594
	595	=over 4
	596
642eb381	597	=item $file->is_file
39713df4	598
	599	Returns true if the file is of type C<file>
	600
642eb381	601	=item $file->is_dir
39713df4	602
	603	Returns true if the file is of type C<dir>
	604
642eb381	605	=item $file->is_hardlink
39713df4	606
	607	Returns true if the file is of type C<hardlink>
	608
642eb381	609	=item $file->is_symlink
39713df4	610
	611	Returns true if the file is of type C<symlink>
	612
642eb381	613	=item $file->is_chardev
39713df4	614
	615	Returns true if the file is of type C<chardev>
	616
642eb381	617	=item $file->is_blockdev
39713df4	618
	619	Returns true if the file is of type C<blockdev>
	620
642eb381	621	=item $file->is_fifo
39713df4	622
	623	Returns true if the file is of type C<fifo>
	624
642eb381	625	=item $file->is_socket
39713df4	626
	627	Returns true if the file is of type C<socket>
	628
642eb381	629	=item $file->is_longlink
39713df4	630
	631	Returns true if the file is of type C<LongLink>.
	632	Should not happen after a successful C<read>.
	633
642eb381	634	=item $file->is_label
39713df4	635
	636	Returns true if the file is of type C<Label>.
	637	Should not happen after a successful C<read>.
	638
642eb381	639	=item $file->is_unknown
39713df4	640
	641	Returns true if the file type is C<unknown>
	642
	643	=back
	644
	645	=cut
	646
	647	#stupid perl5.5.3 needs to warn if it's not numeric
	648	sub is_file { local $^W; FILE == $_[0]->type }
	649	sub is_dir { local $^W; DIR == $_[0]->type }
	650	sub is_hardlink { local $^W; HARDLINK == $_[0]->type }
	651	sub is_symlink { local $^W; SYMLINK == $_[0]->type }
	652	sub is_chardev { local $^W; CHARDEV == $_[0]->type }
	653	sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type }
	654	sub is_fifo { local $^W; FIFO == $_[0]->type }
	655	sub is_socket { local $^W; SOCKET == $_[0]->type }
	656	sub is_unknown { local $^W; UNKNOWN == $_[0]->type }
	657	sub is_longlink { local $^W; LONGLINK eq $_[0]->type }
	658	sub is_label { local $^W; LABEL eq $_[0]->type }
	659
	660	1;