1 package IO::Uncompress::AnyUncompress ;
7 use IO::Compress::Base::Common qw(createSelfTiedObject);
9 use IO::Uncompress::Base ;
# Try to load every optional back-end.  Each require/import pair is wrapped
# in its own eval block so that a module which is not installed (or fails to
# load) is trapped there and does not stop this module from compiling.
#
# Adapter modules: their mkUncompObject functions are called further down in
# this file.
13 eval { require IO::Uncompress::Adapter::Inflate; import IO::Uncompress::Adapter::Inflate };
14 eval { require IO::Uncompress::Adapter::Bunzip2; import IO::Uncompress::Adapter::Bunzip2 };
15 eval { require IO::Uncompress::Adapter::LZO; import IO::Uncompress::Adapter::LZO };
# Format classes: later code in this file checks whether a class's $VERSION
# is defined (for example $IO::Uncompress::Bunzip2::VERSION) before it
# attempts to detect that format.
17 eval { require IO::Uncompress::Bunzip2; import IO::Uncompress::Bunzip2 };
18 eval { require IO::Uncompress::UnLzop; import IO::Uncompress::UnLzop };
19 eval { require IO::Uncompress::Gunzip; import IO::Uncompress::Gunzip };
20 eval { require IO::Uncompress::Inflate; import IO::Uncompress::Inflate };
21 eval { require IO::Uncompress::RawInflate; import IO::Uncompress::RawInflate };
22 eval { require IO::Uncompress::Unzip; import IO::Uncompress::Unzip };
# Package globals: module version, inheritance list, Exporter tables and the
# package-level error variable.
27 our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $AnyUncompressError);
29 $VERSION = '2.000_13';
# Starts out empty; a reference to this variable is passed to
# createSelfTiedObject wherever this file creates an object.
30 $AnyUncompressError = '';
# Inherit from Exporter and from IO::Uncompress::Base.
32 @ISA = qw( Exporter IO::Uncompress::Base );
# Nothing is exported by default; these two symbols are available on request.
33 @EXPORT_OK = qw( $AnyUncompressError anyuncompress ) ;
# Start from the tag table defined by IO::Uncompress::Base, then add this
# module's own exportable symbols to the 'all' tag.
34 %EXPORT_TAGS = %IO::Uncompress::Base::DEFLATE_CONSTANTS ;
35 push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
# Add every name listed under the 'all' tag to @EXPORT_OK.
36 Exporter::export_ok_tags('all');
38 # TODO - allow the user to pick a set of the three formats to allow
39 # or just assume want to auto-detect any of the three formats.
44 my $obj = createSelfTiedObject($class, \$AnyUncompressError);
45 $obj->_create(undef, 0, @_);
50 my $obj = createSelfTiedObject(undef, \$AnyUncompressError);
51 return $obj->_inf(@_) ;
56 use IO::Compress::Base::Common qw(:Parse);
57 return ( 'RawInflate' => [1, 1, Parse_boolean, 0] ) ;
65 # any always needs both crc32 and adler32
66 $got->value('CRC32' => 1);
67 $got->value('ADLER32' => 1);
81 if (defined $IO::Uncompress::RawInflate::VERSION )
83 my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::Inflate::mkUncompObject();
85 return $self->saveErrorString(undef, $errstr, $errno)
88 *$self->{Uncomp} = $obj;
90 my @possible = qw( Inflate Gunzip Unzip );
91 unshift @possible, 'RawInflate'
92 if $got->value('RawInflate');
94 $magic = $self->ckMagic( @possible );
97 *$self->{Info} = $self->readHeader($magic)
104 if (defined $IO::Uncompress::Bunzip2::VERSION and
105 $magic = $self->ckMagic('Bunzip2')) {
106 *$self->{Info} = $self->readHeader($magic)
109 my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::Bunzip2::mkUncompObject();
111 return $self->saveErrorString(undef, $errstr, $errno)
114 *$self->{Uncomp} = $obj;
119 if (defined $IO::Uncompress::UnLzop::VERSION and
120 $magic = $self->ckMagic('UnLzop')) {
122 *$self->{Info} = $self->readHeader($magic)
125 my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::LZO::mkUncompObject();
127 return $self->saveErrorString(undef, $errstr, $errno)
130 *$self->{Uncomp} = $obj;
145 my $keep = ref $self ;
146 for my $class ( map { "IO::Uncompress::$_" } @names)
148 bless $self => $class;
149 my $magic = $self->ckMagic();
153 #bless $self => $class;
157 $self->pushBack(*$self->{HeaderPending}) ;
158 *$self->{HeaderPending} = '' ;
161 bless $self => $keep;
173 IO::Uncompress::AnyUncompress - Uncompress gzip, zip, bzip2 or lzop file/buffer
178 use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ;
180 my $status = anyuncompress $input => $output [,OPTS]
181 or die "anyuncompress failed: $AnyUncompressError\n";
183 my $z = new IO::Uncompress::AnyUncompress $input [OPTS]
184 or die "anyuncompress failed: $AnyUncompressError\n";
186 $status = $z->read($buffer)
187 $status = $z->read($buffer, $length)
188 $status = $z->read($buffer, $length, $offset)
189 $line = $z->getline()
194 $data = $z->trailingData()
195 $status = $z->nextStream()
196 $data = $z->getHeaderInfo()
198 $z->seek($position, $whence)
204 $AnyUncompressError ;
210 read($z, $buffer, $length);
211 read($z, $buffer, $length, $offset);
213 seek($z, $position, $whence)
224 B<WARNING -- This is a Beta release>.
228 =item * DO NOT use in production code.
230 =item * The documentation is incomplete in places.
232 =item * Parts of the interface defined here are tentative.
234 =item * Please report any problems you find.
241 This module provides a Perl interface that allows the reading of
242 files/buffers that have been compressed with a variety of compression
245 The formats supported are:
253 =item gzip (RFC 1952)
263 The module will auto-detect which, if any, of the supported
264 compression formats is being used.
270 =head1 Functional Interface
272 A top-level function, C<anyuncompress>, is provided to carry out
273 "one-shot" uncompression between buffers and/or files. For finer
274 control over the uncompression process, see the L</"OO Interface">
277 use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ;
279 anyuncompress $input => $output [,OPTS]
280 or die "anyuncompress failed: $AnyUncompressError\n";
284 The functional interface needs Perl5.005 or better.
287 =head2 anyuncompress $input => $output [, OPTS]
290 C<anyuncompress> expects at least two parameters, C<$input> and C<$output>.
292 =head3 The C<$input> parameter
294 The parameter, C<$input>, is used to define the source of
297 It can take one of the following forms:
303 If the C<$input> parameter is a simple scalar, it is assumed to be a
304 filename. This file will be opened for reading and the input data
305 will be read from it.
309 If the C<$input> parameter is a filehandle, the input data will be
311 The string '-' can be used as an alias for standard input.
313 =item A scalar reference
315 If C<$input> is a scalar reference, the input data will be read
318 =item An array reference
320 If C<$input> is an array reference, each element in the array must be a
323 The input data will be read from each file in turn.
325 The complete array will be walked to ensure that it only
326 contains valid filenames before any data is uncompressed.
330 =item An Input FileGlob string
332 If C<$input> is a string that is delimited by the characters "<" and ">"
333 C<anyuncompress> will assume that it is an I<input fileglob string>. The
334 input is the list of files that match the fileglob.
336 If the fileglob does not match any files ...
338 See L<File::GlobMapper|File::GlobMapper> for more details.
343 If the C<$input> parameter is any other type, C<undef> will be returned.
347 =head3 The C<$output> parameter
349 The parameter C<$output> is used to control the destination of the
350 uncompressed data. This parameter can take one of these forms.
356 If the C<$output> parameter is a simple scalar, it is assumed to be a
357 filename. This file will be opened for writing and the uncompressed
358 data will be written to it.
362 If the C<$output> parameter is a filehandle, the uncompressed data
363 will be written to it.
364 The string '-' can be used as an alias for standard output.
367 =item A scalar reference
369 If C<$output> is a scalar reference, the uncompressed data will be
370 stored in C<$$output>.
374 =item An Array Reference
376 If C<$output> is an array reference, the uncompressed data will be
377 pushed onto the array.
379 =item An Output FileGlob
381 If C<$output> is a string that is delimited by the characters "<" and ">"
382 C<anyuncompress> will assume that it is an I<output fileglob string>. The
383 output is the list of files that match the fileglob.
385 When C<$output> is a fileglob string, C<$input> must also be a fileglob
386 string. Anything else is an error.
390 If the C<$output> parameter is any other type, C<undef> will be returned.
397 When C<$input> maps to multiple compressed files/buffers and C<$output> is
398 a single file/buffer, after uncompression C<$output> will contain a
399 concatenation of all the uncompressed data from each of the input
406 =head2 Optional Parameters
408 Unless specified below, the optional parameters for C<anyuncompress>,
409 C<OPTS>, are the same as those used with the OO interface defined in the
410 L</"Constructor Options"> section below.
414 =item C<< AutoClose => 0|1 >>
416 This option applies to any input or output data streams to
417 C<anyuncompress> that are filehandles.
419 If C<AutoClose> is specified, and the value is true, it will result in all
420 input and/or output filehandles being closed once C<anyuncompress> has
423 This parameter defaults to 0.
426 =item C<< BinModeOut => 0|1 >>
428 When writing to a file or filehandle, set C<binmode> before writing to the
437 =item C<< Append => 0|1 >>
441 =item C<< MultiStream => 0|1 >>
443 If the input file/buffer contains multiple compressed data streams, this
444 option will uncompress the whole lot as a single data stream.
457 To read the contents of the file C<file1.txt.Compressed> and write the
458 uncompressed data to the file C<file1.txt>.
462 use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ;
464 my $input = "file1.txt.Compressed";
465 my $output = "file1.txt";
466 anyuncompress $input => $output
467 or die "anyuncompress failed: $AnyUncompressError\n";
470 To read from an existing Perl filehandle, C<$input>, and write the
471 uncompressed data to a buffer, C<$buffer>.
475 use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ;
478 my $input = new IO::File "<file1.txt.Compressed"
479 or die "Cannot open 'file1.txt.Compressed': $!\n" ;
481 anyuncompress $input => \$buffer
482 or die "anyuncompress failed: $AnyUncompressError\n";
484 To uncompress all files in the directory "/my/home" that match "*.txt.Compressed" and store the uncompressed data in the same directory
488 use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ;
490 anyuncompress '</my/home/*.txt.Compressed>' => '</my/home/#1.txt>'
491 or die "anyuncompress failed: $AnyUncompressError\n";
493 and if you want to uncompress each file one at a time, this will do the trick
497 use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ;
499 for my $input ( glob "/my/home/*.txt.Compressed" )
502 $output =~ s/.Compressed// ;
503 anyuncompress $input => $output
504 or die "Error uncompressing '$input': $AnyUncompressError\n";
511 The format of the constructor for IO::Uncompress::AnyUncompress is shown below
514 my $z = new IO::Uncompress::AnyUncompress $input [OPTS]
515 or die "IO::Uncompress::AnyUncompress failed: $AnyUncompressError\n";
517 Returns an C<IO::Uncompress::AnyUncompress> object on success and undef on failure.
518 The variable C<$AnyUncompressError> will contain an error message on failure.
520 If you are running Perl 5.005 or better the object, C<$z>, returned from
521 IO::Uncompress::AnyUncompress can be used exactly like an L<IO::File|IO::File> filehandle.
522 This means that all normal input file operations can be carried out with
523 C<$z>. For example, to read a line from a compressed file/buffer you can
524 use either of these forms
526 $line = $z->getline();
529 The mandatory parameter C<$input> is used to determine the source of the
530 compressed data. This parameter can take one of three forms.
536 If the C<$input> parameter is a scalar, it is assumed to be a filename. This
537 file will be opened for reading and the compressed data will be read from it.
541 If the C<$input> parameter is a filehandle, the compressed data will be
543 The string '-' can be used as an alias for standard input.
546 =item A scalar reference
548 If C<$input> is a scalar reference, the compressed data will be read from
553 =head2 Constructor Options
556 The option names defined below are case insensitive and can be optionally
557 prefixed by a '-'. So all of the following are valid
564 OPTS is a combination of the following options:
568 =item C<< AutoClose => 0|1 >>
570 This option is only valid when the C<$input> parameter is a filehandle. If
571 specified, and the value is true, it will result in the file being closed once
572 either the C<close> method is called or the IO::Uncompress::AnyUncompress object is
575 This parameter defaults to 0.
577 =item C<< MultiStream => 0|1 >>
581 Allows multiple concatenated compressed streams to be treated as a single
582 compressed stream. Decompression will stop once either the end of the
583 file/buffer is reached, an error is encountered (premature eof, corrupt
584 compressed data) or the end of a stream is not immediately followed by the
585 start of another stream.
587 This parameter defaults to 0.
590 =item C<< Prime => $string >>
592 This option will uncompress the contents of C<$string> before processing the
595 This option can be useful when the compressed data is embedded in another
596 file/data structure and it is not possible to work out where the compressed
597 data begins without having to read the first few bytes. If this is the
598 case, the uncompression can be I<primed> with these bytes using this
601 =item C<< Transparent => 0|1 >>
603 If this option is set and the input file or buffer is not compressed data,
604 the module will allow reading of it anyway.
606 This option defaults to 1.
608 =item C<< BlockSize => $num >>
610 When reading the compressed input data, IO::Uncompress::AnyUncompress will read it in
611 blocks of C<$num> bytes.
613 This option defaults to 4096.
615 =item C<< InputLength => $size >>
617 When present this option will limit the number of compressed bytes read
618 from the input file/buffer to C<$size>. This option can be used in the
619 situation where there is useful data directly after the compressed data
620 stream and you know beforehand the exact length of the compressed data
623 This option is mostly used when reading from a filehandle, in which case
624 the file pointer will be left pointing to the first byte directly after the
625 compressed data stream.
629 This option defaults to off.
631 =item C<< Append => 0|1 >>
633 This option controls what the C<read> method does with uncompressed data.
635 If set to 1, all uncompressed data will be appended to the output parameter
636 of the C<read> method.
638 If set to 0, the contents of the output parameter of the C<read> method
639 will be overwritten by the uncompressed data.
643 =item C<< Strict => 0|1 >>
647 This option controls whether the extra checks defined below are used when
648 carrying out the decompression. When Strict is on, the extra tests are
649 carried out, when Strict is off they are not.
651 The default for this option is off.
678 $status = $z->read($buffer)
680 Reads a block of compressed data (the size of the compressed block is
681 determined by the C<BlockSize> option in the constructor), uncompresses it and
682 writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
683 set in the constructor, the uncompressed data will be appended to the
684 C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
686 Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
687 or a negative number on error.
693 $status = $z->read($buffer, $length)
694 $status = $z->read($buffer, $length, $offset)
696 $status = read($z, $buffer, $length)
697 $status = read($z, $buffer, $length, $offset)
699 Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
701 The main difference between this form of the C<read> method and the
702 previous one, is that this one will attempt to return I<exactly> C<$length>
703 bytes. The only circumstances in which it will not do so are when end-of-file
704 or an IO error is encountered.
706 Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
707 or a negative number on error.
714 $line = $z->getline()
719 This method fully supports the use of the variable C<$/>
720 (or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
721 determine what constitutes an end of line. Both paragraph mode and file
722 slurp mode are supported.
731 Read a single character.
737 $char = $z->ungetc($string)
746 $hdr = $z->getHeaderInfo();
747 @hdrs = $z->getHeaderInfo();
749 This method returns either a hash reference (in scalar context) or a list
750 of hash references (in array context) that contains information about each
751 of the header fields in the compressed data stream(s).
763 Returns the uncompressed file offset.
774 Returns true if the end of the compressed input stream has been reached.
780 $z->seek($position, $whence);
781 seek($z, $position, $whence);
786 Provides a sub-set of the C<seek> functionality, with the restriction
787 that it is only legal to seek forward in the input file/buffer.
788 It is a fatal error to attempt to seek backward.
792 The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
793 SEEK_CUR or SEEK_END.
795 Returns 1 on success, 0 on failure.
804 This is a noop provided for completeness.
810 Returns true if the object currently refers to an opened file/buffer.
814 my $prev = $z->autoflush()
815 my $prev = $z->autoflush(EXPR)
817 If the C<$z> object is associated with a file or a filehandle, this method
818 returns the current autoflush setting for the underlying filehandle. If
819 C<EXPR> is present, and is non-zero, it will enable flushing after every
820 write/print operation.
822 If C<$z> is associated with a buffer, this method has no effect and always
825 B<Note> that the special variable C<$|> B<cannot> be used to set or
826 retrieve the autoflush setting.
828 =head2 input_line_number
830 $z->input_line_number()
831 $z->input_line_number(EXPR)
835 Returns the current uncompressed line number. If C<EXPR> is present it has
836 the effect of setting the line number. Note that setting the line number
837 does not change the current position within the file/buffer being read.
839 The contents of C<$/> are used to determine what constitutes a line
849 If the C<$z> object is associated with a file or a filehandle, this method
850 will return the underlying file descriptor.
852 If the C<$z> object is associated with a buffer, this method will
862 Closes the input file/buffer.
866 For most versions of Perl this method will be automatically invoked if
867 the IO::Uncompress::AnyUncompress object is destroyed (either explicitly or by the
868 variable with the reference to the object going out of scope). The
869 exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
870 these cases, the C<close> method will be called automatically, but
871 not until global destruction of all live objects when the program is
874 Therefore, if you want your scripts to be able to run on all versions
875 of Perl, you should call C<close> explicitly and not rely on automatic
878 Returns true on success, otherwise 0.
880 If the C<AutoClose> option has been enabled when the IO::Uncompress::AnyUncompress
881 object was created, and the object is associated with a file, the
882 underlying file will also be closed.
891 my $status = $z->nextStream();
893 Skips to the next compressed data stream in the input file/buffer. If a new
894 compressed data stream is found, the eof marker will be cleared, C<$.> will
897 Returns 1 if a new stream was found, 0 if none was found, and -1 if an
898 error was encountered.
904 my $data = $z->trailingData();
906 Returns any data that
910 No symbolic constants are required by IO::Uncompress::AnyUncompress at present.
916 Imports C<anyuncompress> and C<$AnyUncompressError>.
919 use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ;
930 L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Uncompress::AnyInflate>
932 L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
934 L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
935 L<Archive::Tar|Archive::Tar>,
944 This module was written by Paul Marquess, F<pmqs@cpan.org>.
948 =head1 MODIFICATION HISTORY
950 See the Changes file.
952 =head1 COPYRIGHT AND LICENSE
954 Copyright (c) 2005-2006 Paul Marquess. All rights reserved.
956 This program is free software; you can redistribute it and/or
957 modify it under the same terms as Perl itself.