1 package IO::Uncompress::AnyUncompress ;
7 use IO::Compress::Base::Common 2.021 qw(createSelfTiedObject);
9 use IO::Uncompress::Base 2.021 ;
# Package globals: the module version, the inheritance and export tables,
# and the error string that callers can import as $AnyUncompressError.
14 our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $AnyUncompressError);
# Start with an empty error string.
17 $AnyUncompressError = '';
# Inherit from Exporter and from the generic uncompression base class.
19 @ISA = qw( Exporter IO::Uncompress::Base );
20 @EXPORT_OK = qw( $AnyUncompressError anyuncompress ) ;
# Begin with the export tags defined by the base class, then add this
# module's own exportable symbols to the 'all' tag.
21 %EXPORT_TAGS = %IO::Uncompress::Base::DEFLATE_CONSTANTS ;
22 push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
# Add every symbol listed under the 'all' tag to @EXPORT_OK.
23 Exporter::export_ok_tags('all');
25 # TODO - allow the user to pick a set of the three formats to allow
26 # or just assume the user wants to auto-detect any of the three formats.
# Optional back-ends. Each module is loaded with a string eval whose result
# is never checked, so a module that is missing (or older than the requested
# version) does not stop this package from loading. Code elsewhere in this
# file tests "defined $IO::Uncompress::<Name>::VERSION" before using a format.
#
# Low-level adapter modules:
30 eval ' use IO::Uncompress::Adapter::Inflate 2.021 ;';
31 eval ' use IO::Uncompress::Adapter::Bunzip2 2.021 ;';
32 eval ' use IO::Uncompress::Adapter::LZO 2.021 ;';
33 eval ' use IO::Uncompress::Adapter::Lzf 2.021 ;';
34 eval ' use IO::Uncompress::Adapter::UnLzma 2.020 ;';
35 eval ' use IO::Uncompress::Adapter::UnXz 2.020 ;';
# Format-specific IO::Uncompress classes:
37 eval ' use IO::Uncompress::Bunzip2 2.021 ;';
38 eval ' use IO::Uncompress::UnLzop 2.021 ;';
39 eval ' use IO::Uncompress::Gunzip 2.021 ;';
40 eval ' use IO::Uncompress::Inflate 2.021 ;';
41 eval ' use IO::Uncompress::RawInflate 2.021 ;';
42 eval ' use IO::Uncompress::Unzip 2.021 ;';
43 eval ' use IO::Uncompress::UnLzf 2.021 ;';
44 eval ' use IO::Uncompress::UnLzma 2.018 ;';
45 eval ' use IO::Uncompress::UnXz 2.018 ;';
51 my $obj = createSelfTiedObject($class, \$AnyUncompressError);
52 $obj->_create(undef, 0, @_);
57 my $obj = createSelfTiedObject(undef, \$AnyUncompressError);
58 return $obj->_inf(@_) ;
63 use IO::Compress::Base::Common 2.021 qw(:Parse);
64 return ( 'RawInflate' => [1, 1, Parse_boolean, 0] ) ;
72 # any always needs both crc32 and adler32
73 $got->value('CRC32' => 1);
74 $got->value('ADLER32' => 1);
87 if (defined $IO::Uncompress::RawInflate::VERSION )
89 my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::Inflate::mkUncompObject();
91 return $self->saveErrorString(undef, $errstr, $errno)
94 *$self->{Uncomp} = $obj;
96 my @possible = qw( Inflate Gunzip Unzip );
97 unshift @possible, 'RawInflate'
98 if $got->value('RawInflate');
100 $magic = $self->ckMagic( @possible );
103 *$self->{Info} = $self->readHeader($magic)
110 # if (defined $IO::Uncompress::UnLzma::VERSION )
112 # my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::UnLzma::mkUncompObject();
114 # return $self->saveErrorString(undef, $errstr, $errno)
117 # *$self->{Uncomp} = $obj;
119 # my @possible = qw( UnLzma );
120 # #unshift @possible, 'RawInflate'
121 # # if $got->value('RawInflate');
123 # if ( *$self->{Info} = $self->ckMagic( @possible ))
129 if (defined $IO::Uncompress::UnXz::VERSION and
130 $magic = $self->ckMagic('UnXz')) {
131 *$self->{Info} = $self->readHeader($magic)
134 my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::UnXz::mkUncompObject();
136 return $self->saveErrorString(undef, $errstr, $errno)
139 *$self->{Uncomp} = $obj;
144 if (defined $IO::Uncompress::Bunzip2::VERSION and
145 $magic = $self->ckMagic('Bunzip2')) {
146 *$self->{Info} = $self->readHeader($magic)
149 my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::Bunzip2::mkUncompObject();
151 return $self->saveErrorString(undef, $errstr, $errno)
154 *$self->{Uncomp} = $obj;
159 if (defined $IO::Uncompress::UnLzop::VERSION and
160 $magic = $self->ckMagic('UnLzop')) {
162 *$self->{Info} = $self->readHeader($magic)
165 my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::LZO::mkUncompObject();
167 return $self->saveErrorString(undef, $errstr, $errno)
170 *$self->{Uncomp} = $obj;
175 if (defined $IO::Uncompress::UnLzf::VERSION and
176 $magic = $self->ckMagic('UnLzf')) {
178 *$self->{Info} = $self->readHeader($magic)
181 my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::Lzf::mkUncompObject();
183 return $self->saveErrorString(undef, $errstr, $errno)
186 *$self->{Uncomp} = $obj;
201 my $keep = ref $self ;
202 for my $class ( map { "IO::Uncompress::$_" } @names)
204 bless $self => $class;
205 my $magic = $self->ckMagic();
209 #bless $self => $class;
213 $self->pushBack(*$self->{HeaderPending}) ;
214 *$self->{HeaderPending} = '' ;
217 bless $self => $keep;
228 IO::Uncompress::AnyUncompress - Uncompress gzip, zip, bzip2 or lzop file/buffer
232 use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ;
234 my $status = anyuncompress $input => $output [,OPTS]
235 or die "anyuncompress failed: $AnyUncompressError\n";
237 my $z = new IO::Uncompress::AnyUncompress $input [OPTS]
238 or die "anyuncompress failed: $AnyUncompressError\n";
240 $status = $z->read($buffer)
241 $status = $z->read($buffer, $length)
242 $status = $z->read($buffer, $length, $offset)
243 $line = $z->getline()
248 $data = $z->trailingData()
249 $status = $z->nextStream()
250 $data = $z->getHeaderInfo()
252 $z->seek($position, $whence)
258 $AnyUncompressError ;
264 read($z, $buffer, $length);
265 read($z, $buffer, $length, $offset);
267 seek($z, $position, $whence)
275 This module provides a Perl interface that allows the reading of
276 files/buffers that have been compressed with a variety of compression
279 The formats supported are:
285 =item RFC 1951 (optionally)
287 =item gzip (RFC 1952)
299 The module will auto-detect which, if any, of the supported
300 compression formats is being used.
302 =head1 Functional Interface
304 A top-level function, C<anyuncompress>, is provided to carry out
305 "one-shot" uncompression between buffers and/or files. For finer
306 control over the uncompression process, see the L</"OO Interface">
309 use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ;
311 anyuncompress $input => $output [,OPTS]
312 or die "anyuncompress failed: $AnyUncompressError\n";
314 The functional interface needs Perl5.005 or better.
316 =head2 anyuncompress $input => $output [, OPTS]
318 C<anyuncompress> expects at least two parameters, C<$input> and C<$output>.
320 =head3 The C<$input> parameter
322 The parameter, C<$input>, is used to define the source of
325 It can take one of the following forms:
331 If the C<$input> parameter is a simple scalar, it is assumed to be a
332 filename. This file will be opened for reading and the input data
333 will be read from it.
337 If the C<$input> parameter is a filehandle, the input data will be
339 The string '-' can be used as an alias for standard input.
341 =item A scalar reference
343 If C<$input> is a scalar reference, the input data will be read
346 =item An array reference
348 If C<$input> is an array reference, each element in the array must be a
351 The input data will be read from each file in turn.
353 The complete array will be walked to ensure that it only
354 contains valid filenames before any data is uncompressed.
356 =item An Input FileGlob string
358 If C<$input> is a string that is delimited by the characters "<" and ">"
359 C<anyuncompress> will assume that it is an I<input fileglob string>. The
360 input is the list of files that match the fileglob.
362 If the fileglob does not match any files ...
364 See L<File::GlobMapper|File::GlobMapper> for more details.
368 If the C<$input> parameter is any other type, C<undef> will be returned.
370 =head3 The C<$output> parameter
372 The parameter C<$output> is used to control the destination of the
373 uncompressed data. This parameter can take one of these forms.
379 If the C<$output> parameter is a simple scalar, it is assumed to be a
380 filename. This file will be opened for writing and the uncompressed
381 data will be written to it.
385 If the C<$output> parameter is a filehandle, the uncompressed data
386 will be written to it.
387 The string '-' can be used as an alias for standard output.
389 =item A scalar reference
391 If C<$output> is a scalar reference, the uncompressed data will be
392 stored in C<$$output>.
394 =item An Array Reference
396 If C<$output> is an array reference, the uncompressed data will be
397 pushed onto the array.
399 =item An Output FileGlob
401 If C<$output> is a string that is delimited by the characters "<" and ">"
402 C<anyuncompress> will assume that it is an I<output fileglob string>. The
403 output is the list of files that match the fileglob.
405 When C<$output> is a fileglob string, C<$input> must also be a fileglob
406 string. Anything else is an error.
410 If the C<$output> parameter is any other type, C<undef> will be returned.
414 When C<$input> maps to multiple compressed files/buffers and C<$output> is
415 a single file/buffer, after uncompression C<$output> will contain a
416 concatenation of all the uncompressed data from each of the input
419 =head2 Optional Parameters
421 Unless specified below, the optional parameters for C<anyuncompress>,
422 C<OPTS>, are the same as those used with the OO interface defined in the
423 L</"Constructor Options"> section below.
427 =item C<< AutoClose => 0|1 >>
429 This option applies to any input or output data streams to
430 C<anyuncompress> that are filehandles.
432 If C<AutoClose> is specified, and the value is true, it will result in all
433 input and/or output filehandles being closed once C<anyuncompress> has
436 This parameter defaults to 0.
438 =item C<< BinModeOut => 0|1 >>
440 When writing to a file or filehandle, set C<binmode> before writing to the
445 =item C<< Append => 0|1 >>
449 =item C<< MultiStream => 0|1 >>
451 If the input file/buffer contains multiple compressed data streams, this
452 option will uncompress the whole lot as a single data stream.
456 =item C<< TrailingData => $scalar >>
458 Returns the data, if any, that is present immediately after the compressed
459 data stream once uncompression is complete.
461 This option can be used when there is useful information immediately
462 following the compressed data stream, and you don't know the length of the
463 compressed data stream.
465 If the input is a buffer, C<trailingData> will return everything from the
466 end of the compressed data stream to the end of the buffer.
468 If the input is a filehandle, C<trailingData> will return the data that is
469 left in the filehandle input buffer once the end of the compressed data
470 stream has been reached. You can then use the filehandle to read the rest
473 Don't bother using C<trailingData> if the input is a filename.
475 If you know the length of the compressed data stream before you start
476 uncompressing, you can avoid having to use C<trailingData> by setting the
477 C<InputLength> option.
483 To read the contents of the file C<file1.txt.Compressed> and write the
484 uncompressed data to the file C<file1.txt>.
488 use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ;
490 my $input = "file1.txt.Compressed";
491 my $output = "file1.txt";
492 anyuncompress $input => $output
493 or die "anyuncompress failed: $AnyUncompressError\n";
495 To read from an existing Perl filehandle, C<$input>, and write the
496 uncompressed data to a buffer, C<$buffer>.
500 use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ;
503 my $input = new IO::File "<file1.txt.Compressed"
504 or die "Cannot open 'file1.txt.Compressed': $!\n" ;
506 anyuncompress $input => \$buffer
507 or die "anyuncompress failed: $AnyUncompressError\n";
509 To uncompress all files in the directory "/my/home" that match "*.txt.Compressed" and store the uncompressed data in the same directory
513 use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ;
515 anyuncompress '</my/home/*.txt.Compressed>' => '</my/home/#1.txt>'
516 or die "anyuncompress failed: $AnyUncompressError\n";
518 and if you want to uncompress each file one at a time, this will do the trick
522 use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ;
524 for my $input ( glob "/my/home/*.txt.Compressed" )
527 $output =~ s/\.Compressed$// ;
528 anyuncompress $input => $output
529 or die "Error uncompressing '$input': $AnyUncompressError\n";
536 The format of the constructor for IO::Uncompress::AnyUncompress is shown below
538 my $z = new IO::Uncompress::AnyUncompress $input [OPTS]
539 or die "IO::Uncompress::AnyUncompress failed: $AnyUncompressError\n";
541 Returns an C<IO::Uncompress::AnyUncompress> object on success and undef on failure.
542 The variable C<$AnyUncompressError> will contain an error message on failure.
544 If you are running Perl 5.005 or better the object, C<$z>, returned from
545 IO::Uncompress::AnyUncompress can be used exactly like an L<IO::File|IO::File> filehandle.
546 This means that all normal input file operations can be carried out with
547 C<$z>. For example, to read a line from a compressed file/buffer you can
548 use either of these forms
550 $line = $z->getline();
553 The mandatory parameter C<$input> is used to determine the source of the
554 compressed data. This parameter can take one of three forms.
560 If the C<$input> parameter is a scalar, it is assumed to be a filename. This
561 file will be opened for reading and the compressed data will be read from it.
565 If the C<$input> parameter is a filehandle, the compressed data will be
567 The string '-' can be used as an alias for standard input.
569 =item A scalar reference
571 If C<$input> is a scalar reference, the compressed data will be read from
576 =head2 Constructor Options
578 The option names defined below are case insensitive and can be optionally
579 prefixed by a '-'. So all of the following are valid
586 OPTS is a combination of the following options:
590 =item C<< AutoClose => 0|1 >>
592 This option is only valid when the C<$input> parameter is a filehandle. If
593 specified, and the value is true, it will result in the file being closed once
594 either the C<close> method is called or the IO::Uncompress::AnyUncompress object is
597 This parameter defaults to 0.
599 =item C<< MultiStream => 0|1 >>
601 Allows multiple concatenated compressed streams to be treated as a single
602 compressed stream. Decompression will stop once either the end of the
603 file/buffer is reached, an error is encountered (premature eof, corrupt
604 compressed data) or the end of a stream is not immediately followed by the
605 start of another stream.
607 This parameter defaults to 0.
609 =item C<< Prime => $string >>
611 This option will uncompress the contents of C<$string> before processing the
614 This option can be useful when the compressed data is embedded in another
615 file/data structure and it is not possible to work out where the compressed
616 data begins without having to read the first few bytes. If this is the
617 case, the uncompression can be I<primed> with these bytes using this
620 =item C<< Transparent => 0|1 >>
622 If this option is set and the input file/buffer is not compressed data,
623 the module will allow reading of it anyway.
625 In addition, if the input file/buffer does contain compressed data and
626 there is non-compressed data immediately following it, setting this option
627 will make this module treat the whole file/buffer as a single data stream.
629 This option defaults to 1.
631 =item C<< BlockSize => $num >>
633 When reading the compressed input data, IO::Uncompress::AnyUncompress will read it in
634 blocks of C<$num> bytes.
636 This option defaults to 4096.
638 =item C<< InputLength => $size >>
640 When present this option will limit the number of compressed bytes read
641 from the input file/buffer to C<$size>. This option can be used in the
642 situation where there is useful data directly after the compressed data
643 stream and you know beforehand the exact length of the compressed data
646 This option is mostly used when reading from a filehandle, in which case
647 the file pointer will be left pointing to the first byte directly after the
648 compressed data stream.
650 This option defaults to off.
652 =item C<< Append => 0|1 >>
654 This option controls what the C<read> method does with uncompressed data.
656 If set to 1, all uncompressed data will be appended to the output parameter
657 of the C<read> method.
659 If set to 0, the contents of the output parameter of the C<read> method
660 will be overwritten by the uncompressed data.
664 =item C<< Strict => 0|1 >>
666 This option controls whether the extra checks defined below are used when
667 carrying out the decompression. When Strict is on, the extra tests are
668 carried out, when Strict is off they are not.
670 The default for this option is off.
672 =item C<< RawInflate => 0|1 >>
674 When auto-detecting the compressed format, try to test for raw-deflate (RFC
675 1951) content using the C<IO::Uncompress::RawInflate> module.
677 The reason this is not default behaviour is because RFC 1951 content can
678 only be detected by attempting to uncompress it. This process is error
679 prone and can result in false positives.
695 $status = $z->read($buffer)
697 Reads a block of compressed data (the size of the compressed block is
698 determined by the C<BlockSize> option in the constructor), uncompresses it and
699 writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
700 set in the constructor, the uncompressed data will be appended to the
701 C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
703 Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
704 or a negative number on error.
710 $status = $z->read($buffer, $length)
711 $status = $z->read($buffer, $length, $offset)
713 $status = read($z, $buffer, $length)
714 $status = read($z, $buffer, $length, $offset)
716 Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
718 The main difference between this form of the C<read> method and the
719 previous one, is that this one will attempt to return I<exactly> C<$length>
720 bytes. The only circumstances in which it will not are when end-of-file
721 or an IO error is encountered.
723 Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
724 or a negative number on error.
730 $line = $z->getline()
735 This method fully supports the use of the variable C<$/> (or
736 C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
737 determine what constitutes an end of line. Paragraph mode, record mode and
738 file slurp mode are all supported.
746 Read a single character.
752 $char = $z->ungetc($string)
758 $hdr = $z->getHeaderInfo();
759 @hdrs = $z->getHeaderInfo();
761 This method returns either a hash reference (in scalar context) or a list
762 of hash references (in array context) that contains information about each
763 of the header fields in the compressed data stream(s).
772 Returns the uncompressed file offset.
781 Returns true if the end of the compressed input stream has been reached.
785 $z->seek($position, $whence);
786 seek($z, $position, $whence);
788 Provides a sub-set of the C<seek> functionality, with the restriction
789 that it is only legal to seek forward in the input file/buffer.
790 It is a fatal error to attempt to seek backward.
792 The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
793 SEEK_CUR or SEEK_END.
795 Returns 1 on success, 0 on failure.
804 This is a noop provided for completeness.
810 Returns true if the object currently refers to an opened file/buffer.
814 my $prev = $z->autoflush()
815 my $prev = $z->autoflush(EXPR)
817 If the C<$z> object is associated with a file or a filehandle, this method
818 returns the current autoflush setting for the underlying filehandle. If
819 C<EXPR> is present, and is non-zero, it will enable flushing after every
820 write/print operation.
822 If C<$z> is associated with a buffer, this method has no effect and always
825 B<Note> that the special variable C<$|> B<cannot> be used to set or
826 retrieve the autoflush setting.
828 =head2 input_line_number
830 $z->input_line_number()
831 $z->input_line_number(EXPR)
833 Returns the current uncompressed line number. If C<EXPR> is present it has
834 the effect of setting the line number. Note that setting the line number
835 does not change the current position within the file/buffer being read.
837 The contents of C<$/> are used to determine what constitutes a line
845 If the C<$z> object is associated with a file or a filehandle, C<fileno>
846 will return the underlying file descriptor. Once the C<close> method is
847 called C<fileno> will return C<undef>.
849 If the C<$z> object is associated with a buffer, this method will return
857 Closes the input file/buffer.
859 For most versions of Perl this method will be automatically invoked if
860 the IO::Uncompress::AnyUncompress object is destroyed (either explicitly or by the
861 variable with the reference to the object going out of scope). The
862 exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
863 these cases, the C<close> method will be called automatically, but
864 not until global destruction of all live objects when the program is
867 Therefore, if you want your scripts to be able to run on all versions
868 of Perl, you should call C<close> explicitly and not rely on automatic
871 Returns true on success, otherwise 0.
873 If the C<AutoClose> option has been enabled when the IO::Uncompress::AnyUncompress
874 object was created, and the object is associated with a file, the
875 underlying file will also be closed.
881 my $status = $z->nextStream();
883 Skips to the next compressed data stream in the input file/buffer. If a new
884 compressed data stream is found, the eof marker will be cleared and C<$.>
887 Returns 1 if a new stream was found, 0 if none was found, and -1 if an
888 error was encountered.
894 my $data = $z->trailingData();
896 Returns the data, if any, that is present immediately after the compressed
897 data stream once uncompression is complete. It only makes sense to call
898 this method once the end of the compressed data stream has been
901 This method can be used when there is useful information immediately
902 following the compressed data stream, and you don't know the length of the
903 compressed data stream.
905 If the input is a buffer, C<trailingData> will return everything from the
906 end of the compressed data stream to the end of the buffer.
908 If the input is a filehandle, C<trailingData> will return the data that is
909 left in the filehandle input buffer once the end of the compressed data
910 stream has been reached. You can then use the filehandle to read the rest
913 Don't bother using C<trailingData> if the input is a filename.
915 If you know the length of the compressed data stream before you start
916 uncompressing, you can avoid having to use C<trailingData> by setting the
917 C<InputLength> option in the constructor.
921 No symbolic constants are required by IO::Uncompress::AnyUncompress at present.
927 Imports C<anyuncompress> and C<$AnyUncompressError>.
930 use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ;
938 L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Uncompress::AnyInflate>
940 L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
942 L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
943 L<Archive::Tar|Archive::Tar>,
948 This module was written by Paul Marquess, F<pmqs@cpan.org>.
950 =head1 MODIFICATION HISTORY
952 See the Changes file.
954 =head1 COPYRIGHT AND LICENSE
956 Copyright (c) 2005-2009 Paul Marquess. All rights reserved.
958 This program is free software; you can redistribute it and/or
959 modify it under the same terms as Perl itself.