1 package IO::Uncompress::AnyInflate ;
3 # for RFC1950, RFC1951 or RFC1952
9 use IO::Compress::Base::Common qw(createSelfTiedObject);
11 use IO::Uncompress::Adapter::Inflate ();
14 use IO::Uncompress::Base ;
15 use IO::Uncompress::Gunzip ;
16 use IO::Uncompress::Inflate ;
17 use IO::Uncompress::RawInflate ;
18 use IO::Uncompress::Unzip ;
22 our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $AnyInflateError);
24 $VERSION = '2.000_13';
25 $AnyInflateError = '';
27 @ISA = qw( Exporter IO::Uncompress::Base );
28 @EXPORT_OK = qw( $AnyInflateError anyinflate ) ;
29 %EXPORT_TAGS = %IO::Uncompress::Base::DEFLATE_CONSTANTS ;
30 push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
31 Exporter::export_ok_tags('all');
33 # TODO - allow the user to pick which of the three formats to allow,
34 # or just assume they want to auto-detect any of the three formats.
39 my $obj = createSelfTiedObject($class, \$AnyInflateError);
40 $obj->_create(undef, 0, @_);
45 my $obj = createSelfTiedObject(undef, \$AnyInflateError);
46 return $obj->_inf(@_) ;
51 use IO::Compress::Base::Common qw(:Parse);
52 return ( 'RawInflate' => [1, 1, Parse_boolean, 0] ) ;
60 # any always needs both crc32 and adler32
61 $got->value('CRC32' => 1);
62 $got->value('ADLER32' => 1);
73 my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::Inflate::mkUncompObject();
75 return $self->saveErrorString(undef, $errstr, $errno)
78 *$self->{Uncomp} = $obj;
80 my @possible = qw( Inflate Gunzip Unzip );
81 unshift @possible, 'RawInflate'
82 if 1 || $got->value('RawInflate');
84 my $magic = $self->ckMagic( @possible );
87 *$self->{Info} = $self->readHeader($magic)
103 my $keep = ref $self ;
104 for my $class ( map { "IO::Uncompress::$_" } @names)
106 bless $self => $class;
107 my $magic = $self->ckMagic();
111 #bless $self => $class;
115 $self->pushBack(*$self->{HeaderPending}) ;
116 *$self->{HeaderPending} = '' ;
119 bless $self => $keep;
131 IO::Uncompress::AnyInflate - Uncompress zlib-based (zip, gzip) file/buffer
136 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
138 my $status = anyinflate $input => $output [,OPTS]
139 or die "anyinflate failed: $AnyInflateError\n";
141 my $z = new IO::Uncompress::AnyInflate $input [OPTS]
142 or die "anyinflate failed: $AnyInflateError\n";
144 $status = $z->read($buffer)
145 $status = $z->read($buffer, $length)
146 $status = $z->read($buffer, $length, $offset)
147 $line = $z->getline()
152 $status = $z->inflateSync()
154 $data = $z->trailingData()
155 $status = $z->nextStream()
156 $data = $z->getHeaderInfo()
158 $z->seek($position, $whence)
170 read($z, $buffer, $length);
171 read($z, $buffer, $length, $offset);
173 seek($z, $position, $whence)
184 B<WARNING -- This is a Beta release>.
188 =item * DO NOT use in production code.
190 =item * The documentation is incomplete in places.
192 =item * Parts of the interface defined here are tentative.
194 =item * Please report any problems you find.
201 This module provides a Perl interface that allows the reading of
202 files/buffers that have been compressed in a number of formats that use the
203 zlib compression library.
205 The formats supported are
213 =item gzip (RFC 1952)
219 The module will auto-detect which, if any, of the supported
220 compression formats is being used.
227 =head1 Functional Interface
229 A top-level function, C<anyinflate>, is provided to carry out
230 "one-shot" uncompression between buffers and/or files. For finer
231 control over the uncompression process, see the L</"OO Interface">
234 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
236 anyinflate $input => $output [,OPTS]
237 or die "anyinflate failed: $AnyInflateError\n";
241 The functional interface needs Perl5.005 or better.
244 =head2 anyinflate $input => $output [, OPTS]
247 C<anyinflate> expects at least two parameters, C<$input> and C<$output>.
249 =head3 The C<$input> parameter
251 The parameter, C<$input>, is used to define the source of
254 It can take one of the following forms:
260 If the C<$input> parameter is a simple scalar, it is assumed to be a
261 filename. This file will be opened for reading and the input data
262 will be read from it.
266 If the C<$input> parameter is a filehandle, the input data will be
268 The string '-' can be used as an alias for standard input.
270 =item A scalar reference
272 If C<$input> is a scalar reference, the input data will be read
275 =item An array reference
277 If C<$input> is an array reference, each element in the array must be a
280 The input data will be read from each file in turn.
282 The complete array will be walked to ensure that it only
283 contains valid filenames before any data is uncompressed.
287 =item An Input FileGlob string
289 If C<$input> is a string that is delimited by the characters "<" and ">"
290 C<anyinflate> will assume that it is an I<input fileglob string>. The
291 input is the list of files that match the fileglob.
293 If the fileglob does not match any files ...
295 See L<File::GlobMapper|File::GlobMapper> for more details.
300 If the C<$input> parameter is any other type, C<undef> will be returned.
304 =head3 The C<$output> parameter
306 The parameter C<$output> is used to control the destination of the
307 uncompressed data. This parameter can take one of these forms.
313 If the C<$output> parameter is a simple scalar, it is assumed to be a
314 filename. This file will be opened for writing and the uncompressed
315 data will be written to it.
319 If the C<$output> parameter is a filehandle, the uncompressed data
320 will be written to it.
321 The string '-' can be used as an alias for standard output.
324 =item A scalar reference
326 If C<$output> is a scalar reference, the uncompressed data will be
327 stored in C<$$output>.
331 =item An Array Reference
333 If C<$output> is an array reference, the uncompressed data will be
334 pushed onto the array.
336 =item An Output FileGlob
338 If C<$output> is a string that is delimited by the characters "<" and ">"
339 C<anyinflate> will assume that it is an I<output fileglob string>. The
340 output is the list of files that match the fileglob.
342 When C<$output> is a fileglob string, C<$input> must also be a fileglob
343 string. Anything else is an error.
347 If the C<$output> parameter is any other type, C<undef> will be returned.
354 When C<$input> maps to multiple compressed files/buffers and C<$output> is
355 a single file/buffer, after uncompression C<$output> will contain a
356 concatenation of all the uncompressed data from each of the input
363 =head2 Optional Parameters
365 Unless specified below, the optional parameters for C<anyinflate>,
366 C<OPTS>, are the same as those used with the OO interface defined in the
367 L</"Constructor Options"> section below.
371 =item C<< AutoClose => 0|1 >>
373 This option applies to any input or output data streams to
374 C<anyinflate> that are filehandles.
376 If C<AutoClose> is specified, and the value is true, it will result in all
377 input and/or output filehandles being closed once C<anyinflate> has
380 This parameter defaults to 0.
383 =item C<< BinModeOut => 0|1 >>
385 When writing to a file or filehandle, set C<binmode> before writing to the
394 =item C<< Append => 0|1 >>
398 =item C<< MultiStream => 0|1 >>
400 If the input file/buffer contains multiple compressed data streams, this
401 option will uncompress the whole lot as a single data stream.
414 To read the contents of the file C<file1.txt.Compressed> and write the
415 uncompressed data to the file C<file1.txt>.
419 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
421 my $input = "file1.txt.Compressed";
422 my $output = "file1.txt";
423 anyinflate $input => $output
424 or die "anyinflate failed: $AnyInflateError\n";
427 To read from an existing Perl filehandle, C<$input>, and write the
428 uncompressed data to a buffer, C<$buffer>.
432 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
435 my $input = new IO::File "<file1.txt.Compressed"
436 or die "Cannot open 'file1.txt.Compressed': $!\n" ;
438 anyinflate $input => \$buffer
439 or die "anyinflate failed: $AnyInflateError\n";
441 To uncompress all files in the directory "/my/home" that match "*.txt.Compressed" and store the uncompressed data in the same directory
445 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
447 anyinflate '</my/home/*.txt.Compressed>' => '</my/home/#1.txt>'
448 or die "anyinflate failed: $AnyInflateError\n";
450 and if you want to uncompress each file one at a time, this will do the trick
454 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
456 for my $input ( glob "/my/home/*.txt.Compressed" )
459 $output =~ s/.Compressed// ;
460 anyinflate $input => $output
461 or die "Error uncompressing '$input': $AnyInflateError\n";
468 The format of the constructor for IO::Uncompress::AnyInflate is shown below
471 my $z = new IO::Uncompress::AnyInflate $input [OPTS]
472 or die "IO::Uncompress::AnyInflate failed: $AnyInflateError\n";
474 Returns an C<IO::Uncompress::AnyInflate> object on success and undef on failure.
475 The variable C<$AnyInflateError> will contain an error message on failure.
477 If you are running Perl 5.005 or better the object, C<$z>, returned from
478 IO::Uncompress::AnyInflate can be used exactly like an L<IO::File|IO::File> filehandle.
479 This means that all normal input file operations can be carried out with
480 C<$z>. For example, to read a line from a compressed file/buffer you can
481 use either of these forms
483 $line = $z->getline();
486 The mandatory parameter C<$input> is used to determine the source of the
487 compressed data. This parameter can take one of three forms.
493 If the C<$input> parameter is a scalar, it is assumed to be a filename. This
494 file will be opened for reading and the compressed data will be read from it.
498 If the C<$input> parameter is a filehandle, the compressed data will be
500 The string '-' can be used as an alias for standard input.
503 =item A scalar reference
505 If C<$input> is a scalar reference, the compressed data will be read from
510 =head2 Constructor Options
513 The option names defined below are case insensitive and can be optionally
514 prefixed by a '-'. So all of the following are valid
521 OPTS is a combination of the following options:
525 =item C<< AutoClose => 0|1 >>
527 This option is only valid when the C<$input> parameter is a filehandle. If
528 specified, and the value is true, it will result in the file being closed once
529 either the C<close> method is called or the IO::Uncompress::AnyInflate object is
532 This parameter defaults to 0.
534 =item C<< MultiStream => 0|1 >>
538 Allows multiple concatenated compressed streams to be treated as a single
539 compressed stream. Decompression will stop once either the end of the
540 file/buffer is reached, an error is encountered (premature eof, corrupt
541 compressed data) or the end of a stream is not immediately followed by the
542 start of another stream.
544 This parameter defaults to 0.
547 =item C<< Prime => $string >>
549 This option will uncompress the contents of C<$string> before processing the
552 This option can be useful when the compressed data is embedded in another
553 file/data structure and it is not possible to work out where the compressed
554 data begins without having to read the first few bytes. If this is the
555 case, the uncompression can be I<primed> with these bytes using this
558 =item C<< Transparent => 0|1 >>
560 If this option is set and the input file or buffer is not compressed data,
561 the module will allow reading of it anyway.
563 This option defaults to 1.
565 =item C<< BlockSize => $num >>
567 When reading the compressed input data, IO::Uncompress::AnyInflate will read it in
568 blocks of C<$num> bytes.
570 This option defaults to 4096.
572 =item C<< InputLength => $size >>
574 When present this option will limit the number of compressed bytes read
575 from the input file/buffer to C<$size>. This option can be used in the
576 situation where there is useful data directly after the compressed data
577 stream and you know beforehand the exact length of the compressed data
580 This option is mostly used when reading from a filehandle, in which case
581 the file pointer will be left pointing to the first byte directly after the
582 compressed data stream.
586 This option defaults to off.
588 =item C<< Append => 0|1 >>
590 This option controls what the C<read> method does with uncompressed data.
592 If set to 1, all uncompressed data will be appended to the output parameter
593 of the C<read> method.
595 If set to 0, the contents of the output parameter of the C<read> method
596 will be overwritten by the uncompressed data.
600 =item C<< Strict => 0|1 >>
604 This option controls whether the extra checks defined below are used when
605 carrying out the decompression. When Strict is on, the extra tests are
606 carried out, when Strict is off they are not.
608 The default for this option is off.
611 If the input is an RFC 1950 data stream, the following will be checked:
620 The ADLER32 checksum field must be present.
624 The value of the ADLER32 field read must match the adler32 value of the
625 uncompressed data actually contained in the file.
631 If the input is a gzip (RFC 1952) data stream, the following will be checked:
640 If the FHCRC bit is set in the gzip FLG header byte, the CRC16 bytes in the
641 header must match the crc16 value of the gzip header actually read.
645 If the gzip header contains a name field (FNAME) it consists solely of ISO
650 If the gzip header contains a comment field (FCOMMENT) it consists solely
651 of ISO 8859-1 characters plus line-feed.
655 If the gzip FEXTRA header field is present it must conform to the sub-field
656 structure as defined in RFC 1952.
660 The CRC32 and ISIZE trailer fields must be present.
664 The value of the CRC32 field read must match the crc32 value of the
665 uncompressed data actually contained in the gzip file.
669 The value of the ISIZE fields read must match the length of the
670 uncompressed data actually read from the file.
679 =item C<< ParseExtra => 0|1 >>
681 If the gzip FEXTRA header field is present and this option is set, it will
682 force the module to check that it conforms to the sub-field structure as
685 If C<Strict> is on it will automatically enable this option.
705 $status = $z->read($buffer)
707 Reads a block of compressed data (the size of the compressed block is
708 determined by the C<BlockSize> option in the constructor), uncompresses it and
709 writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
710 set in the constructor, the uncompressed data will be appended to the
711 C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
713 Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
714 or a negative number on error.
720 $status = $z->read($buffer, $length)
721 $status = $z->read($buffer, $length, $offset)
723 $status = read($z, $buffer, $length)
724 $status = read($z, $buffer, $length, $offset)
726 Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
728 The main difference between this form of the C<read> method and the
729 previous one, is that this one will attempt to return I<exactly> C<$length>
730 bytes. The only circumstances in which it will not do so are when end-of-file
731 or an IO error is encountered.
733 Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
734 or a negative number on error.
741 $line = $z->getline()
746 This method fully supports the use of the variable C<$/>
747 (or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
748 determine what constitutes an end of line. Both paragraph mode and file
749 slurp mode are supported.
758 Read a single character.
764 $char = $z->ungetc($string)
772 $status = $z->inflateSync()
781 $hdr = $z->getHeaderInfo();
782 @hdrs = $z->getHeaderInfo();
784 This method returns either a hash reference (in scalar context) or a list
785 of hash references (in array context) that contains information about each
786 of the header fields in the compressed data stream(s).
798 Returns the uncompressed file offset.
809 Returns true if the end of the compressed input stream has been reached.
815 $z->seek($position, $whence);
816 seek($z, $position, $whence);
821 Provides a sub-set of the C<seek> functionality, with the restriction
822 that it is only legal to seek forward in the input file/buffer.
823 It is a fatal error to attempt to seek backward.
827 The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
828 SEEK_CUR or SEEK_END.
830 Returns 1 on success, 0 on failure.
839 This is a noop provided for completeness.
845 Returns true if the object currently refers to an opened file/buffer.
849 my $prev = $z->autoflush()
850 my $prev = $z->autoflush(EXPR)
852 If the C<$z> object is associated with a file or a filehandle, this method
853 returns the current autoflush setting for the underlying filehandle. If
854 C<EXPR> is present, and is non-zero, it will enable flushing after every
855 write/print operation.
857 If C<$z> is associated with a buffer, this method has no effect and always
860 B<Note> that the special variable C<$|> B<cannot> be used to set or
861 retrieve the autoflush setting.
863 =head2 input_line_number
865 $z->input_line_number()
866 $z->input_line_number(EXPR)
870 Returns the current uncompressed line number. If C<EXPR> is present it has
871 the effect of setting the line number. Note that setting the line number
872 does not change the current position within the file/buffer being read.
874 The contents of C<$/> are used to determine what constitutes a line
884 If the C<$z> object is associated with a file or a filehandle, this method
885 will return the underlying file descriptor.
887 If the C<$z> object is associated with a buffer, this method will
897 Closes the input file/buffer.
901 For most versions of Perl this method will be automatically invoked if
902 the IO::Uncompress::AnyInflate object is destroyed (either explicitly or by the
903 variable with the reference to the object going out of scope). The
904 exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
905 these cases, the C<close> method will be called automatically, but
906 not until global destruction of all live objects when the program is
909 Therefore, if you want your scripts to be able to run on all versions
910 of Perl, you should call C<close> explicitly and not rely on automatic
913 Returns true on success, otherwise 0.
915 If the C<AutoClose> option has been enabled when the IO::Uncompress::AnyInflate
916 object was created, and the object is associated with a file, the
917 underlying file will also be closed.
926 my $status = $z->nextStream();
928 Skips to the next compressed data stream in the input file/buffer. If a new
929 compressed data stream is found, the eof marker will be cleared, C<$.> will
932 Returns 1 if a new stream was found, 0 if none was found, and -1 if an
933 error was encountered.
939 my $data = $z->trailingData();
941 Returns any data that
945 No symbolic constants are required by IO::Uncompress::AnyInflate at present.
951 Imports C<anyinflate> and C<$AnyInflateError>.
954 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
965 L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Uncompress::AnyUncompress>
967 L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
969 L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
970 L<Archive::Tar|Archive::Tar>,
974 For RFC 1950, 1951 and 1952 see
975 F<http://www.faqs.org/rfcs/rfc1950.html>,
976 F<http://www.faqs.org/rfcs/rfc1951.html> and
977 F<http://www.faqs.org/rfcs/rfc1952.html>
979 The I<zlib> compression library was written by Jean-loup Gailly
980 F<gzip@prep.ai.mit.edu> and Mark Adler F<madler@alumni.caltech.edu>.
982 The primary site for the I<zlib> compression library is
983 F<http://www.zlib.org>.
985 The primary site for gzip is F<http://www.gzip.org>.
992 This module was written by Paul Marquess, F<pmqs@cpan.org>.
996 =head1 MODIFICATION HISTORY
998 See the Changes file.
1000 =head1 COPYRIGHT AND LICENSE
1002 Copyright (c) 2005-2006 Paul Marquess. All rights reserved.
1004 This program is free software; you can redistribute it and/or
1005 modify it under the same terms as Perl itself.