1 package IO::Uncompress::AnyInflate ;
3 # for RFC1950, RFC1951 or RFC1952
8 use Compress::Zlib::Common qw(createSelfTiedObject);
10 use UncompressPlugin::Inflate ();
11 #use UncompressPlugin::Bunzip2 ();
14 #use IO::Uncompress::Base ;
15 use IO::Uncompress::Gunzip ;
16 use IO::Uncompress::Inflate ;
17 use IO::Uncompress::RawInflate ;
18 use IO::Uncompress::Unzip ;
19 #use IO::Uncompress::Bunzip2 ;
20 #use IO::Uncompress::UnLzop ;
24 our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $AnyInflateError);  # package globals initialised below
26 $VERSION = '2.000_07';
27 $AnyInflateError = '';  # starts empty; a reference to it is passed to createSelfTiedObject elsewhere in this file
29 @ISA = qw( Exporter IO::Uncompress::Base );  # inherit from both Exporter and IO::Uncompress::Base
30 @EXPORT_OK = qw( $AnyInflateError anyinflate ) ;  # symbols exported only on request
31 %EXPORT_TAGS = %IO::Uncompress::Base::DEFLATE_CONSTANTS ;  # copy the %DEFLATE_CONSTANTS hash from IO::Uncompress::Base
32 push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;  # the 'all' tag also covers this module's own symbols
33 Exporter::export_ok_tags('all');  # add every symbol listed under the 'all' tag to @EXPORT_OK
35 # TODO - allow the user to pick a set of the three formats to allow
36 # or just assume want to auto-detect any of the three formats.
41 my $obj = createSelfTiedObject($class, \$AnyInflateError);
42 $obj->_create(undef, 0, @_);
47 my $obj = createSelfTiedObject(undef, \$AnyInflateError);
48 return $obj->_inf(@_) ;
61 # any always needs both crc32 and adler32
62 $got->value('CRC32' => 1);
63 $got->value('ADLER32' => 1);
74 my ($obj, $errstr, $errno) = UncompressPlugin::Inflate::mkUncompObject();
76 return $self->saveErrorString(undef, $errstr, $errno)
79 *$self->{Uncomp} = $obj;
81 my $magic = $self->ckMagic( qw( RawInflate Inflate Gunzip Unzip ) );
84 *$self->{Info} = $self->readHeader($magic)
100 my $keep = ref $self ;
101 for my $class ( map { "IO::Uncompress::$_" } @names)
103 bless $self => $class;
104 my $magic = $self->ckMagic();
108 #bless $self => $class;
112 $self->pushBack(*$self->{HeaderPending}) ;
113 *$self->{HeaderPending} = '' ;
116 bless $self => $keep;
127 IO::Uncompress::AnyInflate - Perl interface to read RFC 1950, 1951 & 1952 files/buffers
131 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
133 my $status = anyinflate $input => $output [,OPTS]
134 or die "anyinflate failed: $AnyInflateError\n";
136 my $z = new IO::Uncompress::AnyInflate $input [OPTS]
137 or die "anyinflate failed: $AnyInflateError\n";
139 $status = $z->read($buffer)
140 $status = $z->read($buffer, $length)
141 $status = $z->read($buffer, $length, $offset)
142 $line = $z->getline()
145 $status = $z->inflateSync()
147 $data = $z->getHeaderInfo()
149 $z->seek($position, $whence)
161 read($z, $buffer, $length);
162 read($z, $buffer, $length, $offset);
164 seek($z, $position, $whence)
175 B<WARNING -- This is a Beta release>.
179 =item * DO NOT use in production code.
181 =item * The documentation is incomplete in places.
183 =item * Parts of the interface defined here are tentative.
185 =item * Please report any problems you find.
193 This module provides a Perl interface that allows the reading of
194 files/buffers that conform to RFCs 1950, 1951 and 1952.
196 The module will auto-detect which, if any, of the three supported
197 compression formats is being used.
201 =head1 Functional Interface
203 A top-level function, C<anyinflate>, is provided to carry out
204 "one-shot" uncompression between buffers and/or files. For finer
205 control over the uncompression process, see the L</"OO Interface">
208 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
210 anyinflate $input => $output [,OPTS]
211 or die "anyinflate failed: $AnyInflateError\n";
215 The functional interface needs Perl5.005 or better.
218 =head2 anyinflate $input => $output [, OPTS]
221 C<anyinflate> expects at least two parameters, C<$input> and C<$output>.
223 =head3 The C<$input> parameter
225 The parameter, C<$input>, is used to define the source of
228 It can take one of the following forms:
234 If the C<$input> parameter is a simple scalar, it is assumed to be a
235 filename. This file will be opened for reading and the input data
236 will be read from it.
240 If the C<$input> parameter is a filehandle, the input data will be
242 The string '-' can be used as an alias for standard input.
244 =item A scalar reference
246 If C<$input> is a scalar reference, the input data will be read
249 =item An array reference
251 If C<$input> is an array reference, each element in the array must be a
254 The input data will be read from each file in turn.
256 The complete array will be walked to ensure that it only
257 contains valid filenames before any data is uncompressed.
261 =item An Input FileGlob string
263 If C<$input> is a string that is delimited by the characters "<" and ">"
264 C<anyinflate> will assume that it is an I<input fileglob string>. The
265 input is the list of files that match the fileglob.
267 If the fileglob does not match any files ...
269 See L<File::GlobMapper|File::GlobMapper> for more details.
274 If the C<$input> parameter is any other type, C<undef> will be returned.
278 =head3 The C<$output> parameter
280 The parameter C<$output> is used to control the destination of the
281 uncompressed data. This parameter can take one of these forms.
287 If the C<$output> parameter is a simple scalar, it is assumed to be a
288 filename. This file will be opened for writing and the uncompressed
289 data will be written to it.
293 If the C<$output> parameter is a filehandle, the uncompressed data
294 will be written to it.
295 The string '-' can be used as an alias for standard output.
298 =item A scalar reference
300 If C<$output> is a scalar reference, the uncompressed data will be
301 stored in C<$$output>.
305 =item An Array Reference
307 If C<$output> is an array reference, the uncompressed data will be
308 pushed onto the array.
310 =item An Output FileGlob
312 If C<$output> is a string that is delimited by the characters "<" and ">"
313 C<anyinflate> will assume that it is an I<output fileglob string>. The
314 output is the list of files that match the fileglob.
316 When C<$output> is a fileglob string, C<$input> must also be a fileglob
317 string. Anything else is an error.
321 If the C<$output> parameter is any other type, C<undef> will be returned.
327 When C<$input> maps to multiple files/buffers and C<$output> is a single
328 file/buffer the uncompressed input files/buffers will all be stored
329 in C<$output> as a single uncompressed stream.
333 =head2 Optional Parameters
335 Unless specified below, the optional parameters for C<anyinflate>,
336 C<OPTS>, are the same as those used with the OO interface defined in the
337 L</"Constructor Options"> section below.
341 =item AutoClose =E<gt> 0|1
343 This option applies to any input or output data streams to
344 C<anyinflate> that are filehandles.
346 If C<AutoClose> is specified, and the value is true, it will result in all
347 input and/or output filehandles being closed once C<anyinflate> has
350 This parameter defaults to 0.
354 =item BinModeOut =E<gt> 0|1
356 When writing to a file or filehandle, set C<binmode> before writing to the
365 =item -Append =E<gt> 0|1
369 =item -MultiStream =E<gt> 0|1
371 Creates a new stream after each file.
384 To read the contents of the file C<file1.txt.Compressed> and write the
385 uncompressed data to the file C<file1.txt>.
389 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
391 my $input = "file1.txt.Compressed";
392 my $output = "file1.txt";
393 anyinflate $input => $output
394 or die "anyinflate failed: $AnyInflateError\n";
397 To read from an existing Perl filehandle, C<$input>, and write the
398 uncompressed data to a buffer, C<$buffer>.
402 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
405 my $input = new IO::File "<file1.txt.Compressed"
406 or die "Cannot open 'file1.txt.Compressed': $!\n" ;
408 anyinflate $input => \$buffer
409 or die "anyinflate failed: $AnyInflateError\n";
411 To uncompress all files in the directory "/my/home" that match "*.txt.Compressed" and store the uncompressed data in the same directory
415 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
417 anyinflate '</my/home/*.txt.Compressed>' => '</my/home/#1.txt>'
418 or die "anyinflate failed: $AnyInflateError\n";
420 and if you want to uncompress each file one at a time, this will do the trick
424 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
426 for my $input ( glob "/my/home/*.txt.Compressed" )
429 $output =~ s/\.Compressed$// ;
430 anyinflate $input => $output
431 or die "Error uncompressing '$input': $AnyInflateError\n";
438 The format of the constructor for IO::Uncompress::AnyInflate is shown below
441 my $z = new IO::Uncompress::AnyInflate $input [OPTS]
442 or die "IO::Uncompress::AnyInflate failed: $AnyInflateError\n";
444 Returns an C<IO::Uncompress::AnyInflate> object on success and undef on failure.
445 The variable C<$AnyInflateError> will contain an error message on failure.
447 If you are running Perl 5.005 or better the object, C<$z>, returned from
448 IO::Uncompress::AnyInflate can be used exactly like an L<IO::File|IO::File> filehandle.
449 This means that all normal input file operations can be carried out with
450 C<$z>. For example, to read a line from a compressed file/buffer you can
451 use either of these forms
453 $line = $z->getline();
456 The mandatory parameter C<$input> is used to determine the source of the
457 compressed data. This parameter can take one of three forms.
463 If the C<$input> parameter is a scalar, it is assumed to be a filename. This
464 file will be opened for reading and the compressed data will be read from it.
468 If the C<$input> parameter is a filehandle, the compressed data will be
470 The string '-' can be used as an alias for standard input.
473 =item A scalar reference
475 If C<$input> is a scalar reference, the compressed data will be read from
480 =head2 Constructor Options
483 The option names defined below are case insensitive and can be optionally
484 prefixed by a '-'. So all of the following are valid
491 OPTS is a combination of the following options:
495 =item -AutoClose =E<gt> 0|1
497 This option is only valid when the C<$input> parameter is a filehandle. If
498 specified, and the value is true, it will result in the file being closed once
499 either the C<close> method is called or the IO::Uncompress::AnyInflate object is
502 This parameter defaults to 0.
504 =item -MultiStream =E<gt> 0|1
508 Allows multiple concatenated compressed streams to be treated as a single
509 compressed stream. Decompression will stop once either the end of the
510 file/buffer is reached, an error is encountered (premature eof, corrupt
511 compressed data) or the end of a stream is not immediately followed by the
512 start of another stream.
514 This parameter defaults to 0.
518 =item -Prime =E<gt> $string
520 This option will uncompress the contents of C<$string> before processing the
523 This option can be useful when the compressed data is embedded in another
524 file/data structure and it is not possible to work out where the compressed
525 data begins without having to read the first few bytes. If this is the
526 case, the uncompression can be I<primed> with these bytes using this
529 =item -Transparent =E<gt> 0|1
531 If this option is set and the input file or buffer is not compressed data,
532 the module will allow reading of it anyway.
534 This option defaults to 1.
536 =item -BlockSize =E<gt> $num
538 When reading the compressed input data, IO::Uncompress::AnyInflate will read it in
539 blocks of C<$num> bytes.
541 This option defaults to 4096.
543 =item -InputLength =E<gt> $size
545 When present this option will limit the number of compressed bytes read
546 from the input file/buffer to C<$size>. This option can be used in the
547 situation where there is useful data directly after the compressed data
548 stream and you know beforehand the exact length of the compressed data
551 This option is mostly used when reading from a filehandle, in which case
552 the file pointer will be left pointing to the first byte directly after the
553 compressed data stream.
557 This option defaults to off.
559 =item -Append =E<gt> 0|1
561 This option controls what the C<read> method does with uncompressed data.
563 If set to 1, all uncompressed data will be appended to the output parameter
564 of the C<read> method.
566 If set to 0, the contents of the output parameter of the C<read> method
567 will be overwritten by the uncompressed data.
571 =item -Strict =E<gt> 0|1
575 This option controls whether the extra checks defined below are used when
576 carrying out the decompression. When Strict is on, the extra tests are
577 carried out, when Strict is off they are not.
579 The default for this option is off.
582 If the input is an RFC1950 data stream, the following will be checked:
591 The ADLER32 checksum field must be present.
595 The value of the ADLER32 field read must match the adler32 value of the
596 uncompressed data actually contained in the file.
602 If the input is a gzip (RFC1952) data stream, the following will be checked:
611 If the FHCRC bit is set in the gzip FLG header byte, the CRC16 bytes in the
612 header must match the crc16 value of the gzip header actually read.
616 If the gzip header contains a name field (FNAME) it consists solely of ISO
621 If the gzip header contains a comment field (FCOMMENT) it consists solely
622 of ISO 8859-1 characters plus line-feed.
626 If the gzip FEXTRA header field is present it must conform to the sub-field
627 structure as defined in RFC1952.
631 The CRC32 and ISIZE trailer fields must be present.
635 The value of the CRC32 field read must match the crc32 value of the
636 uncompressed data actually contained in the gzip file.
640 The value of the ISIZE fields read must match the length of the
641 uncompressed data actually read from the file.
650 =item -ParseExtra =E<gt> 0|1
652 If the gzip FEXTRA header field is present and this option is set, it will
653 force the module to check that it conforms to the sub-field structure as
656 If the C<Strict> option is on, it will automatically enable this option.
674 $status = $z->read($buffer)
676 Reads a block of compressed data (the size of the compressed block is
677 determined by the C<BlockSize> option in the constructor), uncompresses it and
678 writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
679 set in the constructor, the uncompressed data will be appended to the
680 C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
682 Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
683 or a negative number on error.
689 $status = $z->read($buffer, $length)
690 $status = $z->read($buffer, $length, $offset)
692 $status = read($z, $buffer, $length)
693 $status = read($z, $buffer, $length, $offset)
695 Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
697 The main difference between this form of the C<read> method and the
698 previous one, is that this one will attempt to return I<exactly> C<$length>
699 bytes. The only circumstances in which it will not do so are when end-of-file
700 is reached or an IO error is encountered.
702 Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
703 or a negative number on error.
710 $line = $z->getline()
715 This method fully supports the use of the variable C<$/>
716 (or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
717 determine what constitutes an end of line. Both paragraph mode and file
718 slurp mode are supported.
727 Read a single character.
733 $char = $z->ungetc($string)
740 $status = $z->inflateSync()
748 $hdr = $z->getHeaderInfo();
749 @hdrs = $z->getHeaderInfo();
751 This method returns either a hash reference (in scalar context) or a list
752 of hash references (in array context) that contains information about each
753 of the header fields in the compressed data stream(s).
765 Returns the uncompressed file offset.
776 Returns true if the end of the compressed input stream has been reached.
782 $z->seek($position, $whence);
783 seek($z, $position, $whence);
788 Provides a sub-set of the C<seek> functionality, with the restriction
789 that it is only legal to seek forward in the input file/buffer.
790 It is a fatal error to attempt to seek backward.
794 The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
795 SEEK_CUR or SEEK_END.
797 Returns 1 on success, 0 on failure.
806 This is a noop provided for completeness.
813 If the C<$z> object is associated with a file, this method will return
814 the underlying filehandle.
816 If the C<$z> object is associated with a buffer, this method will
826 Closes the output file/buffer.
830 For most versions of Perl this method will be automatically invoked if
831 the IO::Uncompress::AnyInflate object is destroyed (either explicitly or by the
832 variable with the reference to the object going out of scope). The
833 exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
834 these cases, the C<close> method will be called automatically, but
835 not until global destruction of all live objects when the program is
838 Therefore, if you want your scripts to be able to run on all versions
839 of Perl, you should call C<close> explicitly and not rely on automatic
842 Returns true on success, otherwise 0.
844 If the C<AutoClose> option has been enabled when the IO::Uncompress::AnyInflate
845 object was created, and the object is associated with a file, the
846 underlying file will also be closed.
853 No symbolic constants are required by IO::Uncompress::AnyInflate at present.
859 Imports C<anyinflate> and C<$AnyInflateError>.
862 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
873 L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>
875 L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
877 L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>, L<Archive::Tar|Archive::Tar>,
880 For RFC 1950, 1951 and 1952 see
881 F<http://www.faqs.org/rfcs/rfc1950.html>,
882 F<http://www.faqs.org/rfcs/rfc1951.html> and
883 F<http://www.faqs.org/rfcs/rfc1952.html>
885 The primary site for the gzip program is F<http://www.gzip.org>.
889 The I<IO::Uncompress::AnyInflate> module was written by Paul Marquess,
890 F<pmqs@cpan.org>. The latest copy of the module can be
891 found on CPAN in F<modules/by-module/Compress/Compress-Zlib-x.x.tar.gz>.
893 The I<zlib> compression library was written by Jean-loup Gailly
894 F<gzip@prep.ai.mit.edu> and Mark Adler F<madler@alumni.caltech.edu>.
896 The primary site for the I<zlib> compression library is
897 F<http://www.zlib.org>.
899 =head1 MODIFICATION HISTORY
901 See the Changes file.
903 =head1 COPYRIGHT AND LICENSE
906 Copyright (c) 2005-2006 Paul Marquess. All rights reserved.
907 This program is free software; you can redistribute it and/or
908 modify it under the same terms as Perl itself.