1 package IO::Uncompress::Inflate ;
7 use Compress::Zlib::Common qw(createSelfTiedObject);
8 use Compress::Zlib::FileConstants;
10 use IO::Uncompress::RawInflate ;
13 our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $InflateError);
15 $VERSION = '2.000_07';
18 @ISA = qw( Exporter IO::Uncompress::RawInflate );
19 @EXPORT_OK = qw( $InflateError inflate ) ;
20 %EXPORT_TAGS = %IO::Uncompress::RawInflate::DEFLATE_CONSTANTS ;
21 push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
22 Exporter::export_ok_tags('all');
28 my $obj = createSelfTiedObject($class, \$InflateError);
30 $obj->_create(undef, 0, @_);
35 my $obj = createSelfTiedObject(undef, \$InflateError);
36 return $obj->_inf(@_);
49 # inflate always needs adler32
50 $got->value('ADLER32' => 1);
60 $self->smartReadExact(\$magic, ZLIB_HEADER_SIZE);
62 *$self->{HeaderPending} = $magic ;
64 return $self->HeaderError("Header size is " .
65 ZLIB_HEADER_SIZE . " bytes")
66 if length $magic != ZLIB_HEADER_SIZE;
68 return $self->HeaderError("CRC mismatch.")
69 if ! isZlibMagic($magic) ;
71 *$self->{Type} = 'rfc1950';
80 return $self->_readDeflateHeader($magic) ;
88 my $ADLER32 = unpack("N", $trailer) ;
89 *$self->{Info}{ADLER32} = $ADLER32;
90 return $self->TrailerError("CRC mismatch")
91 if *$self->{Strict} && $ADLER32 != *$self->{Uncomp}->adler32() ;
101 return 0 if length $buffer < ZLIB_HEADER_SIZE ;
102 my $hdr = unpack("n", $buffer) ;
103 return $hdr % 31 == 0 ;
112 ($data >> $offset ) & $mask & 0xFF ;
116 sub _readDeflateHeader
118 my ($self, $buffer) = @_ ;
121 # $self->smartReadExact(\$buffer, ZLIB_HEADER_SIZE);
123 # *$self->{HeaderPending} = $buffer ;
125 # return $self->HeaderError("Header size is " .
126 # ZLIB_HEADER_SIZE . " bytes")
127 # if length $buffer != ZLIB_HEADER_SIZE;
129 # return $self->HeaderError("CRC mismatch.")
130 # if ! isZlibMagic($buffer) ;
133 my ($CMF, $FLG) = unpack "C C", $buffer;
134 my $FDICT = bits($FLG, ZLIB_FLG_FDICT_OFFSET, ZLIB_FLG_FDICT_BITS ),
136 my $cm = bits($CMF, ZLIB_CMF_CM_OFFSET, ZLIB_CMF_CM_BITS) ;
137 $cm == ZLIB_CMF_CM_DEFLATED
138 or return $self->HeaderError("Not Deflate (CM is $cm)") ;
142 $self->smartReadExact(\$buffer, ZLIB_FDICT_SIZE)
143 or return $self->TruncatedHeader("FDICT");
145 $DICTID = unpack("N", $buffer) ;
148 *$self->{Type} = 'rfc1950';
152 'FingerprintLength' => ZLIB_HEADER_SIZE,
153 'HeaderLength' => ZLIB_HEADER_SIZE,
154 'TrailerLength' => ZLIB_TRAILER_SIZE,
158 CM => bits($CMF, ZLIB_CMF_CM_OFFSET, ZLIB_CMF_CM_BITS ),
159 CINFO => bits($CMF, ZLIB_CMF_CINFO_OFFSET, ZLIB_CMF_CINFO_BITS ),
161 FCHECK => bits($FLG, ZLIB_FLG_FCHECK_OFFSET, ZLIB_FLG_FCHECK_BITS),
162 FDICT => bits($FLG, ZLIB_FLG_FDICT_OFFSET, ZLIB_FLG_FDICT_BITS ),
163 FLEVEL => bits($FLG, ZLIB_FLG_LEVEL_OFFSET, ZLIB_FLG_LEVEL_BITS ),
179 IO::Uncompress::Inflate - Perl interface to read RFC 1950 files/buffers
183 use IO::Uncompress::Inflate qw(inflate $InflateError) ;
185 my $status = inflate $input => $output [,OPTS]
186 or die "inflate failed: $InflateError\n";
188 my $z = new IO::Uncompress::Inflate $input [OPTS]
189 or die "inflate failed: $InflateError\n";
191 $status = $z->read($buffer)
192 $status = $z->read($buffer, $length)
193 $status = $z->read($buffer, $length, $offset)
194 $line = $z->getline()
197 $status = $z->inflateSync()
199 $data = $z->getHeaderInfo()
201 $z->seek($position, $whence)
213 read($z, $buffer, $length);
214 read($z, $buffer, $length, $offset);
216 seek($z, $position, $whence)
227 B<WARNING -- This is a Beta release>.
231 =item * DO NOT use in production code.
233 =item * The documentation is incomplete in places.
235 =item * Parts of the interface defined here are tentative.
237 =item * Please report any problems you find.
245 This module provides a Perl interface that allows the reading of
246 files/buffers that conform to RFC 1950.
248 For writing RFC 1950 files/buffers, see the companion module IO::Compress::Deflate.
252 =head1 Functional Interface
254 A top-level function, C<inflate>, is provided to carry out
255 "one-shot" uncompression between buffers and/or files. For finer
256 control over the uncompression process, see the L</"OO Interface">
259 use IO::Uncompress::Inflate qw(inflate $InflateError) ;
261 inflate $input => $output [,OPTS]
262 or die "inflate failed: $InflateError\n";
266 The functional interface needs Perl 5.005 or better.
269 =head2 inflate $input => $output [, OPTS]
272 C<inflate> expects at least two parameters, C<$input> and C<$output>.
274 =head3 The C<$input> parameter
276 The parameter, C<$input>, is used to define the source of
279 It can take one of the following forms:
285 If the C<$input> parameter is a simple scalar, it is assumed to be a
286 filename. This file will be opened for reading and the input data
287 will be read from it.
291 If the C<$input> parameter is a filehandle, the input data will be
293 The string '-' can be used as an alias for standard input.
295 =item A scalar reference
297 If C<$input> is a scalar reference, the input data will be read
300 =item An array reference
302 If C<$input> is an array reference, each element in the array must be a
305 The input data will be read from each file in turn.
307 The complete array will be walked to ensure that it only
308 contains valid filenames before any data is uncompressed.
312 =item An Input FileGlob string
314 If C<$input> is a string that is delimited by the characters "<" and ">"
315 C<inflate> will assume that it is an I<input fileglob string>. The
316 input is the list of files that match the fileglob.
318 If the fileglob does not match any files ...
320 See L<File::GlobMapper|File::GlobMapper> for more details.
325 If the C<$input> parameter is any other type, C<undef> will be returned.
329 =head3 The C<$output> parameter
331 The parameter C<$output> is used to control the destination of the
332 uncompressed data. This parameter can take one of these forms.
338 If the C<$output> parameter is a simple scalar, it is assumed to be a
339 filename. This file will be opened for writing and the uncompressed
340 data will be written to it.
344 If the C<$output> parameter is a filehandle, the uncompressed data
345 will be written to it.
346 The string '-' can be used as an alias for standard output.
349 =item A scalar reference
351 If C<$output> is a scalar reference, the uncompressed data will be
352 stored in C<$$output>.
356 =item An Array Reference
358 If C<$output> is an array reference, the uncompressed data will be
359 pushed onto the array.
361 =item An Output FileGlob
363 If C<$output> is a string that is delimited by the characters "<" and ">"
364 C<inflate> will assume that it is an I<output fileglob string>. The
365 output is the list of files that match the fileglob.
367 When C<$output> is a fileglob string, C<$input> must also be a fileglob
368 string. Anything else is an error.
372 If the C<$output> parameter is any other type, C<undef> will be returned.
378 When C<$input> maps to multiple files/buffers and C<$output> is a single
379 file/buffer the uncompressed input files/buffers will all be stored
380 in C<$output> as a single uncompressed stream.
384 =head2 Optional Parameters
386 Unless specified below, the optional parameters for C<inflate>,
387 C<OPTS>, are the same as those used with the OO interface defined in the
388 L</"Constructor Options"> section below.
392 =item AutoClose =E<gt> 0|1
394 This option applies to any input or output data streams to
395 C<inflate> that are filehandles.
397 If C<AutoClose> is specified, and the value is true, it will result in all
398 input and/or output filehandles being closed once C<inflate> has
401 This parameter defaults to 0.
405 =item BinModeOut =E<gt> 0|1
407 When writing to a file or filehandle, set C<binmode> before writing to the
416 =item -Append =E<gt> 0|1
420 =item -MultiStream =E<gt> 0|1
422 Creates a new stream after each file.
435 To read the contents of the file C<file1.txt.1950> and write the
436 uncompressed data to the file C<file1.txt>.
440 use IO::Uncompress::Inflate qw(inflate $InflateError) ;
442 my $input = "file1.txt.1950";
443 my $output = "file1.txt";
444 inflate $input => $output
445 or die "inflate failed: $InflateError\n";
448 To read from an existing Perl filehandle, C<$input>, and write the
449 uncompressed data to a buffer, C<$buffer>.
453 use IO::Uncompress::Inflate qw(inflate $InflateError) ;
456 my $input = new IO::File "<file1.txt.1950"
457 or die "Cannot open 'file1.txt.1950': $!\n" ;
459 inflate $input => \$buffer
460 or die "inflate failed: $InflateError\n";
462 To uncompress all files in the directory "/my/home" that match "*.txt.1950" and store the uncompressed data in the same directory
466 use IO::Uncompress::Inflate qw(inflate $InflateError) ;
468 inflate '</my/home/*.txt.1950>' => '</my/home/#1.txt>'
469 or die "inflate failed: $InflateError\n";
471 and if you want to uncompress each file one at a time, this will do the trick
475 use IO::Uncompress::Inflate qw(inflate $InflateError) ;
477 for my $input ( glob "/my/home/*.txt.1950" )
480 $output =~ s/\.1950$// ;
481 inflate $input => $output
482 or die "Error uncompressing '$input': $InflateError\n";
489 The format of the constructor for IO::Uncompress::Inflate is shown below
492 my $z = new IO::Uncompress::Inflate $input [OPTS]
493 or die "IO::Uncompress::Inflate failed: $InflateError\n";
495 Returns an C<IO::Uncompress::Inflate> object on success and undef on failure.
496 The variable C<$InflateError> will contain an error message on failure.
498 If you are running Perl 5.005 or better the object, C<$z>, returned from
499 IO::Uncompress::Inflate can be used exactly like an L<IO::File|IO::File> filehandle.
500 This means that all normal input file operations can be carried out with
501 C<$z>. For example, to read a line from a compressed file/buffer you can
502 use either of these forms
504 $line = $z->getline();
507 The mandatory parameter C<$input> is used to determine the source of the
508 compressed data. This parameter can take one of three forms.
514 If the C<$input> parameter is a scalar, it is assumed to be a filename. This
515 file will be opened for reading and the compressed data will be read from it.
519 If the C<$input> parameter is a filehandle, the compressed data will be
521 The string '-' can be used as an alias for standard input.
524 =item A scalar reference
526 If C<$input> is a scalar reference, the compressed data will be read from
531 =head2 Constructor Options
534 The option names defined below are case insensitive and can be optionally
535 prefixed by a '-'. So all of the following are valid
542 OPTS is a combination of the following options:
546 =item -AutoClose =E<gt> 0|1
548 This option is only valid when the C<$input> parameter is a filehandle. If
549 specified, and the value is true, it will result in the file being closed once
550 either the C<close> method is called or the IO::Uncompress::Inflate object is
553 This parameter defaults to 0.
555 =item -MultiStream =E<gt> 0|1
559 Allows multiple concatenated compressed streams to be treated as a single
560 compressed stream. Decompression will stop once either the end of the
561 file/buffer is reached, an error is encountered (premature eof, corrupt
562 compressed data) or the end of a stream is not immediately followed by the
563 start of another stream.
565 This parameter defaults to 0.
569 =item -Prime =E<gt> $string
571 This option will uncompress the contents of C<$string> before processing the
574 This option can be useful when the compressed data is embedded in another
575 file/data structure and it is not possible to work out where the compressed
576 data begins without having to read the first few bytes. If this is the
577 case, the uncompression can be I<primed> with these bytes using this
580 =item -Transparent =E<gt> 0|1
582 If this option is set and the input file or buffer is not compressed data,
583 the module will allow reading of it anyway.
585 This option defaults to 1.
587 =item -BlockSize =E<gt> $num
589 When reading the compressed input data, IO::Uncompress::Inflate will read it in
590 blocks of C<$num> bytes.
592 This option defaults to 4096.
594 =item -InputLength =E<gt> $size
596 When present this option will limit the number of compressed bytes read
597 from the input file/buffer to C<$size>. This option can be used in the
598 situation where there is useful data directly after the compressed data
599 stream and you know beforehand the exact length of the compressed data
602 This option is mostly used when reading from a filehandle, in which case
603 the file pointer will be left pointing to the first byte directly after the
604 compressed data stream.
608 This option defaults to off.
610 =item -Append =E<gt> 0|1
612 This option controls what the C<read> method does with uncompressed data.
614 If set to 1, all uncompressed data will be appended to the output parameter
615 of the C<read> method.
617 If set to 0, the contents of the output parameter of the C<read> method
618 will be overwritten by the uncompressed data.
622 =item -Strict =E<gt> 0|1
626 This option controls whether the extra checks defined below are used when
627 carrying out the decompression. When Strict is on, the extra tests are
628 carried out; when Strict is off, they are not.
630 The default for this option is off.
640 The ADLER32 checksum field must be present.
644 The value of the ADLER32 field read must match the adler32 value of the
645 uncompressed data actually contained in the file.
669 $status = $z->read($buffer)
671 Reads a block of compressed data (the size of the compressed block is
672 determined by the C<BlockSize> option in the constructor), uncompresses it and
673 writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
674 set in the constructor, the uncompressed data will be appended to the
675 C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
677 Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
678 or a negative number on error.
684 $status = $z->read($buffer, $length)
685 $status = $z->read($buffer, $length, $offset)
687 $status = read($z, $buffer, $length)
688 $status = read($z, $buffer, $length, $offset)
690 Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
692 The main difference between this form of the C<read> method and the
693 previous one, is that this one will attempt to return I<exactly> C<$length>
694 bytes. The only circumstances in which it will not do so are when end-of-file
695 or an I/O error is encountered.
697 Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
698 or a negative number on error.
705 $line = $z->getline()
710 This method fully supports the use of the variable C<$/>
711 (or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
712 determine what constitutes an end of line. Both paragraph mode and file
713 slurp mode are supported.
722 Read a single character.
728 $char = $z->ungetc($string)
735 $status = $z->inflateSync()
743 $hdr = $z->getHeaderInfo();
744 @hdrs = $z->getHeaderInfo();
746 This method returns either a hash reference (in scalar context) or a list
747 of hash references (in array context) that contains information about each
748 of the header fields in the compressed data stream(s).
760 Returns the uncompressed file offset.
771 Returns true if the end of the compressed input stream has been reached.
777 $z->seek($position, $whence);
778 seek($z, $position, $whence);
783 Provides a sub-set of the C<seek> functionality, with the restriction
784 that it is only legal to seek forward in the input file/buffer.
785 It is a fatal error to attempt to seek backward.
789 The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
790 SEEK_CUR or SEEK_END.
792 Returns 1 on success, 0 on failure.
801 This is a noop provided for completeness.
808 If the C<$z> object is associated with a file, this method will return
809 the underlying filehandle.
811 If the C<$z> object is associated with a buffer, this method will
821 Closes the input file/buffer.
825 For most versions of Perl this method will be automatically invoked if
826 the IO::Uncompress::Inflate object is destroyed (either explicitly or by the
827 variable with the reference to the object going out of scope). The
828 exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
829 these cases, the C<close> method will be called automatically, but
830 not until global destruction of all live objects when the program is
833 Therefore, if you want your scripts to be able to run on all versions
834 of Perl, you should call C<close> explicitly and not rely on automatic
837 Returns true on success, otherwise 0.
839 If the C<AutoClose> option has been enabled when the IO::Uncompress::Inflate
840 object was created, and the object is associated with a file, the
841 underlying file will also be closed.
848 No symbolic constants are required by IO::Uncompress::Inflate at present.
854 Imports C<inflate> and C<$InflateError>.
857 use IO::Uncompress::Inflate qw(inflate $InflateError) ;
868 L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Uncompress::AnyInflate>
870 L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
872 L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>, L<Archive::Tar|Archive::Tar>,
875 For RFC 1950, 1951 and 1952 see
876 F<http://www.faqs.org/rfcs/rfc1950.html>,
877 F<http://www.faqs.org/rfcs/rfc1951.html> and
878 F<http://www.faqs.org/rfcs/rfc1952.html>
880 The primary site for the gzip program is F<http://www.gzip.org>.
884 The I<IO::Uncompress::Inflate> module was written by Paul Marquess,
885 F<pmqs@cpan.org>. The latest copy of the module can be
886 found on CPAN in F<modules/by-module/Compress/Compress-Zlib-x.x.tar.gz>.
888 The I<zlib> compression library was written by Jean-loup Gailly
889 F<gzip@prep.ai.mit.edu> and Mark Adler F<madler@alumni.caltech.edu>.
891 The primary site for the I<zlib> compression library is
892 F<http://www.zlib.org>.
894 =head1 MODIFICATION HISTORY
896 See the Changes file.
898 =head1 COPYRIGHT AND LICENSE
901 Copyright (c) 2005-2006 Paul Marquess. All rights reserved.
902 This program is free software; you can redistribute it and/or
903 modify it under the same terms as Perl itself.