1 package IO::Uncompress::Inflate ;
8 use IO::Compress::Base::Common qw(:Status createSelfTiedObject);
9 use IO::Compress::Zlib::Constants;
11 use IO::Uncompress::RawInflate ;
14 our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $InflateError);  # package globals; a reference to $InflateError is handed to createSelfTiedObject by the code below
16 $VERSION = '2.000_08';
19 @ISA = qw( Exporter IO::Uncompress::RawInflate );  # inherit from both Exporter and IO::Uncompress::RawInflate
20 @EXPORT_OK = qw( $InflateError inflate ) ;  # names callers may import on request
21 %EXPORT_TAGS = %IO::Uncompress::RawInflate::DEFLATE_CONSTANTS ;  # start from the tag table provided by IO::Uncompress::RawInflate
22 push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;  # add this module's own exports to the 'all' tag
23 Exporter::export_ok_tags('all');  # make every name listed under the 'all' tag exportable on request
29 my $obj = createSelfTiedObject($class, \$InflateError);
31 $obj->_create(undef, 0, @_);
36 my $obj = createSelfTiedObject(undef, \$InflateError);
37 return $obj->_inf(@_);
50 # inflate (RFC 1950) always needs adler32
51 $got->value('ADLER32' => 1);
61 $self->smartReadExact(\$magic, ZLIB_HEADER_SIZE);
63 *$self->{HeaderPending} = $magic ;
65 return $self->HeaderError("Header size is " .
66 ZLIB_HEADER_SIZE . " bytes")
67 if length $magic != ZLIB_HEADER_SIZE;
69 return $self->HeaderError("CRC mismatch.")
70 if ! isZlibMagic($magic) ;
72 *$self->{Type} = 'rfc1950';
81 return $self->_readDeflateHeader($magic) ;
89 my $ADLER32 = unpack("N", $trailer) ;
90 *$self->{Info}{ADLER32} = $ADLER32;
91 return $self->TrailerError("CRC mismatch")
92 if *$self->{Strict} && $ADLER32 != *$self->{Uncomp}->adler32() ;
102 return 0 if length $buffer < ZLIB_HEADER_SIZE ;
103 my $hdr = unpack("n", $buffer) ;
104 return $hdr % 31 == 0 ;
113 ($data >> $offset ) & $mask & 0xFF ;
117 sub _readDeflateHeader
119 my ($self, $buffer) = @_ ;
122 # $self->smartReadExact(\$buffer, ZLIB_HEADER_SIZE);
124 # *$self->{HeaderPending} = $buffer ;
126 # return $self->HeaderError("Header size is " .
127 # ZLIB_HEADER_SIZE . " bytes")
128 # if length $buffer != ZLIB_HEADER_SIZE;
130 # return $self->HeaderError("CRC mismatch.")
131 # if ! isZlibMagic($buffer) ;
134 my ($CMF, $FLG) = unpack "C C", $buffer;
135 my $FDICT = bits($FLG, ZLIB_FLG_FDICT_OFFSET, ZLIB_FLG_FDICT_BITS ),
137 my $cm = bits($CMF, ZLIB_CMF_CM_OFFSET, ZLIB_CMF_CM_BITS) ;
138 $cm == ZLIB_CMF_CM_DEFLATED
139 or return $self->HeaderError("Not Deflate (CM is $cm)") ;
143 $self->smartReadExact(\$buffer, ZLIB_FDICT_SIZE)
144 or return $self->TruncatedHeader("FDICT");
146 $DICTID = unpack("N", $buffer) ;
149 *$self->{Type} = 'rfc1950';
153 'FingerprintLength' => ZLIB_HEADER_SIZE,
154 'HeaderLength' => ZLIB_HEADER_SIZE,
155 'TrailerLength' => ZLIB_TRAILER_SIZE,
159 CM => bits($CMF, ZLIB_CMF_CM_OFFSET, ZLIB_CMF_CM_BITS ),
160 CINFO => bits($CMF, ZLIB_CMF_CINFO_OFFSET, ZLIB_CMF_CINFO_BITS ),
162 FCHECK => bits($FLG, ZLIB_FLG_FCHECK_OFFSET, ZLIB_FLG_FCHECK_BITS),
163 FDICT => bits($FLG, ZLIB_FLG_FDICT_OFFSET, ZLIB_FLG_FDICT_BITS ),
164 FLEVEL => bits($FLG, ZLIB_FLG_LEVEL_OFFSET, ZLIB_FLG_LEVEL_BITS ),
181 IO::Uncompress::Inflate - Perl interface to read RFC 1950 files/buffers
186 use IO::Uncompress::Inflate qw(inflate $InflateError) ;
188 my $status = inflate $input => $output [,OPTS]
189 or die "inflate failed: $InflateError\n";
191 my $z = new IO::Uncompress::Inflate $input [OPTS]
192 or die "inflate failed: $InflateError\n";
194 $status = $z->read($buffer)
195 $status = $z->read($buffer, $length)
196 $status = $z->read($buffer, $length, $offset)
197 $line = $z->getline()
202 $status = $z->inflateSync()
205 $data = $z->getHeaderInfo()
207 $z->seek($position, $whence)
219 read($z, $buffer, $length);
220 read($z, $buffer, $length, $offset);
222 seek($z, $position, $whence)
233 B<WARNING -- This is a Beta release>.
237 =item * DO NOT use in production code.
239 =item * The documentation is incomplete in places.
241 =item * Parts of the interface defined here are tentative.
243 =item * Please report any problems you find.
251 This module provides a Perl interface that allows the reading of
252 files/buffers that conform to RFC 1950.
254 For writing RFC 1950 files/buffers, see the companion module IO::Compress::Deflate.
258 =head1 Functional Interface
260 A top-level function, C<inflate>, is provided to carry out
261 "one-shot" uncompression between buffers and/or files. For finer
262 control over the uncompression process, see the L</"OO Interface">
265 use IO::Uncompress::Inflate qw(inflate $InflateError) ;
267 inflate $input => $output [,OPTS]
268 or die "inflate failed: $InflateError\n";
272 The functional interface needs Perl 5.005 or better.
275 =head2 inflate $input => $output [, OPTS]
278 C<inflate> expects at least two parameters, C<$input> and C<$output>.
280 =head3 The C<$input> parameter
282 The parameter, C<$input>, is used to define the source of
285 It can take one of the following forms:
291 If the C<$input> parameter is a simple scalar, it is assumed to be a
292 filename. This file will be opened for reading and the input data
293 will be read from it.
297 If the C<$input> parameter is a filehandle, the input data will be
299 The string '-' can be used as an alias for standard input.
301 =item A scalar reference
303 If C<$input> is a scalar reference, the input data will be read
306 =item An array reference
308 If C<$input> is an array reference, each element in the array must be a
311 The input data will be read from each file in turn.
313 The complete array will be walked to ensure that it only
314 contains valid filenames before any data is uncompressed.
318 =item An Input FileGlob string
320 If C<$input> is a string that is delimited by the characters "<" and ">",
321 C<inflate> will assume that it is an I<input fileglob string>. The
322 input is the list of files that match the fileglob.
324 If the fileglob does not match any files ...
326 See L<File::GlobMapper|File::GlobMapper> for more details.
331 If the C<$input> parameter is any other type, C<undef> will be returned.
335 =head3 The C<$output> parameter
337 The parameter C<$output> is used to control the destination of the
338 uncompressed data. This parameter can take one of these forms.
344 If the C<$output> parameter is a simple scalar, it is assumed to be a
345 filename. This file will be opened for writing and the uncompressed
346 data will be written to it.
350 If the C<$output> parameter is a filehandle, the uncompressed data
351 will be written to it.
352 The string '-' can be used as an alias for standard output.
355 =item A scalar reference
357 If C<$output> is a scalar reference, the uncompressed data will be
358 stored in C<$$output>.
362 =item An Array Reference
364 If C<$output> is an array reference, the uncompressed data will be
365 pushed onto the array.
367 =item An Output FileGlob
369 If C<$output> is a string that is delimited by the characters "<" and ">",
370 C<inflate> will assume that it is an I<output fileglob string>. The
371 output is the list of files that match the fileglob.
373 When C<$output> is a fileglob string, C<$input> must also be a fileglob
374 string. Anything else is an error.
378 If the C<$output> parameter is any other type, C<undef> will be returned.
384 When C<$input> maps to multiple files/buffers and C<$output> is a single
385 file/buffer the uncompressed input files/buffers will all be stored
386 in C<$output> as a single uncompressed stream.
390 =head2 Optional Parameters
392 Unless specified below, the optional parameters for C<inflate>,
393 C<OPTS>, are the same as those used with the OO interface defined in the
394 L</"Constructor Options"> section below.
398 =item AutoClose =E<gt> 0|1
400 This option applies to any input or output data streams to
401 C<inflate> that are filehandles.
403 If C<AutoClose> is specified, and the value is true, it will result in all
404 input and/or output filehandles being closed once C<inflate> has
407 This parameter defaults to 0.
411 =item BinModeOut =E<gt> 0|1
413 When writing to a file or filehandle, set C<binmode> before writing to the
422 =item -Append =E<gt> 0|1
426 =item -MultiStream =E<gt> 0|1
428 Creates a new stream after each file.
441 To read the contents of the file C<file1.txt.1950> and write the
442 uncompressed data to the file C<file1.txt>.
446 use IO::Uncompress::Inflate qw(inflate $InflateError) ;
448 my $input = "file1.txt.1950";
449 my $output = "file1.txt";
450 inflate $input => $output
451 or die "inflate failed: $InflateError\n";
454 To read from an existing Perl filehandle, C<$input>, and write the
455 uncompressed data to a buffer, C<$buffer>.
459 use IO::Uncompress::Inflate qw(inflate $InflateError) ;
462 my $input = new IO::File "<file1.txt.1950"
463 or die "Cannot open 'file1.txt.1950': $!\n" ;
465 inflate $input => \$buffer
466 or die "inflate failed: $InflateError\n";
468 To uncompress all files in the directory "/my/home" that match "*.txt.1950" and store the uncompressed data in the same directory
472 use IO::Uncompress::Inflate qw(inflate $InflateError) ;
474 inflate '</my/home/*.txt.1950>' => '</my/home/#1.txt>'
475 or die "inflate failed: $InflateError\n";
477 and if you want to uncompress each file one at a time, this will do the trick
481 use IO::Uncompress::Inflate qw(inflate $InflateError) ;
483 for my $input ( glob "/my/home/*.txt.1950" )
486 $output =~ s/\.1950$// ;
487 inflate $input => $output
488 or die "Error uncompressing '$input': $InflateError\n";
495 The format of the constructor for IO::Uncompress::Inflate is shown below
498 my $z = new IO::Uncompress::Inflate $input [OPTS]
499 or die "IO::Uncompress::Inflate failed: $InflateError\n";
501 Returns an C<IO::Uncompress::Inflate> object on success and undef on failure.
502 The variable C<$InflateError> will contain an error message on failure.
504 If you are running Perl 5.005 or better the object, C<$z>, returned from
505 IO::Uncompress::Inflate can be used exactly like an L<IO::File|IO::File> filehandle.
506 This means that all normal input file operations can be carried out with
507 C<$z>. For example, to read a line from a compressed file/buffer you can
508 use either of these forms
510 $line = $z->getline();
513 The mandatory parameter C<$input> is used to determine the source of the
514 compressed data. This parameter can take one of three forms.
520 If the C<$input> parameter is a scalar, it is assumed to be a filename. This
521 file will be opened for reading and the compressed data will be read from it.
525 If the C<$input> parameter is a filehandle, the compressed data will be
527 The string '-' can be used as an alias for standard input.
530 =item A scalar reference
532 If C<$input> is a scalar reference, the compressed data will be read from
537 =head2 Constructor Options
540 The option names defined below are case insensitive and can be optionally
541 prefixed by a '-'. So all of the following are valid
548 OPTS is a combination of the following options:
552 =item -AutoClose =E<gt> 0|1
554 This option is only valid when the C<$input> parameter is a filehandle. If
555 specified, and the value is true, it will result in the file being closed once
556 either the C<close> method is called or the IO::Uncompress::Inflate object is
559 This parameter defaults to 0.
561 =item -MultiStream =E<gt> 0|1
565 Allows multiple concatenated compressed streams to be treated as a single
566 compressed stream. Decompression will stop once either the end of the
567 file/buffer is reached, an error is encountered (premature eof, corrupt
568 compressed data) or the end of a stream is not immediately followed by the
569 start of another stream.
571 This parameter defaults to 0.
575 =item -Prime =E<gt> $string
577 This option will uncompress the contents of C<$string> before processing the
580 This option can be useful when the compressed data is embedded in another
581 file/data structure and it is not possible to work out where the compressed
582 data begins without having to read the first few bytes. If this is the
583 case, the uncompression can be I<primed> with these bytes using this
586 =item -Transparent =E<gt> 0|1
588 If this option is set and the input file or buffer is not compressed data,
589 the module will allow reading of it anyway.
591 This option defaults to 1.
593 =item -BlockSize =E<gt> $num
595 When reading the compressed input data, IO::Uncompress::Inflate will read it in
596 blocks of C<$num> bytes.
598 This option defaults to 4096.
600 =item -InputLength =E<gt> $size
602 When present this option will limit the number of compressed bytes read
603 from the input file/buffer to C<$size>. This option can be used in the
604 situation where there is useful data directly after the compressed data
605 stream and you know beforehand the exact length of the compressed data
608 This option is mostly used when reading from a filehandle, in which case
609 the file pointer will be left pointing to the first byte directly after the
610 compressed data stream.
614 This option defaults to off.
616 =item -Append =E<gt> 0|1
618 This option controls what the C<read> method does with uncompressed data.
620 If set to 1, all uncompressed data will be appended to the output parameter
621 of the C<read> method.
623 If set to 0, the contents of the output parameter of the C<read> method
624 will be overwritten by the uncompressed data.
628 =item -Strict =E<gt> 0|1
632 This option controls whether the extra checks defined below are used when
633 carrying out the decompression. When Strict is on, the extra tests are
634 carried out, when Strict is off they are not.
636 The default for this option is off.
646 The ADLER32 checksum field must be present.
650 The value of the ADLER32 field read must match the adler32 value of the
651 uncompressed data actually contained in the file.
677 $status = $z->read($buffer)
679 Reads a block of compressed data (the size of the compressed block is
680 determined by the C<BlockSize> option in the constructor), uncompresses it and
681 writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
682 set in the constructor, the uncompressed data will be appended to the
683 C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
685 Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
686 or a negative number on error.
692 $status = $z->read($buffer, $length)
693 $status = $z->read($buffer, $length, $offset)
695 $status = read($z, $buffer, $length)
696 $status = read($z, $buffer, $length, $offset)
698 Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
700 The main difference between this form of the C<read> method and the
701 previous one, is that this one will attempt to return I<exactly> C<$length>
702 bytes. The only circumstances in which it will not do so are when end-of-file
703 or an IO error is encountered.
705 Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
706 or a negative number on error.
713 $line = $z->getline()
718 This method fully supports the use of the variable C<$/>
719 (or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
720 determine what constitutes an end of line. Both paragraph mode and file
721 slurp mode are supported.
730 Read a single character.
736 $char = $z->ungetc($string)
744 $status = $z->inflateSync()
753 $hdr = $z->getHeaderInfo();
754 @hdrs = $z->getHeaderInfo();
756 This method returns either a hash reference (in scalar context) or a list
757 of hash references (in array context) that contains information about each
758 of the header fields in the compressed data stream(s).
770 Returns the uncompressed file offset.
781 Returns true if the end of the compressed input stream has been reached.
787 $z->seek($position, $whence);
788 seek($z, $position, $whence);
793 Provides a sub-set of the C<seek> functionality, with the restriction
794 that it is only legal to seek forward in the input file/buffer.
795 It is a fatal error to attempt to seek backward.
799 The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
800 SEEK_CUR or SEEK_END.
802 Returns 1 on success, 0 on failure.
811 This is a noop provided for completeness.
817 Returns true if the object currently refers to an opened file/buffer.
821 my $prev = $z->autoflush()
822 my $prev = $z->autoflush(EXPR)
824 If the C<$z> object is associated with a file or a filehandle, this method
825 returns the current autoflush setting for the underlying filehandle. If
826 C<EXPR> is present, and is non-zero, it will enable flushing after every
827 write/print operation.
829 If C<$z> is associated with a buffer, this method has no effect and always
832 B<Note> that the special variable C<$|> B<cannot> be used to set or
833 retrieve the autoflush setting.
835 =head2 input_line_number
837 $z->input_line_number()
838 $z->input_line_number(EXPR)
842 Returns the current uncompressed line number. If C<EXPR> is present it has
843 the effect of setting the line number. Note that setting the line number
844 does not change the current position within the file/buffer being read.
846 The contents of C<$/> are used to determine what constitutes a line
856 If the C<$z> object is associated with a file or a filehandle, this method
857 will return the underlying file descriptor.
859 If the C<$z> object is associated with a buffer, this method will
869 Closes the input file/buffer.
873 For most versions of Perl this method will be automatically invoked if
874 the IO::Uncompress::Inflate object is destroyed (either explicitly or by the
875 variable with the reference to the object going out of scope). The
876 exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
877 these cases, the C<close> method will be called automatically, but
878 not until global destruction of all live objects when the program is
881 Therefore, if you want your scripts to be able to run on all versions
882 of Perl, you should call C<close> explicitly and not rely on automatic
885 Returns true on success, otherwise 0.
887 If the C<AutoClose> option has been enabled when the IO::Uncompress::Inflate
888 object was created, and the object is associated with a file, the
889 underlying file will also be closed.
896 No symbolic constants are required by IO::Uncompress::Inflate at present.
902 Imports C<inflate> and C<$InflateError>.
905 use IO::Uncompress::Inflate qw(inflate $InflateError) ;
916 L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
918 L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
920 L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
921 L<Archive::Tar|Archive::Tar>,
925 For RFC 1950, 1951 and 1952 see
926 F<http://www.faqs.org/rfcs/rfc1950.html>,
927 F<http://www.faqs.org/rfcs/rfc1951.html> and
928 F<http://www.faqs.org/rfcs/rfc1952.html>
930 The I<zlib> compression library was written by Jean-loup Gailly
931 F<gzip@prep.ai.mit.edu> and Mark Adler F<madler@alumni.caltech.edu>.
933 The primary site for the I<zlib> compression library is
934 F<http://www.zlib.org>.
936 The primary site for gzip is F<http://www.gzip.org>.
946 The I<IO::Uncompress::Inflate> module was written by Paul Marquess,
951 =head1 MODIFICATION HISTORY
953 See the Changes file.
955 =head1 COPYRIGHT AND LICENSE
958 Copyright (c) 2005-2006 Paul Marquess. All rights reserved.
960 This program is free software; you can redistribute it and/or
961 modify it under the same terms as Perl itself.