Move IO::Compress from ext/ to cpan/
[p5sagit/p5-mst-13.2.git] / cpan / IO-Compress / lib / IO / Uncompress / Gunzip.pm
CommitLineData
642e522c 1
2package IO::Uncompress::Gunzip ;
3
4require 5.004 ;
5
6# for RFC1952
7
8use strict ;
9use warnings;
a02d0f6f 10use bytes;
642e522c 11
10c2b2bb 12use IO::Uncompress::RawInflate 2.021 ;
1a6a8453 13
10c2b2bb 14use Compress::Raw::Zlib 2.021 qw( crc32 ) ;
15use IO::Compress::Base::Common 2.021 qw(:Status createSelfTiedObject);
16use IO::Compress::Gzip::Constants 2.021 ;
17use IO::Compress::Zlib::Extra 2.021 ;
1a6a8453 18
require Exporter ;

our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $GunzipError);

$VERSION = '2.021';

# Package-level error string; importable on request (see @EXPORT_OK).
$GunzipError = '';

# Inherit from Exporter and IO::Uncompress::RawInflate.
@ISA       = qw( Exporter IO::Uncompress::RawInflate );
@EXPORT_OK = qw( $GunzipError gunzip );

# Start from RawInflate's tag table, add this module's symbols to the
# ':all' tag, then register every ':all' symbol as exportable.
%EXPORT_TAGS = %IO::Uncompress::RawInflate::DEFLATE_CONSTANTS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');
642e522c 32
# Constructor.  Clears $GunzipError, creates the self-tied object for
# $class and hands the remaining arguments to _create, whose result is
# returned to the caller.
sub new
{
    my $class = shift ;

    $GunzipError = '';
    my $self = createSelfTiedObject($class, \$GunzipError);

    return $self->_create(undef, 0, @_);
}
41
# Functional interface.  Creates a throw-away self-tied object (no class
# name supplied) bound to $GunzipError and delegates all arguments to _inf.
sub gunzip
{
    return createSelfTiedObject(undef, \$GunzipError)->_inf(@_) ;
}
47
# Returns the option specification this class adds on top of its parent:
# a single 'ParseExtra' entry typed as Parse_boolean.
sub getExtraParams
{
    use IO::Compress::Base::Common 2.021 qw(:Parse);

    my @parse_extra_spec = (1, 1, Parse_boolean, 0) ;

    return ( 'ParseExtra' => \@parse_extra_spec ) ;
}
53
# Parameter hook: unconditionally switches the 'CRC32' option on in the
# parsed-parameter object, because gunzip always needs a crc32.
# Always returns 1.
sub ckParams
{
    my ($self, $got) = @_ ;

    $got->value('CRC32' => 1);

    return 1;
}
64
# Reads GZIP_ID_SIZE bytes from the input and checks them against the gzip
# magic number.  The bytes read are always stashed in HeaderPending.
# On success sets Type to 'rfc1952' and returns the magic bytes; otherwise
# returns whatever HeaderError returns.
sub ckMagic
{
    my ($self) = @_ ;

    my $magic ;
    $self->smartReadExact(\$magic, GZIP_ID_SIZE);
    *$self->{HeaderPending} = $magic ;

    if (length($magic) != GZIP_ID_SIZE) {
        return $self->HeaderError("Minimum header size is " .
                                  GZIP_MIN_HEADER_SIZE . " bytes") ;
    }

    return $self->HeaderError("Bad Magic")
        unless isGzipMagic($magic) ;

    *$self->{Type} = 'rfc1952';

    return $magic ;
}
85
# Header hook: given the already-read magic bytes, delegates to
# _readGzipHeader and returns its result unchanged.
sub readHeader
{
    my ($self, $magic) = @_ ;

    return $self->_readGzipHeader($magic);
}
93
# Decodes the gzip trailer (two little-endian 32-bit values: CRC32 then
# ISIZE), records both in Info, and, when Strict is set, compares them
# with the running crc32 and the 32-bit uncompressed size.
# Returns STATUS_OK, or the result of TrailerError on a mismatch.
sub chkTrailer
{
    my ($self, $trailer) = @_ ;

    my ($CRC32, $ISIZE) = unpack("V V", $trailer) ;
    *$self->{Info}{CRC32} = $CRC32;
    *$self->{Info}{ISIZE} = $ISIZE;

    # Without Strict the trailer values are recorded but not verified.
    return STATUS_OK
        unless *$self->{Strict} ;

    if ($CRC32 != *$self->{Uncomp}->crc32()) {
        return $self->TrailerError("CRC mismatch") ;
    }

    my $exp_isize = *$self->{UnCompSize}->get32bit();
    if ($ISIZE != $exp_isize) {
        return $self->TrailerError("ISIZE mismatch. Got $ISIZE"
                                  . ", expected $exp_isize") ;
    }

    return STATUS_OK;
}
642e522c 116
# Plain function (not a method): true when the first two bytes of the
# argument equal GZIP_ID1 and GZIP_ID2.  Returns 0 when the argument is
# shorter than GZIP_ID_SIZE.
sub isGzipMagic
{
    my ($data) = @_ ;

    return 0 if length($data) < GZIP_ID_SIZE ;

    my @id = unpack("C C", $data) ;

    return $id[0] == GZIP_ID1 && $id[1] == GZIP_ID2 ;
}
124
# Reads a complete gzip header, magic bytes included.  The magic is read
# and validated here, then the remainder is parsed by _readGzipHeader.
# If that parse yields undef the Transparent entry is removed from the
# object.  Returns the header hash ref, or the HeaderError result.
sub _readFullGzipHeader($)
{
    my $self  = shift ;
    my $magic = '' ;

    $self->smartReadExact(\$magic, GZIP_ID_SIZE);
    *$self->{HeaderPending} = $magic ;

    if (length($magic) != GZIP_ID_SIZE) {
        return $self->HeaderError("Minimum header size is " .
                                  GZIP_MIN_HEADER_SIZE . " bytes") ;
    }

    return $self->HeaderError("Bad Magic")
        unless isGzipMagic($magic) ;

    my $header = $self->_readGzipHeader($magic);
    delete *$self->{Transparent}
        unless defined $header ;

    return $header ;
}
146
# Parses the part of a gzip (RFC 1952) member header that follows the two
# magic bytes.
#
# Called as a method:  $self->_readGzipHeader($magic)
#   $magic - the ID bytes already consumed by the caller; they are
#            prepended to the bytes read here so that the 'Header' entry
#            and the header CRC cover the whole header.
#
# Returns a hash reference describing the header on success.  On failure
# returns whatever HeaderError / TruncatedHeader return.
#
# Note: the ($) prototype is kept for interface compatibility; Perl ignores
# prototypes on method calls, which is how this sub is invoked in this file.
sub _readGzipHeader($)
{
    my ($self, $magic) = @_ ;
    my ($HeaderCRC) ;
    my ($buffer) = '' ;

    # Fixed-size portion: CM, FLG, MTIME, XFL, OS.
    $self->smartReadExact(\$buffer, GZIP_MIN_HEADER_SIZE - GZIP_ID_SIZE)
        or return $self->HeaderError("Minimum header size is " .
                                     GZIP_MIN_HEADER_SIZE . " bytes") ;

    # $keep accumulates every header byte seen so far.
    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

    # now split out the various parts
    my ($cm, $flag, $mtime, $xfl, $os) = unpack("C C V C C", $buffer) ;

    $cm == GZIP_CM_DEFLATED
        or return $self->HeaderError("Not Deflate (CM is $cm)") ;

    # check for use of reserved bits
    return $self->HeaderError("Use of Reserved Bits in FLG field.")
        if $flag & GZIP_FLG_RESERVED ;

    # Optional FEXTRA field: 16-bit little-endian length, then the body.
    my $EXTRA ;
    my @EXTRA = () ;
    if ($flag & GZIP_FLG_FEXTRA) {
        $EXTRA = "" ;
        $self->smartReadExact(\$buffer, GZIP_FEXTRA_HEADER_SIZE)
            or return $self->TruncatedHeader("FEXTRA Length") ;

        my ($XLEN) = unpack("v", $buffer) ;
        $self->smartReadExact(\$EXTRA, $XLEN)
            or return $self->TruncatedHeader("FEXTRA Body");
        $keep .= $buffer . $EXTRA ;

        # Sub-field parsing only happens when ParseExtra is set.
        if ($XLEN && *$self->{'ParseExtra'}) {
            my $bad = IO::Compress::Zlib::Extra::parseRawExtra($EXTRA,
                                                               \@EXTRA, 1, 1);
            return $self->HeaderError($bad)
                if defined $bad;
        }
    }

    # Optional FNAME field: bytes up to (not including) a terminating NUL.
    my $origname ;
    if ($flag & GZIP_FLG_FNAME) {
        $origname = "" ;
        while (1) {
            $self->smartReadExact(\$buffer, 1)
                or return $self->TruncatedHeader("FNAME");
            last if $buffer eq GZIP_NULL_BYTE ;
            $origname .= $buffer
        }
        $keep .= $origname . GZIP_NULL_BYTE ;

        return $self->HeaderError("Non ISO 8859-1 Character found in Name")
            if *$self->{Strict} && $origname =~ /$GZIP_FNAME_INVALID_CHAR_RE/o ;
    }

    # Optional FCOMMENT field: same NUL-terminated layout as FNAME.
    my $comment ;
    if ($flag & GZIP_FLG_FCOMMENT) {
        $comment = "";
        while (1) {
            $self->smartReadExact(\$buffer, 1)
                or return $self->TruncatedHeader("FCOMMENT");
            last if $buffer eq GZIP_NULL_BYTE ;
            $comment .= $buffer
        }
        $keep .= $comment . GZIP_NULL_BYTE ;

        return $self->HeaderError("Non ISO 8859-1 Character found in Comment")
            if *$self->{Strict} && $comment =~ /$GZIP_FCOMMENT_INVALID_CHAR_RE/o ;
    }

    # Optional header CRC, verified only under Strict.
    if ($flag & GZIP_FLG_FHCRC) {
        $self->smartReadExact(\$buffer, GZIP_FHCRC_SIZE)
            or return $self->TruncatedHeader("FHCRC");

        $HeaderCRC = unpack("v", $buffer) ;

        # RFC 1952: CRC16 is the two least-significant bytes of the CRC32
        # of all header bytes preceding it.  The mask must therefore be
        # 16 bits wide; an 8-bit mask rejects valid headers whose high
        # byte is non-zero and misses corruption in that byte.
        # NOTE(review): confirm the companion writer (IO::Compress::Gzip)
        # uses the same 16-bit mask when it emits a header CRC.
        my $crc16 = crc32($keep) & 0xFFFF ;

        return $self->HeaderError("CRC16 mismatch.")
            if *$self->{Strict} && $crc16 != $HeaderCRC;

        $keep .= $buffer ;
    }

    # Assume compression method is deflated for xfl tests
    #if ($xfl) {
    #}

    *$self->{Type} = 'rfc1952';

    return {
        'Type'              => 'rfc1952',
        'FingerprintLength' => 2,
        'HeaderLength'      => length $keep,
        'TrailerLength'     => GZIP_TRAILER_SIZE,
        'Header'            => $keep,
        'isMinimalHeader'   => $keep eq GZIP_MINIMUM_HEADER ? 1 : 0,

        'MethodID'          => $cm,
        'MethodName'        => $cm == GZIP_CM_DEFLATED ? "Deflated" : "Unknown" ,
        'TextFlag'          => $flag & GZIP_FLG_FTEXT ? 1 : 0,
        'HeaderCRCFlag'     => $flag & GZIP_FLG_FHCRC ? 1 : 0,
        'NameFlag'          => $flag & GZIP_FLG_FNAME ? 1 : 0,
        'CommentFlag'       => $flag & GZIP_FLG_FCOMMENT ? 1 : 0,
        'ExtraFlag'         => $flag & GZIP_FLG_FEXTRA ? 1 : 0,
        'Name'              => $origname,
        'Comment'           => $comment,
        'Time'              => $mtime,
        'OsID'              => $os,
        'OsName'            => defined $GZIP_OS_Names{$os}
                                 ? $GZIP_OS_Names{$os} : "Unknown",
        'HeaderCRC'         => $HeaderCRC,
        'Flags'             => $flag,
        'ExtraFlags'        => $xfl,
        'ExtraFieldRaw'     => $EXTRA,
        'ExtraField'        => [ @EXTRA ],


        #'CompSize'=> $compsize,
        #'CRC32'=> $CRC32,
        #'OrigSize'=> $ISIZE,
      }
}
272
273
1a6a8453 2741;
642e522c 275
642e522c 276__END__
277
278
279=head1 NAME
280
cb7abd7f 281IO::Uncompress::Gunzip - Read RFC 1952 files/buffers
282
642e522c 283=head1 SYNOPSIS
284
285 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
286
287 my $status = gunzip $input => $output [,OPTS]
288 or die "gunzip failed: $GunzipError\n";
289
290 my $z = new IO::Uncompress::Gunzip $input [OPTS]
291 or die "gunzip failed: $GunzipError\n";
292
293 $status = $z->read($buffer)
294 $status = $z->read($buffer, $length)
295 $status = $z->read($buffer, $length, $offset)
296 $line = $z->getline()
297 $char = $z->getc()
298 $char = $z->ungetc()
a02d0f6f 299 $char = $z->opened()
300
642e522c 301 $status = $z->inflateSync()
a02d0f6f 302
e7d45986 303 $data = $z->trailingData()
304 $status = $z->nextStream()
642e522c 305 $data = $z->getHeaderInfo()
306 $z->tell()
307 $z->seek($position, $whence)
308 $z->binmode()
309 $z->fileno()
310 $z->eof()
311 $z->close()
312
313 $GunzipError ;
314
315 # IO::File mode
316
317 <$z>
318 read($z, $buffer);
319 read($z, $buffer, $length);
320 read($z, $buffer, $length, $offset);
321 tell($z)
322 seek($z, $position, $whence)
323 binmode($z)
324 fileno($z)
325 eof($z)
326 close($z)
327
642e522c 328=head1 DESCRIPTION
329
1a6a8453 330This module provides a Perl interface that allows the reading of
642e522c 331files/buffers that conform to RFC 1952.
332
1a6a8453 333For writing RFC 1952 files/buffers, see the companion module IO::Compress::Gzip.
642e522c 334
642e522c 335=head1 Functional Interface
336
1a6a8453 337A top-level function, C<gunzip>, is provided to carry out
338"one-shot" uncompression between buffers and/or files. For finer
339control over the uncompression process, see the L</"OO Interface">
340section.
642e522c 341
342 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
343
344 gunzip $input => $output [,OPTS]
345 or die "gunzip failed: $GunzipError\n";
346
642e522c 347The functional interface needs Perl5.005 or better.
348
642e522c 349=head2 gunzip $input => $output [, OPTS]
350
1a6a8453 351C<gunzip> expects at least two parameters, C<$input> and C<$output>.
642e522c 352
353=head3 The C<$input> parameter
354
355The parameter, C<$input>, is used to define the source of
356the compressed data.
357
358It can take one of the following forms:
359
360=over 5
361
362=item A filename
363
364If the C<$input> parameter is a simple scalar, it is assumed to be a
365filename. This file will be opened for reading and the input data
366will be read from it.
367
368=item A filehandle
369
370If the C<$input> parameter is a filehandle, the input data will be
371read from it.
372The string '-' can be used as an alias for standard input.
373
374=item A scalar reference
375
376If C<$input> is a scalar reference, the input data will be read
377from C<$$input>.
378
379=item An array reference
380
1a6a8453 381If C<$input> is an array reference, each element in the array must be a
382filename.
383
384The input data will be read from each file in turn.
385
642e522c 386The complete array will be walked to ensure that it only
1a6a8453 387contains valid filenames before any data is uncompressed.
388
642e522c 389=item An Input FileGlob string
390
391If C<$input> is a string that is delimited by the characters "<" and ">"
392C<gunzip> will assume that it is an I<input fileglob string>. The
393input is the list of files that match the fileglob.
394
395If the fileglob does not match any files ...
396
397See L<File::GlobMapper|File::GlobMapper> for more details.
398
642e522c 399=back
400
401If the C<$input> parameter is any other type, C<undef> will be returned.
402
642e522c 403=head3 The C<$output> parameter
404
405The parameter C<$output> is used to control the destination of the
406uncompressed data. This parameter can take one of these forms.
407
408=over 5
409
410=item A filename
411
1a6a8453 412If the C<$output> parameter is a simple scalar, it is assumed to be a
413filename. This file will be opened for writing and the uncompressed
414data will be written to it.
642e522c 415
416=item A filehandle
417
1a6a8453 418If the C<$output> parameter is a filehandle, the uncompressed data
419will be written to it.
642e522c 420The string '-' can be used as an alias for standard output.
421
642e522c 422=item A scalar reference
423
1a6a8453 424If C<$output> is a scalar reference, the uncompressed data will be
425stored in C<$$output>.
642e522c 426
642e522c 427=item An Array Reference
428
1a6a8453 429If C<$output> is an array reference, the uncompressed data will be
430pushed onto the array.
642e522c 431
432=item An Output FileGlob
433
434If C<$output> is a string that is delimited by the characters "<" and ">"
435C<gunzip> will assume that it is an I<output fileglob string>. The
436output is the list of files that match the fileglob.
437
When C<$output> is a fileglob string, C<$input> must also be a fileglob
string. Anything else is an error.
440
441=back
442
443If the C<$output> parameter is any other type, C<undef> will be returned.
444
642e522c 445=head2 Notes
446
c70c1701 447When C<$input> maps to multiple compressed files/buffers and C<$output> is
448a single file/buffer, after uncompression C<$output> will contain a
449concatenation of all the uncompressed data from each of the input
450files/buffers.
451
642e522c 452=head2 Optional Parameters
453
454Unless specified below, the optional parameters for C<gunzip>,
455C<OPTS>, are the same as those used with the OO interface defined in the
456L</"Constructor Options"> section below.
457
458=over 5
459
e7d45986 460=item C<< AutoClose => 0|1 >>
642e522c 461
1a6a8453 462This option applies to any input or output data streams to
463C<gunzip> that are filehandles.
642e522c 464
465If C<AutoClose> is specified, and the value is true, it will result in all
466input and/or output filehandles being closed once C<gunzip> has
467completed.
468
469This parameter defaults to 0.
470
e7d45986 471=item C<< BinModeOut => 0|1 >>
1a6a8453 472
473When writing to a file or filehandle, set C<binmode> before writing to the
474file.
475
476Defaults to 0.
477
e7d45986 478=item C<< Append => 0|1 >>
642e522c 479
480TODO
481
e7d45986 482=item C<< MultiStream => 0|1 >>
1a6a8453 483
e7d45986 484If the input file/buffer contains multiple compressed data streams, this
485option will uncompress the whole lot as a single data stream.
1a6a8453 486
e7d45986 487Defaults to 0.
1a6a8453 488
258133d1 489=item C<< TrailingData => $scalar >>
490
491Returns the data, if any, that is present immediately after the compressed
492data stream once uncompression is complete.
493
494This option can be used when there is useful information immediately
495following the compressed data stream, and you don't know the length of the
496compressed data stream.
497
498If the input is a buffer, C<trailingData> will return everything from the
499end of the compressed data stream to the end of the buffer.
500
501If the input is a filehandle, C<trailingData> will return the data that is
502left in the filehandle input buffer once the end of the compressed data
503stream has been reached. You can then use the filehandle to read the rest
504of the input file.
505
506Don't bother using C<trailingData> if the input is a filename.
507
258133d1 508If you know the length of the compressed data stream before you start
509uncompressing, you can avoid having to use C<trailingData> by setting the
510C<InputLength> option.
511
642e522c 512=back
513
642e522c 514=head2 Examples
515
516To read the contents of the file C<file1.txt.gz> and write the
10c2b2bb 517uncompressed data to the file C<file1.txt>.
642e522c 518
519 use strict ;
520 use warnings ;
521 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
522
523 my $input = "file1.txt.gz";
524 my $output = "file1.txt";
525 gunzip $input => $output
526 or die "gunzip failed: $GunzipError\n";
527
642e522c 528To read from an existing Perl filehandle, C<$input>, and write the
529uncompressed data to a buffer, C<$buffer>.
530
531 use strict ;
532 use warnings ;
533 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
534 use IO::File ;
535
536 my $input = new IO::File "<file1.txt.gz"
537 or die "Cannot open 'file1.txt.gz': $!\n" ;
538 my $buffer ;
539 gunzip $input => \$buffer
540 or die "gunzip failed: $GunzipError\n";
541
To uncompress all files in the directory "/my/home" that match "*.txt.gz" and store the uncompressed data in the same directory
543
544 use strict ;
545 use warnings ;
546 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
547
548 gunzip '</my/home/*.txt.gz>' => '</my/home/#1.txt>'
549 or die "gunzip failed: $GunzipError\n";
550
and if you want to uncompress each file one at a time, this will do the trick
552
553 use strict ;
554 use warnings ;
555 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
556
    for my $input ( glob "/my/home/*.txt.gz" )
    {
        my $output = $input;
        $output =~ s/\.gz$// ;
        gunzip $input => $output
            or die "Error uncompressing '$input': $GunzipError\n";
    }
564
565=head1 OO Interface
566
567=head2 Constructor
568
569The format of the constructor for IO::Uncompress::Gunzip is shown below
570
642e522c 571 my $z = new IO::Uncompress::Gunzip $input [OPTS]
572 or die "IO::Uncompress::Gunzip failed: $GunzipError\n";
573
574Returns an C<IO::Uncompress::Gunzip> object on success and undef on failure.
575The variable C<$GunzipError> will contain an error message on failure.
576
1a6a8453 577If you are running Perl 5.005 or better the object, C<$z>, returned from
578IO::Uncompress::Gunzip can be used exactly like an L<IO::File|IO::File> filehandle.
579This means that all normal input file operations can be carried out with
580C<$z>. For example, to read a line from a compressed file/buffer you can
581use either of these forms
642e522c 582
583 $line = $z->getline();
584 $line = <$z>;
585
586The mandatory parameter C<$input> is used to determine the source of the
587compressed data. This parameter can take one of three forms.
588
589=over 5
590
591=item A filename
592
593If the C<$input> parameter is a scalar, it is assumed to be a filename. This
594file will be opened for reading and the compressed data will be read from it.
595
596=item A filehandle
597
598If the C<$input> parameter is a filehandle, the compressed data will be
599read from it.
600The string '-' can be used as an alias for standard input.
601
642e522c 602=item A scalar reference
603
If C<$input> is a scalar reference, the compressed data will be read from
C<$$input>.
606
607=back
608
609=head2 Constructor Options
610
642e522c 611The option names defined below are case insensitive and can be optionally
612prefixed by a '-'. So all of the following are valid
613
614 -AutoClose
615 -autoclose
616 AUTOCLOSE
617 autoclose
618
619OPTS is a combination of the following options:
620
621=over 5
622
e7d45986 623=item C<< AutoClose => 0|1 >>
642e522c 624
625This option is only valid when the C<$input> parameter is a filehandle. If
626specified, and the value is true, it will result in the file being closed once
627either the C<close> method is called or the IO::Uncompress::Gunzip object is
628destroyed.
629
630This parameter defaults to 0.
631
e7d45986 632=item C<< MultiStream => 0|1 >>
642e522c 633
642e522c 634Allows multiple concatenated compressed streams to be treated as a single
635compressed stream. Decompression will stop once either the end of the
636file/buffer is reached, an error is encountered (premature eof, corrupt
637compressed data) or the end of a stream is not immediately followed by the
638start of another stream.
639
640This parameter defaults to 0.
641
e7d45986 642=item C<< Prime => $string >>
642e522c 643
644This option will uncompress the contents of C<$string> before processing the
645input file/buffer.
646
647This option can be useful when the compressed data is embedded in another
648file/data structure and it is not possible to work out where the compressed
1a6a8453 649data begins without having to read the first few bytes. If this is the
650case, the uncompression can be I<primed> with these bytes using this
651option.
642e522c 652
e7d45986 653=item C<< Transparent => 0|1 >>
642e522c 654
f6fd7794 655If this option is set and the input file/buffer is not compressed data,
642e522c 656the module will allow reading of it anyway.
657
In addition, if the input file/buffer does contain compressed data and
there is non-compressed data immediately following it, setting this option
will make this module treat the whole file/buffer as a single data stream.
661
642e522c 662This option defaults to 1.
663
e7d45986 664=item C<< BlockSize => $num >>
642e522c 665
1a6a8453 666When reading the compressed input data, IO::Uncompress::Gunzip will read it in
667blocks of C<$num> bytes.
642e522c 668
669This option defaults to 4096.
670
e7d45986 671=item C<< InputLength => $size >>
642e522c 672
1a6a8453 673When present this option will limit the number of compressed bytes read
674from the input file/buffer to C<$size>. This option can be used in the
675situation where there is useful data directly after the compressed data
676stream and you know beforehand the exact length of the compressed data
677stream.
642e522c 678
1a6a8453 679This option is mostly used when reading from a filehandle, in which case
680the file pointer will be left pointing to the first byte directly after the
642e522c 681compressed data stream.
682
642e522c 683This option defaults to off.
684
e7d45986 685=item C<< Append => 0|1 >>
642e522c 686
687This option controls what the C<read> method does with uncompressed data.
688
1a6a8453 689If set to 1, all uncompressed data will be appended to the output parameter
690of the C<read> method.
642e522c 691
1a6a8453 692If set to 0, the contents of the output parameter of the C<read> method
693will be overwritten by the uncompressed data.
642e522c 694
695Defaults to 0.
696
e7d45986 697=item C<< Strict => 0|1 >>
642e522c 698
642e522c 699This option controls whether the extra checks defined below are used when
1a6a8453 700carrying out the decompression. When Strict is on, the extra tests are
701carried out, when Strict is off they are not.
642e522c 702
703The default for this option is off.
704
642e522c 705=over 5
706
707=item 1
708
709If the FHCRC bit is set in the gzip FLG header byte, the CRC16 bytes in the
710header must match the crc16 value of the gzip header actually read.
711
712=item 2
713
714If the gzip header contains a name field (FNAME) it consists solely of ISO
7158859-1 characters.
716
717=item 3
718
1a6a8453 719If the gzip header contains a comment field (FCOMMENT) it consists solely
720of ISO 8859-1 characters plus line-feed.
642e522c 721
722=item 4
723
724If the gzip FEXTRA header field is present it must conform to the sub-field
a02d0f6f 725structure as defined in RFC 1952.
642e522c 726
727=item 5
728
729The CRC32 and ISIZE trailer fields must be present.
730
731=item 6
732
733The value of the CRC32 field read must match the crc32 value of the
734uncompressed data actually contained in the gzip file.
735
736=item 7
737
1a6a8453 738The value of the ISIZE fields read must match the length of the
739uncompressed data actually read from the file.
642e522c 740
741=back
742
=item C<< ParseExtra => 0|1 >>

If the gzip FEXTRA header field is present and this option is set, it will
force the module to check that it conforms to the sub-field structure as
defined in RFC 1952.
642e522c 747
If the C<Strict> option is on, it will automatically enable this option.
749
750Defaults to 0.
751
642e522c 752=back
753
754=head2 Examples
755
756TODO
757
758=head1 Methods
759
760=head2 read
761
762Usage is
763
764 $status = $z->read($buffer)
765
Reads a block of compressed data (the size of the compressed block is
determined by the C<BlockSize> option in the constructor), uncompresses it and
writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
set in the constructor, the uncompressed data will be appended to the
C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
642e522c 771
1a6a8453 772Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
773or a negative number on error.
642e522c 774
775=head2 read
776
777Usage is
778
779 $status = $z->read($buffer, $length)
780 $status = $z->read($buffer, $length, $offset)
781
782 $status = read($z, $buffer, $length)
783 $status = read($z, $buffer, $length, $offset)
784
785Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
786
1a6a8453 787The main difference between this form of the C<read> method and the
788previous one, is that this one will attempt to return I<exactly> C<$length>
789bytes. The only circumstances that this function will not is if end-of-file
790or an IO error is encountered.
642e522c 791
1a6a8453 792Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
793or a negative number on error.
642e522c 794
642e522c 795=head2 getline
796
797Usage is
798
799 $line = $z->getline()
800 $line = <$z>
801
802Reads a single line.
803
This method fully supports the use of the variable C<$/> (or
C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
determine what constitutes an end of line. Paragraph mode, record mode and
file slurp mode are all supported.
642e522c 808
642e522c 809=head2 getc
810
811Usage is
812
813 $char = $z->getc()
814
815Read a single character.
816
817=head2 ungetc
818
819Usage is
820
821 $char = $z->ungetc($string)
822
642e522c 823=head2 inflateSync
824
825Usage is
826
827 $status = $z->inflateSync()
828
829TODO
830
831=head2 getHeaderInfo
832
833Usage is
834
1a6a8453 835 $hdr = $z->getHeaderInfo();
836 @hdrs = $z->getHeaderInfo();
642e522c 837
This method returns either a hash reference (in scalar context) or a list
of hash references (in array context) that contains information about each
of the header fields in the compressed data stream(s).
642e522c 841
1a6a8453 842=over 5
642e522c 843
1a6a8453 844=item Name
642e522c 845
1a6a8453 846The contents of the Name header field, if present. If no name is
847present, the value will be undef. Note this is different from a zero length
848name, which will return an empty string.
642e522c 849
850=item Comment
851
1a6a8453 852The contents of the Comment header field, if present. If no comment is
853present, the value will be undef. Note this is different from a zero length
854comment, which will return an empty string.
642e522c 855
856=back
857
642e522c 858=head2 tell
859
860Usage is
861
862 $z->tell()
863 tell $z
864
865Returns the uncompressed file offset.
866
867=head2 eof
868
869Usage is
870
871 $z->eof();
872 eof($z);
873
642e522c 874Returns true if the end of the compressed input stream has been reached.
875
642e522c 876=head2 seek
877
878 $z->seek($position, $whence);
879 seek($z, $position, $whence);
880
642e522c 881Provides a sub-set of the C<seek> functionality, with the restriction
882that it is only legal to seek forward in the input file/buffer.
883It is a fatal error to attempt to seek backward.
884
The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
SEEK_CUR or SEEK_END.
887
888Returns 1 on success, 0 on failure.
889
890=head2 binmode
891
892Usage is
893
894 $z->binmode
895 binmode $z ;
896
897This is a noop provided for completeness.
898
a02d0f6f 899=head2 opened
900
901 $z->opened()
902
Returns true if the object currently refers to an opened file/buffer.
904
905=head2 autoflush
906
907 my $prev = $z->autoflush()
908 my $prev = $z->autoflush(EXPR)
909
910If the C<$z> object is associated with a file or a filehandle, this method
911returns the current autoflush setting for the underlying filehandle. If
912C<EXPR> is present, and is non-zero, it will enable flushing after every
913write/print operation.
914
915If C<$z> is associated with a buffer, this method has no effect and always
916returns C<undef>.
917
918B<Note> that the special variable C<$|> B<cannot> be used to set or
919retrieve the autoflush setting.
920
921=head2 input_line_number
922
923 $z->input_line_number()
924 $z->input_line_number(EXPR)
925
a02d0f6f 926Returns the current uncompressed line number. If C<EXPR> is present it has
927the effect of setting the line number. Note that setting the line number
928does not change the current position within the file/buffer being read.
929
The contents of C<$/> are used to determine what constitutes a line
terminator.
932
642e522c 933=head2 fileno
934
935 $z->fileno()
936 fileno($z)
937
d54256af 938If the C<$z> object is associated with a file or a filehandle, C<fileno>
939will return the underlying file descriptor. Once the C<close> method is
940called C<fileno> will return C<undef>.
642e522c 941
If the C<$z> object is associated with a buffer, this method will return
C<undef>.
642e522c 944
945=head2 close
946
947 $z->close() ;
948 close $z ;
949
Closes the input file/buffer.
951
642e522c 952For most versions of Perl this method will be automatically invoked if
953the IO::Uncompress::Gunzip object is destroyed (either explicitly or by the
954variable with the reference to the object going out of scope). The
955exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
956these cases, the C<close> method will be called automatically, but
957not until global destruction of all live objects when the program is
958terminating.
959
960Therefore, if you want your scripts to be able to run on all versions
961of Perl, you should call C<close> explicitly and not rely on automatic
962closing.
963
964Returns true on success, otherwise 0.
965
966If the C<AutoClose> option has been enabled when the IO::Uncompress::Gunzip
967object was created, and the object is associated with a file, the
968underlying file will also be closed.
969
e7d45986 970=head2 nextStream
971
972Usage is
973
974 my $status = $z->nextStream();
975
976Skips to the next compressed data stream in the input file/buffer. If a new
258133d1 977compressed data stream is found, the eof marker will be cleared and C<$.>
978will be reset to 0.
e7d45986 979
980Returns 1 if a new stream was found, 0 if none was found, and -1 if an
981error was encountered.
982
983=head2 trailingData
984
985Usage is
986
987 my $data = $z->trailingData();
988
258133d1 989Returns the data, if any, that is present immediately after the compressed
990data stream once uncompression is complete. It only makes sense to call
991this method once the end of the compressed data stream has been
992encountered.
993
994This option can be used when there is useful information immediately
995following the compressed data stream, and you don't know the length of the
996compressed data stream.
997
998If the input is a buffer, C<trailingData> will return everything from the
999end of the compressed data stream to the end of the buffer.
1000
1001If the input is a filehandle, C<trailingData> will return the data that is
1002left in the filehandle input buffer once the end of the compressed data
1003stream has been reached. You can then use the filehandle to read the rest
1004of the input file.
1005
1006Don't bother using C<trailingData> if the input is a filename.
1007
258133d1 1008If you know the length of the compressed data stream before you start
1009uncompressing, you can avoid having to use C<trailingData> by setting the
1010C<InputLength> option in the constructor.
e7d45986 1011
642e522c 1012=head1 Importing
1013
No symbolic constants are required by IO::Uncompress::Gunzip at present.
1015
1016=over 5
1017
1018=item :all
1019
1020Imports C<gunzip> and C<$GunzipError>.
1021Same as doing this
1022
1023 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
1024
1025=back
1026
1027=head1 EXAMPLES
1028
d54256af 1029=head2 Working with Net::FTP
642e522c 1030
d54256af 1031See L<IO::Uncompress::Gunzip::FAQ|IO::Uncompress::Gunzip::FAQ/"Compressed files and Net::FTP">
642e522c 1032
1033=head1 SEE ALSO
1034
258133d1 1035L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
642e522c 1036
1037L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
1038
a02d0f6f 1039L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
1040L<Archive::Tar|Archive::Tar>,
642e522c 1041L<IO::Zlib|IO::Zlib>
1042
1043For RFC 1950, 1951 and 1952 see
1044F<http://www.faqs.org/rfcs/rfc1950.html>,
1045F<http://www.faqs.org/rfcs/rfc1951.html> and
1046F<http://www.faqs.org/rfcs/rfc1952.html>
1047
a02d0f6f 1048The I<zlib> compression library was written by Jean-loup Gailly
1049F<gzip@prep.ai.mit.edu> and Mark Adler F<madler@alumni.caltech.edu>.
1050
1051The primary site for the I<zlib> compression library is
1052F<http://www.zlib.org>.
1053
1054The primary site for gzip is F<http://www.gzip.org>.
1055
642e522c 1056=head1 AUTHOR
1057
cb7abd7f 1058This module was written by Paul Marquess, F<pmqs@cpan.org>.
642e522c 1059
642e522c 1060=head1 MODIFICATION HISTORY
1061
1062See the Changes file.
1063
1064=head1 COPYRIGHT AND LICENSE
642e522c 1065
319fab50 1066Copyright (c) 2005-2009 Paul Marquess. All rights reserved.
a02d0f6f 1067
642e522c 1068This program is free software; you can redistribute it and/or
1069modify it under the same terms as Perl itself.
1070