IO::Compress modules
[p5sagit/p5-mst-13.2.git] / ext / Compress / IO / Zlib / lib / IO / Uncompress / Gunzip.pm
CommitLineData
642e522c 1
2package IO::Uncompress::Gunzip ;
3
4require 5.004 ;
5
6# for RFC1952
7
8use strict ;
9use warnings;
a02d0f6f 10use bytes;
642e522c 11
1a6a8453 12use IO::Uncompress::RawInflate ;
13
a02d0f6f 14use Compress::Raw::Zlib qw( crc32 ) ;
15use IO::Compress::Base::Common qw(:Status createSelfTiedObject);
16use IO::Compress::Gzip::Constants;
c70c1701 17use IO::Compress::Zlib::Extra;
1a6a8453 18
642e522c 19require Exporter ;
20
# Package globals: module version, inheritance chain, export tables and
# the error variable shared by the functional and OO interfaces.
our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $GunzipError);

$VERSION     = '2.000_14';
$GunzipError = '';

@ISA       = qw( Exporter IO::Uncompress::RawInflate );
@EXPORT_OK = qw( $GunzipError gunzip );

# Copy RawInflate's DEFLATE_CONSTANTS table as our tag table, append this
# module's own exportable names to the 'all' tag, then register every
# name in that tag as exportable on request.
%EXPORT_TAGS = %IO::Uncompress::RawInflate::DEFLATE_CONSTANTS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');
642e522c 32
# Constructor.
#
# Resets $GunzipError, creates the object with createSelfTiedObject (bound
# to $GunzipError) and hands the remaining arguments to _create.  Returns
# whatever _create returns.
sub new
{
    my $class = shift ;

    $GunzipError = '';

    # @_ is forwarded as-is (not copied) after the class name is removed.
    return createSelfTiedObject($class, \$GunzipError)
               ->_create(undef, 0, @_);
}
41
# Functional ("one-shot") interface: gunzip $input => $output [, OPTS].
#
# Creates an object with no class name, bound to $GunzipError, and
# delegates all arguments to its _inf method.  Returns whatever _inf
# returns.
sub gunzip
{
    my $worker = createSelfTiedObject(undef, \$GunzipError);

    return $worker->_inf(@_) ;
}
47
# Returns the option specification for the parameters this class adds:
# a single 'ParseExtra' entry whose value is the array ref
# [1, 1, Parse_boolean, 0].
# NOTE(review): the trailing 0 is presumably the default (the POD says
# ParseExtra defaults to 0); the meaning of the first two fields is not
# visible from this file.
sub getExtraParams
{
    # Brings Parse_boolean into scope at compile time.
    use IO::Compress::Base::Common qw(:Parse);

    return (
        'ParseExtra' => [ 1, 1, Parse_boolean, 0 ],
    ) ;
}
53
# Parameter hook: forces the 'CRC32' parameter to 1 on the supplied
# parameter object and always returns 1.
#
#   $got - parameter object providing a value(NAME => VALUE) method
sub ckParams
{
    my ($self, $got) = @_ ;

    # gunzip always needs crc32
    $got->value('CRC32' => 1);

    return 1;
}
64
# Reads GZIP_ID_SIZE bytes from the input and checks that they are the
# gzip magic number.
#
# The bytes read are always stored in HeaderPending.  On success the
# object's Type is set to 'rfc1952' and the magic bytes are returned;
# otherwise the result of HeaderError is returned.
sub ckMagic
{
    my ($self) = @_ ;

    my $magic ;
    $self->smartReadExact(\$magic, GZIP_ID_SIZE);

    *$self->{HeaderPending} = $magic ;

    if (length $magic != GZIP_ID_SIZE) {
        return $self->HeaderError("Minimum header size is " .
                                  GZIP_MIN_HEADER_SIZE . " bytes") ;
    }

    if (! isGzipMagic($magic)) {
        return $self->HeaderError("Bad Magic") ;
    }

    *$self->{Type} = 'rfc1952';

    return $magic ;
}
85
# Header hook: given the magic bytes already consumed, delegates the rest
# of the header parsing to _readGzipHeader and returns its result.
sub readHeader
{
    my ($self, $magic) = @_ ;

    return $self->_readGzipHeader($magic);
}
93
# Decodes the gzip trailer and, in Strict mode, validates it.
#
#   $trailer - raw trailer bytes: CRC32 then ISIZE, each unpacked as a
#              32-bit little-endian value ("V")
#
# The decoded values are always recorded in Info{CRC32} and Info{ISIZE}.
# With Strict set, the CRC32 is compared with *$self->{Uncomp}->crc32()
# and the ISIZE with *$self->{UnCompSize}->get32bit(); a mismatch returns
# the result of TrailerError.  Otherwise STATUS_OK is returned.
sub chkTrailer
{
    my ($self, $trailer) = @_ ;

    # Check CRC & ISIZE
    my ($CRC32, $ISIZE) = unpack("V V", $trailer) ;
    @{ *$self->{Info} }{ qw(CRC32 ISIZE) } = ($CRC32, $ISIZE) ;

    # Without Strict the trailer is recorded but not verified.
    return STATUS_OK
        if ! *$self->{Strict} ;

    if ($CRC32 != *$self->{Uncomp}->crc32()) {
        return $self->TrailerError("CRC mismatch") ;
    }

    my $exp_isize = *$self->{UnCompSize}->get32bit();
    if ($ISIZE != $exp_isize) {
        return $self->TrailerError("ISIZE mismatch. Got $ISIZE"
                                 . ", expected $exp_isize") ;
    }

    return STATUS_OK;
}
642e522c 116
# Returns true when the first two bytes of $buffer equal GZIP_ID1 and
# GZIP_ID2.  Returns 0 when $buffer is shorter than GZIP_ID_SIZE bytes.
sub isGzipMagic
{
    my ($buffer) = @_ ;

    # Too short to hold the identification bytes.
    return 0
        if length($buffer) < GZIP_ID_SIZE ;

    my ($id1, $id2) = unpack("C C", $buffer) ;

    return $id1 == GZIP_ID1 && $id2 == GZIP_ID2 ;
}
124
# Reads and validates a complete gzip header, starting at the magic
# number, and returns the result of _readGzipHeader.
#
# The magic bytes read are always stored in HeaderPending.  A short read
# or a bad magic number returns the result of HeaderError.  When
# _readGzipHeader returns undef, the object's Transparent entry is
# deleted before undef is passed on to the caller.
sub _readFullGzipHeader($)
{
    my $self  = shift ;
    my $magic = '' ;

    $self->smartReadExact(\$magic, GZIP_ID_SIZE);
    *$self->{HeaderPending} = $magic ;

    if (length $magic != GZIP_ID_SIZE) {
        return $self->HeaderError("Minimum header size is " .
                                  GZIP_MIN_HEADER_SIZE . " bytes") ;
    }

    if (! isGzipMagic($magic)) {
        return $self->HeaderError("Bad Magic") ;
    }

    my $status = $self->_readGzipHeader($magic);

    delete *$self->{Transparent}
        unless defined $status ;

    return $status ;
}
146
# _readGzipHeader($magic)
#
# Parses the part of a gzip (RFC 1952) header that follows the magic
# number and returns a hash ref describing it.
#
#   $magic - the identification bytes already consumed by the caller; they
#            are prepended to the bytes read here so that 'Header' and the
#            header CRC cover the whole header.
#
# Reads, in order: the fixed fields (CM, FLG, MTIME, XFL, OS), then the
# optional FEXTRA, FNAME, FCOMMENT and FHCRC fields selected by FLG.
#
# Returns the result of HeaderError/TruncatedHeader on failure.  The
# character-set and header-CRC checks are applied only when Strict is set.
#
# The ($) prototype is retained for interface compatibility; this file
# only ever calls the sub as a method, where Perl ignores prototypes.
sub _readGzipHeader($)
{
    my ($self, $magic) = @_ ;
    my ($HeaderCRC) ;
    my ($buffer) = '' ;

    # Fixed-size portion of the header that follows the magic number.
    $self->smartReadExact(\$buffer, GZIP_MIN_HEADER_SIZE - GZIP_ID_SIZE)
        or return $self->HeaderError("Minimum header size is " .
                                     GZIP_MIN_HEADER_SIZE . " bytes") ;

    # $keep accumulates every header byte consumed so far.
    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

    # now split out the various parts
    my ($cm, $flag, $mtime, $xfl, $os) = unpack("C C V C C", $buffer) ;

    $cm == GZIP_CM_DEFLATED
        or return $self->HeaderError("Not Deflate (CM is $cm)") ;

    # check for use of reserved bits
    return $self->HeaderError("Use of Reserved Bits in FLG field.")
        if $flag & GZIP_FLG_RESERVED ;

    # FEXTRA: 16-bit little-endian length followed by that many bytes.
    my $EXTRA ;
    my @EXTRA = () ;
    if ($flag & GZIP_FLG_FEXTRA) {
        $EXTRA = "" ;
        $self->smartReadExact(\$buffer, GZIP_FEXTRA_HEADER_SIZE)
            or return $self->TruncatedHeader("FEXTRA Length") ;

        my ($XLEN) = unpack("v", $buffer) ;
        $self->smartReadExact(\$EXTRA, $XLEN)
            or return $self->TruncatedHeader("FEXTRA Body");
        $keep .= $buffer . $EXTRA ;

        # Sub-field parsing is optional and skipped for an empty field.
        if ($XLEN && *$self->{'ParseExtra'}) {
            my $bad = IO::Compress::Zlib::Extra::parseRawExtra($EXTRA,
                                                               \@EXTRA, 1, 1);
            return $self->HeaderError($bad)
                if defined $bad;
        }
    }

    # FNAME: NUL-terminated string, read one byte at a time.
    my $origname ;
    if ($flag & GZIP_FLG_FNAME) {
        $origname = "" ;
        while (1) {
            $self->smartReadExact(\$buffer, 1)
                or return $self->TruncatedHeader("FNAME");
            last if $buffer eq GZIP_NULL_BYTE ;
            $origname .= $buffer
        }
        $keep .= $origname . GZIP_NULL_BYTE ;

        return $self->HeaderError("Non ISO 8859-1 Character found in Name")
            if *$self->{Strict} && $origname =~ /$GZIP_FNAME_INVALID_CHAR_RE/o ;
    }

    # FCOMMENT: NUL-terminated string, read one byte at a time.
    my $comment ;
    if ($flag & GZIP_FLG_FCOMMENT) {
        $comment = "";
        while (1) {
            $self->smartReadExact(\$buffer, 1)
                or return $self->TruncatedHeader("FCOMMENT");
            last if $buffer eq GZIP_NULL_BYTE ;
            $comment .= $buffer
        }
        $keep .= $comment . GZIP_NULL_BYTE ;

        return $self->HeaderError("Non ISO 8859-1 Character found in Comment")
            if *$self->{Strict} && $comment =~ /$GZIP_FCOMMENT_INVALID_CHAR_RE/o ;
    }

    if ($flag & GZIP_FLG_FHCRC) {
        $self->smartReadExact(\$buffer, GZIP_FHCRC_SIZE)
            or return $self->TruncatedHeader("FHCRC");

        $HeaderCRC = unpack("v", $buffer) ;

        # RFC 1952: CRC16 is the two least-significant bytes of the CRC32
        # of all header bytes up to, but not including, the CRC16 itself.
        # The stored value is 16 bits wide, so mask with 0xFFFF (masking
        # with 0xFF rejected valid headers whose high byte is non-zero).
        my $crc16 = crc32($keep) & 0xFFFF ;

        return $self->HeaderError("CRC16 mismatch.")
            if *$self->{Strict} && $crc16 != $HeaderCRC;

        $keep .= $buffer ;
    }

    # Assume compression method is deflated for xfl tests
    #if ($xfl) {
    #}

    *$self->{Type} = 'rfc1952';

    return {
        'Type'              => 'rfc1952',
        'FingerprintLength' => 2,
        'HeaderLength'      => length $keep,
        'TrailerLength'     => GZIP_TRAILER_SIZE,
        'Header'            => $keep,
        'isMinimalHeader'   => $keep eq GZIP_MINIMUM_HEADER ? 1 : 0,

        'MethodID'          => $cm,
        'MethodName'        => $cm == GZIP_CM_DEFLATED ? "Deflated" : "Unknown" ,
        'TextFlag'          => $flag & GZIP_FLG_FTEXT ? 1 : 0,
        'HeaderCRCFlag'     => $flag & GZIP_FLG_FHCRC ? 1 : 0,
        'NameFlag'          => $flag & GZIP_FLG_FNAME ? 1 : 0,
        'CommentFlag'       => $flag & GZIP_FLG_FCOMMENT ? 1 : 0,
        'ExtraFlag'         => $flag & GZIP_FLG_FEXTRA ? 1 : 0,
        'Name'              => $origname,
        'Comment'           => $comment,
        'Time'              => $mtime,
        'OsID'              => $os,
        'OsName'            => defined $GZIP_OS_Names{$os}
                                 ? $GZIP_OS_Names{$os} : "Unknown",
        'HeaderCRC'         => $HeaderCRC,
        'Flags'             => $flag,
        'ExtraFlags'        => $xfl,
        'ExtraFieldRaw'     => $EXTRA,
        'ExtraField'        => [ @EXTRA ],

        #'CompSize'=> $compsize,
        #'CRC32'=> $CRC32,
        #'OrigSize'=> $ISIZE,
    }
}
272
273
1a6a8453 2741;
642e522c 275
642e522c 276__END__
277
278
279=head1 NAME
280
a02d0f6f 281
cb7abd7f 282
283IO::Uncompress::Gunzip - Read RFC 1952 files/buffers
284
642e522c 285
a02d0f6f 286
642e522c 287=head1 SYNOPSIS
288
289 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
290
291 my $status = gunzip $input => $output [,OPTS]
292 or die "gunzip failed: $GunzipError\n";
293
294 my $z = new IO::Uncompress::Gunzip $input [OPTS]
295 or die "gunzip failed: $GunzipError\n";
296
297 $status = $z->read($buffer)
298 $status = $z->read($buffer, $length)
299 $status = $z->read($buffer, $length, $offset)
300 $line = $z->getline()
301 $char = $z->getc()
302 $char = $z->ungetc()
a02d0f6f 303 $char = $z->opened()
304
642e522c 305 $status = $z->inflateSync()
a02d0f6f 306
e7d45986 307 $data = $z->trailingData()
308 $status = $z->nextStream()
642e522c 309 $data = $z->getHeaderInfo()
310 $z->tell()
311 $z->seek($position, $whence)
312 $z->binmode()
313 $z->fileno()
314 $z->eof()
315 $z->close()
316
317 $GunzipError ;
318
319 # IO::File mode
320
321 <$z>
322 read($z, $buffer);
323 read($z, $buffer, $length);
324 read($z, $buffer, $length, $offset);
325 tell($z)
326 seek($z, $position, $whence)
327 binmode($z)
328 fileno($z)
329 eof($z)
330 close($z)
331
332
333=head1 DESCRIPTION
334
335
336
337B<WARNING -- This is a Beta release>.
338
339=over 5
340
341=item * DO NOT use in production code.
342
343=item * The documentation is incomplete in places.
344
345=item * Parts of the interface defined here are tentative.
346
347=item * Please report any problems you find.
348
349=back
350
351
352
353
354
1a6a8453 355This module provides a Perl interface that allows the reading of
642e522c 356files/buffers that conform to RFC 1952.
357
1a6a8453 358For writing RFC 1952 files/buffers, see the companion module IO::Compress::Gzip.
642e522c 359
360
361
cb7abd7f 362
363
642e522c 364=head1 Functional Interface
365
1a6a8453 366A top-level function, C<gunzip>, is provided to carry out
367"one-shot" uncompression between buffers and/or files. For finer
368control over the uncompression process, see the L</"OO Interface">
369section.
642e522c 370
371 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
372
373 gunzip $input => $output [,OPTS]
374 or die "gunzip failed: $GunzipError\n";
375
1a6a8453 376
642e522c 377
378The functional interface needs Perl5.005 or better.
379
380
381=head2 gunzip $input => $output [, OPTS]
382
1a6a8453 383
384C<gunzip> expects at least two parameters, C<$input> and C<$output>.
642e522c 385
386=head3 The C<$input> parameter
387
388The parameter, C<$input>, is used to define the source of
389the compressed data.
390
391It can take one of the following forms:
392
393=over 5
394
395=item A filename
396
397If the C<$input> parameter is a simple scalar, it is assumed to be a
398filename. This file will be opened for reading and the input data
399will be read from it.
400
401=item A filehandle
402
403If the C<$input> parameter is a filehandle, the input data will be
404read from it.
405The string '-' can be used as an alias for standard input.
406
407=item A scalar reference
408
409If C<$input> is a scalar reference, the input data will be read
410from C<$$input>.
411
412=item An array reference
413
1a6a8453 414If C<$input> is an array reference, each element in the array must be a
415filename.
416
417The input data will be read from each file in turn.
418
642e522c 419The complete array will be walked to ensure that it only
1a6a8453 420contains valid filenames before any data is uncompressed.
421
422
642e522c 423
424=item An Input FileGlob string
425
426If C<$input> is a string that is delimited by the characters "<" and ">"
427C<gunzip> will assume that it is an I<input fileglob string>. The
428input is the list of files that match the fileglob.
429
430If the fileglob does not match any files ...
431
432See L<File::GlobMapper|File::GlobMapper> for more details.
433
434
435=back
436
437If the C<$input> parameter is any other type, C<undef> will be returned.
438
439
440
441=head3 The C<$output> parameter
442
443The parameter C<$output> is used to control the destination of the
444uncompressed data. This parameter can take one of these forms.
445
446=over 5
447
448=item A filename
449
1a6a8453 450If the C<$output> parameter is a simple scalar, it is assumed to be a
451filename. This file will be opened for writing and the uncompressed
452data will be written to it.
642e522c 453
454=item A filehandle
455
1a6a8453 456If the C<$output> parameter is a filehandle, the uncompressed data
457will be written to it.
642e522c 458The string '-' can be used as an alias for standard output.
459
460
461=item A scalar reference
462
1a6a8453 463If C<$output> is a scalar reference, the uncompressed data will be
464stored in C<$$output>.
642e522c 465
466
642e522c 467
468=item An Array Reference
469
1a6a8453 470If C<$output> is an array reference, the uncompressed data will be
471pushed onto the array.
642e522c 472
473=item An Output FileGlob
474
475If C<$output> is a string that is delimited by the characters "<" and ">"
476C<gunzip> will assume that it is an I<output fileglob string>. The
477output is the list of files that match the fileglob.
478
When C<$output> is a fileglob string, C<$input> must also be a fileglob
string. Anything else is an error.
481
482=back
483
484If the C<$output> parameter is any other type, C<undef> will be returned.
485
642e522c 486
642e522c 487
488=head2 Notes
489
c70c1701 490
491When C<$input> maps to multiple compressed files/buffers and C<$output> is
492a single file/buffer, after uncompression C<$output> will contain a
493concatenation of all the uncompressed data from each of the input
494files/buffers.
495
496
642e522c 497
498
499
500=head2 Optional Parameters
501
502Unless specified below, the optional parameters for C<gunzip>,
503C<OPTS>, are the same as those used with the OO interface defined in the
504L</"Constructor Options"> section below.
505
506=over 5
507
e7d45986 508=item C<< AutoClose => 0|1 >>
642e522c 509
1a6a8453 510This option applies to any input or output data streams to
511C<gunzip> that are filehandles.
642e522c 512
513If C<AutoClose> is specified, and the value is true, it will result in all
514input and/or output filehandles being closed once C<gunzip> has
515completed.
516
517This parameter defaults to 0.
518
519
e7d45986 520=item C<< BinModeOut => 0|1 >>
1a6a8453 521
522When writing to a file or filehandle, set C<binmode> before writing to the
523file.
524
525Defaults to 0.
526
527
528
529
530
e7d45986 531=item C<< Append => 0|1 >>
642e522c 532
533TODO
534
e7d45986 535=item C<< MultiStream => 0|1 >>
1a6a8453 536
258133d1 537
e7d45986 538If the input file/buffer contains multiple compressed data streams, this
539option will uncompress the whole lot as a single data stream.
1a6a8453 540
e7d45986 541Defaults to 0.
1a6a8453 542
642e522c 543
544
258133d1 545
546
547=item C<< TrailingData => $scalar >>
548
549Returns the data, if any, that is present immediately after the compressed
550data stream once uncompression is complete.
551
552This option can be used when there is useful information immediately
553following the compressed data stream, and you don't know the length of the
554compressed data stream.
555
556If the input is a buffer, C<trailingData> will return everything from the
557end of the compressed data stream to the end of the buffer.
558
559If the input is a filehandle, C<trailingData> will return the data that is
560left in the filehandle input buffer once the end of the compressed data
561stream has been reached. You can then use the filehandle to read the rest
562of the input file.
563
564Don't bother using C<trailingData> if the input is a filename.
565
566
567
568If you know the length of the compressed data stream before you start
569uncompressing, you can avoid having to use C<trailingData> by setting the
570C<InputLength> option.
571
572
573
642e522c 574=back
575
576
577
578
579=head2 Examples
580
To read the contents of the file C<file1.txt.gz> and write the
uncompressed data to the file C<file1.txt>.
583
584 use strict ;
585 use warnings ;
586 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
587
588 my $input = "file1.txt.gz";
589 my $output = "file1.txt";
590 gunzip $input => $output
591 or die "gunzip failed: $GunzipError\n";
592
593
594To read from an existing Perl filehandle, C<$input>, and write the
595uncompressed data to a buffer, C<$buffer>.
596
597 use strict ;
598 use warnings ;
599 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
600 use IO::File ;
601
602 my $input = new IO::File "<file1.txt.gz"
603 or die "Cannot open 'file1.txt.gz': $!\n" ;
604 my $buffer ;
605 gunzip $input => \$buffer
606 or die "gunzip failed: $GunzipError\n";
607
To uncompress all files in the directory "/my/home" that match "*.txt.gz" and store the uncompressed data in the same directory
609
610 use strict ;
611 use warnings ;
612 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
613
614 gunzip '</my/home/*.txt.gz>' => '</my/home/#1.txt>'
615 or die "gunzip failed: $GunzipError\n";
616
and if you want to uncompress each file one at a time, this will do the trick
618
619 use strict ;
620 use warnings ;
621 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
622
    for my $input ( glob "/my/home/*.txt.gz" )
    {
        my $output = $input;
        $output =~ s/\.gz\z// ;
        gunzip $input => $output
            or die "Error uncompressing '$input': $GunzipError\n";
    }
630
631=head1 OO Interface
632
633=head2 Constructor
634
635The format of the constructor for IO::Uncompress::Gunzip is shown below
636
637
638 my $z = new IO::Uncompress::Gunzip $input [OPTS]
639 or die "IO::Uncompress::Gunzip failed: $GunzipError\n";
640
641Returns an C<IO::Uncompress::Gunzip> object on success and undef on failure.
642The variable C<$GunzipError> will contain an error message on failure.
643
1a6a8453 644If you are running Perl 5.005 or better the object, C<$z>, returned from
645IO::Uncompress::Gunzip can be used exactly like an L<IO::File|IO::File> filehandle.
646This means that all normal input file operations can be carried out with
647C<$z>. For example, to read a line from a compressed file/buffer you can
648use either of these forms
642e522c 649
650 $line = $z->getline();
651 $line = <$z>;
652
653The mandatory parameter C<$input> is used to determine the source of the
654compressed data. This parameter can take one of three forms.
655
656=over 5
657
658=item A filename
659
660If the C<$input> parameter is a scalar, it is assumed to be a filename. This
661file will be opened for reading and the compressed data will be read from it.
662
663=item A filehandle
664
665If the C<$input> parameter is a filehandle, the compressed data will be
666read from it.
667The string '-' can be used as an alias for standard input.
668
669
670=item A scalar reference
671
If C<$input> is a scalar reference, the compressed data will be read from
C<$$input>.
674
675=back
676
677=head2 Constructor Options
678
679
680The option names defined below are case insensitive and can be optionally
681prefixed by a '-'. So all of the following are valid
682
683 -AutoClose
684 -autoclose
685 AUTOCLOSE
686 autoclose
687
688OPTS is a combination of the following options:
689
690=over 5
691
e7d45986 692=item C<< AutoClose => 0|1 >>
642e522c 693
694This option is only valid when the C<$input> parameter is a filehandle. If
695specified, and the value is true, it will result in the file being closed once
696either the C<close> method is called or the IO::Uncompress::Gunzip object is
697destroyed.
698
699This parameter defaults to 0.
700
e7d45986 701=item C<< MultiStream => 0|1 >>
642e522c 702
703
704
705Allows multiple concatenated compressed streams to be treated as a single
706compressed stream. Decompression will stop once either the end of the
707file/buffer is reached, an error is encountered (premature eof, corrupt
708compressed data) or the end of a stream is not immediately followed by the
709start of another stream.
710
711This parameter defaults to 0.
712
713
e7d45986 714=item C<< Prime => $string >>
642e522c 715
716This option will uncompress the contents of C<$string> before processing the
717input file/buffer.
718
719This option can be useful when the compressed data is embedded in another
720file/data structure and it is not possible to work out where the compressed
1a6a8453 721data begins without having to read the first few bytes. If this is the
722case, the uncompression can be I<primed> with these bytes using this
723option.
642e522c 724
e7d45986 725=item C<< Transparent => 0|1 >>
642e522c 726
727If this option is set and the input file or buffer is not compressed data,
728the module will allow reading of it anyway.
729
730This option defaults to 1.
731
e7d45986 732=item C<< BlockSize => $num >>
642e522c 733
1a6a8453 734When reading the compressed input data, IO::Uncompress::Gunzip will read it in
735blocks of C<$num> bytes.
642e522c 736
737This option defaults to 4096.
738
e7d45986 739=item C<< InputLength => $size >>
642e522c 740
1a6a8453 741When present this option will limit the number of compressed bytes read
742from the input file/buffer to C<$size>. This option can be used in the
743situation where there is useful data directly after the compressed data
744stream and you know beforehand the exact length of the compressed data
745stream.
642e522c 746
1a6a8453 747This option is mostly used when reading from a filehandle, in which case
748the file pointer will be left pointing to the first byte directly after the
642e522c 749compressed data stream.
750
751
752
753This option defaults to off.
754
e7d45986 755=item C<< Append => 0|1 >>
642e522c 756
757This option controls what the C<read> method does with uncompressed data.
758
1a6a8453 759If set to 1, all uncompressed data will be appended to the output parameter
760of the C<read> method.
642e522c 761
1a6a8453 762If set to 0, the contents of the output parameter of the C<read> method
763will be overwritten by the uncompressed data.
642e522c 764
765Defaults to 0.
766
e7d45986 767=item C<< Strict => 0|1 >>
642e522c 768
769
770
771This option controls whether the extra checks defined below are used when
1a6a8453 772carrying out the decompression. When Strict is on, the extra tests are
773carried out, when Strict is off they are not.
642e522c 774
775The default for this option is off.
776
777
778
779
780
781
782
783
784
785=over 5
786
787=item 1
788
789If the FHCRC bit is set in the gzip FLG header byte, the CRC16 bytes in the
790header must match the crc16 value of the gzip header actually read.
791
792=item 2
793
794If the gzip header contains a name field (FNAME) it consists solely of ISO
7958859-1 characters.
796
797=item 3
798
1a6a8453 799If the gzip header contains a comment field (FCOMMENT) it consists solely
800of ISO 8859-1 characters plus line-feed.
642e522c 801
802=item 4
803
804If the gzip FEXTRA header field is present it must conform to the sub-field
a02d0f6f 805structure as defined in RFC 1952.
642e522c 806
807=item 5
808
809The CRC32 and ISIZE trailer fields must be present.
810
811=item 6
812
813The value of the CRC32 field read must match the crc32 value of the
814uncompressed data actually contained in the gzip file.
815
816=item 7
817
1a6a8453 818The value of the ISIZE fields read must match the length of the
819uncompressed data actually read from the file.
642e522c 820
821=back
822
823
824
825
826
827
642e522c 828
=item C<< ParseExtra => 0|1 >>

If the gzip FEXTRA header field is present and this option is set, it will
force the module to check that it conforms to the sub-field structure as
defined in RFC 1952.
642e522c 833
If C<Strict> is on, this option is enabled automatically.
835
836Defaults to 0.
837
838
839
a02d0f6f 840
642e522c 841=back
842
843=head2 Examples
844
845TODO
846
847=head1 Methods
848
849=head2 read
850
851Usage is
852
853 $status = $z->read($buffer)
854
Reads a block of compressed data (the size of the compressed block is
determined by the C<BlockSize> option in the constructor), uncompresses it and
1a6a8453 857writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
858set in the constructor, the uncompressed data will be appended to the
859C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
642e522c 860
1a6a8453 861Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
862or a negative number on error.
642e522c 863
864=head2 read
865
866Usage is
867
868 $status = $z->read($buffer, $length)
869 $status = $z->read($buffer, $length, $offset)
870
871 $status = read($z, $buffer, $length)
872 $status = read($z, $buffer, $length, $offset)
873
874Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
875
1a6a8453 876The main difference between this form of the C<read> method and the
877previous one, is that this one will attempt to return I<exactly> C<$length>
878bytes. The only circumstances that this function will not is if end-of-file
879or an IO error is encountered.
642e522c 880
1a6a8453 881Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
882or a negative number on error.
642e522c 883
884
885=head2 getline
886
887Usage is
888
889 $line = $z->getline()
890 $line = <$z>
891
892Reads a single line.
893
This method fully supports the use of the variable C<$/> (or
895C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
896determine what constitutes an end of line. Paragraph mode, record mode and
897file slurp mode are all supported.
642e522c 898
899
900=head2 getc
901
902Usage is
903
904 $char = $z->getc()
905
906Read a single character.
907
908=head2 ungetc
909
910Usage is
911
912 $char = $z->ungetc($string)
913
914
a02d0f6f 915
642e522c 916=head2 inflateSync
917
918Usage is
919
920 $status = $z->inflateSync()
921
922TODO
923
a02d0f6f 924
642e522c 925=head2 getHeaderInfo
926
927Usage is
928
1a6a8453 929 $hdr = $z->getHeaderInfo();
930 @hdrs = $z->getHeaderInfo();
642e522c 931
This method returns either a hash reference (in scalar context) or a list
of hash references (in array context) that contains information about each
of the header fields in the compressed data stream(s).
642e522c 935
936
937
1a6a8453 938=over 5
642e522c 939
1a6a8453 940=item Name
642e522c 941
1a6a8453 942The contents of the Name header field, if present. If no name is
943present, the value will be undef. Note this is different from a zero length
944name, which will return an empty string.
642e522c 945
946=item Comment
947
1a6a8453 948The contents of the Comment header field, if present. If no comment is
949present, the value will be undef. Note this is different from a zero length
950comment, which will return an empty string.
642e522c 951
952=back
953
954
955
956
957=head2 tell
958
959Usage is
960
961 $z->tell()
962 tell $z
963
964Returns the uncompressed file offset.
965
966=head2 eof
967
968Usage is
969
970 $z->eof();
971 eof($z);
972
973
974
975Returns true if the end of the compressed input stream has been reached.
976
977
978
979=head2 seek
980
981 $z->seek($position, $whence);
982 seek($z, $position, $whence);
983
984
985
986
987Provides a sub-set of the C<seek> functionality, with the restriction
988that it is only legal to seek forward in the input file/buffer.
989It is a fatal error to attempt to seek backward.
990
991
992
The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
SEEK_CUR or SEEK_END.
995
996Returns 1 on success, 0 on failure.
997
998=head2 binmode
999
1000Usage is
1001
1002 $z->binmode
1003 binmode $z ;
1004
1005This is a noop provided for completeness.
1006
a02d0f6f 1007=head2 opened
1008
1009 $z->opened()
1010
Returns true if the object currently refers to an opened file/buffer.
1012
1013=head2 autoflush
1014
1015 my $prev = $z->autoflush()
1016 my $prev = $z->autoflush(EXPR)
1017
1018If the C<$z> object is associated with a file or a filehandle, this method
1019returns the current autoflush setting for the underlying filehandle. If
1020C<EXPR> is present, and is non-zero, it will enable flushing after every
1021write/print operation.
1022
1023If C<$z> is associated with a buffer, this method has no effect and always
1024returns C<undef>.
1025
1026B<Note> that the special variable C<$|> B<cannot> be used to set or
1027retrieve the autoflush setting.
1028
1029=head2 input_line_number
1030
1031 $z->input_line_number()
1032 $z->input_line_number(EXPR)
1033
1034
1035
1036Returns the current uncompressed line number. If C<EXPR> is present it has
1037the effect of setting the line number. Note that setting the line number
1038does not change the current position within the file/buffer being read.
1039
The contents of C<$/> are used to determine what constitutes a line
terminator.
1042
1043
1044
642e522c 1045=head2 fileno
1046
1047 $z->fileno()
1048 fileno($z)
1049
a02d0f6f 1050If the C<$z> object is associated with a file or a filehandle, this method
1051will return the underlying file descriptor.
642e522c 1052
If the C<$z> object is associated with a buffer, this method will
return undef.
1055
1056=head2 close
1057
1058 $z->close() ;
1059 close $z ;
1060
1061
1062
Closes the input file/buffer.
1064
1065
1066
1067For most versions of Perl this method will be automatically invoked if
1068the IO::Uncompress::Gunzip object is destroyed (either explicitly or by the
1069variable with the reference to the object going out of scope). The
1070exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
1071these cases, the C<close> method will be called automatically, but
1072not until global destruction of all live objects when the program is
1073terminating.
1074
1075Therefore, if you want your scripts to be able to run on all versions
1076of Perl, you should call C<close> explicitly and not rely on automatic
1077closing.
1078
1079Returns true on success, otherwise 0.
1080
1081If the C<AutoClose> option has been enabled when the IO::Uncompress::Gunzip
1082object was created, and the object is associated with a file, the
1083underlying file will also be closed.
1084
1085
1086
1087
e7d45986 1088=head2 nextStream
1089
1090Usage is
1091
1092 my $status = $z->nextStream();
1093
1094Skips to the next compressed data stream in the input file/buffer. If a new
258133d1 1095compressed data stream is found, the eof marker will be cleared and C<$.>
1096will be reset to 0.
e7d45986 1097
1098Returns 1 if a new stream was found, 0 if none was found, and -1 if an
1099error was encountered.
1100
1101=head2 trailingData
1102
1103Usage is
1104
1105 my $data = $z->trailingData();
1106
258133d1 1107Returns the data, if any, that is present immediately after the compressed
1108data stream once uncompression is complete. It only makes sense to call
1109this method once the end of the compressed data stream has been
1110encountered.
1111
1112This option can be used when there is useful information immediately
1113following the compressed data stream, and you don't know the length of the
1114compressed data stream.
1115
1116If the input is a buffer, C<trailingData> will return everything from the
1117end of the compressed data stream to the end of the buffer.
1118
1119If the input is a filehandle, C<trailingData> will return the data that is
1120left in the filehandle input buffer once the end of the compressed data
1121stream has been reached. You can then use the filehandle to read the rest
1122of the input file.
1123
1124Don't bother using C<trailingData> if the input is a filename.
1125
1126
1127
1128If you know the length of the compressed data stream before you start
1129uncompressing, you can avoid having to use C<trailingData> by setting the
1130C<InputLength> option in the constructor.
e7d45986 1131
642e522c 1132=head1 Importing
1133
No symbolic constants are required by IO::Uncompress::Gunzip at present.
1135
1136=over 5
1137
1138=item :all
1139
1140Imports C<gunzip> and C<$GunzipError>.
1141Same as doing this
1142
1143 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
1144
1145=back
1146
1147=head1 EXAMPLES
1148
1149
1150
1151
1152=head1 SEE ALSO
1153
258133d1 1154L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
642e522c 1155
1156L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
1157
a02d0f6f 1158L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
1159L<Archive::Tar|Archive::Tar>,
642e522c 1160L<IO::Zlib|IO::Zlib>
1161
a02d0f6f 1162
642e522c 1163For RFC 1950, 1951 and 1952 see
1164F<http://www.faqs.org/rfcs/rfc1950.html>,
1165F<http://www.faqs.org/rfcs/rfc1951.html> and
1166F<http://www.faqs.org/rfcs/rfc1952.html>
1167
a02d0f6f 1168The I<zlib> compression library was written by Jean-loup Gailly
1169F<gzip@prep.ai.mit.edu> and Mark Adler F<madler@alumni.caltech.edu>.
1170
1171The primary site for the I<zlib> compression library is
1172F<http://www.zlib.org>.
1173
1174The primary site for gzip is F<http://www.gzip.org>.
1175
1176
1177
1178
642e522c 1179=head1 AUTHOR
1180
cb7abd7f 1181This module was written by Paul Marquess, F<pmqs@cpan.org>.
642e522c 1182
642e522c 1183
642e522c 1184
1185=head1 MODIFICATION HISTORY
1186
1187See the Changes file.
1188
1189=head1 COPYRIGHT AND LICENSE
642e522c 1190
1a6a8453 1191Copyright (c) 2005-2006 Paul Marquess. All rights reserved.
a02d0f6f 1192
642e522c 1193This program is free software; you can redistribute it and/or
1194modify it under the same terms as Perl itself.
1195