ext/Compress/IO/Zlib/lib/IO/Uncompress/Gunzip.pm

   1
   2 package IO::Uncompress::Gunzip ;
   3
   4 require 5.004 ;
   5
   6 # for RFC1952
   7
   8 use strict ;
   9 use warnings;
  10 use bytes;
  11
  12 use IO::Uncompress::RawInflate ;
  13
  14 use Compress::Raw::Zlib qw( crc32 ) ;
  15 use IO::Compress::Base::Common qw(:Status createSelfTiedObject);
  16 use IO::Compress::Gzip::Constants;
  17 use IO::Compress::Zlib::Extra;
  18
  19 require Exporter ;
  20
  21 our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $GunzipError);
  22
  23 @ISA = qw( Exporter IO::Uncompress::RawInflate );
  24 @EXPORT_OK = qw( $GunzipError gunzip );
  25 %EXPORT_TAGS = %IO::Uncompress::RawInflate::DEFLATE_CONSTANTS ;
  26 push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
  27 Exporter::export_ok_tags('all');
  28
  29 $GunzipError = '';
  30
  31 $VERSION = '2.001';
  32
  33 sub new
  34 {
  35     my $class = shift ;
  36     $GunzipError = '';
  37     my $obj = createSelfTiedObject($class, \$GunzipError);
  38
  39     $obj->_create(undef, 0, @_);
  40 }
  41
  42 sub gunzip
  43 {
  44     my $obj = createSelfTiedObject(undef, \$GunzipError);
  45     return $obj->_inf(@_) ;
  46 }
  47
  48 sub getExtraParams
  49 {
  50     use IO::Compress::Base::Common qw(:Parse);
  51     return ( 'ParseExtra' => [1, 1, Parse_boolean,  0] ) ;
  52 }
  53
  54 sub ckParams
  55 {
  56     my $self = shift ;
  57     my $got = shift ;
  58
  59     # gunzip always needs crc32
  60     $got->value('CRC32' => 1);
  61
  62     return 1;
  63 }
  64
  65 sub ckMagic
  66 {
  67     my $self = shift;
  68
  69     my $magic ;
  70     $self->smartReadExact(\$magic, GZIP_ID_SIZE);
  71
  72     *$self->{HeaderPending} = $magic ;
  73
  74     return $self->HeaderError("Minimum header size is " .
  75                               GZIP_MIN_HEADER_SIZE . " bytes")
  76         if length $magic != GZIP_ID_SIZE ;
  77
  78     return $self->HeaderError("Bad Magic")
  79         if ! isGzipMagic($magic) ;
  80
  81     *$self->{Type} = 'rfc1952';
  82
  83     return $magic ;
  84 }
  85
  86 sub readHeader
  87 {
  88     my $self = shift;
  89     my $magic = shift;
  90
  91     return $self->_readGzipHeader($magic);
  92 }
  93
  94 sub chkTrailer
  95 {
  96     my $self = shift;
  97     my $trailer = shift;
  98
  99     # Check CRC & ISIZE
 100     my ($CRC32, $ISIZE) = unpack("V V", $trailer) ;
 101     *$self->{Info}{CRC32} = $CRC32;
 102     *$self->{Info}{ISIZE} = $ISIZE;
 103
 104     if (*$self->{Strict}) {
 105         return $self->TrailerError("CRC mismatch")
 106             if $CRC32 != *$self->{Uncomp}->crc32() ;
 107
 108         my $exp_isize = *$self->{UnCompSize}->get32bit();
 109         return $self->TrailerError("ISIZE mismatch. Got $ISIZE"
 110                                   . ", expected $exp_isize")
 111             if $ISIZE != $exp_isize ;
 112     }
 113
 114     return STATUS_OK;
 115 }
 116
 117 sub isGzipMagic
 118 {
 119     my $buffer = shift ;
 120     return 0 if length $buffer < GZIP_ID_SIZE ;
 121     my ($id1, $id2) = unpack("C C", $buffer) ;
 122     return $id1 == GZIP_ID1 && $id2 == GZIP_ID2 ;
 123 }
 124
 125 sub _readFullGzipHeader($)
 126 {
 127     my ($self) = @_ ;
 128     my $magic = '' ;
 129
 130     $self->smartReadExact(\$magic, GZIP_ID_SIZE);
 131
 132     *$self->{HeaderPending} = $magic ;
 133
 134     return $self->HeaderError("Minimum header size is " .
 135                               GZIP_MIN_HEADER_SIZE . " bytes")
 136         if length $magic != GZIP_ID_SIZE ;
 137
 138
 139     return $self->HeaderError("Bad Magic")
 140         if ! isGzipMagic($magic) ;
 141
 142     my $status = $self->_readGzipHeader($magic);
 143     delete *$self->{Transparent} if ! defined $status ;
 144     return $status ;
 145 }
 146
 147 sub _readGzipHeader($)
 148 {
 149     my ($self, $magic) = @_ ;
 150     my ($HeaderCRC) ;
 151     my ($buffer) = '' ;
 152
 153     $self->smartReadExact(\$buffer, GZIP_MIN_HEADER_SIZE - GZIP_ID_SIZE)
 154         or return $self->HeaderError("Minimum header size is " .
 155                                      GZIP_MIN_HEADER_SIZE . " bytes") ;
 156
 157     my $keep = $magic . $buffer ;
 158     *$self->{HeaderPending} = $keep ;
 159
 160     # now split out the various parts
 161     my ($cm, $flag, $mtime, $xfl, $os) = unpack("C C V C C", $buffer) ;
 162
 163     $cm == GZIP_CM_DEFLATED
 164         or return $self->HeaderError("Not Deflate (CM is $cm)") ;
 165
 166     # check for use of reserved bits
 167     return $self->HeaderError("Use of Reserved Bits in FLG field.")
 168         if $flag & GZIP_FLG_RESERVED ;
 169
 170     my $EXTRA ;
 171     my @EXTRA = () ;
 172     if ($flag & GZIP_FLG_FEXTRA) {
 173         $EXTRA = "" ;
 174         $self->smartReadExact(\$buffer, GZIP_FEXTRA_HEADER_SIZE)
 175             or return $self->TruncatedHeader("FEXTRA Length") ;
 176
 177         my ($XLEN) = unpack("v", $buffer) ;
 178         $self->smartReadExact(\$EXTRA, $XLEN)
 179             or return $self->TruncatedHeader("FEXTRA Body");
 180         $keep .= $buffer . $EXTRA ;
 181
 182         if ($XLEN && *$self->{'ParseExtra'}) {
 183             my $bad = IO::Compress::Zlib::Extra::parseRawExtra($EXTRA,
 184                                                 \@EXTRA, 1, 1);
 185             return $self->HeaderError($bad)
 186                 if defined $bad;
 187         }
 188     }
 189
 190     my $origname ;
 191     if ($flag & GZIP_FLG_FNAME) {
 192         $origname = "" ;
 193         while (1) {
 194             $self->smartReadExact(\$buffer, 1)
 195                 or return $self->TruncatedHeader("FNAME");
 196             last if $buffer eq GZIP_NULL_BYTE ;
 197             $origname .= $buffer
 198         }
 199         $keep .= $origname . GZIP_NULL_BYTE ;
 200
 201         return $self->HeaderError("Non ISO 8859-1 Character found in Name")
 202             if *$self->{Strict} && $origname =~ /$GZIP_FNAME_INVALID_CHAR_RE/o ;
 203     }
 204
 205     my $comment ;
 206     if ($flag & GZIP_FLG_FCOMMENT) {
 207         $comment = "";
 208         while (1) {
 209             $self->smartReadExact(\$buffer, 1)
 210                 or return $self->TruncatedHeader("FCOMMENT");
 211             last if $buffer eq GZIP_NULL_BYTE ;
 212             $comment .= $buffer
 213         }
 214         $keep .= $comment . GZIP_NULL_BYTE ;
 215
 216         return $self->HeaderError("Non ISO 8859-1 Character found in Comment")
 217             if *$self->{Strict} && $comment =~ /$GZIP_FCOMMENT_INVALID_CHAR_RE/o ;
 218     }
 219
 220     if ($flag & GZIP_FLG_FHCRC) {
 221         $self->smartReadExact(\$buffer, GZIP_FHCRC_SIZE)
 222             or return $self->TruncatedHeader("FHCRC");
 223
 224         $HeaderCRC = unpack("v", $buffer) ;
 225         my $crc16 = crc32($keep) & 0xFF ;
 226
 227         return $self->HeaderError("CRC16 mismatch.")
 228             if *$self->{Strict} && $crc16 != $HeaderCRC;
 229
 230         $keep .= $buffer ;
 231     }
 232
 233     # Assume compression method is deflated for xfl tests
 234     #if ($xfl) {
 235     #}
 236
 237     *$self->{Type} = 'rfc1952';
 238
 239     return {
 240         'Type'          => 'rfc1952',
 241         'FingerprintLength'  => 2,
 242         'HeaderLength'  => length $keep,
 243         'TrailerLength' => GZIP_TRAILER_SIZE,
 244         'Header'        => $keep,
 245         'isMinimalHeader' => $keep eq GZIP_MINIMUM_HEADER ? 1 : 0,
 246
 247         'MethodID'      => $cm,
 248         'MethodName'    => $cm == GZIP_CM_DEFLATED ? "Deflated" : "Unknown" ,
 249         'TextFlag'      => $flag & GZIP_FLG_FTEXT ? 1 : 0,
 250         'HeaderCRCFlag' => $flag & GZIP_FLG_FHCRC ? 1 : 0,
 251         'NameFlag'      => $flag & GZIP_FLG_FNAME ? 1 : 0,
 252         'CommentFlag'   => $flag & GZIP_FLG_FCOMMENT ? 1 : 0,
 253         'ExtraFlag'     => $flag & GZIP_FLG_FEXTRA ? 1 : 0,
 254         'Name'          => $origname,
 255         'Comment'       => $comment,
 256         'Time'          => $mtime,
 257         'OsID'          => $os,
 258         'OsName'        => defined $GZIP_OS_Names{$os}
 259                                  ? $GZIP_OS_Names{$os} : "Unknown",
 260         'HeaderCRC'     => $HeaderCRC,
 261         'Flags'         => $flag,
 262         'ExtraFlags'    => $xfl,
 263         'ExtraFieldRaw' => $EXTRA,
 264         'ExtraField'    => [ @EXTRA ],
 265
 266
 267         #'CompSize'=> $compsize,
 268         #'CRC32'=> $CRC32,
 269         #'OrigSize'=> $ISIZE,
 270       }
 271 }
 272
 273
 274 1;
 275
 276 __END__
 277
 278
 279 =head1 NAME
 280
 281
 282
 283 IO::Uncompress::Gunzip - Read RFC 1952 files/buffers
 284
 285
 286
 287 =head1 SYNOPSIS
 288
 289     use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
 290
 291     my $status = gunzip $input => $output [,OPTS]
 292         or die "gunzip failed: $GunzipError\n";
 293
 294     my $z = new IO::Uncompress::Gunzip $input [OPTS]
 295         or die "gunzip failed: $GunzipError\n";
 296
 297     $status = $z->read($buffer)
 298     $status = $z->read($buffer, $length)
 299     $status = $z->read($buffer, $length, $offset)
 300     $line = $z->getline()
 301     $char = $z->getc()
 302     $char = $z->ungetc()
 303     $char = $z->opened()
 304
 305     $status = $z->inflateSync()
 306
 307     $data = $z->trailingData()
 308     $status = $z->nextStream()
 309     $data = $z->getHeaderInfo()
 310     $z->tell()
 311     $z->seek($position, $whence)
 312     $z->binmode()
 313     $z->fileno()
 314     $z->eof()
 315     $z->close()
 316
 317     $GunzipError ;
 318
 319     # IO::File mode
 320
 321     <$z>
 322     read($z, $buffer);
 323     read($z, $buffer, $length);
 324     read($z, $buffer, $length, $offset);
 325     tell($z)
 326     seek($z, $position, $whence)
 327     binmode($z)
 328     fileno($z)
 329     eof($z)
 330     close($z)
 331
 332
 333 =head1 DESCRIPTION
 334
 335
 336
 337 This module provides a Perl interface that allows the reading of
 338 files/buffers that conform to RFC 1952.
 339
 340 For writing RFC 1952 files/buffers, see the companion module IO::Compress::Gzip.
 341
 342
 343
 344
 345
 346 =head1 Functional Interface
 347
 348 A top-level function, C<gunzip>, is provided to carry out
 349 "one-shot" uncompression between buffers and/or files. For finer
 350 control over the uncompression process, see the L</"OO Interface">
 351 section.
 352
 353     use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
 354
 355     gunzip $input => $output [,OPTS]
 356         or die "gunzip failed: $GunzipError\n";
 357
 358
 359
 360 The functional interface needs Perl5.005 or better.
 361
 362
 363 =head2 gunzip $input => $output [, OPTS]
 364
 365
 366 C<gunzip> expects at least two parameters, C<$input> and C<$output>.
 367
 368 =head3 The C<$input> parameter
 369
 370 The parameter, C<$input>, is used to define the source of
 371 the compressed data.
 372
 373 It can take one of the following forms:
 374
 375 =over 5
 376
 377 =item A filename
 378
 379 If the C<$input> parameter is a simple scalar, it is assumed to be a
 380 filename. This file will be opened for reading and the input data
 381 will be read from it.
 382
 383 =item A filehandle
 384
 385 If the C<$input> parameter is a filehandle, the input data will be
 386 read from it.
 387 The string '-' can be used as an alias for standard input.
 388
 389 =item A scalar reference
 390
 391 If C<$input> is a scalar reference, the input data will be read
 392 from C<$$input>.
 393
 394 =item An array reference
 395
 396 If C<$input> is an array reference, each element in the array must be a
 397 filename.
 398
 399 The input data will be read from each file in turn.
 400
 401 The complete array will be walked to ensure that it only
 402 contains valid filenames before any data is uncompressed.
 403
 404
 405
 406 =item An Input FileGlob string
 407
 408 If C<$input> is a string that is delimited by the characters "<" and ">"
 409 C<gunzip> will assume that it is an I<input fileglob string>. The
 410 input is the list of files that match the fileglob.
 411
 412 If the fileglob does not match any files ...
 413
 414 See L<File::GlobMapper|File::GlobMapper> for more details.
 415
 416
 417 =back
 418
 419 If the C<$input> parameter is any other type, C<undef> will be returned.
 420
 421
 422
 423 =head3 The C<$output> parameter
 424
 425 The parameter C<$output> is used to control the destination of the
 426 uncompressed data. This parameter can take one of these forms.
 427
 428 =over 5
 429
 430 =item A filename
 431
 432 If the C<$output> parameter is a simple scalar, it is assumed to be a
 433 filename.  This file will be opened for writing and the uncompressed
 434 data will be written to it.
 435
 436 =item A filehandle
 437
 438 If the C<$output> parameter is a filehandle, the uncompressed data
 439 will be written to it.
 440 The string '-' can be used as an alias for standard output.
 441
 442
 443 =item A scalar reference
 444
 445 If C<$output> is a scalar reference, the uncompressed data will be
 446 stored in C<$$output>.
 447
 448
 449
 450 =item An Array Reference
 451
 452 If C<$output> is an array reference, the uncompressed data will be
 453 pushed onto the array.
 454
 455 =item An Output FileGlob
 456
 457 If C<$output> is a string that is delimited by the characters "<" and ">"
 458 C<gunzip> will assume that it is an I<output fileglob string>. The
 459 output is the list of files that match the fileglob.
 460
  461 When C<$output> is a fileglob string, C<$input> must also be a fileglob
 462 string. Anything else is an error.
 463
 464 =back
 465
 466 If the C<$output> parameter is any other type, C<undef> will be returned.
 467
 468
 469
 470 =head2 Notes
 471
 472
 473 When C<$input> maps to multiple compressed files/buffers and C<$output> is
 474 a single file/buffer, after uncompression C<$output> will contain a
 475 concatenation of all the uncompressed data from each of the input
 476 files/buffers.
 477
 478
 479
 480
 481
 482 =head2 Optional Parameters
 483
 484 Unless specified below, the optional parameters for C<gunzip>,
 485 C<OPTS>, are the same as those used with the OO interface defined in the
 486 L</"Constructor Options"> section below.
 487
 488 =over 5
 489
 490 =item C<< AutoClose => 0|1 >>
 491
 492 This option applies to any input or output data streams to
 493 C<gunzip> that are filehandles.
 494
 495 If C<AutoClose> is specified, and the value is true, it will result in all
 496 input and/or output filehandles being closed once C<gunzip> has
 497 completed.
 498
 499 This parameter defaults to 0.
 500
 501
 502 =item C<< BinModeOut => 0|1 >>
 503
 504 When writing to a file or filehandle, set C<binmode> before writing to the
 505 file.
 506
 507 Defaults to 0.
 508
 509
 510
 511
 512
 513 =item C<< Append => 0|1 >>
 514
 515 TODO
 516
 517 =item C<< MultiStream => 0|1 >>
 518
 519
 520 If the input file/buffer contains multiple compressed data streams, this
 521 option will uncompress the whole lot as a single data stream.
 522
 523 Defaults to 0.
 524
 525
 526
 527
 528
 529 =item C<< TrailingData => $scalar >>
 530
 531 Returns the data, if any, that is present immediately after the compressed
 532 data stream once uncompression is complete.
 533
 534 This option can be used when there is useful information immediately
 535 following the compressed data stream, and you don't know the length of the
 536 compressed data stream.
 537
 538 If the input is a buffer, C<trailingData> will return everything from the
 539 end of the compressed data stream to the end of the buffer.
 540
 541 If the input is a filehandle, C<trailingData> will return the data that is
 542 left in the filehandle input buffer once the end of the compressed data
 543 stream has been reached. You can then use the filehandle to read the rest
 544 of the input file.
 545
 546 Don't bother using C<trailingData> if the input is a filename.
 547
 548
 549
 550 If you know the length of the compressed data stream before you start
 551 uncompressing, you can avoid having to use C<trailingData> by setting the
 552 C<InputLength> option.
 553
 554
 555
 556 =back
 557
 558
 559
 560
 561 =head2 Examples
 562
  563 To read the contents of the file C<file1.txt.gz> and write the
  564 uncompressed data to the file C<file1.txt>.
 565
 566     use strict ;
 567     use warnings ;
 568     use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
 569
 570     my $input = "file1.txt.gz";
 571     my $output = "file1.txt";
 572     gunzip $input => $output
 573         or die "gunzip failed: $GunzipError\n";
 574
 575
 576 To read from an existing Perl filehandle, C<$input>, and write the
 577 uncompressed data to a buffer, C<$buffer>.
 578
 579     use strict ;
 580     use warnings ;
 581     use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
 582     use IO::File ;
 583
 584     my $input = new IO::File "<file1.txt.gz"
 585         or die "Cannot open 'file1.txt.gz': $!\n" ;
 586     my $buffer ;
 587     gunzip $input => \$buffer
 588         or die "gunzip failed: $GunzipError\n";
 589
  590 To uncompress all files in the directory "/my/home" that match "*.txt.gz" and store the uncompressed data in the same directory
 591
 592     use strict ;
 593     use warnings ;
 594     use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
 595
 596     gunzip '</my/home/*.txt.gz>' => '</my/home/#1.txt>'
 597         or die "gunzip failed: $GunzipError\n";
 598
  599 and if you want to uncompress each file one at a time, this will do the trick
 600
 601     use strict ;
 602     use warnings ;
 603     use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
 604
 605     for my $input ( glob "/my/home/*.txt.gz" )
 606     {
 607         my $output = $input;
  608         $output =~ s/\.gz$// ;
  609         gunzip $input => $output
  610             or die "Error uncompressing '$input': $GunzipError\n";
 611     }
 612
 613 =head1 OO Interface
 614
 615 =head2 Constructor
 616
 617 The format of the constructor for IO::Uncompress::Gunzip is shown below
 618
 619
 620     my $z = new IO::Uncompress::Gunzip $input [OPTS]
 621         or die "IO::Uncompress::Gunzip failed: $GunzipError\n";
 622
 623 Returns an C<IO::Uncompress::Gunzip> object on success and undef on failure.
 624 The variable C<$GunzipError> will contain an error message on failure.
 625
 626 If you are running Perl 5.005 or better the object, C<$z>, returned from
 627 IO::Uncompress::Gunzip can be used exactly like an L<IO::File|IO::File> filehandle.
 628 This means that all normal input file operations can be carried out with
 629 C<$z>.  For example, to read a line from a compressed file/buffer you can
 630 use either of these forms
 631
 632     $line = $z->getline();
 633     $line = <$z>;
 634
 635 The mandatory parameter C<$input> is used to determine the source of the
 636 compressed data. This parameter can take one of three forms.
 637
 638 =over 5
 639
 640 =item A filename
 641
 642 If the C<$input> parameter is a scalar, it is assumed to be a filename. This
 643 file will be opened for reading and the compressed data will be read from it.
 644
 645 =item A filehandle
 646
 647 If the C<$input> parameter is a filehandle, the compressed data will be
 648 read from it.
 649 The string '-' can be used as an alias for standard input.
 650
 651
 652 =item A scalar reference
 653
 654 If C<$input> is a scalar reference, the compressed data will be read from
  655 C<$$input>.
 656
 657 =back
 658
 659 =head2 Constructor Options
 660
 661
 662 The option names defined below are case insensitive and can be optionally
 663 prefixed by a '-'.  So all of the following are valid
 664
 665     -AutoClose
 666     -autoclose
 667     AUTOCLOSE
 668     autoclose
 669
 670 OPTS is a combination of the following options:
 671
 672 =over 5
 673
 674 =item C<< AutoClose => 0|1 >>
 675
 676 This option is only valid when the C<$input> parameter is a filehandle. If
 677 specified, and the value is true, it will result in the file being closed once
 678 either the C<close> method is called or the IO::Uncompress::Gunzip object is
 679 destroyed.
 680
 681 This parameter defaults to 0.
 682
 683 =item C<< MultiStream => 0|1 >>
 684
 685
 686
 687 Allows multiple concatenated compressed streams to be treated as a single
 688 compressed stream. Decompression will stop once either the end of the
 689 file/buffer is reached, an error is encountered (premature eof, corrupt
 690 compressed data) or the end of a stream is not immediately followed by the
 691 start of another stream.
 692
 693 This parameter defaults to 0.
 694
 695
 696 =item C<< Prime => $string >>
 697
 698 This option will uncompress the contents of C<$string> before processing the
 699 input file/buffer.
 700
 701 This option can be useful when the compressed data is embedded in another
 702 file/data structure and it is not possible to work out where the compressed
 703 data begins without having to read the first few bytes. If this is the
 704 case, the uncompression can be I<primed> with these bytes using this
 705 option.
 706
 707 =item C<< Transparent => 0|1 >>
 708
 709 If this option is set and the input file or buffer is not compressed data,
 710 the module will allow reading of it anyway.
 711
 712 This option defaults to 1.
 713
 714 =item C<< BlockSize => $num >>
 715
 716 When reading the compressed input data, IO::Uncompress::Gunzip will read it in
 717 blocks of C<$num> bytes.
 718
 719 This option defaults to 4096.
 720
 721 =item C<< InputLength => $size >>
 722
 723 When present this option will limit the number of compressed bytes read
 724 from the input file/buffer to C<$size>. This option can be used in the
 725 situation where there is useful data directly after the compressed data
 726 stream and you know beforehand the exact length of the compressed data
 727 stream.
 728
 729 This option is mostly used when reading from a filehandle, in which case
 730 the file pointer will be left pointing to the first byte directly after the
 731 compressed data stream.
 732
 733
 734
 735 This option defaults to off.
 736
 737 =item C<< Append => 0|1 >>
 738
 739 This option controls what the C<read> method does with uncompressed data.
 740
 741 If set to 1, all uncompressed data will be appended to the output parameter
 742 of the C<read> method.
 743
 744 If set to 0, the contents of the output parameter of the C<read> method
 745 will be overwritten by the uncompressed data.
 746
 747 Defaults to 0.
 748
 749 =item C<< Strict => 0|1 >>
 750
 751
 752
 753 This option controls whether the extra checks defined below are used when
 754 carrying out the decompression. When Strict is on, the extra tests are
 755 carried out, when Strict is off they are not.
 756
 757 The default for this option is off.
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767 =over 5
 768
 769 =item 1
 770
 771 If the FHCRC bit is set in the gzip FLG header byte, the CRC16 bytes in the
 772 header must match the crc16 value of the gzip header actually read.
 773
 774 =item 2
 775
 776 If the gzip header contains a name field (FNAME) it consists solely of ISO
 777 8859-1 characters.
 778
 779 =item 3
 780
 781 If the gzip header contains a comment field (FCOMMENT) it consists solely
 782 of ISO 8859-1 characters plus line-feed.
 783
 784 =item 4
 785
 786 If the gzip FEXTRA header field is present it must conform to the sub-field
 787 structure as defined in RFC 1952.
 788
 789 =item 5
 790
 791 The CRC32 and ISIZE trailer fields must be present.
 792
 793 =item 6
 794
 795 The value of the CRC32 field read must match the crc32 value of the
 796 uncompressed data actually contained in the gzip file.
 797
 798 =item 7
 799
 800 The value of the ISIZE fields read must match the length of the
 801 uncompressed data actually read from the file.
 802
 803 =back
 804
 805
 806
 807
 808
 809
 810
  811 =item C<< ParseExtra => 0|1 >>

  812 If the gzip FEXTRA header field is present and this option is set, it will
 813 force the module to check that it conforms to the sub-field structure as
 814 defined in RFC 1952.
 815
 816 If the C<Strict> is on it will automatically enable this option.
 817
 818 Defaults to 0.
 819
 820
 821
 822
 823 =back
 824
 825 =head2 Examples
 826
 827 TODO
 828
 829 =head1 Methods
 830
 831 =head2 read
 832
 833 Usage is
 834
 835     $status = $z->read($buffer)
 836
  837 Reads a block of compressed data (the size of the compressed block is
  838 determined by the C<BlockSize> option in the constructor), uncompresses it and
 839 writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
 840 set in the constructor, the uncompressed data will be appended to the
 841 C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
 842
 843 Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
 844 or a negative number on error.
 845
 846 =head2 read
 847
 848 Usage is
 849
 850     $status = $z->read($buffer, $length)
 851     $status = $z->read($buffer, $length, $offset)
 852
 853     $status = read($z, $buffer, $length)
 854     $status = read($z, $buffer, $length, $offset)
 855
 856 Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
 857
 858 The main difference between this form of the C<read> method and the
 859 previous one, is that this one will attempt to return I<exactly> C<$length>
 860 bytes. The only circumstances that this function will not is if end-of-file
 861 or an IO error is encountered.
 862
 863 Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
 864 or a negative number on error.
 865
 866
 867 =head2 getline
 868
 869 Usage is
 870
 871     $line = $z->getline()
 872     $line = <$z>
 873
 874 Reads a single line.
 875
  876 This method fully supports the use of the variable C<$/> (or
 877 C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
 878 determine what constitutes an end of line. Paragraph mode, record mode and
 879 file slurp mode are all supported.
 880
 881
 882 =head2 getc
 883
 884 Usage is
 885
 886     $char = $z->getc()
 887
 888 Read a single character.
 889
 890 =head2 ungetc
 891
 892 Usage is
 893
 894     $char = $z->ungetc($string)
 895
 896
 897
 898 =head2 inflateSync
 899
 900 Usage is
 901
 902     $status = $z->inflateSync()
 903
 904 TODO
 905
 906
 907 =head2 getHeaderInfo
 908
 909 Usage is
 910
 911     $hdr  = $z->getHeaderInfo();
 912     @hdrs = $z->getHeaderInfo();
 913
 914 This method returns either a hash reference (in scalar context) or a list
  915 of hash references (in array context) that contains information about each
 916 of the header fields in the compressed data stream(s).
 917
 918
 919
 920 =over 5
 921
 922 =item Name
 923
 924 The contents of the Name header field, if present. If no name is
 925 present, the value will be undef. Note this is different from a zero length
 926 name, which will return an empty string.
 927
 928 =item Comment
 929
 930 The contents of the Comment header field, if present. If no comment is
 931 present, the value will be undef. Note this is different from a zero length
 932 comment, which will return an empty string.
 933
 934 =back
 935
 936
 937
 938
 939 =head2 tell
 940
 941 Usage is
 942
 943     $z->tell()
 944     tell $z
 945
 946 Returns the uncompressed file offset.
 947
 948 =head2 eof
 949
 950 Usage is
 951
 952     $z->eof();
 953     eof($z);
 954
 955
 956
 957 Returns true if the end of the compressed input stream has been reached.
 958
 959
 960
 961 =head2 seek
 962
 963     $z->seek($position, $whence);
 964     seek($z, $position, $whence);
 965
 966
 967
 968
 969 Provides a sub-set of the C<seek> functionality, with the restriction
 970 that it is only legal to seek forward in the input file/buffer.
 971 It is a fatal error to attempt to seek backward.
 972
 973
 974
  975 The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
 976 SEEK_CUR or SEEK_END.
 977
 978 Returns 1 on success, 0 on failure.
 979
 980 =head2 binmode
 981
 982 Usage is
 983
 984     $z->binmode
 985     binmode $z ;
 986
 987 This is a noop provided for completeness.
 988
 989 =head2 opened
 990
 991     $z->opened()
 992
  993 Returns true if the object currently refers to an opened file/buffer.
 994
 995 =head2 autoflush
 996
 997     my $prev = $z->autoflush()
 998     my $prev = $z->autoflush(EXPR)
 999
1000 If the C<$z> object is associated with a file or a filehandle, this method
1001 returns the current autoflush setting for the underlying filehandle. If
1002 C<EXPR> is present, and is non-zero, it will enable flushing after every
1003 write/print operation.
1004
1005 If C<$z> is associated with a buffer, this method has no effect and always
1006 returns C<undef>.
1007
1008 B<Note> that the special variable C<$|> B<cannot> be used to set or
1009 retrieve the autoflush setting.
1010
1011 =head2 input_line_number
1012
1013     $z->input_line_number()
1014     $z->input_line_number(EXPR)
1015
1016
1017
1018 Returns the current uncompressed line number. If C<EXPR> is present it has
1019 the effect of setting the line number. Note that setting the line number
1020 does not change the current position within the file/buffer being read.
1021
 1022 The contents of C<$/> are used to determine what constitutes a line
1023 terminator.
1024
1025
1026
1027 =head2 fileno
1028
1029     $z->fileno()
1030     fileno($z)
1031
1032 If the C<$z> object is associated with a file or a filehandle, this method
1033 will return the underlying file descriptor.
1034
 1035 If the C<$z> object is associated with a buffer, this method will
1036 return undef.
1037
1038 =head2 close
1039
1040     $z->close() ;
1041     close $z ;
1042
1043
1044
 1045 Closes the input file/buffer.
1046
1047
1048
1049 For most versions of Perl this method will be automatically invoked if
1050 the IO::Uncompress::Gunzip object is destroyed (either explicitly or by the
1051 variable with the reference to the object going out of scope). The
1052 exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
1053 these cases, the C<close> method will be called automatically, but
1054 not until global destruction of all live objects when the program is
1055 terminating.
1056
1057 Therefore, if you want your scripts to be able to run on all versions
1058 of Perl, you should call C<close> explicitly and not rely on automatic
1059 closing.
1060
1061 Returns true on success, otherwise 0.
1062
1063 If the C<AutoClose> option has been enabled when the IO::Uncompress::Gunzip
1064 object was created, and the object is associated with a file, the
1065 underlying file will also be closed.
1066
1067
1068
1069
1070 =head2 nextStream
1071
1072 Usage is
1073
1074     my $status = $z->nextStream();
1075
1076 Skips to the next compressed data stream in the input file/buffer. If a new
1077 compressed data stream is found, the eof marker will be cleared and C<$.>
1078 will be reset to 0.
1079
1080 Returns 1 if a new stream was found, 0 if none was found, and -1 if an
1081 error was encountered.
1082
1083 =head2 trailingData
1084
1085 Usage is
1086
1087     my $data = $z->trailingData();
1088
1089 Returns the data, if any, that is present immediately after the compressed
1090 data stream once uncompression is complete. It only makes sense to call
1091 this method once the end of the compressed data stream has been
1092 encountered.
1093
1094 This option can be used when there is useful information immediately
1095 following the compressed data stream, and you don't know the length of the
1096 compressed data stream.
1097
1098 If the input is a buffer, C<trailingData> will return everything from the
1099 end of the compressed data stream to the end of the buffer.
1100
1101 If the input is a filehandle, C<trailingData> will return the data that is
1102 left in the filehandle input buffer once the end of the compressed data
1103 stream has been reached. You can then use the filehandle to read the rest
1104 of the input file.
1105
1106 Don't bother using C<trailingData> if the input is a filename.
1107
1108
1109
1110 If you know the length of the compressed data stream before you start
1111 uncompressing, you can avoid having to use C<trailingData> by setting the
1112 C<InputLength> option in the constructor.
1113
1114 =head1 Importing
1115
 1116 No symbolic constants are required by IO::Uncompress::Gunzip at present.
1117
1118 =over 5
1119
1120 =item :all
1121
1122 Imports C<gunzip> and C<$GunzipError>.
1123 Same as doing this
1124
1125     use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
1126
1127 =back
1128
1129 =head1 EXAMPLES
1130
1131
1132
1133
1134 =head1 SEE ALSO
1135
1136 L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
1137
1138 L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
1139
1140 L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
1141 L<Archive::Tar|Archive::Tar>,
1142 L<IO::Zlib|IO::Zlib>
1143
1144
1145 For RFC 1950, 1951 and 1952 see
1146 F<http://www.faqs.org/rfcs/rfc1950.html>,
1147 F<http://www.faqs.org/rfcs/rfc1951.html> and
1148 F<http://www.faqs.org/rfcs/rfc1952.html>
1149
1150 The I<zlib> compression library was written by Jean-loup Gailly
1151 F<gzip@prep.ai.mit.edu> and Mark Adler F<madler@alumni.caltech.edu>.
1152
1153 The primary site for the I<zlib> compression library is
1154 F<http://www.zlib.org>.
1155
1156 The primary site for gzip is F<http://www.gzip.org>.
1157
1158
1159
1160
1161 =head1 AUTHOR
1162
1163 This module was written by Paul Marquess, F<pmqs@cpan.org>.
1164
1165
1166
1167 =head1 MODIFICATION HISTORY
1168
1169 See the Changes file.
1170
1171 =head1 COPYRIGHT AND LICENSE
1172
1173 Copyright (c) 2005-2006 Paul Marquess. All rights reserved.
1174
1175 This program is free software; you can redistribute it and/or
1176 modify it under the same terms as Perl itself.
1177