1 package IO::Uncompress::Unzip;
11 use IO::Uncompress::RawInflate 2.010 ;
12 use IO::Compress::Base::Common 2.010 qw(:Status createSelfTiedObject);
13 use IO::Uncompress::Adapter::Identity 2.010 ;
14 use IO::Compress::Zlib::Extra 2.010 ;
15 use IO::Compress::Zip::Constants 2.010 ;
17 use Compress::Raw::Zlib 2.010 qw(crc32) ;
21 eval { require IO::Uncompress::Adapter::Bunzip2 ;
22 import IO::Uncompress::Adapter::Bunzip2 } ;
28 our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $UnzipError, %headerLookup);
33 @ISA = qw(Exporter IO::Uncompress::RawInflate);
34 @EXPORT_OK = qw( $UnzipError unzip );
35 %EXPORT_TAGS = %IO::Uncompress::RawInflate::EXPORT_TAGS ;
36 push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
37 Exporter::export_ok_tags('all');
40 ZIP_CENTRAL_HDR_SIG, \&skipCentralDirectory,
41 ZIP_END_CENTRAL_HDR_SIG, \&skipEndCentralDirectory,
42 ZIP64_END_CENTRAL_REC_HDR_SIG, \&skipCentralDirectory64Rec,
43 ZIP64_END_CENTRAL_LOC_HDR_SIG, \&skipCentralDirectory64Loc,
44 ZIP64_ARCHIVE_EXTRA_SIG, \&skipArchiveExtra,
45 ZIP64_DIGITAL_SIGNATURE_SIG, \&skipDigitalSignature,
51 my $obj = createSelfTiedObject($class, \$UnzipError);
52 $obj->_create(undef, 0, @_);
57 my $obj = createSelfTiedObject(undef, \$UnzipError);
58 return $obj->_inf(@_) ;
63 use IO::Compress::Base::Common 2.010 qw(:Parse);
68 'Name' => [1, 1, Parse_any, undef],
70 # 'Streaming' => [1, 1, Parse_boolean, 1],
79 # unzip always needs crc32
80 $got->value('CRC32' => 1);
82 *$self->{UnzipData}{Name} = $got->value('Name');
93 $self->smartReadExact(\$magic, 4);
95 *$self->{HeaderPending} = $magic ;
97 return $self->HeaderError("Minimum header size is " .
99 if length $magic != 4 ;
101 return $self->HeaderError("Bad Magic")
102 if ! _isZipMagic($magic) ;
104 *$self->{Type} = 'zip';
116 my $name = *$self->{UnzipData}{Name} ;
117 my $hdr = $self->_readZipHeader($magic) ;
121 if (! defined $name || $hdr->{Name} eq $name)
128 if (*$self->{ZipData}{Streaming}) {
133 my $status = $self->smartRead(\$b, 1024 * 16);
139 $status = *$self->{Uncomp}->uncompr(\$b, \$temp_buf, 0, $out);
141 return $self->saveErrorString(undef, *$self->{Uncomp}{Error},
142 *$self->{Uncomp}{ErrorNo})
143 if $self->saveStatus($status) == STATUS_ERROR;
145 if ($status == STATUS_ENDSTREAM) {
146 *$self->{Uncomp}->reset();
147 $self->pushBack($b) ;
153 $self->smartReadExact(\$buffer, $hdr->{TrailerLength})
154 or return $self->saveErrorString(undef, "Truncated file");
157 my $c = $hdr->{CompressedLength}->get32bit();
158 $self->smartReadExact(\$buffer, $c)
159 or return $self->saveErrorString(undef, "Truncated file");
163 $self->chkTrailer($buffer) == STATUS_OK
164 or return $self->saveErrorString(undef, "Truncated file");
166 $hdr = $self->_readFullZipHeader();
168 return $self->saveErrorString(undef, "Cannot find '$name'")
169 if $self->smartEof();
180 my ($sig, $CRC32, $cSize, $uSize) ;
181 my ($cSizeHi, $uSizeHi) = (0, 0);
182 if (*$self->{ZipData}{Streaming}) {
183 $sig = unpack ("V", substr($trailer, 0, 4));
184 $CRC32 = unpack ("V", substr($trailer, 4, 4));
186 if (*$self->{ZipData}{Zip64} ) {
187 $cSize = U64::newUnpack_V64 substr($trailer, 8, 8);
188 $uSize = U64::newUnpack_V64 substr($trailer, 16, 8);
191 $cSize = U64::newUnpack_V32 substr($trailer, 8, 4);
192 $uSize = U64::newUnpack_V32 substr($trailer, 12, 4);
195 return $self->TrailerError("Data Descriptor signature, got $sig")
196 if $sig != ZIP_DATA_HDR_SIG;
199 ($CRC32, $cSize, $uSize) =
200 (*$self->{ZipData}{Crc32},
201 *$self->{ZipData}{CompressedLen},
202 *$self->{ZipData}{UnCompressedLen});
205 if (*$self->{Strict}) {
206 return $self->TrailerError("CRC mismatch")
207 if $CRC32 != *$self->{ZipData}{CRC32} ;
209 return $self->TrailerError("CSIZE mismatch.")
210 if ! $cSize->equal(*$self->{CompSize});
212 return $self->TrailerError("USIZE mismatch.")
213 if ! $uSize->equal(*$self->{UnCompSize});
216 my $reachedEnd = STATUS_ERROR ;
217 # check for central directory or end of central directory
221 my $got = $self->smartRead(\$magic, 4);
223 return $self->saveErrorString(STATUS_ERROR, "Truncated file")
224 if $got != 4 && *$self->{Strict};
230 return STATUS_ERROR ;
233 $self->pushBack($magic) ;
237 my $sig = unpack("V", $magic) ;
240 if ($hdr = $headerLookup{$sig})
242 if (&$hdr($self, $magic) != STATUS_OK ) {
243 if (*$self->{Strict}) {
244 return STATUS_ERROR ;
252 if ($sig == ZIP_END_CENTRAL_HDR_SIG)
258 elsif ($sig == ZIP_LOCAL_HDR_SIG)
260 $self->pushBack($magic) ;
266 $self->pushBack($magic) ;
274 sub skipCentralDirectory
280 $self->smartReadExact(\$buffer, 46 - 4)
281 or return $self->TrailerError("Minimum header size is " .
284 my $keep = $magic . $buffer ;
285 *$self->{HeaderPending} = $keep ;
287 #my $versionMadeBy = unpack ("v", substr($buffer, 4-4, 2));
288 #my $extractVersion = unpack ("v", substr($buffer, 6-4, 2));
289 #my $gpFlag = unpack ("v", substr($buffer, 8-4, 2));
290 #my $compressedMethod = unpack ("v", substr($buffer, 10-4, 2));
291 #my $lastModTime = unpack ("V", substr($buffer, 12-4, 4));
292 #my $crc32 = unpack ("V", substr($buffer, 16-4, 4));
293 my $compressedLength = unpack ("V", substr($buffer, 20-4, 4));
294 my $uncompressedLength = unpack ("V", substr($buffer, 24-4, 4));
295 my $filename_length = unpack ("v", substr($buffer, 28-4, 2));
296 my $extra_length = unpack ("v", substr($buffer, 30-4, 2));
297 my $comment_length = unpack ("v", substr($buffer, 32-4, 2));
298 #my $disk_start = unpack ("v", substr($buffer, 34-4, 2));
299 #my $int_file_attrib = unpack ("v", substr($buffer, 36-4, 2));
300 #my $ext_file_attrib = unpack ("V", substr($buffer, 38-4, 4));
301 #my $lcl_hdr_offset = unpack ("V", substr($buffer, 42-4, 4));
307 if ($filename_length)
309 $self->smartReadExact(\$filename, $filename_length)
310 or return $self->TruncatedTrailer("filename");
316 $self->smartReadExact(\$extraField, $extra_length)
317 or return $self->TruncatedTrailer("extra");
318 $keep .= $extraField ;
323 $self->smartReadExact(\$comment, $comment_length)
324 or return $self->TruncatedTrailer("comment");
337 $self->smartReadExact(\$buffer, 4)
338 or return $self->TrailerError("Minimum header size is " .
341 my $keep = $magic . $buffer ;
343 my $size = unpack ("V", $buffer);
345 $self->smartReadExact(\$buffer, $size)
346 or return $self->TrailerError("Minimum header size is " .
350 *$self->{HeaderPending} = $keep ;
356 sub skipCentralDirectory64Rec
362 $self->smartReadExact(\$buffer, 8)
363 or return $self->TrailerError("Minimum header size is " .
366 my $keep = $magic . $buffer ;
368 my ($sizeLo, $sizeHi) = unpack ("V V", $buffer);
370 # TODO - take SizeHi into account
371 $self->smartReadExact(\$buffer, $sizeLo)
372 or return $self->TrailerError("Minimum header size is " .
373 $sizeLo . " bytes") ;
376 *$self->{HeaderPending} = $keep ;
378 #my $versionMadeBy = unpack ("v", substr($buffer, 0, 2));
379 #my $extractVersion = unpack ("v", substr($buffer, 2, 2));
380 #my $diskNumber = unpack ("V", substr($buffer, 4, 4));
381 #my $cntrlDirDiskNo = unpack ("V", substr($buffer, 8, 4));
382 #my $entriesInThisCD = unpack ("V V", substr($buffer, 12, 8));
383 #my $entriesInCD = unpack ("V V", substr($buffer, 20, 8));
384 #my $sizeOfCD = unpack ("V V", substr($buffer, 28, 8));
385 #my $offsetToCD = unpack ("V V", substr($buffer, 36, 8));
390 sub skipCentralDirectory64Loc
396 $self->smartReadExact(\$buffer, 20 - 4)
397 or return $self->TrailerError("Minimum header size is " .
400 my $keep = $magic . $buffer ;
401 *$self->{HeaderPending} = $keep ;
403 #my $startCdDisk = unpack ("V", substr($buffer, 4-4, 4));
404 #my $offsetToCD = unpack ("V V", substr($buffer, 8-4, 8));
405 #my $diskCount = unpack ("V", substr($buffer, 16-4, 4));
410 sub skipEndCentralDirectory
416 $self->smartReadExact(\$buffer, 22 - 4)
417 or return $self->TrailerError("Minimum header size is " .
420 my $keep = $magic . $buffer ;
421 *$self->{HeaderPending} = $keep ;
423 #my $diskNumber = unpack ("v", substr($buffer, 4-4, 2));
424 #my $cntrlDirDiskNo = unpack ("v", substr($buffer, 6-4, 2));
425 #my $entriesInThisCD = unpack ("v", substr($buffer, 8-4, 2));
426 #my $entriesInCD = unpack ("v", substr($buffer, 10-4, 2));
427 #my $sizeOfCD = unpack ("V", substr($buffer, 12-4, 4));
428 #my $offsetToCD = unpack ("V", substr($buffer, 16-4, 4));
429 my $comment_length = unpack ("v", substr($buffer, 20-4, 2));
435 $self->smartReadExact(\$comment, $comment_length)
436 or return $self->TruncatedTrailer("comment");
447 return 0 if length $buffer < 4 ;
448 my $sig = unpack("V", $buffer) ;
449 return $sig == ZIP_LOCAL_HDR_SIG ;
453 sub _readFullZipHeader($)
458 $self->smartReadExact(\$magic, 4);
460 *$self->{HeaderPending} = $magic ;
462 return $self->HeaderError("Minimum header size is " .
464 if length $magic != 4 ;
467 return $self->HeaderError("Bad Magic")
468 if ! _isZipMagic($magic) ;
470 my $status = $self->_readZipHeader($magic);
471 delete *$self->{Transparent} if ! defined $status ;
475 sub _readZipHeader($)
477 my ($self, $magic) = @_ ;
481 $self->smartReadExact(\$buffer, 30 - 4)
482 or return $self->HeaderError("Minimum header size is " .
485 my $keep = $magic . $buffer ;
486 *$self->{HeaderPending} = $keep ;
488 my $extractVersion = unpack ("v", substr($buffer, 4-4, 2));
489 my $gpFlag = unpack ("v", substr($buffer, 6-4, 2));
490 my $compressedMethod = unpack ("v", substr($buffer, 8-4, 2));
491 my $lastModTime = unpack ("V", substr($buffer, 10-4, 4));
492 my $crc32 = unpack ("V", substr($buffer, 14-4, 4));
493 my $compressedLength = new U64 unpack ("V", substr($buffer, 18-4, 4));
494 my $uncompressedLength = new U64 unpack ("V", substr($buffer, 22-4, 4));
495 my $filename_length = unpack ("v", substr($buffer, 26-4, 2));
496 my $extra_length = unpack ("v", substr($buffer, 28-4, 2));
501 my $streamingMode = ($gpFlag & ZIP_GP_FLAG_STREAMING_MASK) ? 1 : 0 ;
503 return $self->HeaderError("Streamed Stored content not supported")
504 if $streamingMode && $compressedMethod == 0 ;
506 return $self->HeaderError("Encrypted content not supported")
507 if $gpFlag & (ZIP_GP_FLAG_ENCRYPTED_MASK|ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK);
509 return $self->HeaderError("Patch content not supported")
510 if $gpFlag & ZIP_GP_FLAG_PATCHED_MASK;
512 *$self->{ZipData}{Streaming} = $streamingMode;
515 if ($filename_length)
517 $self->smartReadExact(\$filename, $filename_length)
518 or return $self->TruncatedHeader("Filename");
526 $self->smartReadExact(\$extraField, $extra_length)
527 or return $self->TruncatedHeader("Extra Field");
529 my $bad = IO::Compress::Zlib::Extra::parseRawExtra($extraField,
531 return $self->HeaderError($bad)
534 $keep .= $extraField ;
539 $Extra{$_->[0]} = \$_->[1];
542 if (defined $Extra{ZIP_EXTRA_ID_ZIP64()})
546 my $buff = ${ $Extra{ZIP_EXTRA_ID_ZIP64()} };
548 # TODO - This code assumes that all the fields in the Zip64
549 # extra field aren't necessarily present. The spec says that
550 # they only exist if the equivalent local headers are -1.
551 # Need to check that info-zip fills out -1 in the local header
554 if (! $streamingMode) {
557 $uncompressedLength = U64::newUnpack_V64 substr($buff, 0, 8)
558 if $uncompressedLength == 0xFFFF ;
562 $compressedLength = U64::newUnpack_V64 substr($buff, $offset, 8)
563 if $compressedLength == 0xFFFF ;
567 #my $cheaderOffset = U64::newUnpack_V64 substr($buff, 16, 8);
568 #my $diskNumber = unpack ("V", substr($buff, 24, 4));
573 *$self->{ZipData}{Zip64} = $zip64;
575 if (! $streamingMode) {
576 *$self->{ZipData}{Streaming} = 0;
577 *$self->{ZipData}{Crc32} = $crc32;
578 *$self->{ZipData}{CompressedLen} = $compressedLength;
579 *$self->{ZipData}{UnCompressedLen} = $uncompressedLength;
580 *$self->{CompressedInputLengthRemaining} =
581 *$self->{CompressedInputLength} = $compressedLength->get32bit();
584 *$self->{ZipData}{Method} = $compressedMethod;
585 if ($compressedMethod == ZIP_CM_DEFLATE)
587 *$self->{Type} = 'zip-deflate';
589 elsif ($compressedMethod == ZIP_CM_BZIP2)
591 #if (! defined $IO::Uncompress::Adapter::Bunzip2::VERSION)
593 *$self->{Type} = 'zip-bzip2';
595 my $obj = IO::Uncompress::Adapter::Bunzip2::mkUncompObject(
598 *$self->{Uncomp} = $obj;
599 *$self->{ZipData}{CRC32} = crc32(undef);
602 elsif ($compressedMethod == ZIP_CM_STORE)
604 # TODO -- add support for reading uncompressed
606 *$self->{Type} = 'zip-stored';
608 my $obj = IO::Uncompress::Adapter::Identity::mkUncompObject(# $got->value('CRC32'),
609 # $got->value('ADLER32'),
612 *$self->{Uncomp} = $obj;
617 return $self->HeaderError("Unsupported Compression format $compressedMethod");
622 'FingerprintLength' => 4,
623 #'HeaderLength' => $compressedMethod == 8 ? length $keep : 0,
624 'HeaderLength' => length $keep,
626 'TrailerLength' => ! $streamingMode ? 0 : $zip64 ? 24 : 16,
628 'CompressedLength' => $compressedLength ,
629 'UncompressedLength' => $uncompressedLength ,
632 'Time' => _dosToUnixTime($lastModTime),
633 'Stream' => $streamingMode,
635 'MethodID' => $compressedMethod,
636 'MethodName' => $compressedMethod == ZIP_CM_DEFLATE
638 : $compressedMethod == ZIP_CM_BZIP2
640 : $compressedMethod == ZIP_CM_STORE
644 # 'TextFlag' => $flag & GZIP_FLG_FTEXT ? 1 : 0,
645 # 'HeaderCRCFlag' => $flag & GZIP_FLG_FHCRC ? 1 : 0,
646 # 'NameFlag' => $flag & GZIP_FLG_FNAME ? 1 : 0,
647 # 'CommentFlag' => $flag & GZIP_FLG_FCOMMENT ? 1 : 0,
648 # 'ExtraFlag' => $flag & GZIP_FLG_FEXTRA ? 1 : 0,
649 # 'Comment' => $comment,
651 # 'OsName' => defined $GZIP_OS_Names{$os}
652 # ? $GZIP_OS_Names{$os} : "Unknown",
653 # 'HeaderCRC' => $HeaderCRC,
655 # 'ExtraFlags' => $xfl,
656 'ExtraFieldRaw' => $extraField,
657 'ExtraField' => [ @EXTRA ],
663 sub filterUncompressed
667 if (*$self->{ZipData}{Method} == 12) {
668 *$self->{ZipData}{CRC32} = crc32(${$_[0]}, *$self->{ZipData}{CRC32});
671 *$self->{ZipData}{CRC32} = *$self->{Uncomp}->crc32() ;
679 #use Time::Local 'timelocal_nocheck';
680 use Time::Local 'timelocal';
684 my $year = ( ( $dt >> 25 ) & 0x7f ) + 80;
685 my $mon = ( ( $dt >> 21 ) & 0x0f ) - 1;
686 my $mday = ( ( $dt >> 16 ) & 0x1f );
688 my $hour = ( ( $dt >> 11 ) & 0x1f );
689 my $min = ( ( $dt >> 5 ) & 0x3f );
690 my $sec = ( ( $dt << 1 ) & 0x3e );
694 eval { timelocal( $sec, $min, $hour, $mday, $mon, $year ); };
708 IO::Uncompress::Unzip - Read zip files/buffers
712 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
714 my $status = unzip $input => $output [,OPTS]
715 or die "unzip failed: $UnzipError\n";
717 my $z = new IO::Uncompress::Unzip $input [OPTS]
718 or die "unzip failed: $UnzipError\n";
720 $status = $z->read($buffer)
721 $status = $z->read($buffer, $length)
722 $status = $z->read($buffer, $length, $offset)
723 $line = $z->getline()
728 $status = $z->inflateSync()
730 $data = $z->trailingData()
731 $status = $z->nextStream()
732 $data = $z->getHeaderInfo()
734 $z->seek($position, $whence)
746 read($z, $buffer, $length);
747 read($z, $buffer, $length, $offset);
749 seek($z, $position, $whence)
757 This module provides a Perl interface that allows the reading of
760 For writing zip files/buffers, see the companion module IO::Compress::Zip.
762 =head1 Functional Interface
764 A top-level function, C<unzip>, is provided to carry out
765 "one-shot" uncompression between buffers and/or files. For finer
766 control over the uncompression process, see the L</"OO Interface">
769 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
771 unzip $input => $output [,OPTS]
772 or die "unzip failed: $UnzipError\n";
774 The functional interface needs Perl5.005 or better.
776 =head2 unzip $input => $output [, OPTS]
778 C<unzip> expects at least two parameters, C<$input> and C<$output>.
780 =head3 The C<$input> parameter
782 The parameter, C<$input>, is used to define the source of
785 It can take one of the following forms:
791 If the C<$input> parameter is a simple scalar, it is assumed to be a
792 filename. This file will be opened for reading and the input data
793 will be read from it.
797 If the C<$input> parameter is a filehandle, the input data will be
799 The string '-' can be used as an alias for standard input.
801 =item A scalar reference
803 If C<$input> is a scalar reference, the input data will be read
806 =item An array reference
808 If C<$input> is an array reference, each element in the array must be a
811 The input data will be read from each file in turn.
813 The complete array will be walked to ensure that it only
814 contains valid filenames before any data is uncompressed.
816 =item An Input FileGlob string
818 If C<$input> is a string that is delimited by the characters "<" and ">"
819 C<unzip> will assume that it is an I<input fileglob string>. The
820 input is the list of files that match the fileglob.
822 If the fileglob does not match any files ...
824 See L<File::GlobMapper|File::GlobMapper> for more details.
828 If the C<$input> parameter is any other type, C<undef> will be returned.
830 =head3 The C<$output> parameter
832 The parameter C<$output> is used to control the destination of the
833 uncompressed data. This parameter can take one of these forms.
839 If the C<$output> parameter is a simple scalar, it is assumed to be a
840 filename. This file will be opened for writing and the uncompressed
841 data will be written to it.
845 If the C<$output> parameter is a filehandle, the uncompressed data
846 will be written to it.
847 The string '-' can be used as an alias for standard output.
849 =item A scalar reference
851 If C<$output> is a scalar reference, the uncompressed data will be
852 stored in C<$$output>.
854 =item An Array Reference
856 If C<$output> is an array reference, the uncompressed data will be
857 pushed onto the array.
859 =item An Output FileGlob
861 If C<$output> is a string that is delimited by the characters "<" and ">"
862 C<unzip> will assume that it is an I<output fileglob string>. The
863 output is the list of files that match the fileglob.
865 When C<$output> is a fileglob string, C<$input> must also be a fileglob
866 string. Anything else is an error.
870 If the C<$output> parameter is any other type, C<undef> will be returned.
874 When C<$input> maps to multiple compressed files/buffers and C<$output> is
875 a single file/buffer, after uncompression C<$output> will contain a
876 concatenation of all the uncompressed data from each of the input
879 =head2 Optional Parameters
881 Unless specified below, the optional parameters for C<unzip>,
882 C<OPTS>, are the same as those used with the OO interface defined in the
883 L</"Constructor Options"> section below.
887 =item C<< AutoClose => 0|1 >>
889 This option applies to any input or output data streams to
890 C<unzip> that are filehandles.
892 If C<AutoClose> is specified, and the value is true, it will result in all
893 input and/or output filehandles being closed once C<unzip> has
896 This parameter defaults to 0.
898 =item C<< BinModeOut => 0|1 >>
900 When writing to a file or filehandle, set C<binmode> before writing to the
905 =item C<< Append => 0|1 >>
909 =item C<< MultiStream => 0|1 >>
911 If the input file/buffer contains multiple compressed data streams, this
912 option will uncompress the whole lot as a single data stream.
916 =item C<< TrailingData => $scalar >>
918 Returns the data, if any, that is present immediately after the compressed
919 data stream once uncompression is complete.
921 This option can be used when there is useful information immediately
922 following the compressed data stream, and you don't know the length of the
923 compressed data stream.
925 If the input is a buffer, C<trailingData> will return everything from the
926 end of the compressed data stream to the end of the buffer.
928 If the input is a filehandle, C<trailingData> will return the data that is
929 left in the filehandle input buffer once the end of the compressed data
930 stream has been reached. You can then use the filehandle to read the rest
933 Don't bother using C<trailingData> if the input is a filename.
935 If you know the length of the compressed data stream before you start
936 uncompressing, you can avoid having to use C<trailingData> by setting the
937 C<InputLength> option.
943 To read the contents of the file C<file1.txt.zip> and write the
944 uncompressed data to the file C<file1.txt>.
948 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
950 my $input = "file1.txt.zip";
951 my $output = "file1.txt";
952 unzip $input => $output
953 or die "unzip failed: $UnzipError\n";
955 To read from an existing Perl filehandle, C<$input>, and write the
956 uncompressed data to a buffer, C<$buffer>.
960 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
963 my $input = new IO::File "<file1.txt.zip"
964 or die "Cannot open 'file1.txt.zip': $!\n" ;
966 unzip $input => \$buffer
967 or die "unzip failed: $UnzipError\n";
969 To uncompress all files in the directory "/my/home" that match "*.txt.zip" and store the uncompressed data in the same directory
973 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
975 unzip '</my/home/*.txt.zip>' => '</my/home/#1.txt>'
976 or die "unzip failed: $UnzipError\n";
978 and if you want to uncompress each file one at a time, this will do the trick
982 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
984 for my $input ( glob "/my/home/*.txt.zip" )
987 $output =~ s/\.zip$// ;
988 unzip $input => $output
989 or die "Error uncompressing '$input': $UnzipError\n";
996 The format of the constructor for IO::Uncompress::Unzip is shown below
998 my $z = new IO::Uncompress::Unzip $input [OPTS]
999 or die "IO::Uncompress::Unzip failed: $UnzipError\n";
1001 Returns an C<IO::Uncompress::Unzip> object on success and undef on failure.
1002 The variable C<$UnzipError> will contain an error message on failure.
1004 If you are running Perl 5.005 or better the object, C<$z>, returned from
1005 IO::Uncompress::Unzip can be used exactly like an L<IO::File|IO::File> filehandle.
1006 This means that all normal input file operations can be carried out with
1007 C<$z>. For example, to read a line from a compressed file/buffer you can
1008 use either of these forms
1010 $line = $z->getline();
1013 The mandatory parameter C<$input> is used to determine the source of the
1014 compressed data. This parameter can take one of three forms.
1020 If the C<$input> parameter is a scalar, it is assumed to be a filename. This
1021 file will be opened for reading and the compressed data will be read from it.
1025 If the C<$input> parameter is a filehandle, the compressed data will be
1027 The string '-' can be used as an alias for standard input.
1029 =item A scalar reference
1031 If C<$input> is a scalar reference, the compressed data will be read from
1036 =head2 Constructor Options
1038 The option names defined below are case insensitive and can be optionally
1039 prefixed by a '-'. So all of the following are valid
1046 OPTS is a combination of the following options:
1050 =item C<< AutoClose => 0|1 >>
1052 This option is only valid when the C<$input> parameter is a filehandle. If
1053 specified, and the value is true, it will result in the file being closed once
1054 either the C<close> method is called or the IO::Uncompress::Unzip object is
1057 This parameter defaults to 0.
1059 =item C<< MultiStream => 0|1 >>
1061 Treats the complete zip file/buffer as a single compressed data
1062 stream. When reading in multi-stream mode each member of the zip
1063 file/buffer will be uncompressed in turn until the end of the file/buffer
1066 This parameter defaults to 0.
1068 =item C<< Prime => $string >>
1070 This option will uncompress the contents of C<$string> before processing the
1073 This option can be useful when the compressed data is embedded in another
1074 file/data structure and it is not possible to work out where the compressed
1075 data begins without having to read the first few bytes. If this is the
1076 case, the uncompression can be I<primed> with these bytes using this
1079 =item C<< Transparent => 0|1 >>
1081 If this option is set and the input file/buffer is not compressed data,
1082 the module will allow reading of it anyway.
1084 In addition, if the input file/buffer does contain compressed data and
1085 there is non-compressed data immediately following it, setting this option
1086 will make this module treat the whole file/buffer as a single data stream.
1088 This option defaults to 1.
1090 =item C<< BlockSize => $num >>
1092 When reading the compressed input data, IO::Uncompress::Unzip will read it in
1093 blocks of C<$num> bytes.
1095 This option defaults to 4096.
1097 =item C<< InputLength => $size >>
1099 When present this option will limit the number of compressed bytes read
1100 from the input file/buffer to C<$size>. This option can be used in the
1101 situation where there is useful data directly after the compressed data
1102 stream and you know beforehand the exact length of the compressed data
1105 This option is mostly used when reading from a filehandle, in which case
1106 the file pointer will be left pointing to the first byte directly after the
1107 compressed data stream.
1109 This option defaults to off.
1111 =item C<< Append => 0|1 >>
1113 This option controls what the C<read> method does with uncompressed data.
1115 If set to 1, all uncompressed data will be appended to the output parameter
1116 of the C<read> method.
1118 If set to 0, the contents of the output parameter of the C<read> method
1119 will be overwritten by the uncompressed data.
1123 =item C<< Strict => 0|1 >>
1125 This option controls whether the extra checks defined below are used when
1126 carrying out the decompression. When Strict is on, the extra tests are
1127 carried out, when Strict is off they are not.
1129 The default for this option is off.
1143 $status = $z->read($buffer)
1145 Reads a block of compressed data (the size of the compressed block is
1146 determined by the C<BlockSize> option in the constructor), uncompresses it and
1147 writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
1148 set in the constructor, the uncompressed data will be appended to the
1149 C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
1151 Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
1152 or a negative number on error.
1158 $status = $z->read($buffer, $length)
1159 $status = $z->read($buffer, $length, $offset)
1161 $status = read($z, $buffer, $length)
1162 $status = read($z, $buffer, $length, $offset)
1164 Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
1166 The main difference between this form of the C<read> method and the
1167 previous one, is that this one will attempt to return I<exactly> C<$length>
1168 bytes. The only circumstances in which this method will not do so are if end-of-file
1169 or an IO error is encountered.
1171 Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
1172 or a negative number on error.
1178 $line = $z->getline()
1181 Reads a single line.
1183 This method fully supports the use of the variable C<$/> (or
1184 C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
1185 determine what constitutes an end of line. Paragraph mode, record mode and
1186 file slurp mode are all supported.
1194 Read a single character.
1200 $char = $z->ungetc($string)
1206 $status = $z->inflateSync()
1210 =head2 getHeaderInfo
1214 $hdr = $z->getHeaderInfo();
1215 @hdrs = $z->getHeaderInfo();
1217 This method returns either a hash reference (in scalar context) or a list
1218 of hash references (in array context) that contains information about each
1219 of the header fields in the compressed data stream(s).
1228 Returns the uncompressed file offset.
1237 Returns true if the end of the compressed input stream has been reached.
1241 $z->seek($position, $whence);
1242 seek($z, $position, $whence);
1244 Provides a sub-set of the C<seek> functionality, with the restriction
1245 that it is only legal to seek forward in the input file/buffer.
1246 It is a fatal error to attempt to seek backward.
1248 The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
1249 SEEK_CUR or SEEK_END.
1251 Returns 1 on success, 0 on failure.
1260 This is a noop provided for completeness.
1266 Returns true if the object currently refers to an opened file/buffer.
1270 my $prev = $z->autoflush()
1271 my $prev = $z->autoflush(EXPR)
1273 If the C<$z> object is associated with a file or a filehandle, this method
1274 returns the current autoflush setting for the underlying filehandle. If
1275 C<EXPR> is present, and is non-zero, it will enable flushing after every
1276 write/print operation.
1278 If C<$z> is associated with a buffer, this method has no effect and always
1281 B<Note> that the special variable C<$|> B<cannot> be used to set or
1282 retrieve the autoflush setting.
1284 =head2 input_line_number
1286 $z->input_line_number()
1287 $z->input_line_number(EXPR)
1289 Returns the current uncompressed line number. If C<EXPR> is present it has
1290 the effect of setting the line number. Note that setting the line number
1291 does not change the current position within the file/buffer being read.
1293 The contents of C<$/> are used to determine what constitutes a line
1301 If the C<$z> object is associated with a file or a filehandle, C<fileno>
1302 will return the underlying file descriptor. Once the C<close> method is
1303 called C<fileno> will return C<undef>.
1305 If the C<$z> object is associated with a buffer, this method will return
1313 Closes the input file/buffer.
1315 For most versions of Perl this method will be automatically invoked if
1316 the IO::Uncompress::Unzip object is destroyed (either explicitly or by the
1317 variable with the reference to the object going out of scope). The
1318 exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
1319 these cases, the C<close> method will be called automatically, but
1320 not until global destruction of all live objects when the program is
1323 Therefore, if you want your scripts to be able to run on all versions
1324 of Perl, you should call C<close> explicitly and not rely on automatic
1327 Returns true on success, otherwise 0.
1329 If the C<AutoClose> option has been enabled when the IO::Uncompress::Unzip
1330 object was created, and the object is associated with a file, the
1331 underlying file will also be closed.
1337 my $status = $z->nextStream();
1339 Skips to the next compressed data stream in the input file/buffer. If a new
1340 compressed data stream is found, the eof marker will be cleared and C<$.>
1343 Returns 1 if a new stream was found, 0 if none was found, and -1 if an
1344 error was encountered.
1350 my $data = $z->trailingData();
1352 Returns the data, if any, that is present immediately after the compressed
1353 data stream once uncompression is complete. It only makes sense to call
1354 this method once the end of the compressed data stream has been
1357 This method can be used when there is useful information immediately
1358 following the compressed data stream, and you don't know the length of the
1359 compressed data stream.
1361 If the input is a buffer, C<trailingData> will return everything from the
1362 end of the compressed data stream to the end of the buffer.
1364 If the input is a filehandle, C<trailingData> will return the data that is
1365 left in the filehandle input buffer once the end of the compressed data
1366 stream has been reached. You can then use the filehandle to read the rest
1369 Don't bother using C<trailingData> if the input is a filename.
1371 If you know the length of the compressed data stream before you start
1372 uncompressing, you can avoid having to use C<trailingData> by setting the
1373 C<InputLength> option in the constructor.
1377 No symbolic constants are required by IO::Uncompress::Unzip at present.
1383 Imports C<unzip> and C<$UnzipError>.
1386 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1392 =head2 Working with Net::FTP
1394 See L<IO::Uncompress::Unzip::FAQ|IO::Uncompress::Unzip::FAQ/"Compressed files and Net::FTP">
1396 =head2 Walking through a zip file
1398 The code below can be used to traverse a zip file, one compressed data
1401 use IO::Uncompress::Unzip qw($UnzipError);
1403 my $zipfile = "somefile.zip";
1404 my $u = new IO::Uncompress::Unzip $zipfile
1405 or die "Cannot open $zipfile: $UnzipError";
1407 for (my $status = 1; ! $u->eof(); $status = $u->nextStream())
1409 my $name = $u->getHeaderInfo()->{Name};
1410 warn "Processing member $name\n" ;
1413 while (($status = $u->read($buff)) > 0) {
1417 last unless $status == 0;
1420 die "Error processing $zipfile: $!\n"
1423 Each individual compressed data stream is read until the logical
1424 end-of-file is reached. Then C<nextStream> is called. This will skip to the
1425 start of the next compressed data stream and clear the end-of-file flag.
1427 It is also worth noting that C<nextStream> can be called at any time -- you
1428 don't have to wait until you have exhausted a compressed data stream before
1429 skipping to the next one.
1433 L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
1435 L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
1437 L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
1438 L<Archive::Tar|Archive::Tar>,
1439 L<IO::Zlib|IO::Zlib>
1441 For RFC 1950, 1951 and 1952 see
1442 F<http://www.faqs.org/rfcs/rfc1950.html>,
1443 F<http://www.faqs.org/rfcs/rfc1951.html> and
1444 F<http://www.faqs.org/rfcs/rfc1952.html>
1446 The I<zlib> compression library was written by Jean-loup Gailly
1447 F<gzip@prep.ai.mit.edu> and Mark Adler F<madler@alumni.caltech.edu>.
1449 The primary site for the I<zlib> compression library is
1450 F<http://www.zlib.org>.
1452 The primary site for gzip is F<http://www.gzip.org>.
1456 This module was written by Paul Marquess, F<pmqs@cpan.org>.
1458 =head1 MODIFICATION HISTORY
1460 See the Changes file.
1462 =head1 COPYRIGHT AND LICENSE
1464 Copyright (c) 2005-2008 Paul Marquess. All rights reserved.
1466 This program is free software; you can redistribute it and/or
1467 modify it under the same terms as Perl itself.