compression modules update to version 2.005
[p5sagit/p5-mst-13.2.git] / ext / IO_Compress_Zlib / lib / IO / Uncompress / Unzip.pm
CommitLineData
a02d0f6f 1package IO::Uncompress::Unzip;
2
3require 5.004 ;
4
5# for RFC1952
6
7use strict ;
8use warnings;
9use bytes;
10
93d092e2 11use IO::Uncompress::RawInflate 2.005 ;
12use IO::Compress::Base::Common 2.005 qw(:Status createSelfTiedObject);
13use IO::Uncompress::Adapter::Identity 2.005 ;
14use IO::Compress::Zlib::Extra 2.005 ;
15use IO::Compress::Zip::Constants 2.005 ;
a02d0f6f 16
93d092e2 17use Compress::Raw::Zlib 2.005 qw(crc32) ;
c70c1701 18
2b4e0969 19BEGIN
20{
c70c1701 21 eval { require IO::Uncompress::Adapter::Bunzip2 ;
22 import IO::Uncompress::Adapter::Bunzip2 } ;
2b4e0969 23}
24
25
a02d0f6f 26require Exporter ;
27
e7d45986 28our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $UnzipError, %headerLookup);
a02d0f6f 29
93d092e2 30$VERSION = '2.005';
a02d0f6f 31$UnzipError = '';
32
33@ISA = qw(Exporter IO::Uncompress::RawInflate);
34@EXPORT_OK = qw( $UnzipError unzip );
35%EXPORT_TAGS = %IO::Uncompress::RawInflate::EXPORT_TAGS ;
36push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
37Exporter::export_ok_tags('all');
38
e7d45986 39%headerLookup = (
40 ZIP_CENTRAL_HDR_SIG, \&skipCentralDirectory,
41 ZIP_END_CENTRAL_HDR_SIG, \&skipEndCentralDirectory,
42 ZIP64_END_CENTRAL_REC_HDR_SIG, \&skipCentralDirectory64Rec,
43 ZIP64_END_CENTRAL_LOC_HDR_SIG, \&skipCentralDirectory64Loc,
44 ZIP64_ARCHIVE_EXTRA_SIG, \&skipArchiveExtra,
45 ZIP64_DIGITAL_SIGNATURE_SIG, \&skipDigitalSignature,
46 );
47
a02d0f6f 48sub new
49{
50 my $class = shift ;
51 my $obj = createSelfTiedObject($class, \$UnzipError);
52 $obj->_create(undef, 0, @_);
53}
54
55sub unzip
56{
57 my $obj = createSelfTiedObject(undef, \$UnzipError);
58 return $obj->_inf(@_) ;
59}
60
61sub getExtraParams
62{
93d092e2 63 use IO::Compress::Base::Common 2.005 qw(:Parse);
a02d0f6f 64
65
66 return (
67# # Zip header fields
68 'Name' => [1, 1, Parse_any, undef],
69
70# 'Streaming' => [1, 1, Parse_boolean, 1],
71 );
72}
73
74sub ckParams
75{
76 my $self = shift ;
77 my $got = shift ;
78
79 # unzip always needs crc32
80 $got->value('CRC32' => 1);
81
82 *$self->{UnzipData}{Name} = $got->value('Name');
83
84 return 1;
85}
86
87
88sub ckMagic
89{
90 my $self = shift;
91
92 my $magic ;
93 $self->smartReadExact(\$magic, 4);
94
95 *$self->{HeaderPending} = $magic ;
96
97 return $self->HeaderError("Minimum header size is " .
98 4 . " bytes")
99 if length $magic != 4 ;
100
101 return $self->HeaderError("Bad Magic")
102 if ! _isZipMagic($magic) ;
103
104 *$self->{Type} = 'zip';
105
106 return $magic ;
107}
108
109
110
111sub readHeader
112{
113 my $self = shift;
114 my $magic = shift ;
115
116 my $name = *$self->{UnzipData}{Name} ;
117 my $hdr = $self->_readZipHeader($magic) ;
118
119 while (defined $hdr)
120 {
121 if (! defined $name || $hdr->{Name} eq $name)
122 {
123 return $hdr ;
124 }
125
126 # skip the data
127 my $buffer;
128 if (*$self->{ZipData}{Streaming}) {
129
130 while (1) {
131
132 my $b;
133 my $status = $self->smartRead(\$b, 1024 * 16);
134 return undef
135 if $status <= 0 ;
136
137 my $temp_buf;
138 my $out;
139 $status = *$self->{Uncomp}->uncompr(\$b, \$temp_buf, 0, $out);
140
141 return $self->saveErrorString(undef, *$self->{Uncomp}{Error},
142 *$self->{Uncomp}{ErrorNo})
143 if $self->saveStatus($status) == STATUS_ERROR;
144
145 if ($status == STATUS_ENDSTREAM) {
146 *$self->{Uncomp}->reset();
147 $self->pushBack($b) ;
148 last;
149 }
150 }
151
152 # skip the trailer
153 $self->smartReadExact(\$buffer, $hdr->{TrailerLength})
154 or return $self->saveErrorString(undef, "Truncated file");
155 }
156 else {
e7d45986 157 my $c = $hdr->{CompressedLength}->get32bit();
a02d0f6f 158 $self->smartReadExact(\$buffer, $c)
159 or return $self->saveErrorString(undef, "Truncated file");
160 $buffer = '';
161 }
162
163 $self->chkTrailer($buffer) == STATUS_OK
164 or return $self->saveErrorString(undef, "Truncated file");
165
166 $hdr = $self->_readFullZipHeader();
167
168 return $self->saveErrorString(undef, "Cannot find '$name'")
169 if $self->smartEof();
170 }
171
172 return undef;
173}
174
175sub chkTrailer
176{
177 my $self = shift;
178 my $trailer = shift;
179
180 my ($sig, $CRC32, $cSize, $uSize) ;
e7d45986 181 my ($cSizeHi, $uSizeHi) = (0, 0);
a02d0f6f 182 if (*$self->{ZipData}{Streaming}) {
e7d45986 183 $sig = unpack ("V", substr($trailer, 0, 4));
184 $CRC32 = unpack ("V", substr($trailer, 4, 4));
185
186 if (*$self->{ZipData}{Zip64} ) {
187 $cSize = U64::newUnpack_V64 substr($trailer, 8, 8);
188 $uSize = U64::newUnpack_V64 substr($trailer, 16, 8);
189 }
190 else {
191 $cSize = U64::newUnpack_V32 substr($trailer, 8, 4);
192 $uSize = U64::newUnpack_V32 substr($trailer, 12, 4);
193 }
194
a02d0f6f 195 return $self->TrailerError("Data Descriptor signature, got $sig")
c70c1701 196 if $sig != ZIP_DATA_HDR_SIG;
a02d0f6f 197 }
198 else {
199 ($CRC32, $cSize, $uSize) =
200 (*$self->{ZipData}{Crc32},
201 *$self->{ZipData}{CompressedLen},
202 *$self->{ZipData}{UnCompressedLen});
203 }
204
205 if (*$self->{Strict}) {
2b4e0969 206 return $self->TrailerError("CRC mismatch")
207 if $CRC32 != *$self->{ZipData}{CRC32} ;
a02d0f6f 208
e7d45986 209 return $self->TrailerError("CSIZE mismatch.")
210 if ! $cSize->equal(*$self->{CompSize});
a02d0f6f 211
e7d45986 212 return $self->TrailerError("USIZE mismatch.")
213 if ! $uSize->equal(*$self->{UnCompSize});
a02d0f6f 214 }
215
216 my $reachedEnd = STATUS_ERROR ;
217 # check for central directory or end of central directory
218 while (1)
219 {
220 my $magic ;
221 my $got = $self->smartRead(\$magic, 4);
222
223 return $self->saveErrorString(STATUS_ERROR, "Truncated file")
224 if $got != 4 && *$self->{Strict};
225
226 if ($got == 0) {
227 return STATUS_EOF ;
228 }
229 elsif ($got < 0) {
230 return STATUS_ERROR ;
231 }
232 elsif ($got < 4) {
233 $self->pushBack($magic) ;
234 return STATUS_OK ;
235 }
236
237 my $sig = unpack("V", $magic) ;
238
258133d1 239 my $hdr;
240 if ($hdr = $headerLookup{$sig})
a02d0f6f 241 {
258133d1 242 if (&$hdr($self, $magic) != STATUS_OK ) {
a02d0f6f 243 if (*$self->{Strict}) {
244 return STATUS_ERROR ;
245 }
246 else {
247 $self->clearError();
248 return STATUS_OK ;
249 }
250 }
e7d45986 251
252 if ($sig == ZIP_END_CENTRAL_HDR_SIG)
253 {
254 return STATUS_OK ;
255 last;
a02d0f6f 256 }
a02d0f6f 257 }
c70c1701 258 elsif ($sig == ZIP_LOCAL_HDR_SIG)
a02d0f6f 259 {
260 $self->pushBack($magic) ;
261 return STATUS_OK ;
262 }
263 else
264 {
265 # put the data back
266 $self->pushBack($magic) ;
267 last;
268 }
269 }
270
271 return $reachedEnd ;
272}
273
274sub skipCentralDirectory
275{
276 my $self = shift;
277 my $magic = shift ;
278
279 my $buffer;
280 $self->smartReadExact(\$buffer, 46 - 4)
281 or return $self->TrailerError("Minimum header size is " .
282 46 . " bytes") ;
283
284 my $keep = $magic . $buffer ;
285 *$self->{HeaderPending} = $keep ;
286
287 #my $versionMadeBy = unpack ("v", substr($buffer, 4-4, 2));
288 #my $extractVersion = unpack ("v", substr($buffer, 6-4, 2));
289 #my $gpFlag = unpack ("v", substr($buffer, 8-4, 2));
290 #my $compressedMethod = unpack ("v", substr($buffer, 10-4, 2));
291 #my $lastModTime = unpack ("V", substr($buffer, 12-4, 4));
292 #my $crc32 = unpack ("V", substr($buffer, 16-4, 4));
e7d45986 293 my $compressedLength = unpack ("V", substr($buffer, 20-4, 4));
294 my $uncompressedLength = unpack ("V", substr($buffer, 24-4, 4));
a02d0f6f 295 my $filename_length = unpack ("v", substr($buffer, 28-4, 2));
296 my $extra_length = unpack ("v", substr($buffer, 30-4, 2));
297 my $comment_length = unpack ("v", substr($buffer, 32-4, 2));
298 #my $disk_start = unpack ("v", substr($buffer, 34-4, 2));
299 #my $int_file_attrib = unpack ("v", substr($buffer, 36-4, 2));
300 #my $ext_file_attrib = unpack ("V", substr($buffer, 38-4, 2));
301 #my $lcl_hdr_offset = unpack ("V", substr($buffer, 42-4, 2));
302
303
304 my $filename;
305 my $extraField;
306 my $comment ;
307 if ($filename_length)
308 {
309 $self->smartReadExact(\$filename, $filename_length)
2b4e0969 310 or return $self->TruncatedTrailer("filename");
a02d0f6f 311 $keep .= $filename ;
312 }
313
314 if ($extra_length)
315 {
316 $self->smartReadExact(\$extraField, $extra_length)
2b4e0969 317 or return $self->TruncatedTrailer("extra");
a02d0f6f 318 $keep .= $extraField ;
319 }
320
321 if ($comment_length)
322 {
323 $self->smartReadExact(\$comment, $comment_length)
2b4e0969 324 or return $self->TruncatedTrailer("comment");
a02d0f6f 325 $keep .= $comment ;
326 }
327
328 return STATUS_OK ;
329}
330
e7d45986 331sub skipArchiveExtra
332{
333 my $self = shift;
334 my $magic = shift ;
335
336 my $buffer;
337 $self->smartReadExact(\$buffer, 4)
338 or return $self->TrailerError("Minimum header size is " .
339 4 . " bytes") ;
340
341 my $keep = $magic . $buffer ;
342
343 my $size = unpack ("V", $buffer);
344
345 $self->smartReadExact(\$buffer, $size)
346 or return $self->TrailerError("Minimum header size is " .
347 $size . " bytes") ;
348
349 $keep .= $buffer ;
350 *$self->{HeaderPending} = $keep ;
351
352 return STATUS_OK ;
353}
354
355
356sub skipCentralDirectory64Rec
357{
358 my $self = shift;
359 my $magic = shift ;
360
361 my $buffer;
362 $self->smartReadExact(\$buffer, 8)
363 or return $self->TrailerError("Minimum header size is " .
364 8 . " bytes") ;
365
366 my $keep = $magic . $buffer ;
367
368 my ($sizeLo, $sizeHi) = unpack ("V V", $buffer);
369
370 # TODO - take SizeHi into account
371 $self->smartReadExact(\$buffer, $sizeLo)
372 or return $self->TrailerError("Minimum header size is " .
373 $sizeLo . " bytes") ;
374
375 $keep .= $buffer ;
376 *$self->{HeaderPending} = $keep ;
377
378 #my $versionMadeBy = unpack ("v", substr($buffer, 0, 2));
379 #my $extractVersion = unpack ("v", substr($buffer, 2, 2));
380 #my $diskNumber = unpack ("V", substr($buffer, 4, 4));
381 #my $cntrlDirDiskNo = unpack ("V", substr($buffer, 8, 4));
382 #my $entriesInThisCD = unpack ("V V", substr($buffer, 12, 8));
383 #my $entriesInCD = unpack ("V V", substr($buffer, 20, 8));
384 #my $sizeOfCD = unpack ("V V", substr($buffer, 28, 8));
385 #my $offsetToCD = unpack ("V V", substr($buffer, 36, 8));
386
387 return STATUS_OK ;
388}
389
390sub skipCentralDirectory64Loc
391{
392 my $self = shift;
393 my $magic = shift ;
394
395 my $buffer;
396 $self->smartReadExact(\$buffer, 20 - 4)
397 or return $self->TrailerError("Minimum header size is " .
398 20 . " bytes") ;
399
400 my $keep = $magic . $buffer ;
401 *$self->{HeaderPending} = $keep ;
402
403 #my $startCdDisk = unpack ("V", substr($buffer, 4-4, 4));
404 #my $offsetToCD = unpack ("V V", substr($buffer, 8-4, 8));
405 #my $diskCount = unpack ("V", substr($buffer, 16-4, 4));
406
407 return STATUS_OK ;
408}
409
a02d0f6f 410sub skipEndCentralDirectory
411{
412 my $self = shift;
413 my $magic = shift ;
414
415 my $buffer;
416 $self->smartReadExact(\$buffer, 22 - 4)
417 or return $self->TrailerError("Minimum header size is " .
418 22 . " bytes") ;
419
420 my $keep = $magic . $buffer ;
421 *$self->{HeaderPending} = $keep ;
422
423 #my $diskNumber = unpack ("v", substr($buffer, 4-4, 2));
424 #my $cntrlDirDiskNo = unpack ("v", substr($buffer, 6-4, 2));
425 #my $entriesInThisCD = unpack ("v", substr($buffer, 8-4, 2));
426 #my $entriesInCD = unpack ("v", substr($buffer, 10-4, 2));
427 #my $sizeOfCD = unpack ("V", substr($buffer, 12-4, 2));
428 #my $offsetToCD = unpack ("V", substr($buffer, 16-4, 2));
429 my $comment_length = unpack ("v", substr($buffer, 20-4, 2));
430
431
432 my $comment ;
433 if ($comment_length)
434 {
435 $self->smartReadExact(\$comment, $comment_length)
2b4e0969 436 or return $self->TruncatedTrailer("comment");
a02d0f6f 437 $keep .= $comment ;
438 }
439
440 return STATUS_OK ;
441}
442
443
a02d0f6f 444sub _isZipMagic
445{
446 my $buffer = shift ;
447 return 0 if length $buffer < 4 ;
448 my $sig = unpack("V", $buffer) ;
c70c1701 449 return $sig == ZIP_LOCAL_HDR_SIG ;
a02d0f6f 450}
451
452
453sub _readFullZipHeader($)
454{
455 my ($self) = @_ ;
456 my $magic = '' ;
457
458 $self->smartReadExact(\$magic, 4);
459
460 *$self->{HeaderPending} = $magic ;
461
462 return $self->HeaderError("Minimum header size is " .
463 30 . " bytes")
464 if length $magic != 4 ;
465
466
467 return $self->HeaderError("Bad Magic")
468 if ! _isZipMagic($magic) ;
469
470 my $status = $self->_readZipHeader($magic);
471 delete *$self->{Transparent} if ! defined $status ;
472 return $status ;
473}
474
475sub _readZipHeader($)
476{
477 my ($self, $magic) = @_ ;
478 my ($HeaderCRC) ;
479 my ($buffer) = '' ;
480
481 $self->smartReadExact(\$buffer, 30 - 4)
482 or return $self->HeaderError("Minimum header size is " .
483 30 . " bytes") ;
484
485 my $keep = $magic . $buffer ;
486 *$self->{HeaderPending} = $keep ;
487
488 my $extractVersion = unpack ("v", substr($buffer, 4-4, 2));
489 my $gpFlag = unpack ("v", substr($buffer, 6-4, 2));
490 my $compressedMethod = unpack ("v", substr($buffer, 8-4, 2));
491 my $lastModTime = unpack ("V", substr($buffer, 10-4, 4));
492 my $crc32 = unpack ("V", substr($buffer, 14-4, 4));
e7d45986 493 my $compressedLength = new U64 unpack ("V", substr($buffer, 18-4, 4));
494 my $uncompressedLength = new U64 unpack ("V", substr($buffer, 22-4, 4));
a02d0f6f 495 my $filename_length = unpack ("v", substr($buffer, 26-4, 2));
496 my $extra_length = unpack ("v", substr($buffer, 28-4, 2));
497
498 my $filename;
499 my $extraField;
c70c1701 500 my @EXTRA = ();
501 my $streamingMode = ($gpFlag & ZIP_GP_FLAG_STREAMING_MASK) ? 1 : 0 ;
a02d0f6f 502
503 return $self->HeaderError("Streamed Stored content not supported")
504 if $streamingMode && $compressedMethod == 0 ;
505
258133d1 506 return $self->HeaderError("Encrypted content not supported")
507 if $gpFlag & (ZIP_GP_FLAG_ENCRYPTED_MASK|ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK);
508
509 return $self->HeaderError("Patch content not supported")
510 if $gpFlag & ZIP_GP_FLAG_PATCHED_MASK;
511
a02d0f6f 512 *$self->{ZipData}{Streaming} = $streamingMode;
513
a02d0f6f 514
515 if ($filename_length)
516 {
517 $self->smartReadExact(\$filename, $filename_length)
c70c1701 518 or return $self->TruncatedHeader("Filename");
a02d0f6f 519 $keep .= $filename ;
520 }
521
e7d45986 522 my $zip64 = 0 ;
523
a02d0f6f 524 if ($extra_length)
525 {
526 $self->smartReadExact(\$extraField, $extra_length)
c70c1701 527 or return $self->TruncatedHeader("Extra Field");
528
529 my $bad = IO::Compress::Zlib::Extra::parseRawExtra($extraField,
530 \@EXTRA, 1, 0);
531 return $self->HeaderError($bad)
532 if defined $bad;
533
a02d0f6f 534 $keep .= $extraField ;
e7d45986 535
536 my %Extra ;
537 for (@EXTRA)
538 {
539 $Extra{$_->[0]} = \$_->[1];
540 }
541
542 if (defined $Extra{ZIP_EXTRA_ID_ZIP64()})
543 {
544 $zip64 = 1 ;
545
546 my $buff = ${ $Extra{ZIP_EXTRA_ID_ZIP64()} };
547
258133d1 548 # TODO - This code assumes that all the fields in the Zip64
549 # extra field aren't necessarily present. The spec says that
550 # they only exist if the equivalent local headers are -1.
551 # Need to check that info-zip fills out -1 in the local header
552 # correctly.
553
554 if (! $streamingMode) {
555 my $offset = 0 ;
556
557 $uncompressedLength = U64::newUnpack_V64 substr($buff, 0, 8)
558 if $uncompressedLength == 0xFFFF ;
559
560 $offset += 8 ;
561
562 $compressedLength = U64::newUnpack_V64 substr($buff, $offset, 8)
563 if $compressedLength == 0xFFFF ;
564
565 $offset += 8 ;
566
567 #my $cheaderOffset = U64::newUnpack_V64 substr($buff, 16, 8);
568 #my $diskNumber = unpack ("V", substr($buff, 24, 4));
569 }
e7d45986 570 }
571 }
572
573 *$self->{ZipData}{Zip64} = $zip64;
574
575 if (! $streamingMode) {
576 *$self->{ZipData}{Streaming} = 0;
577 *$self->{ZipData}{Crc32} = $crc32;
578 *$self->{ZipData}{CompressedLen} = $compressedLength;
579 *$self->{ZipData}{UnCompressedLen} = $uncompressedLength;
580 *$self->{CompressedInputLengthRemaining} =
581 *$self->{CompressedInputLength} = $compressedLength->get32bit();
a02d0f6f 582 }
583
2b4e0969 584 *$self->{ZipData}{Method} = $compressedMethod;
c70c1701 585 if ($compressedMethod == ZIP_CM_DEFLATE)
a02d0f6f 586 {
2b4e0969 587 *$self->{Type} = 'zip-deflate';
588 }
c70c1701 589 elsif ($compressedMethod == ZIP_CM_BZIP2)
2b4e0969 590 {
591 #if (! defined $IO::Uncompress::Adapter::Bunzip2::VERSION)
592
593 *$self->{Type} = 'zip-bzip2';
594
595 my $obj = IO::Uncompress::Adapter::Bunzip2::mkUncompObject(
596 );
597
598 *$self->{Uncomp} = $obj;
599 *$self->{ZipData}{CRC32} = crc32(undef);
600
a02d0f6f 601 }
c70c1701 602 elsif ($compressedMethod == ZIP_CM_STORE)
a02d0f6f 603 {
604 # TODO -- add support for reading uncompressed
605
2b4e0969 606 *$self->{Type} = 'zip-stored';
a02d0f6f 607
608 my $obj = IO::Uncompress::Adapter::Identity::mkUncompObject(# $got->value('CRC32'),
609 # $got->value('ADLER32'),
610 );
611
612 *$self->{Uncomp} = $obj;
613
614 }
615 else
616 {
617 return $self->HeaderError("Unsupported Compression format $compressedMethod");
618 }
619
620 return {
621 'Type' => 'zip',
622 'FingerprintLength' => 4,
623 #'HeaderLength' => $compressedMethod == 8 ? length $keep : 0,
624 'HeaderLength' => length $keep,
e7d45986 625 'Zip64' => $zip64,
626 'TrailerLength' => ! $streamingMode ? 0 : $zip64 ? 24 : 16,
a02d0f6f 627 'Header' => $keep,
628 'CompressedLength' => $compressedLength ,
629 'UncompressedLength' => $uncompressedLength ,
630 'CRC32' => $crc32 ,
631 'Name' => $filename,
632 'Time' => _dosToUnixTime($lastModTime),
633 'Stream' => $streamingMode,
634
635 'MethodID' => $compressedMethod,
c70c1701 636 'MethodName' => $compressedMethod == ZIP_CM_DEFLATE
a02d0f6f 637 ? "Deflated"
c70c1701 638 : $compressedMethod == ZIP_CM_BZIP2
639 ? "Bzip2"
640 : $compressedMethod == ZIP_CM_STORE
641 ? "Stored"
642 : "Unknown" ,
a02d0f6f 643
644# 'TextFlag' => $flag & GZIP_FLG_FTEXT ? 1 : 0,
645# 'HeaderCRCFlag' => $flag & GZIP_FLG_FHCRC ? 1 : 0,
646# 'NameFlag' => $flag & GZIP_FLG_FNAME ? 1 : 0,
647# 'CommentFlag' => $flag & GZIP_FLG_FCOMMENT ? 1 : 0,
648# 'ExtraFlag' => $flag & GZIP_FLG_FEXTRA ? 1 : 0,
649# 'Comment' => $comment,
650# 'OsID' => $os,
651# 'OsName' => defined $GZIP_OS_Names{$os}
652# ? $GZIP_OS_Names{$os} : "Unknown",
653# 'HeaderCRC' => $HeaderCRC,
654# 'Flags' => $flag,
655# 'ExtraFlags' => $xfl,
c70c1701 656 'ExtraFieldRaw' => $extraField,
657 'ExtraField' => [ @EXTRA ],
a02d0f6f 658
659
660 }
661}
662
2b4e0969 663sub filterUncompressed
664{
665 my $self = shift ;
666
667 if (*$self->{ZipData}{Method} == 12) {
668 *$self->{ZipData}{CRC32} = crc32(${$_[0]}, *$self->{ZipData}{CRC32});
669 }
670 else {
671 *$self->{ZipData}{CRC32} = *$self->{Uncomp}->crc32() ;
672 }
673}
674
675
a02d0f6f 676# from Archive::Zip
677sub _dosToUnixTime
678{
679 #use Time::Local 'timelocal_nocheck';
680 use Time::Local 'timelocal';
681
682 my $dt = shift;
683
684 my $year = ( ( $dt >> 25 ) & 0x7f ) + 80;
685 my $mon = ( ( $dt >> 21 ) & 0x0f ) - 1;
686 my $mday = ( ( $dt >> 16 ) & 0x1f );
687
688 my $hour = ( ( $dt >> 11 ) & 0x1f );
689 my $min = ( ( $dt >> 5 ) & 0x3f );
690 my $sec = ( ( $dt << 1 ) & 0x3e );
691
692 # catch errors
693 my $time_t =
694 eval { timelocal( $sec, $min, $hour, $mday, $mon, $year ); };
695 return 0
696 if $@;
697 return $time_t;
698}
699
700
7011;
702
703__END__
704
705
706=head1 NAME
707
708
cb7abd7f 709
710IO::Uncompress::Unzip - Read zip files/buffers
711
a02d0f6f 712
713
714=head1 SYNOPSIS
715
716 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
717
718 my $status = unzip $input => $output [,OPTS]
719 or die "unzip failed: $UnzipError\n";
720
721 my $z = new IO::Uncompress::Unzip $input [OPTS]
722 or die "unzip failed: $UnzipError\n";
723
724 $status = $z->read($buffer)
725 $status = $z->read($buffer, $length)
726 $status = $z->read($buffer, $length, $offset)
727 $line = $z->getline()
728 $char = $z->getc()
729 $char = $z->ungetc()
730 $char = $z->opened()
731
732 $status = $z->inflateSync()
733
e7d45986 734 $data = $z->trailingData()
735 $status = $z->nextStream()
a02d0f6f 736 $data = $z->getHeaderInfo()
737 $z->tell()
738 $z->seek($position, $whence)
739 $z->binmode()
740 $z->fileno()
741 $z->eof()
742 $z->close()
743
744 $UnzipError ;
745
746 # IO::File mode
747
748 <$z>
749 read($z, $buffer);
750 read($z, $buffer, $length);
751 read($z, $buffer, $length, $offset);
752 tell($z)
753 seek($z, $position, $whence)
754 binmode($z)
755 fileno($z)
756 eof($z)
757 close($z)
758
759
760=head1 DESCRIPTION
761
762
763
a02d0f6f 764This module provides a Perl interface that allows the reading of
765zlib files/buffers.
766
767For writing zip files/buffers, see the companion module IO::Compress::Zip.
768
769
770
cb7abd7f 771
772
a02d0f6f 773=head1 Functional Interface
774
775A top-level function, C<unzip>, is provided to carry out
776"one-shot" uncompression between buffers and/or files. For finer
777control over the uncompression process, see the L</"OO Interface">
778section.
779
780 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
781
782 unzip $input => $output [,OPTS]
783 or die "unzip failed: $UnzipError\n";
784
785
786
787The functional interface needs Perl5.005 or better.
788
789
790=head2 unzip $input => $output [, OPTS]
791
792
793C<unzip> expects at least two parameters, C<$input> and C<$output>.
794
795=head3 The C<$input> parameter
796
797The parameter, C<$input>, is used to define the source of
798the compressed data.
799
800It can take one of the following forms:
801
802=over 5
803
804=item A filename
805
806If the C<$input> parameter is a simple scalar, it is assumed to be a
807filename. This file will be opened for reading and the input data
808will be read from it.
809
810=item A filehandle
811
812If the C<$input> parameter is a filehandle, the input data will be
813read from it.
814The string '-' can be used as an alias for standard input.
815
816=item A scalar reference
817
818If C<$input> is a scalar reference, the input data will be read
819from C<$$input>.
820
821=item An array reference
822
823If C<$input> is an array reference, each element in the array must be a
824filename.
825
826The input data will be read from each file in turn.
827
828The complete array will be walked to ensure that it only
829contains valid filenames before any data is uncompressed.
830
831
832
833=item An Input FileGlob string
834
835If C<$input> is a string that is delimited by the characters "<" and ">"
836C<unzip> will assume that it is an I<input fileglob string>. The
837input is the list of files that match the fileglob.
838
839If the fileglob does not match any files ...
840
841See L<File::GlobMapper|File::GlobMapper> for more details.
842
843
844=back
845
846If the C<$input> parameter is any other type, C<undef> will be returned.
847
848
849
850=head3 The C<$output> parameter
851
852The parameter C<$output> is used to control the destination of the
853uncompressed data. This parameter can take one of these forms.
854
855=over 5
856
857=item A filename
858
859If the C<$output> parameter is a simple scalar, it is assumed to be a
860filename. This file will be opened for writing and the uncompressed
861data will be written to it.
862
863=item A filehandle
864
865If the C<$output> parameter is a filehandle, the uncompressed data
866will be written to it.
867The string '-' can be used as an alias for standard output.
868
869
870=item A scalar reference
871
872If C<$output> is a scalar reference, the uncompressed data will be
873stored in C<$$output>.
874
875
876
877=item An Array Reference
878
879If C<$output> is an array reference, the uncompressed data will be
880pushed onto the array.
881
882=item An Output FileGlob
883
884If C<$output> is a string that is delimited by the characters "<" and ">"
885C<unzip> will assume that it is an I<output fileglob string>. The
886output is the list of files that match the fileglob.
887
888When C<$output> is an fileglob string, C<$input> must also be a fileglob
889string. Anything else is an error.
890
891=back
892
893If the C<$output> parameter is any other type, C<undef> will be returned.
894
895
896
897=head2 Notes
898
c70c1701 899
900When C<$input> maps to multiple compressed files/buffers and C<$output> is
901a single file/buffer, after uncompression C<$output> will contain a
902concatenation of all the uncompressed data from each of the input
903files/buffers.
904
905
a02d0f6f 906
907
908
909=head2 Optional Parameters
910
911Unless specified below, the optional parameters for C<unzip>,
912C<OPTS>, are the same as those used with the OO interface defined in the
913L</"Constructor Options"> section below.
914
915=over 5
916
e7d45986 917=item C<< AutoClose => 0|1 >>
a02d0f6f 918
919This option applies to any input or output data streams to
920C<unzip> that are filehandles.
921
922If C<AutoClose> is specified, and the value is true, it will result in all
923input and/or output filehandles being closed once C<unzip> has
924completed.
925
926This parameter defaults to 0.
927
928
e7d45986 929=item C<< BinModeOut => 0|1 >>
a02d0f6f 930
931When writing to a file or filehandle, set C<binmode> before writing to the
932file.
933
934Defaults to 0.
935
936
937
938
939
e7d45986 940=item C<< Append => 0|1 >>
a02d0f6f 941
942TODO
943
e7d45986 944=item C<< MultiStream => 0|1 >>
a02d0f6f 945
258133d1 946
e7d45986 947If the input file/buffer contains multiple compressed data streams, this
948option will uncompress the whole lot as a single data stream.
a02d0f6f 949
e7d45986 950Defaults to 0.
a02d0f6f 951
952
953
258133d1 954
955
956=item C<< TrailingData => $scalar >>
957
958Returns the data, if any, that is present immediately after the compressed
959data stream once uncompression is complete.
960
961This option can be used when there is useful information immediately
962following the compressed data stream, and you don't know the length of the
963compressed data stream.
964
965If the input is a buffer, C<trailingData> will return everything from the
966end of the compressed data stream to the end of the buffer.
967
968If the input is a filehandle, C<trailingData> will return the data that is
969left in the filehandle input buffer once the end of the compressed data
970stream has been reached. You can then use the filehandle to read the rest
971of the input file.
972
973Don't bother using C<trailingData> if the input is a filename.
974
975
976
977If you know the length of the compressed data stream before you start
978uncompressing, you can avoid having to use C<trailingData> by setting the
979C<InputLength> option.
980
981
982
a02d0f6f 983=back
984
985
986
987
988=head2 Examples
989
990To read the contents of the file C<file1.txt.zip> and write the
991compressed data to the file C<file1.txt>.
992
993 use strict ;
994 use warnings ;
995 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
996
997 my $input = "file1.txt.zip";
998 my $output = "file1.txt";
999 unzip $input => $output
1000 or die "unzip failed: $UnzipError\n";
1001
1002
1003To read from an existing Perl filehandle, C<$input>, and write the
1004uncompressed data to a buffer, C<$buffer>.
1005
1006 use strict ;
1007 use warnings ;
1008 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1009 use IO::File ;
1010
1011 my $input = new IO::File "<file1.txt.zip"
1012 or die "Cannot open 'file1.txt.zip': $!\n" ;
1013 my $buffer ;
1014 unzip $input => \$buffer
1015 or die "unzip failed: $UnzipError\n";
1016
1017To uncompress all files in the directory "/my/home" that match "*.txt.zip" and store the compressed data in the same directory
1018
1019 use strict ;
1020 use warnings ;
1021 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1022
1023 unzip '</my/home/*.txt.zip>' => '</my/home/#1.txt>'
1024 or die "unzip failed: $UnzipError\n";
1025
1026and if you want to compress each file one at a time, this will do the trick
1027
1028 use strict ;
1029 use warnings ;
1030 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1031
1032 for my $input ( glob "/my/home/*.txt.zip" )
1033 {
1034 my $output = $input;
1035 $output =~ s/.zip// ;
1036 unzip $input => $output
1037 or die "Error compressing '$input': $UnzipError\n";
1038 }
1039
1040=head1 OO Interface
1041
1042=head2 Constructor
1043
1044The format of the constructor for IO::Uncompress::Unzip is shown below
1045
1046
1047 my $z = new IO::Uncompress::Unzip $input [OPTS]
1048 or die "IO::Uncompress::Unzip failed: $UnzipError\n";
1049
1050Returns an C<IO::Uncompress::Unzip> object on success and undef on failure.
1051The variable C<$UnzipError> will contain an error message on failure.
1052
1053If you are running Perl 5.005 or better the object, C<$z>, returned from
1054IO::Uncompress::Unzip can be used exactly like an L<IO::File|IO::File> filehandle.
1055This means that all normal input file operations can be carried out with
1056C<$z>. For example, to read a line from a compressed file/buffer you can
1057use either of these forms
1058
1059 $line = $z->getline();
1060 $line = <$z>;
1061
1062The mandatory parameter C<$input> is used to determine the source of the
1063compressed data. This parameter can take one of three forms.
1064
1065=over 5
1066
1067=item A filename
1068
1069If the C<$input> parameter is a scalar, it is assumed to be a filename. This
1070file will be opened for reading and the compressed data will be read from it.
1071
1072=item A filehandle
1073
1074If the C<$input> parameter is a filehandle, the compressed data will be
1075read from it.
1076The string '-' can be used as an alias for standard input.
1077
1078
1079=item A scalar reference
1080
1081If C<$input> is a scalar reference, the compressed data will be read from
1082C<$$output>.
1083
1084=back
1085
1086=head2 Constructor Options
1087
1088
1089The option names defined below are case insensitive and can be optionally
1090prefixed by a '-'. So all of the following are valid
1091
1092 -AutoClose
1093 -autoclose
1094 AUTOCLOSE
1095 autoclose
1096
1097OPTS is a combination of the following options:
1098
1099=over 5
1100
e7d45986 1101=item C<< AutoClose => 0|1 >>
a02d0f6f 1102
1103This option is only valid when the C<$input> parameter is a filehandle. If
1104specified, and the value is true, it will result in the file being closed once
1105either the C<close> method is called or the IO::Uncompress::Unzip object is
1106destroyed.
1107
1108This parameter defaults to 0.
1109
e7d45986 1110=item C<< MultiStream => 0|1 >>
a02d0f6f 1111
1112
1113
e7d45986 1114Treats the complete zip file/buffer as a single compressed data
1115stream. When reading in multi-stream mode each member of the zip
1116file/buffer will be uncompressed in turn until the end of the file/buffer
1117is encountered.
a02d0f6f 1118
1119This parameter defaults to 0.
1120
1121
e7d45986 1122=item C<< Prime => $string >>
a02d0f6f 1123
1124This option will uncompress the contents of C<$string> before processing the
1125input file/buffer.
1126
1127This option can be useful when the compressed data is embedded in another
1128file/data structure and it is not possible to work out where the compressed
1129data begins without having to read the first few bytes. If this is the
1130case, the uncompression can be I<primed> with these bytes using this
1131option.
1132
e7d45986 1133=item C<< Transparent => 0|1 >>
a02d0f6f 1134
f6fd7794 1135If this option is set and the input file/buffer is not compressed data,
a02d0f6f 1136the module will allow reading of it anyway.
1137
f6fd7794 1138In addition, if the input file/buffer does contain compressed data and
1139there is non-compressed data immediately following it, setting this option
1140will make this module treat the whole file/bufffer as a single data stream.
1141
a02d0f6f 1142This option defaults to 1.
1143
e7d45986 1144=item C<< BlockSize => $num >>
a02d0f6f 1145
1146When reading the compressed input data, IO::Uncompress::Unzip will read it in
1147blocks of C<$num> bytes.
1148
1149This option defaults to 4096.
1150
e7d45986 1151=item C<< InputLength => $size >>
a02d0f6f 1152
1153When present this option will limit the number of compressed bytes read
1154from the input file/buffer to C<$size>. This option can be used in the
1155situation where there is useful data directly after the compressed data
1156stream and you know beforehand the exact length of the compressed data
1157stream.
1158
1159This option is mostly used when reading from a filehandle, in which case
1160the file pointer will be left pointing to the first byte directly after the
1161compressed data stream.
1162
1163
1164
1165This option defaults to off.
1166
e7d45986 1167=item C<< Append => 0|1 >>
a02d0f6f 1168
1169This option controls what the C<read> method does with uncompressed data.
1170
1171If set to 1, all uncompressed data will be appended to the output parameter
1172of the C<read> method.
1173
1174If set to 0, the contents of the output parameter of the C<read> method
1175will be overwritten by the uncompressed data.
1176
1177Defaults to 0.
1178
e7d45986 1179=item C<< Strict => 0|1 >>
a02d0f6f 1180
1181
1182
1183This option controls whether the extra checks defined below are used when
1184carrying out the decompression. When Strict is on, the extra tests are
1185carried out, when Strict is off they are not.
1186
1187The default for this option is off.
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
258133d1 1202
a02d0f6f 1203=back
1204
1205=head2 Examples
1206
1207TODO
1208
1209=head1 Methods
1210
1211=head2 read
1212
1213Usage is
1214
1215 $status = $z->read($buffer)
1216
1217Reads a block of compressed data (the size the the compressed block is
1218determined by the C<Buffer> option in the constructor), uncompresses it and
1219writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
1220set in the constructor, the uncompressed data will be appended to the
1221C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
1222
1223Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
1224or a negative number on error.
1225
1226=head2 read
1227
1228Usage is
1229
1230 $status = $z->read($buffer, $length)
1231 $status = $z->read($buffer, $length, $offset)
1232
1233 $status = read($z, $buffer, $length)
1234 $status = read($z, $buffer, $length, $offset)
1235
1236Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
1237
1238The main difference between this form of the C<read> method and the
1239previous one, is that this one will attempt to return I<exactly> C<$length>
1240bytes. The only circumstances that this function will not is if end-of-file
1241or an IO error is encountered.
1242
1243Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
1244or a negative number on error.
1245
1246
1247=head2 getline
1248
1249Usage is
1250
1251 $line = $z->getline()
1252 $line = <$z>
1253
1254Reads a single line.
1255
258133d1 1256This method fully supports the use of of the variable C<$/> (or
1257C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
1258determine what constitutes an end of line. Paragraph mode, record mode and
1259file slurp mode are all supported.
a02d0f6f 1260
1261
1262=head2 getc
1263
1264Usage is
1265
1266 $char = $z->getc()
1267
1268Read a single character.
1269
1270=head2 ungetc
1271
1272Usage is
1273
1274 $char = $z->ungetc($string)
1275
1276
1277
1278=head2 inflateSync
1279
1280Usage is
1281
1282 $status = $z->inflateSync()
1283
1284TODO
1285
1286
1287=head2 getHeaderInfo
1288
1289Usage is
1290
1291 $hdr = $z->getHeaderInfo();
1292 @hdrs = $z->getHeaderInfo();
1293
1294This method returns either a hash reference (in scalar context) or a list
1295or hash references (in array context) that contains information about each
1296of the header fields in the compressed data stream(s).
1297
1298
1299
1300
1301=head2 tell
1302
1303Usage is
1304
1305 $z->tell()
1306 tell $z
1307
1308Returns the uncompressed file offset.
1309
1310=head2 eof
1311
1312Usage is
1313
1314 $z->eof();
1315 eof($z);
1316
1317
1318
1319Returns true if the end of the compressed input stream has been reached.
1320
1321
1322
1323=head2 seek
1324
1325 $z->seek($position, $whence);
1326 seek($z, $position, $whence);
1327
1328
1329
1330
1331Provides a sub-set of the C<seek> functionality, with the restriction
1332that it is only legal to seek forward in the input file/buffer.
1333It is a fatal error to attempt to seek backward.
1334
1335
1336
1337The C<$whence> parameter takes one the usual values, namely SEEK_SET,
1338SEEK_CUR or SEEK_END.
1339
1340Returns 1 on success, 0 on failure.
1341
1342=head2 binmode
1343
1344Usage is
1345
1346 $z->binmode
1347 binmode $z ;
1348
1349This is a noop provided for completeness.
1350
1351=head2 opened
1352
1353 $z->opened()
1354
1355Returns true if the object currently refers to a opened file/buffer.
1356
1357=head2 autoflush
1358
1359 my $prev = $z->autoflush()
1360 my $prev = $z->autoflush(EXPR)
1361
1362If the C<$z> object is associated with a file or a filehandle, this method
1363returns the current autoflush setting for the underlying filehandle. If
1364C<EXPR> is present, and is non-zero, it will enable flushing after every
1365write/print operation.
1366
1367If C<$z> is associated with a buffer, this method has no effect and always
1368returns C<undef>.
1369
1370B<Note> that the special variable C<$|> B<cannot> be used to set or
1371retrieve the autoflush setting.
1372
1373=head2 input_line_number
1374
1375 $z->input_line_number()
1376 $z->input_line_number(EXPR)
1377
1378
1379
1380Returns the current uncompressed line number. If C<EXPR> is present it has
1381the effect of setting the line number. Note that setting the line number
1382does not change the current position within the file/buffer being read.
1383
1384The contents of C<$/> are used to to determine what constitutes a line
1385terminator.
1386
1387
1388
1389=head2 fileno
1390
1391 $z->fileno()
1392 fileno($z)
1393
1394If the C<$z> object is associated with a file or a filehandle, this method
1395will return the underlying file descriptor.
1396
1397If the C<$z> object is is associated with a buffer, this method will
1398return undef.
1399
1400=head2 close
1401
1402 $z->close() ;
1403 close $z ;
1404
1405
1406
1407Closes the output file/buffer.
1408
1409
1410
1411For most versions of Perl this method will be automatically invoked if
1412the IO::Uncompress::Unzip object is destroyed (either explicitly or by the
1413variable with the reference to the object going out of scope). The
1414exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
1415these cases, the C<close> method will be called automatically, but
1416not until global destruction of all live objects when the program is
1417terminating.
1418
1419Therefore, if you want your scripts to be able to run on all versions
1420of Perl, you should call C<close> explicitly and not rely on automatic
1421closing.
1422
1423Returns true on success, otherwise 0.
1424
1425If the C<AutoClose> option has been enabled when the IO::Uncompress::Unzip
1426object was created, and the object is associated with a file, the
1427underlying file will also be closed.
1428
1429
1430
1431
e7d45986 1432=head2 nextStream
1433
1434Usage is
1435
1436 my $status = $z->nextStream();
1437
1438Skips to the next compressed data stream in the input file/buffer. If a new
258133d1 1439compressed data stream is found, the eof marker will be cleared and C<$.>
1440will be reset to 0.
e7d45986 1441
1442Returns 1 if a new stream was found, 0 if none was found, and -1 if an
1443error was encountered.
1444
1445=head2 trailingData
1446
1447Usage is
1448
1449 my $data = $z->trailingData();
1450
258133d1 1451Returns the data, if any, that is present immediately after the compressed
1452data stream once uncompression is complete. It only makes sense to call
1453this method once the end of the compressed data stream has been
1454encountered.
1455
1456This option can be used when there is useful information immediately
1457following the compressed data stream, and you don't know the length of the
1458compressed data stream.
1459
1460If the input is a buffer, C<trailingData> will return everything from the
1461end of the compressed data stream to the end of the buffer.
1462
1463If the input is a filehandle, C<trailingData> will return the data that is
1464left in the filehandle input buffer once the end of the compressed data
1465stream has been reached. You can then use the filehandle to read the rest
1466of the input file.
1467
1468Don't bother using C<trailingData> if the input is a filename.
1469
1470
1471
1472If you know the length of the compressed data stream before you start
1473uncompressing, you can avoid having to use C<trailingData> by setting the
1474C<InputLength> option in the constructor.
e7d45986 1475
a02d0f6f 1476=head1 Importing
1477
1478No symbolic constants are required by this IO::Uncompress::Unzip at present.
1479
1480=over 5
1481
1482=item :all
1483
1484Imports C<unzip> and C<$UnzipError>.
1485Same as doing this
1486
1487 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1488
1489=back
1490
1491=head1 EXAMPLES
1492
1493
1494
1495
1496=head1 SEE ALSO
1497
258133d1 1498L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
a02d0f6f 1499
1500L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
1501
1502L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
1503L<Archive::Tar|Archive::Tar>,
1504L<IO::Zlib|IO::Zlib>
1505
1506
1507For RFC 1950, 1951 and 1952 see
1508F<http://www.faqs.org/rfcs/rfc1950.html>,
1509F<http://www.faqs.org/rfcs/rfc1951.html> and
1510F<http://www.faqs.org/rfcs/rfc1952.html>
1511
1512The I<zlib> compression library was written by Jean-loup Gailly
1513F<gzip@prep.ai.mit.edu> and Mark Adler F<madler@alumni.caltech.edu>.
1514
1515The primary site for the I<zlib> compression library is
1516F<http://www.zlib.org>.
1517
1518The primary site for gzip is F<http://www.gzip.org>.
1519
1520
1521
1522
a02d0f6f 1523=head1 AUTHOR
1524
cb7abd7f 1525This module was written by Paul Marquess, F<pmqs@cpan.org>.
a02d0f6f 1526
1527
1528
1529=head1 MODIFICATION HISTORY
1530
1531See the Changes file.
1532
1533=head1 COPYRIGHT AND LICENSE
a02d0f6f 1534
b0cda13f 1535Copyright (c) 2005-2007 Paul Marquess. All rights reserved.
a02d0f6f 1536
1537This program is free software; you can redistribute it and/or
1538modify it under the same terms as Perl itself.
1539