IO::Compress modules
[p5sagit/p5-mst-13.2.git] / ext / Compress / IO / Zlib / lib / IO / Uncompress / Unzip.pm
CommitLineData
a02d0f6f 1package IO::Uncompress::Unzip;
2
3require 5.004 ;
4
5# for RFC1952
6
7use strict ;
8use warnings;
9use bytes;
10
11use IO::Uncompress::RawInflate ;
12use IO::Compress::Base::Common qw(:Status createSelfTiedObject);
13use IO::Uncompress::Adapter::Identity;
c70c1701 14use IO::Compress::Zlib::Extra;
15use IO::Compress::Zip::Constants;
a02d0f6f 16
2b4e0969 17use Compress::Raw::Zlib qw(crc32) ;
c70c1701 18
2b4e0969 19BEGIN
20{
c70c1701 21 eval { require IO::Uncompress::Adapter::Bunzip2 ;
22 import IO::Uncompress::Adapter::Bunzip2 } ;
2b4e0969 23}
24
25
a02d0f6f 26require Exporter ;
27
e7d45986 28our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $UnzipError, %headerLookup);
a02d0f6f 29
258133d1 30$VERSION = '2.000_14';
a02d0f6f 31$UnzipError = '';
32
33@ISA = qw(Exporter IO::Uncompress::RawInflate);
34@EXPORT_OK = qw( $UnzipError unzip );
35%EXPORT_TAGS = %IO::Uncompress::RawInflate::EXPORT_TAGS ;
36push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
37Exporter::export_ok_tags('all');
38
e7d45986 39%headerLookup = (
40 ZIP_CENTRAL_HDR_SIG, \&skipCentralDirectory,
41 ZIP_END_CENTRAL_HDR_SIG, \&skipEndCentralDirectory,
42 ZIP64_END_CENTRAL_REC_HDR_SIG, \&skipCentralDirectory64Rec,
43 ZIP64_END_CENTRAL_LOC_HDR_SIG, \&skipCentralDirectory64Loc,
44 ZIP64_ARCHIVE_EXTRA_SIG, \&skipArchiveExtra,
45 ZIP64_DIGITAL_SIGNATURE_SIG, \&skipDigitalSignature,
46 );
47
a02d0f6f 48sub new
49{
50 my $class = shift ;
51 my $obj = createSelfTiedObject($class, \$UnzipError);
52 $obj->_create(undef, 0, @_);
53}
54
55sub unzip
56{
57 my $obj = createSelfTiedObject(undef, \$UnzipError);
58 return $obj->_inf(@_) ;
59}
60
61sub getExtraParams
62{
63 use IO::Compress::Base::Common qw(:Parse);
64
65
66 return (
67# # Zip header fields
68 'Name' => [1, 1, Parse_any, undef],
69
70# 'Streaming' => [1, 1, Parse_boolean, 1],
71 );
72}
73
74sub ckParams
75{
76 my $self = shift ;
77 my $got = shift ;
78
79 # unzip always needs crc32
80 $got->value('CRC32' => 1);
81
82 *$self->{UnzipData}{Name} = $got->value('Name');
83
84 return 1;
85}
86
87
88sub ckMagic
89{
90 my $self = shift;
91
92 my $magic ;
93 $self->smartReadExact(\$magic, 4);
94
95 *$self->{HeaderPending} = $magic ;
96
97 return $self->HeaderError("Minimum header size is " .
98 4 . " bytes")
99 if length $magic != 4 ;
100
101 return $self->HeaderError("Bad Magic")
102 if ! _isZipMagic($magic) ;
103
104 *$self->{Type} = 'zip';
105
106 return $magic ;
107}
108
109
110
111sub readHeader
112{
113 my $self = shift;
114 my $magic = shift ;
115
116 my $name = *$self->{UnzipData}{Name} ;
117 my $hdr = $self->_readZipHeader($magic) ;
118
119 while (defined $hdr)
120 {
121 if (! defined $name || $hdr->{Name} eq $name)
122 {
123 return $hdr ;
124 }
125
126 # skip the data
127 my $buffer;
128 if (*$self->{ZipData}{Streaming}) {
129
130 while (1) {
131
132 my $b;
133 my $status = $self->smartRead(\$b, 1024 * 16);
134 return undef
135 if $status <= 0 ;
136
137 my $temp_buf;
138 my $out;
139 $status = *$self->{Uncomp}->uncompr(\$b, \$temp_buf, 0, $out);
140
141 return $self->saveErrorString(undef, *$self->{Uncomp}{Error},
142 *$self->{Uncomp}{ErrorNo})
143 if $self->saveStatus($status) == STATUS_ERROR;
144
145 if ($status == STATUS_ENDSTREAM) {
146 *$self->{Uncomp}->reset();
147 $self->pushBack($b) ;
148 last;
149 }
150 }
151
152 # skip the trailer
153 $self->smartReadExact(\$buffer, $hdr->{TrailerLength})
154 or return $self->saveErrorString(undef, "Truncated file");
155 }
156 else {
e7d45986 157 my $c = $hdr->{CompressedLength}->get32bit();
a02d0f6f 158 $self->smartReadExact(\$buffer, $c)
159 or return $self->saveErrorString(undef, "Truncated file");
160 $buffer = '';
161 }
162
163 $self->chkTrailer($buffer) == STATUS_OK
164 or return $self->saveErrorString(undef, "Truncated file");
165
166 $hdr = $self->_readFullZipHeader();
167
168 return $self->saveErrorString(undef, "Cannot find '$name'")
169 if $self->smartEof();
170 }
171
172 return undef;
173}
174
175sub chkTrailer
176{
177 my $self = shift;
178 my $trailer = shift;
179
180 my ($sig, $CRC32, $cSize, $uSize) ;
e7d45986 181 my ($cSizeHi, $uSizeHi) = (0, 0);
a02d0f6f 182 if (*$self->{ZipData}{Streaming}) {
e7d45986 183 $sig = unpack ("V", substr($trailer, 0, 4));
184 $CRC32 = unpack ("V", substr($trailer, 4, 4));
185
186 if (*$self->{ZipData}{Zip64} ) {
187 $cSize = U64::newUnpack_V64 substr($trailer, 8, 8);
188 $uSize = U64::newUnpack_V64 substr($trailer, 16, 8);
189 }
190 else {
191 $cSize = U64::newUnpack_V32 substr($trailer, 8, 4);
192 $uSize = U64::newUnpack_V32 substr($trailer, 12, 4);
193 }
194
a02d0f6f 195 return $self->TrailerError("Data Descriptor signature, got $sig")
c70c1701 196 if $sig != ZIP_DATA_HDR_SIG;
a02d0f6f 197 }
198 else {
199 ($CRC32, $cSize, $uSize) =
200 (*$self->{ZipData}{Crc32},
201 *$self->{ZipData}{CompressedLen},
202 *$self->{ZipData}{UnCompressedLen});
203 }
204
205 if (*$self->{Strict}) {
2b4e0969 206 return $self->TrailerError("CRC mismatch")
207 if $CRC32 != *$self->{ZipData}{CRC32} ;
a02d0f6f 208
e7d45986 209 return $self->TrailerError("CSIZE mismatch.")
210 if ! $cSize->equal(*$self->{CompSize});
a02d0f6f 211
e7d45986 212 return $self->TrailerError("USIZE mismatch.")
213 if ! $uSize->equal(*$self->{UnCompSize});
a02d0f6f 214 }
215
216 my $reachedEnd = STATUS_ERROR ;
217 # check for central directory or end of central directory
218 while (1)
219 {
220 my $magic ;
221 my $got = $self->smartRead(\$magic, 4);
222
223 return $self->saveErrorString(STATUS_ERROR, "Truncated file")
224 if $got != 4 && *$self->{Strict};
225
226 if ($got == 0) {
227 return STATUS_EOF ;
228 }
229 elsif ($got < 0) {
230 return STATUS_ERROR ;
231 }
232 elsif ($got < 4) {
233 $self->pushBack($magic) ;
234 return STATUS_OK ;
235 }
236
237 my $sig = unpack("V", $magic) ;
238
258133d1 239 my $hdr;
240 if ($hdr = $headerLookup{$sig})
a02d0f6f 241 {
258133d1 242 if (&$hdr($self, $magic) != STATUS_OK ) {
a02d0f6f 243 if (*$self->{Strict}) {
244 return STATUS_ERROR ;
245 }
246 else {
247 $self->clearError();
248 return STATUS_OK ;
249 }
250 }
e7d45986 251
252 if ($sig == ZIP_END_CENTRAL_HDR_SIG)
253 {
254 return STATUS_OK ;
255 last;
a02d0f6f 256 }
a02d0f6f 257 }
c70c1701 258 elsif ($sig == ZIP_LOCAL_HDR_SIG)
a02d0f6f 259 {
260 $self->pushBack($magic) ;
261 return STATUS_OK ;
262 }
263 else
264 {
265 # put the data back
266 $self->pushBack($magic) ;
267 last;
268 }
269 }
270
271 return $reachedEnd ;
272}
273
274sub skipCentralDirectory
275{
276 my $self = shift;
277 my $magic = shift ;
278
279 my $buffer;
280 $self->smartReadExact(\$buffer, 46 - 4)
281 or return $self->TrailerError("Minimum header size is " .
282 46 . " bytes") ;
283
284 my $keep = $magic . $buffer ;
285 *$self->{HeaderPending} = $keep ;
286
287 #my $versionMadeBy = unpack ("v", substr($buffer, 4-4, 2));
288 #my $extractVersion = unpack ("v", substr($buffer, 6-4, 2));
289 #my $gpFlag = unpack ("v", substr($buffer, 8-4, 2));
290 #my $compressedMethod = unpack ("v", substr($buffer, 10-4, 2));
291 #my $lastModTime = unpack ("V", substr($buffer, 12-4, 4));
292 #my $crc32 = unpack ("V", substr($buffer, 16-4, 4));
e7d45986 293 my $compressedLength = unpack ("V", substr($buffer, 20-4, 4));
294 my $uncompressedLength = unpack ("V", substr($buffer, 24-4, 4));
a02d0f6f 295 my $filename_length = unpack ("v", substr($buffer, 28-4, 2));
296 my $extra_length = unpack ("v", substr($buffer, 30-4, 2));
297 my $comment_length = unpack ("v", substr($buffer, 32-4, 2));
298 #my $disk_start = unpack ("v", substr($buffer, 34-4, 2));
299 #my $int_file_attrib = unpack ("v", substr($buffer, 36-4, 2));
300 #my $ext_file_attrib = unpack ("V", substr($buffer, 38-4, 2));
301 #my $lcl_hdr_offset = unpack ("V", substr($buffer, 42-4, 2));
302
303
304 my $filename;
305 my $extraField;
306 my $comment ;
307 if ($filename_length)
308 {
309 $self->smartReadExact(\$filename, $filename_length)
2b4e0969 310 or return $self->TruncatedTrailer("filename");
a02d0f6f 311 $keep .= $filename ;
312 }
313
314 if ($extra_length)
315 {
316 $self->smartReadExact(\$extraField, $extra_length)
2b4e0969 317 or return $self->TruncatedTrailer("extra");
a02d0f6f 318 $keep .= $extraField ;
319 }
320
321 if ($comment_length)
322 {
323 $self->smartReadExact(\$comment, $comment_length)
2b4e0969 324 or return $self->TruncatedTrailer("comment");
a02d0f6f 325 $keep .= $comment ;
326 }
327
328 return STATUS_OK ;
329}
330
e7d45986 331sub skipArchiveExtra
332{
333 my $self = shift;
334 my $magic = shift ;
335
336 my $buffer;
337 $self->smartReadExact(\$buffer, 4)
338 or return $self->TrailerError("Minimum header size is " .
339 4 . " bytes") ;
340
341 my $keep = $magic . $buffer ;
342
343 my $size = unpack ("V", $buffer);
344
345 $self->smartReadExact(\$buffer, $size)
346 or return $self->TrailerError("Minimum header size is " .
347 $size . " bytes") ;
348
349 $keep .= $buffer ;
350 *$self->{HeaderPending} = $keep ;
351
352 return STATUS_OK ;
353}
354
355
356sub skipCentralDirectory64Rec
357{
358 my $self = shift;
359 my $magic = shift ;
360
361 my $buffer;
362 $self->smartReadExact(\$buffer, 8)
363 or return $self->TrailerError("Minimum header size is " .
364 8 . " bytes") ;
365
366 my $keep = $magic . $buffer ;
367
368 my ($sizeLo, $sizeHi) = unpack ("V V", $buffer);
369
370 # TODO - take SizeHi into account
371 $self->smartReadExact(\$buffer, $sizeLo)
372 or return $self->TrailerError("Minimum header size is " .
373 $sizeLo . " bytes") ;
374
375 $keep .= $buffer ;
376 *$self->{HeaderPending} = $keep ;
377
378 #my $versionMadeBy = unpack ("v", substr($buffer, 0, 2));
379 #my $extractVersion = unpack ("v", substr($buffer, 2, 2));
380 #my $diskNumber = unpack ("V", substr($buffer, 4, 4));
381 #my $cntrlDirDiskNo = unpack ("V", substr($buffer, 8, 4));
382 #my $entriesInThisCD = unpack ("V V", substr($buffer, 12, 8));
383 #my $entriesInCD = unpack ("V V", substr($buffer, 20, 8));
384 #my $sizeOfCD = unpack ("V V", substr($buffer, 28, 8));
385 #my $offsetToCD = unpack ("V V", substr($buffer, 36, 8));
386
387 return STATUS_OK ;
388}
389
390sub skipCentralDirectory64Loc
391{
392 my $self = shift;
393 my $magic = shift ;
394
395 my $buffer;
396 $self->smartReadExact(\$buffer, 20 - 4)
397 or return $self->TrailerError("Minimum header size is " .
398 20 . " bytes") ;
399
400 my $keep = $magic . $buffer ;
401 *$self->{HeaderPending} = $keep ;
402
403 #my $startCdDisk = unpack ("V", substr($buffer, 4-4, 4));
404 #my $offsetToCD = unpack ("V V", substr($buffer, 8-4, 8));
405 #my $diskCount = unpack ("V", substr($buffer, 16-4, 4));
406
407 return STATUS_OK ;
408}
409
a02d0f6f 410sub skipEndCentralDirectory
411{
412 my $self = shift;
413 my $magic = shift ;
414
415 my $buffer;
416 $self->smartReadExact(\$buffer, 22 - 4)
417 or return $self->TrailerError("Minimum header size is " .
418 22 . " bytes") ;
419
420 my $keep = $magic . $buffer ;
421 *$self->{HeaderPending} = $keep ;
422
423 #my $diskNumber = unpack ("v", substr($buffer, 4-4, 2));
424 #my $cntrlDirDiskNo = unpack ("v", substr($buffer, 6-4, 2));
425 #my $entriesInThisCD = unpack ("v", substr($buffer, 8-4, 2));
426 #my $entriesInCD = unpack ("v", substr($buffer, 10-4, 2));
427 #my $sizeOfCD = unpack ("V", substr($buffer, 12-4, 2));
428 #my $offsetToCD = unpack ("V", substr($buffer, 16-4, 2));
429 my $comment_length = unpack ("v", substr($buffer, 20-4, 2));
430
431
432 my $comment ;
433 if ($comment_length)
434 {
435 $self->smartReadExact(\$comment, $comment_length)
2b4e0969 436 or return $self->TruncatedTrailer("comment");
a02d0f6f 437 $keep .= $comment ;
438 }
439
440 return STATUS_OK ;
441}
442
443
a02d0f6f 444sub _isZipMagic
445{
446 my $buffer = shift ;
447 return 0 if length $buffer < 4 ;
448 my $sig = unpack("V", $buffer) ;
c70c1701 449 return $sig == ZIP_LOCAL_HDR_SIG ;
a02d0f6f 450}
451
452
453sub _readFullZipHeader($)
454{
455 my ($self) = @_ ;
456 my $magic = '' ;
457
458 $self->smartReadExact(\$magic, 4);
459
460 *$self->{HeaderPending} = $magic ;
461
462 return $self->HeaderError("Minimum header size is " .
463 30 . " bytes")
464 if length $magic != 4 ;
465
466
467 return $self->HeaderError("Bad Magic")
468 if ! _isZipMagic($magic) ;
469
470 my $status = $self->_readZipHeader($magic);
471 delete *$self->{Transparent} if ! defined $status ;
472 return $status ;
473}
474
475sub _readZipHeader($)
476{
477 my ($self, $magic) = @_ ;
478 my ($HeaderCRC) ;
479 my ($buffer) = '' ;
480
481 $self->smartReadExact(\$buffer, 30 - 4)
482 or return $self->HeaderError("Minimum header size is " .
483 30 . " bytes") ;
484
485 my $keep = $magic . $buffer ;
486 *$self->{HeaderPending} = $keep ;
487
488 my $extractVersion = unpack ("v", substr($buffer, 4-4, 2));
489 my $gpFlag = unpack ("v", substr($buffer, 6-4, 2));
490 my $compressedMethod = unpack ("v", substr($buffer, 8-4, 2));
491 my $lastModTime = unpack ("V", substr($buffer, 10-4, 4));
492 my $crc32 = unpack ("V", substr($buffer, 14-4, 4));
e7d45986 493 my $compressedLength = new U64 unpack ("V", substr($buffer, 18-4, 4));
494 my $uncompressedLength = new U64 unpack ("V", substr($buffer, 22-4, 4));
a02d0f6f 495 my $filename_length = unpack ("v", substr($buffer, 26-4, 2));
496 my $extra_length = unpack ("v", substr($buffer, 28-4, 2));
497
498 my $filename;
499 my $extraField;
c70c1701 500 my @EXTRA = ();
501 my $streamingMode = ($gpFlag & ZIP_GP_FLAG_STREAMING_MASK) ? 1 : 0 ;
a02d0f6f 502
503 return $self->HeaderError("Streamed Stored content not supported")
504 if $streamingMode && $compressedMethod == 0 ;
505
258133d1 506 return $self->HeaderError("Encrypted content not supported")
507 if $gpFlag & (ZIP_GP_FLAG_ENCRYPTED_MASK|ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK);
508
509 return $self->HeaderError("Patch content not supported")
510 if $gpFlag & ZIP_GP_FLAG_PATCHED_MASK;
511
a02d0f6f 512 *$self->{ZipData}{Streaming} = $streamingMode;
513
a02d0f6f 514
515 if ($filename_length)
516 {
517 $self->smartReadExact(\$filename, $filename_length)
c70c1701 518 or return $self->TruncatedHeader("Filename");
a02d0f6f 519 $keep .= $filename ;
520 }
521
e7d45986 522 my $zip64 = 0 ;
523
a02d0f6f 524 if ($extra_length)
525 {
526 $self->smartReadExact(\$extraField, $extra_length)
c70c1701 527 or return $self->TruncatedHeader("Extra Field");
528
529 my $bad = IO::Compress::Zlib::Extra::parseRawExtra($extraField,
530 \@EXTRA, 1, 0);
531 return $self->HeaderError($bad)
532 if defined $bad;
533
a02d0f6f 534 $keep .= $extraField ;
e7d45986 535
536 my %Extra ;
537 for (@EXTRA)
538 {
539 $Extra{$_->[0]} = \$_->[1];
540 }
541
542 if (defined $Extra{ZIP_EXTRA_ID_ZIP64()})
543 {
544 $zip64 = 1 ;
545
546 my $buff = ${ $Extra{ZIP_EXTRA_ID_ZIP64()} };
547
258133d1 548 # TODO - This code assumes that all the fields in the Zip64
549 # extra field aren't necessarily present. The spec says that
550 # they only exist if the equivalent local headers are -1.
551 # Need to check that info-zip fills out -1 in the local header
552 # correctly.
553
554 if (! $streamingMode) {
555 my $offset = 0 ;
556
557 $uncompressedLength = U64::newUnpack_V64 substr($buff, 0, 8)
558 if $uncompressedLength == 0xFFFF ;
559
560 $offset += 8 ;
561
562 $compressedLength = U64::newUnpack_V64 substr($buff, $offset, 8)
563 if $compressedLength == 0xFFFF ;
564
565 $offset += 8 ;
566
567 #my $cheaderOffset = U64::newUnpack_V64 substr($buff, 16, 8);
568 #my $diskNumber = unpack ("V", substr($buff, 24, 4));
569 }
e7d45986 570 }
571 }
572
573 *$self->{ZipData}{Zip64} = $zip64;
574
575 if (! $streamingMode) {
576 *$self->{ZipData}{Streaming} = 0;
577 *$self->{ZipData}{Crc32} = $crc32;
578 *$self->{ZipData}{CompressedLen} = $compressedLength;
579 *$self->{ZipData}{UnCompressedLen} = $uncompressedLength;
580 *$self->{CompressedInputLengthRemaining} =
581 *$self->{CompressedInputLength} = $compressedLength->get32bit();
a02d0f6f 582 }
583
2b4e0969 584 *$self->{ZipData}{Method} = $compressedMethod;
c70c1701 585 if ($compressedMethod == ZIP_CM_DEFLATE)
a02d0f6f 586 {
2b4e0969 587 *$self->{Type} = 'zip-deflate';
588 }
c70c1701 589 elsif ($compressedMethod == ZIP_CM_BZIP2)
2b4e0969 590 {
591 #if (! defined $IO::Uncompress::Adapter::Bunzip2::VERSION)
592
593 *$self->{Type} = 'zip-bzip2';
594
595 my $obj = IO::Uncompress::Adapter::Bunzip2::mkUncompObject(
596 );
597
598 *$self->{Uncomp} = $obj;
599 *$self->{ZipData}{CRC32} = crc32(undef);
600
a02d0f6f 601 }
c70c1701 602 elsif ($compressedMethod == ZIP_CM_STORE)
a02d0f6f 603 {
604 # TODO -- add support for reading uncompressed
605
2b4e0969 606 *$self->{Type} = 'zip-stored';
a02d0f6f 607
608 my $obj = IO::Uncompress::Adapter::Identity::mkUncompObject(# $got->value('CRC32'),
609 # $got->value('ADLER32'),
610 );
611
612 *$self->{Uncomp} = $obj;
613
614 }
615 else
616 {
617 return $self->HeaderError("Unsupported Compression format $compressedMethod");
618 }
619
620 return {
621 'Type' => 'zip',
622 'FingerprintLength' => 4,
623 #'HeaderLength' => $compressedMethod == 8 ? length $keep : 0,
624 'HeaderLength' => length $keep,
e7d45986 625 'Zip64' => $zip64,
626 'TrailerLength' => ! $streamingMode ? 0 : $zip64 ? 24 : 16,
a02d0f6f 627 'Header' => $keep,
628 'CompressedLength' => $compressedLength ,
629 'UncompressedLength' => $uncompressedLength ,
630 'CRC32' => $crc32 ,
631 'Name' => $filename,
632 'Time' => _dosToUnixTime($lastModTime),
633 'Stream' => $streamingMode,
634
635 'MethodID' => $compressedMethod,
c70c1701 636 'MethodName' => $compressedMethod == ZIP_CM_DEFLATE
a02d0f6f 637 ? "Deflated"
c70c1701 638 : $compressedMethod == ZIP_CM_BZIP2
639 ? "Bzip2"
640 : $compressedMethod == ZIP_CM_STORE
641 ? "Stored"
642 : "Unknown" ,
a02d0f6f 643
644# 'TextFlag' => $flag & GZIP_FLG_FTEXT ? 1 : 0,
645# 'HeaderCRCFlag' => $flag & GZIP_FLG_FHCRC ? 1 : 0,
646# 'NameFlag' => $flag & GZIP_FLG_FNAME ? 1 : 0,
647# 'CommentFlag' => $flag & GZIP_FLG_FCOMMENT ? 1 : 0,
648# 'ExtraFlag' => $flag & GZIP_FLG_FEXTRA ? 1 : 0,
649# 'Comment' => $comment,
650# 'OsID' => $os,
651# 'OsName' => defined $GZIP_OS_Names{$os}
652# ? $GZIP_OS_Names{$os} : "Unknown",
653# 'HeaderCRC' => $HeaderCRC,
654# 'Flags' => $flag,
655# 'ExtraFlags' => $xfl,
c70c1701 656 'ExtraFieldRaw' => $extraField,
657 'ExtraField' => [ @EXTRA ],
a02d0f6f 658
659
660 }
661}
662
2b4e0969 663sub filterUncompressed
664{
665 my $self = shift ;
666
667 if (*$self->{ZipData}{Method} == 12) {
668 *$self->{ZipData}{CRC32} = crc32(${$_[0]}, *$self->{ZipData}{CRC32});
669 }
670 else {
671 *$self->{ZipData}{CRC32} = *$self->{Uncomp}->crc32() ;
672 }
673}
674
675
a02d0f6f 676# from Archive::Zip
677sub _dosToUnixTime
678{
679 #use Time::Local 'timelocal_nocheck';
680 use Time::Local 'timelocal';
681
682 my $dt = shift;
683
684 my $year = ( ( $dt >> 25 ) & 0x7f ) + 80;
685 my $mon = ( ( $dt >> 21 ) & 0x0f ) - 1;
686 my $mday = ( ( $dt >> 16 ) & 0x1f );
687
688 my $hour = ( ( $dt >> 11 ) & 0x1f );
689 my $min = ( ( $dt >> 5 ) & 0x3f );
690 my $sec = ( ( $dt << 1 ) & 0x3e );
691
692 # catch errors
693 my $time_t =
694 eval { timelocal( $sec, $min, $hour, $mday, $mon, $year ); };
695 return 0
696 if $@;
697 return $time_t;
698}
699
700
7011;
702
703__END__
704
705
706=head1 NAME
707
708
cb7abd7f 709
710IO::Uncompress::Unzip - Read zip files/buffers
711
a02d0f6f 712
713
714=head1 SYNOPSIS
715
716 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
717
718 my $status = unzip $input => $output [,OPTS]
719 or die "unzip failed: $UnzipError\n";
720
721 my $z = new IO::Uncompress::Unzip $input [OPTS]
722 or die "unzip failed: $UnzipError\n";
723
724 $status = $z->read($buffer)
725 $status = $z->read($buffer, $length)
726 $status = $z->read($buffer, $length, $offset)
727 $line = $z->getline()
728 $char = $z->getc()
729 $char = $z->ungetc()
730 $char = $z->opened()
731
732 $status = $z->inflateSync()
733
e7d45986 734 $data = $z->trailingData()
735 $status = $z->nextStream()
a02d0f6f 736 $data = $z->getHeaderInfo()
737 $z->tell()
738 $z->seek($position, $whence)
739 $z->binmode()
740 $z->fileno()
741 $z->eof()
742 $z->close()
743
744 $UnzipError ;
745
746 # IO::File mode
747
748 <$z>
749 read($z, $buffer);
750 read($z, $buffer, $length);
751 read($z, $buffer, $length, $offset);
752 tell($z)
753 seek($z, $position, $whence)
754 binmode($z)
755 fileno($z)
756 eof($z)
757 close($z)
758
759
760=head1 DESCRIPTION
761
762
763
764B<WARNING -- This is a Beta release>.
765
766=over 5
767
768=item * DO NOT use in production code.
769
770=item * The documentation is incomplete in places.
771
772=item * Parts of the interface defined here are tentative.
773
774=item * Please report any problems you find.
775
776=back
777
778
779
780
781
782This module provides a Perl interface that allows the reading of
783zlib files/buffers.
784
785For writing zip files/buffers, see the companion module IO::Compress::Zip.
786
787
788
cb7abd7f 789
790
a02d0f6f 791=head1 Functional Interface
792
793A top-level function, C<unzip>, is provided to carry out
794"one-shot" uncompression between buffers and/or files. For finer
795control over the uncompression process, see the L</"OO Interface">
796section.
797
798 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
799
800 unzip $input => $output [,OPTS]
801 or die "unzip failed: $UnzipError\n";
802
803
804
805The functional interface needs Perl5.005 or better.
806
807
808=head2 unzip $input => $output [, OPTS]
809
810
811C<unzip> expects at least two parameters, C<$input> and C<$output>.
812
813=head3 The C<$input> parameter
814
815The parameter, C<$input>, is used to define the source of
816the compressed data.
817
818It can take one of the following forms:
819
820=over 5
821
822=item A filename
823
824If the C<$input> parameter is a simple scalar, it is assumed to be a
825filename. This file will be opened for reading and the input data
826will be read from it.
827
828=item A filehandle
829
830If the C<$input> parameter is a filehandle, the input data will be
831read from it.
832The string '-' can be used as an alias for standard input.
833
834=item A scalar reference
835
836If C<$input> is a scalar reference, the input data will be read
837from C<$$input>.
838
839=item An array reference
840
841If C<$input> is an array reference, each element in the array must be a
842filename.
843
844The input data will be read from each file in turn.
845
846The complete array will be walked to ensure that it only
847contains valid filenames before any data is uncompressed.
848
849
850
851=item An Input FileGlob string
852
853If C<$input> is a string that is delimited by the characters "<" and ">"
854C<unzip> will assume that it is an I<input fileglob string>. The
855input is the list of files that match the fileglob.
856
857If the fileglob does not match any files ...
858
859See L<File::GlobMapper|File::GlobMapper> for more details.
860
861
862=back
863
864If the C<$input> parameter is any other type, C<undef> will be returned.
865
866
867
868=head3 The C<$output> parameter
869
870The parameter C<$output> is used to control the destination of the
871uncompressed data. This parameter can take one of these forms.
872
873=over 5
874
875=item A filename
876
877If the C<$output> parameter is a simple scalar, it is assumed to be a
878filename. This file will be opened for writing and the uncompressed
879data will be written to it.
880
881=item A filehandle
882
883If the C<$output> parameter is a filehandle, the uncompressed data
884will be written to it.
885The string '-' can be used as an alias for standard output.
886
887
888=item A scalar reference
889
890If C<$output> is a scalar reference, the uncompressed data will be
891stored in C<$$output>.
892
893
894
895=item An Array Reference
896
897If C<$output> is an array reference, the uncompressed data will be
898pushed onto the array.
899
900=item An Output FileGlob
901
902If C<$output> is a string that is delimited by the characters "<" and ">"
903C<unzip> will assume that it is an I<output fileglob string>. The
904output is the list of files that match the fileglob.
905
906When C<$output> is an fileglob string, C<$input> must also be a fileglob
907string. Anything else is an error.
908
909=back
910
911If the C<$output> parameter is any other type, C<undef> will be returned.
912
913
914
915=head2 Notes
916
c70c1701 917
918When C<$input> maps to multiple compressed files/buffers and C<$output> is
919a single file/buffer, after uncompression C<$output> will contain a
920concatenation of all the uncompressed data from each of the input
921files/buffers.
922
923
a02d0f6f 924
925
926
927=head2 Optional Parameters
928
929Unless specified below, the optional parameters for C<unzip>,
930C<OPTS>, are the same as those used with the OO interface defined in the
931L</"Constructor Options"> section below.
932
933=over 5
934
e7d45986 935=item C<< AutoClose => 0|1 >>
a02d0f6f 936
937This option applies to any input or output data streams to
938C<unzip> that are filehandles.
939
940If C<AutoClose> is specified, and the value is true, it will result in all
941input and/or output filehandles being closed once C<unzip> has
942completed.
943
944This parameter defaults to 0.
945
946
e7d45986 947=item C<< BinModeOut => 0|1 >>
a02d0f6f 948
949When writing to a file or filehandle, set C<binmode> before writing to the
950file.
951
952Defaults to 0.
953
954
955
956
957
e7d45986 958=item C<< Append => 0|1 >>
a02d0f6f 959
960TODO
961
e7d45986 962=item C<< MultiStream => 0|1 >>
a02d0f6f 963
258133d1 964
e7d45986 965If the input file/buffer contains multiple compressed data streams, this
966option will uncompress the whole lot as a single data stream.
a02d0f6f 967
e7d45986 968Defaults to 0.
a02d0f6f 969
970
971
258133d1 972
973
974=item C<< TrailingData => $scalar >>
975
976Returns the data, if any, that is present immediately after the compressed
977data stream once uncompression is complete.
978
979This option can be used when there is useful information immediately
980following the compressed data stream, and you don't know the length of the
981compressed data stream.
982
983If the input is a buffer, C<trailingData> will return everything from the
984end of the compressed data stream to the end of the buffer.
985
986If the input is a filehandle, C<trailingData> will return the data that is
987left in the filehandle input buffer once the end of the compressed data
988stream has been reached. You can then use the filehandle to read the rest
989of the input file.
990
991Don't bother using C<trailingData> if the input is a filename.
992
993
994
995If you know the length of the compressed data stream before you start
996uncompressing, you can avoid having to use C<trailingData> by setting the
997C<InputLength> option.
998
999
1000
a02d0f6f 1001=back
1002
1003
1004
1005
1006=head2 Examples
1007
1008To read the contents of the file C<file1.txt.zip> and write the
1009compressed data to the file C<file1.txt>.
1010
1011 use strict ;
1012 use warnings ;
1013 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1014
1015 my $input = "file1.txt.zip";
1016 my $output = "file1.txt";
1017 unzip $input => $output
1018 or die "unzip failed: $UnzipError\n";
1019
1020
1021To read from an existing Perl filehandle, C<$input>, and write the
1022uncompressed data to a buffer, C<$buffer>.
1023
1024 use strict ;
1025 use warnings ;
1026 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1027 use IO::File ;
1028
1029 my $input = new IO::File "<file1.txt.zip"
1030 or die "Cannot open 'file1.txt.zip': $!\n" ;
1031 my $buffer ;
1032 unzip $input => \$buffer
1033 or die "unzip failed: $UnzipError\n";
1034
1035To uncompress all files in the directory "/my/home" that match "*.txt.zip" and store the compressed data in the same directory
1036
1037 use strict ;
1038 use warnings ;
1039 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1040
1041 unzip '</my/home/*.txt.zip>' => '</my/home/#1.txt>'
1042 or die "unzip failed: $UnzipError\n";
1043
1044and if you want to compress each file one at a time, this will do the trick
1045
1046 use strict ;
1047 use warnings ;
1048 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1049
1050 for my $input ( glob "/my/home/*.txt.zip" )
1051 {
1052 my $output = $input;
1053 $output =~ s/.zip// ;
1054 unzip $input => $output
1055 or die "Error compressing '$input': $UnzipError\n";
1056 }
1057
1058=head1 OO Interface
1059
1060=head2 Constructor
1061
1062The format of the constructor for IO::Uncompress::Unzip is shown below
1063
1064
1065 my $z = new IO::Uncompress::Unzip $input [OPTS]
1066 or die "IO::Uncompress::Unzip failed: $UnzipError\n";
1067
1068Returns an C<IO::Uncompress::Unzip> object on success and undef on failure.
1069The variable C<$UnzipError> will contain an error message on failure.
1070
1071If you are running Perl 5.005 or better the object, C<$z>, returned from
1072IO::Uncompress::Unzip can be used exactly like an L<IO::File|IO::File> filehandle.
1073This means that all normal input file operations can be carried out with
1074C<$z>. For example, to read a line from a compressed file/buffer you can
1075use either of these forms
1076
1077 $line = $z->getline();
1078 $line = <$z>;
1079
1080The mandatory parameter C<$input> is used to determine the source of the
1081compressed data. This parameter can take one of three forms.
1082
1083=over 5
1084
1085=item A filename
1086
1087If the C<$input> parameter is a scalar, it is assumed to be a filename. This
1088file will be opened for reading and the compressed data will be read from it.
1089
1090=item A filehandle
1091
1092If the C<$input> parameter is a filehandle, the compressed data will be
1093read from it.
1094The string '-' can be used as an alias for standard input.
1095
1096
1097=item A scalar reference
1098
1099If C<$input> is a scalar reference, the compressed data will be read from
1100C<$$output>.
1101
1102=back
1103
1104=head2 Constructor Options
1105
1106
1107The option names defined below are case insensitive and can be optionally
1108prefixed by a '-'. So all of the following are valid
1109
1110 -AutoClose
1111 -autoclose
1112 AUTOCLOSE
1113 autoclose
1114
1115OPTS is a combination of the following options:
1116
1117=over 5
1118
e7d45986 1119=item C<< AutoClose => 0|1 >>
a02d0f6f 1120
1121This option is only valid when the C<$input> parameter is a filehandle. If
1122specified, and the value is true, it will result in the file being closed once
1123either the C<close> method is called or the IO::Uncompress::Unzip object is
1124destroyed.
1125
1126This parameter defaults to 0.
1127
e7d45986 1128=item C<< MultiStream => 0|1 >>
a02d0f6f 1129
1130
1131
e7d45986 1132Treats the complete zip file/buffer as a single compressed data
1133stream. When reading in multi-stream mode each member of the zip
1134file/buffer will be uncompressed in turn until the end of the file/buffer
1135is encountered.
a02d0f6f 1136
1137This parameter defaults to 0.
1138
1139
e7d45986 1140=item C<< Prime => $string >>
a02d0f6f 1141
1142This option will uncompress the contents of C<$string> before processing the
1143input file/buffer.
1144
1145This option can be useful when the compressed data is embedded in another
1146file/data structure and it is not possible to work out where the compressed
1147data begins without having to read the first few bytes. If this is the
1148case, the uncompression can be I<primed> with these bytes using this
1149option.
1150
e7d45986 1151=item C<< Transparent => 0|1 >>
a02d0f6f 1152
1153If this option is set and the input file or buffer is not compressed data,
1154the module will allow reading of it anyway.
1155
1156This option defaults to 1.
1157
e7d45986 1158=item C<< BlockSize => $num >>
a02d0f6f 1159
1160When reading the compressed input data, IO::Uncompress::Unzip will read it in
1161blocks of C<$num> bytes.
1162
1163This option defaults to 4096.
1164
e7d45986 1165=item C<< InputLength => $size >>
a02d0f6f 1166
1167When present this option will limit the number of compressed bytes read
1168from the input file/buffer to C<$size>. This option can be used in the
1169situation where there is useful data directly after the compressed data
1170stream and you know beforehand the exact length of the compressed data
1171stream.
1172
1173This option is mostly used when reading from a filehandle, in which case
1174the file pointer will be left pointing to the first byte directly after the
1175compressed data stream.
1176
1177
1178
1179This option defaults to off.
1180
e7d45986 1181=item C<< Append => 0|1 >>
a02d0f6f 1182
1183This option controls what the C<read> method does with uncompressed data.
1184
1185If set to 1, all uncompressed data will be appended to the output parameter
1186of the C<read> method.
1187
1188If set to 0, the contents of the output parameter of the C<read> method
1189will be overwritten by the uncompressed data.
1190
1191Defaults to 0.
1192
e7d45986 1193=item C<< Strict => 0|1 >>
a02d0f6f 1194
1195
1196
1197This option controls whether the extra checks defined below are used when
1198carrying out the decompression. When Strict is on, the extra tests are
1199carried out, when Strict is off they are not.
1200
1201The default for this option is off.
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
258133d1 1216
a02d0f6f 1217=back
1218
1219=head2 Examples
1220
1221TODO
1222
1223=head1 Methods
1224
1225=head2 read
1226
1227Usage is
1228
1229 $status = $z->read($buffer)
1230
1231Reads a block of compressed data (the size the the compressed block is
1232determined by the C<Buffer> option in the constructor), uncompresses it and
1233writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
1234set in the constructor, the uncompressed data will be appended to the
1235C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
1236
1237Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
1238or a negative number on error.
1239
1240=head2 read
1241
1242Usage is
1243
1244 $status = $z->read($buffer, $length)
1245 $status = $z->read($buffer, $length, $offset)
1246
1247 $status = read($z, $buffer, $length)
1248 $status = read($z, $buffer, $length, $offset)
1249
1250Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
1251
1252The main difference between this form of the C<read> method and the
1253previous one, is that this one will attempt to return I<exactly> C<$length>
1254bytes. The only circumstances that this function will not is if end-of-file
1255or an IO error is encountered.
1256
1257Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
1258or a negative number on error.
1259
1260
1261=head2 getline
1262
1263Usage is
1264
1265 $line = $z->getline()
1266 $line = <$z>
1267
1268Reads a single line.
1269
258133d1 1270This method fully supports the use of of the variable C<$/> (or
1271C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
1272determine what constitutes an end of line. Paragraph mode, record mode and
1273file slurp mode are all supported.
a02d0f6f 1274
1275
1276=head2 getc
1277
1278Usage is
1279
1280 $char = $z->getc()
1281
1282Read a single character.
1283
1284=head2 ungetc
1285
1286Usage is
1287
1288 $char = $z->ungetc($string)
1289
1290
1291
1292=head2 inflateSync
1293
1294Usage is
1295
1296 $status = $z->inflateSync()
1297
1298TODO
1299
1300
1301=head2 getHeaderInfo
1302
1303Usage is
1304
1305 $hdr = $z->getHeaderInfo();
1306 @hdrs = $z->getHeaderInfo();
1307
1308This method returns either a hash reference (in scalar context) or a list
1309or hash references (in array context) that contains information about each
1310of the header fields in the compressed data stream(s).
1311
1312
1313
1314
1315=head2 tell
1316
1317Usage is
1318
1319 $z->tell()
1320 tell $z
1321
1322Returns the uncompressed file offset.
1323
1324=head2 eof
1325
1326Usage is
1327
1328 $z->eof();
1329 eof($z);
1330
1331
1332
1333Returns true if the end of the compressed input stream has been reached.
1334
1335
1336
1337=head2 seek
1338
1339 $z->seek($position, $whence);
1340 seek($z, $position, $whence);
1341
1342
1343
1344
1345Provides a sub-set of the C<seek> functionality, with the restriction
1346that it is only legal to seek forward in the input file/buffer.
1347It is a fatal error to attempt to seek backward.
1348
1349
1350
1351The C<$whence> parameter takes one the usual values, namely SEEK_SET,
1352SEEK_CUR or SEEK_END.
1353
1354Returns 1 on success, 0 on failure.
1355
1356=head2 binmode
1357
1358Usage is
1359
1360 $z->binmode
1361 binmode $z ;
1362
1363This is a noop provided for completeness.
1364
1365=head2 opened
1366
1367 $z->opened()
1368
1369Returns true if the object currently refers to a opened file/buffer.
1370
1371=head2 autoflush
1372
1373 my $prev = $z->autoflush()
1374 my $prev = $z->autoflush(EXPR)
1375
1376If the C<$z> object is associated with a file or a filehandle, this method
1377returns the current autoflush setting for the underlying filehandle. If
1378C<EXPR> is present, and is non-zero, it will enable flushing after every
1379write/print operation.
1380
1381If C<$z> is associated with a buffer, this method has no effect and always
1382returns C<undef>.
1383
1384B<Note> that the special variable C<$|> B<cannot> be used to set or
1385retrieve the autoflush setting.
1386
1387=head2 input_line_number
1388
1389 $z->input_line_number()
1390 $z->input_line_number(EXPR)
1391
1392
1393
1394Returns the current uncompressed line number. If C<EXPR> is present it has
1395the effect of setting the line number. Note that setting the line number
1396does not change the current position within the file/buffer being read.
1397
1398The contents of C<$/> are used to to determine what constitutes a line
1399terminator.
1400
1401
1402
1403=head2 fileno
1404
1405 $z->fileno()
1406 fileno($z)
1407
1408If the C<$z> object is associated with a file or a filehandle, this method
1409will return the underlying file descriptor.
1410
1411If the C<$z> object is is associated with a buffer, this method will
1412return undef.
1413
1414=head2 close
1415
1416 $z->close() ;
1417 close $z ;
1418
1419
1420
1421Closes the output file/buffer.
1422
1423
1424
1425For most versions of Perl this method will be automatically invoked if
1426the IO::Uncompress::Unzip object is destroyed (either explicitly or by the
1427variable with the reference to the object going out of scope). The
1428exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
1429these cases, the C<close> method will be called automatically, but
1430not until global destruction of all live objects when the program is
1431terminating.
1432
1433Therefore, if you want your scripts to be able to run on all versions
1434of Perl, you should call C<close> explicitly and not rely on automatic
1435closing.
1436
1437Returns true on success, otherwise 0.
1438
1439If the C<AutoClose> option has been enabled when the IO::Uncompress::Unzip
1440object was created, and the object is associated with a file, the
1441underlying file will also be closed.
1442
1443
1444
1445
e7d45986 1446=head2 nextStream
1447
1448Usage is
1449
1450 my $status = $z->nextStream();
1451
1452Skips to the next compressed data stream in the input file/buffer. If a new
258133d1 1453compressed data stream is found, the eof marker will be cleared and C<$.>
1454will be reset to 0.
e7d45986 1455
1456Returns 1 if a new stream was found, 0 if none was found, and -1 if an
1457error was encountered.
1458
1459=head2 trailingData
1460
1461Usage is
1462
1463 my $data = $z->trailingData();
1464
258133d1 1465Returns the data, if any, that is present immediately after the compressed
1466data stream once uncompression is complete. It only makes sense to call
1467this method once the end of the compressed data stream has been
1468encountered.
1469
1470This option can be used when there is useful information immediately
1471following the compressed data stream, and you don't know the length of the
1472compressed data stream.
1473
1474If the input is a buffer, C<trailingData> will return everything from the
1475end of the compressed data stream to the end of the buffer.
1476
1477If the input is a filehandle, C<trailingData> will return the data that is
1478left in the filehandle input buffer once the end of the compressed data
1479stream has been reached. You can then use the filehandle to read the rest
1480of the input file.
1481
1482Don't bother using C<trailingData> if the input is a filename.
1483
1484
1485
1486If you know the length of the compressed data stream before you start
1487uncompressing, you can avoid having to use C<trailingData> by setting the
1488C<InputLength> option in the constructor.
e7d45986 1489
a02d0f6f 1490=head1 Importing
1491
1492No symbolic constants are required by this IO::Uncompress::Unzip at present.
1493
1494=over 5
1495
1496=item :all
1497
1498Imports C<unzip> and C<$UnzipError>.
1499Same as doing this
1500
1501 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1502
1503=back
1504
1505=head1 EXAMPLES
1506
1507
1508
1509
1510=head1 SEE ALSO
1511
258133d1 1512L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
a02d0f6f 1513
1514L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
1515
1516L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
1517L<Archive::Tar|Archive::Tar>,
1518L<IO::Zlib|IO::Zlib>
1519
1520
1521For RFC 1950, 1951 and 1952 see
1522F<http://www.faqs.org/rfcs/rfc1950.html>,
1523F<http://www.faqs.org/rfcs/rfc1951.html> and
1524F<http://www.faqs.org/rfcs/rfc1952.html>
1525
1526The I<zlib> compression library was written by Jean-loup Gailly
1527F<gzip@prep.ai.mit.edu> and Mark Adler F<madler@alumni.caltech.edu>.
1528
1529The primary site for the I<zlib> compression library is
1530F<http://www.zlib.org>.
1531
1532The primary site for gzip is F<http://www.gzip.org>.
1533
1534
1535
1536
a02d0f6f 1537=head1 AUTHOR
1538
cb7abd7f 1539This module was written by Paul Marquess, F<pmqs@cpan.org>.
a02d0f6f 1540
1541
1542
1543=head1 MODIFICATION HISTORY
1544
1545See the Changes file.
1546
1547=head1 COPYRIGHT AND LICENSE
a02d0f6f 1548
1549Copyright (c) 2005-2006 Paul Marquess. All rights reserved.
1550
1551This program is free software; you can redistribute it and/or
1552modify it under the same terms as Perl itself.
1553