IO::Compress::* 2.000_13
[p5sagit/p5-mst-13.2.git] / ext / Compress / IO / Zlib / lib / IO / Uncompress / Unzip.pm
CommitLineData
a02d0f6f 1package IO::Uncompress::Unzip;
2
3require 5.004 ;
4
# for the .ZIP file format (PKWARE APPNOTE.TXT); RFC 1952 describes gzip
6
7use strict ;
8use warnings;
9use bytes;
10
11use IO::Uncompress::RawInflate ;
12use IO::Compress::Base::Common qw(:Status createSelfTiedObject);
13use IO::Uncompress::Adapter::Identity;
c70c1701 14use IO::Compress::Zlib::Extra;
15use IO::Compress::Zip::Constants;
a02d0f6f 16
2b4e0969 17use Compress::Raw::Zlib qw(crc32) ;
c70c1701 18
BEGIN
{
    # Bzip2 support is optional. Try to load the adapter at compile time;
    # the eval swallows the failure when the module is not installed.
    eval {
        require IO::Uncompress::Adapter::Bunzip2 ;
        IO::Uncompress::Adapter::Bunzip2->import() ;
    } ;
}
24
25
require Exporter ;

our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $UnzipError, %headerLookup);

$VERSION    = '2.000_13';
$UnzipError = '';

@ISA       = qw(Exporter IO::Uncompress::RawInflate);
@EXPORT_OK = qw( $UnzipError unzip );

# Start from the export tags of the parent class, then make our own
# symbols available through the ":all" tag.
%EXPORT_TAGS = %IO::Uncompress::RawInflate::EXPORT_TAGS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');

# Dispatch table used by chkTrailer: maps the signature of a record that
# can follow the member data onto the routine that skips over it.
# The constants are called with "()" so that "=>" does not quote them.
%headerLookup = (
    ZIP_CENTRAL_HDR_SIG()           => \&skipCentralDirectory,
    ZIP_END_CENTRAL_HDR_SIG()       => \&skipEndCentralDirectory,
    ZIP64_END_CENTRAL_REC_HDR_SIG() => \&skipCentralDirectory64Rec,
    ZIP64_END_CENTRAL_LOC_HDR_SIG() => \&skipCentralDirectory64Loc,
    ZIP64_ARCHIVE_EXTRA_SIG()       => \&skipArchiveExtra,
    ZIP64_DIGITAL_SIGNATURE_SIG()   => \&skipDigitalSignature,
);
47
# Constructor for the OO interface.
#
# Creates the self-tied object (errors are reported through $UnzipError)
# and hands the remaining arguments on to _create, whose result is
# returned to the caller.
sub new
{
    my $class = shift ;

    my $self = createSelfTiedObject($class, \$UnzipError);
    return $self->_create(undef, 0, @_);
}
54
# Functional ("one-shot") interface.
#
# Builds a throw-away object bound to $UnzipError and passes the caller's
# arguments, unchanged, to _inf; the result of _inf is returned.
sub unzip
{
    my $self = createSelfTiedObject(undef, \$UnzipError);
    return $self->_inf(@_) ;
}
60
# Returns the list of constructor options specific to this class, as
# "name => [spec]" pairs. At present the only option is 'Name'.
sub getExtraParams
{
    use IO::Compress::Base::Common qw(:Parse);

    return (
            # Zip header fields
            Name => [ 1, 1, Parse_any, undef ],

            # 'Streaming' => [1, 1, Parse_boolean, 1],
        );
}
73
# Post-processes the parsed constructor options.
#
#   $got - the parsed-options object; it is read and written through its
#          value() method.
#
# Forces the CRC32 option on and remembers the requested member name.
# Always returns 1.
sub ckParams
{
    my ($self, $got) = @_ ;

    # unzip always needs crc32
    $got->value('CRC32' => 1);

    # Member name to look for (readHeader consults this); may be undef.
    *$self->{UnzipData}{Name} = $got->value('Name');

    return 1;
}
86
87
# Reads the first four bytes of the input and checks that they are the
# signature of a Zip local file header.
#
# Returns the signature bytes on success; otherwise returns whatever
# HeaderError returns. The bytes read are recorded in HeaderPending
# either way.
sub ckMagic
{
    my ($self) = @_ ;

    my $signature ;
    $self->smartReadExact(\$signature, 4);

    *$self->{HeaderPending} = $signature ;

    if (length $signature != 4) {
        return $self->HeaderError("Minimum header size is " .
                                  4 . " bytes") ;
    }

    unless (_isZipMagic($signature)) {
        return $self->HeaderError("Bad Magic") ;
    }

    *$self->{Type} = 'zip';

    return $signature ;
}
108
109
110
# Reads the local header of the member to be uncompressed.
#
#   $magic - the 4-byte signature already consumed by the caller.
#
# When the 'Name' option was not given, the first member's header is
# returned. Otherwise members are skipped (data, trailer and any
# following records) until one whose name matches is found.
#
# Returns the header hash reference, or undef once no further header
# can be read; the error paths return whatever saveErrorString returns.
sub readHeader
{
    my ($self, $magic) = @_ ;

    my $wanted = *$self->{UnzipData}{Name} ;
    my $hdr    = $self->_readZipHeader($magic) ;

    while (defined $hdr)
    {
        # No name requested: the current member is the one to use.
        return $hdr
            if ! defined $wanted ;

        return $hdr
            if $hdr->{Name} eq $wanted ;

        # Not the member we are after: skip the data
        my $trailer ;
        if (! *$self->{ZipData}{Streaming}) {
            # The compressed size is known from the header, so the
            # payload can be read and discarded in one go.
            my $size = $hdr->{CompressedLength}->get32bit();
            $self->smartReadExact(\$trailer, $size)
                or return $self->saveErrorString(undef, "Truncated file");
            $trailer = '';
        }
        else {
            # The size is not in the header, so the data has to be run
            # through the uncompressor to find where the stream ends.
            while (1) {

                my $chunk ;
                my $status = $self->smartRead(\$chunk, 1024 * 16);
                return undef
                    if $status <= 0 ;

                my $discard ;
                my $out ;
                $status = *$self->{Uncomp}->uncompr(\$chunk, \$discard, 0, $out);

                return $self->saveErrorString(undef, *$self->{Uncomp}{Error},
                                                     *$self->{Uncomp}{ErrorNo})
                    if $self->saveStatus($status) == STATUS_ERROR;

                next
                    if $status != STATUS_ENDSTREAM ;

                # End of this member: hand back what is left of $chunk.
                *$self->{Uncomp}->reset();
                $self->pushBack($chunk) ;
                last;
            }

            # skip the trailer
            $self->smartReadExact(\$trailer, $hdr->{TrailerLength})
                or return $self->saveErrorString(undef, "Truncated file");
        }

        $self->chkTrailer($trailer) == STATUS_OK
            or return $self->saveErrorString(undef, "Truncated file");

        $hdr = $self->_readFullZipHeader();

        return $self->saveErrorString(undef, "Cannot find '$wanted'")
            if $self->smartEof();
    }

    return undef;
}
174
# Validates the trailer of the member just read, then consumes any
# records that follow it.
#
#   $trailer - the raw data descriptor for a streamed member; not used
#              for a non-streamed member, where the values recorded in
#              ZipData from the local header are used instead.
#
# In Strict mode the CRC and the compressed/uncompressed sizes are
# compared with the values accumulated while reading.
#
# Returns
#   STATUS_OK    - another local header follows (it is pushed back), the
#                  end of central directory record was consumed, fewer
#                  than four bytes were left, or (non-Strict only) a
#                  skip routine failed;
#   STATUS_EOF   - there was no more input;
#   STATUS_ERROR - a read failed, an unrecognised signature follows (it
#                  is pushed back), or (Strict only) a skip routine
#                  failed.
# A bad data descriptor signature or a Strict-mode mismatch returns
# whatever TrailerError returns.
sub chkTrailer
{
    my $self    = shift;
    my $trailer = shift;

    my ($sig, $CRC32, $cSize, $uSize) ;
    if (*$self->{ZipData}{Streaming}) {
        $sig   = unpack ("V", substr($trailer, 0, 4));
        $CRC32 = unpack ("V", substr($trailer, 4, 4));

        # The two size fields are 8 bytes each for Zip64, 4 bytes otherwise
        if (*$self->{ZipData}{Zip64} ) {
            $cSize = U64::newUnpack_V64(substr($trailer,  8, 8));
            $uSize = U64::newUnpack_V64(substr($trailer, 16, 8));
        }
        else {
            $cSize = U64::newUnpack_V32(substr($trailer,  8, 4));
            $uSize = U64::newUnpack_V32(substr($trailer, 12, 4));
        }

        return $self->TrailerError("Data Descriptor signature, got $sig")
            if $sig != ZIP_DATA_HDR_SIG;
    }
    else {
        ($CRC32, $cSize, $uSize) =
            (*$self->{ZipData}{Crc32},
             *$self->{ZipData}{CompressedLen},
             *$self->{ZipData}{UnCompressedLen});
    }

    if (*$self->{Strict}) {
        return $self->TrailerError("CRC mismatch")
            if $CRC32 != *$self->{ZipData}{CRC32} ;

        return $self->TrailerError("CSIZE mismatch.")
            if ! $cSize->equal(*$self->{CompSize});

        return $self->TrailerError("USIZE mismatch.")
            if ! $uSize->equal(*$self->{UnCompSize});
    }

    # check for central directory or end of central directory
    while (1)
    {
        my $magic ;
        my $got = $self->smartRead(\$magic, 4);

        return $self->saveErrorString(STATUS_ERROR, "Truncated file")
            if $got != 4 && *$self->{Strict};

        if ($got == 0) {
            return STATUS_EOF ;
        }
        elsif ($got < 0) {
            return STATUS_ERROR ;
        }
        elsif ($got < 4) {
            $self->pushBack($magic) ;
            return STATUS_OK ;
        }

        my $nextSig = unpack("V", $magic) ;

        if ($headerLookup{$nextSig})
        {
            # A record we know how to skip
            if ($headerLookup{$nextSig}->($self, $magic) != STATUS_OK ) {
                return STATUS_ERROR
                    if *$self->{Strict} ;

                $self->clearError();
                return STATUS_OK ;
            }

            # The end of central directory record is the last one
            return STATUS_OK
                if $nextSig == ZIP_END_CENTRAL_HDR_SIG ;
        }
        elsif ($nextSig == ZIP_LOCAL_HDR_SIG)
        {
            # Start of the next member: leave it for the header reader
            $self->pushBack($magic) ;
            return STATUS_OK ;
        }
        else
        {
            # put the data back
            $self->pushBack($magic) ;
            last;
        }
    }

    # Only reached when an unrecognised signature was found
    return STATUS_ERROR ;
}
272
# Skips over one central directory file header.
#
#   $magic - the 4-byte signature already consumed by the caller.
#
# Reads the remaining 42 bytes of the fixed part, then the filename,
# extra field and comment whose lengths it declares.
#
# Returns STATUS_OK on success; otherwise returns whatever TrailerError
# or TruncatedTrailer returns.
sub skipCentralDirectory
{
    my $self  = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 46 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     46 . " bytes") ;

    # NOTE(review): as before, only the fixed part of the record is
    # recorded in HeaderPending, not the variable-length fields - confirm
    # that this is what is wanted.
    *$self->{HeaderPending} = $magic . $buffer ;

    # Offsets are relative to the start of the record; the "-4" accounts
    # for the signature, which is not part of $buffer. Only the three
    # length fields are needed to skip the record.
    #my $versionMadeBy      = unpack ("v", substr($buffer, 4-4,  2));
    #my $extractVersion     = unpack ("v", substr($buffer, 6-4,  2));
    #my $gpFlag             = unpack ("v", substr($buffer, 8-4,  2));
    #my $compressedMethod   = unpack ("v", substr($buffer, 10-4, 2));
    #my $lastModTime        = unpack ("V", substr($buffer, 12-4, 4));
    #my $crc32              = unpack ("V", substr($buffer, 16-4, 4));
    #my $compressedLength   = unpack ("V", substr($buffer, 20-4, 4));
    #my $uncompressedLength = unpack ("V", substr($buffer, 24-4, 4));
    my ($filename_length, $extra_length, $comment_length)
        = unpack ("v v v", substr($buffer, 28-4, 6));
    #my $disk_start         = unpack ("v", substr($buffer, 34-4, 2));
    #my $int_file_attrib    = unpack ("v", substr($buffer, 36-4, 2));
    #my $ext_file_attrib    = unpack ("V", substr($buffer, 38-4, 2));
    #my $lcl_hdr_offset     = unpack ("V", substr($buffer, 42-4, 2));

    # The variable-length fields follow the fixed part in this order.
    for my $field ( [ "filename", $filename_length ],
                    [ "extra",    $extra_length    ],
                    [ "comment",  $comment_length  ] )
    {
        my ($what, $length) = @$field ;

        next
            unless $length ;

        my $skipped ;
        $self->smartReadExact(\$skipped, $length)
            or return $self->TruncatedTrailer($what);
    }

    return STATUS_OK ;
}
329
# Skips over an archive extra data record.
#
#   $magic - the 4-byte signature already consumed by the caller.
#
# The record is a 4-byte length followed by that many bytes of data.
# On success the complete record is recorded in HeaderPending and
# STATUS_OK is returned; otherwise whatever TrailerError returns.
sub skipArchiveExtra
{
    my ($self, $magic) = @_ ;

    my $lengthField ;
    $self->smartReadExact(\$lengthField, 4)
        or return $self->TrailerError("Minimum header size is " .
                                     4 . " bytes") ;

    my $size = unpack ("V", $lengthField);

    my $payload ;
    $self->smartReadExact(\$payload, $size)
        or return $self->TrailerError("Minimum header size is " .
                                     $size . " bytes") ;

    *$self->{HeaderPending} = $magic . $lengthField . $payload ;

    return STATUS_OK ;
}
353
354
# Skips over a Zip64 end of central directory record.
#
#   $magic - the 4-byte signature already consumed by the caller.
#
# The record is an 8-byte length (read as two 32-bit halves) followed by
# that many bytes. On success the complete record is recorded in
# HeaderPending and STATUS_OK is returned; otherwise whatever
# TrailerError returns.
sub skipCentralDirectory64Rec
{
    my ($self, $magic) = @_ ;

    my $lengthField ;
    $self->smartReadExact(\$lengthField, 8)
        or return $self->TrailerError("Minimum header size is " .
                                     8 . " bytes") ;

    my ($sizeLo, $sizeHi) = unpack ("V V", $lengthField);

    # TODO - take SizeHi into account
    my $record ;
    $self->smartReadExact(\$record, $sizeLo)
        or return $self->TrailerError("Minimum header size is " .
                                     $sizeLo . " bytes") ;

    *$self->{HeaderPending} = $magic . $lengthField . $record ;

    # Fields available in $record, should they ever be needed:
    #my $versionMadeBy   = unpack ("v",   substr($record,  0, 2));
    #my $extractVersion  = unpack ("v",   substr($record,  2, 2));
    #my $diskNumber      = unpack ("V",   substr($record,  4, 4));
    #my $cntrlDirDiskNo  = unpack ("V",   substr($record,  8, 4));
    #my $entriesInThisCD = unpack ("V V", substr($record, 12, 8));
    #my $entriesInCD     = unpack ("V V", substr($record, 20, 8));
    #my $sizeOfCD        = unpack ("V V", substr($record, 28, 8));
    #my $offsetToCD      = unpack ("V V", substr($record, 36, 8));

    return STATUS_OK ;
}
388
# Skips over a Zip64 end of central directory locator.
#
#   $magic - the 4-byte signature already consumed by the caller.
#
# The locator is a fixed 20 bytes including the signature. On success
# the record is recorded in HeaderPending and STATUS_OK is returned;
# otherwise whatever TrailerError returns.
sub skipCentralDirectory64Loc
{
    my ($self, $magic) = @_ ;

    my $body ;
    $self->smartReadExact(\$body, 20 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     20 . " bytes") ;

    *$self->{HeaderPending} = $magic . $body ;

    # Fields available in $body, should they ever be needed (offsets are
    # relative to the start of the record, hence the "-4"):
    #my $startCdDisk = unpack ("V",   substr($body, 4-4,  4));
    #my $offsetToCD  = unpack ("V V", substr($body, 8-4,  8));
    #my $diskCount   = unpack ("V",   substr($body, 16-4, 4));

    return STATUS_OK ;
}
408
# Skips over the end of central directory record.
#
#   $magic - the 4-byte signature already consumed by the caller.
#
# Reads the remaining 18 bytes of the fixed part, then the archive
# comment whose length it declares. Only the fixed part is recorded in
# HeaderPending.
#
# Returns STATUS_OK on success; otherwise returns whatever TrailerError
# or TruncatedTrailer returns.
sub skipEndCentralDirectory
{
    my ($self, $magic) = @_ ;

    my $fixed ;
    $self->smartReadExact(\$fixed, 22 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     22 . " bytes") ;

    *$self->{HeaderPending} = $magic . $fixed ;

    # Offsets are relative to the start of the record; the "-4" accounts
    # for the signature, which is not part of $fixed.
    #my $diskNumber      = unpack ("v", substr($fixed, 4-4,  2));
    #my $cntrlDirDiskNo  = unpack ("v", substr($fixed, 6-4,  2));
    #my $entriesInThisCD = unpack ("v", substr($fixed, 8-4,  2));
    #my $entriesInCD     = unpack ("v", substr($fixed, 10-4, 2));
    #my $sizeOfCD        = unpack ("V", substr($fixed, 12-4, 2));
    #my $offsetToCD      = unpack ("V", substr($fixed, 16-4, 2));
    my $comment_length   = unpack ("v", substr($fixed, 20-4, 2));

    # Nothing more to consume when there is no archive comment
    return STATUS_OK
        unless $comment_length ;

    my $comment ;
    $self->smartReadExact(\$comment, $comment_length)
        or return $self->TruncatedTrailer("comment");

    return STATUS_OK ;
}
441
442
# Returns true when the first four bytes of the given string are the
# signature of a Zip local file header. A string shorter than four
# bytes yields 0.
sub _isZipMagic
{
    my ($candidate) = @_ ;

    return 0
        if length $candidate < 4 ;

    # The signature is stored as a little-endian 32-bit value
    return unpack("V", $candidate) == ZIP_LOCAL_HDR_SIG ;
}
450
451
# Reads a complete local file header, signature included, from the
# current input position.
#
# Returns what _readZipHeader returns for a well-formed signature;
# otherwise returns whatever HeaderError returns. When _readZipHeader
# yields undef the Transparent setting is removed from the object.
sub _readFullZipHeader($)
{
    my $self = shift ;
    my $signature = '' ;

    $self->smartReadExact(\$signature, 4);

    *$self->{HeaderPending} = $signature ;

    if (length $signature != 4) {
        return $self->HeaderError("Minimum header size is " .
                                  30 . " bytes") ;
    }

    unless (_isZipMagic($signature)) {
        return $self->HeaderError("Bad Magic") ;
    }

    my $hdr = $self->_readZipHeader($signature);

    if (! defined $hdr) {
        delete *$self->{Transparent} ;
    }

    return $hdr ;
}
473
# Parses a Zip local file header and prepares the object for reading
# the member's data.
#
#   $magic - the 4-byte signature already consumed by the caller.
#
# Side effects on the object: HeaderPending, Type, the ZipData entries
# (Streaming, Zip64, Method and, for non-streamed members, Crc32,
# CompressedLen and UnCompressedLen), the CompressedInputLength
# counters for non-streamed members, and Uncomp for bzip2 and stored
# members.
#
# Returns a hash reference describing the member on success; otherwise
# returns whatever HeaderError or TruncatedHeader returns.
#
# The "($)" prototype of the original was dropped: the sub takes two
# arguments and is only ever invoked as a method, where prototypes are
# ignored.
sub _readZipHeader
{
    my ($self, $magic) = @_ ;
    my $buffer = '' ;

    $self->smartReadExact(\$buffer, 30 - 4)
        or return $self->HeaderError("Minimum header size is " .
                                     30 . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

    # Offsets are relative to the start of the header; the "-4" accounts
    # for the signature, which is not part of $buffer.
    #my $extractVersion    = unpack ("v", substr($buffer, 4-4,  2));
    my $gpFlag             = unpack ("v", substr($buffer, 6-4,  2));
    my $compressedMethod   = unpack ("v", substr($buffer, 8-4,  2));
    my $lastModTime        = unpack ("V", substr($buffer, 10-4, 4));
    my $crc32              = unpack ("V", substr($buffer, 14-4, 4));
    my $compressedLength   = U64->new(unpack ("V", substr($buffer, 18-4, 4)));
    my $uncompressedLength = U64->new(unpack ("V", substr($buffer, 22-4, 4)));
    my $filename_length    = unpack ("v", substr($buffer, 26-4, 2));
    my $extra_length       = unpack ("v", substr($buffer, 28-4, 2));

    my $filename;
    my $extraField;
    my @EXTRA = ();
    my $streamingMode = ($gpFlag & ZIP_GP_FLAG_STREAMING_MASK) ? 1 : 0 ;

    return $self->HeaderError("Streamed Stored content not supported")
        if $streamingMode && $compressedMethod == 0 ;

    *$self->{ZipData}{Streaming} = $streamingMode;

    if ($filename_length)
    {
        $self->smartReadExact(\$filename, $filename_length)
            or return $self->TruncatedHeader("Filename");
        $keep .= $filename ;
    }

    my $zip64 = 0 ;

    if ($extra_length)
    {
        $self->smartReadExact(\$extraField, $extra_length)
            or return $self->TruncatedHeader("Extra Field");

        my $bad = IO::Compress::Zlib::Extra::parseRawExtra($extraField,
                                                           \@EXTRA, 1, 0);
        return $self->HeaderError($bad)
            if defined $bad;

        $keep .= $extraField ;

        # Index the sub-fields by ID (a later duplicate replaces an
        # earlier one).
        my %Extra ;
        for (@EXTRA)
        {
            $Extra{$_->[0]} = \$_->[1];
        }

        if (defined $Extra{ZIP_EXTRA_ID_ZIP64()})
        {
            $zip64 = 1 ;

            my $buff = ${ $Extra{ZIP_EXTRA_ID_ZIP64()} };

            # NOTE(review): this assumes both 8-byte sizes are present,
            # uncompressed first. The Zip specification appears to make
            # each one optional - TODO confirm against APPNOTE.TXT.
            $uncompressedLength = U64::newUnpack_V64(substr($buff, 0, 8));
            $compressedLength   = U64::newUnpack_V64(substr($buff, 8, 8));
            #my $cheaderOffset  = U64::newUnpack_V64(substr($buff, 16, 8));
            #my $diskNumber     = unpack ("V", substr($buff, 24, 4));
        }
    }

    *$self->{ZipData}{Zip64} = $zip64;

    if (! $streamingMode) {
        # The header values are authoritative; chkTrailer reads them back.
        *$self->{ZipData}{Crc32}           = $crc32;
        *$self->{ZipData}{CompressedLen}   = $compressedLength;
        *$self->{ZipData}{UnCompressedLen} = $uncompressedLength;
        *$self->{CompressedInputLengthRemaining} =
            *$self->{CompressedInputLength} = $compressedLength->get32bit();
    }

    *$self->{ZipData}{Method} = $compressedMethod;
    if ($compressedMethod == ZIP_CM_DEFLATE)
    {
        *$self->{Type} = 'zip-deflate';
    }
    elsif ($compressedMethod == ZIP_CM_BZIP2)
    {
        # The Bunzip2 adapter is loaded on a best-effort basis at compile
        # time. Report a header error rather than dying with "Undefined
        # subroutine" when it is not available.
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined &IO::Uncompress::Adapter::Bunzip2::mkUncompObject ;

        *$self->{Type} = 'zip-bzip2';

        my $obj = IO::Uncompress::Adapter::Bunzip2::mkUncompObject();

        *$self->{Uncomp} = $obj;
        *$self->{ZipData}{CRC32} = crc32(undef);
    }
    elsif ($compressedMethod == ZIP_CM_STORE)
    {
        # TODO -- add support for reading uncompressed

        *$self->{Type} = 'zip-stored';

        my $obj = IO::Uncompress::Adapter::Identity::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    else
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod");
    }

    return {
        'Type'               => 'zip',
        'FingerprintLength'  => 4,
        #'HeaderLength'      => $compressedMethod == 8 ? length $keep : 0,
        'HeaderLength'       => length $keep,
        'Zip64'              => $zip64,
        # A data descriptor follows the data of streamed members only
        'TrailerLength'      => ! $streamingMode ? 0 : $zip64 ? 24 : 16,
        'Header'             => $keep,
        'CompressedLength'   => $compressedLength ,
        'UncompressedLength' => $uncompressedLength ,
        'CRC32'              => $crc32 ,
        'Name'               => $filename,
        'Time'               => _dosToUnixTime($lastModTime),
        'Stream'             => $streamingMode,

        'MethodID'           => $compressedMethod,
        'MethodName'         => $compressedMethod == ZIP_CM_DEFLATE
                                 ? "Deflated"
                                 : $compressedMethod == ZIP_CM_BZIP2
                                     ? "Bzip2"
                                     : $compressedMethod == ZIP_CM_STORE
                                         ? "Stored"
                                         : "Unknown" ,

        'ExtraFieldRaw'      => $extraField,
        'ExtraField'         => [ @EXTRA ],
    }
}
637
# Keeps the running CRC32 of the uncompressed data up to date.
#
#   $_[0] (after $self) - reference to the buffer of freshly
#                         uncompressed data.
#
# For bzip2 members the CRC is accumulated here from the data itself;
# for every other method it is read back from the uncompression object.
sub filterUncompressed
{
    my $self = shift ;

    if (*$self->{ZipData}{Method} == ZIP_CM_BZIP2) {
        *$self->{ZipData}{CRC32} = crc32(${$_[0]}, *$self->{ZipData}{CRC32});
    }
    else {
        *$self->{ZipData}{CRC32} = *$self->{Uncomp}->crc32() ;
    }
}
649
650
# from Archive::Zip
#
# Converts a packed 32-bit DOS date/time, as stored in a Zip header,
# into a Unix time_t in the local time zone.
#
# Returns 0 when the fields do not form a valid date.
sub _dosToUnixTime
{
    #use Time::Local 'timelocal_nocheck';
    use Time::Local 'timelocal';

    my $dt = shift;

    # The year field counts from 1980. A four-digit year is passed to
    # timelocal because two-digit years (80..99) are subject to its
    # rolling-century interpretation and can be read as 2080..2099.
    my $year = ( ( $dt >> 25 ) & 0x7f ) + 1980;
    my $mon  = ( ( $dt >> 21 ) & 0x0f ) - 1;
    my $mday = ( ( $dt >> 16 ) & 0x1f );

    my $hour = ( ( $dt >> 11 ) & 0x1f );
    my $min  = ( ( $dt >> 5 )  & 0x3f );
    # Seconds are stored with two-second resolution
    my $sec  = ( ( $dt << 1 )  & 0x3e );

    # catch errors - timelocal dies on out-of-range fields
    my $time_t =
        eval { timelocal( $sec, $min, $hour, $mday, $mon, $year ); };
    return 0
        if $@;
    return $time_t;
}
674
675
6761;
677
678__END__
679
680
681=head1 NAME
682
683
cb7abd7f 684
685IO::Uncompress::Unzip - Read zip files/buffers
686
a02d0f6f 687
688
689=head1 SYNOPSIS
690
691 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
692
693 my $status = unzip $input => $output [,OPTS]
694 or die "unzip failed: $UnzipError\n";
695
696 my $z = new IO::Uncompress::Unzip $input [OPTS]
697 or die "unzip failed: $UnzipError\n";
698
699 $status = $z->read($buffer)
700 $status = $z->read($buffer, $length)
701 $status = $z->read($buffer, $length, $offset)
702 $line = $z->getline()
703 $char = $z->getc()
704 $char = $z->ungetc()
705 $char = $z->opened()
706
707 $status = $z->inflateSync()
708
e7d45986 709 $data = $z->trailingData()
710 $status = $z->nextStream()
a02d0f6f 711 $data = $z->getHeaderInfo()
712 $z->tell()
713 $z->seek($position, $whence)
714 $z->binmode()
715 $z->fileno()
716 $z->eof()
717 $z->close()
718
719 $UnzipError ;
720
721 # IO::File mode
722
723 <$z>
724 read($z, $buffer);
725 read($z, $buffer, $length);
726 read($z, $buffer, $length, $offset);
727 tell($z)
728 seek($z, $position, $whence)
729 binmode($z)
730 fileno($z)
731 eof($z)
732 close($z)
733
734
735=head1 DESCRIPTION
736
737
738
739B<WARNING -- This is a Beta release>.
740
741=over 5
742
743=item * DO NOT use in production code.
744
745=item * The documentation is incomplete in places.
746
747=item * Parts of the interface defined here are tentative.
748
749=item * Please report any problems you find.
750
751=back
752
753
754
755
756
757This module provides a Perl interface that allows the reading of
758zlib files/buffers.
759
760For writing zip files/buffers, see the companion module IO::Compress::Zip.
761
762
763
cb7abd7f 764
765
766
a02d0f6f 767=head1 Functional Interface
768
769A top-level function, C<unzip>, is provided to carry out
770"one-shot" uncompression between buffers and/or files. For finer
771control over the uncompression process, see the L</"OO Interface">
772section.
773
774 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
775
776 unzip $input => $output [,OPTS]
777 or die "unzip failed: $UnzipError\n";
778
779
780
781The functional interface needs Perl5.005 or better.
782
783
784=head2 unzip $input => $output [, OPTS]
785
786
787C<unzip> expects at least two parameters, C<$input> and C<$output>.
788
789=head3 The C<$input> parameter
790
791The parameter, C<$input>, is used to define the source of
792the compressed data.
793
794It can take one of the following forms:
795
796=over 5
797
798=item A filename
799
800If the C<$input> parameter is a simple scalar, it is assumed to be a
801filename. This file will be opened for reading and the input data
802will be read from it.
803
804=item A filehandle
805
806If the C<$input> parameter is a filehandle, the input data will be
807read from it.
808The string '-' can be used as an alias for standard input.
809
810=item A scalar reference
811
812If C<$input> is a scalar reference, the input data will be read
813from C<$$input>.
814
815=item An array reference
816
817If C<$input> is an array reference, each element in the array must be a
818filename.
819
820The input data will be read from each file in turn.
821
822The complete array will be walked to ensure that it only
823contains valid filenames before any data is uncompressed.
824
825
826
827=item An Input FileGlob string
828
829If C<$input> is a string that is delimited by the characters "<" and ">"
830C<unzip> will assume that it is an I<input fileglob string>. The
831input is the list of files that match the fileglob.
832
833If the fileglob does not match any files ...
834
835See L<File::GlobMapper|File::GlobMapper> for more details.
836
837
838=back
839
840If the C<$input> parameter is any other type, C<undef> will be returned.
841
842
843
844=head3 The C<$output> parameter
845
846The parameter C<$output> is used to control the destination of the
847uncompressed data. This parameter can take one of these forms.
848
849=over 5
850
851=item A filename
852
853If the C<$output> parameter is a simple scalar, it is assumed to be a
854filename. This file will be opened for writing and the uncompressed
855data will be written to it.
856
857=item A filehandle
858
859If the C<$output> parameter is a filehandle, the uncompressed data
860will be written to it.
861The string '-' can be used as an alias for standard output.
862
863
864=item A scalar reference
865
866If C<$output> is a scalar reference, the uncompressed data will be
867stored in C<$$output>.
868
869
870
871=item An Array Reference
872
873If C<$output> is an array reference, the uncompressed data will be
874pushed onto the array.
875
876=item An Output FileGlob
877
878If C<$output> is a string that is delimited by the characters "<" and ">"
879C<unzip> will assume that it is an I<output fileglob string>. The
880output is the list of files that match the fileglob.
881
882When C<$output> is an fileglob string, C<$input> must also be a fileglob
883string. Anything else is an error.
884
885=back
886
887If the C<$output> parameter is any other type, C<undef> will be returned.
888
889
890
891=head2 Notes
892
c70c1701 893
894When C<$input> maps to multiple compressed files/buffers and C<$output> is
895a single file/buffer, after uncompression C<$output> will contain a
896concatenation of all the uncompressed data from each of the input
897files/buffers.
898
899
a02d0f6f 900
901
902
903=head2 Optional Parameters
904
905Unless specified below, the optional parameters for C<unzip>,
906C<OPTS>, are the same as those used with the OO interface defined in the
907L</"Constructor Options"> section below.
908
909=over 5
910
e7d45986 911=item C<< AutoClose => 0|1 >>
a02d0f6f 912
913This option applies to any input or output data streams to
914C<unzip> that are filehandles.
915
916If C<AutoClose> is specified, and the value is true, it will result in all
917input and/or output filehandles being closed once C<unzip> has
918completed.
919
920This parameter defaults to 0.
921
922
e7d45986 923=item C<< BinModeOut => 0|1 >>
a02d0f6f 924
925When writing to a file or filehandle, set C<binmode> before writing to the
926file.
927
928Defaults to 0.
929
930
931
932
933
e7d45986 934=item C<< Append => 0|1 >>
a02d0f6f 935
936TODO
937
e7d45986 938=item C<< MultiStream => 0|1 >>
a02d0f6f 939
e7d45986 940If the input file/buffer contains multiple compressed data streams, this
941option will uncompress the whole lot as a single data stream.
a02d0f6f 942
e7d45986 943Defaults to 0.
a02d0f6f 944
945
946
947=back
948
949
950
951
952=head2 Examples
953
954To read the contents of the file C<file1.txt.zip> and write the
955compressed data to the file C<file1.txt>.
956
957 use strict ;
958 use warnings ;
959 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
960
961 my $input = "file1.txt.zip";
962 my $output = "file1.txt";
963 unzip $input => $output
964 or die "unzip failed: $UnzipError\n";
965
966
967To read from an existing Perl filehandle, C<$input>, and write the
968uncompressed data to a buffer, C<$buffer>.
969
970 use strict ;
971 use warnings ;
972 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
973 use IO::File ;
974
975 my $input = new IO::File "<file1.txt.zip"
976 or die "Cannot open 'file1.txt.zip': $!\n" ;
977 my $buffer ;
978 unzip $input => \$buffer
979 or die "unzip failed: $UnzipError\n";
980
981To uncompress all files in the directory "/my/home" that match "*.txt.zip" and store the compressed data in the same directory
982
983 use strict ;
984 use warnings ;
985 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
986
987 unzip '</my/home/*.txt.zip>' => '</my/home/#1.txt>'
988 or die "unzip failed: $UnzipError\n";
989
990and if you want to compress each file one at a time, this will do the trick
991
992 use strict ;
993 use warnings ;
994 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
995
996 for my $input ( glob "/my/home/*.txt.zip" )
997 {
998 my $output = $input;
999 $output =~ s/.zip// ;
1000 unzip $input => $output
1001 or die "Error compressing '$input': $UnzipError\n";
1002 }
1003
1004=head1 OO Interface
1005
1006=head2 Constructor
1007
1008The format of the constructor for IO::Uncompress::Unzip is shown below
1009
1010
1011 my $z = new IO::Uncompress::Unzip $input [OPTS]
1012 or die "IO::Uncompress::Unzip failed: $UnzipError\n";
1013
1014Returns an C<IO::Uncompress::Unzip> object on success and undef on failure.
1015The variable C<$UnzipError> will contain an error message on failure.
1016
1017If you are running Perl 5.005 or better the object, C<$z>, returned from
1018IO::Uncompress::Unzip can be used exactly like an L<IO::File|IO::File> filehandle.
1019This means that all normal input file operations can be carried out with
1020C<$z>. For example, to read a line from a compressed file/buffer you can
1021use either of these forms
1022
1023 $line = $z->getline();
1024 $line = <$z>;
1025
1026The mandatory parameter C<$input> is used to determine the source of the
1027compressed data. This parameter can take one of three forms.
1028
1029=over 5
1030
1031=item A filename
1032
1033If the C<$input> parameter is a scalar, it is assumed to be a filename. This
1034file will be opened for reading and the compressed data will be read from it.
1035
1036=item A filehandle
1037
1038If the C<$input> parameter is a filehandle, the compressed data will be
1039read from it.
1040The string '-' can be used as an alias for standard input.
1041
1042
1043=item A scalar reference
1044
1045If C<$input> is a scalar reference, the compressed data will be read from
1046C<$$output>.
1047
1048=back
1049
1050=head2 Constructor Options
1051
1052
1053The option names defined below are case insensitive and can be optionally
1054prefixed by a '-'. So all of the following are valid
1055
1056 -AutoClose
1057 -autoclose
1058 AUTOCLOSE
1059 autoclose
1060
1061OPTS is a combination of the following options:
1062
1063=over 5
1064
e7d45986 1065=item C<< AutoClose => 0|1 >>
a02d0f6f 1066
1067This option is only valid when the C<$input> parameter is a filehandle. If
1068specified, and the value is true, it will result in the file being closed once
1069either the C<close> method is called or the IO::Uncompress::Unzip object is
1070destroyed.
1071
1072This parameter defaults to 0.
1073
e7d45986 1074=item C<< MultiStream => 0|1 >>
a02d0f6f 1075
1076
1077
e7d45986 1078Treats the complete zip file/buffer as a single compressed data
1079stream. When reading in multi-stream mode each member of the zip
1080file/buffer will be uncompressed in turn until the end of the file/buffer
1081is encountered.
a02d0f6f 1082
1083This parameter defaults to 0.
1084
1085
e7d45986 1086=item C<< Prime => $string >>
a02d0f6f 1087
1088This option will uncompress the contents of C<$string> before processing the
1089input file/buffer.
1090
1091This option can be useful when the compressed data is embedded in another
1092file/data structure and it is not possible to work out where the compressed
1093data begins without having to read the first few bytes. If this is the
1094case, the uncompression can be I<primed> with these bytes using this
1095option.
1096
e7d45986 1097=item C<< Transparent => 0|1 >>
a02d0f6f 1098
1099If this option is set and the input file or buffer is not compressed data,
1100the module will allow reading of it anyway.
1101
1102This option defaults to 1.
1103
e7d45986 1104=item C<< BlockSize => $num >>
a02d0f6f 1105
1106When reading the compressed input data, IO::Uncompress::Unzip will read it in
1107blocks of C<$num> bytes.
1108
1109This option defaults to 4096.
1110
e7d45986 1111=item C<< InputLength => $size >>
a02d0f6f 1112
1113When present this option will limit the number of compressed bytes read
1114from the input file/buffer to C<$size>. This option can be used in the
1115situation where there is useful data directly after the compressed data
1116stream and you know beforehand the exact length of the compressed data
1117stream.
1118
1119This option is mostly used when reading from a filehandle, in which case
1120the file pointer will be left pointing to the first byte directly after the
1121compressed data stream.
1122
1123
1124
1125This option defaults to off.
1126
e7d45986 1127=item C<< Append => 0|1 >>
a02d0f6f 1128
1129This option controls what the C<read> method does with uncompressed data.
1130
1131If set to 1, all uncompressed data will be appended to the output parameter
1132of the C<read> method.
1133
1134If set to 0, the contents of the output parameter of the C<read> method
1135will be overwritten by the uncompressed data.
1136
1137Defaults to 0.
1138
e7d45986 1139=item C<< Strict => 0|1 >>
a02d0f6f 1140
1141
1142
1143This option controls whether the extra checks defined below are used when
1144carrying out the decompression. When Strict is on, the extra tests are
1145carried out, when Strict is off they are not.
1146
1147The default for this option is off.
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162=back
1163
1164=head2 Examples
1165
1166TODO
1167
1168=head1 Methods
1169
1170=head2 read
1171
1172Usage is
1173
1174 $status = $z->read($buffer)
1175
1176Reads a block of compressed data (the size the the compressed block is
1177determined by the C<Buffer> option in the constructor), uncompresses it and
1178writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
1179set in the constructor, the uncompressed data will be appended to the
1180C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
1181
1182Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
1183or a negative number on error.
1184
1185=head2 read
1186
1187Usage is
1188
1189 $status = $z->read($buffer, $length)
1190 $status = $z->read($buffer, $length, $offset)
1191
1192 $status = read($z, $buffer, $length)
1193 $status = read($z, $buffer, $length, $offset)
1194
1195Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
1196
1197The main difference between this form of the C<read> method and the
1198previous one, is that this one will attempt to return I<exactly> C<$length>
1199bytes. The only circumstances in which it will not do so are when end-of-file
1200or an IO error is encountered.
1201
1202Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
1203or a negative number on error.
1204
1205
1206=head2 getline
1207
1208Usage is
1209
1210 $line = $z->getline()
1211 $line = <$z>
1212
1213Reads a single line.
1214
1215This method fully supports the use of the variable C<$/>
1216(or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
1217determine what constitutes an end of line. Both paragraph mode and file
1218slurp mode are supported.
1219
1220
1221=head2 getc
1222
1223Usage is
1224
1225 $char = $z->getc()
1226
1227Read a single character.
1228
1229=head2 ungetc
1230
1231Usage is
1232
1233 $char = $z->ungetc($string)
1234
1235
1236
1237=head2 inflateSync
1238
1239Usage is
1240
1241 $status = $z->inflateSync()
1242
1243TODO
1244
1245
1246=head2 getHeaderInfo
1247
1248Usage is
1249
1250 $hdr = $z->getHeaderInfo();
1251 @hdrs = $z->getHeaderInfo();
1252
1253This method returns either a hash reference (in scalar context) or a list
1254of hash references (in array context) that contains information about each
1255of the header fields in the compressed data stream(s).
1256
1257
1258
1259
1260=head2 tell
1261
1262Usage is
1263
1264 $z->tell()
1265 tell $z
1266
1267Returns the uncompressed file offset.
1268
1269=head2 eof
1270
1271Usage is
1272
1273 $z->eof();
1274 eof($z);
1275
1276
1277
1278Returns true if the end of the compressed input stream has been reached.
1279
1280
1281
1282=head2 seek
1283
1284 $z->seek($position, $whence);
1285 seek($z, $position, $whence);
1286
1287
1288
1289
1290Provides a sub-set of the C<seek> functionality, with the restriction
1291that it is only legal to seek forward in the input file/buffer.
1292It is a fatal error to attempt to seek backward.
1293
1294
1295
1296The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
1297SEEK_CUR or SEEK_END.
1298
1299Returns 1 on success, 0 on failure.
1300
1301=head2 binmode
1302
1303Usage is
1304
1305 $z->binmode
1306 binmode $z ;
1307
1308This is a noop provided for completeness.
1309
1310=head2 opened
1311
1312 $z->opened()
1313
1314Returns true if the object currently refers to an opened file/buffer.
1315
1316=head2 autoflush
1317
1318 my $prev = $z->autoflush()
1319 my $prev = $z->autoflush(EXPR)
1320
1321If the C<$z> object is associated with a file or a filehandle, this method
1322returns the current autoflush setting for the underlying filehandle. If
1323C<EXPR> is present, and is non-zero, it will enable flushing after every
1324write/print operation.
1325
1326If C<$z> is associated with a buffer, this method has no effect and always
1327returns C<undef>.
1328
1329B<Note> that the special variable C<$|> B<cannot> be used to set or
1330retrieve the autoflush setting.
1331
1332=head2 input_line_number
1333
1334 $z->input_line_number()
1335 $z->input_line_number(EXPR)
1336
1337
1338
1339Returns the current uncompressed line number. If C<EXPR> is present it has
1340the effect of setting the line number. Note that setting the line number
1341does not change the current position within the file/buffer being read.
1342
1343The contents of C<$/> are used to determine what constitutes a line
1344terminator.
1345
1346
1347
1348=head2 fileno
1349
1350 $z->fileno()
1351 fileno($z)
1352
1353If the C<$z> object is associated with a file or a filehandle, this method
1354will return the underlying file descriptor.
1355
1356If the C<$z> object is associated with a buffer, this method will
1357return undef.
1358
1359=head2 close
1360
1361 $z->close() ;
1362 close $z ;
1363
1364
1365
1366Closes the input file/buffer.
1367
1368
1369
1370For most versions of Perl this method will be automatically invoked if
1371the IO::Uncompress::Unzip object is destroyed (either explicitly or by the
1372variable with the reference to the object going out of scope). The
1373exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
1374these cases, the C<close> method will be called automatically, but
1375not until global destruction of all live objects when the program is
1376terminating.
1377
1378Therefore, if you want your scripts to be able to run on all versions
1379of Perl, you should call C<close> explicitly and not rely on automatic
1380closing.
1381
1382Returns true on success, otherwise 0.
1383
1384If the C<AutoClose> option has been enabled when the IO::Uncompress::Unzip
1385object was created, and the object is associated with a file, the
1386underlying file will also be closed.
1387
1388
1389
1390
e7d45986 1391=head2 nextStream
1392
1393Usage is
1394
1395 my $status = $z->nextStream();
1396
1397Skips to the next compressed data stream in the input file/buffer. If a new
1398compressed data stream is found, the eof marker will be cleared and C<$.> will
1399be reset to 0.
1400
1401Returns 1 if a new stream was found, 0 if none was found, and -1 if an
1402error was encountered.
1403
1404=head2 trailingData
1405
1406Usage is
1407
1408 my $data = $z->trailingData();
1409
1410Returns any data that follows the compressed data stream in the input file/buffer.
1411
a02d0f6f 1412=head1 Importing
1413
1414No symbolic constants are required by IO::Uncompress::Unzip at present.
1415
1416=over 5
1417
1418=item :all
1419
1420Imports C<unzip> and C<$UnzipError>.
1421Same as doing this
1422
1423 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1424
1425=back
1426
1427=head1 EXAMPLES
1428
1429
1430
1431
1432=head1 SEE ALSO
1433
1434L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
1435
1436L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
1437
1438L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
1439L<Archive::Tar|Archive::Tar>,
1440L<IO::Zlib|IO::Zlib>
1441
1442
1443For RFC 1950, 1951 and 1952 see
1444F<http://www.faqs.org/rfcs/rfc1950.html>,
1445F<http://www.faqs.org/rfcs/rfc1951.html> and
1446F<http://www.faqs.org/rfcs/rfc1952.html>
1447
1448The I<zlib> compression library was written by Jean-loup Gailly
1449F<gzip@prep.ai.mit.edu> and Mark Adler F<madler@alumni.caltech.edu>.
1450
1451The primary site for the I<zlib> compression library is
1452F<http://www.zlib.org>.
1453
1454The primary site for gzip is F<http://www.gzip.org>.
1455
1456
1457
1458
a02d0f6f 1459=head1 AUTHOR
1460
cb7abd7f 1461This module was written by Paul Marquess, F<pmqs@cpan.org>.
a02d0f6f 1462
1463
1464
1465=head1 MODIFICATION HISTORY
1466
1467See the Changes file.
1468
1469=head1 COPYRIGHT AND LICENSE
a02d0f6f 1470
1471Copyright (c) 2005-2006 Paul Marquess. All rights reserved.
1472
1473This program is free software; you can redistribute it and/or
1474modify it under the same terms as Perl itself.
1475