Commit | Line | Data |
f4c6fd49 |
1 | # File : Zlib.pm |
2 | # Author : Paul Marquess |
5e282033 |
3 | # Created : 7 September 2005 |
4 | # Version : 1.39 |
f4c6fd49 |
5 | # |
6 | # Copyright (c) 1995-2005 Paul Marquess. All rights reserved. |
7 | # This program is free software; you can redistribute it and/or |
8 | # modify it under the same terms as Perl itself. |
9 | # |
10 | |
11 | package Compress::Zlib; |
12 | |
13 | require 5.004 ; |
14 | require Exporter; |
f4c6fd49 |
15 | use AutoLoader; |
16 | use Carp ; |
17 | use IO::Handle ; |
18 | |
19 | use strict ; |
8aa25532 |
20 | use warnings ; |
21 | our ($VERSION, @ISA, @EXPORT, $AUTOLOAD); |
22 | our ($deflateDefault, $deflateParamsDefault, $inflateDefault); |
f4c6fd49 |
23 | |
5e282033 |
24 | $VERSION = "1.39" ; |
f4c6fd49 |
25 | |
06edba15 |
26 | @ISA = qw(Exporter); |
f4c6fd49 |
27 | # Items to export into callers namespace by default. Note: do not export |
28 | # names by default without a very good reason. Use EXPORT_OK instead. |
29 | # Do not simply export all your public functions/methods/constants. |
30 | @EXPORT = qw( |
31 | deflateInit |
32 | inflateInit |
33 | |
34 | compress |
35 | uncompress |
36 | |
37 | gzip gunzip |
38 | |
39 | gzopen |
40 | $gzerrno |
41 | |
42 | adler32 |
43 | crc32 |
44 | |
45 | ZLIB_VERSION |
06edba15 |
46 | ZLIB_VERNUM |
f4c6fd49 |
47 | |
48 | DEF_WBITS |
49 | OS_CODE |
50 | |
51 | MAX_MEM_LEVEL |
52 | MAX_WBITS |
53 | |
54 | Z_ASCII |
55 | Z_BEST_COMPRESSION |
56 | Z_BEST_SPEED |
57 | Z_BINARY |
58 | Z_BUF_ERROR |
59 | Z_DATA_ERROR |
60 | Z_DEFAULT_COMPRESSION |
61 | Z_DEFAULT_STRATEGY |
62 | Z_DEFLATED |
63 | Z_ERRNO |
64 | Z_FILTERED |
65 | Z_FINISH |
66 | Z_FULL_FLUSH |
67 | Z_HUFFMAN_ONLY |
68 | Z_MEM_ERROR |
69 | Z_NEED_DICT |
70 | Z_NO_COMPRESSION |
71 | Z_NO_FLUSH |
72 | Z_NULL |
73 | Z_OK |
74 | Z_PARTIAL_FLUSH |
75 | Z_STREAM_END |
76 | Z_STREAM_ERROR |
77 | Z_SYNC_FLUSH |
78 | Z_UNKNOWN |
79 | Z_VERSION_ERROR |
80 | ); |
81 | |
82 | |
83 | |
# Resolve a not-yet-defined sub name as a constant on first use.
#
# The package qualifier is stripped from $AUTOLOAD and the bare name is
# handed to constant() (not defined in this part of the file; presumably
# supplied by the compiled code -- confirm).  If that reports an error the
# error is raised with croak.  Otherwise a sub returning the value is
# installed under the fully qualified name, so later calls bypass
# AUTOLOAD, and control is transferred to it.
sub AUTOLOAD {
    (my $name = $AUTOLOAD) =~ s/.*:://;

    my ($failure, $value) = constant($name);
    Carp::croak($failure) if $failure;

    # Installing into the symbol table needs a symbolic glob reference.
    no strict 'refs';
    *{$AUTOLOAD} = sub { $value };
    goto &{$AUTOLOAD};
}
93 | |
06edba15 |
# Load the compiled half of the module.  XSLoader is tried first; if it is
# unavailable or loading dies, fall back to DynaLoader.
#
# The eval block ends in an explicit true value so that the fallback runs
# only when loading actually threw, never merely because XSLoader::load
# happened to return something false after a successful load.
eval {
    require XSLoader;
    XSLoader::load('Compress::Zlib', $VERSION);
    1;
} or do {
    require DynaLoader;
    # DynaLoader is added to the inheritance chain only while bootstrapping.
    local @ISA = qw(DynaLoader);
    Compress::Zlib->bootstrap($VERSION);
} ;
f4c6fd49 |
102 | |
103 | # Preloaded methods go here. |
104 | |
# Report whether the argument can be used as an open filehandle.
#
# The argument qualifies when it is a glob reference or a glob, and
# fileno() returns a defined value for it.  For anything that is not a
# glob the false result of the glob test is returned and fileno() is
# never called.
sub isaFilehandle($)
{
    my $candidate = shift ;

    my $is_glob = UNIVERSAL::isa($candidate, 'GLOB')
               || UNIVERSAL::isa(\$candidate, 'GLOB') ;
    return $is_glob unless $is_glob ;

    return defined fileno($candidate) ;
}
113 | |
# Report whether the argument could be a filename, i.e. it is not a
# reference and a reference taken to it is a plain SCALAR reference
# (which excludes globs).
sub isaFilename($)
{
    my $candidate = shift ;

    my $is_plain = ! ref $candidate ;
    return $is_plain && UNIVERSAL::isa(\$candidate, 'SCALAR') ;
}
120 | |
# Open a gzip stream on either an already-open filehandle or a filename.
#
#   $file  a filehandle (glob or glob reference) or a filename
#   $mode  passed through unchanged to the underlying open routine
#
# A filehandle is flushed first, and its current position (tell) is passed
# along with its descriptor to gzdopen_.  A filename goes to gzopen_.
# Both routines are defined outside this part of the file.  Anything else
# croaks.  The result of the underlying routine is returned.
sub gzopen($$)
{
    my ($target, $open_mode) = @_ ;

    if (isaFilehandle($target)) {
        IO::Handle::flush($target) ;
        my $position = tell($target) ;
        return gzdopen_(fileno($target), $open_mode, $position) ;
    }

    return gzopen_($target, $open_mode)
        if isaFilename($target) ;

    croak "gzopen: file parameter is not a filehandle or filename" ;
}
137 | |
# Merge caller-supplied options over a table of defaults.
#
#   $default  hash reference: the accepted option names and their defaults
#   @rest     the options, given either as a single hash reference or as a
#             flat list of name => value pairs
#
# A single leading "-" on an option name is ignored, so "-Level" and
# "Level" are the same option.  Returns a reference to a new hash holding
# the defaults overridden by the supplied values; $default itself is not
# modified.
#
# Croaks when a lone argument is not a hash reference, when a list has an
# odd number of elements, or when any option name is missing from
# $default.  Unknown names are reported in sorted order, comma separated.
sub ParseParameters($@)
{
    my ($default, @rest) = @_ ;
    my %got = %$default ;

    # Name of the sub that called us, used to prefix error messages.  Fall
    # back to our own name when there is no enclosing sub frame.
    my $sub = (caller(1))[3] ;
    $sub = (caller(0))[3] unless defined $sub ;

    # allow the options to be passed as a hash reference or
    # as the complete hash.
    my %options ;
    if (@rest == 1) {
        croak "$sub: parameter is not a reference to a hash"
            if ref $rest[0] ne "HASH" ;

        %options = %{ $rest[0] } ;
    }
    elsif (@rest >= 2) {
        my $count = @rest ;
        croak "$sub: Expected even number of parameters, got $count"
            if @rest % 2 != 0 ;

        %options = @rest ;
    }

    # Walk the names in sorted order so the error message, and the winner
    # when both "-Name" and "Name" are supplied, do not depend on hash
    # iteration order.
    my @Bad ;
    for my $name (sort keys %options) {
        (my $key = $name) =~ s/^-// ;

        if (exists $default->{$key})
          { $got{$key} = $options{$name} }
        else
          { push @Bad, $key }
    }

    croak "unknown key value(s) " . join(", ", @Bad)
        if @Bad ;

    return \%got ;
}
180 | |
# Option tables handed to ParseParameters.  Each key is an accepted option
# name; each value is what is used when the caller leaves the option out.

# Options accepted by deflateInit.
$deflateDefault = {
    Level      => Z_DEFAULT_COMPRESSION(),
    Method     => Z_DEFLATED(),
    WindowBits => MAX_WBITS(),
    MemLevel   => MAX_MEM_LEVEL(),
    Strategy   => Z_DEFAULT_STRATEGY(),
    Bufsize    => 4096,
    Dictionary => "",
} ;

# Options accepted by deflateParams.  undef marks "not supplied"; the
# method tests each value with defined().
$deflateParamsDefault = {
    Level    => undef,
    Strategy => undef,
    Bufsize  => undef,
} ;

# Options accepted by inflateInit.
$inflateDefault = {
    WindowBits => MAX_WBITS(),
    Bufsize    => 4096,
    Dictionary => "",
} ;
202 | |
203 | |
# Create a deflation stream from named options.
#
# The options are merged over $deflateDefault by ParseParameters, which
# croaks on unknown names.  Croaks when Bufsize is not >= 1.  Everything
# is then handed to _deflateInit (defined outside this part of the file)
# and its result is returned in the caller's context.
sub deflateInit(@)
{
    my $opt = ParseParameters($deflateDefault, @_) ;

    # Warnings are off from here on; presumably so that an undefined or
    # non-numeric Bufsize yields only the croak below -- confirm.
    no warnings ;
    croak "deflateInit: Bufsize must be >= 1, you specified $opt->{Bufsize}"
        if ! ($opt->{Bufsize} >= 1) ;

    return _deflateInit($opt->{Level},
                        $opt->{Method},
                        $opt->{WindowBits},
                        $opt->{MemLevel},
                        $opt->{Strategy},
                        $opt->{Bufsize},
                        $opt->{Dictionary}) ;
}
215 | |
# Create an inflation stream from named options.
#
# The options are merged over $inflateDefault by ParseParameters, which
# croaks on unknown names.  Croaks when Bufsize is not >= 1.  WindowBits,
# Bufsize and Dictionary are then handed to _inflateInit (defined outside
# this part of the file) and its result is returned in the caller's
# context.
sub inflateInit(@)
{
    my $opt = ParseParameters($inflateDefault, @_) ;

    # Warnings are off from here on; presumably so that an undefined or
    # non-numeric Bufsize yields only the croak below -- confirm.
    no warnings ;
    croak "inflateInit: Bufsize must be >= 1, you specified $opt->{Bufsize}"
        if ! ($opt->{Bufsize} >= 1) ;

    return _inflateInit($opt->{WindowBits},
                        $opt->{Bufsize},
                        $opt->{Dictionary}) ;
}
225 | |
# Change the settings of an existing deflation stream.
#
# Accepts the named options Level, Strategy and Bufsize.  At least one of
# Level and Strategy must be given, and a supplied Bufsize must be >= 1;
# otherwise the method croaks.
#
# The stream's _deflateParams method (defined outside this part of the
# file) receives a flag word -- bit 1 set when Level was supplied, bit 2
# when Strategy was supplied -- followed by level, strategy and buffer
# size, with 0 standing in for each value that was not supplied.
sub Compress::Zlib::deflateStream::deflateParams
{
    my ($stream, @args) = @_ ;
    my $opt = ParseParameters($deflateParamsDefault, @args) ;

    my $have_level    = defined $opt->{Level} ;
    my $have_strategy = defined $opt->{Strategy} ;

    croak "deflateParams needs Level and/or Strategy"
        unless $have_level || $have_strategy ;

    no warnings ;
    croak "deflateParams: Bufsize must be >= 1, you specified $opt->{Bufsize}"
        if defined $opt->{Bufsize} && ! ($opt->{Bufsize} >= 1) ;

    my $flags = 0 ;
    $flags |= 1 if $have_level ;
    $flags |= 2 if $have_strategy ;

    my $level    = $have_level             ? $opt->{Level}    : 0 ;
    my $strategy = $have_strategy          ? $opt->{Strategy} : 0 ;
    my $bufsize  = defined $opt->{Bufsize} ? $opt->{Bufsize}  : 0 ;

    return $stream->_deflateParams($flags, $level, $strategy, $bufsize) ;
}
254 | |
# Compress a buffer in one call.
#
#   $_[0]  the data, as a scalar or a reference to a scalar
#   $_[1]  optional compression level; Z_DEFAULT_COMPRESSION when omitted
#
# Returns the deflate output followed by the flush output as one string,
# or undef when the stream cannot be created or either step reports a
# status other than Z_OK.  Croaks when given a reference that is not a
# scalar reference.
sub compress($;$)
{
    # Work on a reference to the data so the buffer is never copied here.
    my $in ;
    if (ref $_[0]) {
        croak "not a scalar reference" unless ref $_[0] eq 'SCALAR' ;
        $in = $_[0] ;
    }
    else {
        $in = \$_[0] ;
    }

    my $level = @_ == 2 ? $_[1] : Z_DEFAULT_COMPRESSION() ;

    my ($x, $err) = deflateInit(Level => $level) ;
    return undef unless $err == Z_OK() ;

    my ($body, $tail) ;

    ($body, $err) = $x->deflate($in) ;
    return undef unless $err == Z_OK() ;

    ($tail, $err) = $x->flush() ;
    return undef unless $err == Z_OK() ;

    return $body . $tail ;
}
284 | |
285 | |
# Uncompress a buffer in one call.
#
#   $_[0]  the compressed data, as a scalar or a reference to a scalar
#
# Returns the output of the stream's __unc_inflate method (defined outside
# this part of the file) when it reports Z_STREAM_END.  Returns undef when
# the stream cannot be created or any other status comes back.  Croaks
# when given a reference that is not a scalar reference.
sub uncompress($)
{
    my $in ;
    if (ref $_[0]) {
        croak "not a scalar reference" unless ref $_[0] eq 'SCALAR' ;
        $in = $_[0] ;
    }
    else {
        $in = \$_[0] ;
    }

    my ($x, $err) = inflateInit() ;
    return undef unless $err == Z_OK() ;

    my $output ;
    ($output, $err) = $x->__unc_inflate($in) ;
    return undef unless $err == Z_STREAM_END() ;

    return $output ;
}
308 | |
309 | |
310 | # Constants |
311 | use constant MAGIC1 => 0x1f ; |
312 | use constant MAGIC2 => 0x8b ; |
313 | use constant OSCODE => 3 ; |
314 | |
315 | use constant FTEXT => 1 ; |
316 | use constant FHCRC => 2 ; |
317 | use constant FEXTRA => 4 ; |
318 | use constant FNAME => 8 ; |
319 | use constant FCOMMENT => 16 ; |
320 | use constant NULL => pack("C", 0) ; |
321 | use constant RESERVED => 0xE0 ; |
322 | |
323 | use constant MIN_HDR_SIZE => 10 ; # minimum gzip header size |
324 | |
# Build a complete gzip member in memory.
#
#   $_[0]  the data, as a scalar or a reference to a scalar
#
# Output layout: a MIN_HDR_SIZE-byte header (the two magic bytes, the
# deflate method, six zero bytes, OSCODE), the deflated data, then a
# trailer of two 32-bit little-endian values: crc32 of the input and the
# stream's total_in count.
#
# Returns the gzip data, or undef when the stream cannot be created or
# when deflate or flush reports a status other than Z_OK.
sub memGzip($)
{
    # NOTE(review): the negative WindowBits presumably requests a raw
    # deflate stream with no zlib wrapper -- confirm against the zlib
    # deflateInit2 documentation.
    my $z = deflateInit(
                -Level      => Z_BEST_COMPRESSION(),
                -WindowBits => - MAX_WBITS(),
            )
        or return undef ;

    # if the deflation buffer isn't a reference, make it one
    my $in = ref $_[0] ? $_[0] : \$_[0] ;

    # start with a minimal gzip header
    my $gz = pack("C" . MIN_HDR_SIZE,
                  MAGIC1, MAGIC2, Z_DEFLATED(), 0,0,0,0,0,0, OSCODE) ;

    my ($chunk, $status) = $z->deflate($in) ;
    return undef unless $status == Z_OK() ;
    $gz .= $chunk ;

    ($chunk, $status) = $z->flush() ;
    return undef unless $status == Z_OK() ;
    $gz .= $chunk ;

    $gz .= pack("V V", crc32($in), $z->total_in()) ;

    return $gz ;
}
355 | |
# Strip the gzip header from the front of a buffer, in place.
#
#   $buf  reference to the scalar holding the gzip data
#
# Returns Z_OK() once the fixed header and every optional field flagged
# in it have been removed from $$buf.  Returns Z_DATA_ERROR() when the
# buffer is too short, the magic bytes or method are wrong, a reserved
# flag bit is set, or an optional field is truncated.  On an error after
# the fixed header has been accepted, $$buf has already lost the parts
# removed so far.
sub _removeGzipHeader($)
{
    my $buf = shift ;

    return Z_DATA_ERROR()
        if length($$buf) < MIN_HDR_SIZE ;

    # Fixed part: two magic bytes, method, flags, 32-bit time, extra
    # flags, OS code.  Only the first four are examined.
    my ($id1, $id2, $cm, $flg, $mtime, $xfl, $os) =
        unpack('CCCCVCC', $$buf) ;

    my $recognised =  $id1 == MAGIC1
                   && $id2 == MAGIC2
                   && $cm  == Z_DEFLATED()
                   && ! ($flg & RESERVED) ;
    return Z_DATA_ERROR() unless $recognised ;

    substr($$buf, 0, MIN_HDR_SIZE, '') ;

    # Extra field: a 16-bit little-endian length followed by that many
    # bytes; both are dropped together.
    if ($flg & FEXTRA) {
        return Z_DATA_ERROR() if length($$buf) < 2 ;

        my $skip = 2 + unpack('v', $$buf) ;
        return Z_DATA_ERROR() if length($$buf) < $skip ;

        substr($$buf, 0, $skip, '') ;
    }

    # Original name and comment are each a NUL-terminated string; drop
    # whichever are flagged, name first.
    for my $field (FNAME, FCOMMENT) {
        next unless $flg & $field ;

        my $end = index($$buf, NULL) ;
        return Z_DATA_ERROR() if $end == -1 ;

        substr($$buf, 0, $end + 1, '') ;
    }

    # Header CRC: two bytes, skipped without being verified.
    if ($flg & FHCRC) {
        return Z_DATA_ERROR() if length($$buf) < 2 ;
        substr($$buf, 0, 2, '') ;
    }

    return Z_OK() ;
}
413 | |
414 | |
# Expand a gzip member held in memory.
#
#   $_[0]  the gzip data, as a scalar or a reference to a scalar; the
#          buffer is consumed in place as it is processed
#
# Returns the uncompressed data.  Returns undef when the header is
# rejected, the stream cannot be created, inflate does not finish with
# Z_STREAM_END, or the trailer does not match.
#
# When at least 8 bytes follow the deflated data they are removed and
# read as two 32-bit little-endian values, which must equal crc32 of the
# output and its length.  When fewer than 8 bytes remain the buffer is
# emptied and no check is made.
sub memGunzip($)
{
    # if the buffer isn't a reference, make it one
    my $in = ref $_[0] ? $_[0] : \$_[0] ;

    return undef
        unless _removeGzipHeader($in) == Z_OK() ;

    # Use the whole remaining input as the buffer size, but never less
    # than 4096.
    my $bufsize = length $$in ;
    $bufsize = 4096 if $bufsize <= 4096 ;

    my $inf = inflateInit( -WindowBits => - MAX_WBITS(),
                           -Bufsize    => $bufsize )
        or return undef ;

    my ($plain, $status) = $inf->inflate($in) ;
    return undef
        unless $status == Z_STREAM_END() ;

    if (length($$in) < 8) {
        $$in = '' ;
        return $plain ;
    }

    # Four-argument substr removes the trailer and hands it to unpack.
    my ($crc, $len) = unpack("VV", substr($$in, 0, 8, '')) ;
    return undef
        unless $len == length($plain) and $crc == crc32($plain) ;

    return $plain ;
}
446 | |
447 | # Autoload methods go after __END__, and are processed by the autosplit program. |
448 | |
449 | 1; |
450 | __END__ |
451 | |
f4c6fd49 |
452 | |
453 | =head1 NAME |
454 | |
455 | Compress::Zlib - Interface to zlib compression library |
456 | |
457 | =head1 SYNOPSIS |
458 | |
459 | use Compress::Zlib ; |
460 | |
461 | ($d, $status) = deflateInit( [OPT] ) ; |
462 | ($out, $status) = $d->deflate($buffer) ; |
463 | $status = $d->deflateParams([OPT]) ; |
464 | ($out, $status) = $d->flush() ; |
465 | $d->dict_adler() ; |
466 | $d->total_in() ; |
467 | $d->total_out() ; |
468 | $d->msg() ; |
469 | |
470 | ($i, $status) = inflateInit( [OPT] ) ; |
471 | ($out, $status) = $i->inflate($buffer) ; |
472 | $status = $i->inflateSync($buffer) ; |
473 | $i->dict_adler() ; |
474 | $i->total_in() ; |
475 | $i->total_out() ; |
476 | $i->msg() ; |
477 | |
478 | $dest = compress($source, [$level]) ; |
479 | $dest = uncompress($source) ; |
480 | |
481 | $gz = gzopen($filename or filehandle, $mode) ; |
482 | $bytesread = $gz->gzread($buffer [,$size]) ; |
483 | $bytesread = $gz->gzreadline($line) ; |
484 | $byteswritten = $gz->gzwrite($buffer) ; |
485 | $status = $gz->gzflush($flush) ; |
486 | $status = $gz->gzclose() ; |
487 | $status = $gz->gzeof() ; |
488 | $status = $gz->gzsetparams($level, $strategy) ; |
489 | $errstring = $gz->gzerror() ; |
490 | $gzerrno |
491 | |
492 | $dest = Compress::Zlib::memGzip($buffer) ; |
493 | $dest = Compress::Zlib::memGunzip($buffer) ; |
494 | |
495 | $crc = adler32($buffer [,$crc]) ; |
496 | $crc = crc32($buffer [,$crc]) ; |
497 | |
498 | ZLIB_VERSION |
499 | |
500 | =head1 DESCRIPTION |
501 | |
502 | The I<Compress::Zlib> module provides a Perl interface to the I<zlib> |
503 | compression library (see L</AUTHOR> for details about where to get |
504 | I<zlib>). Most of the functionality provided by I<zlib> is available |
505 | in I<Compress::Zlib>. |
506 | |
507 | The module can be split into two general areas of functionality, namely |
508 | in-memory compression/decompression and read/write access to I<gzip> |
509 | files. Each of these areas will be discussed separately below. |
510 | |
511 | =head1 DEFLATE |
512 | |
513 | The interface I<Compress::Zlib> provides to the in-memory I<deflate> |
514 | (and I<inflate>) functions has been modified to fit into a Perl model. |
515 | |
516 | The main difference is that for both inflation and deflation, the Perl |
517 | interface will I<always> consume the complete input buffer before |
518 | returning. Also the output buffer returned will be automatically grown |
519 | to fit the amount of output available. |
520 | |
521 | Here is a definition of the interface available: |
522 | |
523 | |
524 | =head2 B<($d, $status) = deflateInit( [OPT] )> |
525 | |
526 | Initialises a deflation stream. |
527 | |
528 | It combines the features of the I<zlib> functions B<deflateInit>, |
529 | B<deflateInit2> and B<deflateSetDictionary>. |
530 | |
531 | If successful, it will return the initialised deflation stream, B<$d> |
532 | and B<$status> of C<Z_OK> in a list context. In scalar context it |
533 | returns the deflation stream, B<$d>, only. |
534 | |
535 | If not successful, the returned deflation stream (B<$d>) will be |
536 | I<undef> and B<$status> will hold the exact I<zlib> error code. |
537 | |
538 | The function optionally takes a number of named options specified as |
539 | C<-Name=E<gt>value> pairs. This allows individual options to be |
540 | tailored without having to specify them all in the parameter list. |
541 | |
542 | For backward compatibility, it is also possible to pass the parameters |
543 | as a reference to a hash containing the name=>value pairs. |
544 | |
545 | The function takes one optional parameter, a reference to a hash. The |
546 | contents of the hash allow the deflation interface to be tailored. |
547 | |
548 | Here is a list of the valid options: |
549 | |
550 | =over 5 |
551 | |
552 | =item B<-Level> |
553 | |
554 | Defines the compression level. Valid values are 0 through 9, |
555 | C<Z_NO_COMPRESSION>, C<Z_BEST_SPEED>, C<Z_BEST_COMPRESSION>, and |
556 | C<Z_DEFAULT_COMPRESSION>. |
557 | |
558 | The default is C<-Level =E<gt>Z_DEFAULT_COMPRESSION>. |
559 | |
560 | =item B<-Method> |
561 | |
562 | Defines the compression method. The only valid value at present (and |
563 | the default) is C<-Method =E<gt>Z_DEFLATED>. |
564 | |
565 | =item B<-WindowBits> |
566 | |
567 | For a definition of the meaning and valid values for B<WindowBits> |
568 | refer to the I<zlib> documentation for I<deflateInit2>. |
569 | |
570 | Defaults to C<-WindowBits =E<gt>MAX_WBITS>. |
571 | |
572 | =item B<-MemLevel> |
573 | |
574 | For a definition of the meaning and valid values for B<MemLevel> |
575 | refer to the I<zlib> documentation for I<deflateInit2>. |
576 | |
577 | Defaults to C<-MemLevel =E<gt>MAX_MEM_LEVEL>. |
578 | |
579 | =item B<-Strategy> |
580 | |
581 | Defines the strategy used to tune the compression. The valid values are |
582 | C<Z_DEFAULT_STRATEGY>, C<Z_FILTERED> and C<Z_HUFFMAN_ONLY>. |
583 | |
584 | The default is C<-Strategy =E<gt>Z_DEFAULT_STRATEGY>. |
585 | |
586 | =item B<-Dictionary> |
587 | |
588 | When a dictionary is specified I<Compress::Zlib> will automatically |
589 | call B<deflateSetDictionary> directly after calling B<deflateInit>. The |
590 | Adler32 value for the dictionary can be obtained by calling the method |
591 | C<$d-E<gt>dict_adler()>. |
592 | |
593 | The default is no dictionary. |
594 | |
595 | =item B<-Bufsize> |
596 | |
597 | Sets the initial size for the deflation buffer. If the buffer has to be |
598 | reallocated to increase the size, it will grow in increments of |
599 | B<Bufsize>. |
600 | |
601 | The default is 4096. |
602 | |
603 | =back |
604 | |
605 | Here is an example of using the B<deflateInit> optional parameter list |
606 | to override the default buffer size and compression level. All other |
607 | options will take their default values. |
608 | |
609 | deflateInit( -Bufsize => 300, |
610 | -Level => Z_BEST_SPEED ) ; |
611 | |
612 | |
613 | =head2 B<($out, $status) = $d-E<gt>deflate($buffer)> |
614 | |
615 | |
616 | Deflates the contents of B<$buffer>. The buffer can either be a scalar |
617 | or a scalar reference. When finished, B<$buffer> will be |
618 | completely processed (assuming there were no errors). If the deflation |
619 | was successful it returns the deflated output, B<$out>, and a status |
620 | value, B<$status>, of C<Z_OK>. |
621 | |
622 | On error, B<$out> will be I<undef> and B<$status> will contain the |
623 | I<zlib> error code. |
624 | |
625 | In a scalar context B<deflate> will return B<$out> only. |
626 | |
627 | As with the I<deflate> function in I<zlib>, it is not necessarily the |
628 | case that any output will be produced by this method. So don't rely on |
629 | the fact that B<$out> is empty for an error test. |
630 | |
631 | |
632 | =head2 B<($out, $status) = $d-E<gt>flush([flush_type])> |
633 | |
634 | Typically used to finish the deflation. Any pending output will be |
635 | returned via B<$out>. |
636 | B<$status> will have a value C<Z_OK> if successful. |
637 | |
638 | In a scalar context B<flush> will return B<$out> only. |
639 | |
640 | Note that flushing can seriously degrade the compression ratio, so it |
641 | should only be used to terminate a compression (using C<Z_FINISH>) or |
642 | when you want to create a I<full flush point> (using C<Z_FULL_FLUSH>). |
643 | |
644 | By default the C<flush_type> used is C<Z_FINISH>. Other valid values |
645 | for C<flush_type> are C<Z_NO_FLUSH>, C<Z_PARTIAL_FLUSH>, C<Z_SYNC_FLUSH> |
646 | and C<Z_FULL_FLUSH>. It is strongly recommended that you only set the |
647 | C<flush_type> parameter if you fully understand the implications of |
648 | what it does. See the C<zlib> documentation for details. |
649 | |
650 | =head2 B<$status = $d-E<gt>deflateParams([OPT])> |
651 | |
652 | Change settings for the deflate stream C<$d>. |
653 | |
654 | The list of the valid options is shown below. Options not specified |
655 | will remain unchanged. |
656 | |
657 | =over 5 |
658 | |
659 | =item B<-Level> |
660 | |
661 | Defines the compression level. Valid values are 0 through 9, |
662 | C<Z_NO_COMPRESSION>, C<Z_BEST_SPEED>, C<Z_BEST_COMPRESSION>, and |
663 | C<Z_DEFAULT_COMPRESSION>. |
664 | |
665 | =item B<-Strategy> |
666 | |
667 | Defines the strategy used to tune the compression. The valid values are |
668 | C<Z_DEFAULT_STRATEGY>, C<Z_FILTERED> and C<Z_HUFFMAN_ONLY>. |
669 | |
670 | =back |
671 | |
672 | =head2 B<$d-E<gt>dict_adler()> |
673 | |
674 | Returns the adler32 value for the dictionary. |
675 | |
676 | =head2 B<$d-E<gt>msg()> |
677 | |
678 | Returns the last error message generated by zlib. |
679 | |
680 | =head2 B<$d-E<gt>total_in()> |
681 | |
682 | Returns the total number of uncompressed bytes input to deflate. |
683 | |
684 | =head2 B<$d-E<gt>total_out()> |
685 | |
686 | Returns the total number of compressed bytes output from deflate. |
687 | |
688 | =head2 Example |
689 | |
690 | |
691 | Here is a trivial example of using B<deflate>. It simply reads standard |
692 | input, deflates it and writes it to standard output. |
693 | |
694 | use strict ; |
695 | use warnings ; |
696 | |
697 | use Compress::Zlib ; |
698 | |
699 | binmode STDIN; |
700 | binmode STDOUT; |
701 | my $x = deflateInit() |
702 | or die "Cannot create a deflation stream\n" ; |
703 | |
704 | my ($output, $status) ; |
705 | while (<>) |
706 | { |
707 | ($output, $status) = $x->deflate($_) ; |
708 | |
709 | $status == Z_OK |
710 | or die "deflation failed\n" ; |
711 | |
712 | print $output ; |
713 | } |
714 | |
715 | ($output, $status) = $x->flush() ; |
716 | |
717 | $status == Z_OK |
718 | or die "deflation failed\n" ; |
719 | |
720 | print $output ; |
721 | |
722 | =head1 INFLATE |
723 | |
724 | Here is a definition of the interface: |
725 | |
726 | |
727 | =head2 B<($i, $status) = inflateInit()> |
728 | |
729 | Initialises an inflation stream. |
730 | |
731 | In a list context it returns the inflation stream, B<$i>, and the |
732 | I<zlib> status code (B<$status>). In a scalar context it returns the |
733 | inflation stream only. |
734 | |
735 | If successful, B<$i> will hold the inflation stream and B<$status> will |
736 | be C<Z_OK>. |
737 | |
738 | If not successful, B<$i> will be I<undef> and B<$status> will hold the |
739 | I<zlib> error code. |
740 | |
741 | The function optionally takes a number of named options specified as |
742 | C<-Name=E<gt>value> pairs. This allows individual options to be |
743 | tailored without having to specify them all in the parameter list. |
744 | |
745 | For backward compatibility, it is also possible to pass the parameters |
746 | as a reference to a hash containing the name=>value pairs. |
747 | |
748 | The function takes one optional parameter, a reference to a hash. The |
749 | contents of the hash allow the inflation interface to be tailored. |
750 | |
751 | Here is a list of the valid options: |
752 | |
753 | =over 5 |
754 | |
755 | =item B<-WindowBits> |
756 | |
757 | For a definition of the meaning and valid values for B<WindowBits> |
758 | refer to the I<zlib> documentation for I<inflateInit2>. |
759 | |
760 | Defaults to C<-WindowBits =E<gt>MAX_WBITS>. |
761 | |
762 | =item B<-Bufsize> |
763 | |
764 | Sets the initial size for the inflation buffer. If the buffer has to be |
765 | reallocated to increase the size, it will grow in increments of |
766 | B<Bufsize>. |
767 | |
768 | Default is 4096. |
769 | |
770 | =item B<-Dictionary> |
771 | |
772 | The default is no dictionary. |
773 | |
774 | =back |
775 | |
776 | Here is an example of using the B<inflateInit> optional parameter to |
777 | override the default buffer size. |
778 | |
779 | inflateInit( -Bufsize => 300 ) ; |
780 | |
781 | =head2 B<($out, $status) = $i-E<gt>inflate($buffer)> |
782 | |
783 | Inflates the complete contents of B<$buffer>. The buffer can either be |
784 | a scalar or a scalar reference. |
785 | |
786 | Returns C<Z_OK> if successful and C<Z_STREAM_END> if the end of the |
787 | compressed data has been successfully reached. |
788 | If not successful, B<$out> will be I<undef> and B<$status> will hold |
789 | the I<zlib> error code. |
790 | |
791 | The C<$buffer> parameter is modified by C<inflate>. On completion it |
792 | will contain what remains of the input buffer after inflation. This |
793 | means that C<$buffer> will be an empty string when the return status is |
794 | C<Z_OK>. When the return status is C<Z_STREAM_END> the C<$buffer> |
795 | parameter will contain what (if anything) was stored in the input |
796 | buffer after the deflated data stream. |
797 | |
798 | This feature is useful when processing a file format that encapsulates |
799 | a compressed data stream (e.g. gzip, zip). |
800 | |
801 | =head2 B<$status = $i-E<gt>inflateSync($buffer)> |
802 | |
803 | Scans C<$buffer> until it reaches either a I<full flush point> or the |
804 | end of the buffer. |
805 | |
806 | If a I<full flush point> is found, C<Z_OK> is returned and C<$buffer> |
807 | will have all data up to the flush point removed. This can then be |
808 | passed to the C<inflate> method. |
809 | |
810 | Any other return code means that a flush point was not found. If more |
811 | data is available, C<inflateSync> can be called repeatedly with more |
812 | compressed data until the flush point is found. |
813 | |
814 | |
815 | =head2 B<$i-E<gt>dict_adler()> |
816 | |
817 | Returns the adler32 value for the dictionary. |
818 | |
819 | =head2 B<$i-E<gt>msg()> |
820 | |
821 | Returns the last error message generated by zlib. |
822 | |
823 | =head2 B<$i-E<gt>total_in()> |
824 | |
825 | Returns the total number of compressed bytes input to inflate. |
826 | |
827 | =head2 B<$i-E<gt>total_out()> |
828 | |
829 | Returns the total number of uncompressed bytes output from inflate. |
830 | |
831 | =head2 Example |
832 | |
833 | Here is an example of using B<inflate>. |
834 | |
835 | use strict ; |
836 | use warnings ; |
837 | |
838 | use Compress::Zlib ; |
839 | |
840 | my $x = inflateInit() |
841 | or die "Cannot create a inflation stream\n" ; |
842 | |
843 | my $input = '' ; |
844 | binmode STDIN; |
845 | binmode STDOUT; |
846 | |
847 | my ($output, $status) ; |
848 | while (read(STDIN, $input, 4096)) |
849 | { |
850 | ($output, $status) = $x->inflate(\$input) ; |
851 | |
852 | print $output |
853 | if $status == Z_OK or $status == Z_STREAM_END ; |
854 | |
855 | last if $status != Z_OK ; |
856 | } |
857 | |
858 | die "inflation failed\n" |
859 | unless $status == Z_STREAM_END ; |
860 | |
861 | =head1 COMPRESS/UNCOMPRESS |
862 | |
863 | Two high-level functions are provided by I<zlib> to perform in-memory |
bdd93743 |
864 | compression/uncompression of RFC1950 data streams. They are called |
865 | B<compress> and B<uncompress>. |
866 | |
867 | The two Perl subs defined below provide the equivalent |
868 | functionality. |
f4c6fd49 |
869 | |
870 | =over 5 |
871 | |
872 | =item B<$dest = compress($source [, $level] ) ;> |
873 | |
874 | Compresses B<$source>. If successful it returns the |
875 | compressed data. Otherwise it returns I<undef>. |
876 | |
877 | The source buffer can either be a scalar or a scalar reference. |
878 | |
5993747c |
879 | The B<$level> parameter defines the compression level. Valid values are |
f4c6fd49 |
880 | 0 through 9, C<Z_NO_COMPRESSION>, C<Z_BEST_SPEED>, |
881 | C<Z_BEST_COMPRESSION>, and C<Z_DEFAULT_COMPRESSION>. |
882 | If B<$level> is not specified C<Z_DEFAULT_COMPRESSION> will be used. |
883 | |
884 | |
885 | =item B<$dest = uncompress($source) ;> |
886 | |
887 | Uncompresses B<$source>. If successful it returns the uncompressed |
888 | data. Otherwise it returns I<undef>. |
889 | |
890 | The source buffer can either be a scalar or a scalar reference. |
891 | |
892 | =back |
893 | |
bdd93743 |
894 | Please note: the two functions defined above are I<not> compatible with |
895 | the Unix commands of the same name. |
896 | |
f4c6fd49 |
897 | =head1 GZIP INTERFACE |
898 | |
899 | A number of functions are supplied in I<zlib> for reading and writing |
900 | I<gzip> files. This module provides an interface to most of them. In |
901 | general the interface provided by this module operates identically to |
902 | the functions provided by I<zlib>. Any differences are explained |
903 | below. |
904 | |
905 | =over 5 |
906 | |
907 | =item B<$gz = gzopen(filename or filehandle, mode)> |
908 | |
909 | This function operates identically to the I<zlib> equivalent except |
910 | that it returns an object which is used to access the other I<gzip> |
911 | methods. |
912 | |
913 | As with the I<zlib> equivalent, the B<mode> parameter is used to |
914 | specify both whether the file is opened for reading or writing and to |
915 | optionally specify a compression level. Refer to the I<zlib> |
916 | documentation for the exact format of the B<mode> parameter. |
917 | |
918 | If a reference to an open filehandle is passed in place of the |
919 | filename, gzdopen will be called behind the scenes. The third example |
920 | at the end of this section, I<gzstream>, uses this feature. |
921 | |
922 | =item B<$bytesread = $gz-E<gt>gzread($buffer [, $size]) ;> |
923 | |
924 | Reads B<$size> bytes from the compressed file into B<$buffer>. If |
925 | B<$size> is not specified, it will default to 4096. If the scalar |
926 | B<$buffer> is not large enough, it will be extended automatically. |
927 | |
928 | Returns the number of bytes actually read. On EOF it returns 0 and in |
929 | the case of an error, -1. |
930 | |
931 | =item B<$bytesread = $gz-E<gt>gzreadline($line) ;> |
932 | |
933 | Reads the next line from the compressed file into B<$line>. |
934 | |
935 | Returns the number of bytes actually read. On EOF it returns 0 and in |
936 | the case of an error, -1. |
937 | |
938 | It is legal to intermix calls to B<gzread> and B<gzreadline>. |
939 | |
940 | At this time B<gzreadline> ignores the variable C<$/> |
941 | (C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use). The |
942 | end of a line is denoted by the C character C<'\n'>. |
943 | |
944 | =item B<$byteswritten = $gz-E<gt>gzwrite($buffer) ;> |
945 | |
946 | Writes the contents of B<$buffer> to the compressed file. Returns the |
947 | number of bytes actually written, or 0 on error. |
948 | |
949 | =item B<$status = $gz-E<gt>gzflush($flush) ;> |
950 | |
951 | Flushes all pending output to the compressed file. |
952 | Works identically to the I<zlib> function it interfaces to. Note that |
953 | the use of B<gzflush> can degrade compression. |
954 | |
955 | Returns C<Z_OK> if B<$flush> is C<Z_FINISH> and all output could be |
956 | flushed. Otherwise the zlib error code is returned. |
957 | |
958 | Refer to the I<zlib> documentation for the valid values of B<$flush>. |
959 | |
960 | =item B<$status = $gz-E<gt>gzeof() ;> |
961 | |
962 | Returns 1 if the end of file has been detected while reading the input |
963 | file, otherwise returns 0. |
964 | |
965 | =item B<$gz-E<gt>gzclose> |
966 | |
967 | Closes the compressed file. Any pending data is flushed to the file |
968 | before it is closed. |
969 | |
970 | =item B<$gz-E<gt>gzsetparams($level, $strategy)> |
971 | |
972 | Change settings for the deflate stream C<$gz>. |
973 | |
974 | The list of the valid options is shown below. Options not specified |
975 | will remain unchanged. |
976 | |
977 | Note: This method is only available if you are running zlib 1.0.6 or better. |
978 | |
979 | =over 5 |
980 | |
981 | =item B<$level> |
982 | |
983 | Defines the compression level. Valid values are 0 through 9, |
984 | C<Z_NO_COMPRESSION>, C<Z_BEST_SPEED>, C<Z_BEST_COMPRESSION>, and |
985 | C<Z_DEFAULT_COMPRESSION>. |
986 | |
987 | =item B<$strategy> |
988 | |
989 | Defines the strategy used to tune the compression. The valid values are |
990 | C<Z_DEFAULT_STRATEGY>, C<Z_FILTERED> and C<Z_HUFFMAN_ONLY>. |
991 | |
992 | =back |
993 | |
994 | =item B<$gz-E<gt>gzerror> |
995 | |
996 | Returns the I<zlib> error message or number for the last operation |
997 | associated with B<$gz>. The return value will be the I<zlib> error |
998 | number when used in a numeric context and the I<zlib> error message |
999 | when used in a string context. The I<zlib> error number constants, |
1000 | shown below, are available for use. |
1001 | |
1002 | Z_OK |
1003 | Z_STREAM_END |
1004 | Z_ERRNO |
1005 | Z_STREAM_ERROR |
1006 | Z_DATA_ERROR |
1007 | Z_MEM_ERROR |
1008 | Z_BUF_ERROR |
1009 | |
1010 | =item B<$gzerrno> |
1011 | |
1012 | The B<$gzerrno> scalar holds the error code associated with the most |
1013 | recent I<gzip> routine. Note that unlike B<gzerror()>, the error is |
1014 | I<not> associated with a particular file. |
1015 | |
1016 | As with B<gzerror()> it returns an error number in numeric context and |
1017 | an error message in string context. Unlike B<gzerror()> though, the |
1018 | error message will correspond to the I<zlib> message when the error is |
1019 | associated with I<zlib> itself, or the UNIX error message when it is |
1020 | not (i.e. I<zlib> returned C<Z_ERRNO>). |
1021 | |
1022 | As there is an overlap between the error numbers used by I<zlib> and |
1023 | UNIX, B<$gzerrno> should only be used to check for the presence of |
1024 | I<an> error in numeric context. Use B<gzerror()> to check for specific |
1025 | I<zlib> errors. The I<gzcat> example below shows how the variable can |
1026 | be used safely. |
1027 | |
1028 | =back |
1029 | |
1030 | |
1031 | =head2 Examples |
1032 | |
1033 | Here is an example script which uses the interface. It implements a |
1034 | I<gzcat> function. |
1035 | |
1036 | use strict ; |
1037 | use warnings ; |
1038 | |
1039 | use Compress::Zlib ; |
1040 | |
1041 | die "Usage: gzcat file...\n" |
1042 | unless @ARGV ; |
1043 | |
1044 | my $file ; |
1045 | |
1046 | foreach $file (@ARGV) { |
1047 | my $buffer ; |
1048 | |
1049 | my $gz = gzopen($file, "rb") |
1050 | or die "Cannot open $file: $gzerrno\n" ; |
1051 | |
1052 | print $buffer while $gz->gzread($buffer) > 0 ; |
1053 | |
1054 | die "Error reading from $file: $gzerrno" . ($gzerrno+0) . "\n" |
1055 | if $gzerrno != Z_STREAM_END ; |
1056 | |
1057 | $gz->gzclose() ; |
1058 | } |
1059 | |
1060 | Below is a script which makes use of B<gzreadline>. It implements a |
1061 | very simple I<grep>-like script. |
1062 | |
1063 | use strict ; |
1064 | use warnings ; |
1065 | |
1066 | use Compress::Zlib ; |
1067 | |
1068 | die "Usage: gzgrep pattern file...\n" |
1069 | unless @ARGV >= 2; |
1070 | |
1071 | my $pattern = shift ; |
1072 | |
1073 | my $file ; |
1074 | |
1075 | foreach $file (@ARGV) { |
1076 | my $gz = gzopen($file, "rb") |
1077 | or die "Cannot open $file: $gzerrno\n" ; |
1078 | |
1079 | while ($gz->gzreadline($_) > 0) { |
1080 | print if /$pattern/ ; |
1081 | } |
1082 | |
1083 | die "Error reading from $file: $gzerrno\n" |
1084 | if $gzerrno != Z_STREAM_END ; |
1085 | |
1086 | $gz->gzclose() ; |
1087 | } |
1088 | |
1089 | This script, I<gzstream>, does the opposite of the I<gzcat> script |
1090 | above. It reads from standard input and writes a gzip file to standard |
1091 | output. |
1092 | |
1093 | use strict ; |
1094 | use warnings ; |
1095 | |
1096 | use Compress::Zlib ; |
1097 | |
1098 | binmode STDOUT; # gzopen only sets it on the fd |
1099 | |
1100 | my $gz = gzopen(\*STDOUT, "wb") |
1101 | or die "Cannot open stdout: $gzerrno\n" ; |
1102 | |
1103 | while (<>) { |
1104 | $gz->gzwrite($_) |
1105 | or die "error writing: $gzerrno\n" ; |
1106 | } |
1107 | |
1108 | $gz->gzclose ; |
1109 | |
1110 | =head2 Compress::Zlib::memGzip |
1111 | |
1112 | This function is used to create an in-memory gzip file. |
1113 | It creates a minimal gzip header. |
1114 | |
1115 | $dest = Compress::Zlib::memGzip($buffer) ; |
1116 | |
1117 | If successful, it returns the in-memory gzip file, otherwise it returns |
1118 | undef. |
1119 | |
1120 | The buffer parameter can either be a scalar or a scalar reference. |
1121 | |
1122 | =head2 Compress::Zlib::memGunzip |
1123 | |
1124 | This function is used to uncompress an in-memory gzip file. |
1125 | |
1126 | $dest = Compress::Zlib::memGunzip($buffer) ; |
1127 | |
1128 | If successful, it returns the uncompressed gzip file, otherwise it |
1129 | returns undef. |
1130 | |
1131 | The buffer parameter can either be a scalar or a scalar reference. The |
1132 | contents of the buffer parameter are destroyed after calling this |
1133 | function. |
1134 | |
1135 | =head1 CHECKSUM FUNCTIONS |
1136 | |
1137 | Two functions are provided by I<zlib> to calculate a checksum. For the |
1138 | Perl interface, the order of the two parameters in both functions has |
1139 | been reversed. This allows both running checksums and one off |
1140 | calculations to be done. |
1141 | |
1142 | $crc = adler32($buffer [,$crc]) ; |
1143 | $crc = crc32($buffer [,$crc]) ; |
1144 | |
1145 | The buffer parameters can either be a scalar or a scalar reference. |
1146 | |
1147 | If the $crc parameter is C<undef>, the crc value will be reset. |
1148 | |
5993747c |
1149 | =head1 FAQ |
1150 | |
1151 | =head2 Compatibility with Unix compress/uncompress. |
1152 | |
1153 | Although C<Compress::Zlib> has a pair of functions called C<compress> |
1154 | and C<uncompress>, they are I<not> the same as the Unix programs of the |
1155 | same name. The C<Compress::Zlib> library is not compatible with Unix |
1156 | C<compress>. |
1157 | |
1158 | If you have the C<uncompress> program available, you can use this to |
1159 | read compressed files |
1160 | |
1161 | open F, "uncompress -c $filename |"; |
1162 | while (<F>) |
1163 | { |
1164 | ... |
1165 | |
1166 | If you have the C<gunzip> program available, you can use this to read |
1167 | compressed files |
1168 | |
1169 | open F, "gunzip -c $filename |"; |
1170 | while (<F>) |
1171 | { |
1172 | ... |
1173 | |
1174 | and this to write compressed files if you have the C<compress> program |
1175 | available |
1176 | |
1177 | open F, "| compress -c >$filename "; |
1178 | print F "data"; |
1179 | ... |
1180 | close F ; |
1181 | |
1182 | =head2 Accessing .tar.Z files |
1183 | |
1184 | The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via |
1185 | the C<IO::Zlib> module) to access tar files that have been compressed |
1186 | with C<gzip>. Unfortunately tar files compressed with the Unix C<compress> |
1187 | utility cannot be read by C<Compress::Zlib> and so cannot be directly |
1188 | accessed by C<Archive::Tar>. |
1189 | |
1190 | If the C<uncompress> or C<gunzip> programs are available, you can use |
1191 | one of these workarounds to read C<.tar.Z> files from C<Archive::Tar> |
1192 | |
1193 | Firstly with C<uncompress> |
1194 | |
1195 | use strict; |
1196 | use warnings; |
1197 | use Archive::Tar; |
1198 | |
1199 | open F, "uncompress -c $filename |"; |
1200 | my $tar = Archive::Tar->new(*F); |
1201 | ... |
1202 | |
1203 | and this with C<gunzip> |
1204 | |
1205 | use strict; |
1206 | use warnings; |
1207 | use Archive::Tar; |
1208 | |
1209 | open F, "gunzip -c $filename |"; |
1210 | my $tar = Archive::Tar->new(*F); |
1211 | ... |
1212 | |
1213 | Similarly, if the C<compress> program is available, you can use this to |
1214 | write a C<.tar.Z> file |
1215 | |
1216 | use strict; |
1217 | use warnings; |
1218 | use Archive::Tar; |
1219 | use IO::File; |
1220 | |
1221 | my $fh = new IO::File "| compress -c >$filename"; |
1222 | my $tar = Archive::Tar->new(); |
1223 | ... |
1224 | $tar->write($fh); |
1225 | $fh->close ; |
1226 | |
1227 | =head2 Accessing ZIP Files |
f4c6fd49 |
1228 | |
1229 | Although it is possible to use this module to access .zip files, there |
1230 | is a module on CPAN that will do all the hard work for you. Check out |
1231 | |
1232 | http://www.cpan.org/modules/by-module/Archive/Archive-Zip-*.tar.gz |
1233 | |
1234 | Assuming you don't want to use C<Archive::Zip> to access zip files, there |
1235 | are a number of undocumented features in the zlib library you need to |
1236 | be aware of. |
1237 | |
1238 | =over 5 |
1239 | |
1240 | =item 1. |
1241 | |
1242 | When calling B<inflateInit> or B<deflateInit> the B<WindowBits> parameter |
1243 | must be set to C<-MAX_WBITS>. This disables the creation of the zlib |
1244 | header. |
1245 | |
1246 | =item 2. |
1247 | |
1248 | The zlib function B<inflate>, and so the B<inflate> method supplied in |
1249 | this module, assume that there is at least one trailing byte after the |
1250 | compressed data stream. Normally this isn't a problem because both |
1251 | the gzip and zip file formats will guarantee that there is data directly |
1252 | after the compressed data stream. |
1253 | |
1254 | =back |
1255 | |
1256 | =head1 CONSTANTS |
1257 | |
1258 | All the I<zlib> constants are automatically imported when you make use |
1259 | of I<Compress::Zlib>. |
1260 | |
1261 | =head1 AUTHOR |
1262 | |
1263 | The I<Compress::Zlib> module was written by Paul Marquess, |
1264 | F<pmqs@cpan.org>. The latest copy of the module can be |
1265 | found on CPAN in F<modules/by-module/Compress/Compress-Zlib-x.x.tar.gz>. |
1266 | |
1267 | The primary site for the I<zlib> compression library is |
1268 | F<http://www.zlib.org>. |
1269 | |
1270 | =head1 MODIFICATION HISTORY |
1271 | |
1272 | See the Changes file. |