Compress::Zlib becomes zlib agnostic
[p5sagit/p5-mst-13.2.git] / ext / Compress / Zlib / lib / IO / Compress / Gzip.pm
CommitLineData
642e522c 1
2package IO::Compress::Gzip ;
3
4require 5.004 ;
5
6use strict ;
7use warnings;
8
642e522c 9
1a6a8453 10use IO::Compress::RawDeflate;
642e522c 11
12use Compress::Zlib 2 ;
1a6a8453 13use Compress::Zlib::Common qw(:Status createSelfTiedObject);
642e522c 14use Compress::Gzip::Constants;
642e522c 15
16BEGIN
17{
18 if (defined &utf8::downgrade )
19 { *noUTF8 = \&utf8::downgrade }
20 else
21 { *noUTF8 = sub {} }
22}
642e522c 23
1a6a8453 24require Exporter ;
642e522c 25
1a6a8453 26our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $GzipError);
642e522c 27
1a6a8453 28$VERSION = '2.000_07';
29$GzipError = '' ;
642e522c 30
1a6a8453 31@ISA = qw(Exporter IO::Compress::RawDeflate);
32@EXPORT_OK = qw( $GzipError gzip ) ;
33%EXPORT_TAGS = %IO::Compress::RawDeflate::DEFLATE_CONSTANTS ;
34push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
35Exporter::export_ok_tags('all');
642e522c 36
# Constructor.
#
# Builds the object with createSelfTiedObject(), handing it a reference to
# $GzipError, then forwards the remaining caller arguments to _create()
# with a leading undef. Whatever _create() returns is returned to the caller.
sub new
{
    my $class = shift ;

    return createSelfTiedObject($class, \$GzipError)->_create(undef, @_);
}
45
642e522c 46
# Functional ("one-shot") entry point.
#
# Creates an object with no class via createSelfTiedObject(), handing it a
# reference to $GzipError, and passes all caller arguments straight through
# to its _def() method, returning _def()'s result.
sub gzip
{
    return createSelfTiedObject(undef, \$GzipError)->_def(@_);
}
52
1a6a8453 53#sub newHeader
54#{
55# my $self = shift ;
56# #return GZIP_MINIMUM_HEADER ;
57# return $self->mkHeader(*$self->{Got});
58#}
642e522c 59
# Returns the option specification list for this class: the entries from
# getZlibParams() followed by the gzip header options.
#
# Each gzip entry maps an option name to a four-element array ref whose
# third element is a Parse_* type constant.
# NOTE(review): the meaning of the other three elements (the last looks
# like the default value) is defined by Compress::Zlib::ParseParameters --
# confirm there.
sub getExtraParams
{
    my $self = shift ;

    # presumably the source of the Parse_* constants used below
    use Compress::Zlib::ParseParameters;

    return (
            # zlib behaviour
            $self->getZlibParams(),

            # Gzip header fields
            Minimal    => [0, 1, Parse_boolean,  0],
            Comment    => [0, 1, Parse_any,      undef],
            Name       => [0, 1, Parse_any,      undef],
            Time       => [0, 1, Parse_any,      undef],
            TextFlag   => [0, 1, Parse_boolean,  0],
            HeaderCRC  => [0, 1, Parse_boolean,  0],
            OS_Code    => [0, 1, Parse_unsigned, $Compress::Zlib::gzip_os_code],
            ExtraField => [0, 1, Parse_string,   undef],
            ExtraFlags => [0, 1, Parse_any,      undef],

        );
}
83
642e522c 84
# Validates and normalises the parsed options in $got.
#
# Always forces CRC32 on. When Merge is set nothing else is checked and 1
# is returned immediately. Otherwise:
#   * Time defaults to the current time when it was not supplied;
#   * under Strict, Name and Comment are rejected if they contain a NUL or
#     match the corresponding GZIP_*_INVALID_CHAR_RE pattern;
#   * OS_Code, when supplied, must be in the range 0..255;
#   * Method is forced to Z_DEFLATED;
#   * ExtraFlags, when not supplied, is derived from Level;
#   * ExtraField, when supplied, is normalised by parseExtraField() and
#     then checked against GZIP_FEXTRA_MAX_SIZE.
#
# Returns 1 on success; on failure returns whatever saveErrorString()
# returns.
sub ckParams
{
    my ($self, $got) = @_ ;

    # gzip always needs crc32
    $got->value('CRC32' => 1);

    return 1
        if $got->value('Merge') ;

    my $strict = $got->value('Strict') ;
    my $lax    = ! $strict ;

    # Modification time defaults to now.
    $got->value('Time' => time)
        unless $got->parsed('Time') ;

    # Check that the Name & Comment don't have embedded NULLs
    # Also check that they only contain ISO 8859-1 chars.
    if ($got->parsed('Name') && defined $got->value('Name')) {
        my $name = $got->value('Name');

        if ($strict) {
            return $self->saveErrorString(undef, "Null Character found in Name",
                                          Z_DATA_ERROR)
                if $name =~ /\x00/ ;

            return $self->saveErrorString(undef, "Non ISO 8859-1 Character found in Name",
                                          Z_DATA_ERROR)
                if $name =~ /$GZIP_FNAME_INVALID_CHAR_RE/o ;
        }
    }

    if ($got->parsed('Comment') && defined $got->value('Comment')) {
        my $comment = $got->value('Comment');

        if ($strict) {
            return $self->saveErrorString(undef, "Null Character found in Comment",
                                          Z_DATA_ERROR)
                if $comment =~ /\x00/ ;

            return $self->saveErrorString(undef, "Non ISO 8859-1 Character found in Comment",
                                          Z_DATA_ERROR)
                if $comment =~ /$GZIP_FCOMMENT_INVALID_CHAR_RE/o;
        }
    }

    # The OS field in the header is packed as a single byte.
    if ($got->parsed('OS_Code') ) {
        my $value = $got->value('OS_Code');

        return $self->saveErrorString(undef, "OS_Code must be between 0 and 255, got '$value'")
            if $value < 0 || $value > 255 ;
    }

    # gzip only supports Deflate at present
    $got->value('Method' => Z_DEFLATED) ;

    # Derive the XFL byte from the compression level unless given explicitly.
    unless ($got->parsed('ExtraFlags')) {
        my $level = $got->value('Level') ;

        $got->value('ExtraFlags' => 2)
            if $level == Z_BEST_SPEED ;
        $got->value('ExtraFlags' => 4)
            if $level == Z_BEST_COMPRESSION ;
    }

    if ($got->parsed('ExtraField')) {

        # parseExtraField returns an error string, or undef when all is well
        my $bad = $self->parseExtraField($got, $lax) ;
        return $self->saveErrorString(undef, $bad, Z_DATA_ERROR)
            if $bad ;

        my $len = length $got->value('ExtraField') ;
        return $self->saveErrorString(undef, ExtraFieldError("Too Large"),
                                      Z_DATA_ERROR)
            if $len > GZIP_FEXTRA_MAX_SIZE;
    }

    return 1;
}
164
# Builds the gzip trailer: two 32-bit little-endian values ("V V"), the
# first taken from the Compress object's crc32() method and the second from
# the UnCompSize_32bit slot, both stored in the hash part of the $self glob.
sub mkTrailer
{
    my $self = shift ;

    my @fields = ( *$self->{Compress}->crc32(),
                   *$self->{UnCompSize_32bit} ) ;

    return pack("V V", @fields);
}
171
# Names the class that reverses this one: returns the pair
# ('IO::Uncompress::Gunzip', reference to that package's $GunzipError).
sub getInverseClass
{
    my $inverse = 'IO::Uncompress::Gunzip' ;

    return ($inverse, \$IO::Uncompress::Gunzip::GunzipError);
}
177
# Fills in header defaults taken from an input file.
#
#   $params   - parsed-parameters object (must support parsed() and value())
#   $filename - name of the input file
#
# Name is set to $filename and Time to the file's modification time, each
# only when the caller did not supply that option explicitly.
sub getFileInfo
{
    my ($self, $params, $filename) = @_ ;

    # element 9 of the stat() list is the modification time;
    # it is undef when the stat fails
    my @fileStat = stat($filename) ;
    my $mtime    = $fileStat[9] ;

    $params->value('Name' => $filename)
        if ! $params->parsed('Name') ;

    $params->value('Time' => $mtime)
        if ! $params->parsed('Time') ;
}
642e522c 192
193
# Builds the gzip header from the parsed parameters in $param and returns
# it as a string.
#
# When Minimal is set the constant GZIP_MINIMUM_HEADER is returned as-is.
# Otherwise the fixed part (ID1, ID2, method, flags, mtime, extra flags,
# OS code) is packed first, followed by whichever optional fields are
# wanted, in this order: extra field, name, comment, header CRC.
sub mkHeader
{
    my $self = shift ;
    my $param = shift ;

    # Short-circuit if a minimal header is requested.
    return GZIP_MINIMUM_HEADER if $param->value('Minimal') ;

    # METHOD
    my $method = $param->valueOrDefault('Method', GZIP_CM_DEFLATED) ;

    # FLAGS
    my $flags       = GZIP_FLG_DEFAULT ;
    $flags |= GZIP_FLG_FTEXT    if $param->value('TextFlag') ;
    $flags |= GZIP_FLG_FHCRC    if $param->value('HeaderCRC') ;
    $flags |= GZIP_FLG_FEXTRA   if $param->wantValue('ExtraField') ;
    $flags |= GZIP_FLG_FNAME    if $param->wantValue('Name') ;
    $flags |= GZIP_FLG_FCOMMENT if $param->wantValue('Comment') ;

    # MTIME
    my $time = $param->valueOrDefault('Time', GZIP_MTIME_DEFAULT) ;

    # EXTRA FLAGS
    my $extra_flags = $param->valueOrDefault('ExtraFlags', GZIP_XFL_DEFAULT);

    # OS CODE
    my $os_code = $param->valueOrDefault('OS_Code', GZIP_OS_DEFAULT) ;


    my $out = pack("C4 V C C",
            GZIP_ID1,   # ID1
            GZIP_ID2,   # ID2
            $method,    # Compression Method
            $flags,     # Flags
            $time,      # Modification Time
            $extra_flags, # Extra Flags
            $os_code,   # Operating System Code
            ) ;

    # EXTRA: 16-bit little-endian length followed by the raw field
    if ($flags & GZIP_FLG_FEXTRA) {
        my $extra = $param->value('ExtraField') ;
        $out .= pack("v", length $extra) . $extra ;
    }

    # NAME
    if ($flags & GZIP_FLG_FNAME) {
        my $name = $param->value('Name') ;
        # Keep only the text before the first NUL. /s lets "." cross
        # newlines, so nothing after the NUL can survive and leave a NUL
        # embedded in the field.
        $name =~ s/\x00.*\z//s ;
        # The field is NUL terminated; after the truncation above the
        # value itself can never end in one.
        $out .= $name . GZIP_NULL_BYTE ;
    }

    # COMMENT (same truncate-then-terminate treatment as NAME)
    if ($flags & GZIP_FLG_FCOMMENT) {
        my $comment = $param->value('Comment') ;
        $comment =~ s/\x00.*\z//s ;
        $out .= $comment . GZIP_NULL_BYTE ;
    }

    # HEADER CRC
    # NOTE(review): RFC 1952 defines CRC16 as the two least significant
    # bytes of the CRC32 (mask 0xFFFF); the mask below keeps only the low
    # byte. Left unchanged because the companion reader may apply the same
    # mask when verifying -- confirm both sides before changing.
    $out .= pack("v", crc32($out) & 0x00FF ) if $param->value('HeaderCRC') ;

    noUTF8($out);

    return $out ;
}
642e522c 268
# Formats an ExtraField error message: prefixes the supplied reason with
# the standard "Error with ExtraField Parameter: " text.
sub ExtraFieldError
{
    my $prefix = "Error with ExtraField Parameter: " ;

    return $prefix . "$_[0]" ;
}
273
# Checks a single ExtraField subfield.
#
#   $pair - array ref of the form [ $id, $data ]
#   $lax  - when true, the "second ID byte must not be 0x00" rule is skipped
#
# Returns an error string built by ExtraFieldError() for the first problem
# found, or undef when the pair is acceptable.
sub validateExtraFieldPair
{
    my ($pair, $lax) = @_ ;

    return ExtraFieldError("Not an array ref")
        if ref($pair) ne 'ARRAY' ;

    return ExtraFieldError("SubField must have two parts")
        if @$pair != 2 ;

    my ($id, $data) = @$pair ;

    return ExtraFieldError("SubField ID is a reference")
        if ref $id ;

    return ExtraFieldError("SubField Data is a reference")
        if ref $data ;

    # ID is exactly two chars
    return ExtraFieldError("SubField ID not two chars long")
        if length($id) != GZIP_FEXTRA_SUBFIELD_ID_SIZE ;

    # Check that the 2nd byte of the ID isn't 0
    return ExtraFieldError("SubField ID 2nd byte is 0x00")
        if ! $lax && substr($id, 1, 1) eq "\x00" ;

    return ExtraFieldError("SubField Data too long")
        if length($data) > GZIP_FEXTRA_SUBFIELD_MAX_SIZE ;

    return undef ;
}
305
# Checks that a raw ExtraField string is a well-formed sequence of
# subfields, each laid out as: ID, 16-bit little-endian length, data.
#
#   $data - the raw ExtraField bytes
#   $lax  - when true no checking is done at all
#
# Returns an error string built by ExtraFieldError(), or undef when the
# data is acceptable (or $lax is set).
sub parseExtra
{
    my ($data, $lax) = @_ ;

    return undef
        if $lax ;

    my $total = length $data ;

    return ExtraFieldError("Too Large")
        if $total > GZIP_FEXTRA_MAX_SIZE;

    my $pos = 0 ;
    while ($pos < $total) {

        # a complete subfield header must fit in what is left
        return ExtraFieldError("FEXTRA Body")
            if $pos + GZIP_FEXTRA_SUBFIELD_HEADER_SIZE > $total ;

        my $id = substr($data, $pos, GZIP_FEXTRA_SUBFIELD_ID_SIZE);
        $pos += GZIP_FEXTRA_SUBFIELD_ID_SIZE;

        my $lenBytes = substr($data, $pos, GZIP_FEXTRA_SUBFIELD_LEN_SIZE);
        my $subLen   = unpack("v", $lenBytes);
        $pos += GZIP_FEXTRA_SUBFIELD_LEN_SIZE ;

        # the declared data length must not run past the end
        return ExtraFieldError("FEXTRA Body")
            if $pos + $subLen > $total ;

        my $body = substr($data, $pos, $subLen) ;
        my $bad  = validateExtraFieldPair( [$id, $body], $lax );
        return $bad if $bad ;

        $pos += $subLen ;
    }

    return undef ;
}
344
# Validates the ExtraField option held in $got and, when it was supplied
# as a structure, replaces it with the packed byte string.
#
#   $got - parsed-parameters object (must support parsed() and value())
#   $lax - passed through to the validators; true relaxes the checks
#
# ExtraField can be any of
#
#   -ExtraField => $data
#   -ExtraField => [$id1, $data1,
#                   $id2, $data2,
#                   ...
#                  ]
#   -ExtraField => [ [$id1 => $data1],
#                    [$id2 => $data2],
#                    ...
#                  ]
#   -ExtraField => { $id1 => $data1,
#                    $id2 => $data2,
#                    ...
#                  }
#
# Each subfield is packed as: ID, 16-bit little-endian data length, data.
#
# Returns an error string on failure, undef on success. A plain scalar is
# only checked (by parseExtra); the stored value is left untouched.
sub parseExtraField
{
    my $self = shift ;
    my $got  = shift ;
    my $lax  = shift ;

    return undef
        unless $got->parsed('ExtraField') ;

    return parseExtra($got->value('ExtraField'), $lax)
        unless ref $got->value('ExtraField') ;

    my $data = $got->value('ExtraField');
    my $out = '' ;

    if (ref $data eq 'ARRAY') {
        if (ref $data->[0]) {

            # list of [id, data] pairs
            foreach my $pair (@$data) {
                return ExtraFieldError("Not list of lists")
                    unless ref $pair eq 'ARRAY' ;

                my $bad = validateExtraFieldPair($pair, $lax) ;
                return $bad if $bad ;

                $out .= $pair->[0] . pack("v", length $pair->[1]) .
                        $pair->[1] ;
            }
        }
        else {
            # flat list: id1, data1, id2, data2, ...
            return ExtraFieldError("Not even number of elements")
                unless @$data % 2  == 0;

            # Bound the loop by the element count. length(@$data) is the
            # length of the stringified count, which stopped the loop
            # after the first pair.
            for (my $ix = 0; $ix < @$data ; $ix += 2) {
                my $bad = validateExtraFieldPair([$data->[$ix], $data->[$ix+1]], $lax) ;
                return $bad if $bad ;

                $out .= $data->[$ix] . pack("v", length $data->[$ix+1]) .
                        $data->[$ix+1] ;
            }
        }
    }
    elsif (ref $data eq 'HASH') {
        # Iterate with keys(), which yields the same order as each() but
        # resets the hash iterator first, so an early return here cannot
        # leave the caller's hash part-way through an iteration.
        foreach my $id (keys %$data) {
            my $info = $data->{$id} ;

            my $bad = validateExtraFieldPair([$id, $info], $lax);
            return $bad if $bad ;

            $out .= $id .  pack("v", length $info) . $info ;
        }
    }
    else {
        return ExtraFieldError("Not a scalar, array ref or hash ref") ;
    }

    $got->value('ExtraField' => $out);

    return undef;
}
420
# Returns the empty string: this class adds nothing after the trailer
# produced by mkTrailer().
sub mkFinalTrailer
{
    my $nothing = '' ;

    return $nothing ;
}
425
642e522c 4261;
427
428__END__
429
430=head1 NAME
431
432IO::Compress::Gzip - Perl interface to write RFC 1952 files/buffers
433
434=head1 SYNOPSIS
435
436 use IO::Compress::Gzip qw(gzip $GzipError) ;
437
438
439 my $status = gzip $input => $output [,OPTS]
440 or die "gzip failed: $GzipError\n";
441
442 my $z = new IO::Compress::Gzip $output [,OPTS]
443 or die "gzip failed: $GzipError\n";
444
445 $z->print($string);
446 $z->printf($format, $string);
447 $z->write($string);
448 $z->syswrite($string [, $length, $offset]);
449 $z->flush();
450 $z->tell();
451 $z->eof();
452 $z->seek($position, $whence);
453 $z->binmode();
454 $z->fileno();
1a6a8453 455 $z->newStream( [OPTS] );
642e522c 456 $z->deflateParams();
457 $z->close() ;
458
459 $GzipError ;
460
461 # IO::File mode
462
463 print $z $string;
464 printf $z $format, $string;
465 syswrite $z, $string [, $length, $offset];
466 flush $z, ;
467 tell $z
468 eof $z
469 seek $z, $position, $whence
470 binmode $z
471 fileno $z
472 close $z ;
473
474
475=head1 DESCRIPTION
476
477
478
479B<WARNING -- This is a Beta release>.
480
481=over 5
482
483=item * DO NOT use in production code.
484
485=item * The documentation is incomplete in places.
486
487=item * Parts of the interface defined here are tentative.
488
489=item * Please report any problems you find.
490
491=back
492
493
494
This module provides a Perl interface that allows writing compressed
data to files or buffers as defined in RFC 1952.
497
498
499All the gzip headers defined in RFC 1952 can be created using
500this module.
501
502
503
504
505For reading RFC 1952 files/buffers, see the companion module
506L<IO::Uncompress::Gunzip|IO::Uncompress::Gunzip>.
507
508
509=head1 Functional Interface
510
1a6a8453 511A top-level function, C<gzip>, is provided to carry out
512"one-shot" compression between buffers and/or files. For finer
513control over the compression process, see the L</"OO Interface">
514section.
642e522c 515
516 use IO::Compress::Gzip qw(gzip $GzipError) ;
517
518 gzip $input => $output [,OPTS]
519 or die "gzip failed: $GzipError\n";
520
1a6a8453 521
642e522c 522
523The functional interface needs Perl5.005 or better.
524
525
526=head2 gzip $input => $output [, OPTS]
527
1a6a8453 528
529C<gzip> expects at least two parameters, C<$input> and C<$output>.
642e522c 530
531=head3 The C<$input> parameter
532
533The parameter, C<$input>, is used to define the source of
534the uncompressed data.
535
536It can take one of the following forms:
537
538=over 5
539
540=item A filename
541
542If the C<$input> parameter is a simple scalar, it is assumed to be a
543filename. This file will be opened for reading and the input data
544will be read from it.
545
546=item A filehandle
547
548If the C<$input> parameter is a filehandle, the input data will be
549read from it.
550The string '-' can be used as an alias for standard input.
551
552=item A scalar reference
553
554If C<$input> is a scalar reference, the input data will be read
555from C<$$input>.
556
557=item An array reference
558
1a6a8453 559If C<$input> is an array reference, each element in the array must be a
560filename.
561
562The input data will be read from each file in turn.
563
642e522c 564The complete array will be walked to ensure that it only
1a6a8453 565contains valid filenames before any data is compressed.
566
567
642e522c 568
569=item An Input FileGlob string
570
571If C<$input> is a string that is delimited by the characters "<" and ">"
572C<gzip> will assume that it is an I<input fileglob string>. The
573input is the list of files that match the fileglob.
574
575If the fileglob does not match any files ...
576
577See L<File::GlobMapper|File::GlobMapper> for more details.
578
579
580=back
581
582If the C<$input> parameter is any other type, C<undef> will be returned.
583
584
585
586In addition, if C<$input> is a simple filename, the default values for
587two of the gzip header fields created by this function will be sourced
588from that file -- the NAME gzip header field will be populated with
589the filename itself, and the MTIME header field will be set to the
590modification time of the file.
1a6a8453 591The intention here is to mirror part of the behaviour of the gzip
642e522c 592executable.
593If you do not want to use these defaults they can be overridden by
1a6a8453 594explicitly setting the C<Name> and C<Time> options or by setting the
595C<Minimal> parameter.
642e522c 596
597
598
599=head3 The C<$output> parameter
600
601The parameter C<$output> is used to control the destination of the
602compressed data. This parameter can take one of these forms.
603
604=over 5
605
606=item A filename
607
1a6a8453 608If the C<$output> parameter is a simple scalar, it is assumed to be a
609filename. This file will be opened for writing and the compressed
610data will be written to it.
642e522c 611
612=item A filehandle
613
1a6a8453 614If the C<$output> parameter is a filehandle, the compressed data
615will be written to it.
642e522c 616The string '-' can be used as an alias for standard output.
617
618
619=item A scalar reference
620
1a6a8453 621If C<$output> is a scalar reference, the compressed data will be
622stored in C<$$output>.
642e522c 623
642e522c 624
625
626=item An Array Reference
627
1a6a8453 628If C<$output> is an array reference, the compressed data will be
629pushed onto the array.
642e522c 630
631=item An Output FileGlob
632
633If C<$output> is a string that is delimited by the characters "<" and ">"
634C<gzip> will assume that it is an I<output fileglob string>. The
635output is the list of files that match the fileglob.
636
When C<$output> is a fileglob string, C<$input> must also be a fileglob
string. Anything else is an error.
639
640=back
641
642If the C<$output> parameter is any other type, C<undef> will be returned.
643
642e522c 644
642e522c 645
646=head2 Notes
647
648When C<$input> maps to multiple files/buffers and C<$output> is a single
1a6a8453 649file/buffer the compressed input files/buffers will all be stored
650in C<$output> as a single compressed stream.
642e522c 651
652
653
654=head2 Optional Parameters
655
656Unless specified below, the optional parameters for C<gzip>,
657C<OPTS>, are the same as those used with the OO interface defined in the
658L</"Constructor Options"> section below.
659
660=over 5
661
662=item AutoClose =E<gt> 0|1
663
1a6a8453 664This option applies to any input or output data streams to
665C<gzip> that are filehandles.
642e522c 666
667If C<AutoClose> is specified, and the value is true, it will result in all
668input and/or output filehandles being closed once C<gzip> has
669completed.
670
671This parameter defaults to 0.
672
673
674
1a6a8453 675=item BinModeIn =E<gt> 0|1
676
677When reading from a file or filehandle, set C<binmode> before reading.
678
679Defaults to 0.
680
681
682
683
684
642e522c 685=item -Append =E<gt> 0|1
686
687TODO
688
689
690=back
691
692
693
694=head2 Examples
695
696To read the contents of the file C<file1.txt> and write the compressed
697data to the file C<file1.txt.gz>.
698
699 use strict ;
700 use warnings ;
701 use IO::Compress::Gzip qw(gzip $GzipError) ;
702
703 my $input = "file1.txt";
704 gzip $input => "$input.gz"
705 or die "gzip failed: $GzipError\n";
706
707
708To read from an existing Perl filehandle, C<$input>, and write the
709compressed data to a buffer, C<$buffer>.
710
711 use strict ;
712 use warnings ;
713 use IO::Compress::Gzip qw(gzip $GzipError) ;
714 use IO::File ;
715
716 my $input = new IO::File "<file1.txt"
717 or die "Cannot open 'file1.txt': $!\n" ;
718 my $buffer ;
719 gzip $input => \$buffer
720 or die "gzip failed: $GzipError\n";
721
722To compress all files in the directory "/my/home" that match "*.txt"
723and store the compressed data in the same directory
724
725 use strict ;
726 use warnings ;
727 use IO::Compress::Gzip qw(gzip $GzipError) ;
728
729 gzip '</my/home/*.txt>' => '<*.gz>'
730 or die "gzip failed: $GzipError\n";
731
732and if you want to compress each file one at a time, this will do the trick
733
734 use strict ;
735 use warnings ;
736 use IO::Compress::Gzip qw(gzip $GzipError) ;
737
738 for my $input ( glob "/my/home/*.txt" )
739 {
740 my $output = "$input.gz" ;
741 gzip $input => $output
742 or die "Error compressing '$input': $GzipError\n";
743 }
744
745
746=head1 OO Interface
747
748=head2 Constructor
749
750The format of the constructor for C<IO::Compress::Gzip> is shown below
751
752 my $z = new IO::Compress::Gzip $output [,OPTS]
753 or die "IO::Compress::Gzip failed: $GzipError\n";
754
755It returns an C<IO::Compress::Gzip> object on success and undef on failure.
756The variable C<$GzipError> will contain an error message on failure.
757
758If you are running Perl 5.005 or better the object, C<$z>, returned from
759IO::Compress::Gzip can be used exactly like an L<IO::File|IO::File> filehandle.
760This means that all normal output file operations can be carried out
761with C<$z>.
762For example, to write to a compressed file/buffer you can use either of
763these forms
764
765 $z->print("hello world\n");
766 print $z "hello world\n";
767
768The mandatory parameter C<$output> is used to control the destination
769of the compressed data. This parameter can take one of these forms.
770
771=over 5
772
773=item A filename
774
775If the C<$output> parameter is a simple scalar, it is assumed to be a
776filename. This file will be opened for writing and the compressed data
777will be written to it.
778
779=item A filehandle
780
781If the C<$output> parameter is a filehandle, the compressed data will be
782written to it.
783The string '-' can be used as an alias for standard output.
784
785
786=item A scalar reference
787
788If C<$output> is a scalar reference, the compressed data will be stored
789in C<$$output>.
790
791=back
792
793If the C<$output> parameter is any other type, C<IO::Compress::Gzip>::new will
794return undef.
795
796=head2 Constructor Options
797
798C<OPTS> is any combination of the following options:
799
800=over 5
801
802=item -AutoClose =E<gt> 0|1
803
804This option is only valid when the C<$output> parameter is a filehandle. If
1a6a8453 805specified, and the value is true, it will result in the C<$output> being
806closed once either the C<close> method is called or the C<IO::Compress::Gzip>
807object is destroyed.
642e522c 808
809This parameter defaults to 0.
810
811=item -Append =E<gt> 0|1
812
813Opens C<$output> in append mode.
814
1a6a8453 815The behaviour of this option is dependent on the type of C<$output>.
642e522c 816
817=over 5
818
819=item * A Buffer
820
If C<$output> is a buffer and C<Append> is enabled, all compressed data
will be appended to the end of C<$output>. Otherwise C<$output> will be
cleared before any data is written to it.
642e522c 824
825=item * A Filename
826
1a6a8453 827If C<$output> is a filename and C<Append> is enabled, the file will be
828opened in append mode. Otherwise the contents of the file, if any, will be
829truncated before any compressed data is written to it.
642e522c 830
831=item * A Filehandle
832
If C<$output> is a filehandle and C<Append> is enabled, the file pointer will
be positioned to the end of the file via a call to C<seek> before any
compressed data is written to it. Otherwise the file pointer will not be moved.
642e522c 836
837=back
838
839This parameter defaults to 0.
840
841=item -Merge =E<gt> 0|1
842
843This option is used to compress input data and append it to an existing
844compressed data stream in C<$output>. The end result is a single compressed
845data stream stored in C<$output>.
846
847
848
1a6a8453 849It is a fatal error to attempt to use this option when C<$output> is not an
850RFC 1952 data stream.
642e522c 851
852
853
854There are a number of other limitations with the C<Merge> option:
855
856=over 5
857
858=item 1
859
1a6a8453 860This module needs to have been built with zlib 1.2.1 or better to work. A
861fatal error will be thrown if C<Merge> is used with an older version of
862zlib.
642e522c 863
864=item 2
865
866If C<$output> is a file or a filehandle, it must be seekable.
867
868=back
869
870
871This parameter defaults to 0.
872
873=item -Level
874
875Defines the compression level used by zlib. The value should either be
876a number between 0 and 9 (0 means no compression and 9 is maximum
877compression), or one of the symbolic constants defined below.
878
879 Z_NO_COMPRESSION
880 Z_BEST_SPEED
881 Z_BEST_COMPRESSION
882 Z_DEFAULT_COMPRESSION
883
884The default is Z_DEFAULT_COMPRESSION.
885
886Note, these constants are not imported by C<IO::Compress::Gzip> by default.
887
888 use IO::Compress::Gzip qw(:strategy);
889 use IO::Compress::Gzip qw(:constants);
890 use IO::Compress::Gzip qw(:all);
891
892=item -Strategy
893
894Defines the strategy used to tune the compression. Use one of the symbolic
895constants defined below.
896
897 Z_FILTERED
898 Z_HUFFMAN_ONLY
899 Z_RLE
900 Z_FIXED
901 Z_DEFAULT_STRATEGY
902
903The default is Z_DEFAULT_STRATEGY.
904
905
906
907
908
1a6a8453 909=item -Minimal =E<gt> 0|1
642e522c 910
911If specified, this option will force the creation of the smallest possible
912compliant gzip header (which is exactly 10 bytes long) as defined in
913RFC 1952.
914
915See the section titled "Compliance" in RFC 1952 for a definition
916of the values used for the fields in the gzip header.
917
918All other parameters that control the content of the gzip header will
919be ignored if this parameter is set to 1.
920
921This parameter defaults to 0.
922
923=item -Comment =E<gt> $comment
924
925Stores the contents of C<$comment> in the COMMENT field in
926the gzip header.
927By default, no comment field is written to the gzip file.
928
929If the C<-Strict> option is enabled, the comment can only consist of ISO
9308859-1 characters plus line feed.
931
932If the C<-Strict> option is disabled, the comment field can contain any
933character except NULL. If any null characters are present, the field
934will be truncated at the first NULL.
935
936=item -Name =E<gt> $string
937
938Stores the contents of C<$string> in the gzip NAME header field. If
939C<Name> is not specified, no gzip NAME field will be created.
940
941If the C<-Strict> option is enabled, C<$string> can only consist of ISO
9428859-1 characters.
943
944If C<-Strict> is disabled, then C<$string> can contain any character
945except NULL. If any null characters are present, the field will be
946truncated at the first NULL.
947
948=item -Time =E<gt> $number
949
950Sets the MTIME field in the gzip header to $number.
951
952This field defaults to the time the C<IO::Compress::Gzip> object was created
953if this option is not specified.
954
955=item -TextFlag =E<gt> 0|1
956
1a6a8453 957This parameter controls the setting of the FLG.FTEXT bit in the gzip
958header. It is used to signal that the data stored in the gzip file/buffer
959is probably text.
642e522c 960
961The default is 0.
962
963=item -HeaderCRC =E<gt> 0|1
964
1a6a8453 965When true this parameter will set the FLG.FHCRC bit to 1 in the gzip header
966and set the CRC16 header field to the CRC of the complete gzip header
967except the CRC16 field itself.
642e522c 968
B<Note> that gzip files created with the C<HeaderCRC> flag set to 1 cannot
be read by most, if not all, of the standard gunzip utilities, most
notably gzip version 1.2.4. You should therefore avoid using this option if
you want to maximize the portability of your gzip files.
642e522c 973
974This parameter defaults to 0.
975
976=item -OS_Code =E<gt> $value
977
1a6a8453 978Stores C<$value> in the gzip OS header field. A number between 0 and 255 is
979valid.
642e522c 980
981If not specified, this parameter defaults to the OS code of the Operating
982System this module was built on. The value 3 is used as a catch-all for all
983Unix variants and unknown Operating Systems.
984
985=item -ExtraField =E<gt> $data
986
1a6a8453 987This parameter allows additional metadata to be stored in the ExtraField in
988the gzip header. An RFC1952 compliant ExtraField consists of zero or more
989subfields. Each subfield consists of a two byte header followed by the
990subfield data.
642e522c 991
992The list of subfields can be supplied in any of the following formats
993
994 -ExtraField => [$id1, $data1,
995 $id2, $data2,
996 ...
997 ]
998 -ExtraField => [ [$id1 => $data1],
999 [$id2 => $data2],
1000 ...
1001 ]
1002 -ExtraField => { $id1 => $data1,
1003 $id2 => $data2,
1004 ...
1005 }
1006
1007Where C<$id1>, C<$id2> are two byte subfield ID's. The second byte of
1008the ID cannot be 0, unless the C<Strict> option has been disabled.
1009
1010If you use the hash syntax, you have no control over the order in which
1011the ExtraSubFields are stored, plus you cannot have SubFields with
1012duplicate ID.
1013
Alternatively the list of subfields can be supplied as a scalar, thus
1015
1016 -ExtraField => $rawdata
1017
1018If you use the raw format, and the C<Strict> option is enabled,
1019C<IO::Compress::Gzip> will check that C<$rawdata> consists of zero or more
1020conformant sub-fields. When C<Strict> is disabled, C<$rawdata> can
1021consist of any arbitrary byte stream.
1022
The maximum size of the Extra Field is 65535 bytes.
1024
1025=item -ExtraFlags =E<gt> $value
1026
1027Sets the XFL byte in the gzip header to C<$value>.
1028
1a6a8453 1029If this option is not present, the value stored in XFL field will be
1030determined by the setting of the C<Level> option.
642e522c 1031
1032If C<Level =E<gt> Z_BEST_SPEED> has been specified then XFL is set to 2.
1033If C<Level =E<gt> Z_BEST_COMPRESSION> has been specified then XFL is set to 4.
1034Otherwise XFL is set to 0.
1035
1036
1037
1038=item -Strict =E<gt> 0|1
1039
1040
1041
1042C<Strict> will optionally police the values supplied with other options
1043to ensure they are compliant with RFC1952.
1044
1045This option is enabled by default.
1046
1a6a8453 1047If C<Strict> is enabled the following behaviour will be policed:
642e522c 1048
1049=over 5
1050
1051=item *
1052
1053The value supplied with the C<Name> option can only contain ISO 8859-1
1054characters.
1055
1056=item *
1057
1058The value supplied with the C<Comment> option can only contain ISO 8859-1
1059characters plus line-feed.
1060
1061=item *
1062
The values supplied with the C<-Name> and C<-Comment> options cannot
contain any embedded nulls.
1065
1066=item *
1067
1068If an C<ExtraField> option is specified and it is a simple scalar,
1069it must conform to the sub-field structure as defined in RFC1952.
1070
1071=item *
1072
1073If an C<ExtraField> option is specified the second byte of the ID will be
1074checked in each subfield to ensure that it does not contain the reserved
1075value 0x00.
1076
1077=back
1078
1a6a8453 1079When C<Strict> is disabled the following behaviour will be policed:
642e522c 1080
1081=over 5
1082
1083=item *
1084
1085The value supplied with C<-Name> option can contain
1086any character except NULL.
1087
1088=item *
1089
1090The value supplied with C<-Comment> option can contain any character
1091except NULL.
1092
1093=item *
1094
1095The values supplied with the C<-Name> and C<-Comment> options can contain
1096multiple embedded nulls. The string written to the gzip header will
1097consist of the characters up to, but not including, the first embedded
1098NULL.
1099
1100=item *
1101
1102If an C<ExtraField> option is specified and it is a simple scalar, the
1103structure will not be checked. The only error is if the length is too big.
1104
1105=item *
1106
1107The ID header in an C<ExtraField> sub-field can consist of any two bytes.
1108
1109=back
1110
1111
1112
1113=back
1114
1115=head2 Examples
1116
1117TODO
1118
1119=head1 Methods
1120
1121=head2 print
1122
1123Usage is
1124
1125 $z->print($data)
1126 print $z $data
1127
1128Compresses and outputs the contents of the C<$data> parameter. This
1a6a8453 1129has the same behaviour as the C<print> built-in.
642e522c 1130
1131Returns true if successful.
1132
1133=head2 printf
1134
1135Usage is
1136
1137 $z->printf($format, $data)
1138 printf $z $format, $data
1139
1140Compresses and outputs the contents of the C<$data> parameter.
1141
1142Returns true if successful.
1143
1144=head2 syswrite
1145
1146Usage is
1147
1148 $z->syswrite($data)
1149 $z->syswrite($data, $length)
1150 $z->syswrite($data, $length, $offset)
1151
1152 syswrite $z, $data
1153 syswrite $z, $data, $length
1154 syswrite $z, $data, $length, $offset
1155
1156Compresses and outputs the contents of the C<$data> parameter.
1157
1158Returns the number of uncompressed bytes written, or C<undef> if
1159unsuccessful.
1160
1161=head2 write
1162
1163Usage is
1164
1165 $z->write($data)
1166 $z->write($data, $length)
1167 $z->write($data, $length, $offset)
1168
1169Compresses and outputs the contents of the C<$data> parameter.
1170
1171Returns the number of uncompressed bytes written, or C<undef> if
1172unsuccessful.
1173
1174=head2 flush
1175
1176Usage is
1177
1178 $z->flush;
1179 $z->flush($flush_type);
1180 flush $z ;
1181 flush $z $flush_type;
1182
1183Flushes any pending compressed data to the output file/buffer.
1184
1185This method takes an optional parameter, C<$flush_type>, that controls
1186how the flushing will be carried out. By default the C<$flush_type>
1187used is C<Z_FINISH>. Other valid values for C<$flush_type> are
1188C<Z_NO_FLUSH>, C<Z_SYNC_FLUSH>, C<Z_FULL_FLUSH> and C<Z_BLOCK>. It is
1189strongly recommended that you only set the C<$flush_type> parameter if
1190you fully understand the implications of what it does - overuse of C<flush>
1191can seriously degrade the level of compression achieved. See the C<zlib>
1192documentation for details.
1193
1194Returns true on success.
1195
1196
1197=head2 tell
1198
1199Usage is
1200
1201 $z->tell()
1202 tell $z
1203
1204Returns the uncompressed file offset.
1205
1206=head2 eof
1207
1208Usage is
1209
1210 $z->eof();
1211 eof($z);
1212
1213
1214
1215Returns true if the C<close> method has been called.
1216
1217
1218
1219=head2 seek
1220
1221 $z->seek($position, $whence);
1222 seek($z, $position, $whence);
1223
1224
1225
1226
1227Provides a sub-set of the C<seek> functionality, with the restriction
1228that it is only legal to seek forward in the output file/buffer.
1229It is a fatal error to attempt to seek backward.
1230
1231Empty parts of the file/buffer will have NULL (0x00) bytes written to them.
1232
1233
1234
1235The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
1236SEEK_CUR or SEEK_END.
1237
1238Returns 1 on success, 0 on failure.
1239
1240=head2 binmode
1241
1242Usage is
1243
1244 $z->binmode
1245 binmode $z ;
1246
1247This is a noop provided for completeness.
1248
1249=head2 fileno
1250
1251 $z->fileno()
1252 fileno($z)
1253
1254If the C<$z> object is associated with a file, this method will return
1255the underlying file descriptor.
1256
1257If the C<$z> object is associated with a buffer, this method will
1258return undef.
1259
1260=head2 close
1261
1262 $z->close() ;
1263 close $z ;
1264
1265
1266
1267Flushes any pending compressed data and then closes the output file/buffer.
1268
1269
1270
1271For most versions of Perl this method will be automatically invoked if
1272the IO::Compress::Gzip object is destroyed (either explicitly or by the
1273variable with the reference to the object going out of scope). The
1274exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
1275these cases, the C<close> method will be called automatically, but
1276not until global destruction of all live objects when the program is
1277terminating.
1278
1279Therefore, if you want your scripts to be able to run on all versions
1280of Perl, you should call C<close> explicitly and not rely on automatic
1281closing.
1282
1283Returns true on success, otherwise 0.
1284
1285If the C<AutoClose> option has been enabled when the IO::Compress::Gzip
1286object was created, and the object is associated with a file, the
1287underlying file will also be closed.
1288
1289
1290
1291
1a6a8453 1292=head2 newStream([OPTS])
642e522c 1293
1294Usage is
1295
1a6a8453 1296 $z->newStream( [OPTS] )
642e522c 1297
1a6a8453 1298Closes the current compressed data stream and starts a new one.
1299
1300OPTS consists of the following sub-set of the options that are
1301available when creating the C<$z> object,
1302
1303=over 5
1304
1305=item * Level
1306
1307=item * TODO
1308
1309=back
642e522c 1310
1311=head2 deflateParams
1312
1313Usage is
1314
1315 $z->deflateParams
1316
1317TODO
1318
1319=head1 Importing
1320
1321A number of symbolic constants are required by some methods in
1322C<IO::Compress::Gzip>. None are imported by default.
1323
1324=over 5
1325
1326=item :all
1327
1328Imports C<gzip>, C<$GzipError> and all symbolic
1329constants that can be used by C<IO::Compress::Gzip>. Same as doing this
1330
1331 use IO::Compress::Gzip qw(gzip $GzipError :constants) ;
1332
1333=item :constants
1334
1335Import all symbolic constants. Same as doing this
1336
1337 use IO::Compress::Gzip qw(:flush :level :strategy) ;
1338
1339=item :flush
1340
1341These symbolic constants are used by the C<flush> method.
1342
1343 Z_NO_FLUSH
1344 Z_PARTIAL_FLUSH
1345 Z_SYNC_FLUSH
1346 Z_FULL_FLUSH
1347 Z_FINISH
1348 Z_BLOCK
1349
1350
1351=item :level
1352
1353These symbolic constants are used by the C<Level> option in the constructor.
1354
1355 Z_NO_COMPRESSION
1356 Z_BEST_SPEED
1357 Z_BEST_COMPRESSION
1358 Z_DEFAULT_COMPRESSION
1359
1360
1361=item :strategy
1362
1363These symbolic constants are used by the C<Strategy> option in the constructor.
1364
1365 Z_FILTERED
1366 Z_HUFFMAN_ONLY
1367 Z_RLE
1368 Z_FIXED
1369 Z_DEFAULT_STRATEGY
1370
1371=back
1372
1373For
1374
1375=head1 EXAMPLES
1376
1377TODO
1378
1379
1380
1381
1382
1383
1384=head1 SEE ALSO
1385
1386L<Compress::Zlib>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Uncompress::AnyInflate>
1387
1388L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
1389
1390L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
1391L<Archive::Tar|Archive::Tar>, L<IO::Zlib|IO::Zlib>
1392
1393For RFC 1950, 1951 and 1952 see
1394F<http://www.faqs.org/rfcs/rfc1950.html>,
1395F<http://www.faqs.org/rfcs/rfc1951.html> and
1396F<http://www.faqs.org/rfcs/rfc1952.html>
1397
1398The primary site for the gzip program is F<http://www.gzip.org>.
1399
1400=head1 AUTHOR
1401
1402The I<IO::Compress::Gzip> module was written by Paul Marquess,
1403F<pmqs@cpan.org>. The latest copy of the module can be
1404found on CPAN in F<modules/by-module/Compress/Compress-Zlib-x.x.tar.gz>.
1405
1406The I<zlib> compression library was written by Jean-loup Gailly
1407F<gzip@prep.ai.mit.edu> and Mark Adler F<madler@alumni.caltech.edu>.
1408
1409The primary site for the I<zlib> compression library is
1410F<http://www.zlib.org>.
1411
1412=head1 MODIFICATION HISTORY
1413
1414See the Changes file.
1415
1416=head1 COPYRIGHT AND LICENSE
1417
1418
1a6a8453 1419Copyright (c) 2005-2006 Paul Marquess. All rights reserved.
642e522c 1420This program is free software; you can redistribute it and/or
1421modify it under the same terms as Perl itself.
1422
1423
1424
1425