Compress::Zlib becomes zlib agnostic
[p5sagit/p5-mst-13.2.git] / ext / Compress / Zlib / lib / IO / Uncompress / AnyInflate.pm
CommitLineData
642e522c 1package IO::Uncompress::AnyInflate ;
2
3# for RFC1950, RFC1951 or RFC1952
4
5use strict;
6use warnings;
1a6a8453 7
8use Compress::Zlib::Common qw(createSelfTiedObject);
9
10use UncompressPlugin::Inflate ();
11#use UncompressPlugin::Bunzip2 ();
12
13
14#use IO::Uncompress::Base ;
642e522c 15use IO::Uncompress::Gunzip ;
1a6a8453 16use IO::Uncompress::Inflate ;
17use IO::Uncompress::RawInflate ;
18use IO::Uncompress::Unzip ;
19#use IO::Uncompress::Bunzip2 ;
20#use IO::Uncompress::UnLzop ;
642e522c 21
require Exporter ;

# Package globals: the module version, the error string shared by the
# functional and OO interfaces, and the Exporter configuration.
our $VERSION         = '2.000_07';
our $AnyInflateError = '';

our @ISA       = qw( Exporter IO::Uncompress::Base );
our @EXPORT_OK = qw( $AnyInflateError anyinflate ) ;

# Start from the tag set published by IO::Uncompress::Base, then add this
# module's own exportable names to the 'all' tag.
# NOTE(review): the hash copy is shallow, so if the base hash already holds
# an 'all' array reference the push below appends to that same array.
our %EXPORT_TAGS = %IO::Uncompress::Base::DEFLATE_CONSTANTS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;

# Register every name in the 'all' tag as exportable on request.
Exporter::export_ok_tags('all');
34
642e522c 35# TODO - allow the user to pick a set of the three formats to allow
36# or just assume want to auto-detect any of the three formats.
37
# Constructor. Creates a self-tied object whose error slot is bound to
# $AnyInflateError, then passes the caller's remaining arguments to _create.
# Returns whatever _create returns.
sub new
{
    my $class = shift @_ ;

    my $self = createSelfTiedObject($class, \$AnyInflateError);

    # @_ is forwarded as-is so the callee sees the caller's own arguments.
    return $self->_create(undef, 0, @_);
}
44
# Functional ("one-shot") interface. Creates a class-less self-tied object
# bound to $AnyInflateError and passes all arguments to its _inf method.
# Returns whatever _inf returns.
sub anyinflate
{
    my $worker = createSelfTiedObject(undef, \$AnyInflateError);

    return $worker->_inf(@_) ;
}
50
# This class defines no extra constructor parameters: returns an empty list
# (undef in scalar context).
sub getExtraParams
{
    return;
}
55
# Parameter check hook.
#   $self - the object (not consulted here)
#   $got  - parameter object exposing a value(NAME => VALUE) setter
# Forces both checksum options on, since the stream format is not yet
# known and either checksum may be needed. Always returns 1.
sub ckParams
{
    my ($self, $got) = @_ ;

    # any always needs both crc32 and adler32
    for my $checksum (qw( CRC32 ADLER32 )) {
        $got->value($checksum => 1);
    }

    return 1;
}
67
# Creates the inflation engine and identifies the input format.
#
# Called with ($self, $class, $got); only $self is used here.
#
# Returns:
#   the result of saveErrorString  if the inflate object cannot be created
#   0                              if no supported magic signature is found
#   undef                          if readHeader returns a false value
#   1                              once the header has been read and stored
sub mkUncomp
{
    my ($self) = @_ ;

    my ($inflater, $errstr, $errno) = UncompressPlugin::Inflate::mkUncompObject();

    unless (defined $inflater) {
        return $self->saveErrorString(undef, $errstr, $errno);
    }

    *$self->{Uncomp} = $inflater;

    # Probe the candidate formats in this fixed order.
    my $magic = $self->ckMagic( qw( RawInflate Inflate Gunzip Unzip ) );

    return 0
        unless $magic;

    # The header result is stored even when it is false, as before.
    my $info = $self->readHeader($magic);
    *$self->{Info} = $info;

    return undef
        unless $info;

    return 1;
}
92
93
94
# Tries each named format in turn to find one that recognises the input.
#
#   @names - format names; each is prefixed with "IO::Uncompress::" to give
#            the class whose ckMagic is consulted.
#
# For every candidate the object is re-blessed into that class so the
# method call dispatches to that class's own ckMagic. On the first true
# result the object is left blessed into the matching class and the magic
# value is returned. After a miss, the pending header bytes are pushed
# back and the pending buffer is cleared before the next candidate. If no
# candidate matches, the object's original class is restored and undef is
# returned.
sub ckMagic
{
    my ($self, @names) = @_ ;

    my $original_class = ref $self ;

    foreach my $name (@names)
    {
        my $candidate = "IO::Uncompress::$name" ;

        bless $self, $candidate ;
        my $magic = $self->ckMagic();

        # Deliberately stay blessed into the class that matched.
        return $magic
            if $magic ;

        $self->pushBack(*$self->{HeaderPending}) ;
        *$self->{HeaderPending} = '' ;
    }

    bless $self, $original_class ;
    return undef;
}
119
1201 ;
121
122__END__
123
124
125=head1 NAME
126
127IO::Uncompress::AnyInflate - Perl interface to read RFC 1950, 1951 & 1952 files/buffers
128
129=head1 SYNOPSIS
130
131 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
132
133 my $status = anyinflate $input => $output [,OPTS]
134 or die "anyinflate failed: $AnyInflateError\n";
135
136 my $z = new IO::Uncompress::AnyInflate $input [OPTS]
137 or die "anyinflate failed: $AnyInflateError\n";
138
139 $status = $z->read($buffer)
140 $status = $z->read($buffer, $length)
141 $status = $z->read($buffer, $length, $offset)
142 $line = $z->getline()
143 $char = $z->getc()
144 $char = $z->ungetc()
145 $status = $z->inflateSync()
146 $z->trailingData()
147 $data = $z->getHeaderInfo()
148 $z->tell()
149 $z->seek($position, $whence)
150 $z->binmode()
151 $z->fileno()
152 $z->eof()
153 $z->close()
154
155 $AnyInflateError ;
156
157 # IO::File mode
158
159 <$z>
160 read($z, $buffer);
161 read($z, $buffer, $length);
162 read($z, $buffer, $length, $offset);
163 tell($z)
164 seek($z, $position, $whence)
165 binmode($z)
166 fileno($z)
167 eof($z)
168 close($z)
169
170
171=head1 DESCRIPTION
172
173
174
175B<WARNING -- This is a Beta release>.
176
177=over 5
178
179=item * DO NOT use in production code.
180
181=item * The documentation is incomplete in places.
182
183=item * Parts of the interface defined here are tentative.
184
185=item * Please report any problems you find.
186
187=back
188
189
190
191
192
1a6a8453 193This module provides a Perl interface that allows the reading of
194files/buffers that conform to RFC's 1950, 1951 and 1952.
642e522c 195
1a6a8453 196The module will auto-detect which, if any, of the three supported
197compression formats is being used.
642e522c 198
199
200
201=head1 Functional Interface
202
1a6a8453 203A top-level function, C<anyinflate>, is provided to carry out
204"one-shot" uncompression between buffers and/or files. For finer
205control over the uncompression process, see the L</"OO Interface">
206section.
642e522c 207
208 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
209
210 anyinflate $input => $output [,OPTS]
211 or die "anyinflate failed: $AnyInflateError\n";
212
1a6a8453 213
642e522c 214
215The functional interface needs Perl5.005 or better.
216
217
218=head2 anyinflate $input => $output [, OPTS]
219
1a6a8453 220
221C<anyinflate> expects at least two parameters, C<$input> and C<$output>.
642e522c 222
223=head3 The C<$input> parameter
224
225The parameter, C<$input>, is used to define the source of
226the compressed data.
227
228It can take one of the following forms:
229
230=over 5
231
232=item A filename
233
234If the C<$input> parameter is a simple scalar, it is assumed to be a
235filename. This file will be opened for reading and the input data
236will be read from it.
237
238=item A filehandle
239
240If the C<$input> parameter is a filehandle, the input data will be
241read from it.
242The string '-' can be used as an alias for standard input.
243
244=item A scalar reference
245
246If C<$input> is a scalar reference, the input data will be read
247from C<$$input>.
248
249=item An array reference
250
1a6a8453 251If C<$input> is an array reference, each element in the array must be a
252filename.
253
254The input data will be read from each file in turn.
255
642e522c 256The complete array will be walked to ensure that it only
1a6a8453 257contains valid filenames before any data is uncompressed.
258
259
642e522c 260
261=item An Input FileGlob string
262
263If C<$input> is a string that is delimited by the characters "<" and ">"
264C<anyinflate> will assume that it is an I<input fileglob string>. The
265input is the list of files that match the fileglob.
266
267If the fileglob does not match any files ...
268
269See L<File::GlobMapper|File::GlobMapper> for more details.
270
271
272=back
273
274If the C<$input> parameter is any other type, C<undef> will be returned.
275
276
277
278=head3 The C<$output> parameter
279
280The parameter C<$output> is used to control the destination of the
281uncompressed data. This parameter can take one of these forms.
282
283=over 5
284
285=item A filename
286
1a6a8453 287If the C<$output> parameter is a simple scalar, it is assumed to be a
288filename. This file will be opened for writing and the uncompressed
289data will be written to it.
642e522c 290
291=item A filehandle
292
1a6a8453 293If the C<$output> parameter is a filehandle, the uncompressed data
294will be written to it.
642e522c 295The string '-' can be used as an alias for standard output.
296
297
298=item A scalar reference
299
1a6a8453 300If C<$output> is a scalar reference, the uncompressed data will be
301stored in C<$$output>.
642e522c 302
642e522c 303
304
305=item An Array Reference
306
1a6a8453 307If C<$output> is an array reference, the uncompressed data will be
308pushed onto the array.
642e522c 309
310=item An Output FileGlob
311
312If C<$output> is a string that is delimited by the characters "<" and ">"
313C<anyinflate> will assume that it is an I<output fileglob string>. The
314output is the list of files that match the fileglob.
315
When C<$output> is a fileglob string, C<$input> must also be a fileglob
string. Anything else is an error.
318
319=back
320
321If the C<$output> parameter is any other type, C<undef> will be returned.
322
642e522c 323
642e522c 324
325=head2 Notes
326
327When C<$input> maps to multiple files/buffers and C<$output> is a single
1a6a8453 328file/buffer the uncompressed input files/buffers will all be stored
329in C<$output> as a single uncompressed stream.
642e522c 330
331
332
333=head2 Optional Parameters
334
335Unless specified below, the optional parameters for C<anyinflate>,
336C<OPTS>, are the same as those used with the OO interface defined in the
337L</"Constructor Options"> section below.
338
339=over 5
340
341=item AutoClose =E<gt> 0|1
342
1a6a8453 343This option applies to any input or output data streams to
344C<anyinflate> that are filehandles.
642e522c 345
346If C<AutoClose> is specified, and the value is true, it will result in all
347input and/or output filehandles being closed once C<anyinflate> has
348completed.
349
350This parameter defaults to 0.
351
352
353
1a6a8453 354=item BinModeOut =E<gt> 0|1
355
356When writing to a file or filehandle, set C<binmode> before writing to the
357file.
358
359Defaults to 0.
360
361
362
363
364
642e522c 365=item -Append =E<gt> 0|1
366
367TODO
368
1a6a8453 369=item -MultiStream =E<gt> 0|1
370
371Creates a new stream after each file.
372
373Defaults to 1.
374
642e522c 375
376
377=back
378
379
380
381
382=head2 Examples
383
To read the contents of the file C<file1.txt.Compressed> and write the
uncompressed data to the file C<file1.txt>.
386
387 use strict ;
388 use warnings ;
389 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
390
391 my $input = "file1.txt.Compressed";
392 my $output = "file1.txt";
393 anyinflate $input => $output
394 or die "anyinflate failed: $AnyInflateError\n";
395
396
397To read from an existing Perl filehandle, C<$input>, and write the
398uncompressed data to a buffer, C<$buffer>.
399
400 use strict ;
401 use warnings ;
402 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
403 use IO::File ;
404
405 my $input = new IO::File "<file1.txt.Compressed"
406 or die "Cannot open 'file1.txt.Compressed': $!\n" ;
407 my $buffer ;
408 anyinflate $input => \$buffer
409 or die "anyinflate failed: $AnyInflateError\n";
410
To uncompress all files in the directory "/my/home" that match "*.txt.Compressed" and store the uncompressed data in the same directory
412
413 use strict ;
414 use warnings ;
415 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
416
417 anyinflate '</my/home/*.txt.Compressed>' => '</my/home/#1.txt>'
418 or die "anyinflate failed: $AnyInflateError\n";
419
and if you want to uncompress each file one at a time, this will do the trick
421
422 use strict ;
423 use warnings ;
424 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
425
426 for my $input ( glob "/my/home/*.txt.Compressed" )
427 {
428 my $output = $input;
        $output =~ s/\.Compressed$// ;
        anyinflate $input => $output
            or die "Error uncompressing '$input': $AnyInflateError\n";
432 }
433
434=head1 OO Interface
435
436=head2 Constructor
437
438The format of the constructor for IO::Uncompress::AnyInflate is shown below
439
440
441 my $z = new IO::Uncompress::AnyInflate $input [OPTS]
442 or die "IO::Uncompress::AnyInflate failed: $AnyInflateError\n";
443
444Returns an C<IO::Uncompress::AnyInflate> object on success and undef on failure.
445The variable C<$AnyInflateError> will contain an error message on failure.
446
1a6a8453 447If you are running Perl 5.005 or better the object, C<$z>, returned from
448IO::Uncompress::AnyInflate can be used exactly like an L<IO::File|IO::File> filehandle.
449This means that all normal input file operations can be carried out with
450C<$z>. For example, to read a line from a compressed file/buffer you can
451use either of these forms
642e522c 452
453 $line = $z->getline();
454 $line = <$z>;
455
456The mandatory parameter C<$input> is used to determine the source of the
457compressed data. This parameter can take one of three forms.
458
459=over 5
460
461=item A filename
462
463If the C<$input> parameter is a scalar, it is assumed to be a filename. This
464file will be opened for reading and the compressed data will be read from it.
465
466=item A filehandle
467
468If the C<$input> parameter is a filehandle, the compressed data will be
469read from it.
470The string '-' can be used as an alias for standard input.
471
472
473=item A scalar reference
474
If C<$input> is a scalar reference, the compressed data will be read from
C<$$input>.
477
478=back
479
480=head2 Constructor Options
481
482
483The option names defined below are case insensitive and can be optionally
484prefixed by a '-'. So all of the following are valid
485
486 -AutoClose
487 -autoclose
488 AUTOCLOSE
489 autoclose
490
491OPTS is a combination of the following options:
492
493=over 5
494
495=item -AutoClose =E<gt> 0|1
496
497This option is only valid when the C<$input> parameter is a filehandle. If
498specified, and the value is true, it will result in the file being closed once
499either the C<close> method is called or the IO::Uncompress::AnyInflate object is
500destroyed.
501
502This parameter defaults to 0.
503
504=item -MultiStream =E<gt> 0|1
505
506
507
508Allows multiple concatenated compressed streams to be treated as a single
509compressed stream. Decompression will stop once either the end of the
510file/buffer is reached, an error is encountered (premature eof, corrupt
511compressed data) or the end of a stream is not immediately followed by the
512start of another stream.
513
514This parameter defaults to 0.
515
516
517
518=item -Prime =E<gt> $string
519
520This option will uncompress the contents of C<$string> before processing the
521input file/buffer.
522
523This option can be useful when the compressed data is embedded in another
524file/data structure and it is not possible to work out where the compressed
1a6a8453 525data begins without having to read the first few bytes. If this is the
526case, the uncompression can be I<primed> with these bytes using this
527option.
642e522c 528
529=item -Transparent =E<gt> 0|1
530
531If this option is set and the input file or buffer is not compressed data,
532the module will allow reading of it anyway.
533
534This option defaults to 1.
535
536=item -BlockSize =E<gt> $num
537
1a6a8453 538When reading the compressed input data, IO::Uncompress::AnyInflate will read it in
539blocks of C<$num> bytes.
642e522c 540
541This option defaults to 4096.
542
543=item -InputLength =E<gt> $size
544
1a6a8453 545When present this option will limit the number of compressed bytes read
546from the input file/buffer to C<$size>. This option can be used in the
547situation where there is useful data directly after the compressed data
548stream and you know beforehand the exact length of the compressed data
549stream.
642e522c 550
1a6a8453 551This option is mostly used when reading from a filehandle, in which case
552the file pointer will be left pointing to the first byte directly after the
642e522c 553compressed data stream.
554
555
556
557This option defaults to off.
558
559=item -Append =E<gt> 0|1
560
561This option controls what the C<read> method does with uncompressed data.
562
1a6a8453 563If set to 1, all uncompressed data will be appended to the output parameter
564of the C<read> method.
642e522c 565
1a6a8453 566If set to 0, the contents of the output parameter of the C<read> method
567will be overwritten by the uncompressed data.
642e522c 568
569Defaults to 0.
570
571=item -Strict =E<gt> 0|1
572
573
574
575This option controls whether the extra checks defined below are used when
1a6a8453 576carrying out the decompression. When Strict is on, the extra tests are
577carried out, when Strict is off they are not.
642e522c 578
579The default for this option is off.
580
581
582If the input is an RFC1950 data stream, the following will be checked:
583
584
585
586
587=over 5
588
589=item 1
590
591The ADLER32 checksum field must be present.
592
593=item 2
594
595The value of the ADLER32 field read must match the adler32 value of the
596uncompressed data actually contained in the file.
597
598=back
599
600
601
602If the input is a gzip (RFC1952) data stream, the following will be checked:
603
604
605
606
607=over 5
608
609=item 1
610
611If the FHCRC bit is set in the gzip FLG header byte, the CRC16 bytes in the
612header must match the crc16 value of the gzip header actually read.
613
614=item 2
615
616If the gzip header contains a name field (FNAME) it consists solely of ISO
6178859-1 characters.
618
619=item 3
620
1a6a8453 621If the gzip header contains a comment field (FCOMMENT) it consists solely
622of ISO 8859-1 characters plus line-feed.
642e522c 623
624=item 4
625
626If the gzip FEXTRA header field is present it must conform to the sub-field
627structure as defined in RFC1952.
628
629=item 5
630
631The CRC32 and ISIZE trailer fields must be present.
632
633=item 6
634
635The value of the CRC32 field read must match the crc32 value of the
636uncompressed data actually contained in the gzip file.
637
638=item 7
639
1a6a8453 640The value of the ISIZE fields read must match the length of the
641uncompressed data actually read from the file.
642e522c 642
643=back
644
645
646
647
648
649
650=item -ParseExtra =E<gt> 0|1
651
652If the gzip FEXTRA header field is present and this option is set, it will
653force the module to check that it conforms to the sub-field structure as
654defined in RFC1952.
655
If the C<Strict> option is on, this option is enabled automatically.
657
658Defaults to 0.
659
660
661
662=back
663
664=head2 Examples
665
666TODO
667
668=head1 Methods
669
670=head2 read
671
672Usage is
673
674 $status = $z->read($buffer)
675
Reads a block of compressed data (the size of the compressed block is
determined by the C<BlockSize> option in the constructor), uncompresses it and
1a6a8453 678writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
679set in the constructor, the uncompressed data will be appended to the
680C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
642e522c 681
1a6a8453 682Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
683or a negative number on error.
642e522c 684
685=head2 read
686
687Usage is
688
689 $status = $z->read($buffer, $length)
690 $status = $z->read($buffer, $length, $offset)
691
692 $status = read($z, $buffer, $length)
693 $status = read($z, $buffer, $length, $offset)
694
695Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
696
1a6a8453 697The main difference between this form of the C<read> method and the
698previous one, is that this one will attempt to return I<exactly> C<$length>
699bytes. The only circumstances that this function will not is if end-of-file
700or an IO error is encountered.
642e522c 701
1a6a8453 702Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
703or a negative number on error.
642e522c 704
705
706=head2 getline
707
708Usage is
709
710 $line = $z->getline()
711 $line = <$z>
712
713Reads a single line.
714
This method fully supports the use of the variable C<$/>
716(or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
717determine what constitutes an end of line. Both paragraph mode and file
718slurp mode are supported.
719
720
721=head2 getc
722
723Usage is
724
725 $char = $z->getc()
726
727Read a single character.
728
729=head2 ungetc
730
731Usage is
732
733 $char = $z->ungetc($string)
734
735
736=head2 inflateSync
737
738Usage is
739
740 $status = $z->inflateSync()
741
742TODO
743
744=head2 getHeaderInfo
745
746Usage is
747
1a6a8453 748 $hdr = $z->getHeaderInfo();
749 @hdrs = $z->getHeaderInfo();
642e522c 750
This method returns either a hash reference (in scalar context) or a list
of hash references (in array context) that contain information about each
753of the header fields in the compressed data stream(s).
642e522c 754
755
756
757
758=head2 tell
759
760Usage is
761
762 $z->tell()
763 tell $z
764
765Returns the uncompressed file offset.
766
767=head2 eof
768
769Usage is
770
771 $z->eof();
772 eof($z);
773
774
775
776Returns true if the end of the compressed input stream has been reached.
777
778
779
780=head2 seek
781
782 $z->seek($position, $whence);
783 seek($z, $position, $whence);
784
785
786
787
788Provides a sub-set of the C<seek> functionality, with the restriction
789that it is only legal to seek forward in the input file/buffer.
790It is a fatal error to attempt to seek backward.
791
792
793
The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
795SEEK_CUR or SEEK_END.
796
797Returns 1 on success, 0 on failure.
798
799=head2 binmode
800
801Usage is
802
803 $z->binmode
804 binmode $z ;
805
806This is a noop provided for completeness.
807
808=head2 fileno
809
810 $z->fileno()
811 fileno($z)
812
813If the C<$z> object is associated with a file, this method will return
814the underlying filehandle.
815
If the C<$z> object is associated with a buffer, this method will
817return undef.
818
819=head2 close
820
821 $z->close() ;
822 close $z ;
823
824
825
Closes the input file/buffer.
827
828
829
830For most versions of Perl this method will be automatically invoked if
831the IO::Uncompress::AnyInflate object is destroyed (either explicitly or by the
832variable with the reference to the object going out of scope). The
833exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
834these cases, the C<close> method will be called automatically, but
835not until global destruction of all live objects when the program is
836terminating.
837
838Therefore, if you want your scripts to be able to run on all versions
839of Perl, you should call C<close> explicitly and not rely on automatic
840closing.
841
842Returns true on success, otherwise 0.
843
844If the C<AutoClose> option has been enabled when the IO::Uncompress::AnyInflate
845object was created, and the object is associated with a file, the
846underlying file will also be closed.
847
848
849
850
851=head1 Importing
852
853No symbolic constants are required by this IO::Uncompress::AnyInflate at present.
854
855=over 5
856
857=item :all
858
859Imports C<anyinflate> and C<$AnyInflateError>.
860Same as doing this
861
862 use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
863
864=back
865
866=head1 EXAMPLES
867
868
869
870
871=head1 SEE ALSO
872
873L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>
874
875L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
876
L<File::GlobMapper|File::GlobMapper>, L<Archive::Tar|Archive::Tar>,
878L<IO::Zlib|IO::Zlib>
879
880For RFC 1950, 1951 and 1952 see
881F<http://www.faqs.org/rfcs/rfc1950.html>,
882F<http://www.faqs.org/rfcs/rfc1951.html> and
883F<http://www.faqs.org/rfcs/rfc1952.html>
884
885The primary site for the gzip program is F<http://www.gzip.org>.
886
887=head1 AUTHOR
888
889The I<IO::Uncompress::AnyInflate> module was written by Paul Marquess,
890F<pmqs@cpan.org>. The latest copy of the module can be
891found on CPAN in F<modules/by-module/Compress/Compress-Zlib-x.x.tar.gz>.
892
893The I<zlib> compression library was written by Jean-loup Gailly
894F<gzip@prep.ai.mit.edu> and Mark Adler F<madler@alumni.caltech.edu>.
895
896The primary site for the I<zlib> compression library is
897F<http://www.zlib.org>.
898
899=head1 MODIFICATION HISTORY
900
901See the Changes file.
902
903=head1 COPYRIGHT AND LICENSE
904
905
1a6a8453 906Copyright (c) 2005-2006 Paul Marquess. All rights reserved.
642e522c 907This program is free software; you can redistribute it and/or
908modify it under the same terms as Perl itself.
909
910
911