Update Compression modules to version 2.009
[p5sagit/p5-mst-13.2.git] / ext / IO_Compress_Zlib / pod / FAQ.pod
CommitLineData
d54256af 1
2=head1 NAME
3
4IO::Compress::Zlib::FAQ -- Frequently Asked Questions about IO::Compress::Zlib
5
6=head1 DESCRIPTION
7
8Common questions answered.
9
10=head2 Compatibility with Unix compress/uncompress.
11
12This module is not compatible with Unix C<compress>.
13
14If you have the C<uncompress> program available, you can use this to read
15compressed files
16
17 open F, "uncompress -c $filename |";
18 while (<F>)
19 {
20 ...
21
22Alternatively, if you have the C<gunzip> program available, you can use
23this to read compressed files
24
25 open F, "gunzip -c $filename |";
26 while (<F>)
27 {
28 ...
29
30and this to write compressed files, if you have the C<compress> program
31available
32
33 open F, "| compress -c $filename ";
34 print F "data";
35 ...
36 close F ;
37
38=head2 Accessing .tar.Z files
39
40See previous FAQ item.
41
42If the C<Archive::Tar> module is installed and either the C<uncompress> or
43C<gunzip> programs are available, you can use one of these workarounds to
44read C<.tar.Z> files.
45
46Firstly with C<uncompress>
47
48 use strict;
49 use warnings;
50 use Archive::Tar;
51
52 open F, "uncompress -c $filename |";
53 my $tar = Archive::Tar->new(*F);
54 ...
55
56and this with C<gunzip>
57
58 use strict;
59 use warnings;
60 use Archive::Tar;
61
62 open F, "gunzip -c $filename |";
63 my $tar = Archive::Tar->new(*F);
64 ...
65
66Similarly, if the C<compress> program is available, you can use this to
67write a C<.tar.Z> file
68
69 use strict;
70 use warnings;
71 use Archive::Tar;
72 use IO::File;
73
74 my $fh = new IO::File "| compress -c >$filename";
75 my $tar = Archive::Tar->new();
76 ...
77 $tar->write($fh);
78 $fh->close ;
79
80=head2 Accessing Zip Files
81
82This module provides support for reading/writing zip files using the
83C<IO::Compress::Zip> and C<IO::Uncompress::Unzip> modules.
84
85The primary focus of the C<IO::Compress::Zip> and C<IO::Uncompress::Unzip>
86modules is to provide an C<IO::File> compatible streaming read/write
87interface to zip files/buffers. They are not fully fledged archivers. If
88you are looking for an archiver check out the C<Archive::Zip> module. You
89can find it on CPAN at
90
91 http://www.cpan.org/modules/by-module/Archive/Archive-Zip-*.tar.gz
92
93=head2 Compressed files and Net::FTP
94
95The C<Net::FTP> module provides two low-level methods called C<stor> and
96C<retr> that both return filehandles. These filehandles can be used with the
97C<IO::Compress/Uncompress> modules to compress or uncompress files read
98from or written to an FTP Server on the fly, without having to create a
99temporary file.
100
101Firstly, here is code that uses C<retr> to uncompress a file as it is
102read from the FTP Server.
103
104 use Net::FTP;
105 use IO::Uncompress::Gunzip qw(:all);
106
107 my $ftp = new Net::FTP ...
108
109 my $retr_fh = $ftp->retr($compressed_filename);
110 gunzip $retr_fh => $outFilename, AutoClose => 1
111 or die "Cannot uncompress '$compressed_file': $GunzipError\n";
112
113and this to compress a file as it is written to the FTP Server
114
115 use Net::FTP;
116 use IO::Compress::Gzip qw(:all);
117
118 my $stor_fh = $ftp->stor($filename);
119 gzip "filename" => $stor_fh, AutoClose => 1
120 or die "Cannot compress '$filename': $GzipError\n";
121
122=head2 How do I recompress using a different compression?
123
124This is easier than you might expect if you realise that all the
125C<IO::Compress::*> objects are derived from C<IO::File> and that all the
126C<IO::Uncompress::*> modules can read from an C<IO::File> filehandle.
127
128So, for example, say you have a file compressed with gzip that you want to
129recompress with bzip2. Here is all that is needed to carry out the
130recompression.
131
132 use IO::Uncompress::Gunzip ':all';
133 use IO::Compress::Bzip2 ':all';
134
135 my $gzipFile = "somefile.gz";
136 my $bzipFile = "somefile.bz2";
137
138 my $gunzip = new IO::Uncompress::Gunzip $gzipFile
139 or die "Cannot gunzip $gzipFile: $GunzipError\n" ;
140
141 bzip2 $gunzip => $bzipFile
142 or die "Cannot bzip2 to $bzipFile: $Bzip2Error\n" ;
143
144Note, there is a limitation of this technique. Some compression file
145formats store extra information along with the compressed data payload. For
146example, gzip can optionally store the original filename and Zip stores a
147lot of information about the original file. If the original compressed file
148contains any of this extra information, it will not be transferred to the
149new compressed file using the technique above.
150
151=head2 Apache::GZip Revisited
152
153Below is a mod_perl Apache compression module, called C<Apache::GZip>,
154taken from
155F<http://perl.apache.org/docs/tutorials/tips/mod_perl_tricks/mod_perl_tricks.html#On_the_Fly_Compression>
156
157 package Apache::GZip;
158 #File: Apache::GZip.pm
159
160 use strict vars;
161 use Apache::Constants ':common';
162 use Compress::Zlib;
163 use IO::File;
164 use constant GZIP_MAGIC => 0x1f8b;
165 use constant OS_MAGIC => 0x03;
166
167 sub handler {
168 my $r = shift;
169 my ($fh,$gz);
170 my $file = $r->filename;
171 return DECLINED unless $fh=IO::File->new($file);
172 $r->header_out('Content-Encoding'=>'gzip');
173 $r->send_http_header;
174 return OK if $r->header_only;
175
176 tie *STDOUT,'Apache::GZip',$r;
177 print($_) while <$fh>;
178 untie *STDOUT;
179 return OK;
180 }
181
182 sub TIEHANDLE {
183 my($class,$r) = @_;
184 # initialize a deflation stream
185 my $d = deflateInit(-WindowBits=>-MAX_WBITS()) || return undef;
186
187 # gzip header -- don't ask how I found out
188 $r->print(pack("nccVcc",GZIP_MAGIC,Z_DEFLATED,0,time(),0,OS_MAGIC));
189
190 return bless { r => $r,
191 crc => crc32(undef),
192 d => $d,
193 l => 0
194 },$class;
195 }
196
197 sub PRINT {
198 my $self = shift;
199 foreach (@_) {
200 # deflate the data
201 my $data = $self->{d}->deflate($_);
202 $self->{r}->print($data);
203 # keep track of its length and crc
204 $self->{l} += length($_);
205 $self->{crc} = crc32($_,$self->{crc});
206 }
207 }
208
209 sub DESTROY {
210 my $self = shift;
211
212 # flush the output buffers
213 my $data = $self->{d}->flush;
214 $self->{r}->print($data);
215
216 # print the CRC and the total length (uncompressed)
217 $self->{r}->print(pack("LL",@{$self}{qw/crc l/}));
218 }
219
220 1;
221
222Here's the Apache configuration entry you'll need to make use of it. Once
223set, it will result in everything in the /compressed directory being
224compressed automagically.
225
226 <Location /compressed>
227 SetHandler perl-script
228 PerlHandler Apache::GZip
229 </Location>
230
231Although at first sight there seems to be quite a lot going on in
232C<Apache::GZip>, you could sum up what the code was doing as follows --
233read the contents of the file in C<< $r->filename >>, compress it and write
234the compressed data to standard output. That's all.
235
236This code has to jump through a few hoops to achieve this because
237
238=over
239
240=item 1.
241
242The gzip support in C<Compress::Zlib> version 1.x can only work with a real
243filesystem filehandle. The filehandles used by Apache modules are not
244associated with the filesystem.
245
246=item 2.
247
248That means all the gzip support has to be done by hand - in this case by
249creating a tied filehandle to deal with creating the gzip header and
250trailer.
251
252=back
253
254C<IO::Compress::Gzip> doesn't have that filehandle limitation (this was one
255of the reasons for writing it in the first place). So if
256C<IO::Compress::Gzip> is used instead of C<Compress::Zlib> the whole tied
257filehandle code can be removed. Here is the rewritten code.
258
259 package Apache::GZip;
260
261 use strict vars;
262 use Apache::Constants ':common';
263 use IO::Compress::Gzip;
264 use IO::File;
265
266 sub handler {
267 my $r = shift;
268 my ($fh,$gz);
269 my $file = $r->filename;
270 return DECLINED unless $fh=IO::File->new($file);
271 $r->header_out('Content-Encoding'=>'gzip');
272 $r->send_http_header;
273 return OK if $r->header_only;
274
275 my $gz = new IO::Compress::Gzip '-', Minimal => 1
276 or return DECLINED ;
277
278 print $gz $_ while <$fh>;
279
280 return OK;
281 }
282
283or even more succinctly, like this, using a one-shot gzip
284
285 package Apache::GZip;
286
287 use strict vars;
288 use Apache::Constants ':common';
289 use IO::Compress::Gzip qw(gzip);
290
291 sub handler {
292 my $r = shift;
293 $r->header_out('Content-Encoding'=>'gzip');
294 $r->send_http_header;
295 return OK if $r->header_only;
296
297 gzip $r->filename => '-', Minimal => 1
298 or return DECLINED ;
299
300 return OK;
301 }
302
303 1;
304
305The use of one-shot C<gzip> above just reads from C<< $r->filename >> and
306writes the compressed data to standard output.
307
308Note the use of the C<Minimal> option in the code above. When using gzip
309for Content-Encoding you should I<always> use this option. In the example
310above it will prevent the filename being included in the gzip header and
311make the size of the gzip data stream a slight bit smaller.
312
313=head2 Using C<InputLength> to uncompress data embedded in a larger file/buffer.
314
315A fairly common use-case is where compressed data is embedded in a larger
316file/buffer and you want to read both.
317
318As an example consider the structure of a zip file. This is a well-defined
319file format that mixes both compressed and uncompressed sections of data in
320a single file.
321
322For the purposes of this discussion you can think of a zip file as a sequence
323of compressed data streams, each of which is prefixed by an uncompressed
324local header. The local header contains information about the compressed
325data stream, including the name of the compressed file and, in particular,
326the length of the compressed data stream.
327
328To illustrate how to use C<InputLength> here is a script that walks a zip
329file and prints out how many lines are in each compressed file (if you
330intend to write code to walk through a zip file for real see
331L<IO::Uncompress::Unzip/"Walking through a zip file"> )
332
333 use strict;
334 use warnings;
335
336 use IO::File;
337 use IO::Uncompress::RawInflate qw(:all);
338
339 use constant ZIP_LOCAL_HDR_SIG => 0x04034b50;
340 use constant ZIP_LOCAL_HDR_LENGTH => 30;
341
342 my $file = $ARGV[0] ;
343
344 my $fh = new IO::File "<$file"
345 or die "Cannot open '$file': $!\n";
346
347 while (1)
348 {
349 my $sig;
350 my $buffer;
351
352 my $x ;
353 ($x = $fh->read($buffer, ZIP_LOCAL_HDR_LENGTH)) == ZIP_LOCAL_HDR_LENGTH
354 or die "Truncated file: $!\n";
355
356 my $signature = unpack ("V", substr($buffer, 0, 4));
357
358 last unless $signature == ZIP_LOCAL_HDR_SIG;
359
360 # Read Local Header
361 my $gpFlag = unpack ("v", substr($buffer, 6, 2));
362 my $compressedMethod = unpack ("v", substr($buffer, 8, 2));
363 my $compressedLength = unpack ("V", substr($buffer, 18, 4));
364 my $uncompressedLength = unpack ("V", substr($buffer, 22, 4));
365 my $filename_length = unpack ("v", substr($buffer, 26, 2));
366 my $extra_length = unpack ("v", substr($buffer, 28, 2));
367
368 my $filename ;
369 $fh->read($filename, $filename_length) == $filename_length
370 or die "Truncated file\n";
371
372 $fh->read($buffer, $extra_length) == $extra_length
373 or die "Truncated file\n";
374
375 if ($compressedMethod != 8 && $compressedMethod != 0)
376 {
377 warn "Skipping file '$filename' - not deflated $compressedMethod\n";
378 $fh->read($buffer, $compressedLength) == $compressedLength
379 or die "Truncated file\n";
380 next;
381 }
382
383 if ($compressedMethod == 0 && $gpFlag & 8 == 8)
384 {
385 die "Streamed Stored not supported for '$filename'\n";
386 }
387
388 next if $compressedLength == 0;
389
390 # Done reading the Local Header
391
392 my $inf = new IO::Uncompress::RawInflate $fh,
393 Transparent => 1,
394 InputLength => $compressedLength
395 or die "Cannot uncompress $file [$filename]: $RawInflateError\n" ;
396
397 my $line_count = 0;
398
399 while (<$inf>)
400 {
401 ++ $line_count;
402 }
403
404 print "$filename: $line_count\n";
405 }
406
407The majority of the code above is concerned with reading the zip local
408header data. The code that I want to focus on is at the bottom.
409
410 while (1) {
411
412 # read local zip header data
413 # get $filename
414 # get $compressedLength
415
416 my $inf = new IO::Uncompress::RawInflate $fh,
417 Transparent => 1,
418 InputLength => $compressedLength
419 or die "Cannot uncompress $file [$filename]: $RawInflateError\n" ;
420
421 my $line_count = 0;
422
423 while (<$inf>)
424 {
425 ++ $line_count;
426 }
427
428 print "$filename: $line_count\n";
429 }
430
431The call to C<IO::Uncompress::RawInflate> creates a new filehandle C<$inf>
432that can be used to read from the parent filehandle C<$fh>, uncompressing
433it as it goes. The use of the C<InputLength> option will guarantee that
434I<at most> C<$compressedLength> bytes of compressed data will be read from
435the C<$fh> filehandle (The only exception is for an error case like a
436truncated file or a corrupt data stream).
437
438This means that once RawInflate is finished C<$fh> will be left at the
439byte directly after the compressed data stream.
440
441Now consider what the code looks like without C<InputLength>
442
443 while (1) {
444
445 # read local zip header data
446 # get $filename
447 # get $compressedLength
448
449 # read all the compressed data into $data
450 read($fh, $data, $compressedLength);
451
452 my $inf = new IO::Uncompress::RawInflate \$data,
453 Transparent => 1,
454 or die "Cannot uncompress $file [$filename]: $RawInflateError\n" ;
455
456 my $line_count = 0;
457
458 while (<$inf>)
459 {
460 ++ $line_count;
461 }
462
463 print "$filename: $line_count\n";
464 }
465
466The difference here is the addition of the temporary variable C<$data>.
467This is used to store a copy of the compressed data while it is being
468uncompressed.
469
470If you know that C<$compressedLength> isn't that big then using temporary
471storage won't be a problem. But if C<$compressedLength> is very large or
472you are writing an application that other people will use, and so have no
473idea how big C<$compressedLength> will be, it could be an issue.
474
475Using C<InputLength> avoids the use of temporary storage and means the
476application can cope with large compressed data streams.
477
478One final point -- obviously C<InputLength> can only be used whenever you
479know the length of the compressed data beforehand, like here with a zip
480file.
481
482=head1 SEE ALSO
483
484L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
485
486L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
487
488L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
489L<Archive::Tar|Archive::Tar>,
490L<IO::Zlib|IO::Zlib>
491
492=head1 AUTHOR
493
494This module was written by Paul Marquess, F<pmqs@cpan.org>.
495
496=head1 MODIFICATION HISTORY
497
498See the Changes file.
499
500=head1 COPYRIGHT AND LICENSE
501
502Copyright (c) 2005-2008 Paul Marquess. All rights reserved.
503
504This program is free software; you can redistribute it and/or
505modify it under the same terms as Perl itself.
506