Commit | Line | Data |
8d0ec8cf |
1 | # IO::Zlib.pm |
2 | # |
3 | # Copyright (c) 1998-2004 Tom Hughes <tom@compton.nu>. |
4 | # All rights reserved. This program is free software; you can redistribute |
5 | # it and/or modify it under the same terms as Perl itself. |
6 | |
7 | package IO::Zlib; |
8 | |
6d22e870 |
9 | $VERSION = "1.07"; |
8d0ec8cf |
10 | |
11 | =head1 NAME |
12 | |
13 | IO::Zlib - IO:: style interface to L<Compress::Zlib> |
14 | |
15 | =head1 SYNOPSIS |
16 | |
17 | With any version of Perl 5 you can use the basic OO interface: |
18 | |
19 | use IO::Zlib; |
20 | |
21 | $fh = new IO::Zlib; |
22 | if ($fh->open("file.gz", "rb")) { |
23 | print <$fh>; |
24 | $fh->close; |
25 | } |
26 | |
27 | $fh = IO::Zlib->new("file.gz", "wb9"); |
28 | if (defined $fh) { |
29 | print $fh "bar\n"; |
30 | $fh->close; |
31 | } |
32 | |
33 | $fh = IO::Zlib->new("file.gz", "rb"); |
34 | if (defined $fh) { |
35 | print <$fh>; |
36 | undef $fh; # automatically closes the file |
37 | } |
38 | |
39 | With Perl 5.004 you can also use the TIEHANDLE interface to access |
40 | compressed files just like ordinary files: |
41 | |
42 | use IO::Zlib; |
43 | |
44 | tie *FILE, 'IO::Zlib', "file.gz", "wb"; |
45 | print FILE "line 1\nline2\n"; |
46 | |
47 | tie *FILE, 'IO::Zlib', "file.gz", "rb"; |
48 | while (<FILE>) { print "LINE: ", $_ }; |
49 | |
50 | =head1 DESCRIPTION |
51 | |
52 | C<IO::Zlib> provides an IO:: style interface to L<Compress::Zlib> and |
53 | hence to gzip/zlib compressed files. It provides many of the same methods |
54 | as the L<IO::Handle> interface. |
55 | |
56 | Starting from IO::Zlib version 1.02, IO::Zlib can also use an |
57 | external F<gzip> command. The default behaviour is to try to use |
58 | an external F<gzip> if no C<Compress::Zlib> can be loaded, unless |
59 | explicitly disabled by |
60 | |
61 | use IO::Zlib qw(:gzip_external 0); |
62 | |
63 | If explicitly enabled by |
64 | |
65 | use IO::Zlib qw(:gzip_external 1); |
66 | |
67 | then the external F<gzip> is used B<instead> of C<Compress::Zlib>. |
68 | |
69 | =head1 CONSTRUCTOR |
70 | |
71 | =over 4 |
72 | |
73 | =item new ( [ARGS] ) |
74 | |
75 | Creates an C<IO::Zlib> object. If it receives any parameters, they are |
76 | passed to the method C<open>; if the open fails, the object is destroyed. |
77 | Otherwise, it is returned to the caller. |
78 | |
79 | =back |
80 | |
81 | =head1 OBJECT METHODS |
82 | |
83 | =over 4 |
84 | |
85 | =item open ( FILENAME, MODE ) |
86 | |
87 | C<open> takes two arguments. The first is the name of the file to open |
88 | and the second is the open mode. The mode can be anything acceptable to |
89 | L<Compress::Zlib> and by extension anything acceptable to I<zlib> (that |
90 | basically means POSIX fopen() style mode strings plus an optional number |
91 | to indicate the compression level). |
92 | |
93 | =item opened |
94 | |
95 | Returns true if the object currently refers to a opened file. |
96 | |
97 | =item close |
98 | |
99 | Close the file associated with the object and disassociate |
100 | the file from the handle. |
101 | Done automatically on destroy. |
102 | |
103 | =item getc |
104 | |
105 | Return the next character from the file, or undef if none remain. |
106 | |
107 | =item getline |
108 | |
109 | Return the next line from the file, or undef on end of string. |
110 | Can safely be called in an array context. |
111 | Currently ignores $/ ($INPUT_RECORD_SEPARATOR or $RS when L<English> |
112 | is in use) and treats lines as delimited by "\n". |
113 | |
114 | =item getlines |
115 | |
116 | Get all remaining lines from the file. |
117 | It will croak() if accidentally called in a scalar context. |
118 | |
119 | =item print ( ARGS... ) |
120 | |
121 | Print ARGS to the file. |
122 | |
123 | =item read ( BUF, NBYTES, [OFFSET] ) |
124 | |
125 | Read some bytes from the file. |
126 | Returns the number of bytes actually read, 0 on end-of-file, undef on error. |
127 | |
128 | =item eof |
129 | |
130 | Returns true if the handle is currently positioned at end of file? |
131 | |
132 | =item seek ( OFFSET, WHENCE ) |
133 | |
134 | Seek to a given position in the stream. |
135 | Not yet supported. |
136 | |
137 | =item tell |
138 | |
139 | Return the current position in the stream, as a numeric offset. |
140 | Not yet supported. |
141 | |
142 | =item setpos ( POS ) |
143 | |
144 | Set the current position, using the opaque value returned by C<getpos()>. |
145 | Not yet supported. |
146 | |
147 | =item getpos ( POS ) |
148 | |
149 | Return the current position in the string, as an opaque object. |
150 | Not yet supported. |
151 | |
152 | =back |
153 | |
154 | =head1 USING THE EXTERNAL GZIP |
155 | |
156 | If the external F<gzip> is used, the following C<open>s are used: |
157 | |
158 | open(FH, "gzip -dc $filename |") # for read opens |
159 | open(FH, " | gzip > $filename") # for write opens |
160 | |
161 | You can modify the 'commands' for example to hardwire |
162 | an absolute path by e.g. |
163 | |
164 | use IO::Zlib ':gzip_read_open' => '/some/where/gunzip -c %s |'; |
165 | use IO::Zlib ':gzip_write_open' => '| /some/where/gzip.exe > %s'; |
166 | |
167 | The C<%s> is expanded to be the filename (C<sprintf> is used, so be |
168 | careful to escape any other C<%> signs). The 'commands' are checked |
169 | for sanity - they must contain the C<%s>, and the read open must end |
170 | with the pipe sign, and the write open must begin with the pipe sign. |
171 | |
172 | =head1 CLASS METHODS |
173 | |
174 | =over 4 |
175 | |
176 | =item has_Compress_Zlib |
177 | |
178 | Returns true if C<Compress::Zlib> is available. Note that this does |
179 | not mean that C<Compress::Zlib> is being used: see L</gzip_external> |
180 | and L<gzip_used>. |
181 | |
182 | =item gzip_external |
183 | |
184 | Undef if an external F<gzip> B<can> be used if C<Compress::Zlib> is |
185 | not available (see L</has_Compress_Zlib>), true if an external F<gzip> |
186 | is explicitly used, false if an external F<gzip> must not be used. |
187 | See L</gzip_used>. |
188 | |
189 | =item gzip_used |
190 | |
191 | True if an external F<gzip> is being used, false if not. |
192 | |
193 | =item gzip_read_open |
194 | |
195 | Return the 'command' being used for opening a file for reading using an |
196 | external F<gzip>. |
197 | |
198 | =item gzip_write_open |
199 | |
200 | Return the 'command' being used for opening a file for writing using an |
201 | external F<gzip>. |
202 | |
203 | =back |
204 | |
205 | =head1 DIAGNOSTICS |
206 | |
207 | =over 4 |
208 | |
209 | =item IO::Zlib::getlines: must be called in list context |
210 | |
211 | If you want read lines, you must read in list context. |
212 | |
213 | =item IO::Zlib::gzopen_external: mode '...' is illegal |
214 | |
215 | Use only modes 'rb' or 'wb' or /wb[1-9]/. |
216 | |
217 | =item IO::Zlib::import: '...' is illegal |
218 | |
219 | The known import symbols are the C<:gzip_external>, C<:gzip_read_open>, |
220 | and C<:gzip_write_open>. Anything else is not recognized. |
221 | |
222 | =item IO::Zlib::import: ':gzip_external' requires an argument |
223 | |
224 | The C<:gzip_external> requires one boolean argument. |
225 | |
226 | =item IO::Zlib::import: 'gzip_read_open' requires an argument |
227 | |
228 | The C<:gzip_external> requires one string argument. |
229 | |
230 | =item IO::Zlib::import: 'gzip_read' '...' is illegal |
231 | |
232 | The C<:gzip_read_open> argument must end with the pipe sign (|) |
233 | and have the C<%s> for the filename. See L</"USING THE EXTERNAL GZIP">. |
234 | |
235 | =item IO::Zlib::import: 'gzip_write_open' requires an argument |
236 | |
237 | The C<:gzip_external> requires one string argument. |
238 | |
239 | =item IO::Zlib::import: 'gzip_write_open' '...' is illegal |
240 | |
241 | The C<:gzip_write_open> argument must begin with the pipe sign (|) |
242 | and have the C<%s> for the filename. An output redirect (>) is also |
243 | often a good idea, depending on your operating system shell syntax. |
244 | See L</"USING THE EXTERNAL GZIP">. |
245 | |
246 | =item IO::Zlib::import: no Compress::Zlib and no external gzip |
247 | |
248 | Given that we failed to load C<Compress::Zlib> and that the use of |
249 | an external F<gzip> was disabled, IO::Zlib has not much chance of working. |
250 | |
251 | =item IO::Zlib::open: needs a filename |
252 | |
253 | No filename, no open. |
254 | |
255 | =item IO::Zlib::READ: NBYTES must be specified |
256 | |
257 | We must know how much to read. |
258 | |
8d0ec8cf |
259 | =item IO::Zlib::WRITE: too long LENGTH |
260 | |
261 | The LENGTH must be less than or equal to the buffer size. |
262 | |
263 | =item IO::Zlib::WRITE: OFFSET is not supported |
264 | |
265 | Offsets of gzipped streams are not supported. |
266 | |
267 | =back |
268 | |
269 | =head1 SEE ALSO |
270 | |
271 | L<perlfunc>, |
272 | L<perlop/"I/O Operators">, |
273 | L<IO::Handle>, |
274 | L<Compress::Zlib> |
275 | |
276 | =head1 HISTORY |
277 | |
278 | Created by Tom Hughes E<lt>F<tom@compton.nu>E<gt>. |
279 | |
280 | Support for external gzip added by Jarkko Hietaniemi E<lt>F<jhi@iki.fi>E<gt>. |
281 | |
282 | =head1 COPYRIGHT |
283 | |
284 | Copyright (c) 1998-2004 Tom Hughes E<lt>F<tom@compton.nu>E<gt>. |
285 | All rights reserved. This program is free software; you can redistribute |
286 | it and/or modify it under the same terms as Perl itself. |
287 | |
288 | =cut |
289 | |
290 | require 5.004; |
291 | |
292 | use strict; |
293 | use vars qw($VERSION $AUTOLOAD @ISA); |
294 | |
295 | use Carp; |
296 | use Fcntl qw(SEEK_SET); |
297 | |
298 | my $has_Compress_Zlib; |
299 | my $aliased; |
300 | |
301 | sub has_Compress_Zlib { |
302 | $has_Compress_Zlib; |
303 | } |
304 | |
305 | BEGIN { |
306 | eval { require Compress::Zlib }; |
307 | $has_Compress_Zlib = $@ ? 0 : 1; |
308 | } |
309 | |
310 | use Symbol; |
311 | use Tie::Handle; |
312 | |
313 | # These might use some $^O logic. |
314 | my $gzip_read_open = "gzip -dc %s |"; |
315 | my $gzip_write_open = "| gzip > %s"; |
316 | |
317 | my $gzip_external; |
318 | my $gzip_used; |
319 | |
320 | sub gzip_read_open { |
321 | $gzip_read_open; |
322 | } |
323 | |
324 | sub gzip_write_open { |
325 | $gzip_write_open; |
326 | } |
327 | |
328 | sub gzip_external { |
329 | $gzip_external; |
330 | } |
331 | |
332 | sub gzip_used { |
333 | $gzip_used; |
334 | } |
335 | |
336 | sub can_gunzip { |
337 | $has_Compress_Zlib || $gzip_external; |
338 | } |
339 | |
340 | sub _import { |
341 | my $import = shift; |
342 | while (@_) { |
343 | if ($_[0] eq ':gzip_external') { |
344 | shift; |
345 | if (@_) { |
346 | $gzip_external = shift; |
347 | } else { |
348 | croak "$import: ':gzip_external' requires an argument"; |
349 | } |
350 | } |
351 | elsif ($_[0] eq ':gzip_read_open') { |
352 | shift; |
353 | if (@_) { |
354 | $gzip_read_open = shift; |
355 | croak "$import: ':gzip_read_open' '$gzip_read_open' is illegal" |
356 | unless $gzip_read_open =~ /^.+%s.+\|\s*$/; |
357 | } else { |
358 | croak "$import: ':gzip_read_open' requires an argument"; |
359 | } |
360 | } |
361 | elsif ($_[0] eq ':gzip_write_open') { |
362 | shift; |
363 | if (@_) { |
364 | $gzip_write_open = shift; |
365 | croak "$import: ':gzip_write_open' '$gzip_read_open' is illegal" |
366 | unless $gzip_write_open =~ /^\s*\|.+%s.*$/; |
367 | } else { |
368 | croak "$import: ':gzip_write_open' requires an argument"; |
369 | } |
370 | } |
371 | else { |
372 | last; |
373 | } |
374 | } |
375 | return @_; |
376 | } |
377 | |
378 | sub _alias { |
379 | my $import = shift; |
380 | if ((!$has_Compress_Zlib && !defined $gzip_external) || $gzip_external) { |
381 | # The undef *gzopen is really needed only during |
382 | # testing where we eval several 'use IO::Zlib's. |
383 | undef *gzopen; |
384 | *gzopen = \&gzopen_external; |
385 | *IO::Handle::gzread = \&gzread_external; |
386 | *IO::Handle::gzwrite = \&gzwrite_external; |
387 | *IO::Handle::gzreadline = \&gzreadline_external; |
82d8f834 |
388 | *IO::Handle::gzeof = \&gzeof_external; |
8d0ec8cf |
389 | *IO::Handle::gzclose = \&gzclose_external; |
390 | $gzip_used = 1; |
391 | } else { |
392 | croak "$import: no Compress::Zlib and no external gzip" |
393 | unless $has_Compress_Zlib; |
394 | *gzopen = \&Compress::Zlib::gzopen; |
395 | *gzread = \&Compress::Zlib::gzread; |
396 | *gzwrite = \&Compress::Zlib::gzwrite; |
397 | *gzreadline = \&Compress::Zlib::gzreadline; |
82d8f834 |
398 | *gzeof = \&Compress::Zlib::gzeof; |
8d0ec8cf |
399 | } |
400 | $aliased = 1; |
401 | } |
402 | |
403 | sub import { |
404 | shift; |
405 | my $import = "IO::Zlib::import"; |
406 | if (@_) { |
407 | if (_import($import, @_)) { |
408 | croak "$import: '@_' is illegal"; |
409 | } |
410 | } |
411 | _alias($import); |
412 | } |
413 | |
414 | @ISA = qw(Tie::Handle); |
415 | |
416 | sub TIEHANDLE |
417 | { |
418 | my $class = shift; |
419 | my @args = @_; |
420 | |
421 | my $self = bless {}, $class; |
422 | |
423 | return @args ? $self->OPEN(@args) : $self; |
424 | } |
425 | |
426 | sub DESTROY |
427 | { |
428 | } |
429 | |
430 | sub OPEN |
431 | { |
432 | my $self = shift; |
433 | my $filename = shift; |
434 | my $mode = shift; |
435 | |
436 | croak "IO::Zlib::open: needs a filename" unless defined($filename); |
437 | |
438 | $self->{'file'} = gzopen($filename,$mode); |
8d0ec8cf |
439 | |
440 | return defined($self->{'file'}) ? $self : undef; |
441 | } |
442 | |
443 | sub CLOSE |
444 | { |
445 | my $self = shift; |
446 | |
447 | return undef unless defined($self->{'file'}); |
448 | |
449 | my $status = $self->{'file'}->gzclose(); |
450 | |
451 | delete $self->{'file'}; |
8d0ec8cf |
452 | |
453 | return ($status == 0) ? 1 : undef; |
454 | } |
455 | |
456 | sub READ |
457 | { |
458 | my $self = shift; |
459 | my $bufref = \$_[0]; |
460 | my $nbytes = $_[1]; |
82d8f834 |
461 | my $offset = $_[2] || 0; |
8d0ec8cf |
462 | |
463 | croak "IO::Zlib::READ: NBYTES must be specified" unless defined($nbytes); |
8d0ec8cf |
464 | |
82d8f834 |
465 | $$bufref = "" unless defined($$bufref); |
8d0ec8cf |
466 | |
82d8f834 |
467 | my $bytesread = $self->{'file'}->gzread(substr($$bufref,$offset),$nbytes); |
8d0ec8cf |
468 | |
469 | return undef if $bytesread < 0; |
470 | |
8d0ec8cf |
471 | return $bytesread; |
472 | } |
473 | |
474 | sub READLINE |
475 | { |
476 | my $self = shift; |
477 | |
478 | my $line; |
479 | |
480 | return () if $self->{'file'}->gzreadline($line) <= 0; |
481 | |
482 | return $line unless wantarray; |
483 | |
484 | my @lines = $line; |
485 | |
486 | while ($self->{'file'}->gzreadline($line) > 0) |
487 | { |
488 | push @lines, $line; |
489 | } |
490 | |
491 | return @lines; |
492 | } |
493 | |
494 | sub WRITE |
495 | { |
496 | my $self = shift; |
497 | my $buf = shift; |
498 | my $length = shift; |
499 | my $offset = shift; |
500 | |
82d8f834 |
501 | croak "IO::Zlib::WRITE: too long LENGTH" unless $offset + $length <= length($buf); |
8d0ec8cf |
502 | |
82d8f834 |
503 | return $self->{'file'}->gzwrite(substr($buf,$offset,$length)); |
8d0ec8cf |
504 | } |
505 | |
506 | sub EOF |
507 | { |
508 | my $self = shift; |
509 | |
82d8f834 |
510 | return $self->{'file'}->gzeof(); |
511 | } |
512 | |
513 | sub FILENO |
514 | { |
515 | return undef; |
8d0ec8cf |
516 | } |
517 | |
518 | sub new |
519 | { |
520 | my $class = shift; |
521 | my @args = @_; |
522 | |
523 | _alias("new", @_) unless $aliased; # Some call new IO::Zlib directly... |
524 | |
525 | my $self = gensym(); |
526 | |
527 | tie *{$self}, $class, @args; |
528 | |
529 | return tied(${$self}) ? bless $self, $class : undef; |
530 | } |
531 | |
532 | sub getline |
533 | { |
534 | my $self = shift; |
535 | |
536 | return scalar tied(*{$self})->READLINE(); |
537 | } |
538 | |
539 | sub getlines |
540 | { |
541 | my $self = shift; |
542 | |
543 | croak "IO::Zlib::getlines: must be called in list context" |
544 | unless wantarray; |
545 | |
546 | return tied(*{$self})->READLINE(); |
547 | } |
548 | |
549 | sub opened |
550 | { |
551 | my $self = shift; |
552 | |
553 | return defined tied(*{$self})->{'file'}; |
554 | } |
555 | |
556 | sub AUTOLOAD |
557 | { |
558 | my $self = shift; |
559 | |
560 | $AUTOLOAD =~ s/.*:://; |
561 | $AUTOLOAD =~ tr/a-z/A-Z/; |
562 | |
563 | return tied(*{$self})->$AUTOLOAD(@_); |
564 | } |
565 | |
566 | sub gzopen_external { |
567 | my ($filename, $mode) = @_; |
568 | require IO::Handle; |
569 | my $fh = IO::Handle->new(); |
570 | if ($mode =~ /r/) { |
571 | # Because someone will try to read ungzipped files |
572 | # with this we peek and verify the signature. Yes, |
573 | # this means that we open the file twice (if it is |
574 | # gzipped). |
575 | # Plenty of race conditions exist in this code, but |
576 | # the alternative would be to capture the stderr of |
577 | # gzip and parse it, which would be a portability nightmare. |
578 | if (-e $filename && open($fh, $filename)) { |
579 | binmode $fh; |
580 | my $sig; |
581 | my $rdb = read($fh, $sig, 2); |
582 | if ($rdb == 2 && $sig eq "\x1F\x8B") { |
583 | my $ropen = sprintf $gzip_read_open, $filename; |
584 | if (open($fh, $ropen)) { |
585 | binmode $fh; |
586 | return $fh; |
587 | } else { |
588 | return undef; |
589 | } |
590 | } |
591 | seek($fh, 0, SEEK_SET) or |
592 | die "IO::Zlib: open('$filename', 'r'): seek: $!"; |
593 | return $fh; |
594 | } else { |
595 | return undef; |
596 | } |
597 | } elsif ($mode =~ /w/) { |
598 | my $level = ''; |
599 | $level = "-$1" if $mode =~ /([1-9])/; |
600 | # To maximize portability we would need to open |
601 | # two filehandles here, one for "| gzip $level" |
602 | # and another for "> $filename", and then when |
603 | # writing copy bytes from the first to the second. |
604 | # We are using IO::Handle objects for now, however, |
605 | # and they can only contain one stream at a time. |
606 | my $wopen = sprintf $gzip_write_open, $filename; |
607 | if (open($fh, $wopen)) { |
608 | $fh->autoflush(1); |
609 | binmode $fh; |
610 | return $fh; |
611 | } else { |
612 | return undef; |
613 | } |
614 | } else { |
615 | croak "IO::Zlib::gzopen_external: mode '$mode' is illegal"; |
616 | } |
617 | return undef; |
618 | } |
619 | |
620 | sub gzread_external { |
621 | # Use read() instead of syswrite() because people may |
622 | # mix reads and readlines, and we don't want to mess |
623 | # the stdio buffering. See also gzreadline_external() |
624 | # and gzwrite_external(). |
625 | my $nread = read($_[0], $_[1], @_ == 3 ? $_[2] : 4096); |
626 | defined $nread ? $nread : -1; |
627 | } |
628 | |
629 | sub gzwrite_external { |
630 | # Using syswrite() is okay (cf. gzread_external()) |
631 | # since the bytes leave this process and buffering |
632 | # is therefore not an issue. |
633 | my $nwrote = syswrite($_[0], $_[1]); |
634 | defined $nwrote ? $nwrote : -1; |
635 | } |
636 | |
637 | sub gzreadline_external { |
638 | # See the comment in gzread_external(). |
639 | $_[1] = readline($_[0]); |
640 | return defined $_[1] ? length($_[1]) : -1; |
641 | } |
642 | |
82d8f834 |
643 | sub gzeof_external { |
644 | return eof($_[0]); |
645 | } |
646 | |
8d0ec8cf |
647 | sub gzclose_external { |
648 | close($_[0]); |
649 | # I am not entirely certain why this is needed but it seems |
650 | # the above close() always fails (as if the stream would have |
651 | # been already closed - something to do with using external |
652 | # processes via pipes?) |
653 | return 0; |
654 | } |
655 | |
656 | 1; |