From: Ilya Zakharevich Date: Tue, 27 Sep 2005 02:07:35 +0000 (-0700) Subject: Re: [BUG 5.8.7] Another major bug in PerlIO layer X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=93c2c2ecd9924225ba4c26762e3e59cf95458982;p=p5sagit%2Fp5-mst-13.2.git Re: [BUG 5.8.7] Another major bug in PerlIO layer Message-ID: <20050927090734.GB3687@math.berkeley.edu> p4raw-id: //depot/perl@25618 --- diff --git a/MANIFEST b/MANIFEST index 755e4b3..c038ab4 100644 --- a/MANIFEST +++ b/MANIFEST @@ -2670,6 +2670,7 @@ thread.h Threading header t/io/argv.t See if ARGV stuff works t/io/binmode.t See if binmode() works t/io/crlf.t See if :crlf works +t/io/crlf_through.t See if pipe passes data intact with :crlf t/io/dup.t See if >& works right t/io/fflush.t See if auto-flush on fork/exec/system/qx works t/io/fs.t See if directory manipulations work @@ -2683,6 +2684,7 @@ t/io/pipe.t See if secure pipes work t/io/print.t See if print commands work t/io/read.t See if read works t/io/tell.t See if file seeking works +t/io/through.t See if pipe passes data intact t/io/utf8.t See if file seeking works t/japh/abigail.t Obscure tests t/lib/1_compile.t See if the various libraries and extensions compile diff --git a/perlio.c b/perlio.c index e36a730..86cc827 100644 --- a/perlio.c +++ b/perlio.c @@ -2066,6 +2066,8 @@ PerlIOBase_read(pTHX_ PerlIO *f, void *vbuf, Size_t count) return 0; } while (count > 0) { + get_cnt: + { SSize_t avail = PerlIO_get_cnt(f); SSize_t take = 0; if (avail > 0) @@ -2076,11 +2078,14 @@ PerlIOBase_read(pTHX_ PerlIO *f, void *vbuf, Size_t count) PerlIO_set_ptrcnt(f, ptr + take, (avail -= take)); count -= take; buf += take; + if (avail == 0) /* set_ptrcnt could have reset avail */ + goto get_cnt; } if (count > 0 && avail <= 0) { if (PerlIO_fill(f) != 0) break; } + } } return (buf - (STDCHAR *) vbuf); } @@ -3538,7 +3543,11 @@ PerlIOBuf_open(pTHX_ PerlIO_funcs *self, PerlIO_list_t *layers, /* * This "flush" is akin to sfio's sync in that it handles files in either - * read or write state + * read or write state. For write state, we put the postponed data through + * the next layers. For read state, we seek() the next layers to the + * offset given by current position in the buffer, and discard the buffer + * state (XXXX supposed to be for seek()able buffers only, but now it is done + * in any case?). Then the pass the stick further in chain. */ IV PerlIOBuf_flush(pTHX_ PerlIO *f) @@ -3597,6 +3606,10 @@ PerlIOBuf_flush(pTHX_ PerlIO *f) return code; } +/* This discards the content of the buffer after b->ptr, and rereads + * the buffer from the position off in the layer downstream; here off + * is at offset corresponding to b->ptr - b->buf. + */ IV PerlIOBuf_fill(pTHX_ PerlIO *f) { @@ -3607,7 +3620,7 @@ PerlIOBuf_fill(pTHX_ PerlIO *f) * Down-stream flush is defined not to loose read data so is harmless. * we would not normally be fill'ing if there was data left in anycase. */ - if (PerlIO_flush(f) != 0) + if (PerlIO_flush(f) != 0) /* XXXX Check that its seek() succeeded?! */ return -1; if (PerlIOBase(f)->flags & PERLIO_F_TTY) PerlIOBase_flush_linebuf(aTHX); @@ -4083,6 +4096,14 @@ PERLIO_FUNCS_DECL(PerlIO_pending) = { * crlf - translation On read translate CR,LF to "\n" we do this by * overriding ptr/cnt entries to hand back a line at a time and keeping a * record of which nl we "lied" about. On write translate "\n" to CR,LF + * + * c->nl points on the first byte of CR LF pair when it is temporarily + * replaced by LF, or to the last CR of the buffer. In the former case + * the caller thinks that the buffer ends at c->nl + 1, in the latter + * that it ends at c->nl; these two cases can be distinguished by + * *c->nl. c->nl is set during _getcnt() call, and unset during + * _unread() and _flush() calls. + * It only matters for read operations. */ typedef struct { @@ -4127,7 +4148,7 @@ SSize_t PerlIOCrlf_unread(pTHX_ PerlIO *f, const void *vbuf, Size_t count) { PerlIOCrlf *c = PerlIOSelf(f, PerlIOCrlf); - if (c->nl) { + if (c->nl) { /* XXXX Shouldn't it be done only if b->ptr > c->nl? */ *(c->nl) = 0xd; c->nl = NULL; } @@ -4157,8 +4178,10 @@ PerlIOCrlf_unread(pTHX_ PerlIO *f, const void *vbuf, Size_t count) count--; } else { - buf++; - break; + /* If b->ptr - 1 == b->buf, we are undoing reading 0xa */ + *--(b->ptr) = 0xa; /* Works even if 0xa == '\r' */ + unread++; + count--; } } else { @@ -4172,6 +4195,7 @@ PerlIOCrlf_unread(pTHX_ PerlIO *f, const void *vbuf, Size_t count) } } +/* XXXX This code assumes that buffer size >=2, but does not check it... */ SSize_t PerlIOCrlf_get_cnt(pTHX_ PerlIO *f) { diff --git a/t/io/crlf_through.t b/t/io/crlf_through.t new file mode 100644 index 0000000..3a5522a --- /dev/null +++ b/t/io/crlf_through.t @@ -0,0 +1,9 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +$main::use_crlf = 1; +do './io/through.t' or die "no kid script"; diff --git a/t/io/through.t b/t/io/through.t new file mode 100644 index 0000000..d664b08 --- /dev/null +++ b/t/io/through.t @@ -0,0 +1,139 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +use strict; +require './test.pl'; + +my $Perl = which_perl(); + +my $data = <<'EOD'; +x + yy +z +EOD + +(my $data2 = $data) =~ s/\n/\n\n/g; + +my $t1 = { data => $data, write_c => [1,2,length $data], read_c => [1,2,3,length $data]}; +my $t2 = { data => $data2, write_c => [1,2,length $data2], read_c => [1,2,3,length $data2]}; + +$_->{write_c} = [1..length($_->{data})], + $_->{read_c} = [1..length($_->{data})+1, 0xe000] # Need <0xffff for REx + for (); # $t1, $t2; + +my $c; # len write tests, for each: one _all test, and 3 each len+2 +$c += @{$_->{write_c}} * (1 + 3*@{$_->{read_c}}) for $t1, $t2; +$c *= 3*2*2; # $how_w, file/pipe, 2 reports + +$c += 6; # Tests with sleep()... + +print "1..$c\n"; + +my $set_out = ''; +$set_out = "binmode STDOUT, ':crlf'" if $main::use_crlf = 1; + +sub testread ($$$$$$$) { + my ($fh, $str, $read_c, $how_r, $write_c, $how_w, $why) = @_; + my $buf = ''; + if ($how_r eq 'readline_all') { + $buf .= $_ while <$fh>; + } elsif ($how_r eq 'readline') { + $/ = \$read_c; + $buf .= $_ while <$fh>; + } elsif ($how_r eq 'read') { + my($in, $c); + $buf .= $in while $c = read($fh, $in, $read_c); + } elsif ($how_r eq 'sysread') { + my($in, $c); + $buf .= $in while $c = sysread($fh, $in, $read_c); + } else { + die "Unrecognized read: '$how_r'"; + } + close $fh or die "close: $!"; + # The only contamination allowed is with sysread/prints + $buf =~ s/\r\n/\n/g if $how_r eq 'sysread' and $how_w =~ /print/; + is(length $buf, length $str, "length with wrc=$write_c, rdc=$read_c, $how_w, $how_r, $why"); + is($buf, $str, "content with wrc=$write_c, rdc=$read_c, $how_w, $how_r, $why"); +} + +sub testpipe ($$$$$$) { + my ($str, $write_c, $read_c, $how_w, $how_r, $why) = @_; + (my $quoted = $str) =~ s/\n/\\n/g;; + my $fh; + if ($how_w eq 'print') { # AUTOFLUSH??? + # Should be shell-neutral: + open $fh, '-|', qq[$Perl -we "$set_out;print for grep length, split /(.{1,$write_c})/s, qq($quoted)"] or die "open: $!"; + } elsif ($how_w eq 'print/flush') { + # shell-neutral and miniperl-enabled autoflush? qq(\x24\x7c) eq '$|' + open $fh, '-|', qq[$Perl -we "$set_out;eval qq(\\x24\\x7c = 1) or die;print for grep length, split /(.{1,$write_c})/s, qq($quoted)"] or die "open: $!"; + } elsif ($how_w eq 'syswrite') { + ### How to protect \$_ + open $fh, '-|', qq[$Perl -we "$set_out;eval qq(sub w {syswrite STDOUT, \\x24_} 1) or die; w() for grep length, split /(.{1,$write_c})/s, qq($quoted)"] or die "open: $!"; + } else { + die "Unrecognized write: '$how_w'"; + } + binmode $fh, ':crlf' if $main::use_crlf = 1; + testread($fh, $str, $read_c, $how_r, $write_c, $how_w, "pipe$why"); +} + +sub testfile ($$$$$$) { + my ($str, $write_c, $read_c, $how_w, $how_r, $why) = @_; + my @data = grep length, split /(.{1,$write_c})/s, $str; + + open my $fh, '>', 'io_io.tmp' or die; + select $fh; + binmode $fh, ':crlf' if $main::use_crlf = 1; + if ($how_w eq 'print') { # AUTOFLUSH??? + $| = 0; + print $fh $_ for @data; + } elsif ($how_w eq 'print/flush') { + $| = 1; + print $fh $_ for @data; + } elsif ($how_w eq 'syswrite') { + syswrite $fh, $_ for @data; + } else { + die "Unrecognized write: '$how_w'"; + } + close $fh or die "close: $!"; + open $fh, '<', 'io_io.tmp' or die; + binmode $fh, ':crlf' if $main::use_crlf = 1; + testread($fh, $str, $read_c, $how_r, $write_c, $how_w, "file$why"); +} + +# shell-neutral and miniperl-enabled autoflush? qq(\x24\x7c) eq '$|' +open my $fh, '-|', qq[$Perl -we "eval qq(\\x24\\x7c = 1) or die; binmode STDOUT; sleep 1, print for split //, qq(a\nb\n\nc\n\n\n)"] or die "open: $!"; +ok(1, 'open pipe'); +binmode $fh, q(:crlf); +ok(1, 'binmode'); +my (@c, $c); +push @c, ord $c while $c = getc $fh; +ok(1, 'got chars'); +is(scalar @c, 9, 'got 9 chars'); +is("@c", '97 10 98 10 10 99 10 10 10', 'got expected chars'); +ok(close($fh), 'close'); + +for my $s (1..2) { + my $t = ($t1, $t2)[$s-1]; + my $str = $t->{data}; + my $r = $t->{read_c}; + my $w = $t->{write_c}; + for my $read_c (@$r) { + for my $write_c (@$w) { + for my $how_r (qw(readline_all readline read sysread)) { + next if $how_r eq 'readline_all' and $read_c != 1; + for my $how_w (qw(print print/flush syswrite)) { + testfile($str, $write_c, $read_c, $how_w, $how_r, $s); + testpipe($str, $write_c, $read_c, $how_w, $how_r, $s); + } + } + } + } +} + +unlink 'io_io.tmp'; + +1;