From: Nicholas Clark Date: Wed, 29 Dec 2004 19:00:12 +0000 (+0000) Subject: read (and presuambly sysread) would expose the UTF8 internals when X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=1dd30107ebac37cf465e5225a00d367454a7cb84;p=p5sagit%2Fp5-mst-13.2.git read (and presuambly sysread) would expose the UTF8 internals when reading from a byte orientated file handle into a UTF8 scalar. p4raw-id: //depot/perl@23703 --- diff --git a/pp_sys.c b/pp_sys.c index 3071f1b..edb69c2 100644 --- a/pp_sys.c +++ b/pp_sys.c @@ -1555,6 +1555,8 @@ PP(pp_sysread) STRLEN blen; MAGIC *mg; int fp_utf8; + int buffer_utf8; + SV *read_target; Size_t got = 0; Size_t wanted; bool charstart = FALSE; @@ -1605,6 +1607,7 @@ PP(pp_sysread) } else { buffer = SvPV_force(bufsv, blen); + buffer_utf8 = !IN_BYTES && SvUTF8(bufsv); } if (length < 0) DIE(aTHX_ "Negative length"); @@ -1672,11 +1675,30 @@ PP(pp_sysread) } more_bytes: bufsize = SvCUR(bufsv); + /* Allocating length + offset + 1 isn't perfect in the case of reading + bytes from a byte file handle into a UTF8 buffer, but it won't harm us + unduly. + (should be 2 * length + offset + 1, or possibly something longer if + PL_encoding is true) */ buffer = SvGROW(bufsv, (STRLEN)(length+offset+1)); if (offset > bufsize) { /* Zero any newly allocated space */ Zero(buffer+bufsize, offset-bufsize, char); } buffer = buffer + offset; + if (!buffer_utf8) { + read_target = bufsv; + } else { + /* Best to read the bytes into a new SV, upgrade that to UTF8, then + concatenate it to the current buffer. */ + + /* Truncate the existing buffer to the start of where we will be + reading to: */ + SvCUR_set(bufsv, offset); + + read_target = sv_newmortal(); + SvUPGRADE(read_target, SVt_PV); + buffer = SvGROW(read_target, length + 1); + } if (PL_op->op_type == OP_SYSREAD) { #ifdef PERL_SOCK_SYSREAD_IS_RECV @@ -1716,9 +1738,9 @@ PP(pp_sysread) report_evil_fh(gv, io, OP_phoney_OUTPUT_ONLY); goto say_undef; } - SvCUR_set(bufsv, count+(buffer - SvPVX(bufsv))); - *SvEND(bufsv) = '\0'; - (void)SvPOK_only(bufsv); + SvCUR_set(read_target, count+(buffer - SvPVX(read_target))); + *SvEND(read_target) = '\0'; + (void)SvPOK_only(read_target); if (fp_utf8 && !IN_BYTES) { /* Look at utf8 we got back and count the characters */ char *bend = buffer + count; @@ -1754,6 +1776,11 @@ PP(pp_sysread) count = got; SvUTF8_on(bufsv); } + else if (buffer_utf8) { + /* Let svcatsv upgrade the bytes we read in to utf8. + The buffer is a mortal so will be freed soon. */ + sv_catsv_nomg(bufsv, read_target); + } SvSETMAGIC(bufsv); /* This should not be marked tainted if the fp is marked clean */ if (!(IoFLAGS(io) & IOf_UNTAINT)) diff --git a/t/op/read.t b/t/op/read.t index f343c0d..6aab395 100755 --- a/t/op/read.t +++ b/t/op/read.t @@ -9,7 +9,7 @@ BEGIN { } use strict; -plan tests => 516; +plan tests => 1732; open(FOO,'op/read.t') || open(FOO,'t/op/read.t') || open(FOO,':op:read.t') || die "Can't open op.read"; seek(FOO,4,0) or die "Seek failed: $!"; @@ -31,7 +31,7 @@ my $tmpfile = 'Op_read.tmp'; my (@values, @buffers) = ('', ''); -foreach (65, 161) { # , 253, 9786) { +foreach (65, 161, 253) { # 9786) { push @values, join "", map {chr $_} $_ .. $_ + 4; push @buffers, join "", map {chr $_} $_ + 5 .. $_ + 20; } @@ -43,39 +43,42 @@ foreach my $value (@values) { # It's all 8 bit unshift @utf8, 0; } - # foreach my $utf8 (@utf8) { - 1 while unlink $tmpfile; - open FH, ">$tmpfile" or die "Can't open $tmpfile: $!"; - print FH $value; - close FH; - foreach my $offset (0, 3, 7, 22, -1, -3, -5, -7) { - foreach my $length (0, 2, 5, 10) { - # Will read the lesser of the length of the file and the read - # length - my $will_read = $value; - if ($length < length $will_read) { - substr ($will_read, $length) = ''; + foreach my $utf8 (@utf8) { + 1 while unlink $tmpfile; + open FH, ">$tmpfile" or die "Can't open $tmpfile: $!"; + binmode FH, "utf8" if $utf8; + print FH $value; + close FH; + foreach my $offset (0, 3, 7, 22, -1, -3, -5, -7) { + foreach my $length (0, 2, 5, 10) { + # Will read the lesser of the length of the file and the + # read length + my $will_read = $value; + if ($length < length $will_read) { + substr ($will_read, $length) = ''; + } + # Going to trash this so need a copy + my $buffer = $initial_buffer; + + my $expect = $buffer; + if ($offset > 0) { + # Right pad with NUL bytes + $expect .= "\0" x $offset; + substr ($expect, $offset) = ''; + } + substr ($expect, $offset) = $will_read; + + open FH, $tmpfile or die "Can't open $tmpfile: $!"; + binmode FH, "utf8" if $utf8; + my $what = sprintf "%d into %d l $length o $offset", + ord $value, ord $buffer; + $what .= ' u' if $utf8; + $got = read (FH, $buffer, $length, $offset); + is ($got, length $will_read, "got $what"); + is ($buffer, $expect, "buffer $what"); } - # Going to trash this so need a copy - my $buffer = $initial_buffer; - - my $expect = $buffer; - if ($offset > 0) { - # Right pad with NUL bytes - $expect .= "\0" x $offset; - substr ($expect, $offset) = ''; - } - substr ($expect, $offset) = $will_read; - - open FH, $tmpfile or die "Can't open $tmpfile: $!"; - printf "# %d into %d l $length o $offset\n", - ord $value, ord $buffer; - $got = read (FH, $buffer, $length, $offset); - is ($got, length $will_read); - is ($buffer, $expect); } } - # } } }