From: Nicholas Clark <nick@ccl4.org>
Date: Wed, 29 Dec 2004 19:00:12 +0000 (+0000)
Subject: read (and presuambly sysread) would expose the UTF8 internals when
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=1dd30107ebac37cf465e5225a00d367454a7cb84;p=p5sagit%2Fp5-mst-13.2.git

read (and presuambly sysread) would expose the UTF8 internals when
reading from a byte orientated file handle into a UTF8 scalar.

p4raw-id: //depot/perl@23703
---

diff --git a/pp_sys.c b/pp_sys.c
index 3071f1b..edb69c2 100644
--- a/pp_sys.c
+++ b/pp_sys.c
@@ -1555,6 +1555,8 @@ PP(pp_sysread)
     STRLEN blen;
     MAGIC *mg;
     int fp_utf8;
+    int buffer_utf8;
+    SV *read_target;
     Size_t got = 0;
     Size_t wanted;
     bool charstart = FALSE;
@@ -1605,6 +1607,7 @@ PP(pp_sysread)
     }
     else {
 	buffer = SvPV_force(bufsv, blen);
+	buffer_utf8 = !IN_BYTES && SvUTF8(bufsv);
     }
     if (length < 0)
 	DIE(aTHX_ "Negative length");
@@ -1672,11 +1675,30 @@ PP(pp_sysread)
     }
  more_bytes:
     bufsize = SvCUR(bufsv);
+    /* Allocating length + offset + 1 isn't perfect in the case of reading
+       bytes from a byte file handle into a UTF8 buffer, but it won't harm us
+       unduly.
+       (should be 2 * length + offset + 1, or possibly something longer if
+       PL_encoding is true) */
     buffer  = SvGROW(bufsv, (STRLEN)(length+offset+1));
     if (offset > bufsize) { /* Zero any newly allocated space */
     	Zero(buffer+bufsize, offset-bufsize, char);
     }
     buffer = buffer + offset;
+    if (!buffer_utf8) {
+	read_target = bufsv;
+    } else {
+	/* Best to read the bytes into a new SV, upgrade that to UTF8, then
+	   concatenate it to the current buffer.  */
+
+	/* Truncate the existing buffer to the start of where we will be
+	   reading to:  */
+	SvCUR_set(bufsv, offset);
+
+	read_target = sv_newmortal();
+	SvUPGRADE(read_target, SVt_PV);
+	buffer = SvGROW(read_target, length + 1);
+    }
 
     if (PL_op->op_type == OP_SYSREAD) {
 #ifdef PERL_SOCK_SYSREAD_IS_RECV
@@ -1716,9 +1738,9 @@ PP(pp_sysread)
 		report_evil_fh(gv, io, OP_phoney_OUTPUT_ONLY);
 	goto say_undef;
     }
-    SvCUR_set(bufsv, count+(buffer - SvPVX(bufsv)));
-    *SvEND(bufsv) = '\0';
-    (void)SvPOK_only(bufsv);
+    SvCUR_set(read_target, count+(buffer - SvPVX(read_target)));
+    *SvEND(read_target) = '\0';
+    (void)SvPOK_only(read_target);
     if (fp_utf8 && !IN_BYTES) {
 	/* Look at utf8 we got back and count the characters */
 	char *bend = buffer + count;
@@ -1754,6 +1776,11 @@ PP(pp_sysread)
 	count = got;
 	SvUTF8_on(bufsv);
     }
+    else if (buffer_utf8) {
+	/* Let svcatsv upgrade the bytes we read in to utf8.
+	   The buffer is a mortal so will be freed soon.  */
+	sv_catsv_nomg(bufsv, read_target);
+    }
     SvSETMAGIC(bufsv);
     /* This should not be marked tainted if the fp is marked clean */
     if (!(IoFLAGS(io) & IOf_UNTAINT))
diff --git a/t/op/read.t b/t/op/read.t
index f343c0d..6aab395 100755
--- a/t/op/read.t
+++ b/t/op/read.t
@@ -9,7 +9,7 @@ BEGIN {
 }
 use strict;
 
-plan tests => 516;
+plan tests => 1732;
 
 open(FOO,'op/read.t') || open(FOO,'t/op/read.t') || open(FOO,':op:read.t') || die "Can't open op.read";
 seek(FOO,4,0) or die "Seek failed: $!";
@@ -31,7 +31,7 @@ my $tmpfile = 'Op_read.tmp';
 
 my (@values, @buffers) = ('', '');
 
-foreach (65, 161) { # , 253, 9786) {
+foreach (65, 161, 253) { #  9786) {
     push @values, join "", map {chr $_} $_ .. $_ + 4;
     push @buffers, join "", map {chr $_} $_ + 5 .. $_ + 20;
 }
@@ -43,39 +43,42 @@ foreach my $value (@values) {
 	    # It's all 8 bit
 	    unshift @utf8, 0;
 	}
-	# foreach my $utf8 (@utf8) {
-	1 while unlink $tmpfile;
-	open FH, ">$tmpfile" or die "Can't open $tmpfile: $!";
-	print FH $value;
-	close FH;
-	foreach my $offset (0, 3, 7, 22, -1, -3, -5, -7) {
-	    foreach my $length (0, 2, 5, 10) {
-		# Will read the lesser of the length of the file and the read
-		# length
-		my $will_read = $value;
-		if ($length < length $will_read) {
-		    substr ($will_read, $length) = '';
+	foreach my $utf8 (@utf8) {
+	    1 while unlink $tmpfile;
+	    open FH, ">$tmpfile" or die "Can't open $tmpfile: $!";
+	    binmode FH, "utf8" if $utf8;
+	    print FH $value;
+	    close FH;
+	    foreach my $offset (0, 3, 7, 22, -1, -3, -5, -7) {
+		foreach my $length (0, 2, 5, 10) {
+		    # Will read the lesser of the length of the file and the
+		    # read length
+		    my $will_read = $value;
+		    if ($length < length $will_read) {
+			substr ($will_read, $length) = '';
+		    }
+		    # Going to trash this so need a copy
+		    my $buffer = $initial_buffer;
+
+		    my $expect = $buffer;
+		    if ($offset > 0) {
+			# Right pad with NUL bytes
+			$expect .= "\0" x $offset;
+			substr ($expect, $offset) = '';
+		    }
+		    substr ($expect, $offset) = $will_read;
+
+		    open FH, $tmpfile or die "Can't open $tmpfile: $!";
+		    binmode FH, "utf8" if $utf8;
+		    my $what = sprintf "%d into %d l $length o $offset",
+			ord $value, ord $buffer;
+		    $what .= ' u' if $utf8;
+		    $got = read (FH, $buffer, $length, $offset);
+		    is ($got, length $will_read, "got $what");
+		    is ($buffer, $expect, "buffer $what");
 		}
-		# Going to trash this so need a copy
-		my $buffer = $initial_buffer;
-
-		my $expect = $buffer;
-		if ($offset > 0) {
-		    # Right pad with NUL bytes
-		    $expect .= "\0" x $offset;
-		    substr ($expect, $offset) = '';
-		}
-		substr ($expect, $offset) = $will_read;
-
-		open FH, $tmpfile or die "Can't open $tmpfile: $!";
-		printf "# %d into %d l $length o $offset\n",
-		    ord $value, ord $buffer;
-		$got = read (FH, $buffer, $length, $offset);
-		is ($got, length $will_read);
-		is ($buffer, $expect);
 	    }
 	}
-	# }
     }
 }