From: Hugo van der Sanden Date: Sun, 20 Aug 2000 07:30:46 +0000 (+0100) Subject: UTF8 concat fixes. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=15bb2692a112e58a65a3e260753db74fb4044704;p=p5sagit%2Fp5-mst-13.2.git UTF8 concat fixes. Subject: [PATCH @6713] Re: [ID 20000815.006] latest patched perl core dumps Message-Id: <200008200630.HAA18053@crypt.compulink.co.uk> p4raw-id: //depot/perl@6719 --- diff --git a/pp_hot.c b/pp_hot.c index 1b5f278..0a0c084 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -158,14 +158,11 @@ PP(pp_concat) /* Set TARG to PV(left), then add right */ U8 *l, *c, *olds = NULL; STRLEN targlen; + s = (U8*)SvPV(right,len); if (TARG == right) { - /* Need a safe copy elsewhere since we're just about to - write onto TARG */ - olds = (U8*)SvPV(right,len); - s = (U8*)savepv((char*)olds); + /* Take a copy since we're about to overwrite TARG */ + olds = s = (U8*)savepvn((char*)s, len); } - else - s = (U8*)SvPV(right,len); l = (U8*)SvPV(left, targlen); if (TARG != left) sv_setpvn(TARG, (char*)l, targlen); @@ -175,14 +172,14 @@ PP(pp_concat) targlen = SvCUR(TARG) + len; if (!right_utf) { /* plus one for each hi-byte char if we have to upgrade */ - for (c = s; *c; c++) { + for (c = s; c < s + len; c++) { if (*c & 0x80) targlen++; } } SvGROW(TARG, targlen+1); /* And now copy, maybe upgrading right to UTF8 on the fly */ - for (c = (U8*)SvEND(TARG); *s; s++) { + for (c = (U8*)SvEND(TARG); len--; s++) { if (*s & 0x80 && !right_utf) c = uv_to_utf8(c, *s); else diff --git a/t/op/append.t b/t/op/append.t index d115146..afaf6a1 100755 --- a/t/op/append.t +++ b/t/op/append.t @@ -2,7 +2,7 @@ # $RCSfile: append.t,v $$Revision: 4.1 $$Date: 92/08/07 18:27:36 $ -print "1..3\n"; +print "1..13\n"; $a = 'ab' . 'c'; # compile time $b = 'def'; @@ -19,3 +19,38 @@ $_ = $a; $_ .= $b; print "#3\t:$_: eq :abcdef:\n"; if ($_ eq 'abcdef') {print "ok 3\n";} else {print "not ok 3\n";} + +# test that when right argument of concat is UTF8, and is the same +# variable as the target, and the left argument is not UTF8, it no +# longer frees the wrong string. +{ + sub r2 { + my $string = ''; + $string .= pack("U0a*", 'mnopqrstuvwx'); + $string = "abcdefghijkl$string"; + } + + r2() and print "ok $_\n" for qw/ 4 5 /; +} + +# test that nul bytes get copied +{ + my($a, $ab) = ("a", "a\000b"); + my($u, $ub) = map pack("U0a*", $_), $a, $ab; + my $t1 = $a; $t1 .= $ab; + print $t1 =~ /b/ ? "ok 6\n" : "not ok 6\t# $t1\n"; + my $t2 = $a; $t2 .= $ub; + print $t2 =~ /b/ ? "ok 7\n" : "not ok 7\t# $t2\n"; + my $t3 = $u; $t3 .= $ab; + print $t3 =~ /b/ ? "ok 8\n" : "not ok 8\t# $t3\n"; + my $t4 = $u; $t4 .= $ub; + print $t4 =~ /b/ ? "ok 9\n" : "not ok 9\t# $t4\n"; + my $t5 = $a; $t5 = $ab . $t5; + print $t5 =~ /b/ ? "ok 10\n" : "not ok 10\t# $t5\n"; + my $t6 = $a; $t6 = $ub . $t6; + print $t6 =~ /b/ ? "ok 11\n" : "not ok 11\t# $t6\n"; + my $t7 = $u; $t7 = $ab . $t7; + print $t7 =~ /b/ ? "ok 12\n" : "not ok 12\t# $t7\n"; + my $t8 = $u; $t8 = $ub . $t8; + print $t8 =~ /b/ ? "ok 13\n" : "not ok 13\t# $t8\n"; +}