From: Gurusamy Sarathy Date: Sun, 7 May 2000 05:39:55 +0000 (+0000) Subject: substr() does not preserve utf8-ness (from Stefan Eissing X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=7f66633bd788f56bb4e77473ec86f45e8cc0614b;p=p5sagit%2Fp5-mst-13.2.git substr() does not preserve utf8-ness (from Stefan Eissing ); added tests p4raw-id: //depot/perl@6084 --- diff --git a/pp.c b/pp.c index a86be7a..d2a8c64 100644 --- a/pp.c +++ b/pp.c @@ -2008,12 +2008,12 @@ PP(pp_substr) RETPUSHUNDEF; } else { - if (utfcurlen) { + if (utfcurlen) sv_pos_u2b(sv, &pos, &rem); - SvUTF8_on(TARG); - } tmps += pos; sv_setpvn(TARG, tmps, rem); + if (utfcurlen) + SvUTF8_on(TARG); if (repl) sv_insert(sv, pos, rem, repl, repl_len); else if (lvalue) { /* it's an lvalue! */ @@ -2026,7 +2026,7 @@ PP(pp_substr) "Attempt to use reference as lvalue in substr"); } if (SvOK(sv)) /* is it defined ? */ - (void)SvPOK_only(sv); + (void)SvPOK_only_UTF8(sv); else sv_setpvn(sv,"",0); /* avoid lexical reincarnation */ } diff --git a/t/op/substr.t b/t/op/substr.t index 5764e67..6180acc 100755 --- a/t/op/substr.t +++ b/t/op/substr.t @@ -1,5 +1,5 @@ -print "1..125\n"; +print "1..130\n"; #P = start of string Q = start of substr R = end of substr S = end of string @@ -268,3 +268,15 @@ ok 123, $@ && $@ =~ /Can't modify substr/ && $a eq "foo"; $a = "abcdefgh"; ok 124, sub { shift }->(substr($a, 0, 4, "xxxx")) eq 'abcd'; ok 125, $a eq 'xxxxefgh'; + +# utf8 sanity +{ + my $x = substr("a\x{263a}b",0); + ok 126, length($x) eq 3; + $x = substr($x,1,1); + ok 127, $x eq "\x{263a}"; + ok 128, length($x) eq 1; + substr($x,0,1) = "abcd"; + ok 129, $x eq "abcd"; + ok 130, length($x) eq 4; +}