From: Jarkko Hietaniemi Date: Thu, 11 Jan 2001 17:11:01 +0000 (+0000) Subject: Fix UTF-8 lval substr(). X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=075a4a2b17419f90b0888605cbb91ee4b236f645;p=p5sagit%2Fp5-mst-13.2.git Fix UTF-8 lval substr(). p4raw-id: //depot/perl@8405 --- diff --git a/mg.c b/mg.c index 3a61655..ca06b89 100644 --- a/mg.c +++ b/mg.c @@ -1417,15 +1417,25 @@ Perl_magic_getsubstr(pTHX_ SV *sv, MAGIC *mg) int Perl_magic_setsubstr(pTHX_ SV *sv, MAGIC *mg) { - STRLEN len; - char *tmps = SvPV(sv,len); + STRLEN littlelen; + char *tmps = SvPV(sv, littlelen); + if (DO_UTF8(sv)) { + I32 bigoff = LvTARGOFF(sv); + I32 biglen = LvTARGLEN(sv); + U8 *s, *a, *b; + sv_utf8_upgrade(LvTARG(sv)); - sv_insert(LvTARG(sv),LvTARGOFF(sv),LvTARGLEN(sv), tmps, len); + /* sv_utf8_upgrade() might have moved and/or resized + * the string to be replaced, we must rediscover it. --jhi */ + s = (U8*)SvPVX(LvTARG(sv)); + a = utf8_hop(s, bigoff); + b = utf8_hop(a, biglen); + sv_insert(LvTARG(sv), a - s, b - a, tmps, littlelen); SvUTF8_on(LvTARG(sv)); } else - sv_insert(LvTARG(sv),LvTARGOFF(sv),LvTARGLEN(sv), tmps, len); + sv_insert(LvTARG(sv), LvTARGOFF(sv), LvTARGLEN(sv), tmps, littlelen); return 0; } diff --git a/t/op/substr.t b/t/op/substr.t index 4d3bbce..d3937fb 100755 --- a/t/op/substr.t +++ b/t/op/substr.t @@ -1,6 +1,6 @@ #!./perl -print "1..136\n"; +print "1..149\n"; #P = start of string Q = start of substr R = end of substr S = end of string @@ -304,3 +304,128 @@ my %data; tie $data{'a'}, 'Tie::StdScalar'; # makes $data{'a'} magical $data{a} = "firstlast"; ok 136, substr($data{'a'}, 0, 5, "") eq "first" && $data{'a'} eq "last"; + +# more utf8 + +# The following two originally from Ignasi Roca. + +$x = "\xF1\xF2\xF3"; +substr($x, 0, 1) = "\x{100}"; # Ignasi had \x{FF} +ok 137, length($x) == 3 && + $x eq "\x{100}\xF2\xF3" && + substr($x, 0, 1) eq "\x{100}" && + substr($x, 1, 1) eq "\x{F2}" && + substr($x, 2, 1) eq "\x{F3}"; + +$x = "\xF1\xF2\xF3"; +substr($x, 0, 1) = "\x{100}\x{FF}"; # Ignasi had \x{FF} +ok 138, length($x) == 4 && + $x eq "\x{100}\x{FF}\xF2\xF3" && + substr($x, 0, 1) eq "\x{100}" && + substr($x, 1, 1) eq "\x{FF}" && + substr($x, 2, 1) eq "\x{F2}" && + substr($x, 3, 1) eq "\x{F3}"; + +# more utf8 lval exercise + +$x = "\xF1\xF2\xF3"; +substr($x, 0, 2) = "\x{100}\xFF"; +ok 139, length($x) == 3 && + $x eq "\x{100}\xFF\xF3" && + substr($x, 0, 1) eq "\x{100}" && + substr($x, 1, 1) eq "\x{FF}" && + substr($x, 2, 1) eq "\x{F3}"; + +$x = "\xF1\xF2\xF3"; +substr($x, 1, 1) = "\x{100}\xFF"; +ok 140, length($x) == 4 && + $x eq "\xF1\x{100}\xFF\xF3" && + substr($x, 0, 1) eq "\x{F1}" && + substr($x, 1, 1) eq "\x{100}" && + substr($x, 2, 1) eq "\x{FF}" && + substr($x, 3, 1) eq "\x{F3}"; + +$x = "\xF1\xF2\xF3"; +substr($x, 2, 1) = "\x{100}\xFF"; +ok 141, length($x) == 4 && + $x eq "\xF1\xF2\x{100}\xFF" && + substr($x, 0, 1) eq "\x{F1}" && + substr($x, 1, 1) eq "\x{F2}" && + substr($x, 2, 1) eq "\x{100}" && + substr($x, 3, 1) eq "\x{FF}"; + +$x = "\xF1\xF2\xF3"; +substr($x, 3, 1) = "\x{100}\xFF"; +ok 142, length($x) == 5 && + $x eq "\xF1\xF2\xF3\x{100}\xFF" && + substr($x, 0, 1) eq "\x{F1}" && + substr($x, 1, 1) eq "\x{F2}" && + substr($x, 2, 1) eq "\x{F3}" && + substr($x, 3, 1) eq "\x{100}" && + substr($x, 4, 1) eq "\x{FF}"; + +$x = "\xF1\xF2\xF3"; +substr($x, -1, 1) = "\x{100}\xFF"; +ok 143, length($x) == 4 && + $x eq "\xF1\xF2\x{100}\xFF" && + substr($x, 0, 1) eq "\x{F1}" && + substr($x, 1, 1) eq "\x{F2}" && + substr($x, 2, 1) eq "\x{100}" && + substr($x, 3, 1) eq "\x{FF}"; + +$x = "\xF1\xF2\xF3"; +substr($x, -1, 0) = "\x{100}\xFF"; +ok 144, length($x) == 5 && + $x eq "\xF1\xF2\x{100}\xFF\xF3" && + substr($x, 0, 1) eq "\x{F1}" && + substr($x, 1, 1) eq "\x{F2}" && + substr($x, 2, 1) eq "\x{100}" && + substr($x, 3, 1) eq "\x{FF}" && + substr($x, 4, 1) eq "\x{F3}"; + +$x = "\xF1\xF2\xF3"; +substr($x, 0, -1) = "\x{100}\xFF"; +ok 145, length($x) == 3 && + $x eq "\x{100}\xFF\xF3" && + substr($x, 0, 1) eq "\x{100}" && + substr($x, 1, 1) eq "\x{FF}" && + substr($x, 2, 1) eq "\x{F3}"; + +$x = "\xF1\xF2\xF3"; +substr($x, 0, -2) = "\x{100}\xFF"; +ok 146, length($x) == 4 && + $x eq "\x{100}\xFF\xF2\xF3" && + substr($x, 0, 1) eq "\x{100}" && + substr($x, 1, 1) eq "\x{FF}" && + substr($x, 2, 1) eq "\x{F2}" && + substr($x, 3, 1) eq "\x{F3}"; + +$x = "\xF1\xF2\xF3"; +substr($x, 0, -3) = "\x{100}\xFF"; +ok 147, length($x) == 5 && + $x eq "\x{100}\xFF\xF1\xF2\xF3" && + substr($x, 0, 1) eq "\x{100}" && + substr($x, 1, 1) eq "\x{FF}" && + substr($x, 2, 1) eq "\x{F1}" && + substr($x, 3, 1) eq "\x{F2}" && + substr($x, 4, 1) eq "\x{F3}"; + +$x = "\xF1\xF2\xF3"; +substr($x, 1, -1) = "\x{100}\xFF"; +ok 148, length($x) == 4 && + $x eq "\xF1\x{100}\xFF\xF3" && + substr($x, 0, 1) eq "\x{F1}" && + substr($x, 1, 1) eq "\x{100}" && + substr($x, 2, 1) eq "\x{FF}" && + substr($x, 3, 1) eq "\x{F3}"; + +$x = "\xF1\xF2\xF3"; +substr($x, -1, -1) = "\x{100}\xFF"; +ok 149, length($x) == 5 && + $x eq "\xF1\xF2\x{100}\xFF\xF3" && + substr($x, 0, 1) eq "\x{F1}" && + substr($x, 1, 1) eq "\x{F2}" && + substr($x, 2, 1) eq "\x{100}" && + substr($x, 3, 1) eq "\x{FF}" && + substr($x, 4, 1) eq "\x{F3}"; +