From: SADAHIRO Tomoyuki Date: Sat, 25 Feb 2006 18:16:45 +0000 (+0900) Subject: Re: [perl #38619] Bug in lc and uc (interaction between UTF-8, substr, and lc/uc) X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=6818a3573a254af22852a6102b2d70b346bae901;p=p5sagit%2Fp5-mst-13.2.git Re: [perl #38619] Bug in lc and uc (interaction between UTF-8, substr, and lc/uc) Message-Id: <20060225180934.FCC3.BQW10602@nifty.com> p4raw-id: //depot/perl@27329 --- diff --git a/pp.c b/pp.c index 1724ab0..b1fa7bc 100644 --- a/pp.c +++ b/pp.c @@ -3350,7 +3350,8 @@ PP(pp_ucfirst) if (slen > ulen) sv_catpvn(TARG, (char*)(s + ulen), slen - ulen); SvUTF8_on(TARG); - SETs(TARG); + sv = TARG; + SETs(sv); } else { s = (U8*)SvPV_force_nomg(sv, slen); @@ -3402,7 +3403,8 @@ PP(pp_uc) if (!len) { SvUTF8_off(TARG); /* decontaminate */ sv_setpvn(TARG, "", 0); - SETs(TARG); + sv = TARG; + SETs(sv); } else { STRLEN min = len + 1; @@ -3435,7 +3437,8 @@ PP(pp_uc) *d = '\0'; SvUTF8_on(TARG); SvCUR_set(TARG, d - (U8*)SvPVX_const(TARG)); - SETs(TARG); + sv = TARG; + SETs(sv); } } else { @@ -3487,7 +3490,8 @@ PP(pp_lc) if (!len) { SvUTF8_off(TARG); /* decontaminate */ sv_setpvn(TARG, "", 0); - SETs(TARG); + sv = TARG; + SETs(sv); } else { STRLEN min = len + 1; @@ -3540,7 +3544,8 @@ PP(pp_lc) *d = '\0'; SvUTF8_on(TARG); SvCUR_set(TARG, d - (U8*)SvPVX_const(TARG)); - SETs(TARG); + sv = TARG; + SETs(sv); } } else { diff --git a/t/op/lc.t b/t/op/lc.t index 460fed2..95cf019 100644 --- a/t/op/lc.t +++ b/t/op/lc.t @@ -6,7 +6,7 @@ BEGIN { require './test.pl'; } -plan tests => 59; +plan tests => 77; $a = "HELLO.* world"; $b = "hello.* WORLD"; @@ -163,3 +163,39 @@ for my $a (0,1) { is($a, v10, "[perl #18857]"); } } + + +# [perl #38619] Bug in lc and uc (interaction between UTF-8, substr, and lc/uc) + +for ("a\x{100}", "xyz\x{100}") { + is(substr(uc($_), 0), uc($_), "[perl #38619] uc"); +} +for ("A\x{100}", "XYZ\x{100}") { + is(substr(lc($_), 0), lc($_), "[perl #38619] lc"); +} +for ("a\x{100}", "ßyz\x{100}") { # ß to Ss (different length) + is(substr(ucfirst($_), 0), ucfirst($_), "[perl #38619] ucfirst"); +} + +# Related to [perl #38619] +# the original report concerns PERL_MAGIC_utf8. +# these cases concern PERL_MAGIC_regex_global. + +for (map { $_ } "a\x{100}", "abc\x{100}", "\x{100}") { + chop; # get ("a", "abc", "") in utf8 + my $return = uc($_) =~ /\G(.?)/g; + my $result = $return ? $1 : "not"; + my $expect = (uc($_) =~ /(.?)/g)[0]; + is($return, 1, "[perl #38619]"); + is($result, $expect, "[perl #38619]"); +} + +for (map { $_ } "A\x{100}", "ABC\x{100}", "\x{100}") { + chop; # get ("A", "ABC", "") in utf8 + my $return = lc($_) =~ /\G(.?)/g; + my $result = $return ? $1 : "not"; + my $expect = (lc($_) =~ /(.?)/g)[0]; + is($return, 1, "[perl #38619]"); + is($result, $expect, "[perl #38619]"); +} +