From: Nicholas Clark Date: Wed, 26 Mar 2008 21:05:20 +0000 (+0000) Subject: The offset for pos is stored as bytes, and converted to (Unicode) X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=ce47496284713207331ee4a6b164a0088c7e4776;p=p5sagit%2Fp5-mst-13.2.git The offset for pos is stored as bytes, and converted to (Unicode) character position when read, if needed. The code for setting pos inside subst was incorrectly converting to character position before storing the value. This code appears to have been buggy since it was added in 2000 in change 7562. p4raw-id: //depot/perl@33580 --- diff --git a/pp_ctl.c b/pp_ctl.c index 0b4da4d..a3be377 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -298,7 +298,6 @@ PP(pp_substcont) { /* Update the pos() information. */ SV * const sv = cx->sb_targ; MAGIC *mg; - I32 i; SvUPGRADE(sv, SVt_PVMG); if (!(mg = mg_find(sv, PERL_MAGIC_regex_global))) { #ifdef PERL_OLD_COPY_ON_WRITE @@ -308,10 +307,7 @@ PP(pp_substcont) mg = sv_magicext(sv, NULL, PERL_MAGIC_regex_global, &PL_vtbl_mglob, NULL, 0); } - i = m - orig; - if (DO_UTF8(sv)) - sv_pos_b2u(sv, &i); - mg->mg_len = i; + mg->mg_len = m - orig; } if (old != rx) (void)ReREFCNT_inc(rx); diff --git a/t/op/subst.t b/t/op/subst.t index 6cf84b7..06c04e8 100755 --- a/t/op/subst.t +++ b/t/op/subst.t @@ -7,7 +7,7 @@ BEGIN { } require './test.pl'; -plan( tests => 136 ); +plan( tests => 139 ); $x = 'foo'; $_ = "x"; @@ -583,3 +583,11 @@ is($name, "cis", q[#22351 bug with 'e' substitution modifier]); is($want,$_,"RT#17542"); } +{ + my @tests = ('ABC', "\xA3\xA4\xA5", "\x{410}\x{411}\x{412}"); + foreach (@tests) { + my $id = ord $_; + s/./pos/ge; + is($_, "012", "RT#52104: $id"); + } +}