From: Nicholas Clark Date: Tue, 11 Jan 2005 19:10:20 +0000 (+0000) Subject: Fix bug 32294 - index()/rindex() ignore UTF8 flag X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e609e5866582a76dcfe889f9b46e4909b2f0b543;p=p5sagit%2Fp5-mst-13.2.git Fix bug 32294 - index()/rindex() ignore UTF8 flag (for cases of mixed UTF8/bytes) Test code based on bug report by John Gardiner Myers p4raw-id: //depot/perl@23782 --- diff --git a/pp.c b/pp.c index 69d8e18..f960c37 100644 --- a/pp.c +++ b/pp.c @@ -3190,12 +3190,15 @@ PP(pp_index) dSP; dTARGET; SV *big; SV *little; + SV *temp = Nullsv; I32 offset; I32 retval; char *tmps; char *tmps2; STRLEN biglen; I32 arybase = PL_curcop->cop_arybase; + int big_utf8; + int little_utf8; if (MAXARG < 3) offset = 0; @@ -3203,9 +3206,31 @@ PP(pp_index) offset = POPi - arybase; little = POPs; big = POPs; - tmps = SvPV(big, biglen); - if (offset > 0 && DO_UTF8(big)) + big_utf8 = DO_UTF8(big); + little_utf8 = DO_UTF8(little); + if (big_utf8 ^ little_utf8) { + /* One needs to be upgraded. */ + SV *bytes = little_utf8 ? big : little; + STRLEN len; + char *p = SvPV(bytes, len); + + temp = newSVpvn(p, len); + + if (PL_encoding) { + sv_recode_to_utf8(temp, PL_encoding); + } else { + sv_utf8_upgrade(temp); + } + if (little_utf8) { + big = temp; + big_utf8 = TRUE; + } else { + little = temp; + } + } + if (big_utf8 && offset > 0) sv_pos_u2b(big, &offset, 0); + tmps = SvPV(big, biglen); if (offset < 0) offset = 0; else if (offset > (I32)biglen) @@ -3215,8 +3240,10 @@ PP(pp_index) retval = -1; else retval = tmps2 - tmps; - if (retval > 0 && DO_UTF8(big)) + if (retval > 0 && big_utf8) sv_pos_b2u(big, &retval); + if (temp) + SvREFCNT_dec(temp); PUSHi(retval + arybase); RETURN; } @@ -3226,6 +3253,7 @@ PP(pp_rindex) dSP; dTARGET; SV *big; SV *little; + SV *temp = Nullsv; STRLEN blen; STRLEN llen; I32 offset; @@ -3233,17 +3261,42 @@ PP(pp_rindex) char *tmps; char *tmps2; I32 arybase = PL_curcop->cop_arybase; + int big_utf8; + int little_utf8; if (MAXARG >= 3) offset = POPi; little = POPs; big = POPs; + big_utf8 = DO_UTF8(big); + little_utf8 = DO_UTF8(little); + if (big_utf8 ^ little_utf8) { + /* One needs to be upgraded. */ + SV *bytes = little_utf8 ? big : little; + STRLEN len; + char *p = SvPV(bytes, len); + + temp = newSVpvn(p, len); + + if (PL_encoding) { + sv_recode_to_utf8(temp, PL_encoding); + } else { + sv_utf8_upgrade(temp); + } + if (little_utf8) { + big = temp; + big_utf8 = TRUE; + } else { + little = temp; + } + } tmps2 = SvPV(little, llen); tmps = SvPV(big, blen); + if (MAXARG < 3) offset = blen; else { - if (offset > 0 && DO_UTF8(big)) + if (offset > 0 && big_utf8) sv_pos_u2b(big, &offset, 0); offset = offset - arybase + llen; } @@ -3256,8 +3309,10 @@ PP(pp_rindex) retval = -1; else retval = tmps2 - tmps; - if (retval > 0 && DO_UTF8(big)) + if (retval > 0 && big_utf8) sv_pos_b2u(big, &retval); + if (temp) + SvREFCNT_dec(temp); PUSHi(retval + arybase); RETURN; } @@ -4749,3 +4804,13 @@ PP(pp_threadsv) { DIE(aTHX_ "tried to access per-thread data in non-threaded perl"); } + +/* + * Local variables: + * c-indentation-style: bsd + * c-basic-offset: 4 + * indent-tabs-mode: t + * End: + * + * vim: expandtab shiftwidth=4: +*/ diff --git a/t/op/index.t b/t/op/index.t index 9e21e58..d223265 100755 --- a/t/op/index.t +++ b/t/op/index.t @@ -5,15 +5,16 @@ BEGIN { @INC = '../lib'; } +use strict; require './test.pl'; -plan( tests => 28 ); +plan( tests => 46 ); -$foo = 'Now is the time for all good men to come to the aid of their country.'; +my $foo = 'Now is the time for all good men to come to the aid of their country.'; -$first = substr($foo,0,index($foo,'the')); +my $first = substr($foo,0,index($foo,'the')); is($first, "Now is "); -$last = substr($foo,rindex($foo,'the'),100); +my $last = substr($foo,rindex($foo,'the'),100); is($last, "their country."); $last = substr($foo,index($foo,'Now'),2); @@ -69,3 +70,40 @@ is(rindex($a, "foo", ), 0); is($a, $b, q{[perl #22375] 'split'/'index' problem for utf8}); } } + +{ + my $search = "foo \xc9 bar"; + my $text = "a\xa3\xa3a $search $search quux"; + + my $text_utf8 = $text; + utf8::upgrade($text_utf8); + my $search_utf8 = $search; + utf8::upgrade($search_utf8); + + is (index($text, $search), 5); + is (rindex($text, $search), 18); + is (index($text, $search_utf8), 5); + is (rindex($text, $search_utf8), 18); + is (index($text_utf8, $search), 5); + is (rindex($text_utf8, $search), 18); + is (index($text_utf8, $search_utf8), 5); + is (rindex($text_utf8, $search_utf8), 18); + + my $text_octets = $text_utf8; + utf8::encode ($text_octets); + my $search_octets = $search_utf8; + utf8::encode ($search_octets); + + is (index($text_octets, $search_octets), 7, "index octets, octets") + or _diag ($text_octets, $search_octets); + is (rindex($text_octets, $search_octets), 21, "rindex octets, octets"); + is (index($text_octets, $search_utf8), -1); + is (rindex($text_octets, $search_utf8), -1); + is (index($text_utf8, $search_octets), -1); + is (rindex($text_utf8, $search_octets), -1); + + is (index($text_octets, $search), -1); + is (rindex($text_octets, $search), -1); + is (index($text, $search_octets), -1); + is (rindex($text, $search_octets), -1); +}