From: Nicholas Clark
Date: Mon, 6 Feb 2006 22:28:41 +0000 (+0000)
Subject: Optimise index so that if the big string is ISO-8859-1 but the little
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=2f040f7f3a7618c48a8d153deb2b7e4a59efefb0;p=p5sagit%2Fp5-mst-13.2.git

Optimise index so that if the big string is ISO-8859-1 but the little
string is UTF-8, it tries to downgrade the little string, rather than
upgrade the big string. For half-meg big strings this is a fourfold
speed gain.

p4raw-id: //depot/perl@27113
---

diff --git a/pp.c b/pp.c
index 96d5ef6..e9f159b 100644
--- a/pp.c
+++ b/pp.c
@@ -3117,8 +3117,8 @@ PP(pp_index)
     const char *tmps2;
     STRLEN biglen;
     const I32 arybase = PL_curcop->cop_arybase;
-    int big_utf8;
-    int little_utf8;
+    bool big_utf8;
+    bool little_utf8;
 
     if (MAXARG < 3)
         offset = 0;
@@ -3130,22 +3130,43 @@ PP(pp_index)
     little_utf8 = DO_UTF8(little);
     if (big_utf8 ^ little_utf8) {
         /* One needs to be upgraded. */
-        SV * const bytes = little_utf8 ? big : little;
-        STRLEN len;
-        const char * const p = SvPV_const(bytes, len);
-
-        temp = newSVpvn(p, len);
+        if (little_utf8 && !PL_encoding) {
+            /* Well, maybe instead we might be able to downgrade the small
+               string? */
+            STRLEN little_len;
+            const U8 * const little_pv = (U8*) SvPV_const(little, little_len);
+            char * const pv = (char*)bytes_from_utf8(little_pv, &little_len,
+                                                     &little_utf8);
+            if (little_utf8) {
+                /* If the large string is ISO-8859-1, and it's not possible to
+                   convert the small string to ISO-8859-1, then there is no
+                   way that it could be found anywhere by index. */
+                retval = -1;
+                goto fail;
+            }
 
-        if (PL_encoding) {
-            sv_recode_to_utf8(temp, PL_encoding);
-        } else {
-            sv_utf8_upgrade(temp);
-        }
-        if (little_utf8) {
-            big = temp;
-            big_utf8 = TRUE;
+            /* At this point, pv is a malloc()ed string. So donate it to temp
+               to ensure it will get free()d */
+            little = temp = newSV(0);
+            sv_usepvn(temp, pv, little_len);
         } else {
-            little = temp;
+            SV * const bytes = little_utf8 ? big : little;
+            STRLEN len;
+            const char * const p = SvPV_const(bytes, len);
+
+            temp = newSVpvn(p, len);
+
+            if (PL_encoding) {
+                sv_recode_to_utf8(temp, PL_encoding);
+            } else {
+                sv_utf8_upgrade(temp);
+            }
+            if (little_utf8) {
+                big = temp;
+                big_utf8 = TRUE;
+            } else {
+                little = temp;
+            }
         }
     }
     if (big_utf8 && offset > 0)
@@ -3158,12 +3179,14 @@ PP(pp_index)
     if (!(tmps2 = fbm_instr((unsigned char*)tmps + offset,
                             (unsigned char*)tmps + biglen, little, 0)))
         retval = -1;
-    else
+    else {
         retval = tmps2 - tmps;
-    if (retval > 0 && big_utf8)
-        sv_pos_b2u(big, &retval);
+        if (big_utf8)
+            sv_pos_b2u(big, &retval);
+    }
     if (temp)
         SvREFCNT_dec(temp);
+ fail:
     PUSHi(retval + arybase);
     RETURN;
 }