From: Nicholas Clark
Date: Fri, 17 Mar 2006 19:37:41 +0000 (+0000)
Subject: Properly set/reset the UTF-8 length cache in regcomp.c.
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=45f4726892ee34fcb87cb59a34b8208652d4d517;p=p5sagit%2Fp5-mst-13.2.git

Properly set/reset the UTF-8 length cache in regcomp.c.

p4raw-id: //depot/perl@27536
---

diff --git a/regcomp.c b/regcomp.c
index b3c31b7..cf8486c 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -478,7 +478,7 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
         SV * const sv = data->last_found;
         MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
             mg_find(sv, PERL_MAGIC_utf8) : NULL;
-        if (mg && mg->mg_len > 0)
+        if (mg)
             mg->mg_len = 0;
     }
     data->last_end = -1;
@@ -1978,6 +1978,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                         ? I32_MAX : data->pos_min + data->pos_delta;
                 }
                 sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan));
+                if (UTF)
+                    SvUTF8_on(data->last_found);
                 {
                     SV * const sv = data->last_found;
                     MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
@@ -1986,8 +1988,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                         mg->mg_len += utf8_length((U8*)STRING(scan),
                                                   (U8*)STRING(scan)+STR_LEN(scan));
                 }
-                if (UTF)
-                    SvUTF8_on(data->last_found);
                 data->last_end = data->pos_min + l;
                 data->pos_min += l; /* As in the first entry. */
                 data->flags &= ~SF_BEFORE_EOL;
@@ -2383,7 +2383,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                        the group.  */
                     scan_commit(pRExC_state,data);
                     if (mincount && last_str) {
-                        sv_setsv(data->last_found, last_str);
+                        SV *sv = data->last_found;
+                        MAGIC *mg = SvUTF8(sv) && SvMAGICAL(sv) ?
+                            mg_find(sv, PERL_MAGIC_utf8) : NULL;
+
+                        if (mg)
+                            mg->mg_len = -1;
+                        sv_setsv(sv, last_str);
                         data->last_end = data->pos_min;
                         data->last_start_min =
                             data->pos_min - CHR_SVLEN(last_str);
diff --git a/sv.c b/sv.c
index 1ec559c..5c4722d 100644
--- a/sv.c
+++ b/sv.c
@@ -5289,7 +5289,7 @@ Perl_sv_len_utf8(pTHX_ register SV *sv)
         const U8 *s = (U8*)SvPV_const(sv, len);
         MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : 0;
 
-        if (mg && mg->mg_len != -1 && (mg->mg_len > 0 || len == 0)) {
+        if (mg && mg->mg_len != -1) {
             ulen = mg->mg_len;
 #ifdef PERL_UTF8_CACHE_ASSERT
             assert(ulen == Perl_utf8_length(aTHX_ s, s + len));
@@ -5326,8 +5326,10 @@ S_utf8_mg_pos_init(pTHX_ SV *sv, MAGIC **mgp, STRLEN **cachep, I32 i,
     bool found = FALSE;
 
     if (SvMAGICAL(sv) && !SvREADONLY(sv)) {
-        if (!*mgp)
+        if (!*mgp) {
             *mgp = sv_magicext(sv, 0, PERL_MAGIC_utf8, (MGVTBL*)&PL_vtbl_utf8, 0, 0);
+            (*mgp)->mg_len = -1;
+        }
         assert(*mgp);
 
         if ((*mgp)->mg_ptr)
@@ -5445,7 +5447,7 @@ S_utf8_mg_pos(pTHX_ SV *sv, MAGIC **mgp, STRLEN **cachep, I32 i, I32 *offsetp, I
     }
 #ifdef PERL_UTF8_CACHE_ASSERT
     if (found) {
-        U8 *s = start;
+        const U8 *s = start;
         I32 n = uoff;
 
         while (n-- && s < send)
@@ -5649,6 +5651,7 @@ Perl_sv_pos_b2u(pTHX_ register SV* sv, I32* offsetp)
         if (!mg) {
             sv_magic(sv, 0, PERL_MAGIC_utf8, 0, 0);
             mg = mg_find(sv, PERL_MAGIC_utf8);
+            mg->mg_len = -1;
         }
         assert(mg);
 