Properly set/reset the UTF-8 length cache in regcomp.c.
Nicholas Clark [Fri, 17 Mar 2006 19:37:41 +0000 (19:37 +0000)]
p4raw-id: //depot/perl@27536

regcomp.c
sv.c

diff --git a/regcomp.c b/regcomp.c
index b3c31b7..cf8486c 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -478,7 +478,7 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
        SV * const sv = data->last_found;
        MAGIC * const mg =
            SvUTF8(sv) && SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL;
-       if (mg && mg->mg_len > 0)
+       if (mg)
            mg->mg_len = 0;
     }
     data->last_end = -1;
@@ -1978,6 +1978,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                        ? I32_MAX : data->pos_min + data->pos_delta;
                }
                sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan));
+               if (UTF)
+                   SvUTF8_on(data->last_found);
                {
                    SV * const sv = data->last_found;
                    MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
@@ -1986,8 +1988,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                        mg->mg_len += utf8_length((U8*)STRING(scan),
                                                  (U8*)STRING(scan)+STR_LEN(scan));
                }
-               if (UTF)
-                   SvUTF8_on(data->last_found);
                data->last_end = data->pos_min + l;
                data->pos_min += l; /* As in the first entry. */
                data->flags &= ~SF_BEFORE_EOL;
@@ -2383,7 +2383,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                            the group.  */
                        scan_commit(pRExC_state,data);
                        if (mincount && last_str) {
-                           sv_setsv(data->last_found, last_str);
+                           SV *sv = data->last_found;
+                           MAGIC *mg = SvUTF8(sv) && SvMAGICAL(sv) ?
+                               mg_find(sv, PERL_MAGIC_utf8) : NULL;
+
+                           if (mg)
+                               mg->mg_len = -1;
+                           sv_setsv(sv, last_str);
                            data->last_end = data->pos_min;
                            data->last_start_min =
                                data->pos_min - CHR_SVLEN(last_str);
diff --git a/sv.c b/sv.c
index 1ec559c..5c4722d 100644
--- a/sv.c
+++ b/sv.c
@@ -5289,7 +5289,7 @@ Perl_sv_len_utf8(pTHX_ register SV *sv)
        const U8 *s = (U8*)SvPV_const(sv, len);
        MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : 0;
 
-       if (mg && mg->mg_len != -1 && (mg->mg_len > 0 || len == 0)) {
+       if (mg && mg->mg_len != -1) {
            ulen = mg->mg_len;
 #ifdef PERL_UTF8_CACHE_ASSERT
            assert(ulen == Perl_utf8_length(aTHX_ s, s + len));
@@ -5326,8 +5326,10 @@ S_utf8_mg_pos_init(pTHX_ SV *sv, MAGIC **mgp, STRLEN **cachep, I32 i,
     bool found = FALSE;
 
     if (SvMAGICAL(sv) && !SvREADONLY(sv)) {
-       if (!*mgp)
+       if (!*mgp) {
            *mgp = sv_magicext(sv, 0, PERL_MAGIC_utf8, (MGVTBL*)&PL_vtbl_utf8, 0, 0);
+           (*mgp)->mg_len = -1;
+       }
        assert(*mgp);
 
        if ((*mgp)->mg_ptr)
@@ -5445,7 +5447,7 @@ S_utf8_mg_pos(pTHX_ SV *sv, MAGIC **mgp, STRLEN **cachep, I32 i, I32 *offsetp, I
        }
 #ifdef PERL_UTF8_CACHE_ASSERT
        if (found) {
-            U8 *s = start;
+            const U8 *s = start;
             I32 n = uoff;
 
             while (n-- && s < send)
@@ -5649,6 +5651,7 @@ Perl_sv_pos_b2u(pTHX_ register SV* sv, I32* offsetp)
            if (!mg) {
                sv_magic(sv, 0, PERL_MAGIC_utf8, 0, 0);
                mg = mg_find(sv, PERL_MAGIC_utf8);
+               mg->mg_len = -1;
            }
            assert(mg);