PATCH for [ID 20010305.003]
[p5sagit/p5-mst-13.2.git] / regcomp.c
index bbd91c6..4638d77 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
 /* *These* symbols are masked to allow static link. */
 #  define Perl_regnext my_regnext
 #  define Perl_save_re_context my_save_re_context
-#  define Perl_reginitcolors my_reginitcolors 
+#  define Perl_reginitcolors my_reginitcolors
 
 #  define PERL_NO_GET_CONTEXT
-#endif 
+#endif
 
 /*SUPPRESS 112*/
 /*
@@ -69,7 +69,7 @@
  *
  ****    Alterations to Henry's code are...
  ****
- ****    Copyright (c) 1991-2000, Larry Wall
+ ****    Copyright (c) 1991-2001, Larry Wall
  ****
  ****    You may distribute under the terms of either the GNU General Public
  ****    License or the Artistic License, as specified in the README file.
@@ -154,11 +154,6 @@ typedef struct RExC_state_t {
 #define        ISMULT1(c)      ((c) == '*' || (c) == '+' || (c) == '?')
 #define        ISMULT2(s)      ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
        ((*s) == '{' && regcurly(s)))
-#ifdef atarist
-#define        PERL_META       "^$.[()|?+*\\"
-#else
-#define        META    "^$.[()|?+*\\"
-#endif
 
 #ifdef SPSTART
 #undef SPSTART         /* dratted cpp namespace... */
@@ -199,7 +194,7 @@ typedef struct scan_data_t {
  * Forward declarations for pregcomp()'s friends.
  */
 
-static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                      0, 0, 0, 0, 0, 0};
 
 #define SF_BEFORE_EOL          (SF_BEFORE_SEOL|SF_BEFORE_MEOL)
@@ -252,7 +247,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  */
 #define MARKER1 "HERE"      /* marker as it appears in the description */
 #define MARKER2 " << HERE "  /* marker as it appears within the regex */
-   
+
 #define REPORT_LOCATION " before " MARKER1 " mark in regex m/%.*s" MARKER2 "%s/"
 
 /*
@@ -433,24 +428,24 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
 {
     STRLEN l = CHR_SVLEN(data->last_found);
     STRLEN old_l = CHR_SVLEN(*data->longest);
-    
+
     if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) {
        sv_setsv(*data->longest, data->last_found);
        if (*data->longest == data->longest_fixed) {
            data->offset_fixed = l ? data->last_start_min : data->pos_min;
            if (data->flags & SF_BEFORE_EOL)
-               data->flags 
+               data->flags
                    |= ((data->flags & SF_BEFORE_EOL) << SF_FIX_SHIFT_EOL);
            else
                data->flags &= ~SF_FIX_BEFORE_EOL;
        }
        else {
            data->offset_float_min = l ? data->last_start_min : data->pos_min;
-           data->offset_float_max = (l 
-                                     ? data->last_start_max 
+           data->offset_float_max = (l
+                                     ? data->last_start_max
                                      : data->pos_min + data->pos_delta);
            if (data->flags & SF_BEFORE_EOL)
-               data->flags 
+               data->flags
                    |= ((data->flags & SF_BEFORE_EOL) << SF_FL_SHIFT_EOL);
            else
                data->flags &= ~SF_FL_BEFORE_EOL;
@@ -574,7 +569,7 @@ S_cl_or(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, str
     } else {
        /* (B1 | CL1) | (B2 | CL2) = (B1 | B2) | (CL1 | CL2)) */
        if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
-            && (!(or_with->flags & ANYOF_FOLD) 
+            && (!(or_with->flags & ANYOF_FOLD)
                 || (cl->flags & ANYOF_FOLD)) ) {
            int i;
 
@@ -625,7 +620,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
     I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0;
     scan_data_t data_fake;
     struct regnode_charclass_class and_with; /* Valid if flags & SCF_DO_STCLASS_OR */
-    
+
     while (scan && OP(scan) != END && scan < last) {
        /* Peephole optimizer: */
 
@@ -635,12 +630,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
            U32 stringok = 1;
 #ifdef DEBUGGING
            regnode *stop = scan;
-#endif 
+#endif
 
            next = scan + NODE_SZ_STR(scan);
            /* Skip NOTHING, merge EXACT*. */
            while (n &&
-                  ( PL_regkind[(U8)OP(n)] == NOTHING || 
+                  ( PL_regkind[(U8)OP(n)] == NOTHING ||
                     (stringok && (OP(n) == OP(scan))))
                   && NEXT_OFF(n)
                   && NEXT_OFF(scan) + NEXT_OFF(n) < I16_MAX) {
@@ -652,25 +647,23 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
 #ifdef DEBUGGING
                    if (stringok)
                        stop = n;
-#endif 
+#endif
                    n = regnext(n);
                }
-               else {
+               else if (stringok) {
                    int oldl = STR_LEN(scan);
                    regnode *nnext = regnext(n);
-                   
-                   if (oldl + STR_LEN(n) > U8_MAX) 
+
+                   if (oldl + STR_LEN(n) > U8_MAX)
                        break;
                    NEXT_OFF(scan) += NEXT_OFF(n);
                    STR_LEN(scan) += STR_LEN(n);
                    next = n + NODE_SZ_STR(n);
                    /* Now we can overwrite *n : */
-                   Move(STRING(n), STRING(scan) + oldl,
-                        STR_LEN(n), char);
+                   Move(STRING(n), STRING(scan) + oldl, STR_LEN(n), char);
 #ifdef DEBUGGING
-                   if (stringok)
-                       stop = next - 1;
-#endif 
+                   stop = next - 1;
+#endif
                    n = nnext;
                }
            }
@@ -696,7 +689,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
            int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
            int noff;
            regnode *n = scan;
-           
+       
            /* Skip NOTHING and LONGJMP. */
            while ((n = regnext(n))
                   && ((PL_regkind[(U8)OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
@@ -705,17 +698,17 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                off += noff;
            if (reg_off_by_arg[OP(scan)])
                ARG(scan) = off;
-           else 
+           else
                NEXT_OFF(scan) = off;
        }
        /* The principal pseudo-switch.  Cannot be a switch, since we
           look into several different things.  */
-       if (OP(scan) == BRANCH || OP(scan) == BRANCHJ 
+       if (OP(scan) == BRANCH || OP(scan) == BRANCHJ
                   || OP(scan) == IFTHEN || OP(scan) == SUSPEND) {
            next = regnext(scan);
            code = OP(scan);
-           
-           if (OP(next) == code || code == IFTHEN || code == SUSPEND) { 
+       
+           if (OP(next) == code || code == IFTHEN || code == SUSPEND) {
                I32 max1 = 0, min1 = I32_MAX, num = 0;
                struct regnode_charclass_class accum;
                
@@ -729,7 +722,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
 
                    num++;
                    data_fake.flags = 0;
-                   if (data) {             
+                   if (data) {         
                        data_fake.whilem_c = data->whilem_c;
                        data_fake.last_closep = data->last_closep;
                    }
@@ -743,13 +736,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        cl_init(pRExC_state, &this_class);
                        data_fake.start_class = &this_class;
                        f = SCF_DO_STCLASS_AND;
-                   }               
+                   }           
                    if (flags & SCF_WHILEM_VISITED_POS)
                        f |= SCF_WHILEM_VISITED_POS;
                    /* we suppose the run is continuous, last=next...*/
                    minnext = study_chunk(pRExC_state, &scan, &deltanext,
                                          next, &data_fake, f);
-                   if (min1 > minnext) 
+                   if (min1 > minnext)
                        min1 = minnext;
                    if (max1 < minnext + deltanext)
                        max1 = minnext + deltanext;
@@ -764,7 +757,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        data->whilem_c = data_fake.whilem_c;
                    if (flags & SCF_DO_STCLASS)
                        cl_or(pRExC_state, &accum, &this_class);
-                   if (code == SUSPEND) 
+                   if (code == SUSPEND)
                        break;
                }
                if (code == IFTHEN && num < 2) /* Empty ELSE branch */
@@ -790,7 +783,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        flags &= ~SCF_DO_STCLASS;
                    }
                    else {
-                       /* Switch to OR mode: cache the old value of 
+                       /* Switch to OR mode: cache the old value of
                         * data->start_class */
                        StructCopy(data->start_class, &and_with,
                                   struct regnode_charclass_class);
@@ -814,7 +807,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
            if (UTF) {
                U8 *s = (U8*)STRING(scan);
                l = utf8_length(s, s + l);
-               uc = utf8_to_uv_simple(s, NULL);
+               uc = utf8_to_uvchr(s, NULL);
            }
            min += l;
            if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */
@@ -823,7 +816,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                if (data->last_end == -1) { /* Update the start info. */
                    data->last_start_min = data->pos_min;
                    data->last_start_max = is_inf
-                       ? I32_MAX : data->pos_min + data->pos_delta; 
+                       ? I32_MAX : data->pos_min + data->pos_delta;
                }
                sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan));
                data->last_end = data->pos_min + l;
@@ -835,7 +828,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                int compat = 1;
 
                if (uc >= 0x100 ||
-                   !(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE)) 
+                   !(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE))
                    && !ANYOF_BITMAP_TEST(data->start_class, uc)
                    && (!(data->start_class->flags & ANYOF_FOLD)
                        || !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc])))
@@ -845,11 +838,15 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                if (compat)
                    ANYOF_BITMAP_SET(data->start_class, uc);
                data->start_class->flags &= ~ANYOF_EOS;
+               if (uc < 0x100)
+                 data->start_class->flags &= ~ANYOF_UNICODE_ALL;
            }
            else if (flags & SCF_DO_STCLASS_OR) {
                /* false positive possible if the class is case-folded */
                if (uc < 0x100)
-                   ANYOF_BITMAP_SET(data->start_class, uc);    
+                   ANYOF_BITMAP_SET(data->start_class, uc);
+               else
+                   data->start_class->flags |= ANYOF_UNICODE_ALL;
                data->start_class->flags &= ~ANYOF_EOS;
                cl_and(data->start_class, &and_with);
            }
@@ -860,12 +857,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
            UV uc = *((U8*)STRING(scan));
 
            /* Search for fixed substrings supports EXACT only. */
-           if (flags & SCF_DO_SUBSTR) 
+           if (flags & SCF_DO_SUBSTR)
                scan_commit(pRExC_state, data);
            if (UTF) {
                U8 *s = (U8 *)STRING(scan);
                l = utf8_length(s, s + l);
-               uc = utf8_to_uv_simple(s, NULL);
+               uc = utf8_to_uvchr(s, NULL);
            }
            min += l;
            if (data && (flags & SCF_DO_SUBSTR))
@@ -875,7 +872,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                int compat = 1;
 
                if (uc >= 0x100 ||
-                   !(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE)) 
+                   !(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE))
                    && !ANYOF_BITMAP_TEST(data->start_class, uc)
                    && !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc]))
                    compat = 0;
@@ -916,8 +913,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                if (flags & (SCF_DO_SUBSTR | SCF_DO_STCLASS)) {
                    next = NEXTOPER(scan);
                    if (OP(next) == EXACT || (flags & SCF_DO_STCLASS)) {
-                       mincount = 1; 
-                       maxcount = REG_INFTY; 
+                       mincount = 1;
+                       maxcount = REG_INFTY;
                        next = regnext(scan);
                        scan = NEXTOPER(scan);
                        goto do_curly;
@@ -930,12 +927,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
            case STAR:
                if (flags & SCF_DO_STCLASS) {
                    mincount = 0;
-                   maxcount = REG_INFTY; 
+                   maxcount = REG_INFTY;
                    next = regnext(scan);
                    scan = NEXTOPER(scan);
                    goto do_curly;
                }
-               is_inf = is_inf_internal = 1; 
+               is_inf = is_inf_internal = 1;
                scan = regnext(scan);
                if (flags & SCF_DO_SUBSTR) {
                    scan_commit(pRExC_state, data); /* Cannot extend fixed substrings */
@@ -943,7 +940,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                }
                goto optimize_curly_tail;
            case CURLY:
-               mincount = ARG1(scan); 
+               mincount = ARG1(scan);
                maxcount = ARG2(scan);
                next = regnext(scan);
                if (OP(scan) == CURLYX) {
@@ -973,15 +970,15 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                /* These are the cases when once a subexpression
                   fails at a particular position, it cannot succeed
                   even after backtracking at the enclosing scope.
-                  
+               
                   XXXX what if minimal match and we are at the
                        initial run of {n,m}? */
                if ((mincount != maxcount - 1) && (maxcount != REG_INFTY))
                    f &= ~SCF_WHILEM_VISITED_POS;
 
                /* This will finish on WHILEM, setting scan, or on NULL: */
-               minnext = study_chunk(pRExC_state, &scan, &deltanext, last, data, 
-                                     mincount == 0 
+               minnext = study_chunk(pRExC_state, &scan, &deltanext, last, data,
+                                     mincount == 0
                                        ? (f & ~SCF_DO_SUBSTR) : f);
 
                if (flags & SCF_DO_STCLASS)
@@ -991,7 +988,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        cl_or(pRExC_state, data->start_class, &this_class);
                    }
                    else if (flags & SCF_DO_STCLASS_AND) {
-                       /* Switch to OR mode: cache the old value of 
+                       /* Switch to OR mode: cache the old value of
                         * data->start_class */
                        StructCopy(data->start_class, &and_with,
                                   struct regnode_charclass_class);
@@ -1012,7 +1009,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                }
                if (!scan)              /* It was not CURLYX, but CURLY. */
                    scan = next;
-               if (ckWARN(WARN_REGEXP) && (minnext + deltanext == 0) 
+               if (ckWARN(WARN_REGEXP) && (minnext + deltanext == 0)
                    && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
                    && maxcount <= REG_INFTY/3) /* Complement check for big count */
                {
@@ -1021,14 +1018,14 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                }
 
                min += minnext * mincount;
-               is_inf_internal |= ((maxcount == REG_INFTY 
+               is_inf_internal |= ((maxcount == REG_INFTY
                                     && (minnext + deltanext) > 0)
                                    || deltanext == I32_MAX);
                is_inf |= is_inf_internal;
                delta += (minnext + deltanext) * maxcount - minnext * mincount;
 
                /* Try powerful optimization CURLYX => CURLYN. */
-               if (  OP(oscan) == CURLYX && data 
+               if (  OP(oscan) == CURLYX && data
                      && data->flags & SF_IN_PAR
                      && !(data->flags & SF_HAS_EVAL)
                      && !deltanext && minnext == 1 ) {
@@ -1040,11 +1037,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                    nxt = regnext(nxt);
                    if (!strchr((char*)PL_simple,OP(nxt))
                        && !(PL_regkind[(U8)OP(nxt)] == EXACT
-                            && STR_LEN(nxt) == 1)) 
+                            && STR_LEN(nxt) == 1))
                        goto nogo;
                    nxt2 = nxt;
                    nxt = regnext(nxt);
-                   if (OP(nxt) != CLOSE) 
+                   if (OP(nxt) != CLOSE)
                        goto nogo;
                    /* Now we know that nxt2 is the only contents: */
                    oscan->flags = ARG(nxt);
@@ -1057,12 +1054,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                    OP(nxt) = OPTIMIZED;        /* was CLOSE. */
                    OP(nxt + 1) = OPTIMIZED; /* was count. */
                    NEXT_OFF(nxt+ 1) = 0; /* just for consistancy. */
-#endif 
+#endif
                }
              nogo:
 
                /* Try optimization CURLYX => CURLYM. */
-               if (  OP(oscan) == CURLYX && data 
+               if (  OP(oscan) == CURLYX && data
                      && !(data->flags & SF_HAS_PAR)
                      && !(data->flags & SF_HAS_EVAL)
                      && !deltanext  ) {
@@ -1073,7 +1070,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
 
                    OP(oscan) = CURLYM;
                    while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/
-                           && (OP(nxt2) != WHILEM)) 
+                           && (OP(nxt2) != WHILEM))
                        nxt = nxt2;
                    OP(nxt2)  = SUCCEED; /* Whas WHILEM */
                    /* Need to optimize away parenths. */
@@ -1081,7 +1078,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        /* Set the parenth number.  */
                        regnode *nxt1 = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN*/
 
-                       if (OP(nxt) != CLOSE) 
+                       if (OP(nxt) != CLOSE)
                            FAIL("Panic opt close");
                        oscan->flags = ARG(nxt);
                        OP(nxt1) = OPTIMIZED;   /* was OPEN. */
@@ -1091,11 +1088,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        OP(nxt + 1) = OPTIMIZED; /* was count. */
                        NEXT_OFF(nxt1 + 1) = 0; /* just for consistancy. */
                        NEXT_OFF(nxt + 1) = 0; /* just for consistancy. */
-#endif 
+#endif
 #if 0
                        while ( nxt1 && (OP(nxt1) != WHILEM)) {
                            regnode *nnxt = regnext(nxt1);
-                           
+                       
                            if (nnxt == nxt) {
                                if (reg_off_by_arg[OP(nxt1)])
                                    ARG_SET(nxt1, nxt2 - nxt1);
@@ -1108,7 +1105,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        }
 #endif
                        /* Optimize again: */
-                       study_chunk(pRExC_state, &nxt1, &deltanext, nxt, 
+                       study_chunk(pRExC_state, &nxt1, &deltanext, nxt,
                                    NULL, 0);
                    }
                    else
@@ -1130,14 +1127,14 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                    PREVOPER(nxt)->flags = data->whilem_c
                        | (RExC_whilem_seen << 4); /* On WHILEM */
                }
-               if (data && fl & (SF_HAS_PAR|SF_IN_PAR)) 
+               if (data && fl & (SF_HAS_PAR|SF_IN_PAR))
                    pars++;
                if (flags & SCF_DO_SUBSTR) {
                    SV *last_str = Nullsv;
                    int counted = mincount != 0;
 
                    if (data->last_end > 0 && mincount != 0) { /* Ends with a string. */
-                       I32 b = pos_before >= data->last_start_min 
+                       I32 b = pos_before >= data->last_start_min
                            ? pos_before : data->last_start_min;
                        STRLEN l;
                        char *s = SvPV(data->last_found, l);
@@ -1153,11 +1150,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                            /* What was added is a constant string */
                            if (mincount > 1) {
                                SvGROW(last_str, (mincount * l) + 1);
-                               repeatcpy(SvPVX(last_str) + l, 
+                               repeatcpy(SvPVX(last_str) + l,
                                          SvPVX(last_str), l, mincount - 1);
                                SvCUR(last_str) *= mincount;
                                /* Add additional parts. */
-                               SvCUR_set(data->last_found, 
+                               SvCUR_set(data->last_found,
                                          SvCUR(data->last_found) - l);
                                sv_catsv(data->last_found, last_str);
                                data->last_end += l * (mincount - 1);
@@ -1180,10 +1177,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        if (mincount && last_str) {
                            sv_setsv(data->last_found, last_str);
                            data->last_end = data->pos_min;
-                           data->last_start_min = 
+                           data->last_start_min =
                                data->pos_min - CHR_SVLEN(last_str);
-                           data->last_start_max = is_inf 
-                               ? I32_MAX 
+                           data->last_start_max = is_inf
+                               ? I32_MAX
                                : data->pos_min + data->pos_delta
                                - CHR_SVLEN(last_str);
                        }
@@ -1267,7 +1264,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        else {
                            for (value = 0; value < 256; value++)
                                if (isALNUM(value))
-                                   ANYOF_BITMAP_SET(data->start_class, value);                     
+                                   ANYOF_BITMAP_SET(data->start_class, value);                 
                        }
                    }
                    break;
@@ -1296,7 +1293,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        else {
                            for (value = 0; value < 256; value++)
                                if (!isALNUM(value))
-                                   ANYOF_BITMAP_SET(data->start_class, value);                     
+                                   ANYOF_BITMAP_SET(data->start_class, value);                 
                        }
                    }
                    break;
@@ -1325,7 +1322,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        else {
                            for (value = 0; value < 256; value++)
                                if (isSPACE(value))
-                                   ANYOF_BITMAP_SET(data->start_class, value);                     
+                                   ANYOF_BITMAP_SET(data->start_class, value);                 
                        }
                    }
                    break;
@@ -1354,7 +1351,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        else {
                            for (value = 0; value < 256; value++)
                                if (!isSPACE(value))
-                                   ANYOF_BITMAP_SET(data->start_class, value);                     
+                                   ANYOF_BITMAP_SET(data->start_class, value);                 
                        }
                    }
                    break;
@@ -1385,7 +1382,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        else {
                            for (value = 0; value < 256; value++)
                                if (isDIGIT(value))
-                                   ANYOF_BITMAP_SET(data->start_class, value);                     
+                                   ANYOF_BITMAP_SET(data->start_class, value);                 
                        }
                    }
                    break;
@@ -1402,7 +1399,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        else {
                            for (value = 0; value < 256; value++)
                                if (!isDIGIT(value))
-                                   ANYOF_BITMAP_SET(data->start_class, value);                     
+                                   ANYOF_BITMAP_SET(data->start_class, value);                 
                        }
                    }
                    break;
@@ -1428,7 +1425,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
            int f = 0;
 
            data_fake.flags = 0;
-           if (data) {             
+           if (data) {         
                data_fake.whilem_c = data->whilem_c;
                data_fake.last_closep = data->last_closep;
            }
@@ -1502,7 +1499,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
   finish:
     *scanp = scan;
     *deltap = is_inf_internal ? I32_MAX : delta;
-    if (flags & SCF_DO_SUBSTR && is_inf) 
+    if (flags & SCF_DO_SUBSTR && is_inf)
        data->pos_delta = I32_MAX - data->pos_min;
     if (is_par > U8_MAX)
        is_par = 0;
@@ -1523,8 +1520,8 @@ STATIC I32
 S_add_data(pTHX_ RExC_state_t *pRExC_state, I32 n, char *s)
 {
     if (RExC_rx->data) {
-       Renewc(RExC_rx->data, 
-              sizeof(*RExC_rx->data) + sizeof(void*) * (RExC_rx->data->count + n - 1), 
+       Renewc(RExC_rx->data,
+              sizeof(*RExC_rx->data) + sizeof(void*) * (RExC_rx->data->count + n - 1),
               char, struct reg_data);
        Renew(RExC_rx->data->what, RExC_rx->data->count + n, U8);
        RExC_rx->data->count += n;
@@ -1544,7 +1541,7 @@ Perl_reginitcolors(pTHX)
 {
     int i = 0;
     char *s = PerlEnv_getenv("PERL_RE_COLORS");
-           
+       
     if (s) {
        PL_colors[0] = s = savepv(s);
        while (++i < 6) {
@@ -1557,7 +1554,7 @@ Perl_reginitcolors(pTHX)
                PL_colors[i] = s = "";
        }
     } else {
-       while (i < 6) 
+       while (i < 6)
            PL_colors[i++] = "";
     }
     PL_colorset = 1;
@@ -1761,13 +1758,13 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
            first = NEXTOPER(first);
            goto again;
        }
-       if (sawplus && (!sawopen || !RExC_sawback) 
+       if (sawplus && (!sawopen || !RExC_sawback)
            && !(RExC_seen & REG_SEEN_EVAL)) /* May examine pos and $& */
            /* x+ must match at the 1st pos of run of x's */
            r->reganch |= ROPT_SKIP;
 
        /* Scan is after the zeroth branch, first is atomic matcher. */
-       DEBUG_r(PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n", 
+       DEBUG_r(PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n",
                              (IV)(first - scan + 1)));
        /*
        * If there's something expensive in the r.e., find the
@@ -1798,7 +1795,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
        minlen = study_chunk(pRExC_state, &first, &fake, scan + RExC_size, /* Up to end */
                             &data, SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag);
        if ( RExC_npar == 1 && data.longest == &(data.longest_fixed)
-            && data.last_start_min == 0 && data.last_end > 0 
+            && data.last_start_min == 0 && data.last_end > 0
             && !RExC_seen_zerolen
             && (!(RExC_seen & REG_SEEN_GPOS) || (r->reganch & ROPT_ANCH_GPOS)))
            r->reganch |= ROPT_CHECK_ALL;
@@ -1851,7 +1848,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
            SvREFCNT_dec(data.longest_fixed);
            longest_fixed_length = 0;
        }
-       if (r->regstclass 
+       if (r->regstclass
            && (OP(r->regstclass) == REG_ANY || OP(r->regstclass) == SANY))
            r->regstclass = NULL;
        if ((!r->anchored_substr || r->anchored_offset) && stclass_flag
@@ -1860,7 +1857,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
            SV *sv;
            I32 n = add_data(pRExC_state, 1, "f");
 
-           New(1006, RExC_rx->data->data[n], 1, 
+           New(1006, RExC_rx->data->data[n], 1,
                struct regnode_charclass_class);
            StructCopy(data.start_class,
                       (struct regnode_charclass_class*)RExC_rx->data->data[n],
@@ -1912,7 +1909,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
            SV *sv;
            I32 n = add_data(pRExC_state, 1, "f");
 
-           New(1006, RExC_rx->data->data[n], 1, 
+           New(1006, RExC_rx->data->data[n], 1,
                struct regnode_charclass_class);
            StructCopy(data.start_class,
                       (struct regnode_charclass_class*)RExC_rx->data->data[n],
@@ -1927,12 +1924,14 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     }
 
     r->minlen = minlen;
-    if (RExC_seen & REG_SEEN_GPOS) 
+    if (RExC_seen & REG_SEEN_GPOS)
        r->reganch |= ROPT_GPOS_SEEN;
     if (RExC_seen & REG_SEEN_LOOKBEHIND)
        r->reganch |= ROPT_LOOKBEHIND_SEEN;
     if (RExC_seen & REG_SEEN_EVAL)
        r->reganch |= ROPT_EVAL_SEEN;
+    if (RExC_seen & REG_SEEN_SANY)
+       r->reganch |= ROPT_SANY_SEEN;
     Newz(1002, r->startp, RExC_npar, I32);
     Newz(1002, r->endp, RExC_npar, I32);
     PL_regdata = r->data; /* for regprop() */
@@ -1978,9 +1977,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
            switch (paren) {
            case '<':
                RExC_seen |= REG_SEEN_LOOKBEHIND;
-               if (*RExC_parse == '!') 
+               if (*RExC_parse == '!')
                    paren = ',';
-               if (*RExC_parse != '=' && *RExC_parse != '!') 
+               if (*RExC_parse != '=' && *RExC_parse != '!')
                    goto unknown;
                RExC_parse++;
            case '=':
@@ -2022,21 +2021,21 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                while (count && (c = *RExC_parse)) {
                    if (c == '\\' && RExC_parse[1])
                        RExC_parse++;
-                   else if (c == '{') 
+                   else if (c == '{')
                        count++;
-                   else if (c == '}') 
+                   else if (c == '}')
                        count--;
                    RExC_parse++;
                }
                if (*RExC_parse != ')')
                {
-                   RExC_parse = s;                 
+                   RExC_parse = s;             
                    vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
                }
                if (!SIZE_ONLY) {
                    AV *av;
-                   
-                   if (RExC_parse - 1 - s) 
+               
+                   if (RExC_parse - 1 - s)
                        sv = newSVpvn(s, RExC_parse - 1 - s);
                    else
                        sv = newSVpvn("", 0);
@@ -2075,8 +2074,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
            case '(':
            {
                if (RExC_parse[0] == '?') {
-                   if (RExC_parse[1] == '=' || RExC_parse[1] == '!' 
-                       || RExC_parse[1] == '<' 
+                   if (RExC_parse[1] == '=' || RExC_parse[1] == '!'
+                       || RExC_parse[1] == '<'
                        || RExC_parse[1] == '{') { /* Lookahead or eval. */
                        I32 flag;
                        
@@ -2085,7 +2084,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                            ret->flags = 1;
                        regtail(pRExC_state, ret, reg(pRExC_state, 1, &flag));
                        goto insert_if;
-                   } 
+                   }
                }
                else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
                    parno = atoi(RExC_parse++);
@@ -2301,18 +2300,18 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first)
     register regnode *latest;
     I32 flags = 0, c = 0;
 
-    if (first) 
+    if (first)
        ret = NULL;
     else {
-       if (!SIZE_ONLY && RExC_extralen) 
+       if (!SIZE_ONLY && RExC_extralen)
            ret = reganode(pRExC_state, BRANCHJ,0);
        else
            ret = reg_node(pRExC_state, BRANCH);
     }
        
-    if (!first && SIZE_ONLY) 
+    if (!first && SIZE_ONLY)
        RExC_extralen += 1;                     /* BRANCHJ */
-    
+
     *flagp = WORST;                    /* Tentatively. */
 
     RExC_parse--;
@@ -2467,7 +2466,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
 
     if (!(flags&HASWIDTH) && op != '?')
       vFAIL("Regexp *+ operand could be empty");
-#endif 
+#endif
 
     nextchar(pRExC_state);
 
@@ -2547,7 +2546,7 @@ tryagain:
        break;
     case '$':
        nextchar(pRExC_state);
-       if (*RExC_parse) 
+       if (*RExC_parse)
            RExC_seen_zerolen++;
        if (RExC_flags16 & PMf_MULTILINE)
            ret = reg_node(pRExC_state, MEOL);
@@ -2641,6 +2640,7 @@ tryagain:
            break;
        case 'C':
            ret = reg_node(pRExC_state, SANY);
+           RExC_seen |= REG_SEEN_SANY;
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
            break;
@@ -2648,22 +2648,16 @@ tryagain:
            ret = reg_node(pRExC_state, CLUMP);
            *flagp |= HASWIDTH;
            nextchar(pRExC_state);
-           if (UTF && !PL_utf8_mark)
-               is_utf8_mark((U8*)"~");         /* preload table */
            break;
        case 'w':
            ret = reg_node(pRExC_state, LOC ? ALNUML     : ALNUM);
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
-           if (UTF && !PL_utf8_alnum)
-               is_utf8_alnum((U8*)"a");        /* preload table */
            break;
        case 'W':
            ret = reg_node(pRExC_state, LOC ? NALNUML     : NALNUM);
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
-           if (UTF && !PL_utf8_alnum)
-               is_utf8_alnum((U8*)"a");        /* preload table */
            break;
        case 'b':
            RExC_seen_zerolen++;
@@ -2671,8 +2665,6 @@ tryagain:
            ret = reg_node(pRExC_state, LOC ? BOUNDL     : BOUND);
            *flagp |= SIMPLE;
            nextchar(pRExC_state);
-           if (UTF && !PL_utf8_alnum)
-               is_utf8_alnum((U8*)"a");        /* preload table */
            break;
        case 'B':
            RExC_seen_zerolen++;
@@ -2680,36 +2672,26 @@ tryagain:
            ret = reg_node(pRExC_state, LOC ? NBOUNDL     : NBOUND);
            *flagp |= SIMPLE;
            nextchar(pRExC_state);
-           if (UTF && !PL_utf8_alnum)
-               is_utf8_alnum((U8*)"a");        /* preload table */
            break;
        case 's':
            ret = reg_node(pRExC_state, LOC ? SPACEL     : SPACE);
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
-           if (UTF && !PL_utf8_space)
-               is_utf8_space((U8*)" ");        /* preload table */
            break;
        case 'S':
            ret = reg_node(pRExC_state, LOC ? NSPACEL     : NSPACE);
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
-           if (UTF && !PL_utf8_space)
-               is_utf8_space((U8*)" ");        /* preload table */
            break;
        case 'd':
            ret = reg_node(pRExC_state, DIGIT);
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
-           if (UTF && !PL_utf8_digit)
-               is_utf8_digit((U8*)"1");        /* preload table */
            break;
        case 'D':
            ret = reg_node(pRExC_state, NDIGIT);
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
-           if (UTF && !PL_utf8_digit)
-               is_utf8_digit((U8*)"1");        /* preload table */
            break;
        case 'p':
        case 'P':
@@ -2855,25 +2837,17 @@ tryagain:
                        p++;
                        break;
                    case 'e':
-#ifdef ASCIIish
-                         ender = '\033';
-#else
-                         ender = '\047';
-#endif
+                         ender = ASCII_TO_NATIVE('\033');
                        p++;
                        break;
                    case 'a':
-#ifdef ASCIIish
-                         ender = '\007';
-#else
-                         ender = '\057';
-#endif
+                         ender = ASCII_TO_NATIVE('\007');
                        p++;
                        break;
                    case 'x':
                        if (*++p == '{') {
                            char* e = strchr(p, '}');
-        
+       
                            if (!e) {
                                RExC_parse = p + 1;
                                vFAIL("Missing right brace on \\x{}");
@@ -2881,6 +2855,8 @@ tryagain:
                            else {
                                numlen = 1;     /* allow underscores */
                                ender = (UV)scan_hex(p + 1, e - p - 1, &numlen);
+                               if (ender > 0xff)
+                                   RExC_utf8 = 1;
                                /* numlen is generous */
                                if (numlen + len >= 127) {
                                    p--;
@@ -2925,8 +2901,8 @@ tryagain:
                    break;
                default:
                  normal_default:
-                   if ((*p & 0xc0) == 0xc0 && UTF) {
-                       ender = utf8_to_uv((U8*)p, RExC_end - p,
+                   if (UTF8_IS_START(*p) && UTF) {
+                       ender = utf8n_to_uvchr((U8*)p, RExC_end - p,
                                               &numlen, 0);
                        p += numlen;
                    }
@@ -2938,14 +2914,16 @@ tryagain:
                    p = regwhite(p, RExC_end);
                if (UTF && FOLD) {
                    if (LOC)
-                       ender = toLOWER_LC_uni(ender);
+                       ender = toLOWER_LC_uvchr(ender);
                    else
                        ender = toLOWER_uni(ender);
                }
                if (ISMULT2(p)) { /* Back off on ?+*. */
                    if (len)
                        p = oldp;
-                   else if (ender >= 0x80 && UTF) {
+                   /* ender is a Unicode value so it can be > 0xff --
+                    * in other words, do not use UTF8_IS_CONTINUED(). */
+                   else if (NATIVE_TO_ASCII(ender) >= 0x80 && UTF) {
                        reguni(pRExC_state, ender, s, &numlen);
                        s += numlen;
                        len += numlen;
@@ -2956,7 +2934,9 @@ tryagain:
                    }
                    break;
                }
-               if (ender >= 0x80 && UTF) {
+               /* ender is a Unicode value so it can be > 0xff --
+                * in other words, do not use UTF8_IS_CONTINUED(). */
+               if (NATIVE_TO_ASCII(ender) >= 0x80 && UTF) {
                    reguni(pRExC_state, ender, s, &numlen);
                    s += numlen;
                    len += numlen - 1;
@@ -3025,7 +3005,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
         *RExC_parse == '.')) {
        char  c = *RExC_parse;
        char* s = RExC_parse++;
-           
+       
        while (RExC_parse < RExC_end && *RExC_parse != c)
            RExC_parse++;
        if (RExC_parse == RExC_end)
@@ -3221,7 +3201,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
        if (!range)
            rangebegin = RExC_parse;
        if (UTF) {
-           value = utf8_to_uv((U8*)RExC_parse,
+           value = utf8n_to_uvchr((U8*)RExC_parse,
                               RExC_end - RExC_parse,
                               &numlen, 0);
            RExC_parse += numlen;
@@ -3232,7 +3212,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
            namedclass = regpposixcc(pRExC_state, value);
        else if (value == '\\') {
            if (UTF) {
-               value = utf8_to_uv((U8*)RExC_parse,
+               value = utf8n_to_uvchr((U8*)RExC_parse,
                                   RExC_end - RExC_parse,
                                   &numlen, 0);
                RExC_parse += numlen;
@@ -3279,17 +3259,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
            case 't':   value = '\t';                   break;
            case 'f':   value = '\f';                   break;
            case 'b':   value = '\b';                   break;
-#ifdef ASCIIish
-           case 'e':   value = '\033';                 break;
-           case 'a':   value = '\007';                 break;
-#else
-           case 'e':   value = '\047';                 break;
-           case 'a':   value = '\057';                 break;
-#endif
+           case 'e':   value = ASCII_TO_NATIVE('\033');break;
+           case 'a':   value = ASCII_TO_NATIVE('\007');break;
            case 'x':
                if (*RExC_parse == '{') {
                    e = strchr(RExC_parse++, '}');
-                    if (!e) 
+                    if (!e)
                         vFAIL("Missing right brace on \\x{}");
                    numlen = 1;         /* allow underscores */
                    value = (UV)scan_hex(RExC_parse,
@@ -3345,8 +3320,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                    else {
                        ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
                        Perl_sv_catpvf(aTHX_ listsv,
-                                      /* 0x002D is Unicode for '-' */
-                                      "%04"UVxf"\n002D\n", (UV)lastvalue);
+                                      "%04"UVxf"\n%04"UVxf"\n", (UV)lastvalue, (UV) '-');
                    }
                }
 
@@ -3429,7 +3403,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                    if (LOC)
                        ANYOF_CLASS_SET(ret, ANYOF_ASCII);
                    else {
-#ifdef ASCIIish
+#ifndef EBCDIC
                        for (value = 0; value < 128; value++)
                            ANYOF_BITMAP_SET(ret, value);
 #else  /* EBCDIC */
@@ -3445,7 +3419,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                    if (LOC)
                        ANYOF_CLASS_SET(ret, ANYOF_NASCII);
                    else {
-#ifdef ASCIIish
+#ifndef EBCDIC
                        for (value = 128; value < 256; value++)
                            ANYOF_BITMAP_SET(ret, value);
 #else  /* EBCDIC */
@@ -3745,7 +3719,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
        /* now is the next time */
        if (!SIZE_ONLY) {
            if (lastvalue < 256 && value < 256) {
-#ifndef ASCIIish /* EBCDIC, for example. */
+#ifdef EBCDIC /* EBCDIC, for example. */
                if ((isLOWER(lastvalue) && isLOWER(value)) ||
                    (isUPPER(lastvalue) && isUPPER(value)))
                {
@@ -3810,7 +3784,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
        ANYOF_FLAGS(ret) = ANYOF_UNICODE_ALL;
     }
 
-    if (!SIZE_ONLY) { 
+    if (!SIZE_ONLY) {
        AV *av = newAV();
        SV *rv;
 
@@ -3908,7 +3882,7 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
 STATIC void
 S_reguni(pTHX_ RExC_state_t *pRExC_state, UV uv, char* s, STRLEN* lenp)
 {
-    *lenp = SIZE_ONLY ? UNISKIP(uv) : (uv_to_utf8((U8*)s, uv) - (U8*)s);
+    *lenp = SIZE_ONLY ? UNISKIP(uv) : (uvchr_to_utf8((U8*)s, uv) - (U8*)s);
 }
 
 /*
@@ -3923,7 +3897,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd)
     register regnode *dst;
     register regnode *place;
     register int offset = regarglen[(U8)op];
-    
+
 /* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
 
     if (SIZE_ONLY) {
@@ -4036,13 +4010,13 @@ S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
                      (int)(2*l + 1), "", SvPVX(sv));
        if (next == NULL)               /* Next ptr. */
            PerlIO_printf(Perl_debug_log, "(0)");
-       else 
+       else
            PerlIO_printf(Perl_debug_log, "(%"IVdf")", (IV)(next - start));
        (void)PerlIO_putc(Perl_debug_log, '\n');
       after_print:
        if (PL_regkind[(U8)op] == BRANCHJ) {
-           register regnode *nnode = (OP(next) == LONGJMP 
-                                      ? regnext(next) 
+           register regnode *nnode = (OP(next) == LONGJMP
+                                      ? regnext(next)
                                       : next);
            if (last && nnode > last)
                nnode = last;
@@ -4098,25 +4072,25 @@ Perl_regdump(pTHX_ regexp *r)
     /* Header fields of interest. */
     if (r->anchored_substr)
        PerlIO_printf(Perl_debug_log,
-                     "anchored `%s%.*s%s'%s at %"IVdf" ", 
+                     "anchored `%s%.*s%s'%s at %"IVdf" ",
                      PL_colors[0],
                      (int)(SvCUR(r->anchored_substr) - (SvTAIL(r->anchored_substr)!=0)),
-                     SvPVX(r->anchored_substr), 
+                     SvPVX(r->anchored_substr),
                      PL_colors[1],
                      SvTAIL(r->anchored_substr) ? "$" : "",
                      (IV)r->anchored_offset);
     if (r->float_substr)
        PerlIO_printf(Perl_debug_log,
-                     "floating `%s%.*s%s'%s at %"IVdf"..%"UVuf" ", 
+                     "floating `%s%.*s%s'%s at %"IVdf"..%"UVuf" ",
                      PL_colors[0],
-                     (int)(SvCUR(r->float_substr) - (SvTAIL(r->float_substr)!=0)), 
+                     (int)(SvCUR(r->float_substr) - (SvTAIL(r->float_substr)!=0)),
                      SvPVX(r->float_substr),
                      PL_colors[1],
                      SvTAIL(r->float_substr) ? "$" : "",
                      (IV)r->float_min_offset, (UV)r->float_max_offset);
     if (r->check_substr)
-       PerlIO_printf(Perl_debug_log, 
-                     r->check_substr == r->float_substr 
+       PerlIO_printf(Perl_debug_log,
+                     r->check_substr == r->float_substr
                      ? "(checking floating" : "(checking anchored");
     if (r->reganch & ROPT_NOSCAN)
        PerlIO_printf(Perl_debug_log, " noscan");
@@ -4271,31 +4245,31 @@ Perl_regprop(pTHX_ SV *sv, regnode *o)
        {
            SV *lv;
            SV *sw = regclass_swash(o, FALSE, &lv);
-           
+       
            if (lv) {
                if (sw) {
                    UV i;
                    U8 s[UTF8_MAXLEN+1];
-                   
+               
                    for (i = 0; i <= 256; i++) { /* just the first 256 */
-                       U8 *e = uv_to_utf8(s, i);
+                       U8 *e = uvuni_to_utf8(s, i);
                        
                        if (i < 256 && swash_fetch(sw, s)) {
                            if (rangestart == -1)
                                rangestart = i;
                        } else if (rangestart != -1) {
                            U8 *p;
-                           
+                       
                            if (i <= rangestart + 3)
                                for (; rangestart < i; rangestart++) {
-                                   for(e = uv_to_utf8(s, rangestart), p = s; p < e; p++)
+                                   for(e = uvuni_to_utf8(s, rangestart), p = s; p < e; p++)
                                        put_byte(sv, *p);
                                }
                            else {
-                               for (e = uv_to_utf8(s, rangestart), p = s; p < e; p++)
+                               for (e = uvuni_to_utf8(s, rangestart), p = s; p < e; p++)
                                    put_byte(sv, *p);
                                sv_catpv(sv, "-");
-                                   for (e = uv_to_utf8(s, i - 1), p = s; p < e; p++)
+                                   for (e = uvuni_to_utf8(s, i - 1), p = s; p < e; p++)
                                        put_byte(sv, *p);
                                }
                                rangestart = -1;
@@ -4308,9 +4282,9 @@ Perl_regprop(pTHX_ SV *sv, regnode *o)
                {
                    char *s = savepv(SvPVX(lv));
                    char *origs = s;
-                   
+               
                    while(*s && *s != '\n') s++;
-                   
+               
                    if (*s == '\n') {
                        char *t = ++s;
                        
@@ -4324,7 +4298,7 @@ Perl_regprop(pTHX_ SV *sv, regnode *o)
                        
                        sv_catpv(sv, t);
                    }
-                   
+               
                    Safefree(origs);
                }
            }
@@ -4489,7 +4463,7 @@ S_re_croak2(pTHX_ const char* pat1,const char* pat2,...)
 
 void
 Perl_save_re_context(pTHX)
-{                   
+{
 #if 0
     SAVEPPTR(RExC_precomp);            /* uncompiled string. */
     SAVEI32(RExC_npar);                /* () count. */
@@ -4514,9 +4488,8 @@ Perl_save_re_context(pTHX)
     SAVEVPTR(PL_reglastparen);         /* Similarly for lastparen. */
     SAVEPPTR(PL_regtill);              /* How far we are required to go. */
     SAVEI8(PL_regprev);                        /* char before regbol, \n if none */
-    SAVEVPTR(PL_reg_start_tmp);                /* from regexec.c */
+    SAVEGENERICPV(PL_reg_start_tmp);           /* from regexec.c */
     PL_reg_start_tmp = 0;
-    SAVEFREEPV(PL_reg_start_tmp);
     SAVEI32(PL_reg_start_tmpl);                /* from regexec.c */
     PL_reg_start_tmpl = 0;
     SAVEVPTR(PL_regdata);
@@ -4536,7 +4509,7 @@ Perl_save_re_context(pTHX)
     SAVEVPTR(PL_reg_curpm);            /* from regexec.c */
     SAVEI32(PL_regnpar);               /* () count. */
 #ifdef DEBUGGING
-    SAVEPPTR(PL_reg_starttry);         /* from regexec.c */    
+    SAVEPPTR(PL_reg_starttry);         /* from regexec.c */
 #endif
 }