integrate Pod-Perldoc-3.07. This replaces the original inline perldoc
[p5sagit/p5-mst-13.2.git] / regexec.c
index f2d4b3d..f69c360 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -87,7 +87,7 @@
 #define RF_evaled      4               /* Did an EVAL with setting? */
 #define RF_utf8                8               /* String contains multibyte chars? */
 
-#define UTF (PL_reg_flags & RF_utf8)
+#define UTF ((PL_reg_flags & RF_utf8) != 0)
 
 #define RS_init                1               /* eval environment created */
 #define RS_set         2               /* replsv value is set */
@@ -239,7 +239,7 @@ S_regcppop(pTHX)
        );
     }
     DEBUG_r(
-       if (*PL_reglastparen + 1 <= PL_regnpar) {
+       if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
            PerlIO_printf(Perl_debug_log,
                          "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
                          (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
@@ -256,8 +256,8 @@ S_regcppop(pTHX)
      * building DynaLoader will fail:
      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
      * --jhi */
-    for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
-       if (paren > PL_regsize)
+    for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
+       if ((I32)paren > PL_regsize)
            PL_regstartp[paren] = -1;
        PL_regendp[paren] = -1;
     }
@@ -431,7 +431,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
              );
     });
 
-    if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
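+    /* CHR_DIST() would be more correct here but it makes things slow.  The byte length is never smaller than the character length, so this cheaper test can only miss a rejection, never make a wrong one. */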
+    if (prog->minlen > strend - strpos) {
        DEBUG_r(PerlIO_printf(Perl_debug_log,
                              "String too short... [re_intuit_start]\n"));
        goto fail;
@@ -594,7 +595,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
 
                t = s - prog->check_offset_max;
                if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
-                   && (!(prog->reganch & ROPT_UTF8)
+                   && (!do_utf8
                        || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
                            && t > strpos)))
                    /* EMPTY */;
@@ -714,7 +715,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
 
     t = s - prog->check_offset_max;
     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
-        && (!(prog->reganch & ROPT_UTF8)
+        && (!do_utf8
            || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
                 && t > strpos))) {
        /* Fixed substring is found far enough so that the match
@@ -999,8 +1000,10 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
                to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
 
-               c1 = utf8_to_uvchr(tmpbuf1, 0);
-               c2 = utf8_to_uvchr(tmpbuf2, 0);
+               c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN_UCLC, 
+                                   0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+               c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN_UCLC,
+                                   0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
            }
            else {
                c1 = *(U8*)m;
@@ -1013,7 +1016,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
            c1 = *(U8*)m;
            c2 = PL_fold_locale[c1];
          do_exactf:
-           e = do_utf8 ? s + ln : strend - ln;
+           e = HOP3c(strend, -(I32)ln, s);
 
            if (norun && e < s)
                e = s;                  /* Due to minlen logic of intuit() */
@@ -1037,11 +1040,13 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                
                if (c1 == c2) {
                    while (s <= e) {
-                       c = utf8_to_uvchr((U8*)s, &len);
+                       c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
+                                          ckWARN(WARN_UTF8) ?
+                                          0 : UTF8_ALLOW_ANY);
                        if ( c == c1
                             && (ln == len ||
                                 ibcmp_utf8(s, (char **)0, 0,  do_utf8,
-                                           m, (char **)0, ln, UTF))
+                                           m, (char **)0, ln, (bool)UTF))
                             && (norun || regtry(prog, s)) )
                            goto got_it;
                        else {
@@ -1053,7 +1058,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                                      !ibcmp_utf8((char *) foldbuf,
                                                  (char **)0, foldlen, do_utf8,
                                                  m,
-                                                 (char **)0, ln,      UTF))
+                                                 (char **)0, ln, (bool)UTF))
                                  && (norun || regtry(prog, s)) )
                                  goto got_it;
                        }
@@ -1062,7 +1067,9 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                }
                else {
                    while (s <= e) {
-                       c = utf8_to_uvchr((U8*)s, &len);
+                     c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
+                                          ckWARN(WARN_UTF8) ?
+                                          0 : UTF8_ALLOW_ANY);
 
                        /* Handle some of the three Greek sigmas cases.
                         * Note that not all the possible combinations
@@ -1078,7 +1085,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                        if ( (c == c1 || c == c2)
                             && (ln == len ||
                                 ibcmp_utf8(s, (char **)0, 0,  do_utf8,
-                                           m, (char **)0, ln, UTF))
+                                           m, (char **)0, ln, (bool)UTF))
                             && (norun || regtry(prog, s)) )
                            goto got_it;
                        else {
@@ -1090,7 +1097,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                                      !ibcmp_utf8((char *) foldbuf,
                                                  (char **)0, foldlen, do_utf8,
                                                  m,
-                                                 (char **)0, ln,      UTF))
+                                                 (char **)0, ln, (bool)UTF))
                                  && (norun || regtry(prog, s)) )
                                  goto got_it;
                        }
@@ -2128,7 +2135,7 @@ S_regtry(pTHX_ regexp *prog, char *startpos)
     sp = prog->startp;
     ep = prog->endp;
     if (prog->nparens) {
-       for (i = prog->nparens; i > *PL_reglastparen; i--) {
+       for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
            *++sp = -1;
            *++ep = -1;
        }
@@ -2378,7 +2385,7 @@ S_regmatch(pTHX_ regnode *prog)
        case EXACT:
            s = STRING(scan);
            ln = STR_LEN(scan);
-           if (do_utf8 != (UTF!=0)) {
+           if (do_utf8 != UTF) {
                /* The target and the pattern have differing utf8ness. */
                char *l = locinput;
                char *e = s + ln;
@@ -2390,7 +2397,9 @@ S_regmatch(pTHX_ regnode *prog)
                        if (l >= PL_regeol)
                             sayNO;
                        if (NATIVE_TO_UNI(*(U8*)s) !=
-                           utf8_to_uvuni((U8*)l, &ulen))
+                           utf8n_to_uvuni((U8*)l, UTF8_MAXLEN, &ulen,
+                                          ckWARN(WARN_UTF8) ?
+                                          0 : UTF8_ALLOW_ANY))
                             sayNO;
                        l += ulen;
                        s ++;
@@ -2402,7 +2411,9 @@ S_regmatch(pTHX_ regnode *prog)
                        if (l >= PL_regeol)
                            sayNO;
                        if (NATIVE_TO_UNI(*((U8*)l)) !=
-                           utf8_to_uvuni((U8*)s, &ulen))
+                           utf8n_to_uvuni((U8*)s, UTF8_MAXLEN, &ulen,
+                                          ckWARN(WARN_UTF8) ?
+                                          0 : UTF8_ALLOW_ANY))
                            sayNO;
                        s += ulen;
                        l ++;
@@ -2435,7 +2446,7 @@ S_regmatch(pTHX_ regnode *prog)
                char *l = locinput;
                char *e = PL_regeol;
 
-               if (ibcmp_utf8(s, 0,  ln, UTF,
+               if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
                               l, &e, 0,  do_utf8)) {
                     /* One more case for the sharp s:
                      * pack("U0U*", 0xDF) =~ /ss/i,
@@ -2717,7 +2728,7 @@ S_regmatch(pTHX_ regnode *prog)
            n = ARG(scan);  /* which paren pair */
            ln = PL_regstartp[n];
            PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
-           if (*PL_reglastparen < n || ln == -1)
+           if ((I32)*PL_reglastparen < n || ln == -1)
                sayNO;                  /* Do not match unless seen CLOSEn. */
            if (ln == PL_regendp[n])
                break;
@@ -2780,13 +2791,13 @@ S_regmatch(pTHX_ regnode *prog)
            dSP;
            OP_4tree *oop = PL_op;
            COP *ocurcop = PL_curcop;
-           SV **ocurpad = PL_curpad;
+           PAD *old_comppad;
            SV *ret;
        
            n = ARG(scan);
            PL_op = (OP_4tree*)PL_regdata->data[n];
            DEBUG_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
-           PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
+           PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
            PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
 
            {
@@ -2794,7 +2805,7 @@ S_regmatch(pTHX_ regnode *prog)
                CALLRUNOPS(aTHX);                       /* Scalar context. */
                SPAGAIN;
                if (SP == before)
-                   ret = Nullsv;   /* protect against empty (?{}) blocks. */
+                   ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
                else {
                    ret = POPs;
                    PUTBACK;
@@ -2802,7 +2813,7 @@ S_regmatch(pTHX_ regnode *prog)
            }
 
            PL_op = oop;
-           PL_curpad = ocurpad;
+           PAD_RESTORE_LOCAL(old_comppad);
            PL_curcop = ocurcop;
            if (logical) {
                if (logical == 2) {     /* Postponed subexpression. */
@@ -2810,6 +2821,7 @@ S_regmatch(pTHX_ regnode *prog)
                    MAGIC *mg = Null(MAGIC*);
                    re_cc_state state;
                    CHECKPOINT cp, lastcp;
+                    int toggleutf;
 
                    if(SvROK(ret) || SvRMAGICAL(ret)) {
                        SV *sv = SvROK(ret) ? SvRV(ret) : ret;
@@ -2830,6 +2842,7 @@ S_regmatch(pTHX_ regnode *prog)
                        I32 onpar = PL_regnpar;
 
                        Zero(&pm, 1, PMOP);
+                        if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
                        re = CALLREGCOMP(aTHX_ t, t + len, &pm);
                        if (!(SvFLAGS(ret)
                              & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
@@ -2862,6 +2875,9 @@ S_regmatch(pTHX_ regnode *prog)
                    *PL_reglastcloseparen = 0;
                    PL_reg_call_cc = &state;
                    PL_reginput = locinput;
+                   toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
+                               ((re->reganch & ROPT_UTF8) != 0);
+                   if (toggleutf) PL_reg_flags ^= RF_utf8;
 
                    /* XXXX This is too dramatic a measure... */
                    PL_reg_maxiter = 0;
@@ -2876,6 +2892,7 @@ S_regmatch(pTHX_ regnode *prog)
                        PL_regcc = state.cc;
                        PL_reg_re = state.re;
                        cache_re(PL_reg_re);
+                       if (toggleutf) PL_reg_flags ^= RF_utf8;
 
                        /* XXXX This is too dramatic a measure... */
                        PL_reg_maxiter = 0;
@@ -2892,6 +2909,7 @@ S_regmatch(pTHX_ regnode *prog)
                    PL_regcc = state.cc;
                    PL_reg_re = state.re;
                    cache_re(PL_reg_re);
+                   if (toggleutf) PL_reg_flags ^= RF_utf8;
 
                    /* XXXX This is too dramatic a measure... */
                    PL_reg_maxiter = 0;
@@ -2916,13 +2934,13 @@ S_regmatch(pTHX_ regnode *prog)
            n = ARG(scan);  /* which paren pair */
            PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
            PL_regendp[n] = locinput - PL_bostr;
-           if (n > *PL_reglastparen)
+           if (n > (I32)*PL_reglastparen)
                *PL_reglastparen = n;
            *PL_reglastcloseparen = n;
            break;
        case GROUPP:
            n = ARG(scan);  /* which paren pair */
-           sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
+           sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
            break;
        case IFTHEN:
            PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
@@ -3024,7 +3042,7 @@ S_regmatch(pTHX_ regnode *prog)
                PL_regcc = &cc;
                /* XXXX Probably it is better to teach regpush to support
                   parenfloor > PL_regsize... */
-               if (parenfloor > *PL_reglastparen)
+               if (parenfloor > (I32)*PL_reglastparen)
                    parenfloor = *PL_reglastparen; /* Pessimization... */
                cc.parenfloor = parenfloor;
                cc.cur = -1;
@@ -3060,10 +3078,10 @@ S_regmatch(pTHX_ regnode *prog)
 
                DEBUG_r(
                    PerlIO_printf(Perl_debug_log,
-                                 "%*s  %ld out of %ld..%ld  cc=%lx\n",
+                                 "%*s  %ld out of %ld..%ld  cc=%"UVxf"\n",
                                  REPORT_CODE_OFF+PL_regindent*2, "",
                                  (long)n, (long)cc->min,
-                                 (long)cc->max, (long)cc)
+                                 (long)cc->max, PTR2UV(cc))
                    );
 
                /* If degenerate scan matches "", assume scan done. */
@@ -3108,7 +3126,7 @@ S_regmatch(pTHX_ regnode *prog)
                if (PL_reg_leftiter-- == 0) {
                    I32 size = (PL_reg_maxiter + 7)/8;
                    if (PL_reg_poscache) {
-                       if (PL_reg_poscache_size < size) {
+                       if ((I32)PL_reg_poscache_size < size) {
                            Renew(PL_reg_poscache, size, char);
                            PL_reg_poscache_size = size;
                        }
@@ -3293,7 +3311,7 @@ S_regmatch(pTHX_ regnode *prog)
            if (paren) {
                if (paren > PL_regsize)
                    PL_regsize = paren;
-               if (paren > *PL_reglastparen)
+               if (paren > (I32)*PL_reglastparen)
                    *PL_reglastparen = paren;
            }
            scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
@@ -3327,7 +3345,7 @@ S_regmatch(pTHX_ regnode *prog)
                            ln = PL_regstartp[n];
                            /* assume yes if we haven't seen CLOSEn */
                            if (
-                               *PL_reglastparen < n ||
+                               (I32)*PL_reglastparen < n ||
                                ln == -1 ||
                                ln == PL_regendp[n]
                            ) {
@@ -3409,7 +3427,7 @@ S_regmatch(pTHX_ regnode *prog)
                                ln = PL_regstartp[n];
                                /* assume yes if we haven't seen CLOSEn */
                                if (
-                                   *PL_reglastparen < n ||
+                                   (I32)*PL_reglastparen < n ||
                                    ln == -1 ||
                                    ln == PL_regendp[n]
                                ) {
@@ -3469,7 +3487,7 @@ S_regmatch(pTHX_ regnode *prog)
            paren = scan->flags;        /* Which paren to set */
            if (paren > PL_regsize)
                PL_regsize = paren;
-           if (paren > *PL_reglastparen)
+           if (paren > (I32)*PL_reglastparen)
                *PL_reglastparen = paren;
            ln = ARG1(scan);  /* min to match */
            n  = ARG2(scan);  /* max to match */
@@ -3518,7 +3536,7 @@ S_regmatch(pTHX_ regnode *prog)
                        ln = PL_regstartp[n];
                        /* assume yes if we haven't seen CLOSEn */
                        if (
-                           *PL_reglastparen < n ||
+                           (I32)*PL_reglastparen < n ||
                            ln == -1 ||
                            ln == PL_regendp[n]
                        ) {
@@ -3545,11 +3563,17 @@ S_regmatch(pTHX_ regnode *prog)
                             to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
                             to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
 
-                            c1 = utf8_to_uvuni(tmpbuf1, 0);
-                            c2 = utf8_to_uvuni(tmpbuf2, 0);
+                            c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXLEN, 0,
+                                                ckWARN(WARN_UTF8) ?
+                                                0 : UTF8_ALLOW_ANY);
+                            c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXLEN, 0,
+                                                ckWARN(WARN_UTF8) ?
+                                                0 : UTF8_ALLOW_ANY);
                        }
                        else {
-                           c2 = c1 = utf8_to_uvchr(s, NULL);
+                           c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXLEN, 0,
+                                                    ckWARN(WARN_UTF8) ?
+                                                    0 : UTF8_ALLOW_ANY);
                        }
                    }
                }
@@ -3568,6 +3592,7 @@ S_regmatch(pTHX_ regnode *prog)
                if (c1 != -1000) {
                    char *e; /* Should not check after this */
                    char *old = locinput;
+                   int count = 0;
 
                    if  (n == REG_INFTY) {
                        e = PL_regeol - 1;
@@ -3587,7 +3612,6 @@ S_regmatch(pTHX_ regnode *prog)
                            e = PL_regeol - 1;
                    }
                    while (1) {
-                       int count;
                        /* Find place 'next' could work */
                        if (!do_utf8) {
                            if (c1 == c2) {
@@ -3605,18 +3629,28 @@ S_regmatch(pTHX_ regnode *prog)
                        else {
                            STRLEN len;
                            if (c1 == c2) {
-                               for (count = 0;
-                                    locinput <= e &&
-                                        utf8_to_uvchr((U8*)locinput, &len) != c1;
-                                    count++)
+                               /* count initialised to
+                                * utf8_distance(old, locinput) */
+                               while (locinput <= e &&
+                                      utf8n_to_uvchr((U8*)locinput,
+                                                     UTF8_MAXLEN, &len,
+                                                     ckWARN(WARN_UTF8) ?
+                                                     0 : UTF8_ALLOW_ANY) != (UV)c1) {
                                    locinput += len;
-                               
+                                   count++;
+                               }
                            } else {
-                               for (count = 0; locinput <= e; count++) {
-                                   UV c = utf8_to_uvchr((U8*)locinput, &len);
-                                   if (c == c1 || c == c2)
+                               /* count initialised to
+                                * utf8_distance(old, locinput) */
+                               while (locinput <= e) {
+                                   UV c = utf8n_to_uvchr((U8*)locinput,
+                                                         UTF8_MAXLEN, &len,
+                                                         ckWARN(WARN_UTF8) ?
+                                                         0 : UTF8_ALLOW_ANY);
+                                   if (c == (UV)c1 || c == (UV)c2)
                                        break;
-                                   locinput += len;                    
+                                   locinput += len;
+                                   count++;
                                }
                            }
                        }
@@ -3638,6 +3672,7 @@ S_regmatch(pTHX_ regnode *prog)
                            locinput += UTF8SKIP(locinput);
                        else
                            locinput++;
+                       count = 1;
                    }
                }
                else
@@ -3645,20 +3680,23 @@ S_regmatch(pTHX_ regnode *prog)
                    UV c;
                    if (c1 != -1000) {
                        if (do_utf8)
-                           c = utf8_to_uvchr((U8*)PL_reginput, NULL);
+                           c = utf8n_to_uvchr((U8*)PL_reginput,
+                                              UTF8_MAXLEN, 0,
+                                              ckWARN(WARN_UTF8) ?
+                                              0 : UTF8_ALLOW_ANY);
                        else
                            c = UCHARAT(PL_reginput);
                        /* If it could work, try it. */
-                       if (c == c1 || c == c2)
+                       if (c == (UV)c1 || c == (UV)c2)
                        {
-                           TRYPAREN(paren, n, PL_reginput);
+                           TRYPAREN(paren, ln, PL_reginput);
                            REGCP_UNWIND(lastcp);
                        }
                    }
                    /* If it could work, try it. */
                    else if (c1 == -1000)
                    {
-                       TRYPAREN(paren, n, PL_reginput);
+                       TRYPAREN(paren, ln, PL_reginput);
                        REGCP_UNWIND(lastcp);
                    }
                    /* Couldn't or didn't -- move forward. */
@@ -3692,12 +3730,15 @@ S_regmatch(pTHX_ regnode *prog)
                    while (n >= ln) {
                        if (c1 != -1000) {
                            if (do_utf8)
-                               c = utf8_to_uvchr((U8*)PL_reginput, NULL);
+                               c = utf8n_to_uvchr((U8*)PL_reginput,
+                                                  UTF8_MAXLEN, 0,
+                                                  ckWARN(WARN_UTF8) ?
+                                                  0 : UTF8_ALLOW_ANY);
                            else
                                c = UCHARAT(PL_reginput);
                        }
                        /* If it could work, try it. */
-                       if (c1 == -1000 || c == c1 || c == c2)
+                       if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
                            {
                                TRYPAREN(paren, n, PL_reginput);
                                REGCP_UNWIND(lastcp);
@@ -3712,12 +3753,15 @@ S_regmatch(pTHX_ regnode *prog)
                    while (n >= ln) {
                        if (c1 != -1000) {
                            if (do_utf8)
-                               c = utf8_to_uvchr((U8*)PL_reginput, NULL);
+                               c = utf8n_to_uvchr((U8*)PL_reginput,
+                                                  UTF8_MAXLEN, 0,
+                                                  ckWARN(WARN_UTF8) ?
+                                                  0 : UTF8_ALLOW_ANY);
                            else
                                c = UCHARAT(PL_reginput);
                        }
                        /* If it could work, try it. */
-                       if (c1 == -1000 || c == c1 || c == c2)
+                       if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
                            {
                                TRYPAREN(paren, n, PL_reginput);
                                REGCP_UNWIND(lastcp);
@@ -3953,7 +3997,9 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
     register bool do_utf8 = PL_reg_match_utf8;
 
     scan = PL_reginput;
-    if (max != REG_INFTY && max < loceol - scan)
+    if (max == REG_INFTY)
+       max = I32_MAX;
+    else if (max < loceol - scan)
       loceol = scan + max;
     switch (OP(p)) {
     case REG_ANY:
@@ -4247,15 +4293,16 @@ Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV
        if (PL_regdata->what[n] == 's') {
            SV *rv = (SV*)PL_regdata->data[n];
            AV *av = (AV*)SvRV((SV*)rv);
+           SV **ary = AvARRAY(av);
            SV **a, **b;
        
            /* See the end of regcomp.c:S_reglass() for
             * documentation of these array elements. */
 
-           si  = *av_fetch(av, 0, FALSE);
-           a   =  av_fetch(av, 1, FALSE);
-           b   =  av_fetch(av, 2, FALSE);
-       
+           si = *ary;
+           a  = SvTYPE(ary[1]) == SVt_RV   ? &ary[1] : 0;
+           b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
+
            if (a)
                sw = *a;
            else if (si && doinit) {
@@ -4290,11 +4337,13 @@ S_reginclass(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register b
 {
     char flags = ANYOF_FLAGS(n);
     bool match = FALSE;
-    UV c;
+    UV c = *p;
     STRLEN len = 0;
     STRLEN plen;
 
-    c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
+    if (do_utf8 && !UTF8_IS_INVARIANT(c))
+        c = utf8n_to_uvchr(p, UTF8_MAXLEN, &len,
+                           ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
 
     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
     if (do_utf8 || (flags & ANYOF_UNICODE)) {
@@ -4347,7 +4396,7 @@ S_reginclass(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register b
        if (ANYOF_BITMAP_TEST(n, c))
            match = TRUE;
        else if (flags & ANYOF_FOLD) {
-         I32 f;
+           U8 f;
 
            if (flags & ANYOF_LOCALE) {
                PL_reg_flags |= RF_tainted;
@@ -4489,7 +4538,7 @@ S_to_utf8_substr(pTHX_ register regexp *prog)
     SV* sv;
     if (prog->float_substr && !prog->float_utf8) {
        prog->float_utf8 = sv = NEWSV(117, 0);
-       SvSetMagicSV(sv, prog->float_substr);
+       SvSetSV(sv, prog->float_substr);
        sv_utf8_upgrade(sv);
        if (SvTAIL(prog->float_substr))
            SvTAIL_on(sv);
@@ -4498,7 +4547,7 @@ S_to_utf8_substr(pTHX_ register regexp *prog)
     }
     if (prog->anchored_substr && !prog->anchored_utf8) {
        prog->anchored_utf8 = sv = NEWSV(118, 0);
-       SvSetMagicSV(sv, prog->anchored_substr);
+       SvSetSV(sv, prog->anchored_substr);
        sv_utf8_upgrade(sv);
        if (SvTAIL(prog->anchored_substr))
            SvTAIL_on(sv);
@@ -4513,7 +4562,7 @@ S_to_byte_substr(pTHX_ register regexp *prog)
     SV* sv;
     if (prog->float_utf8 && !prog->float_substr) {
        prog->float_substr = sv = NEWSV(117, 0);
-       SvSetMagicSV(sv, prog->float_utf8);
+       SvSetSV(sv, prog->float_utf8);
        if (sv_utf8_downgrade(sv, TRUE)) {
            if (SvTAIL(prog->float_utf8))
                SvTAIL_on(sv);
@@ -4526,7 +4575,7 @@ S_to_byte_substr(pTHX_ register regexp *prog)
     }
     if (prog->anchored_utf8 && !prog->anchored_substr) {
        prog->anchored_substr = sv = NEWSV(118, 0);
-       SvSetMagicSV(sv, prog->anchored_utf8);
+       SvSetSV(sv, prog->anchored_utf8);
        if (sv_utf8_downgrade(sv, TRUE)) {
            if (SvTAIL(prog->anchored_utf8))
                SvTAIL_on(sv);