Simplify tests for fork() capabilities
[p5sagit/p5-mst-13.2.git] / regcomp.c
index 66a8f86..3e939df 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -282,27 +282,6 @@ static const scan_data_t zero_scan_data =
 } STMT_END
 
 /*
- * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
- * args. Show regex, up to a maximum length. If it's too long, chop and add
- * "...".
- */
-#define        FAIL2(pat,msg) STMT_START {                                     \
-    const char *ellipses = "";                                         \
-    IV len = RExC_end - RExC_precomp;                                  \
-                                                                       \
-    if (!SIZE_ONLY)                                                    \
-       SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);                      \
-    if (len > RegexLengthToShowInErrorMessages) {                      \
-       /* chop 10 shorter than the max, to ensure meaning of "..." */  \
-       len = RegexLengthToShowInErrorMessages - 10;                    \
-       ellipses = "...";                                               \
-    }                                                                  \
-    S_re_croak2(aTHX_ pat, " in regex m/%.*s%s/",                      \
-           msg, (int)len, RExC_precomp, ellipses);                     \
-} STMT_END
-
-
-/*
  * Simple_vFAIL -- like FAIL, but marks the current location in the scan
  */
 #define        Simple_vFAIL(m) STMT_START {                                    \
@@ -499,7 +478,7 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
        SV * const sv = data->last_found;
        MAGIC * const mg =
            SvUTF8(sv) && SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL;
-       if (mg && mg->mg_len > 0)
+       if (mg)
            mg->mg_len = 0;
     }
     data->last_end = -1;
@@ -508,7 +487,7 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
 
 /* Can match anything (initialization) */
 STATIC void
-S_cl_anything(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_anything(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
 {
     ANYOF_CLASS_ZERO(cl);
     ANYOF_BITMAP_SETALL(cl);
@@ -519,7 +498,7 @@ S_cl_anything(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *c
 
 /* Can match anything (initialization) */
 STATIC int
-S_cl_is_anything(pTHX_ const struct regnode_charclass_class *cl)
+S_cl_is_anything(const struct regnode_charclass_class *cl)
 {
     int value;
 
@@ -535,7 +514,7 @@ S_cl_is_anything(pTHX_ const struct regnode_charclass_class *cl)
 
 /* Can match anything (initialization) */
 STATIC void
-S_cl_init(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_init(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
 {
     Zero(cl, 1, struct regnode_charclass_class);
     cl->type = ANYOF;
@@ -543,7 +522,7 @@ S_cl_init(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
 }
 
 STATIC void
-S_cl_init_zero(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_init_zero(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
 {
     Zero(cl, 1, struct regnode_charclass_class);
     cl->type = ANYOF;
@@ -555,7 +534,7 @@ S_cl_init_zero(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *
 /* 'And' a given class with another one.  Can create false positives */
 /* We assume that cl is not inverted */
 STATIC void
-S_cl_and(pTHX_ struct regnode_charclass_class *cl,
+S_cl_and(struct regnode_charclass_class *cl,
        const struct regnode_charclass_class *and_with)
 {
     if (!(and_with->flags & ANYOF_CLASS)
@@ -592,7 +571,7 @@ S_cl_and(pTHX_ struct regnode_charclass_class *cl,
 /* 'OR' a given class with another one.  Can create false positives */
 /* We assume that cl is not inverted */
 STATIC void
-S_cl_or(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with)
+S_cl_or(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with)
 {
     if (or_with->flags & ANYOF_INVERT) {
        /* We do not use
@@ -765,7 +744,7 @@ and would end up looking like:
     DEBUG_TRIE_COMPILE_r({                                                 \
        SV *tmp;                                                           \
        if ( UTF ) {                                                       \
-           tmp = newSVpvn( "", 0 );                                       \
+           tmp = newSVpvs( "" );                                          \
            pv_uni_display( tmp, uc, len, 60, UNI_DISPLAY_REGEX );         \
        } else {                                                           \
            tmp = Perl_newSVpvf_nocontext( "%c", (int)uvc );               \
@@ -827,7 +806,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
     /* first pass, loop through and scan words */
     reg_trie_data *trie;
     regnode *cur;
-    const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
+    const U32 uniflags = UTF8_ALLOW_DEFAULT;
     STRLEN len = 0;
     UV uvc = 0;
     U16 curword = 0;
@@ -1036,7 +1015,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                });
 
            } else {
-               /* Its a dupe. So ignore it. */
+               /*EMPTY*/;   /* It's a dupe. So ignore it. */
            }
 
         } /* end second pass */
@@ -1242,7 +1221,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                 });
 
             } else {
-                /* Its a dupe. So ignore it. */
+               /*EMPTY*/;  /* Its a dupe. So ignore it. */
             }
 
         } /* end second pass */
@@ -1540,6 +1519,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                        /* deltap: Write maxlen-minlen here. */
                        /* last: Stop before this one. */
 {
+    dVAR;
     I32 min = 0, pars = 0, code;
     regnode *scan = *scanp, *next;
     I32 delta = 0;
@@ -1637,7 +1617,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                 char * const s0 = STRING(scan), *s, *t;
                 char * const s1 = s0 + STR_LEN(scan) - 1;
                 char * const s2 = s1 - 4;
-                const char * const t0 = "\xcc\x88\xcc\x81";
+                const char t0[] = "\xcc\x88\xcc\x81";
                 const char * const t1 = t0 + 3;
 
                 for (s = s0 + 2;
@@ -1698,7 +1678,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
            if (OP(next) == code || code == IFTHEN || code == SUSPEND) {
                I32 max1 = 0, min1 = I32_MAX, num = 0;
                struct regnode_charclass_class accum;
-               regnode *startbranch=scan;
+               regnode * const startbranch=scan;
                
                if (flags & SCF_DO_SUBSTR) /* XXXX Add !SUSPEND? */
                    scan_commit(pRExC_state, data); /* Cannot merge strings after this. */
@@ -1998,6 +1978,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                        ? I32_MAX : data->pos_min + data->pos_delta;
                }
                sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan));
+               if (UTF)
+                   SvUTF8_on(data->last_found);
                {
                    SV * const sv = data->last_found;
                    MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
@@ -2006,8 +1988,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                        mg->mg_len += utf8_length((U8*)STRING(scan),
                                                  (U8*)STRING(scan)+STR_LEN(scan));
                }
-               if (UTF)
-                   SvUTF8_on(data->last_found);
                data->last_end = data->pos_min + l;
                data->pos_min += l; /* As in the first entry. */
                data->flags &= ~SF_BEFORE_EOL;
@@ -2050,7 +2030,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
            if (flags & SCF_DO_SUBSTR)
                scan_commit(pRExC_state, data);
            if (UTF) {
-               U8 *s = (U8 *)STRING(scan);
+               const U8 * const s = (U8 *)STRING(scan);
                l = utf8_length(s, s + l);
                uc = utf8_to_uvchr(s, NULL);
            }
@@ -2091,7 +2071,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
        else if (strchr((const char*)PL_varies,OP(scan))) {
            I32 mincount, maxcount, minnext, deltanext, fl = 0;
            I32 f = flags, pos_before = 0;
-           regnode *oscan = scan;
+           regnode * const oscan = scan;
            struct regnode_charclass_class this_class;
            struct regnode_charclass_class *oclass = NULL;
            I32 next_is_eval = 0;
@@ -2225,7 +2205,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                      && !deltanext && minnext == 1 ) {
                    /* Try to optimize to CURLYN.  */
                    regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
-                   regnode *nxt1 = nxt;
+                   regnode * const nxt1 = nxt;
 #ifdef DEBUGGING
                    regnode *nxt2;
 #endif
@@ -2354,7 +2334,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                        I32 b = pos_before >= data->last_start_min
                            ? pos_before : data->last_start_min;
                        STRLEN l;
-                       const char *s = SvPV_const(data->last_found, l);
+                       const char * const s = SvPV_const(data->last_found, l);
                        I32 old = b - data->last_start_min;
 #endif
 
@@ -2403,7 +2383,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                            the group.  */
                        scan_commit(pRExC_state,data);
                        if (mincount && last_str) {
-                           sv_setsv(data->last_found, last_str);
+                           SV * const sv = data->last_found;
+                           MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
+                               mg_find(sv, PERL_MAGIC_utf8) : NULL;
+
+                           if (mg)
+                               mg->mg_len = -1;
+                           sv_setsv(sv, last_str);
                            data->last_end = data->pos_min;
                            data->last_start_min =
                                data->pos_min - CHR_SVLEN(last_str);
@@ -2745,7 +2731,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
 }
 
 STATIC I32
-S_add_data(pTHX_ RExC_state_t *pRExC_state, I32 n, const char *s)
+S_add_data(RExC_state_t *pRExC_state, I32 n, const char *s)
 {
     if (RExC_rx->data) {
        Renewc(RExC_rx->data,
@@ -2767,6 +2753,7 @@ S_add_data(pTHX_ RExC_state_t *pRExC_state, I32 n, const char *s)
 void
 Perl_reginitcolors(pTHX)
 {
+    dVAR;
     const char * const s = PerlEnv_getenv("PERL_RE_COLORS");
     if (s) {
        char *t = savepv(s);
@@ -2808,6 +2795,7 @@ Perl_reginitcolors(pTHX)
 regexp *
 Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
 {
+    dVAR;
     register regexp *r;
     regnode *scan;
     regnode *first;
@@ -2888,6 +2876,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
 #endif
     r->reganch = pm->op_pmflags & PMf_COMPILETIME;
     r->nparens = RExC_npar - 1;        /* set early to validate backrefs */
+    r->lastparen = 0;                  /* mg.c reads this.  */
 
     r->substrs = 0;                    /* Useful during FAIL. */
     r->startp = 0;                     /* Useful during FAIL. */
@@ -2963,7 +2952,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
       again:
        if (PL_regkind[(U8)OP(first)] == EXACT) {
            if (OP(first) == EXACT)
-               ;       /* Empty, get anchored substr later. */
+               /*EMPTY*/;      /* Empty, get anchored substr later. */
            else if ((OP(first) == EXACTF || OP(first) == EXACTFL))
                r->regstclass = first;
        }
@@ -3020,9 +3009,9 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
        */
        minlen = 0;
 
-       data.longest_fixed = newSVpvn("",0);
-       data.longest_float = newSVpvn("",0);
-       data.last_found = newSVpvn("",0);
+       data.longest_fixed = newSVpvs("");
+       data.longest_float = newSVpvs("");
+       data.last_found = newSVpvs("");
        data.longest = &(data.longest_fixed);
        first = scan;
        if (!r->regstclass) {
@@ -3118,7 +3107,6 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
                       struct regnode_charclass_class);
            r->regstclass = (regnode*)RExC_rx->data->data[n];
            r->reganch &= ~ROPT_SKIP;   /* Used in find_byclass(). */
-           PL_regdata = r->data; /* for regprop() */
            DEBUG_COMPILE_r({ SV *sv = sv_newmortal();
                      regprop(sv, (regnode*)data.start_class);
                      PerlIO_printf(Perl_debug_log,
@@ -3193,7 +3181,6 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
        r->reganch |= ROPT_CANY_SEEN;
     Newxz(r->startp, RExC_npar, I32);
     Newxz(r->endp, RExC_npar, I32);
-    PL_regdata = r->data; /* for regprop() */
     DEBUG_COMPILE_r(regdump(r));
     return(r);
 }
@@ -3219,21 +3206,20 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
     register I32 parno = 0;
     I32 flags;
     const I32 oregflags = RExC_flags;
-    I32 have_branch = 0;
-    I32 open = 0;
+    bool have_branch = 0;
+    bool is_open = 0;
 
     /* for (?g), (?gc), and (?o) warnings; warning
        about (?c) will warn about (?g) -- japhy    */
 
+#define WASTED_O  0x01
+#define WASTED_G  0x02
+#define WASTED_C  0x04
+#define WASTED_GC (0x02|0x04)
     I32 wastedflags = 0x00;
-    const I32 wasted_o  = 0x01;
-    const I32 wasted_g  = 0x02;
-    const I32 wasted_gc = 0x02 | 0x04;
-    const I32 wasted_c  = 0x04;
 
     char * parse_start = RExC_parse; /* MJD */
     char * const oregcomp_parse = RExC_parse;
-    char c;
 
     *flagp = 0;                                /* Tentatively. */
 
@@ -3243,7 +3229,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
        if (*RExC_parse == '?') { /* (?...) */
            U32 posflags = 0, negflags = 0;
            U32 *flagsp = &posflags;
-           int logical = 0;
+           bool is_logical = 0;
            const char * const seqstart = RExC_parse;
 
            RExC_parse++;
@@ -3280,7 +3266,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                    vWARNdep(RExC_parse, "(?p{}) is deprecated - use (??{})");
                /* FALL THROUGH*/
            case '?':           /* (??...) */
-               logical = 1;
+               is_logical = 1;
                if (*RExC_parse != '{')
                    goto unknown;
                paren = *RExC_parse++;
@@ -3290,32 +3276,28 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                I32 count = 1, n = 0;
                char c;
                char *s = RExC_parse;
-               SV *sv;
-               OP_4tree *sop, *rop;
 
                RExC_seen_zerolen++;
                RExC_seen |= REG_SEEN_EVAL;
                while (count && (c = *RExC_parse)) {
-                   if (c == '\\' && RExC_parse[1])
-                       RExC_parse++;
+                   if (c == '\\') {
+                       if (RExC_parse[1])
+                           RExC_parse++;
+                   }
                    else if (c == '{')
                        count++;
                    else if (c == '}')
                        count--;
                    RExC_parse++;
                }
-               if (*RExC_parse != ')')
-               {
+               if (*RExC_parse != ')') {
                    RExC_parse = s;             
                    vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
                }
                if (!SIZE_ONLY) {
                    PAD *pad;
-               
-                   if (RExC_parse - 1 - s)
-                       sv = newSVpvn(s, RExC_parse - 1 - s);
-                   else
-                       sv = newSVpvn("", 0);
+                   OP_4tree *sop, *rop;
+                   SV * const sv = newSVpvn(s, RExC_parse - 1 - s);
 
                    ENTER;
                    Perl_save_re_context(aTHX);
@@ -3344,7 +3326,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                }
 
                nextchar(pRExC_state);
-               if (logical) {
+               if (is_logical) {
                    ret = reg_node(pRExC_state, LOGICAL);
                    if (!SIZE_ONLY)
                        ret->flags = 2;
@@ -3374,6 +3356,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                }
                else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
                     /* (?(1)...) */
+                   char c;
                    parno = atoi(RExC_parse++);
 
                    while (isDIGIT(*RExC_parse))
@@ -3431,7 +3414,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
 
                    if (*RExC_parse == 'o' || *RExC_parse == 'g') {
                        if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
-                           I32 wflagbit = *RExC_parse == 'o' ? wasted_o : wasted_g;
+                           const I32 wflagbit = *RExC_parse == 'o' ? WASTED_O : WASTED_G;
                            if (! (wastedflags & wflagbit) ) {
                                wastedflags |= wflagbit;
                                vWARN5(
@@ -3447,8 +3430,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                    }
                    else if (*RExC_parse == 'c') {
                        if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
-                           if (! (wastedflags & wasted_c) ) {
-                               wastedflags |= wasted_gc;
+                           if (! (wastedflags & WASTED_C) ) {
+                               wastedflags |= WASTED_GC;
                                vWARN3(
                                    RExC_parse + 1,
                                    "Useless (%sc) - %suse /gc modifier",
@@ -3491,7 +3474,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
            ret = reganode(pRExC_state, OPEN, parno);
             Set_Node_Length(ret, 1); /* MJD */
             Set_Node_Offset(ret, RExC_parse); /* MJD */
-           open = 1;
+           is_open = 1;
        }
     }
     else                        /* ! paren */
@@ -3520,7 +3503,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
     else if (paren == ':') {
        *flagp |= flags&SIMPLE;
     }
-    if (open) {                                /* Starts with OPEN. */
+    if (is_open) {                             /* Starts with OPEN. */
        regtail(pRExC_state, ret, br);          /* OPEN -> first. */
     }
     else if (paren != '?')             /* Not Conditional */
@@ -3627,6 +3610,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
 STATIC regnode *
 S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first)
 {
+    dVAR;
     register regnode *ret;
     register regnode *chain = NULL;
     register regnode *latest;
@@ -3679,7 +3663,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first)
        *flagp |= flags&SIMPLE;
     }
 
-    return(ret);
+    return ret;
 }
 
 /*
@@ -3694,6 +3678,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first)
 STATIC regnode *
 S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
 {
+    dVAR;
     register regnode *ret;
     register char op;
     register char *next;
@@ -3842,7 +3827,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
     if (!SIZE_ONLY && !(flags&HASWIDTH) && max > REG_INFTY/3 && ckWARN(WARN_REGEXP)) {
        vWARN3(RExC_parse,
               "%.*s matches null string many times",
-              RExC_parse - origparse,
+              (int)(RExC_parse >= origparse ? RExC_parse - origparse : 0),
               origparse);
     }
 
@@ -3871,6 +3856,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
 STATIC regnode *
 S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
 {
+    dVAR;
     register regnode *ret = NULL;
     I32 flags;
     char *parse_start = RExC_parse;
@@ -4286,7 +4272,7 @@ tryagain:
                    if (UTF8_IS_START(*p) && UTF) {
                        STRLEN numlen;
                        ender = utf8n_to_uvchr((U8*)p, RExC_end - p,
-                                              &numlen, 0);
+                                              &numlen, UTF8_ALLOW_DEFAULT);
                        p += numlen;
                    }
                    else
@@ -4431,7 +4417,7 @@ tryagain:
 }
 
 STATIC char *
-S_regwhite(pTHX_ char *p, const char *e)
+S_regwhite(char *p, const char *e)
 {
     while (p < e) {
        if (isSPACE(*p))
@@ -4460,6 +4446,7 @@ S_regwhite(pTHX_ char *p, const char *e)
 STATIC I32
 S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
 {
+    dVAR;
     I32 namedclass = OOB_NAMEDCLASS;
 
     if (value == '[' && RExC_parse + 1 < RExC_end &&
@@ -4619,6 +4606,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
 STATIC void
 S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
 {
+    dVAR;
     if (!SIZE_ONLY && POSIXCC(UCHARAT(RExC_parse))) {
        const char *s = RExC_parse;
        const char  c = *s++;
@@ -4646,6 +4634,7 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
 STATIC regnode *
 S_regclass(pTHX_ RExC_state_t *pRExC_state)
 {
+    dVAR;
     register UV value;
     register UV nextvalue;
     register IV prevvalue = OOB_UNICODE;
@@ -4685,7 +4674,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
        if (LOC)
            ANYOF_FLAGS(ret) |= ANYOF_LOCALE;
        ANYOF_BITMAP_ZERO(ret);
-       listsv = newSVpvn("# comment\n", 10);
+       listsv = newSVpvs("# comment\n");
     }
 
     nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
@@ -4708,7 +4697,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
        if (UTF) {
            value = utf8n_to_uvchr((U8*)RExC_parse,
                                   RExC_end - RExC_parse,
-                                  &numlen, 0);
+                                  &numlen, UTF8_ALLOW_DEFAULT);
            RExC_parse += numlen;
        }
        else
@@ -4720,7 +4709,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
            if (UTF) {
                value = utf8n_to_uvchr((U8*)RExC_parse,
                                   RExC_end - RExC_parse,
-                                  &numlen, 0);
+                                  &numlen, UTF8_ALLOW_DEFAULT);
                RExC_parse += numlen;
            }
            else
@@ -4841,12 +4830,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
            /* a bad range like a-\d, a-[:digit:] ? */
            if (range) {
                if (!SIZE_ONLY) {
-                   if (ckWARN(WARN_REGEXP))
+                   if (ckWARN(WARN_REGEXP)) {
+                       int w =
+                           RExC_parse >= rangebegin ?
+                           RExC_parse - rangebegin : 0;
                        vWARN4(RExC_parse,
                               "False [] range \"%*.*s\"",
-                              RExC_parse - rangebegin,
-                              RExC_parse - rangebegin,
+                              w,
+                              w,
                               rangebegin);
+                   }
                    if (prevvalue < 256) {
                        ANYOF_BITMAP_SET(ret, prevvalue);
                        ANYOF_BITMAP_SET(ret, '-');
@@ -5235,10 +5228,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 
        if (range) {
            if (prevvalue > (IV)value) /* b-a */ {
-               Simple_vFAIL4("Invalid [] range \"%*.*s\"",
-                             RExC_parse - rangebegin,
-                             RExC_parse - rangebegin,
-                             rangebegin);
+               const int w = RExC_parse - rangebegin;
+               Simple_vFAIL4("Invalid [] range \"%*.*s\"", w, w, rangebegin);
                range = 0; /* not a valid range */
            }
        }
@@ -5250,12 +5241,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 
                /* a bad range like \w-, [:word:]- ? */
                if (namedclass > OOB_NAMEDCLASS) {
-                   if (ckWARN(WARN_REGEXP))
+                   if (ckWARN(WARN_REGEXP)) {
+                       const int w =
+                           RExC_parse >= rangebegin ?
+                           RExC_parse - rangebegin : 0;
                        vWARN4(RExC_parse,
                               "False [] range \"%*.*s\"",
-                              RExC_parse - rangebegin,
-                              RExC_parse - rangebegin,
+                              w,
+                              w,
                               rangebegin);
+                   }
                    if (!SIZE_ONLY)
                        ANYOF_BITMAP_SET(ret, '-');
                } else
@@ -5456,6 +5451,7 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state)
 STATIC regnode *                       /* Location. */
 S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
 {
+    dVAR;
     register regnode *ptr;
     regnode * const ret = RExC_emit;
 
@@ -5491,6 +5487,7 @@ S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
 STATIC regnode *                       /* Location. */
 S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
 {
+    dVAR;
     register regnode *ptr;
     regnode * const ret = RExC_emit;
 
@@ -5527,6 +5524,7 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
 STATIC void
 S_reguni(pTHX_ const RExC_state_t *pRExC_state, UV uv, char* s, STRLEN* lenp)
 {
+    dVAR;
     *lenp = SIZE_ONLY ? UNISKIP(uv) : (uvchr_to_utf8((U8*)s, uv) - (U8*)s);
 }
 
@@ -5538,6 +5536,7 @@ S_reguni(pTHX_ const RExC_state_t *pRExC_state, UV uv, char* s, STRLEN* lenp)
 STATIC void
 S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd)
 {
+    dVAR;
     register regnode *src;
     register regnode *dst;
     register regnode *place;
@@ -5596,6 +5595,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd)
 STATIC void
 S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
 {
+    dVAR;
     register regnode *scan;
 
     if (SIZE_ONLY)
@@ -5624,6 +5624,7 @@ S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
 STATIC void
 S_regoptail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
 {
+    dVAR;
     /* "Operandless" and "op != BRANCH" are synonymous in practice. */
     if (p == NULL || SIZE_ONLY)
        return;
@@ -5641,7 +5642,7 @@ S_regoptail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
  - regcurly - a little FSA that accepts {\d+,?\d*}
  */
 STATIC I32
-S_regcurly(pTHX_ register const char *s)
+S_regcurly(register const char *s)
 {
     if (*s++ != '{')
        return FALSE;
@@ -5666,9 +5667,10 @@ void
 Perl_regdump(pTHX_ regexp *r)
 {
 #ifdef DEBUGGING
+    dVAR;
     SV * const sv = sv_newmortal();
 
-    (void)dumpuntil(r->program, r->program + 1, NULL, sv, 0);
+    (void)dumpuntil(r, r->program, r->program + 1, NULL, sv, 0);
 
     /* Header fields of interest. */
     if (r->anchored_substr)
@@ -5758,6 +5760,7 @@ Perl_regdump(pTHX_ regexp *r)
         });
     }
 #else
+    PERL_UNUSED_CONTEXT;
     PERL_UNUSED_ARG(r);
 #endif /* DEBUGGING */
 }
@@ -5769,6 +5772,7 @@ void
 Perl_regprop(pTHX_ SV *sv, const regnode *o)
 {
 #ifdef DEBUGGING
+    dVAR;
     register int k;
 
     sv_setpvn(sv, "", 0);
@@ -5781,7 +5785,7 @@ Perl_regprop(pTHX_ SV *sv, const regnode *o)
     k = PL_regkind[(U8)OP(o)];
 
     if (k == EXACT) {
-       SV * const dsv = sv_2mortal(newSVpvn("", 0));
+       SV * const dsv = sv_2mortal(newSVpvs(""));
        /* Using is_utf8_string() is a crude hack but it may
         * be the best for now since we have no flag "this EXACTish
         * node was UTF-8" --jhi */
@@ -5797,17 +5801,10 @@ Perl_regprop(pTHX_ SV *sv, const regnode *o)
                       PL_colors[0],
                       len, s,
                       PL_colors[1]);
-    } else if (k == TRIE) {/*
-       this isn't always safe, as Pl_regdata may not be for this regex yet
-       (depending on where its called from) so its being moved to dumpuntil
-       I32 n = ARG(o);
-       reg_trie_data *trie=(reg_trie_data*)PL_regdata->data[n];
-       Perl_sv_catpvf(aTHX_ sv, " (W:%d L:%d C:%d S:%d)",
-                      trie->wordcount,
-                      trie->charcount,
-                      trie->uniquecharcount,
-                      trie->laststate);
-       */
+    } else if (k == TRIE) {
+       /*EMPTY*/;
+       /* print the details od the trie in dumpuntil instead, as
+        * prog->data isn't available here */
     } else if (k == CURLY) {
        if (OP(o) == CURLYM || OP(o) == CURLYN || OP(o) == CURLYX)
            Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
@@ -5858,12 +5855,12 @@ Perl_regprop(pTHX_ SV *sv, const regnode *o)
        };
 
        if (flags & ANYOF_LOCALE)
-           sv_catpv(sv, "{loc}");
+           sv_catpvs(sv, "{loc}");
        if (flags & ANYOF_FOLD)
-           sv_catpv(sv, "{i}");
+           sv_catpvs(sv, "{i}");
        Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
        if (flags & ANYOF_INVERT)
-           sv_catpv(sv, "^");
+           sv_catpvs(sv, "^");
        for (i = 0; i <= 256; i++) {
            if (i < 256 && ANYOF_BITMAP_TEST(o,i)) {
                if (rangestart == -1)
@@ -5874,7 +5871,7 @@ Perl_regprop(pTHX_ SV *sv, const regnode *o)
                        put_byte(sv, rangestart);
                else {
                    put_byte(sv, rangestart);
-                   sv_catpv(sv, "-");
+                   sv_catpvs(sv, "-");
                    put_byte(sv, i - 1);
                }
                rangestart = -1;
@@ -5887,9 +5884,9 @@ Perl_regprop(pTHX_ SV *sv, const regnode *o)
                    sv_catpv(sv, anyofs[i]);
 
        if (flags & ANYOF_UNICODE)
-           sv_catpv(sv, "{unicode}");
+           sv_catpvs(sv, "{unicode}");
        else if (flags & ANYOF_UNICODE_ALL)
-           sv_catpv(sv, "{unicode_all}");
+           sv_catpvs(sv, "{unicode_all}");
 
        {
            SV *lv;
@@ -5918,7 +5915,7 @@ Perl_regprop(pTHX_ SV *sv, const regnode *o)
                                U8 *p;
                                for (p = s; p < e; p++)
                                    put_byte(sv, *p);
-                               sv_catpvn(sv, "-", 1);
+                               sv_catpvs(sv, "-");
                                e = uvchr_to_utf8(s, i-1);
                                for (p = s; p < e; p++)
                                    put_byte(sv, *p);
@@ -5927,7 +5924,7 @@ Perl_regprop(pTHX_ SV *sv, const regnode *o)
                            }
                        }
                        
-                   sv_catpv(sv, "..."); /* et cetera */
+                   sv_catpvs(sv, "..."); /* et cetera */
                }
 
                {
@@ -5960,6 +5957,7 @@ Perl_regprop(pTHX_ SV *sv, const regnode *o)
     else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
        Perl_sv_catpvf(aTHX_ sv, "[-%d]", o->flags);
 #else
+    PERL_UNUSED_CONTEXT;
     PERL_UNUSED_ARG(sv);
     PERL_UNUSED_ARG(o);
 #endif /* DEBUGGING */
@@ -5968,7 +5966,10 @@ Perl_regprop(pTHX_ SV *sv, const regnode *o)
 SV *
 Perl_re_intuit_string(pTHX_ regexp *prog)
 {                              /* Assume that RE_INTUIT is set */
+    dVAR;
     GET_RE_DEBUG_FLAGS_DECL;
+    PERL_UNUSED_CONTEXT;
+
     DEBUG_COMPILE_r(
        {
            const char * const s = SvPV_nolen_const(prog->check_substr
@@ -6113,6 +6114,7 @@ Perl_pregfree(pTHX_ struct regexp *r)
 regnode *
 Perl_regnext(pTHX_ register regnode *p)
 {
+    dVAR;
     register I32 offset;
 
     if (p == &PL_regdummy)
@@ -6164,6 +6166,7 @@ S_re_croak2(pTHX_ const char* pat1,const char* pat2,...)
 void
 Perl_save_re_context(pTHX)
 {
+    dVAR;
     SAVEI32(PL_reg_flags);             /* from regexec.c */
     SAVEPPTR(PL_bostr);
     SAVEPPTR(PL_reginput);             /* String-input pointer. */
@@ -6178,12 +6181,9 @@ Perl_save_re_context(pTHX)
     PL_reg_start_tmp = 0;
     SAVEI32(PL_reg_start_tmpl);                /* from regexec.c */
     PL_reg_start_tmpl = 0;
-    SAVEVPTR(PL_regdata);
     SAVEI32(PL_reg_eval_set);          /* from regexec.c */
     SAVEI32(PL_regnarrate);            /* from regexec.c */
-    SAVEVPTR(PL_regprogram);           /* from regexec.c */
     SAVEINT(PL_regindent);             /* from regexec.c */
-    SAVEVPTR(PL_regcc);                        /* from regexec.c */
     SAVEVPTR(PL_curcop);
     SAVEVPTR(PL_reg_call_cc);          /* from regexec.c */
     SAVEVPTR(PL_reg_re);               /* from regexec.c */
@@ -6210,8 +6210,6 @@ Perl_save_re_context(pTHX)
     PL_reg_poscache = NULL;
     SAVEI32(PL_reg_poscache_size);     /* size of pos cache of WHILEM */
     PL_reg_poscache_size = 0;
-    SAVEPPTR(PL_regprecomp);           /* uncompiled string. */
-    SAVEI32(PL_regnpar);               /* () count. */
     SAVEI32(PL_regsize);               /* from regexec.c */
 
     /* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */
@@ -6222,9 +6220,14 @@ Perl_save_re_context(pTHX)
            for (i = 1; i <= rx->nparens; i++) {
                char digits[TYPE_CHARS(long)];
                const STRLEN len = my_sprintf(digits, "%lu", (long)i);
-               GV * const mgv = gv_fetchpvn_flags(digits, len, 0, SVt_PV);
-               if (mgv)
-                   save_scalar(mgv);
+               GV *const *const gvp
+                   = (GV**)hv_fetch(PL_defstash, digits, len, 0);
+
+               if (gvp) {
+                   GV * const gv = *gvp;
+                   if (SvTYPE(gv) == SVt_PVGV && GvSV(gv))
+                       save_scalar(gv);
+               }
            }
        }
     }
@@ -6237,6 +6240,7 @@ Perl_save_re_context(pTHX)
 static void
 clear_re(pTHX_ void *r)
 {
+    dVAR;
     ReREFCNT_dec((regexp *)r);
 }
 
@@ -6255,8 +6259,10 @@ S_put_byte(pTHX_ SV *sv, int c)
 
 
 STATIC regnode *
-S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
+S_dumpuntil(pTHX_ regexp *r, regnode *start, regnode *node, regnode *last,
+    SV* sv, I32 l)
 {
+    dVAR;
     register U8 op = EXACT;    /* Arbitrary non-END op. */
     register regnode *next;
 
@@ -6286,14 +6292,14 @@ S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
                                       : next);
            if (last && nnode > last)
                nnode = last;
-           node = dumpuntil(start, NEXTOPER(NEXTOPER(node)), nnode, sv, l + 1);
+           node = dumpuntil(r, start, NEXTOPER(NEXTOPER(node)), nnode, sv, l + 1);
        }
        else if (PL_regkind[(U8)op] == BRANCH) {
-           node = dumpuntil(start, NEXTOPER(node), next, sv, l + 1);
+           node = dumpuntil(r, start, NEXTOPER(node), next, sv, l + 1);
        }
        else if ( PL_regkind[(U8)op]  == TRIE ) {
             const I32 n = ARG(node);
-           const reg_trie_data * const trie = (reg_trie_data*)PL_regdata->data[n];
+           const reg_trie_data * const trie = (reg_trie_data*)r->data->data[n];
             const I32 arry_len = av_len(trie->words)+1;
            I32 word_idx;
            PerlIO_printf(Perl_debug_log,
@@ -6330,15 +6336,15 @@ S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
 
        }
        else if ( op == CURLY) {   /* "next" might be very big: optimizer */
-           node = dumpuntil(start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
+           node = dumpuntil(r, start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
                             NEXTOPER(node) + EXTRA_STEP_2ARGS + 1, sv, l + 1);
        }
        else if (PL_regkind[(U8)op] == CURLY && op != CURLYX) {
-           node = dumpuntil(start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
+           node = dumpuntil(r, start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
                             next, sv, l + 1);
        }
        else if ( op == PLUS || op == STAR) {
-           node = dumpuntil(start, NEXTOPER(node), NEXTOPER(node) + 1, sv, l + 1);
+           node = dumpuntil(r, start, NEXTOPER(node), NEXTOPER(node) + 1, sv, l + 1);
        }
        else if (op == ANYOF) {
            /* arglen 1 + class block */