Add tests for all the other types that %^H serialisation is supposed
[p5sagit/p5-mst-13.2.git] / regexec.c
index 663af09..71eab5b 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -213,11 +213,10 @@ S_regcppush(pTHX_ I32 parenfloor)
                                (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
 
 STATIC char *
-S_regcppop(pTHX_ regexp *rex)
+S_regcppop(pTHX_ const regexp *rex)
 {
     dVAR;
     I32 i;
-    U32 paren = 0;
     char *input;
 
     GET_RE_DEBUG_FLAGS_DECL;
@@ -235,7 +234,7 @@ S_regcppop(pTHX_ regexp *rex)
     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
         i > 0; i -= REGCP_PAREN_ELEMS) {
        I32 tmps;
-       paren = (U32)SSPOPINT;
+       U32 paren = (U32)SSPOPINT;
        PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
        PL_regstartp[paren] = SSPOPINT;
        tmps = SSPOPINT;
@@ -268,10 +267,10 @@ S_regcppop(pTHX_ regexp *rex)
      * building DynaLoader will fail:
      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
      * --jhi */
-    for (paren = *PL_reglastparen + 1; (I32)paren <= rex->nparens; paren++) {
-       if ((I32)paren > PL_regsize)
-           PL_regstartp[paren] = -1;
-       PL_regendp[paren] = -1;
+    for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
+       if (i > PL_regsize)
+           PL_regstartp[i] = -1;
+       PL_regendp[i] = -1;
     }
 #endif
     return input;
@@ -934,7 +933,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
 
 /* We know what class REx starts with.  Try to find this position... */
 STATIC char *
-S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32 norun)
+S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, I32 norun)
 {
        dVAR;
        const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
@@ -1616,12 +1615,6 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
     GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_UNUSED_ARG(data);
-    RX_MATCH_UTF8_set(prog,do_utf8);
-
-    PL_reg_re = prog;
-#ifdef DEBUGGING
-    PL_regnarrate = DEBUG_r_TEST;
-#endif
 
     /* Be paranoid... */
     if (prog == NULL || startpos == NULL) {
@@ -1629,6 +1622,13 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
        return 0;
     }
 
+    PL_reg_re = prog;
+#ifdef DEBUGGING
+    PL_regnarrate = DEBUG_r_TEST;
+#endif
+
+    RX_MATCH_UTF8_set(prog, do_utf8);
+
     minlen = prog->minlen;
     if (strend - startpos < minlen) {
         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
@@ -1659,9 +1659,6 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
     /* see how far we have to get to not match where we matched before */
     PL_regtill = startpos+minend;
 
-    /* We start without call_cc context.  */
-    PL_reg_call_cc = 0;
-
     /* If there is a "must appear" string, look for it. */
     s = startpos;
 
@@ -2065,7 +2062,6 @@ STATIC I32                        /* 0 failure, 1 success */
 S_regtry(pTHX_ regexp *prog, char *startpos)
 {
     dVAR;
-    register I32 i;
     register I32 *sp;
     register I32 *ep;
     CHECKPOINT lastcp;
@@ -2180,6 +2176,7 @@ S_regtry(pTHX_ regexp *prog, char *startpos)
     sp = prog->startp;
     ep = prog->endp;
     if (prog->nparens) {
+       register I32 i;
        for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
            *++sp = -1;
            *++ep = -1;
@@ -2307,6 +2304,44 @@ S_push_slab(pTHX)
     goto start_recurse; \
     resume_point_##where:
 
+
+/* push a new regex state. Set newst to point to it */
+
+#define PUSH_STATE(newst, resume) \
+    depth++;   \
+    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "PUSH STATE(%d)\n", depth)); \
+    st->scan = scan;   \
+    st->next = next;   \
+    st->n = n; \
+    st->locinput = locinput;   \
+    st->resume_state = resume; \
+    newst = st+1;   \
+    if (newst >  &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1])) \
+       newst = S_push_slab(aTHX);  \
+    PL_regmatch_state = newst; \
+    newst->cc = 0;  \
+    newst->minmod = 0; \
+    newst->sw = 0;  \
+    newst->logical = 0;        \
+    newst->unwind = 0; \
+    locinput = PL_reginput; \
+    nextchr = UCHARAT(locinput);    
+
+#define POP_STATE \
+    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth)); \
+    depth--; \
+    st--; \
+    if (st < &PL_regmatch_slab->states[0]) { \
+       PL_regmatch_slab = PL_regmatch_slab->prev; \
+       st = &PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]; \
+    } \
+    PL_regmatch_state = st; \
+    scan       = st->scan; \
+    next       = st->next; \
+    n          = st->n; \
+    locinput   = st->locinput; \
+    nextchr = UCHARAT(locinput);
+
 /*
  - regmatch - main matching routine
  *
@@ -2437,6 +2472,8 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
     bool result;           /* return value of S_regmatch */
     regnode *inner;        /* Next node in internal branch. */
     int depth = 0;         /* depth of recursion */
+    regmatch_state *newst;  /* when pushing a state, this is the new one */
+    regmatch_state *cur_eval = NULL;  /* most recent (??{}) state */
     
 #ifdef DEBUGGING
     SV *re_debug_flags = NULL;
@@ -2767,7 +2804,7 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
 
                if ( st->u.trie.accepted == 1 ) {
                    DEBUG_EXECUTE_r({
-                        SV **tmp = av_fetch( trie->words, st->u.trie.accept_buff[ 0 ].wordnum-1, 0 );
+                        SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ 0 ].wordnum-1, 0 );
                                PerlIO_printf( Perl_debug_log,
                            "%*s  %sonly one match : #%d <%s>%s\n",
                            REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
@@ -3290,9 +3327,6 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
            }
            if (st->logical == 2) { /* Postponed subexpression: /(??{...})/ */
                regexp *re;
-               re_cc_state state;
-               int toggleutf;
-
                {
                    /* extract RE object from returned value; compiling if
                     * necessary */
@@ -3329,6 +3363,9 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
                        PL_regsize = osize;
                    }
                }
+
+               /* run the pattern returned from (??{...}) */
+
                DEBUG_EXECUTE_r(
                    PerlIO_printf(Perl_debug_log,
                                  "Entering embedded \"%s%.60s%s%s\"\n",
@@ -3337,62 +3374,35 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
                                  PL_colors[1],
                                  (strlen(re->precomp) > 60 ? "..." : ""))
                    );
-               state.node = next;
-               state.prev = PL_reg_call_cc;
-               state.cc = st->cc;
-               state.re = PL_reg_re;
 
-               st->cc = 0;
-           
                st->u.eval.cp = regcppush(0);   /* Save *all* the positions. */
                REGCP_SET(st->u.eval.lastcp);
-               PL_reg_re = re;
-               state.ss = PL_savestack_ix;
                *PL_reglastparen = 0;
                *PL_reglastcloseparen = 0;
-               PL_reg_call_cc = &state;
                PL_reginput = locinput;
-               toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
-                           ((re->reganch & ROPT_UTF8) != 0);
-               if (toggleutf) PL_reg_flags ^= RF_utf8;
-
-               /* XXXX This is too dramatic a measure... */
-               PL_reg_maxiter = 0;
-
-               /* XXX the only recursion left in regmatch() */
-               if (regmatch(re, re->program + 1)) {
-                   /* Even though we succeeded, we need to restore
-                      global variables, since we may be wrapped inside
-                      SUSPEND, thus the match may be not finished yet. */
-
-                   /* XXXX Do this only if SUSPENDed? */
-                   PL_reg_call_cc = state.prev;
-                   st->cc = state.cc;
-                   PL_reg_re = state.re;
-
-                   if (toggleutf) PL_reg_flags ^= RF_utf8;
-
-                   /* XXXX This is too dramatic a measure... */
-                   PL_reg_maxiter = 0;
-
-                   /* These are needed even if not SUSPEND. */
-                   ReREFCNT_dec(re);
-                   regcpblow(st->u.eval.cp);
-                   sayYES;
-               }
-               ReREFCNT_dec(re);
-               REGCP_UNWIND(st->u.eval.lastcp);
-               regcppop(rex);
-               PL_reg_call_cc = state.prev;
-               st->cc = state.cc;
-               PL_reg_re = state.re;
-               if (toggleutf) PL_reg_flags ^= RF_utf8;
 
                /* XXXX This is too dramatic a measure... */
                PL_reg_maxiter = 0;
 
                st->logical = 0;
-               sayNO;
+               st->u.eval.toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
+                           ((re->reganch & ROPT_UTF8) != 0);
+               if (st->u.eval.toggleutf) PL_reg_flags ^= RF_utf8;
+               st->u.eval.prev_rex = rex;
+               assert(rex == PL_reg_re); /* XXX */
+               rex = re;
+               PL_reg_re = rex; /* XXX */
+
+               st->u.eval.prev_eval = cur_eval;
+               st->u.eval.prev_slab = PL_regmatch_slab;
+               st->u.eval.depth = depth;
+               cur_eval = st;
+               PUSH_STATE(newst, resume_EVAL);
+               st = newst;
+
+               /* now continue  from first node in postoned RE */
+               next = re->program + 1;
+               break;
                /* NOTREACHED */
            }
            /* /(?(?{...})X|Y)/ */
@@ -4234,49 +4244,49 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
            sayNO;
            break;
        case END:
-           if (PL_reg_call_cc) {
-               st->u.end.cur_call_cc = PL_reg_call_cc;
-               st->u.end.end_re = PL_reg_re;
+           if (cur_eval) {
+               /* we have successfully completed the execution of a
+                * postponed re. Pop all states back to the last EVAL
+                * then continue with the node following the (??{...})
+                */
+
+               /* this simulates a POP_STATE, except that it pops several
+                * levels, and doesn't restore locinput */
 
-               /* Save *all* the positions. */
-               st->u.end.cp = regcppush(0);
-               REGCP_SET(st->u.end.lastcp);
+               st = cur_eval;
+               PL_regmatch_slab = st->u.eval.prev_slab;
+               cur_eval = st->u.eval.prev_eval;
+               depth = st->u.eval.depth;
 
-               /* Restore parens of the caller. */
+               PL_regmatch_state = st;
+               scan    = st->scan;
+               next    = st->next;
+               n               = st->n;
+
+               if (st->u.eval.toggleutf)
+                   PL_reg_flags ^= RF_utf8;
+               ReREFCNT_dec(rex);
+               rex = st->u.eval.prev_rex;
+               PL_reg_re = rex; /* XXX */
+               /* XXXX This is too dramatic a measure... */
+               PL_reg_maxiter = 0;
+
+               /* Restore parens of the caller without popping the
+                * savestack */
                {
                    I32 tmp = PL_savestack_ix;
-                   PL_savestack_ix = PL_reg_call_cc->ss;
+                   PL_savestack_ix = st->u.eval.lastcp;
                    regcppop(rex);
                    PL_savestack_ix = tmp;
                }
 
-               /* Make position available to the callcc. */
-               PL_reginput = locinput;
 
-               PL_reg_re = PL_reg_call_cc->re;
-               st->u.end.savecc = st->cc;
-               st->cc = PL_reg_call_cc->cc;
-               PL_reg_call_cc = PL_reg_call_cc->prev;
-               REGMATCH(st->u.end.cur_call_cc->node, END);
-               /*** all unsaved local vars undefined at this point */
-               if (result) {
-                   PL_reg_call_cc = st->u.end.cur_call_cc;
-                   regcpblow(st->u.end.cp);
-                   sayYES;
-               }
-               REGCP_UNWIND(st->u.end.lastcp);
-               regcppop(rex);
-               PL_reg_call_cc = st->u.end.cur_call_cc;
-               st->cc = st->u.end.savecc;
-               PL_reg_re = st->u.end.end_re;
+               PL_reginput = locinput;
+               /* resume at node following the (??{...}) */
+               break;
 
-               DEBUG_EXECUTE_r(
-                   PerlIO_printf(Perl_debug_log,
-                                 "%*s  continuation failed...\n",
-                                 REPORT_CODE_OFF+PL_regindent*2, "")
-                   );
-               sayNO_SILENT;
            }
+
            if (locinput < PL_regtill) {
                DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
                                      "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
@@ -4378,7 +4388,6 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
            oldst->next = next;
            oldst->n = n;
            oldst->locinput = locinput;
-           oldst->reg_call_cc = PL_reg_call_cc;
 
            st->cc = oldst->cc;
            locinput = PL_reginput;
@@ -4393,6 +4402,8 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
        }
     }
 
+
+
     /*
     * We get here only if there's trouble -- normally "case END" is
     * the terminating point.
@@ -4417,7 +4428,56 @@ yes:
 #endif
 
     result = 1;
-    goto exit_level;
+    /* XXX this is duplicate(ish) code to that in the do_no section.
+     * eventually a yes should just pop the whole stack */
+    if (depth) {
+       /* restore previous state and re-enter */
+       POP_STATE;
+
+       switch (st->resume_state) {
+       case resume_TRIE1:
+           goto resume_point_TRIE1;
+       case resume_TRIE2:
+           goto resume_point_TRIE2;
+       case resume_EVAL:
+           break;
+       case resume_CURLYX:
+           goto resume_point_CURLYX;
+       case resume_WHILEM1:
+           goto resume_point_WHILEM1;
+       case resume_WHILEM2:
+           goto resume_point_WHILEM2;
+       case resume_WHILEM3:
+           goto resume_point_WHILEM3;
+       case resume_WHILEM4:
+           goto resume_point_WHILEM4;
+       case resume_WHILEM5:
+           goto resume_point_WHILEM5;
+       case resume_WHILEM6:
+           goto resume_point_WHILEM6;
+       case resume_CURLYM1:
+           goto resume_point_CURLYM1;
+       case resume_CURLYM2:
+           goto resume_point_CURLYM2;
+       case resume_CURLYM3:
+           goto resume_point_CURLYM3;
+       case resume_CURLYM4:
+           goto resume_point_CURLYM4;
+       case resume_IFMATCH:
+           goto resume_point_IFMATCH;
+       case resume_PLUS1:
+           goto resume_point_PLUS1;
+       case resume_PLUS2:
+           goto resume_point_PLUS2;
+       case resume_PLUS3:
+           goto resume_point_PLUS3;
+       case resume_PLUS4:
+           goto resume_point_PLUS4;
+       default:
+           Perl_croak(aTHX_ "regexp resume memory corruption");
+       }
+    }
+    goto final_exit;
 
 no:
     DEBUG_EXECUTE_r(
@@ -4476,33 +4536,38 @@ do_no:
        }
        /* NOTREACHED */
     }
+
 #ifdef DEBUGGING
     PL_regindent--;
 #endif
     result = 0;
-exit_level:
-
-    if (depth--) {
-       /* restore previous state and re-enter */
-       st--;
-       if (st < &PL_regmatch_slab->states[0]) {
-           PL_regmatch_slab = PL_regmatch_slab->prev;
-           st = &PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1];
-       }
-       PL_regmatch_state = st;
-
-       PL_reg_call_cc  = st->reg_call_cc;
-       scan            = st->scan;
-       next            = st->next;
-       n               = st->n;
-       locinput        = st->locinput;
-       nextchr = UCHARAT(locinput);
 
+    if (depth) {
+       /* there's a previous state to backtrack to */
+       POP_STATE;
        switch (st->resume_state) {
        case resume_TRIE1:
            goto resume_point_TRIE1;
        case resume_TRIE2:
            goto resume_point_TRIE2;
+       case resume_EVAL:
+           /* we have failed an (??{...}). Restore state to the outer re
+            * then re-throw the failure */
+           if (st->u.eval.toggleutf)
+               PL_reg_flags ^= RF_utf8;
+           ReREFCNT_dec(rex);
+           rex = st->u.eval.prev_rex;
+           PL_reg_re = rex; /* XXX */
+           cur_eval = st->u.eval.prev_eval;
+
+           /* XXXX This is too dramatic a measure... */
+           PL_reg_maxiter = 0;
+
+           PL_reginput = locinput;
+           REGCP_UNWIND(st->u.eval.lastcp);
+           regcppop(rex);
+           goto do_no;
+
        case resume_CURLYX:
            goto resume_point_CURLYX;
        case resume_WHILEM1:
@@ -4535,13 +4600,13 @@ exit_level:
            goto resume_point_PLUS3;
        case resume_PLUS4:
            goto resume_point_PLUS4;
-       case resume_END:
-           goto resume_point_END;
        default:
            Perl_croak(aTHX_ "regexp resume memory corruption");
        }
-       /* NOTREACHED */
     }
+
+final_exit:
+
     /* restore original high-water mark */
     PL_regmatch_slab  = orig_slab;
     PL_regmatch_state = orig_state;
@@ -4821,7 +4886,7 @@ Perl_regclass_swash(pTHX_ register const regnode* node, bool doinit, SV** listsv
     SV *sw  = NULL;
     SV *si  = NULL;
     SV *alt = NULL;
-    const struct reg_data *data = PL_reg_re->data;
+    const struct reg_data *data = PL_reg_re ? PL_reg_re->data : NULL;
 
     if (data && data->count) {
        const U32 n = ARG(node);
@@ -5051,7 +5116,7 @@ static void
 restore_pos(pTHX_ void *arg)
 {
     dVAR;
-    regexp *rex = (regexp *)arg;
+    regexp * const rex = (regexp *)arg;
     if (PL_reg_eval_set) {
        if (PL_reg_oldsaved) {
            rex->subbeg = PL_reg_oldsaved;
@@ -5071,8 +5136,8 @@ STATIC void
 S_to_utf8_substr(pTHX_ register regexp *prog)
 {
     if (prog->float_substr && !prog->float_utf8) {
-       SV* sv;
-       prog->float_utf8 = sv = newSVsv(prog->float_substr);
+       SV* const sv = newSVsv(prog->float_substr);
+       prog->float_utf8 = sv;
        sv_utf8_upgrade(sv);
        if (SvTAIL(prog->float_substr))
            SvTAIL_on(sv);
@@ -5080,8 +5145,8 @@ S_to_utf8_substr(pTHX_ register regexp *prog)
            prog->check_utf8 = sv;
     }
     if (prog->anchored_substr && !prog->anchored_utf8) {
-       SV* sv;
-       prog->anchored_utf8 = sv = newSVsv(prog->anchored_substr);
+       SV* const sv = newSVsv(prog->anchored_substr);
+       prog->anchored_utf8 = sv;
        sv_utf8_upgrade(sv);
        if (SvTAIL(prog->anchored_substr))
            SvTAIL_on(sv);
@@ -5095,8 +5160,8 @@ S_to_byte_substr(pTHX_ register regexp *prog)
 {
     dVAR;
     if (prog->float_utf8 && !prog->float_substr) {
-       SV* sv;
-       prog->float_substr = sv = newSVsv(prog->float_utf8);
+       SV* sv = newSVsv(prog->float_utf8);
+       prog->float_substr = sv;
        if (sv_utf8_downgrade(sv, TRUE)) {
            if (SvTAIL(prog->float_utf8))
                SvTAIL_on(sv);
@@ -5108,8 +5173,8 @@ S_to_byte_substr(pTHX_ register regexp *prog)
            prog->check_substr = sv;
     }
     if (prog->anchored_utf8 && !prog->anchored_substr) {
-       SV* sv;
-       prog->anchored_substr = sv = newSVsv(prog->anchored_utf8);
+       SV* sv = newSVsv(prog->anchored_utf8);
+       prog->anchored_substr = sv;
        if (sv_utf8_downgrade(sv, TRUE)) {
            if (SvTAIL(prog->anchored_utf8))
                SvTAIL_on(sv);