Remove the final recursion path from S_regmatch: (??{...})
Dave Mitchell [Wed, 12 Apr 2006 22:43:03 +0000 (22:43 +0000)]
Also put PUSH/POP_STATE infastructure in place
Also eliminate PL_reg_call_cc
       (only another 440 global vars to go ...)

p4raw-id: //depot/perl@27778

regexec.c
regexp.h
sv.c

index 61a0115..71eab5b 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -1659,9 +1659,6 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
     /* see how far we have to get to not match where we matched before */
     PL_regtill = startpos+minend;
 
-    /* We start without call_cc context.  */
-    PL_reg_call_cc = 0;
-
     /* If there is a "must appear" string, look for it. */
     s = startpos;
 
@@ -2307,6 +2304,44 @@ S_push_slab(pTHX)
     goto start_recurse; \
     resume_point_##where:
 
+
+/* push a new regex state. Set newst to point to it */
+
+#define PUSH_STATE(newst, resume) \
+    depth++;   \
+    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "PUSH STATE(%d)\n", depth)); \
+    st->scan = scan;   \
+    st->next = next;   \
+    st->n = n; \
+    st->locinput = locinput;   \
+    st->resume_state = resume; \
+    newst = st+1;   \
+    if (newst >  &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1])) \
+       newst = S_push_slab(aTHX);  \
+    PL_regmatch_state = newst; \
+    newst->cc = 0;  \
+    newst->minmod = 0; \
+    newst->sw = 0;  \
+    newst->logical = 0;        \
+    newst->unwind = 0; \
+    locinput = PL_reginput; \
+    nextchr = UCHARAT(locinput);    
+
+#define POP_STATE \
+    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth)); \
+    depth--; \
+    st--; \
+    if (st < &PL_regmatch_slab->states[0]) { \
+       PL_regmatch_slab = PL_regmatch_slab->prev; \
+       st = &PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]; \
+    } \
+    PL_regmatch_state = st; \
+    scan       = st->scan; \
+    next       = st->next; \
+    n          = st->n; \
+    locinput   = st->locinput; \
+    nextchr = UCHARAT(locinput);
+
 /*
  - regmatch - main matching routine
  *
@@ -2437,6 +2472,8 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
     bool result;           /* return value of S_regmatch */
     regnode *inner;        /* Next node in internal branch. */
     int depth = 0;         /* depth of recursion */
+    regmatch_state *newst;  /* when pushing a state, this is the new one */
+    regmatch_state *cur_eval = NULL;  /* most recent (??{}) state */
     
 #ifdef DEBUGGING
     SV *re_debug_flags = NULL;
@@ -3290,9 +3327,6 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
            }
            if (st->logical == 2) { /* Postponed subexpression: /(??{...})/ */
                regexp *re;
-               re_cc_state state;
-               int toggleutf;
-
                {
                    /* extract RE object from returned value; compiling if
                     * necessary */
@@ -3329,6 +3363,9 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
                        PL_regsize = osize;
                    }
                }
+
+               /* run the pattern returned from (??{...}) */
+
                DEBUG_EXECUTE_r(
                    PerlIO_printf(Perl_debug_log,
                                  "Entering embedded \"%s%.60s%s%s\"\n",
@@ -3337,62 +3374,35 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
                                  PL_colors[1],
                                  (strlen(re->precomp) > 60 ? "..." : ""))
                    );
-               state.node = next;
-               state.prev = PL_reg_call_cc;
-               state.cc = st->cc;
-               state.re = PL_reg_re;
 
-               st->cc = 0;
-           
                st->u.eval.cp = regcppush(0);   /* Save *all* the positions. */
                REGCP_SET(st->u.eval.lastcp);
-               PL_reg_re = re;
-               state.ss = PL_savestack_ix;
                *PL_reglastparen = 0;
                *PL_reglastcloseparen = 0;
-               PL_reg_call_cc = &state;
                PL_reginput = locinput;
-               toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
-                           ((re->reganch & ROPT_UTF8) != 0);
-               if (toggleutf) PL_reg_flags ^= RF_utf8;
-
-               /* XXXX This is too dramatic a measure... */
-               PL_reg_maxiter = 0;
-
-               /* XXX the only recursion left in regmatch() */
-               if (regmatch(re, re->program + 1)) {
-                   /* Even though we succeeded, we need to restore
-                      global variables, since we may be wrapped inside
-                      SUSPEND, thus the match may be not finished yet. */
-
-                   /* XXXX Do this only if SUSPENDed? */
-                   PL_reg_call_cc = state.prev;
-                   st->cc = state.cc;
-                   PL_reg_re = state.re;
-
-                   if (toggleutf) PL_reg_flags ^= RF_utf8;
-
-                   /* XXXX This is too dramatic a measure... */
-                   PL_reg_maxiter = 0;
-
-                   /* These are needed even if not SUSPEND. */
-                   ReREFCNT_dec(re);
-                   regcpblow(st->u.eval.cp);
-                   sayYES;
-               }
-               ReREFCNT_dec(re);
-               REGCP_UNWIND(st->u.eval.lastcp);
-               regcppop(rex);
-               PL_reg_call_cc = state.prev;
-               st->cc = state.cc;
-               PL_reg_re = state.re;
-               if (toggleutf) PL_reg_flags ^= RF_utf8;
 
                /* XXXX This is too dramatic a measure... */
                PL_reg_maxiter = 0;
 
                st->logical = 0;
-               sayNO;
+               st->u.eval.toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
+                           ((re->reganch & ROPT_UTF8) != 0);
+               if (st->u.eval.toggleutf) PL_reg_flags ^= RF_utf8;
+               st->u.eval.prev_rex = rex;
+               assert(rex == PL_reg_re); /* XXX */
+               rex = re;
+               PL_reg_re = rex; /* XXX */
+
+               st->u.eval.prev_eval = cur_eval;
+               st->u.eval.prev_slab = PL_regmatch_slab;
+               st->u.eval.depth = depth;
+               cur_eval = st;
+               PUSH_STATE(newst, resume_EVAL);
+               st = newst;
+
+               /* now continue  from first node in postoned RE */
+               next = re->program + 1;
+               break;
                /* NOTREACHED */
            }
            /* /(?(?{...})X|Y)/ */
@@ -4234,49 +4244,49 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
            sayNO;
            break;
        case END:
-           if (PL_reg_call_cc) {
-               st->u.end.cur_call_cc = PL_reg_call_cc;
-               st->u.end.end_re = PL_reg_re;
+           if (cur_eval) {
+               /* we have successfully completed the execution of a
+                * postponed re. Pop all states back to the last EVAL
+                * then continue with the node following the (??{...})
+                */
+
+               /* this simulates a POP_STATE, except that it pops several
+                * levels, and doesn't restore locinput */
 
-               /* Save *all* the positions. */
-               st->u.end.cp = regcppush(0);
-               REGCP_SET(st->u.end.lastcp);
+               st = cur_eval;
+               PL_regmatch_slab = st->u.eval.prev_slab;
+               cur_eval = st->u.eval.prev_eval;
+               depth = st->u.eval.depth;
 
-               /* Restore parens of the caller. */
+               PL_regmatch_state = st;
+               scan    = st->scan;
+               next    = st->next;
+               n               = st->n;
+
+               if (st->u.eval.toggleutf)
+                   PL_reg_flags ^= RF_utf8;
+               ReREFCNT_dec(rex);
+               rex = st->u.eval.prev_rex;
+               PL_reg_re = rex; /* XXX */
+               /* XXXX This is too dramatic a measure... */
+               PL_reg_maxiter = 0;
+
+               /* Restore parens of the caller without popping the
+                * savestack */
                {
                    I32 tmp = PL_savestack_ix;
-                   PL_savestack_ix = PL_reg_call_cc->ss;
+                   PL_savestack_ix = st->u.eval.lastcp;
                    regcppop(rex);
                    PL_savestack_ix = tmp;
                }
 
-               /* Make position available to the callcc. */
-               PL_reginput = locinput;
 
-               PL_reg_re = PL_reg_call_cc->re;
-               st->u.end.savecc = st->cc;
-               st->cc = PL_reg_call_cc->cc;
-               PL_reg_call_cc = PL_reg_call_cc->prev;
-               REGMATCH(st->u.end.cur_call_cc->node, END);
-               /*** all unsaved local vars undefined at this point */
-               if (result) {
-                   PL_reg_call_cc = st->u.end.cur_call_cc;
-                   regcpblow(st->u.end.cp);
-                   sayYES;
-               }
-               REGCP_UNWIND(st->u.end.lastcp);
-               regcppop(rex);
-               PL_reg_call_cc = st->u.end.cur_call_cc;
-               st->cc = st->u.end.savecc;
-               PL_reg_re = st->u.end.end_re;
+               PL_reginput = locinput;
+               /* resume at node following the (??{...}) */
+               break;
 
-               DEBUG_EXECUTE_r(
-                   PerlIO_printf(Perl_debug_log,
-                                 "%*s  continuation failed...\n",
-                                 REPORT_CODE_OFF+PL_regindent*2, "")
-                   );
-               sayNO_SILENT;
            }
+
            if (locinput < PL_regtill) {
                DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
                                      "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
@@ -4378,7 +4388,6 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
            oldst->next = next;
            oldst->n = n;
            oldst->locinput = locinput;
-           oldst->reg_call_cc = PL_reg_call_cc;
 
            st->cc = oldst->cc;
            locinput = PL_reginput;
@@ -4393,6 +4402,8 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
        }
     }
 
+
+
     /*
     * We get here only if there's trouble -- normally "case END" is
     * the terminating point.
@@ -4417,7 +4428,56 @@ yes:
 #endif
 
     result = 1;
-    goto exit_level;
+    /* XXX this is duplicate(ish) code to that in the do_no section.
+     * eventually a yes should just pop the whole stack */
+    if (depth) {
+       /* restore previous state and re-enter */
+       POP_STATE;
+
+       switch (st->resume_state) {
+       case resume_TRIE1:
+           goto resume_point_TRIE1;
+       case resume_TRIE2:
+           goto resume_point_TRIE2;
+       case resume_EVAL:
+           break;
+       case resume_CURLYX:
+           goto resume_point_CURLYX;
+       case resume_WHILEM1:
+           goto resume_point_WHILEM1;
+       case resume_WHILEM2:
+           goto resume_point_WHILEM2;
+       case resume_WHILEM3:
+           goto resume_point_WHILEM3;
+       case resume_WHILEM4:
+           goto resume_point_WHILEM4;
+       case resume_WHILEM5:
+           goto resume_point_WHILEM5;
+       case resume_WHILEM6:
+           goto resume_point_WHILEM6;
+       case resume_CURLYM1:
+           goto resume_point_CURLYM1;
+       case resume_CURLYM2:
+           goto resume_point_CURLYM2;
+       case resume_CURLYM3:
+           goto resume_point_CURLYM3;
+       case resume_CURLYM4:
+           goto resume_point_CURLYM4;
+       case resume_IFMATCH:
+           goto resume_point_IFMATCH;
+       case resume_PLUS1:
+           goto resume_point_PLUS1;
+       case resume_PLUS2:
+           goto resume_point_PLUS2;
+       case resume_PLUS3:
+           goto resume_point_PLUS3;
+       case resume_PLUS4:
+           goto resume_point_PLUS4;
+       default:
+           Perl_croak(aTHX_ "regexp resume memory corruption");
+       }
+    }
+    goto final_exit;
 
 no:
     DEBUG_EXECUTE_r(
@@ -4476,33 +4536,38 @@ do_no:
        }
        /* NOTREACHED */
     }
+
 #ifdef DEBUGGING
     PL_regindent--;
 #endif
     result = 0;
-exit_level:
-
-    if (depth--) {
-       /* restore previous state and re-enter */
-       st--;
-       if (st < &PL_regmatch_slab->states[0]) {
-           PL_regmatch_slab = PL_regmatch_slab->prev;
-           st = &PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1];
-       }
-       PL_regmatch_state = st;
-
-       PL_reg_call_cc  = st->reg_call_cc;
-       scan            = st->scan;
-       next            = st->next;
-       n               = st->n;
-       locinput        = st->locinput;
-       nextchr = UCHARAT(locinput);
 
+    if (depth) {
+       /* there's a previous state to backtrack to */
+       POP_STATE;
        switch (st->resume_state) {
        case resume_TRIE1:
            goto resume_point_TRIE1;
        case resume_TRIE2:
            goto resume_point_TRIE2;
+       case resume_EVAL:
+           /* we have failed an (??{...}). Restore state to the outer re
+            * then re-throw the failure */
+           if (st->u.eval.toggleutf)
+               PL_reg_flags ^= RF_utf8;
+           ReREFCNT_dec(rex);
+           rex = st->u.eval.prev_rex;
+           PL_reg_re = rex; /* XXX */
+           cur_eval = st->u.eval.prev_eval;
+
+           /* XXXX This is too dramatic a measure... */
+           PL_reg_maxiter = 0;
+
+           PL_reginput = locinput;
+           REGCP_UNWIND(st->u.eval.lastcp);
+           regcppop(rex);
+           goto do_no;
+
        case resume_CURLYX:
            goto resume_point_CURLYX;
        case resume_WHILEM1:
@@ -4535,13 +4600,13 @@ exit_level:
            goto resume_point_PLUS3;
        case resume_PLUS4:
            goto resume_point_PLUS4;
-       case resume_END:
-           goto resume_point_END;
        default:
            Perl_croak(aTHX_ "regexp resume memory corruption");
        }
-       /* NOTREACHED */
     }
+
+final_exit:
+
     /* restore original high-water mark */
     PL_regmatch_slab  = orig_slab;
     PL_regmatch_state = orig_state;
index 57f8d5d..bb02d22 100644 (file)
--- a/regexp.h
+++ b/regexp.h
@@ -159,19 +159,10 @@ typedef struct _reg_trie_accepted reg_trie_accepted;
 
 typedef I32 CHECKPOINT;
 
-typedef struct re_cc_state
-{
-    I32 ss;
-    regnode *node;
-    struct re_cc_state *prev;
-    struct regmatch_state *cc; /* state corresponding to the current curly */
-    regexp *re;
-} re_cc_state;
-
-
 typedef enum {
     resume_TRIE1,
     resume_TRIE2,
+    resume_EVAL,
     resume_CURLYX,
     resume_WHILEM1,
     resume_WHILEM2,
@@ -187,8 +178,7 @@ typedef enum {
     resume_PLUS1,
     resume_PLUS2,
     resume_PLUS3,
-    resume_PLUS4,
-    resume_END
+    resume_PLUS4
 } regmatch_resume_states;
 
 
@@ -200,7 +190,7 @@ typedef struct regmatch_state {
     regmatch_resume_states resume_state; /* where to jump to on return */
     regnode *scan;             /* Current node. */
     regnode *next;             /* Next node. */
-    bool minmod;               /* the next "{n.m}" is a "{n,m}?" */
+    bool minmod;               /* the next "{n,m}" is a "{n,m}?" */
     bool sw;                   /* the condition value in (?(cond)a|b) */
     int logical;
     I32 unwind;                        /* savestack index of current unwind block */
@@ -219,8 +209,14 @@ typedef struct regmatch_state {
        } trie;
 
        struct {
-           CHECKPOINT cp;      /* remember current savestack indexes */
-           CHECKPOINT lastcp;
+           regexp      *prev_rex;
+           int         toggleutf;
+           CHECKPOINT  cp;     /* remember current savestack indexes */
+           CHECKPOINT  lastcp;
+           struct regmatch_state  *prev_eval; /* save cur_eval */
+           struct regmatch_slab   *prev_slab;
+           int depth;
+
        } eval;
 
        struct {
@@ -263,17 +259,7 @@ typedef struct regmatch_state {
            char *old;
            int count;
        } plus; /* and CURLYN/CURLY/STAR */
-
-       struct {
-           CHECKPOINT cp;      /* remember current savestack indexes */
-           CHECKPOINT lastcp;
-           struct regmatch_state *savecc;
-           re_cc_state *cur_call_cc;
-           regexp *end_re;
-       } end;
     } u;
-
-    re_cc_state *reg_call_cc;  /* saved value of PL_reg_call_cc */
 } regmatch_state;
 
 /* how many regmatch_state structs to allocate as a single slab.
@@ -303,7 +289,6 @@ typedef struct regmatch_slab {
 #define PL_reg_eval_set                PL_reg_state.re_state_reg_eval_set
 #define PL_regnarrate          PL_reg_state.re_state_regnarrate
 #define PL_regindent           PL_reg_state.re_state_regindent
-#define PL_reg_call_cc         PL_reg_state.re_state_reg_call_cc
 #define PL_reg_re              PL_reg_state.re_state_reg_re
 #define PL_reg_ganch           PL_reg_state.re_state_reg_ganch
 #define PL_reg_sv              PL_reg_state.re_state_reg_sv
@@ -338,7 +323,6 @@ struct re_save_state {
     I32 re_state_reg_eval_set;         /* from regexec.c */
     I32 re_state_regnarrate;           /* from regexec.c */
     int re_state_regindent;            /* from regexec.c */
-    struct re_cc_state *re_state_reg_call_cc;          /* from regexec.c */
     regexp *re_state_reg_re;           /* from regexec.c */
     char *re_state_reg_ganch;          /* from regexec.c */
     SV *re_state_reg_sv;               /* from regexec.c */
diff --git a/sv.c b/sv.c
index cf42029..d6135ed 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -10657,8 +10657,6 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param)
                /* I assume that it only ever "worked" because no-one called
                   (pseudo)fork while the regexp engine had re-entered itself.
                */
-               new_state->re_state_reg_call_cc
-                   = any_dup(old_state->re_state_reg_call_cc, proto_perl);
                new_state->re_state_reg_re
                    = any_dup(old_state->re_state_reg_re, proto_perl);
                new_state->re_state_reg_ganch