The first step in removing recursion from the REx engine
Ilya Zakharevich [Sun, 19 Nov 2000 22:30:26 +0000 (17:30 -0500)]
Message-ID: <20001119223026.A5165@monk.mps.ohio-state.edu>

p4raw-id: //depot/perl@7760

regexec.c
scope.h
t/op/pat.t

index c0d0097..be10dc9 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -145,14 +145,14 @@ S_regcppush(pTHX_ I32 parenfloor)
 }
 
 /* These are needed since we do not localize EVAL nodes: */
-#  define REGCP_SET  DEBUG_r(PerlIO_printf(Perl_debug_log,             \
+#  define REGCP_SET(cp)  DEBUG_r(PerlIO_printf(Perl_debug_log,         \
                             "  Setting an EVAL scope, savestack=%"IVdf"\n",    \
-                            (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
+                            (IV)PL_savestack_ix)); cp = PL_savestack_ix
 
-#  define REGCP_UNWIND  DEBUG_r(lastcp != PL_savestack_ix ?            \
+#  define REGCP_UNWIND(cp)  DEBUG_r(cp != PL_savestack_ix ?            \
                                PerlIO_printf(Perl_debug_log,           \
                                "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
-                               (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
+                               (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
 
 STATIC char *
 S_regcppop(pTHX)
@@ -219,7 +219,7 @@ typedef struct re_cc_state
     regexp *re;
 } re_cc_state;
 
-#define regcpblow(cp) LEAVE_SCOPE(cp)
+#define regcpblow(cp) LEAVE_SCOPE(cp)  /* Ignores regcppush()ed data. */
 
 #define TRYPAREN(paren, n, input) {                            \
     if (paren) {                                               \
@@ -1706,6 +1706,9 @@ S_regtry(pTHX_ regexp *prog, char *startpos)
     register I32 *ep;
     CHECKPOINT lastcp;
 
+#ifdef DEBUGGING
+    PL_regindent = 0;  /* XXXX Not good when matches are reenterable... */
+#endif
     if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
        MAGIC *mg;
 
@@ -1786,15 +1789,45 @@ S_regtry(pTHX_ regexp *prog, char *startpos)
            *++ep = -1;
        }
     }
-    REGCP_SET;
+    REGCP_SET(lastcp);
     if (regmatch(prog->program + 1)) {
        prog->endp[0] = PL_reginput - PL_bostr;
        return 1;
     }
-    REGCP_UNWIND;
+    REGCP_UNWIND(lastcp);
     return 0;
 }
 
+#define RE_UNWIND_BRANCH       1
+#define RE_UNWIND_BRANCHJ      2
+
+union re_unwind_t;
+
+typedef struct {               /* XX: makes sense to enlarge it... */
+    I32 type;
+    I32 prev;
+    CHECKPOINT lastcp;
+} re_unwind_generic_t;
+
+typedef struct {
+    I32 type;
+    I32 prev;
+    CHECKPOINT lastcp;
+    I32 lastparen;
+    regnode *next;
+    char *locinput;
+    I32 nextchr;
+#ifdef DEBUGGING
+    int regindent;
+#endif
+} re_unwind_branch_t;
+
+typedef union re_unwind_t {
+    I32 type;
+    re_unwind_generic_t generic;
+    re_unwind_branch_t branch;
+} re_unwind_t;
+
 /*
  - regmatch - main matching routine
  *
@@ -1824,6 +1857,9 @@ S_regmatch(pTHX_ regnode *prog)
     register char *locinput = PL_reginput;
     register I32 c1, c2, paren;        /* case fold search, parenth */
     int minmod = 0, sw = 0, logical = 0;
+    I32 unwind = 0;
+    I32 firstcp = PL_savestack_ix;
+
 #ifdef DEBUGGING
     PL_regindent++;
 #endif
@@ -1833,7 +1869,7 @@ S_regmatch(pTHX_ regnode *prog)
     scan = prog;
     while (scan != NULL) {
 #define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
-#ifdef DEBUGGING
+#if 1
 #  define sayYES goto yes
 #  define sayNO goto no
 #  define sayYES_FINAL goto yes_final
@@ -2439,7 +2475,7 @@ S_regmatch(pTHX_ regnode *prog)
                    PL_regcc = 0;
                    
                    cp = regcppush(0);  /* Save *all* the positions. */
-                   REGCP_SET;
+                   REGCP_SET(lastcp);
                    cache_re(re);
                    state.ss = PL_savestack_ix;
                    *PL_reglastparen = 0;
@@ -2469,7 +2505,7 @@ S_regmatch(pTHX_ regnode *prog)
                        sayYES;
                    }
                    ReREFCNT_dec(re);
-                   REGCP_UNWIND;
+                   REGCP_UNWIND(lastcp);
                    regcppop();
                    PL_reg_call_cc = state.prev;
                    PL_regcc = state.cc;
@@ -2730,12 +2766,12 @@ S_regmatch(pTHX_ regnode *prog)
                    if (PL_regcc)
                        ln = PL_regcc->cur;
                    cp = regcppush(cc->parenfloor);
-                   REGCP_SET;
+                   REGCP_SET(lastcp);
                    if (regmatch(cc->next)) {
                        regcpblow(cp);
                        sayYES; /* All done. */
                    }
-                   REGCP_UNWIND;
+                   REGCP_UNWIND(lastcp);
                    regcppop();
                    if (PL_regcc)
                        PL_regcc->cur = ln;
@@ -2762,12 +2798,12 @@ S_regmatch(pTHX_ regnode *prog)
                    cc->cur = n;
                    cc->lastloc = locinput;
                    cp = regcppush(cc->parenfloor);
-                   REGCP_SET;
+                   REGCP_SET(lastcp);
                    if (regmatch(cc->scan)) {
                        regcpblow(cp);
                        sayYES;
                    }
-                   REGCP_UNWIND;
+                   REGCP_UNWIND(lastcp);
                    regcppop();
                    cc->cur = n - 1;
                    cc->lastloc = lastloc;
@@ -2780,12 +2816,12 @@ S_regmatch(pTHX_ regnode *prog)
                    cp = regcppush(cc->parenfloor);
                    cc->cur = n;
                    cc->lastloc = locinput;
-                   REGCP_SET;
+                   REGCP_SET(lastcp);
                    if (regmatch(cc->scan)) {
                        regcpblow(cp);
                        sayYES;
                    }
-                   REGCP_UNWIND;
+                   REGCP_UNWIND(lastcp);
                    regcppop();         /* Restore some previous $<digit>s? */
                    PL_reginput = locinput;
                    DEBUG_r(
@@ -2831,30 +2867,30 @@ S_regmatch(pTHX_ regnode *prog)
                if (OP(next) != c1)     /* No choice. */
                    next = inner;       /* Avoid recursion. */
                else {
-                   int lastparen = *PL_reglastparen;
+                   I32 lastparen = *PL_reglastparen;
+                   I32 unwind1;
+                   re_unwind_branch_t *uw;
+
+                   /* Put unwinding data on stack */
+                   unwind1 = SSNEWt(1,re_unwind_branch_t);
+                   uw = SSPTRt(unwind1,re_unwind_branch_t);
+                   uw->prev = unwind;
+                   unwind = unwind1;
+                   uw->type = ((c1 == BRANCH)
+                               ? RE_UNWIND_BRANCH
+                               : RE_UNWIND_BRANCHJ);
+                   uw->lastparen = lastparen;
+                   uw->next = next;
+                   uw->locinput = locinput;
+                   uw->nextchr = nextchr;
+#ifdef DEBUGGING
+                   uw->regindent = ++PL_regindent;
+#endif
 
-                   REGCP_SET;
-                   do {
-                       PL_reginput = locinput;
-                       if (regmatch(inner))
-                           sayYES;
-                       REGCP_UNWIND;
-                       for (n = *PL_reglastparen; n > lastparen; n--)
-                           PL_regendp[n] = -1;
-                       *PL_reglastparen = n;
-                       scan = next;
-                       /*SUPPRESS 560*/
-                       if ((n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next))))
-                           next += n;
-                       else
-                           next = NULL;
-                       inner = NEXTOPER(scan);
-                       if (c1 == BRANCHJ) {
-                           inner = NEXTOPER(inner);
-                       }
-                   } while (scan != NULL && OP(scan) == c1);
-                   sayNO;
-                   /* NOTREACHED */
+                   REGCP_SET(uw->lastcp);
+
+                   /* Now go into the first branch */
+                   next = inner;
                }
            }
            break;
@@ -2904,7 +2940,7 @@ S_regmatch(pTHX_ regnode *prog)
                }
                else
                    c1 = c2 = -1000;
-               REGCP_SET;
+               REGCP_SET(lastcp);
                /* This may be improved if l == 0.  */
                while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
                    /* If it could work, try it. */
@@ -2923,7 +2959,7 @@ S_regmatch(pTHX_ regnode *prog)
                        }
                        if (regmatch(next))
                            sayYES;
-                       REGCP_UNWIND;
+                       REGCP_UNWIND(lastcp);
                    }
                    /* Couldn't or didn't -- move forward. */
                    PL_reginput = locinput;
@@ -2963,7 +2999,7 @@ S_regmatch(pTHX_ regnode *prog)
                    else
                        c1 = c2 = -1000;
                }
-               REGCP_SET;
+               REGCP_SET(lastcp);
                while (n >= ln) {
                    /* If it could work, try it. */
                    if (c1 == -1000 ||
@@ -2985,7 +3021,7 @@ S_regmatch(pTHX_ regnode *prog)
                        }
                        if (regmatch(next))
                            sayYES;
-                       REGCP_UNWIND;
+                       REGCP_UNWIND(lastcp);
                    }
                    /* Couldn't or didn't -- back up. */
                    n--;
@@ -3046,7 +3082,7 @@ S_regmatch(pTHX_ regnode *prog)
                if (ln && regrepeat(scan, ln) < ln)
                    sayNO;
                locinput = PL_reginput;
-               REGCP_SET;
+               REGCP_SET(lastcp);
                if (c1 != -1000) {
                    char *e = locinput + n - ln; /* Should not check after this */
                    char *old = locinput;
@@ -3076,7 +3112,7 @@ S_regmatch(pTHX_ regnode *prog)
                        /* PL_reginput == locinput now */
                        TRYPAREN(paren, ln, locinput);
                        PL_reginput = locinput; /* Could be reset... */
-                       REGCP_UNWIND;
+                       REGCP_UNWIND(lastcp);
                        /* Couldn't or didn't -- move forward. */
                        old = locinput++;
                    }
@@ -3089,7 +3125,7 @@ S_regmatch(pTHX_ regnode *prog)
                        UCHARAT(PL_reginput) == c2)
                    {
                        TRYPAREN(paren, n, PL_reginput);
-                       REGCP_UNWIND;
+                       REGCP_UNWIND(lastcp);
                    }
                    /* Couldn't or didn't -- move forward. */
                    PL_reginput = locinput;
@@ -3114,7 +3150,7 @@ S_regmatch(pTHX_ regnode *prog)
                    if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
                        ln--;
                }
-               REGCP_SET;
+               REGCP_SET(lastcp);
                if (paren) {
                    while (n >= ln) {
                        /* If it could work, try it. */
@@ -3123,7 +3159,7 @@ S_regmatch(pTHX_ regnode *prog)
                            UCHARAT(PL_reginput) == c2)
                            {
                                TRYPAREN(paren, n, PL_reginput);
-                               REGCP_UNWIND;
+                               REGCP_UNWIND(lastcp);
                            }
                        /* Couldn't or didn't -- back up. */
                        n--;
@@ -3138,7 +3174,7 @@ S_regmatch(pTHX_ regnode *prog)
                            UCHARAT(PL_reginput) == c2)
                            {
                                TRYPAREN(paren, n, PL_reginput);
-                               REGCP_UNWIND;
+                               REGCP_UNWIND(lastcp);
                            }
                        /* Couldn't or didn't -- back up. */
                        n--;
@@ -3156,7 +3192,7 @@ S_regmatch(pTHX_ regnode *prog)
                CHECKPOINT cp, lastcp;
                
                cp = regcppush(0);      /* Save *all* the positions. */
-               REGCP_SET;
+               REGCP_SET(lastcp);
                regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
                                                    the caller. */
                PL_reginput = locinput; /* Make position available to
@@ -3169,7 +3205,7 @@ S_regmatch(pTHX_ regnode *prog)
                    regcpblow(cp);
                    sayYES;
                }
-               REGCP_UNWIND;
+               REGCP_UNWIND(lastcp);
                regcppop();
                PL_reg_call_cc = cur_call_cc;
                PL_regcc = cctmp;
@@ -3276,6 +3312,7 @@ S_regmatch(pTHX_ regnode *prog)
                          PTR2UV(scan), OP(scan));
            Perl_croak(aTHX_ "regexp memory corruption");
        }
+      reenter:
        scan = next;
     }
 
@@ -3301,6 +3338,11 @@ yes:
 #ifdef DEBUGGING
     PL_regindent--;
 #endif
+
+#if 0                                  /* Breaks $^R */
+    if (unwind)
+       regcpblow(firstcp);
+#endif
     return 1;
 
 no:
@@ -3312,6 +3354,55 @@ no:
     goto do_no;
 no_final:
 do_no:
+    if (unwind) {
+       re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
+
+       switch (uw->type) {
+       case RE_UNWIND_BRANCH:
+       case RE_UNWIND_BRANCHJ:
+       {
+           re_unwind_branch_t *uwb = &(uw->branch);
+           I32 lastparen = uwb->lastparen;
+           
+           REGCP_UNWIND(uwb->lastcp);
+           for (n = *PL_reglastparen; n > lastparen; n--)
+               PL_regendp[n] = -1;
+           *PL_reglastparen = n;
+           scan = next = uwb->next;
+           if ( !scan || 
+                OP(scan) != (uwb->type == RE_UNWIND_BRANCH 
+                             ? BRANCH : BRANCHJ) ) {           /* Failure */
+               unwind = uwb->prev;
+#ifdef DEBUGGING
+               PL_regindent--;
+#endif
+               goto do_no;
+           }
+           /* Have more choice yet.  Reuse the same uwb.  */
+           /*SUPPRESS 560*/
+           if ((n = (uwb->type == RE_UNWIND_BRANCH
+                     ? NEXT_OFF(next) : ARG(next))))
+               next += n;
+           else
+               next = NULL;    /* XXXX Needn't unwinding in this case... */
+           uwb->next = next;
+           next = NEXTOPER(scan);
+           if (uwb->type == RE_UNWIND_BRANCHJ)
+               next = NEXTOPER(next);
+           locinput = uwb->locinput;
+           nextchr = uwb->nextchr;
+#ifdef DEBUGGING
+           PL_regindent = uwb->regindent;
+#endif
+
+           goto reenter;
+       }
+       /* NOT REACHED */
+       default:
+           Perl_croak(aTHX_ "regexp unwind memory corruption");
+       }
+       /* NOT REACHED */
+    }
 #ifdef DEBUGGING
     PL_regindent--;
 #endif
diff --git a/scope.h b/scope.h
index e6a4209..3aad0be 100644 (file)
--- a/scope.h
+++ b/scope.h
@@ -173,11 +173,14 @@ Closing bracket on a callback.  See C<ENTER> and L<perlcall>.
  * SSPTR() converts the index returned by SSNEW/SSNEWa() into a pointer.
  */
 
-#define SSNEW(size)             save_alloc(size, 0)
-#define SSNEWa(size,align)     save_alloc(size, \
+#define SSNEW(size)             save_alloc(aTHX_ (size), 0)
+#define SSNEWt(n,t)             SSNEW((n)*sizeof(t))
+#define SSNEWa(size,align)     save_alloc(aTHX_ (size), \
     (align - ((int)((caddr_t)&PL_savestack[PL_savestack_ix]) % align)) % align)
+#define SSNEWat(n,t,align)     SSNEWa((n)*sizeof(t), align)
 
-#define SSPTR(off,type)         ((type) ((char*)PL_savestack + off))
+#define SSPTR(off,type)         ((type)  ((char*)PL_savestack + off))
+#define SSPTRt(off,type)        ((type*) ((char*)PL_savestack + off))
 
 /* A jmpenv packages the state required to perform a proper non-local jump.
  * Note that there is a start_env initialized when perl starts, and top_env
index f0cb7dc..c6555f7 100755 (executable)
@@ -496,7 +496,7 @@ $test++;
 $_ = 'xabcx';
 foreach $ans ('', 'c') {
   /(?<=(?=a)..)((?=c)|.)/g;
-  print "not " unless $1 eq $ans;
+  print "# \$1  ='$1'\n# \$ans='$ans'\nnot " unless $1 eq $ans;
   print "ok $test\n";
   $test++;
 }
@@ -504,7 +504,7 @@ foreach $ans ('', 'c') {
 $_ = 'a';
 foreach $ans ('', 'a', '') {
   /^|a|$/g;
-  print "not " unless $& eq $ans;
+  print "# \$&  ='$&'\n# \$ans='$ans'\nnot " unless $& eq $ans;
   print "ok $test\n";
   $test++;
 }