From: Ilya Zakharevich Date: Sun, 19 Nov 2000 22:30:26 +0000 (-0500) Subject: The first step in removing recursion from the REx engine X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=02db2b7b4dd14eb0184a5e518cff4cef3723243b;p=p5sagit%2Fp5-mst-13.2.git The first step in removing recursion from the REx engine Message-ID: <20001119223026.A5165@monk.mps.ohio-state.edu> p4raw-id: //depot/perl@7760 --- diff --git a/regexec.c b/regexec.c index c0d0097..be10dc9 100644 --- a/regexec.c +++ b/regexec.c @@ -145,14 +145,14 @@ S_regcppush(pTHX_ I32 parenfloor) } /* These are needed since we do not localize EVAL nodes: */ -# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \ +# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \ " Setting an EVAL scope, savestack=%"IVdf"\n", \ - (IV)PL_savestack_ix)); lastcp = PL_savestack_ix + (IV)PL_savestack_ix)); cp = PL_savestack_ix -# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \ +# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \ PerlIO_printf(Perl_debug_log, \ " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \ - (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp) + (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp) STATIC char * S_regcppop(pTHX) @@ -219,7 +219,7 @@ typedef struct re_cc_state regexp *re; } re_cc_state; -#define regcpblow(cp) LEAVE_SCOPE(cp) +#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */ #define TRYPAREN(paren, n, input) { \ if (paren) { \ @@ -1706,6 +1706,9 @@ S_regtry(pTHX_ regexp *prog, char *startpos) register I32 *ep; CHECKPOINT lastcp; +#ifdef DEBUGGING + PL_regindent = 0; /* XXXX Not good when matches are reenterable... */ +#endif if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) { MAGIC *mg; @@ -1786,15 +1789,45 @@ S_regtry(pTHX_ regexp *prog, char *startpos) *++ep = -1; } } - REGCP_SET; + REGCP_SET(lastcp); if (regmatch(prog->program + 1)) { prog->endp[0] = PL_reginput - PL_bostr; return 1; } - REGCP_UNWIND; + REGCP_UNWIND(lastcp); return 0; } +#define RE_UNWIND_BRANCH 1 +#define RE_UNWIND_BRANCHJ 2 + +union re_unwind_t; + +typedef struct { /* XX: makes sense to enlarge it... */ + I32 type; + I32 prev; + CHECKPOINT lastcp; +} re_unwind_generic_t; + +typedef struct { + I32 type; + I32 prev; + CHECKPOINT lastcp; + I32 lastparen; + regnode *next; + char *locinput; + I32 nextchr; +#ifdef DEBUGGING + int regindent; +#endif +} re_unwind_branch_t; + +typedef union re_unwind_t { + I32 type; + re_unwind_generic_t generic; + re_unwind_branch_t branch; +} re_unwind_t; + /* - regmatch - main matching routine * @@ -1824,6 +1857,9 @@ S_regmatch(pTHX_ regnode *prog) register char *locinput = PL_reginput; register I32 c1, c2, paren; /* case fold search, parenth */ int minmod = 0, sw = 0, logical = 0; + I32 unwind = 0; + I32 firstcp = PL_savestack_ix; + #ifdef DEBUGGING PL_regindent++; #endif @@ -1833,7 +1869,7 @@ S_regmatch(pTHX_ regnode *prog) scan = prog; while (scan != NULL) { #define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO) -#ifdef DEBUGGING +#if 1 # define sayYES goto yes # define sayNO goto no # define sayYES_FINAL goto yes_final @@ -2439,7 +2475,7 @@ S_regmatch(pTHX_ regnode *prog) PL_regcc = 0; cp = regcppush(0); /* Save *all* the positions. */ - REGCP_SET; + REGCP_SET(lastcp); cache_re(re); state.ss = PL_savestack_ix; *PL_reglastparen = 0; @@ -2469,7 +2505,7 @@ S_regmatch(pTHX_ regnode *prog) sayYES; } ReREFCNT_dec(re); - REGCP_UNWIND; + REGCP_UNWIND(lastcp); regcppop(); PL_reg_call_cc = state.prev; PL_regcc = state.cc; @@ -2730,12 +2766,12 @@ S_regmatch(pTHX_ regnode *prog) if (PL_regcc) ln = PL_regcc->cur; cp = regcppush(cc->parenfloor); - REGCP_SET; + REGCP_SET(lastcp); if (regmatch(cc->next)) { regcpblow(cp); sayYES; /* All done. */ } - REGCP_UNWIND; + REGCP_UNWIND(lastcp); regcppop(); if (PL_regcc) PL_regcc->cur = ln; @@ -2762,12 +2798,12 @@ S_regmatch(pTHX_ regnode *prog) cc->cur = n; cc->lastloc = locinput; cp = regcppush(cc->parenfloor); - REGCP_SET; + REGCP_SET(lastcp); if (regmatch(cc->scan)) { regcpblow(cp); sayYES; } - REGCP_UNWIND; + REGCP_UNWIND(lastcp); regcppop(); cc->cur = n - 1; cc->lastloc = lastloc; @@ -2780,12 +2816,12 @@ S_regmatch(pTHX_ regnode *prog) cp = regcppush(cc->parenfloor); cc->cur = n; cc->lastloc = locinput; - REGCP_SET; + REGCP_SET(lastcp); if (regmatch(cc->scan)) { regcpblow(cp); sayYES; } - REGCP_UNWIND; + REGCP_UNWIND(lastcp); regcppop(); /* Restore some previous $s? */ PL_reginput = locinput; DEBUG_r( @@ -2831,30 +2867,30 @@ S_regmatch(pTHX_ regnode *prog) if (OP(next) != c1) /* No choice. */ next = inner; /* Avoid recursion. */ else { - int lastparen = *PL_reglastparen; + I32 lastparen = *PL_reglastparen; + I32 unwind1; + re_unwind_branch_t *uw; + + /* Put unwinding data on stack */ + unwind1 = SSNEWt(1,re_unwind_branch_t); + uw = SSPTRt(unwind1,re_unwind_branch_t); + uw->prev = unwind; + unwind = unwind1; + uw->type = ((c1 == BRANCH) + ? RE_UNWIND_BRANCH + : RE_UNWIND_BRANCHJ); + uw->lastparen = lastparen; + uw->next = next; + uw->locinput = locinput; + uw->nextchr = nextchr; +#ifdef DEBUGGING + uw->regindent = ++PL_regindent; +#endif - REGCP_SET; - do { - PL_reginput = locinput; - if (regmatch(inner)) - sayYES; - REGCP_UNWIND; - for (n = *PL_reglastparen; n > lastparen; n--) - PL_regendp[n] = -1; - *PL_reglastparen = n; - scan = next; - /*SUPPRESS 560*/ - if ((n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))) - next += n; - else - next = NULL; - inner = NEXTOPER(scan); - if (c1 == BRANCHJ) { - inner = NEXTOPER(inner); - } - } while (scan != NULL && OP(scan) == c1); - sayNO; - /* NOTREACHED */ + REGCP_SET(uw->lastcp); + + /* Now go into the first branch */ + next = inner; } } break; @@ -2904,7 +2940,7 @@ S_regmatch(pTHX_ regnode *prog) } else c1 = c2 = -1000; - REGCP_SET; + REGCP_SET(lastcp); /* This may be improved if l == 0. */ while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */ /* If it could work, try it. */ @@ -2923,7 +2959,7 @@ S_regmatch(pTHX_ regnode *prog) } if (regmatch(next)) sayYES; - REGCP_UNWIND; + REGCP_UNWIND(lastcp); } /* Couldn't or didn't -- move forward. */ PL_reginput = locinput; @@ -2963,7 +2999,7 @@ S_regmatch(pTHX_ regnode *prog) else c1 = c2 = -1000; } - REGCP_SET; + REGCP_SET(lastcp); while (n >= ln) { /* If it could work, try it. */ if (c1 == -1000 || @@ -2985,7 +3021,7 @@ S_regmatch(pTHX_ regnode *prog) } if (regmatch(next)) sayYES; - REGCP_UNWIND; + REGCP_UNWIND(lastcp); } /* Couldn't or didn't -- back up. */ n--; @@ -3046,7 +3082,7 @@ S_regmatch(pTHX_ regnode *prog) if (ln && regrepeat(scan, ln) < ln) sayNO; locinput = PL_reginput; - REGCP_SET; + REGCP_SET(lastcp); if (c1 != -1000) { char *e = locinput + n - ln; /* Should not check after this */ char *old = locinput; @@ -3076,7 +3112,7 @@ S_regmatch(pTHX_ regnode *prog) /* PL_reginput == locinput now */ TRYPAREN(paren, ln, locinput); PL_reginput = locinput; /* Could be reset... */ - REGCP_UNWIND; + REGCP_UNWIND(lastcp); /* Couldn't or didn't -- move forward. */ old = locinput++; } @@ -3089,7 +3125,7 @@ S_regmatch(pTHX_ regnode *prog) UCHARAT(PL_reginput) == c2) { TRYPAREN(paren, n, PL_reginput); - REGCP_UNWIND; + REGCP_UNWIND(lastcp); } /* Couldn't or didn't -- move forward. */ PL_reginput = locinput; @@ -3114,7 +3150,7 @@ S_regmatch(pTHX_ regnode *prog) if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS) ln--; } - REGCP_SET; + REGCP_SET(lastcp); if (paren) { while (n >= ln) { /* If it could work, try it. */ @@ -3123,7 +3159,7 @@ S_regmatch(pTHX_ regnode *prog) UCHARAT(PL_reginput) == c2) { TRYPAREN(paren, n, PL_reginput); - REGCP_UNWIND; + REGCP_UNWIND(lastcp); } /* Couldn't or didn't -- back up. */ n--; @@ -3138,7 +3174,7 @@ S_regmatch(pTHX_ regnode *prog) UCHARAT(PL_reginput) == c2) { TRYPAREN(paren, n, PL_reginput); - REGCP_UNWIND; + REGCP_UNWIND(lastcp); } /* Couldn't or didn't -- back up. */ n--; @@ -3156,7 +3192,7 @@ S_regmatch(pTHX_ regnode *prog) CHECKPOINT cp, lastcp; cp = regcppush(0); /* Save *all* the positions. */ - REGCP_SET; + REGCP_SET(lastcp); regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of the caller. */ PL_reginput = locinput; /* Make position available to @@ -3169,7 +3205,7 @@ S_regmatch(pTHX_ regnode *prog) regcpblow(cp); sayYES; } - REGCP_UNWIND; + REGCP_UNWIND(lastcp); regcppop(); PL_reg_call_cc = cur_call_cc; PL_regcc = cctmp; @@ -3276,6 +3312,7 @@ S_regmatch(pTHX_ regnode *prog) PTR2UV(scan), OP(scan)); Perl_croak(aTHX_ "regexp memory corruption"); } + reenter: scan = next; } @@ -3301,6 +3338,11 @@ yes: #ifdef DEBUGGING PL_regindent--; #endif + +#if 0 /* Breaks $^R */ + if (unwind) + regcpblow(firstcp); +#endif return 1; no: @@ -3312,6 +3354,55 @@ no: goto do_no; no_final: do_no: + if (unwind) { + re_unwind_t *uw = SSPTRt(unwind,re_unwind_t); + + switch (uw->type) { + case RE_UNWIND_BRANCH: + case RE_UNWIND_BRANCHJ: + { + re_unwind_branch_t *uwb = &(uw->branch); + I32 lastparen = uwb->lastparen; + + REGCP_UNWIND(uwb->lastcp); + for (n = *PL_reglastparen; n > lastparen; n--) + PL_regendp[n] = -1; + *PL_reglastparen = n; + scan = next = uwb->next; + if ( !scan || + OP(scan) != (uwb->type == RE_UNWIND_BRANCH + ? BRANCH : BRANCHJ) ) { /* Failure */ + unwind = uwb->prev; +#ifdef DEBUGGING + PL_regindent--; +#endif + goto do_no; + } + /* Have more choice yet. Reuse the same uwb. */ + /*SUPPRESS 560*/ + if ((n = (uwb->type == RE_UNWIND_BRANCH + ? NEXT_OFF(next) : ARG(next)))) + next += n; + else + next = NULL; /* XXXX Needn't unwinding in this case... */ + uwb->next = next; + next = NEXTOPER(scan); + if (uwb->type == RE_UNWIND_BRANCHJ) + next = NEXTOPER(next); + locinput = uwb->locinput; + nextchr = uwb->nextchr; +#ifdef DEBUGGING + PL_regindent = uwb->regindent; +#endif + + goto reenter; + } + /* NOT REACHED */ + default: + Perl_croak(aTHX_ "regexp unwind memory corruption"); + } + /* NOT REACHED */ + } #ifdef DEBUGGING PL_regindent--; #endif diff --git a/scope.h b/scope.h index e6a4209..3aad0be 100644 --- a/scope.h +++ b/scope.h @@ -173,11 +173,14 @@ Closing bracket on a callback. See C and L. * SSPTR() converts the index returned by SSNEW/SSNEWa() into a pointer. */ -#define SSNEW(size) save_alloc(size, 0) -#define SSNEWa(size,align) save_alloc(size, \ +#define SSNEW(size) save_alloc(aTHX_ (size), 0) +#define SSNEWt(n,t) SSNEW((n)*sizeof(t)) +#define SSNEWa(size,align) save_alloc(aTHX_ (size), \ (align - ((int)((caddr_t)&PL_savestack[PL_savestack_ix]) % align)) % align) +#define SSNEWat(n,t,align) SSNEWa((n)*sizeof(t), align) -#define SSPTR(off,type) ((type) ((char*)PL_savestack + off)) +#define SSPTR(off,type) ((type) ((char*)PL_savestack + off)) +#define SSPTRt(off,type) ((type*) ((char*)PL_savestack + off)) /* A jmpenv packages the state required to perform a proper non-local jump. * Note that there is a start_env initialized when perl starts, and top_env diff --git a/t/op/pat.t b/t/op/pat.t index f0cb7dc..c6555f7 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -496,7 +496,7 @@ $test++; $_ = 'xabcx'; foreach $ans ('', 'c') { /(?<=(?=a)..)((?=c)|.)/g; - print "not " unless $1 eq $ans; + print "# \$1 ='$1'\n# \$ans='$ans'\nnot " unless $1 eq $ans; print "ok $test\n"; $test++; } @@ -504,7 +504,7 @@ foreach $ans ('', 'c') { $_ = 'a'; foreach $ans ('', 'a', '') { /^|a|$/g; - print "not " unless $& eq $ans; + print "# \$& ='$&'\n# \$ans='$ans'\nnot " unless $& eq $ans; print "ok $test\n"; $test++; }