From: Dave Mitchell Date: Wed, 12 Apr 2006 22:43:03 +0000 (+0000) Subject: Remove the final recursion path from S_regmatch: (??{...}) X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=aa283a383ef6540d57dd786b93d8ba9bd303e3e6;p=p5sagit%2Fp5-mst-13.2.git Remove the final recursion path from S_regmatch: (??{...}) Also put PUSH/POP_STATE infastructure in place Also eliminate PL_reg_call_cc (only another 440 global vars to go ...) p4raw-id: //depot/perl@27778 --- diff --git a/regexec.c b/regexec.c index 61a0115..71eab5b 100644 --- a/regexec.c +++ b/regexec.c @@ -1659,9 +1659,6 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * /* see how far we have to get to not match where we matched before */ PL_regtill = startpos+minend; - /* We start without call_cc context. */ - PL_reg_call_cc = 0; - /* If there is a "must appear" string, look for it. */ s = startpos; @@ -2307,6 +2304,44 @@ S_push_slab(pTHX) goto start_recurse; \ resume_point_##where: + +/* push a new regex state. Set newst to point to it */ + +#define PUSH_STATE(newst, resume) \ + depth++; \ + DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "PUSH STATE(%d)\n", depth)); \ + st->scan = scan; \ + st->next = next; \ + st->n = n; \ + st->locinput = locinput; \ + st->resume_state = resume; \ + newst = st+1; \ + if (newst > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1])) \ + newst = S_push_slab(aTHX); \ + PL_regmatch_state = newst; \ + newst->cc = 0; \ + newst->minmod = 0; \ + newst->sw = 0; \ + newst->logical = 0; \ + newst->unwind = 0; \ + locinput = PL_reginput; \ + nextchr = UCHARAT(locinput); + +#define POP_STATE \ + DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth)); \ + depth--; \ + st--; \ + if (st < &PL_regmatch_slab->states[0]) { \ + PL_regmatch_slab = PL_regmatch_slab->prev; \ + st = &PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]; \ + } \ + PL_regmatch_state = st; \ + scan = st->scan; \ + next = st->next; \ + n = st->n; \ + locinput = st->locinput; \ + nextchr = UCHARAT(locinput); + /* - regmatch - main matching routine * @@ -2437,6 +2472,8 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) bool result; /* return value of S_regmatch */ regnode *inner; /* Next node in internal branch. */ int depth = 0; /* depth of recursion */ + regmatch_state *newst; /* when pushing a state, this is the new one */ + regmatch_state *cur_eval = NULL; /* most recent (??{}) state */ #ifdef DEBUGGING SV *re_debug_flags = NULL; @@ -3290,9 +3327,6 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) } if (st->logical == 2) { /* Postponed subexpression: /(??{...})/ */ regexp *re; - re_cc_state state; - int toggleutf; - { /* extract RE object from returned value; compiling if * necessary */ @@ -3329,6 +3363,9 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) PL_regsize = osize; } } + + /* run the pattern returned from (??{...}) */ + DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, "Entering embedded \"%s%.60s%s%s\"\n", @@ -3337,62 +3374,35 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) PL_colors[1], (strlen(re->precomp) > 60 ? "..." : "")) ); - state.node = next; - state.prev = PL_reg_call_cc; - state.cc = st->cc; - state.re = PL_reg_re; - st->cc = 0; - st->u.eval.cp = regcppush(0); /* Save *all* the positions. */ REGCP_SET(st->u.eval.lastcp); - PL_reg_re = re; - state.ss = PL_savestack_ix; *PL_reglastparen = 0; *PL_reglastcloseparen = 0; - PL_reg_call_cc = &state; PL_reginput = locinput; - toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^ - ((re->reganch & ROPT_UTF8) != 0); - if (toggleutf) PL_reg_flags ^= RF_utf8; - - /* XXXX This is too dramatic a measure... */ - PL_reg_maxiter = 0; - - /* XXX the only recursion left in regmatch() */ - if (regmatch(re, re->program + 1)) { - /* Even though we succeeded, we need to restore - global variables, since we may be wrapped inside - SUSPEND, thus the match may be not finished yet. */ - - /* XXXX Do this only if SUSPENDed? */ - PL_reg_call_cc = state.prev; - st->cc = state.cc; - PL_reg_re = state.re; - - if (toggleutf) PL_reg_flags ^= RF_utf8; - - /* XXXX This is too dramatic a measure... */ - PL_reg_maxiter = 0; - - /* These are needed even if not SUSPEND. */ - ReREFCNT_dec(re); - regcpblow(st->u.eval.cp); - sayYES; - } - ReREFCNT_dec(re); - REGCP_UNWIND(st->u.eval.lastcp); - regcppop(rex); - PL_reg_call_cc = state.prev; - st->cc = state.cc; - PL_reg_re = state.re; - if (toggleutf) PL_reg_flags ^= RF_utf8; /* XXXX This is too dramatic a measure... */ PL_reg_maxiter = 0; st->logical = 0; - sayNO; + st->u.eval.toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^ + ((re->reganch & ROPT_UTF8) != 0); + if (st->u.eval.toggleutf) PL_reg_flags ^= RF_utf8; + st->u.eval.prev_rex = rex; + assert(rex == PL_reg_re); /* XXX */ + rex = re; + PL_reg_re = rex; /* XXX */ + + st->u.eval.prev_eval = cur_eval; + st->u.eval.prev_slab = PL_regmatch_slab; + st->u.eval.depth = depth; + cur_eval = st; + PUSH_STATE(newst, resume_EVAL); + st = newst; + + /* now continue from first node in postoned RE */ + next = re->program + 1; + break; /* NOTREACHED */ } /* /(?(?{...})X|Y)/ */ @@ -4234,49 +4244,49 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) sayNO; break; case END: - if (PL_reg_call_cc) { - st->u.end.cur_call_cc = PL_reg_call_cc; - st->u.end.end_re = PL_reg_re; + if (cur_eval) { + /* we have successfully completed the execution of a + * postponed re. Pop all states back to the last EVAL + * then continue with the node following the (??{...}) + */ + + /* this simulates a POP_STATE, except that it pops several + * levels, and doesn't restore locinput */ - /* Save *all* the positions. */ - st->u.end.cp = regcppush(0); - REGCP_SET(st->u.end.lastcp); + st = cur_eval; + PL_regmatch_slab = st->u.eval.prev_slab; + cur_eval = st->u.eval.prev_eval; + depth = st->u.eval.depth; - /* Restore parens of the caller. */ + PL_regmatch_state = st; + scan = st->scan; + next = st->next; + n = st->n; + + if (st->u.eval.toggleutf) + PL_reg_flags ^= RF_utf8; + ReREFCNT_dec(rex); + rex = st->u.eval.prev_rex; + PL_reg_re = rex; /* XXX */ + /* XXXX This is too dramatic a measure... */ + PL_reg_maxiter = 0; + + /* Restore parens of the caller without popping the + * savestack */ { I32 tmp = PL_savestack_ix; - PL_savestack_ix = PL_reg_call_cc->ss; + PL_savestack_ix = st->u.eval.lastcp; regcppop(rex); PL_savestack_ix = tmp; } - /* Make position available to the callcc. */ - PL_reginput = locinput; - PL_reg_re = PL_reg_call_cc->re; - st->u.end.savecc = st->cc; - st->cc = PL_reg_call_cc->cc; - PL_reg_call_cc = PL_reg_call_cc->prev; - REGMATCH(st->u.end.cur_call_cc->node, END); - /*** all unsaved local vars undefined at this point */ - if (result) { - PL_reg_call_cc = st->u.end.cur_call_cc; - regcpblow(st->u.end.cp); - sayYES; - } - REGCP_UNWIND(st->u.end.lastcp); - regcppop(rex); - PL_reg_call_cc = st->u.end.cur_call_cc; - st->cc = st->u.end.savecc; - PL_reg_re = st->u.end.end_re; + PL_reginput = locinput; + /* resume at node following the (??{...}) */ + break; - DEBUG_EXECUTE_r( - PerlIO_printf(Perl_debug_log, - "%*s continuation failed...\n", - REPORT_CODE_OFF+PL_regindent*2, "") - ); - sayNO_SILENT; } + if (locinput < PL_regtill) { DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n", @@ -4378,7 +4388,6 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) oldst->next = next; oldst->n = n; oldst->locinput = locinput; - oldst->reg_call_cc = PL_reg_call_cc; st->cc = oldst->cc; locinput = PL_reginput; @@ -4393,6 +4402,8 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) } } + + /* * We get here only if there's trouble -- normally "case END" is * the terminating point. @@ -4417,7 +4428,56 @@ yes: #endif result = 1; - goto exit_level; + /* XXX this is duplicate(ish) code to that in the do_no section. + * eventually a yes should just pop the whole stack */ + if (depth) { + /* restore previous state and re-enter */ + POP_STATE; + + switch (st->resume_state) { + case resume_TRIE1: + goto resume_point_TRIE1; + case resume_TRIE2: + goto resume_point_TRIE2; + case resume_EVAL: + break; + case resume_CURLYX: + goto resume_point_CURLYX; + case resume_WHILEM1: + goto resume_point_WHILEM1; + case resume_WHILEM2: + goto resume_point_WHILEM2; + case resume_WHILEM3: + goto resume_point_WHILEM3; + case resume_WHILEM4: + goto resume_point_WHILEM4; + case resume_WHILEM5: + goto resume_point_WHILEM5; + case resume_WHILEM6: + goto resume_point_WHILEM6; + case resume_CURLYM1: + goto resume_point_CURLYM1; + case resume_CURLYM2: + goto resume_point_CURLYM2; + case resume_CURLYM3: + goto resume_point_CURLYM3; + case resume_CURLYM4: + goto resume_point_CURLYM4; + case resume_IFMATCH: + goto resume_point_IFMATCH; + case resume_PLUS1: + goto resume_point_PLUS1; + case resume_PLUS2: + goto resume_point_PLUS2; + case resume_PLUS3: + goto resume_point_PLUS3; + case resume_PLUS4: + goto resume_point_PLUS4; + default: + Perl_croak(aTHX_ "regexp resume memory corruption"); + } + } + goto final_exit; no: DEBUG_EXECUTE_r( @@ -4476,33 +4536,38 @@ do_no: } /* NOTREACHED */ } + #ifdef DEBUGGING PL_regindent--; #endif result = 0; -exit_level: - - if (depth--) { - /* restore previous state and re-enter */ - st--; - if (st < &PL_regmatch_slab->states[0]) { - PL_regmatch_slab = PL_regmatch_slab->prev; - st = &PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]; - } - PL_regmatch_state = st; - - PL_reg_call_cc = st->reg_call_cc; - scan = st->scan; - next = st->next; - n = st->n; - locinput = st->locinput; - nextchr = UCHARAT(locinput); + if (depth) { + /* there's a previous state to backtrack to */ + POP_STATE; switch (st->resume_state) { case resume_TRIE1: goto resume_point_TRIE1; case resume_TRIE2: goto resume_point_TRIE2; + case resume_EVAL: + /* we have failed an (??{...}). Restore state to the outer re + * then re-throw the failure */ + if (st->u.eval.toggleutf) + PL_reg_flags ^= RF_utf8; + ReREFCNT_dec(rex); + rex = st->u.eval.prev_rex; + PL_reg_re = rex; /* XXX */ + cur_eval = st->u.eval.prev_eval; + + /* XXXX This is too dramatic a measure... */ + PL_reg_maxiter = 0; + + PL_reginput = locinput; + REGCP_UNWIND(st->u.eval.lastcp); + regcppop(rex); + goto do_no; + case resume_CURLYX: goto resume_point_CURLYX; case resume_WHILEM1: @@ -4535,13 +4600,13 @@ exit_level: goto resume_point_PLUS3; case resume_PLUS4: goto resume_point_PLUS4; - case resume_END: - goto resume_point_END; default: Perl_croak(aTHX_ "regexp resume memory corruption"); } - /* NOTREACHED */ } + +final_exit: + /* restore original high-water mark */ PL_regmatch_slab = orig_slab; PL_regmatch_state = orig_state; diff --git a/regexp.h b/regexp.h index 57f8d5d..bb02d22 100644 --- a/regexp.h +++ b/regexp.h @@ -159,19 +159,10 @@ typedef struct _reg_trie_accepted reg_trie_accepted; typedef I32 CHECKPOINT; -typedef struct re_cc_state -{ - I32 ss; - regnode *node; - struct re_cc_state *prev; - struct regmatch_state *cc; /* state corresponding to the current curly */ - regexp *re; -} re_cc_state; - - typedef enum { resume_TRIE1, resume_TRIE2, + resume_EVAL, resume_CURLYX, resume_WHILEM1, resume_WHILEM2, @@ -187,8 +178,7 @@ typedef enum { resume_PLUS1, resume_PLUS2, resume_PLUS3, - resume_PLUS4, - resume_END + resume_PLUS4 } regmatch_resume_states; @@ -200,7 +190,7 @@ typedef struct regmatch_state { regmatch_resume_states resume_state; /* where to jump to on return */ regnode *scan; /* Current node. */ regnode *next; /* Next node. */ - bool minmod; /* the next "{n.m}" is a "{n,m}?" */ + bool minmod; /* the next "{n,m}" is a "{n,m}?" */ bool sw; /* the condition value in (?(cond)a|b) */ int logical; I32 unwind; /* savestack index of current unwind block */ @@ -219,8 +209,14 @@ typedef struct regmatch_state { } trie; struct { - CHECKPOINT cp; /* remember current savestack indexes */ - CHECKPOINT lastcp; + regexp *prev_rex; + int toggleutf; + CHECKPOINT cp; /* remember current savestack indexes */ + CHECKPOINT lastcp; + struct regmatch_state *prev_eval; /* save cur_eval */ + struct regmatch_slab *prev_slab; + int depth; + } eval; struct { @@ -263,17 +259,7 @@ typedef struct regmatch_state { char *old; int count; } plus; /* and CURLYN/CURLY/STAR */ - - struct { - CHECKPOINT cp; /* remember current savestack indexes */ - CHECKPOINT lastcp; - struct regmatch_state *savecc; - re_cc_state *cur_call_cc; - regexp *end_re; - } end; } u; - - re_cc_state *reg_call_cc; /* saved value of PL_reg_call_cc */ } regmatch_state; /* how many regmatch_state structs to allocate as a single slab. @@ -303,7 +289,6 @@ typedef struct regmatch_slab { #define PL_reg_eval_set PL_reg_state.re_state_reg_eval_set #define PL_regnarrate PL_reg_state.re_state_regnarrate #define PL_regindent PL_reg_state.re_state_regindent -#define PL_reg_call_cc PL_reg_state.re_state_reg_call_cc #define PL_reg_re PL_reg_state.re_state_reg_re #define PL_reg_ganch PL_reg_state.re_state_reg_ganch #define PL_reg_sv PL_reg_state.re_state_reg_sv @@ -338,7 +323,6 @@ struct re_save_state { I32 re_state_reg_eval_set; /* from regexec.c */ I32 re_state_regnarrate; /* from regexec.c */ int re_state_regindent; /* from regexec.c */ - struct re_cc_state *re_state_reg_call_cc; /* from regexec.c */ regexp *re_state_reg_re; /* from regexec.c */ char *re_state_reg_ganch; /* from regexec.c */ SV *re_state_reg_sv; /* from regexec.c */ diff --git a/sv.c b/sv.c index cf42029..d6135ed 100644 --- a/sv.c +++ b/sv.c @@ -10657,8 +10657,6 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param) /* I assume that it only ever "worked" because no-one called (pseudo)fork while the regexp engine had re-entered itself. */ - new_state->re_state_reg_call_cc - = any_dup(old_state->re_state_reg_call_cc, proto_perl); new_state->re_state_reg_re = any_dup(old_state->re_state_reg_re, proto_perl); new_state->re_state_reg_ganch