From: Dave Mitchell Date: Sun, 16 Apr 2006 23:30:36 +0000 (+0000) Subject: generalize the 'continue on success' mechanism of deferred REs X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=77cb431f8c908a31b286bd0ba26649cb6041bd72;p=p5sagit%2Fp5-mst-13.2.git generalize the 'continue on success' mechanism of deferred REs p4raw-id: //depot/perl@27855 --- diff --git a/regexec.c b/regexec.c index acac46a..4d0388d 100644 --- a/regexec.c +++ b/regexec.c @@ -2486,7 +2486,8 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) regnode *inner; /* Next node in internal branch. */ int depth = 0; /* depth of recursion */ regmatch_state *newst; /* when pushing a state, this is the new one */ - regmatch_state *cur_eval = NULL; /* most recent (??{}) state */ + regmatch_state *yes_state = NULL; /* state to pop to on success of + subpattern */ #ifdef DEBUGGING SV *re_debug_flags = NULL; @@ -3402,10 +3403,9 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) st->u.eval.prev_rex = rex; rex = re; - st->u.eval.prev_eval = cur_eval; - st->u.eval.prev_slab = PL_regmatch_slab; - st->u.eval.depth = depth; - cur_eval = st; + /* resume to current state on success */ + st->u.yes.prev_yes_state = yes_state; + yes_state = st; PUSH_STATE(newst, resume_EVAL); st = newst; @@ -4254,48 +4254,6 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) sayNO; break; case END: - if (cur_eval) { - /* we have successfully completed the execution of a - * postponed re. Pop all states back to the last EVAL - * then continue with the node following the (??{...}) - */ - - /* this simulates a POP_STATE, except that it pops several - * levels, and doesn't restore locinput */ - - st = cur_eval; - PL_regmatch_slab = st->u.eval.prev_slab; - cur_eval = st->u.eval.prev_eval; - depth = st->u.eval.depth; - - PL_regmatch_state = st; - scan = st->scan; - next = st->next; - n = st->n; - - if (st->u.eval.toggleutf) - PL_reg_flags ^= RF_utf8; - ReREFCNT_dec(rex); - rex = st->u.eval.prev_rex; - /* XXXX This is too dramatic a measure... */ - PL_reg_maxiter = 0; - - /* Restore parens of the caller without popping the - * savestack */ - { - I32 tmp = PL_savestack_ix; - PL_savestack_ix = st->u.eval.lastcp; - regcppop(rex); - PL_savestack_ix = tmp; - } - - - PL_reginput = locinput; - /* resume at node following the (??{...}) */ - break; - - } - if (locinput < reginfo->till) { DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n", @@ -4429,6 +4387,52 @@ yes_loud: ); goto yes; yes_final: + + if (yes_state) { + /* we have successfully completed a subexpression, but we must now + * pop to the state marked by yes_state and continue from there */ + + assert(st != yes_state); + while (yes_state < SLAB_FIRST(PL_regmatch_slab) + || yes_state > SLAB_LAST(PL_regmatch_slab)) + { + /* not in this slab, pop slab */ + depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1); + PL_regmatch_slab = PL_regmatch_slab->prev; + st = SLAB_LAST(PL_regmatch_slab); + } + depth -= (st - yes_state); + DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE TO (%d)\n", depth)); \ + st = yes_state; + yes_state = st->u.yes.prev_yes_state; + PL_regmatch_state = st; + + switch (st->resume_state) { + case resume_EVAL: + if (st->u.eval.toggleutf) + PL_reg_flags ^= RF_utf8; + ReREFCNT_dec(rex); + rex = st->u.eval.prev_rex; + /* XXXX This is too dramatic a measure... */ + PL_reg_maxiter = 0; + /* Restore parens of the caller without popping the + * savestack */ + { + I32 tmp = PL_savestack_ix; + PL_savestack_ix = st->u.eval.lastcp; + regcppop(rex); + PL_savestack_ix = tmp; + } + PL_reginput = locinput; + /* continue at the node following the (??{...}) */ + next = st->next; + goto reenter; + + default: + Perl_croak(aTHX_ "unexpected yes reume state"); + } + } + DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n", PL_colors[4], PL_colors[5])); yes: @@ -4438,7 +4442,8 @@ yes: result = 1; /* XXX this is duplicate(ish) code to that in the do_no section. - * eventually a yes should just pop the whole stack */ + * eventually a yes should just pop the stack back to the current + * yes_state */ if (depth) { /* restore previous state and re-enter */ POP_STATE; @@ -4448,8 +4453,6 @@ yes: goto resume_point_TRIE1; case resume_TRIE2: goto resume_point_TRIE2; - case resume_EVAL: - break; case resume_CURLYX: goto resume_point_CURLYX; case resume_WHILEM1: @@ -4482,6 +4485,8 @@ yes: goto resume_point_PLUS3; case resume_PLUS4: goto resume_point_PLUS4; + + case resume_EVAL: default: Perl_croak(aTHX_ "regexp resume memory corruption"); } @@ -4567,7 +4572,7 @@ do_no: PL_reg_flags ^= RF_utf8; ReREFCNT_dec(rex); rex = st->u.eval.prev_rex; - cur_eval = st->u.eval.prev_eval; + yes_state = st->u.yes.prev_yes_state; /* XXXX This is too dramatic a measure... */ PL_reg_maxiter = 0; diff --git a/regexp.h b/regexp.h index 0491d34..455f048 100644 --- a/regexp.h +++ b/regexp.h @@ -214,20 +214,26 @@ typedef struct regmatch_state { I32 ln; /* len or last */ union { + + /* this is a fake union member that matches the first element + * of each member that needs to store positive backtrack + * information */ + struct { + struct regmatch_state *prev_yes_state; + } yes; + struct { reg_trie_accepted *accept_buff; U32 accepted; /* how many accepting states we have seen */ } trie; struct { + /* this first element must match u.yes */ + struct regmatch_state *prev_yes_state; regexp *prev_rex; int toggleutf; CHECKPOINT cp; /* remember current savestack indexes */ CHECKPOINT lastcp; - struct regmatch_state *prev_eval; /* save cur_eval */ - struct regmatch_slab *prev_slab; - int depth; - } eval; struct {