From: Dave Mitchell Date: Sun, 24 Sep 2006 22:31:59 +0000 (+0000) Subject: fix regression introduced in #27778: must backtrack into inner regex X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=faec1544a062a64ce2cc9a557a1d8f35ddbe6c18;p=p5sagit%2Fp5-mst-13.2.git fix regression introduced in #27778: must backtrack into inner regex "aa" =~ /(??{"a+"})a/ p4raw-id: //depot/perl@28884 --- diff --git a/regexec.c b/regexec.c index f1cd8a3..7fbd1db 100644 --- a/regexec.c +++ b/regexec.c @@ -77,7 +77,7 @@ #define RF_tainted 1 /* tainted information used? */ #define RF_warned 2 /* warned about big count? */ -#define RF_evaled 4 /* Did an EVAL with setting? */ + #define RF_utf8 8 /* Pattern contains multibyte chars? */ #define UTF ((PL_reg_flags & RF_utf8) != 0) @@ -2384,8 +2384,8 @@ S_push_slab(pTHX) /* *** every FOO_fail should = FOO+1 */ #define TRIE_next (REGNODE_MAX+1) #define TRIE_next_fail (REGNODE_MAX+2) -#define EVAL_A (REGNODE_MAX+3) -#define EVAL_A_fail (REGNODE_MAX+4) +#define EVAL_AB (REGNODE_MAX+3) +#define EVAL_AB_fail (REGNODE_MAX+4) #define resume_CURLYX (REGNODE_MAX+5) #define resume_WHILEM1 (REGNODE_MAX+6) #define resume_WHILEM2 (REGNODE_MAX+7) @@ -2530,6 +2530,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) int depth = 0; /* depth of recursion */ regmatch_state *yes_state = NULL; /* state to pop to on success of subpattern */ + regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */ struct regmatch_state *cur_curlyx = NULL; /* most recent curlyx */ U32 state_num; @@ -3426,19 +3427,27 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) PL_reg_maxiter = 0; st->logical = 0; - ST.toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^ - ((re->reganch & ROPT_UTF8) != 0); - if (ST.toggleutf) PL_reg_flags ^= RF_utf8; + ST.toggle_reg_flags = PL_reg_flags; + if (re->reganch & ROPT_UTF8) + PL_reg_flags |= RF_utf8; + else + PL_reg_flags &= ~RF_utf8; + ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */ + ST.prev_rex = rex; + ST.prev_curlyx = cur_curlyx; rex = re; - + cur_curlyx = NULL; ST.B = next; + ST.prev_eval = cur_eval; + cur_eval = st; + DEBUG_EXECUTE_r( debug_start_match(re, do_utf8, locinput, PL_regeol, "Matching embedded"); ); - /* now continue from first node in postoned RE */ - PUSH_YES_STATE_GOTO(EVAL_A, re->program + 1); + /* now continue from first node in postoned RE */ + PUSH_YES_STATE_GOTO(EVAL_AB, re->program + 1); /* NOTREACHED */ } /* /(?(?{...})X|Y)/ */ @@ -3447,38 +3456,31 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) break; } - case EVAL_A: /* successfully ran inner rex (??{rex}) */ - if (ST.toggleutf) - PL_reg_flags ^= RF_utf8; + case EVAL_AB: /* cleanup after a successful (??{A})B */ + /* note: this is called twice; first after popping B, then A */ + PL_reg_flags ^= ST.toggle_reg_flags; ReREFCNT_dec(rex); rex = ST.prev_rex; + regcpblow(ST.cp); + cur_eval = ST.prev_eval; + cur_curlyx = ST.prev_curlyx; /* XXXX This is too dramatic a measure... */ PL_reg_maxiter = 0; - /* Restore parens of the caller without popping the - * savestack */ - { - const I32 tmp = PL_savestack_ix; - PL_savestack_ix = ST.lastcp; - regcppop(rex); - PL_savestack_ix = tmp; - } - PL_reginput = locinput; - /* continue at the node following the (??{...}) */ - scan = ST.B; - continue; - case EVAL_A_fail: /* unsuccessfully ran inner rex (??{rex}) */ - /* Restore state to the outer re then re-throw the failure */ - if (ST.toggleutf) - PL_reg_flags ^= RF_utf8; - ReREFCNT_dec(rex); - rex = ST.prev_rex; + sayYES_FINAL; - /* XXXX This is too dramatic a measure... */ - PL_reg_maxiter = 0; + case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */ + /* note: this is called twice; first after popping B, then A */ + PL_reg_flags ^= ST.toggle_reg_flags; + ReREFCNT_dec(rex); + rex = ST.prev_rex; PL_reginput = locinput; REGCP_UNWIND(ST.lastcp); regcppop(rex); + cur_eval = ST.prev_eval; + cur_curlyx = ST.prev_curlyx; + /* XXXX This is too dramatic a measure... */ + PL_reg_maxiter = 0; sayNO_SILENT; #undef ST @@ -4344,6 +4346,39 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) case END: + if (cur_eval) { + /* we've just finished A in /(??{A})B/; now continue with B */ + I32 tmpix; + + + st->u.eval.toggle_reg_flags + = cur_eval->u.eval.toggle_reg_flags; + PL_reg_flags ^= st->u.eval.toggle_reg_flags; + + st->u.eval.prev_rex = rex; /* inner */ + rex = cur_eval->u.eval.prev_rex; /* outer */ + cur_curlyx = cur_eval->u.eval.prev_curlyx; + ReREFCNT_inc(rex); + st->u.eval.cp = regcppush(0); /* Save *all* the positions. */ + REGCP_SET(st->u.eval.lastcp); + PL_reginput = locinput; + + /* Restore parens of the outer rex without popping the + * savestack */ + tmpix = PL_savestack_ix; + PL_savestack_ix = cur_eval->u.eval.lastcp; + regcppop(rex); + PL_savestack_ix = tmpix; + + st->u.eval.prev_eval = cur_eval; + cur_eval = cur_eval->u.eval.prev_eval; + DEBUG_EXECUTE_r( + PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ...\n", + REPORT_CODE_OFF+PL_regindent*2, "");); + PUSH_YES_STATE_GOTO(EVAL_AB, + st->u.eval.prev_eval->u.eval.B); /* match B */ + } + if (locinput < reginfo->till) { DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n", @@ -4543,7 +4578,7 @@ yes_final: PL_regmatch_state = st; switch (st->resume_state) { - case EVAL_A: + case EVAL_AB: case IFMATCH_A: case CURLYM_A: state_num = st->resume_state; @@ -4603,7 +4638,7 @@ yes: case TRIE_next: case CURLYM_A: case CURLYM_B: - case EVAL_A: + case EVAL_AB: case IFMATCH_A: case BRANCH_next: case CURLY_B_max: @@ -4665,7 +4700,7 @@ do_no: goto resume_point_WHILEM6; case TRIE_next: - case EVAL_A: + case EVAL_AB: case BRANCH_next: case CURLYM_A: case CURLYM_B: diff --git a/regexp.h b/regexp.h index 7f82199..36b2f7f 100644 --- a/regexp.h +++ b/regexp.h @@ -219,8 +219,12 @@ typedef struct regmatch_state { struct { /* this first element must match u.yes */ struct regmatch_state *prev_yes_state; + struct regmatch_state *prev_eval; + struct regmatch_state *prev_curlyx; regexp *prev_rex; - int toggleutf; + U32 toggle_reg_flags; /* what bits in PL_reg_flags to + flip when transitioning between + inner and outer rexen */ CHECKPOINT cp; /* remember current savestack indexes */ CHECKPOINT lastcp; regnode *B; /* the node following us */ diff --git a/t/op/pat.t b/t/op/pat.t index 97bad61..303e448 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -3627,11 +3627,16 @@ EOFTEST /[\N{SPACE}\N{U+0041}][\N{SPACE}\N{U+0042}]/, 'Intermixed named and unicode escapes'); } +$brackets = qr{ + { (?> [^{}]+ | (??{ $brackets }) )* } + }x; +ok("{b{c}d" !~ m/^((??{ $brackets }))/, "bracket mismatch"); + # Keep the following test last -- it may crash perl ok(("a" x (2**15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker: #24274") or print "# Unexpected outcome: should pass or crash perl\n"; # Don't forget to update this! -BEGIN{print "1..1251\n"}; +BEGIN{print "1..1252\n"}; diff --git a/t/op/re_tests b/t/op/re_tests index 2dedaef..3ff5a73 100644 --- a/t/op/re_tests +++ b/t/op/re_tests @@ -1004,3 +1004,15 @@ X(?=foo) ..XfooY.. y pos 3 X(?<=foo.)[YZ] ..XfooXY.. y pos 8 (?=XY*foo) Xfoo y pos 0 ^(?=XY*foo) Xfoo y pos 0 +^(??{"a+"})a aa y $& aa +^(?:(??{"a+"})|b)a aa y $& aa +^(??{chr 0x100}).$ \x{100}\x{100} y $& \x{100}\x{100} +^(??{q(\x{100})}). \x{100}\x{100} y $& \x{100}\x{100} +^(??{q(.+)})\x{100} \x{100}\x{100} y $& \x{100}\x{100} +^(??{q(.)})\x{100} \x{100}\x{100} y $& \x{100}\x{100} +^(??{chr 0x100})\xbb \x{100}\x{bb} y $& \x{100}\x{bb} +^(.)(??{"(.)(.)"})(.)$ abcd y $1-$2 a-d +^(.)(??{"(bz+|.)(.)"})(.)$ abcd y $1-$2 a-d +^(.)((??{"(.)(cz+)"})|.) abcd y $1-$2 a-b +^a(?>(??{q(b)}))(??{q(c)})d abcd y - - +^x(??{""})+$ x y $& x