X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regexec.c;h=daa8e005232f64405adca71f8bb7392c6f6fcbd1;hb=0fa0c92d7e11919cd6e656e650a639f346cb3ba1;hp=cad8f61520ee949d9a0d23ce8fc65f0aa1f24a3d;hpb=ded05c2a789e70bb7204e21b2aa98c6d1ac776c2;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regexec.c b/regexec.c index cad8f61..daa8e00 100644 --- a/regexec.c +++ b/regexec.c @@ -122,8 +122,11 @@ /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */ /* for use after a quantifier and before an EXACT-like node -- japhy */ -#define JUMPABLE(rn) ( \ - OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \ +/* it would be nice to rework regcomp.sym to generate this stuff. sigh */ +#define JUMPABLE(rn) ( \ + OP(rn) == OPEN || \ + (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \ + OP(rn) == EVAL || \ OP(rn) == SUSPEND || OP(rn) == IFMATCH || \ OP(rn) == PLUS || OP(rn) == MINMOD || \ OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \ @@ -498,7 +501,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, /* end shift should be non negative here */ } -#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */ +#ifdef QDEBUGGING /* 7/99: reports of failure (with the older version) */ if (end_shift < 0) Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ", (IV)end_shift, prog->precomp); @@ -1646,6 +1649,33 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, return s; } +void +S_swap_match_buff (pTHX_ regexp *prog) { + I32 *t; + RXi_GET_DECL(prog,progi); + + if (!progi->swap) { + /* We have to be careful. If the previous successful match + was from this regex we don't want a subsequent paritally + successful match to clobber the old results. + So when we detect this possibility we add a swap buffer + to the re, and switch the buffer each match. If we fail + we switch it back, otherwise we leave it swapped. + */ + Newxz(progi->swap, 1, regexp_paren_ofs); + /* no need to copy these */ + Newxz(progi->swap->startp, prog->nparens + 1, I32); + Newxz(progi->swap->endp, prog->nparens + 1, I32); + } + t = progi->swap->startp; + progi->swap->startp = prog->startp; + prog->startp = t; + t = progi->swap->endp; + progi->swap->endp = prog->endp; + prog->endp = t; +} + + /* - regexec_flags - match a regexp against a string */ @@ -1674,6 +1704,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * I32 multiline; RXi_GET_DECL(prog,progi); regmatch_info reginfo; /* create some info to pass to regtry etc */ + bool swap_on_fail = 0; GET_RE_DEBUG_FLAGS_DECL; @@ -1751,26 +1782,9 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * reginfo.ganch = strbeg; } if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) { - I32 *t; - if (!progi->swap) { - /* We have to be careful. If the previous successful match - was from this regex we don't want a subsequent paritally - successful match to clobber the old results. - So when we detect this possibility we add a swap buffer - to the re, and switch the buffer each match. If we fail - we switch it back, otherwise we leave it swapped. - */ - Newxz(progi->swap, 1, regexp_paren_ofs); - /* no need to copy these */ - Newxz(progi->swap->startp, prog->nparens + 1, I32); - Newxz(progi->swap->endp, prog->nparens + 1, I32); - } - t = progi->swap->startp; - progi->swap->startp = prog->startp; - prog->startp = t; - t = progi->swap->endp; - progi->swap->endp = prog->endp; - prog->endp = t; + swap_on_fail = 1; + swap_match_buff(prog); /* do we need a save destructor here for + eval dies? */ } if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) { re_scream_pos_data d; @@ -2120,16 +2134,10 @@ phooey: PL_colors[4], PL_colors[5])); if (PL_reg_eval_set) restore_pos(aTHX_ prog); - if (progi->swap) { + if (swap_on_fail) /* we failed :-( roll it back */ - I32 *t; - t = progi->swap->startp; - progi->swap->startp = prog->startp; - prog->startp = t; - t = progi->swap->endp; - progi->swap->endp = prog->endp; - prog->endp = t; - } + swap_match_buff(prog); + return 0; } @@ -2869,7 +2877,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) ST.B = next; ST.jump = trie->jump; ST.me = scan; - /* traverse the TRIE keeping track of all accepting states we transition through until we get to a failing node. @@ -2967,13 +2974,25 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PL_colors[4], (IV)ST.accepted, PL_colors[5] ); ); }} - - /* FALL THROUGH */ + goto trie_first_try; /* jump into the fail handler */ + /* NOTREACHED */ case TRIE_next_fail: /* we failed - try next alterative */ + if ( ST.jump) { + REGCP_UNWIND(ST.cp); + for (n = *PL_reglastparen; n > ST.lastparen; n--) + PL_regendp[n] = -1; + *PL_reglastparen = n; + } + trie_first_try: if (do_cutgroup) { do_cutgroup = 0; no_final = 0; } + + if ( ST.jump) { + ST.lastparen = *PL_reglastparen; + REGCP_SET(ST.cp); + } if ( ST.accepted == 1 ) { /* only one choice left - just continue */ DEBUG_EXECUTE_r({ @@ -3014,8 +3033,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) continue; /* execute rest of RE */ } - - if (!ST.accepted-- ) { + + if ( !ST.accepted-- ) { DEBUG_EXECUTE_r({ PerlIO_printf( Perl_debug_log, "%*s %sTRIE failed...%s\n", @@ -3026,7 +3045,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) FREETMPS; LEAVE; sayNO_SILENT; - } + /*NOTREACHED*/ + } /* There are at least two accepting states left. Presumably @@ -4377,6 +4397,12 @@ NULL && UCHARAT(PL_reginput) != ST.c2) { /* simulate B failing */ + DEBUG_OPTIMISE_r( + PerlIO_printf(Perl_debug_log, + "%*s CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n", + (int)(REPORT_CODE_OFF+(depth*2)),"", + (IV)ST.c1,(IV)ST.c2 + )); state_num = CURLYM_B_fail; goto reenter_switch; } @@ -5690,10 +5716,18 @@ S_to_utf8_substr(pTHX_ register regexp *prog) SV* const sv = newSVsv(prog->substrs->data[i].substr); prog->substrs->data[i].utf8_substr = sv; sv_utf8_upgrade(sv); - if (SvVALID(prog->substrs->data[i].substr)) - fbm_compile(sv, 0); - if (SvTAIL(prog->substrs->data[i].substr)) - SvTAIL_on(sv); + if (SvVALID(prog->substrs->data[i].substr)) { + const U8 flags = BmFLAGS(prog->substrs->data[i].substr); + if (flags & FBMcf_TAIL) { + /* Trim the trailing \n that fbm_compile added last + time. */ + SvCUR_set(sv, SvCUR(sv) - 1); + /* Whilst this makes the SV technically "invalid" (as its + buffer is no longer followed by "\0") when fbm_compile() + adds the "\n" back, a "\0" is restored. */ + } + fbm_compile(sv, flags); + } if (prog->substrs->data[i].substr == prog->check_substr) prog->check_utf8 = sv; } @@ -5710,10 +5744,16 @@ S_to_byte_substr(pTHX_ register regexp *prog) && !prog->substrs->data[i].substr) { SV* sv = newSVsv(prog->substrs->data[i].utf8_substr); if (sv_utf8_downgrade(sv, TRUE)) { - if (SvVALID(prog->substrs->data[i].utf8_substr)) - fbm_compile(sv, 0); - if (SvTAIL(prog->substrs->data[i].utf8_substr)) - SvTAIL_on(sv); + if (SvVALID(prog->substrs->data[i].utf8_substr)) { + const U8 flags + = BmFLAGS(prog->substrs->data[i].utf8_substr); + if (flags & FBMcf_TAIL) { + /* Trim the trailing \n that fbm_compile added last + time. */ + SvCUR_set(sv, SvCUR(sv) - 1); + } + fbm_compile(sv, flags); + } } else { SvREFCNT_dec(sv); sv = &PL_sv_undef;