X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regexec.c;h=3d64f20ea51d4aa3280408106ae30607f067d35e;hb=f449fe8af429114912b627758de5588f04953ecc;hp=6c82ba746503f5129fdd8a3eff44d067e1261f10;hpb=bcdf74043c1fd4b60b7764f5cd7d87525cf77e74;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regexec.c b/regexec.c index 6c82ba7..3d64f20 100644 --- a/regexec.c +++ b/regexec.c @@ -122,8 +122,11 @@ /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */ /* for use after a quantifier and before an EXACT-like node -- japhy */ -#define JUMPABLE(rn) ( \ - OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \ +/* it would be nice to rework regcomp.sym to generate this stuff. sigh */ +#define JUMPABLE(rn) ( \ + OP(rn) == OPEN || \ + (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \ + OP(rn) == EVAL || \ OP(rn) == SUSPEND || OP(rn) == IFMATCH || \ OP(rn) == PLUS || OP(rn) == MINMOD || \ OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \ @@ -190,7 +193,7 @@ S_regcppush(pTHX_ I32 parenfloor) SSPUSHINT(PL_regstartp[p]); SSPUSHPTR(PL_reg_start_tmp[p]); SSPUSHINT(p); - DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, + DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log, " saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n", (UV)p, (IV)PL_regstartp[p], (IV)(PL_reg_start_tmp[p] - PL_bostr), @@ -260,7 +263,7 @@ S_regcppop(pTHX_ const regexp *rex) tmps = SSPOPINT; if (paren <= *PL_reglastparen) PL_regendp[paren] = tmps; - DEBUG_EXECUTE_r( + DEBUG_BUFFERS_r( PerlIO_printf(Perl_debug_log, " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n", (UV)paren, (IV)PL_regstartp[paren], @@ -269,7 +272,7 @@ S_regcppop(pTHX_ const regexp *rex) (paren > *PL_reglastparen ? "(no)" : "")); ); } - DEBUG_EXECUTE_r( + DEBUG_BUFFERS_r( if (*PL_reglastparen + 1 <= rex->nparens) { PerlIO_printf(Perl_debug_log, " restoring \\%"IVdf"..\\%"IVdf" to undef\n", @@ -1646,6 +1649,32 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, return s; } +static void +S_swap_match_buff (pTHX_ regexp *prog) { + I32 *t; + + if (!prog->swap) { + /* We have to be careful. If the previous successful match + was from this regex we don't want a subsequent paritally + successful match to clobber the old results. + So when we detect this possibility we add a swap buffer + to the re, and switch the buffer each match. If we fail + we switch it back, otherwise we leave it swapped. + */ + Newxz(prog->swap, 1, regexp_paren_ofs); + /* no need to copy these */ + Newxz(prog->swap->startp, prog->nparens + 1, I32); + Newxz(prog->swap->endp, prog->nparens + 1, I32); + } + t = prog->swap->startp; + prog->swap->startp = prog->startp; + prog->startp = t; + t = prog->swap->endp; + prog->swap->endp = prog->endp; + prog->endp = t; +} + + /* - regexec_flags - match a regexp against a string */ @@ -1674,6 +1703,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * I32 multiline; RXi_GET_DECL(prog,progi); regmatch_info reginfo; /* create some info to pass to regtry etc */ + bool swap_on_fail = 0; GET_RE_DEBUG_FLAGS_DECL; @@ -1751,26 +1781,9 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * reginfo.ganch = strbeg; } if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) { - I32 *t; - if (!progi->swap) { - /* We have to be careful. If the previous successful match - was from this regex we don't want a subsequent paritally - successful match to clobber the old results. - So when we detect this possibility we add a swap buffer - to the re, and switch the buffer each match. If we fail - we switch it back, otherwise we leave it swapped. - */ - Newxz(progi->swap, 1, regexp_paren_ofs); - /* no need to copy these */ - Newxz(progi->swap->startp, prog->nparens + 1, I32); - Newxz(progi->swap->endp, prog->nparens + 1, I32); - } - t = progi->swap->startp; - progi->swap->startp = prog->startp; - prog->startp = t; - t = progi->swap->endp; - progi->swap->endp = prog->endp; - prog->endp = t; + swap_on_fail = 1; + swap_match_buff(prog); /* do we need a save destructor here for + eval dies? */ } if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) { re_scream_pos_data d; @@ -1983,7 +1996,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * SV * const prop = sv_newmortal(); regprop(prog, prop, c); { - RE_PV_QUOTED_DECL(quoted,UTF,PERL_DEBUG_PAD_ZERO(1), + RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1), s,strend-s,60); PerlIO_printf(Perl_debug_log, "Matching stclass %.*s against %s (%d chars)\n", @@ -2120,22 +2133,14 @@ phooey: PL_colors[4], PL_colors[5])); if (PL_reg_eval_set) restore_pos(aTHX_ prog); - if (progi->swap) { + if (swap_on_fail) /* we failed :-( roll it back */ - I32 *t; - t = progi->swap->startp; - progi->swap->startp = prog->startp; - prog->startp = t; - t = progi->swap->endp; - progi->swap->endp = prog->endp; - prog->endp = t; - } + swap_match_buff(prog); + return 0; } - - /* - regtry - try match at specific point */ @@ -2308,7 +2313,7 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos) STATIC regmatch_state * S_push_slab(pTHX) { -#if PERL_VERSION < 9 +#if PERL_VERSION < 9 && !defined(PERL_CORE) dMY_CXT; #endif regmatch_slab *s = PL_regmatch_slab->next; @@ -2479,7 +2484,7 @@ regmatch(), slabs allocated since entry are freed. PerlIO_printf(Perl_debug_log, \ " %*s"pp" %s%s%s%s%s\n", \ depth*2, "", \ - reg_name[st->resume_state], \ + PL_reg_name[st->resume_state], \ ((st==yes_state||st==mark_state) ? "[" : ""), \ ((st==yes_state) ? "Y" : ""), \ ((st==mark_state) ? "M" : ""), \ @@ -2605,10 +2610,14 @@ S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) { return 0; } +#define SETREX(Re1,Re2) \ + if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \ + Re1 = (Re2) + STATIC I32 /* 0 failure, 1 success */ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) { -#if PERL_VERSION < 9 +#if PERL_VERSION < 9 && !defined(PERL_CORE) dMY_CXT; #endif dVAR; @@ -2634,7 +2643,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) bool result = 0; /* return value of S_regmatch */ int depth = 0; /* depth of backtrack stack */ - int nochange_depth = 0; /* depth of GOSUB recursion with nochange*/ + U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */ + const U32 max_nochange_depth = + (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ? + 3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH; + regmatch_state *yes_state = NULL; /* state to pop to on success of subpattern */ /* mark_state piggy backs on the yes_state logic so that when we unwind @@ -2674,9 +2687,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) GET_RE_DEBUG_FLAGS_DECL; #endif - DEBUG_OPTIMISE_r( { + DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({ PerlIO_printf(Perl_debug_log,"regmatch start\n"); - }); + })); /* on first ever call to regmatch, allocate first slab */ if (!PL_regmatch_slab) { Newx(PL_regmatch_slab, 1, regmatch_slab); @@ -2869,7 +2882,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) ST.B = next; ST.jump = trie->jump; ST.me = scan; - /* traverse the TRIE keeping track of all accepting states we transition through until we get to a failing node. @@ -2967,13 +2979,25 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PL_colors[4], (IV)ST.accepted, PL_colors[5] ); ); }} - - /* FALL THROUGH */ + goto trie_first_try; /* jump into the fail handler */ + /* NOTREACHED */ case TRIE_next_fail: /* we failed - try next alterative */ + if ( ST.jump) { + REGCP_UNWIND(ST.cp); + for (n = *PL_reglastparen; n > ST.lastparen; n--) + PL_regendp[n] = -1; + *PL_reglastparen = n; + } + trie_first_try: if (do_cutgroup) { do_cutgroup = 0; no_final = 0; } + + if ( ST.jump) { + ST.lastparen = *PL_reglastparen; + REGCP_SET(ST.cp); + } if ( ST.accepted == 1 ) { /* only one choice left - just continue */ DEBUG_EXECUTE_r({ @@ -3014,8 +3038,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) continue; /* execute rest of RE */ } - - if (!ST.accepted-- ) { + + if ( !ST.accepted-- ) { DEBUG_EXECUTE_r({ PerlIO_printf( Perl_debug_log, "%*s %sTRIE failed...%s\n", @@ -3026,7 +3050,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) FREETMPS; LEAVE; sayNO_SILENT; - } + /*NOTREACHED*/ + } /* There are at least two accepting states left. Presumably @@ -3546,11 +3571,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) regnode *startpoint; case GOSTART: - case GOSUB: /* /(...(?1))/ */ - if (cur_eval && cur_eval->locinput==locinput) { + case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */ + if (cur_eval && cur_eval->locinput==locinput) { if (cur_eval->u.eval.close_paren == (U32)ARG(scan)) Perl_croak(aTHX_ "Infinite recursion in regex"); - if ( ++nochange_depth > MAX_RECURSE_EVAL_NOCHANGE_DEPTH ) + if ( ++nochange_depth > max_nochange_depth ) Perl_croak(aTHX_ "Pattern subroutine nesting without pos change" " exceeded limit in regex"); @@ -3571,14 +3596,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) /* NOTREACHED */ case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X|Y)B/ */ if (cur_eval && cur_eval->locinput==locinput) { - if ( ++nochange_depth > MAX_RECURSE_EVAL_NOCHANGE_DEPTH ) + if ( ++nochange_depth > max_nochange_depth ) Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex"); } else { nochange_depth = 0; } - { regexp *ocurpm = PM_GETRE(PL_curpm); - char *osubbeg = rex->subbeg; - STRLEN osublen = rex->sublen; { /* execute the code in the {...} */ dSP; @@ -3586,7 +3608,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) OP_4tree * const oop = PL_op; COP * const ocurcop = PL_curcop; PAD *old_comppad; - n = ARG(scan); PL_op = (OP_4tree*)rexi->data->data[n]; @@ -3599,10 +3620,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) SV *sv_mrk = get_sv("REGMARK", 1); sv_setsv(sv_mrk, sv_yes_mark); } - /* make sure that $1 and friends are available with nested eval */ - PM_SETRE(PL_curpm,rex); - rex->subbeg = ocurpm->subbeg; - rex->sublen = ocurpm->sublen; CALLRUNOPS(aTHX); /* Scalar context. */ SPAGAIN; @@ -3616,7 +3633,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PL_op = oop; PAD_RESTORE_LOCAL(old_comppad); PL_curcop = ocurcop; - if (!logical) { /* /(?{...})/ */ sv_setsv(save_scalar(PL_replgv), ret); @@ -3641,8 +3657,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) } if (mg) { - re = (regexp *)mg->mg_obj; - (void)ReREFCNT_inc(re); + re = reg_temp_copy((regexp *)mg->mg_obj); /*XXX:dmq*/ } else { STRLEN len; @@ -3661,13 +3676,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PL_regsize = osize; } } + RX_MATCH_COPIED_off(re); + re->subbeg = rex->subbeg; + re->sublen = rex->sublen; rei = RXi_GET(re); - - /* restore PL_curpm after the eval */ - PM_SETRE(PL_curpm,ocurpm); - rex->sublen = osublen; - rex->subbeg = osubbeg; - DEBUG_EXECUTE_r( debug_start_match(re, do_utf8, locinput, PL_regeol, "Matching embedded"); @@ -3681,8 +3693,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*); else Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*); - } - + } eval_recurse_doit: /* Share code with GOSUB below this line */ /* run the pattern returned from (??{...}) */ @@ -3709,7 +3720,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) ST.prev_rex = rex; ST.prev_curlyx = cur_curlyx; - rex = re; + SETREX(rex,re); rexi = rei; cur_curlyx = NULL; ST.B = next; @@ -3719,11 +3730,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PUSH_YES_STATE_GOTO(EVAL_AB, startpoint); /* NOTREACHED */ } - /* restore PL_curpm after the eval */ - PM_SETRE(PL_curpm,ocurpm); - rex->sublen = osublen; - rex->subbeg = osubbeg; - } /* logical is 1, /(?(?{...})X|Y)/ */ sw = (bool)SvTRUE(ret); logical = 0; @@ -3734,13 +3740,15 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) /* note: this is called twice; first after popping B, then A */ PL_reg_flags ^= ST.toggle_reg_flags; ReREFCNT_dec(rex); - rex = ST.prev_rex; + SETREX(rex,ST.prev_rex); rexi = RXi_GET(rex); regcpblow(ST.cp); cur_eval = ST.prev_eval; cur_curlyx = ST.prev_curlyx; /* XXXX This is too dramatic a measure... */ PL_reg_maxiter = 0; + if ( nochange_depth ) + nochange_depth--; sayYES; @@ -3748,7 +3756,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) /* note: this is called twice; first after popping B, then A */ PL_reg_flags ^= ST.toggle_reg_flags; ReREFCNT_dec(rex); - rex = ST.prev_rex; + SETREX(rex,ST.prev_rex); rexi = RXi_GET(rex); PL_reginput = locinput; REGCP_UNWIND(ST.lastcp); @@ -3757,6 +3765,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) cur_curlyx = ST.prev_curlyx; /* XXXX This is too dramatic a measure... */ PL_reg_maxiter = 0; + if ( nochange_depth ) + nochange_depth--; sayNO_SILENT; #undef ST @@ -4377,6 +4387,12 @@ NULL && UCHARAT(PL_reginput) != ST.c2) { /* simulate B failing */ + DEBUG_OPTIMISE_r( + PerlIO_printf(Perl_debug_log, + "%*s CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n", + (int)(REPORT_CODE_OFF+(depth*2)),"", + (IV)ST.c1,(IV)ST.c2 + )); state_num = CURLYM_B_fail; goto reenter_switch; } @@ -4744,14 +4760,12 @@ NULL if (cur_eval) { /* we've just finished A in /(??{A})B/; now continue with B */ I32 tmpix; - - st->u.eval.toggle_reg_flags = cur_eval->u.eval.toggle_reg_flags; PL_reg_flags ^= st->u.eval.toggle_reg_flags; st->u.eval.prev_rex = rex; /* inner */ - rex = cur_eval->u.eval.prev_rex; /* outer */ + SETREX(rex,cur_eval->u.eval.prev_rex); rexi = RXi_GET(rex); cur_curlyx = cur_eval->u.eval.prev_curlyx; ReREFCNT_inc(rex); @@ -4771,7 +4785,10 @@ NULL DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ... %"UVxf"\n", REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval));); - PUSH_YES_STATE_GOTO(EVAL_AB, + if ( nochange_depth ) + nochange_depth--; + + PUSH_YES_STATE_GOTO(EVAL_AB, st->u.eval.prev_eval->u.eval.B); /* match B */ } @@ -4996,7 +5013,7 @@ NULL } PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n", REPORT_CODE_OFF + 2 + depth * 2,"", - curd, reg_name[cur->resume_state], + curd, PL_reg_name[cur->resume_state], (curyes == cur) ? "yes" : "" ); if (curyes == cur) @@ -5166,6 +5183,9 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth) register char *loceol = PL_regeol; register I32 hardcount = 0; register bool do_utf8 = PL_reg_match_utf8; +#ifndef DEBUGGING + PERL_UNUSED_ARG(depth); +#endif scan = PL_reginput; if (max == REG_INFTY) @@ -5690,10 +5710,18 @@ S_to_utf8_substr(pTHX_ register regexp *prog) SV* const sv = newSVsv(prog->substrs->data[i].substr); prog->substrs->data[i].utf8_substr = sv; sv_utf8_upgrade(sv); - if (SvVALID(prog->substrs->data[i].substr)) - fbm_compile(sv, 0); - if (SvTAIL(prog->substrs->data[i].substr)) - SvTAIL_on(sv); + if (SvVALID(prog->substrs->data[i].substr)) { + const U8 flags = BmFLAGS(prog->substrs->data[i].substr); + if (flags & FBMcf_TAIL) { + /* Trim the trailing \n that fbm_compile added last + time. */ + SvCUR_set(sv, SvCUR(sv) - 1); + /* Whilst this makes the SV technically "invalid" (as its + buffer is no longer followed by "\0") when fbm_compile() + adds the "\n" back, a "\0" is restored. */ + } + fbm_compile(sv, flags); + } if (prog->substrs->data[i].substr == prog->check_substr) prog->check_utf8 = sv; } @@ -5710,10 +5738,16 @@ S_to_byte_substr(pTHX_ register regexp *prog) && !prog->substrs->data[i].substr) { SV* sv = newSVsv(prog->substrs->data[i].utf8_substr); if (sv_utf8_downgrade(sv, TRUE)) { - if (SvVALID(prog->substrs->data[i].utf8_substr)) - fbm_compile(sv, 0); - if (SvTAIL(prog->substrs->data[i].utf8_substr)) - SvTAIL_on(sv); + if (SvVALID(prog->substrs->data[i].utf8_substr)) { + const U8 flags + = BmFLAGS(prog->substrs->data[i].utf8_substr); + if (flags & FBMcf_TAIL) { + /* Trim the trailing \n that fbm_compile added last + time. */ + SvCUR_set(sv, SvCUR(sv) - 1); + } + fbm_compile(sv, flags); + } } else { SvREFCNT_dec(sv); sv = &PL_sv_undef;