X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regexec.c;h=c65c33b6c856de3a8d50a51ddb069dc6a933d13b;hb=277e868c1c62af9319379385544f564e302be2b0;hp=d1f6b8ec8757b0fa8871872806cee3978734706d;hpb=e7707071e420c5a715c0621d0428dd393503e884;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regexec.c b/regexec.c index d1f6b8e..c65c33b 100644 --- a/regexec.c +++ b/regexec.c @@ -184,25 +184,24 @@ S_regcppush(pTHX_ I32 parenfloor) if (paren_elems_to_push < 0) Perl_croak(aTHX_ "panic: paren_elems_to_push < 0"); -#define REGCP_OTHER_ELEMS 8 +#define REGCP_OTHER_ELEMS 7 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS); for (p = PL_regsize; p > parenfloor; p--) { /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */ - SSPUSHINT(PL_regendp[p]); - SSPUSHINT(PL_regstartp[p]); + SSPUSHINT(PL_regoffs[p].end); + SSPUSHINT(PL_regoffs[p].start); SSPUSHPTR(PL_reg_start_tmp[p]); SSPUSHINT(p); DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log, " saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n", - (UV)p, (IV)PL_regstartp[p], + (UV)p, (IV)PL_regoffs[p].start, (IV)(PL_reg_start_tmp[p] - PL_bostr), - (IV)PL_regendp[p] + (IV)PL_regoffs[p].end )); } /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */ - SSPUSHPTR(PL_regstartp); - SSPUSHPTR(PL_regendp); + SSPUSHPTR(PL_regoffs); SSPUSHINT(PL_regsize); SSPUSHINT(*PL_reglastparen); SSPUSHINT(*PL_reglastcloseparen); @@ -249,8 +248,7 @@ S_regcppop(pTHX_ const regexp *rex) *PL_reglastcloseparen = SSPOPINT; *PL_reglastparen = SSPOPINT; PL_regsize = SSPOPINT; - PL_regendp=(I32 *) SSPOPPTR; - PL_regstartp=(I32 *) SSPOPPTR; + PL_regoffs=(regexp_paren_pair *) SSPOPPTR; /* Now restore the parentheses context. */ @@ -259,16 +257,16 @@ S_regcppop(pTHX_ const regexp *rex) I32 tmps; U32 paren = (U32)SSPOPINT; PL_reg_start_tmp[paren] = (char *) SSPOPPTR; - PL_regstartp[paren] = SSPOPINT; + PL_regoffs[paren].start = SSPOPINT; tmps = SSPOPINT; if (paren <= *PL_reglastparen) - PL_regendp[paren] = tmps; + PL_regoffs[paren].end = tmps; DEBUG_BUFFERS_r( PerlIO_printf(Perl_debug_log, " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n", - (UV)paren, (IV)PL_regstartp[paren], + (UV)paren, (IV)PL_regoffs[paren].start, (IV)(PL_reg_start_tmp[paren] - PL_bostr), - (IV)PL_regendp[paren], + (IV)PL_regoffs[paren].end, (paren > *PL_reglastparen ? "(no)" : "")); ); } @@ -292,8 +290,8 @@ S_regcppop(pTHX_ const regexp *rex) * --jhi */ for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) { if (i > PL_regsize) - PL_regstartp[i] = -1; - PL_regendp[i] = -1; + PL_regoffs[i].start = -1; + PL_regoffs[i].end = -1; } #endif return input; @@ -1651,10 +1649,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, static void S_swap_match_buff (pTHX_ regexp *prog) { - I32 *t; - RXi_GET_DECL(prog,progi); + regexp_paren_pair *t; - if (!progi->swap) { + if (!prog->swap) { /* We have to be careful. If the previous successful match was from this regex we don't want a subsequent paritally successful match to clobber the old results. @@ -1662,17 +1659,11 @@ S_swap_match_buff (pTHX_ regexp *prog) { to the re, and switch the buffer each match. If we fail we switch it back, otherwise we leave it swapped. */ - Newxz(progi->swap, 1, regexp_paren_ofs); - /* no need to copy these */ - Newxz(progi->swap->startp, prog->nparens + 1, I32); - Newxz(progi->swap->endp, prog->nparens + 1, I32); + Newxz(prog->swap, (prog->nparens + 1), regexp_paren_pair); } - t = progi->swap->startp; - progi->swap->startp = prog->startp; - prog->startp = t; - t = progi->swap->endp; - progi->swap->endp = prog->endp; - prog->endp = t; + t = prog->swap; + prog->swap = prog->offs; + prog->offs = t; } @@ -1699,7 +1690,6 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * I32 end_shift = 0; /* Same for the end. */ /* CC */ I32 scream_pos = -1; /* Internal iterator of scream. */ char *scream_olds = NULL; - SV* const oreplsv = GvSV(PL_replgv); const bool do_utf8 = (bool)DO_UTF8(sv); I32 multiline; RXi_GET_DECL(prog,progi); @@ -1997,7 +1987,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * SV * const prop = sv_newmortal(); regprop(prog, prop, c); { - RE_PV_QUOTED_DECL(quoted,UTF,PERL_DEBUG_PAD_ZERO(1), + RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1), s,strend-s,60); PerlIO_printf(Perl_debug_log, "Matching stclass %.*s against %s (%d chars)\n", @@ -2085,14 +2075,8 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * got_it: RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted); - if (PL_reg_eval_set) { - /* Preserve the current value of $^R */ - if (oreplsv != GvSV(PL_replgv)) - sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is - restored, the value remains - the same. */ + if (PL_reg_eval_set) restore_pos(aTHX_ prog); - } if (prog->paren_names) (void)hv_iterinit(prog->paren_names); @@ -2149,8 +2133,6 @@ STATIC I32 /* 0 failure, 1 success */ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos) { dVAR; - register I32 *sp; - register I32 *ep; CHECKPOINT lastcp; regexp *prog = reginfo->prog; RXi_GET_DECL(prog,progi); @@ -2228,15 +2210,14 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos) prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */ } DEBUG_EXECUTE_r(PL_reg_starttry = *startpos); - prog->startp[0] = *startpos - PL_bostr; + prog->offs[0].start = *startpos - PL_bostr; PL_reginput = *startpos; PL_reglastparen = &prog->lastparen; PL_reglastcloseparen = &prog->lastcloseparen; prog->lastparen = 0; prog->lastcloseparen = 0; PL_regsize = 0; - PL_regstartp = prog->startp; - PL_regendp = prog->endp; + PL_regoffs = prog->offs; if (PL_reg_start_tmpl <= prog->nparens) { PL_reg_start_tmpl = prog->nparens*3/2 + 3; if(PL_reg_start_tmp) @@ -2260,19 +2241,19 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos) * on those tests seems to be returning null fields from matches. * --jhi */ #if 1 - sp = PL_regstartp; - ep = PL_regendp; if (prog->nparens) { + regexp_paren_pair *pp = PL_regoffs; register I32 i; for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) { - *++sp = -1; - *++ep = -1; + ++pp; + pp->start = -1; + pp->end = -1; } } #endif REGCP_SET(lastcp); if (regmatch(reginfo, progi->program + 1)) { - PL_regendp[0] = PL_reginput - PL_bostr; + PL_regoffs[0].end = PL_reginput - PL_bostr; return 1; } if (reginfo->cutpoint) @@ -2314,7 +2295,7 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos) STATIC regmatch_state * S_push_slab(pTHX) { -#if PERL_VERSION < 9 +#if PERL_VERSION < 9 && !defined(PERL_CORE) dMY_CXT; #endif regmatch_slab *s = PL_regmatch_slab->next; @@ -2485,7 +2466,7 @@ regmatch(), slabs allocated since entry are freed. PerlIO_printf(Perl_debug_log, \ " %*s"pp" %s%s%s%s%s\n", \ depth*2, "", \ - reg_name[st->resume_state], \ + PL_reg_name[st->resume_state], \ ((st==yes_state||st==mark_state) ? "[" : ""), \ ((st==yes_state) ? "Y" : ""), \ ((st==mark_state) ? "M" : ""), \ @@ -2603,7 +2584,7 @@ S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) { I32 *nums=(I32*)SvPVX(sv_dat); for ( n=0; n= nums[n] && - PL_regendp[nums[n]] != -1) + PL_regoffs[nums[n]].end != -1) { return nums[n]; } @@ -2611,10 +2592,34 @@ S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) { return 0; } + +/* free all slabs above current one - called during LEAVE_SCOPE */ + +STATIC void +S_clear_backtrack_stack(pTHX_ void *p) +{ + regmatch_slab *s = PL_regmatch_slab->next; + PERL_UNUSED_ARG(p); + + if (!s) + return; + PL_regmatch_slab->next = NULL; + while (s) { + regmatch_slab * const osl = s; + s = s->next; + Safefree(osl); + } +} + + +#define SETREX(Re1,Re2) \ + if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \ + Re1 = (Re2) + STATIC I32 /* 0 failure, 1 success */ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) { -#if PERL_VERSION < 9 +#if PERL_VERSION < 9 && !defined(PERL_CORE) dMY_CXT; #endif dVAR; @@ -2624,8 +2629,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) regexp *rex = reginfo->prog; RXi_GET_DECL(rex,rexi); - regmatch_slab *orig_slab; - regmatch_state *orig_state; + I32 oldsave; /* the current state. This is a cached copy of PL_regmatch_state */ register regmatch_state *st; @@ -2663,6 +2667,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) during a successfull match */ U32 lastopen = 0; /* last open we saw */ bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0; + + SV* const oreplsv = GvSV(PL_replgv); /* these three flags are set by various ops to signal information to @@ -2695,10 +2701,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab); } - /* remember current high-water mark for exit */ - /* XXX this should be done with SAVE* instead */ - orig_slab = PL_regmatch_slab; - orig_state = PL_regmatch_state; + oldsave = PL_savestack_ix; + SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL); + SAVEVPTR(PL_regmatch_slab); + SAVEVPTR(PL_regmatch_state); /* grab next free state slot */ st = ++PL_regmatch_state; @@ -2756,14 +2762,14 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) case KEEPS: /* update the startpoint */ - st->u.keeper.val = PL_regstartp[0]; + st->u.keeper.val = PL_regoffs[0].start; PL_reginput = locinput; - PL_regstartp[0] = locinput - PL_bostr; + PL_regoffs[0].start = locinput - PL_bostr; PUSH_STATE_GOTO(KEEPS_next, next); /*NOT-REACHED*/ case KEEPS_next_fail: /* rollback the start point change */ - PL_regstartp[0] = st->u.keeper.val; + PL_regoffs[0].start = st->u.keeper.val; sayNO_SILENT; /*NOT-REACHED*/ case EOL: @@ -2982,7 +2988,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) if ( ST.jump) { REGCP_UNWIND(ST.cp); for (n = *PL_reglastparen; n > ST.lastparen; n--) - PL_regendp[n] = -1; + PL_regoffs[n].end = -1; *PL_reglastparen = n; } trie_first_try: @@ -3497,17 +3503,17 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) n = ARG(scan); /* which paren pair */ type = OP(scan); do_ref: - ln = PL_regstartp[n]; + ln = PL_regoffs[n].start; PL_reg_leftiter = PL_reg_maxiter; /* Void cache */ if (*PL_reglastparen < n || ln == -1) sayNO; /* Do not match unless seen CLOSEn. */ - if (ln == PL_regendp[n]) + if (ln == PL_regoffs[n].end) break; s = PL_bostr + ln; if (do_utf8 && type != REF) { /* REF can do byte comparison */ char *l = locinput; - const char *e = PL_bostr + PL_regendp[n]; + const char *e = PL_bostr + PL_regoffs[n].end; /* * Note that we can't do the "other character" lookup trick as * in the 8-bit case (no pun intended) because in Unicode we @@ -3540,7 +3546,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) (UCHARAT(s) != (type == REFF ? PL_fold : PL_fold_locale)[nextchr]))) sayNO; - ln = PL_regendp[n] - ln; + ln = PL_regoffs[n].end - ln; if (locinput + ln > PL_regeol) sayNO; if (ln > 1 && (type == REF @@ -3611,7 +3617,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) DEBUG_STATE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) ); PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]); - PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr; + PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr; if (sv_yes_mark) { SV *sv_mrk = get_sv("REGMARK", 1); @@ -3654,8 +3660,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) } if (mg) { - re = (regexp *)mg->mg_obj; - (void)ReREFCNT_inc(re); + re = reg_temp_copy((regexp *)mg->mg_obj); /*XXX:dmq*/ } else { STRLEN len; @@ -3674,6 +3679,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PL_regsize = osize; } } + RX_MATCH_COPIED_off(re); + re->subbeg = rex->subbeg; + re->sublen = rex->sublen; rei = RXi_GET(re); DEBUG_EXECUTE_r( debug_start_match(re, do_utf8, locinput, PL_regeol, @@ -3695,8 +3703,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) ST.cp = regcppush(0); /* Save *all* the positions. */ REGCP_SET(ST.lastcp); - PL_regstartp = re->startp; /* essentially NOOP on GOSUB */ - PL_regendp = re->endp; /* essentially NOOP on GOSUB */ + PL_regoffs = re->offs; /* essentially NOOP on GOSUB */ *PL_reglastparen = 0; *PL_reglastcloseparen = 0; @@ -3715,7 +3722,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) ST.prev_rex = rex; ST.prev_curlyx = cur_curlyx; - rex = re; + SETREX(rex,re); rexi = rei; cur_curlyx = NULL; ST.B = next; @@ -3735,7 +3742,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) /* note: this is called twice; first after popping B, then A */ PL_reg_flags ^= ST.toggle_reg_flags; ReREFCNT_dec(rex); - rex = ST.prev_rex; + SETREX(rex,ST.prev_rex); rexi = RXi_GET(rex); regcpblow(ST.cp); cur_eval = ST.prev_eval; @@ -3751,7 +3758,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) /* note: this is called twice; first after popping B, then A */ PL_reg_flags ^= ST.toggle_reg_flags; ReREFCNT_dec(rex); - rex = ST.prev_rex; + SETREX(rex,ST.prev_rex); rexi = RXi_GET(rex); PL_reginput = locinput; REGCP_UNWIND(ST.lastcp); @@ -3774,8 +3781,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) break; case CLOSE: n = ARG(scan); /* which paren pair */ - PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr; - PL_regendp[n] = locinput - PL_bostr; + PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr; + PL_regoffs[n].end = locinput - PL_bostr; /*if (n > PL_regsize) PL_regsize = n;*/ if (n > *PL_reglastparen) @@ -3795,8 +3802,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) if ( OP(cursor)==CLOSE ){ n = ARG(cursor); if ( n <= lastopen ) { - PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr; - PL_regendp[n] = locinput - PL_bostr; + PL_regoffs[n].start + = PL_reg_start_tmp[n] - PL_bostr; + PL_regoffs[n].end = locinput - PL_bostr; /*if (n > PL_regsize) PL_regsize = n;*/ if (n > *PL_reglastparen) @@ -3813,7 +3821,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) /*NOTREACHED*/ case GROUPP: n = ARG(scan); /* which paren pair */ - sw = (bool)(*PL_reglastparen >= n && PL_regendp[n] != -1); + sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1); break; case NGROUPP: /* reg_check_named_buff_matched returns 0 for no match */ @@ -3961,15 +3969,6 @@ NULL } case CURLYX_end: /* just finished matching all of A*B */ - if (PL_reg_eval_set){ - SV *pres= GvSV(PL_replgv); - SvREFCNT_inc(pres); - regcpblow(ST.cp); - sv_setsv(GvSV(PL_replgv), pres); - SvREFCNT_dec(pres); - } else { - regcpblow(ST.cp); - } cur_curlyx = ST.prev_curlyx; sayYES; /* NOTREACHED */ @@ -4231,7 +4230,7 @@ NULL } REGCP_UNWIND(ST.cp); for (n = *PL_reglastparen; n > ST.lastparen; n--) - PL_regendp[n] = -1; + PL_regoffs[n].end = -1; *PL_reglastparen = n; /*dmq: *PL_reglastcloseparen = n; */ scan = ST.next_branch; @@ -4396,13 +4395,13 @@ NULL /* mark current A as captured */ I32 paren = ST.me->flags; if (ST.count) { - PL_regstartp[paren] + PL_regoffs[paren].start = HOPc(PL_reginput, -ST.alen) - PL_bostr; - PL_regendp[paren] = PL_reginput - PL_bostr; + PL_regoffs[paren].end = PL_reginput - PL_bostr; /*dmq: *PL_reglastcloseparen = paren; */ } else - PL_regendp[paren] = -1; + PL_regoffs[paren].end = -1; if (cur_eval && cur_eval->u.eval.close_paren && cur_eval->u.eval.close_paren == (U32)ST.me->flags) { @@ -4436,12 +4435,12 @@ NULL #define CURLY_SETPAREN(paren, success) \ if (paren) { \ if (success) { \ - PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr; \ - PL_regendp[paren] = locinput - PL_bostr; \ + PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \ + PL_regoffs[paren].end = locinput - PL_bostr; \ *PL_reglastcloseparen = paren; \ } \ else \ - PL_regendp[paren] = -1; \ + PL_regoffs[paren].end = -1; \ } case STAR: /* /A*B/ where A is width 1 */ @@ -4614,7 +4613,7 @@ NULL case CURLY_B_min_known_fail: /* failed to find B in a non-greedy match where c1,c2 valid */ if (ST.paren && ST.count) - PL_regendp[ST.paren] = -1; + PL_regoffs[ST.paren].end = -1; PL_reginput = locinput; /* Could be reset... */ REGCP_UNWIND(ST.cp); @@ -4692,7 +4691,7 @@ NULL case CURLY_B_min_fail: /* failed to find B in a non-greedy match where c1,c2 invalid */ if (ST.paren && ST.count) - PL_regendp[ST.paren] = -1; + PL_regoffs[ST.paren].end = -1; REGCP_UNWIND(ST.cp); /* failed -- move forward one */ @@ -4739,7 +4738,7 @@ NULL case CURLY_B_max_fail: /* failed to find B in a greedy match */ if (ST.paren && ST.count) - PL_regendp[ST.paren] = -1; + PL_regoffs[ST.paren].end = -1; REGCP_UNWIND(ST.cp); /* back up. */ @@ -4760,7 +4759,7 @@ NULL PL_reg_flags ^= st->u.eval.toggle_reg_flags; st->u.eval.prev_rex = rex; /* inner */ - rex = cur_eval->u.eval.prev_rex; /* outer */ + SETREX(rex,cur_eval->u.eval.prev_rex); rexi = RXi_GET(rex); cur_curlyx = cur_eval->u.eval.prev_curlyx; ReREFCNT_inc(rex); @@ -5008,7 +5007,7 @@ NULL } PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n", REPORT_CODE_OFF + 2 + depth * 2,"", - curd, reg_name[cur->resume_state], + curd, PL_reg_name[cur->resume_state], (curyes == cur) ? "yes" : "" ); if (curyes == cur) @@ -5087,6 +5086,15 @@ yes: DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n", PL_colors[4], PL_colors[5])); + if (PL_reg_eval_set) { + /* each successfully executed (?{...}) block does the equivalent of + * local $^R = do {...} + * When popping the save stack, all these locals would be undone; + * bypass this by setting the outermost saved $^R to the latest + * value */ + if (oreplsv != GvSV(PL_replgv)) + sv_setsv(oreplsv, GvSV(PL_replgv)); + } result = 1; goto final_exit; @@ -5143,20 +5151,9 @@ no_silent: sv_setsv(sv_err, sv_commit); sv_setsv(sv_mrk, sv_yes_mark); } - /* restore original high-water mark */ - PL_regmatch_slab = orig_slab; - PL_regmatch_state = orig_state; - - /* free all slabs above current one */ - if (orig_slab->next) { - regmatch_slab *sl = orig_slab->next; - orig_slab->next = NULL; - while (sl) { - regmatch_slab * const osl = sl; - sl = sl->next; - Safefree(osl); - } - } + + /* clean up; in particular, free all slabs above current one */ + LEAVE_SCOPE(oldsave); return result; }