X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regexec.c;h=890736cc132485d7108f9ce0a39ea7828321b318;hb=dad790286e318c5c7f4b6ccd52b4fd512c87c763;hp=71eab5b1a4614e082cc13f6e29b19f2cced7a4e3;hpb=aa283a383ef6540d57dd786b93d8ba9bd303e3e6;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regexec.c b/regexec.c index 71eab5b..890736c 100644 --- a/regexec.c +++ b/regexec.c @@ -107,7 +107,7 @@ #define STATIC static #endif -#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c))) +#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c))) /* * Forwards. @@ -839,8 +839,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, : strend); t = s; - PL_reg_re = prog; - s = find_byclass(prog, prog->regstclass, s, endpos, 1); + s = find_byclass(prog, prog->regstclass, s, endpos, NULL); if (!s) { #ifdef DEBUGGING const char *what = NULL; @@ -932,8 +931,11 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, } /* We know what class REx starts with. Try to find this position... */ +/* if reginfo is NULL, its a dryrun */ + STATIC char * -S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, I32 norun) +S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char +*strend, const regmatch_info *reginfo) { dVAR; const I32 doevery = (prog->reganch & ROPT_SKIP) == 0; @@ -954,9 +956,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren while (s + (uskip = UTF8SKIP(s)) <= strend) { if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) || !UTF8_IS_INVARIANT((U8)s[0]) ? - reginclass(c, (U8*)s, 0, do_utf8) : - REGINCLASS(c, (U8*)s)) { - if (tmp && (norun || regtry(prog, s))) + reginclass(prog, c, (U8*)s, 0, do_utf8) : + REGINCLASS(prog, c, (U8*)s)) { + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -970,12 +972,12 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren while (s < strend) { STRLEN skip = 1; - if (REGINCLASS(c, (U8*)s) || + if (REGINCLASS(prog, c, (U8*)s) || (ANYOF_FOLD_SHARP_S(c, s, strend) && /* The assignment of 2 is intentional: * for the folded sharp s, the skip is 2. */ (skip = SHARP_S_SKIP))) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -988,7 +990,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren break; case CANY: while (s < strend) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1033,7 +1035,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren do_exactf: e = HOP3c(strend, -((I32)lnc), s); - if (norun && e < s) + if (!reginfo && e < s) e = s; /* Due to minlen logic of intuit() */ /* The idea in the EXACTF* cases is to first find the @@ -1062,7 +1064,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren && (ln == len || ibcmp_utf8(s, (char **)0, 0, do_utf8, m, (char **)0, ln, (bool)UTF)) - && (norun || regtry(prog, s)) ) + && (!reginfo || regtry(reginfo, s)) ) goto got_it; else { U8 foldbuf[UTF8_MAXBYTES_CASE+1]; @@ -1075,7 +1077,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren (char **)0, foldlen, do_utf8, m, (char **)0, ln, (bool)UTF)) - && (norun || regtry(prog, s)) ) + && (!reginfo || regtry(reginfo, s)) ) goto got_it; } s += len; @@ -1101,7 +1103,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren && (ln == len || ibcmp_utf8(s, (char **)0, 0, do_utf8, m, (char **)0, ln, (bool)UTF)) - && (norun || regtry(prog, s)) ) + && (!reginfo || regtry(reginfo, s)) ) goto got_it; else { U8 foldbuf[UTF8_MAXBYTES_CASE+1]; @@ -1114,7 +1116,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren (char **)0, foldlen, do_utf8, m, (char **)0, ln, (bool)UTF)) - && (norun || regtry(prog, s)) ) + && (!reginfo || regtry(reginfo, s)) ) goto got_it; } s += len; @@ -1128,7 +1130,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren && (ln == 1 || !(OP(c) == EXACTF ? ibcmp(s, m, ln) : ibcmp_locale(s, m, ln))) - && (norun || regtry(prog, s)) ) + && (!reginfo || regtry(reginfo, s)) ) goto got_it; s++; } @@ -1138,7 +1140,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren && (ln == 1 || !(OP(c) == EXACTF ? ibcmp(s, m, ln) : ibcmp_locale(s, m, ln))) - && (norun || regtry(prog, s)) ) + && (!reginfo || regtry(reginfo, s)) ) goto got_it; s++; } @@ -1164,7 +1166,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren isALNUM_LC_utf8((U8*)s))) { tmp = !tmp; - if ((norun || regtry(prog, s))) + if ((!reginfo || regtry(reginfo, s))) goto got_it; } s += uskip; @@ -1177,13 +1179,13 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) { tmp = !tmp; - if ((norun || regtry(prog, s))) + if ((!reginfo || regtry(reginfo, s))) goto got_it; } s++; } } - if ((!prog->minlen && tmp) && (norun || regtry(prog, s))) + if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, s))) goto got_it; break; case NBOUNDL: @@ -1205,7 +1207,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) : isALNUM_LC_utf8((U8*)s))) tmp = !tmp; - else if ((norun || regtry(prog, s))) + else if ((!reginfo || regtry(reginfo, s))) goto got_it; s += uskip; } @@ -1218,12 +1220,12 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s))) tmp = !tmp; - else if ((norun || regtry(prog, s))) + else if ((!reginfo || regtry(reginfo, s))) goto got_it; s++; } } - if ((!prog->minlen && !tmp) && (norun || regtry(prog, s))) + if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, s))) goto got_it; break; case ALNUM: @@ -1231,7 +1233,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren LOAD_UTF8_CHARCLASS_ALNUM(); while (s + (uskip = UTF8SKIP(s)) <= strend) { if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1244,7 +1246,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren else { while (s < strend) { if (isALNUM(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1260,7 +1262,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren if (do_utf8) { while (s + (uskip = UTF8SKIP(s)) <= strend) { if (isALNUM_LC_utf8((U8*)s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1273,7 +1275,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren else { while (s < strend) { if (isALNUM_LC(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1289,7 +1291,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren LOAD_UTF8_CHARCLASS_ALNUM(); while (s + (uskip = UTF8SKIP(s)) <= strend) { if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1302,7 +1304,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren else { while (s < strend) { if (!isALNUM(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1318,7 +1320,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren if (do_utf8) { while (s + (uskip = UTF8SKIP(s)) <= strend) { if (!isALNUM_LC_utf8((U8*)s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1331,7 +1333,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren else { while (s < strend) { if (!isALNUM_LC(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1347,7 +1349,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren LOAD_UTF8_CHARCLASS_SPACE(); while (s + (uskip = UTF8SKIP(s)) <= strend) { if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1360,7 +1362,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren else { while (s < strend) { if (isSPACE(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1376,7 +1378,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren if (do_utf8) { while (s + (uskip = UTF8SKIP(s)) <= strend) { if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1389,7 +1391,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren else { while (s < strend) { if (isSPACE_LC(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1405,7 +1407,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren LOAD_UTF8_CHARCLASS_SPACE(); while (s + (uskip = UTF8SKIP(s)) <= strend) { if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1418,7 +1420,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren else { while (s < strend) { if (!isSPACE(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1434,7 +1436,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren if (do_utf8) { while (s + (uskip = UTF8SKIP(s)) <= strend) { if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1447,7 +1449,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren else { while (s < strend) { if (!isSPACE_LC(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1463,7 +1465,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren LOAD_UTF8_CHARCLASS_DIGIT(); while (s + (uskip = UTF8SKIP(s)) <= strend) { if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1476,7 +1478,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren else { while (s < strend) { if (isDIGIT(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1492,7 +1494,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren if (do_utf8) { while (s + (uskip = UTF8SKIP(s)) <= strend) { if (isDIGIT_LC_utf8((U8*)s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1505,7 +1507,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren else { while (s < strend) { if (isDIGIT_LC(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1521,7 +1523,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren LOAD_UTF8_CHARCLASS_DIGIT(); while (s + (uskip = UTF8SKIP(s)) <= strend) { if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1534,7 +1536,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren else { while (s < strend) { if (!isDIGIT(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1550,7 +1552,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren if (do_utf8) { while (s + (uskip = UTF8SKIP(s)) <= strend) { if (!isDIGIT_LC_utf8((U8*)s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1563,7 +1565,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *stren else { while (s < strend) { if (!isDIGIT_LC(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && (!reginfo || regtry(reginfo, s))) goto got_it; else tmp = doevery; @@ -1606,11 +1608,12 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * char *scream_olds = NULL; SV* oreplsv = GvSV(PL_replgv); const bool do_utf8 = DO_UTF8(sv); - const I32 multiline = prog->reganch & PMf_MULTILINE; + I32 multiline; #ifdef DEBUGGING - SV * const dsv0 = PERL_DEBUG_PAD_ZERO(0); - SV * const dsv1 = PERL_DEBUG_PAD_ZERO(1); + SV* dsv0; + SV* dsv1; #endif + regmatch_info reginfo; /* create some info to pass to regtry etc */ GET_RE_DEBUG_FLAGS_DECL; @@ -1622,9 +1625,12 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * return 0; } - PL_reg_re = prog; + multiline = prog->reganch & PMf_MULTILINE; + reginfo.prog = prog; + #ifdef DEBUGGING - PL_regnarrate = DEBUG_r_TEST; + dsv0 = PERL_DEBUG_PAD_ZERO(0); + dsv1 = PERL_DEBUG_PAD_ZERO(1); #endif RX_MATCH_UTF8_set(prog, do_utf8); @@ -1649,37 +1655,37 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_utf8; /* Mark beginning of line for ^ and lookbehind. */ - PL_regbol = startpos; + reginfo.bol = startpos; /* XXX not used ??? */ PL_bostr = strbeg; - PL_reg_sv = sv; + reginfo.sv = sv; /* Mark end of line for $ (and such) */ PL_regeol = strend; /* see how far we have to get to not match where we matched before */ - PL_regtill = startpos+minend; + reginfo.till = startpos+minend; /* If there is a "must appear" string, look for it. */ s = startpos; - if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */ + if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to set reginfo->ganch */ MAGIC *mg; if (flags & REXEC_IGNOREPOS) /* Means: check only at start */ - PL_reg_ganch = startpos; + reginfo.ganch = startpos; else if (sv && SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv) && (mg = mg_find(sv, PERL_MAGIC_regex_global)) && mg->mg_len >= 0) { - PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */ + reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */ if (prog->reganch & ROPT_ANCH_GPOS) { - if (s > PL_reg_ganch) + if (s > reginfo.ganch) goto phooey; - s = PL_reg_ganch; + s = reginfo.ganch; } } else /* pos() not defined */ - PL_reg_ganch = strbeg; + reginfo.ganch = strbeg; } if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) { @@ -1721,7 +1727,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * /* Simplest case: anchored match need be tried only once. */ /* [unless only anchor is BOL and multiline is set] */ if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) { - if (s == startpos && regtry(prog, startpos)) + if (s == startpos && regtry(®info, startpos)) goto got_it; else if (multiline || (prog->reganch & ROPT_IMPLICIT) || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */ @@ -1736,7 +1742,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * if (s == startpos) goto after_try; while (1) { - if (regtry(prog, s)) + if (regtry(®info, s)) goto got_it; after_try: if (s >= end) @@ -1754,7 +1760,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * s--; while (s < end) { if (*s++ == '\n') { /* don't need PL_utf8skip here */ - if (regtry(prog, s)) + if (regtry(®info, s)) goto got_it; } } @@ -1762,7 +1768,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * } goto phooey; } else if (prog->reganch & ROPT_ANCH_GPOS) { - if (regtry(prog, PL_reg_ganch)) + if (regtry(®info, reginfo.ganch)) goto got_it; goto phooey; } @@ -1783,7 +1789,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * while (s < strend) { if (*s == ch) { DEBUG_EXECUTE_r( did_match = 1 ); - if (regtry(prog, s)) goto got_it; + if (regtry(®info, s)) goto got_it; s += UTF8SKIP(s); while (s < strend && *s == ch) s += UTF8SKIP(s); @@ -1795,7 +1801,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * while (s < strend) { if (*s == ch) { DEBUG_EXECUTE_r( did_match = 1 ); - if (regtry(prog, s)) goto got_it; + if (regtry(®info, s)) goto got_it; s++; while (s < strend && *s == ch) s++; @@ -1873,14 +1879,14 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * } if (do_utf8) { while (s <= last1) { - if (regtry(prog, s)) + if (regtry(®info, s)) goto got_it; s += UTF8SKIP(s); } } else { while (s <= last1) { - if (regtry(prog, s)) + if (regtry(®info, s)) goto got_it; s++; } @@ -1912,7 +1918,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * int len0; int len1; - regprop(prop, c); + regprop(prog, prop, c); s0 = UTF ? pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60, UNI_DISPLAY_REGEX) : @@ -1926,7 +1932,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * len0, len0, s0, len1, len1, s1); }); - if (find_byclass(prog, c, s, strend, 0)) + if (find_byclass(prog, c, s, strend, ®info)) goto got_it; DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n")); } @@ -1984,7 +1990,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * /* We don't know much -- general case. */ if (do_utf8) { for (;;) { - if (regtry(prog, s)) + if (regtry(®info, s)) goto got_it; if (s >= strend) break; @@ -1993,7 +1999,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * } else { do { - if (regtry(prog, s)) + if (regtry(®info, s)) goto got_it; } while (s++ < strend); } @@ -2059,12 +2065,13 @@ phooey: - regtry - try match at specific point */ STATIC I32 /* 0 failure, 1 success */ -S_regtry(pTHX_ regexp *prog, char *startpos) +S_regtry(pTHX_ const regmatch_info *reginfo, char *startpos) { dVAR; register I32 *sp; register I32 *ep; CHECKPOINT lastcp; + regexp *prog = reginfo->prog; GET_RE_DEBUG_FLAGS_DECL; #ifdef DEBUGGING @@ -2086,21 +2093,21 @@ S_regtry(pTHX_ regexp *prog, char *startpos) /* SAVEI8(cxstack[cxstack_ix].blk_gimme); cxstack[cxstack_ix].blk_gimme = G_SCALAR; */ - if (PL_reg_sv) { + if (reginfo->sv) { /* Make $_ available to executed code. */ - if (PL_reg_sv != DEFSV) { + if (reginfo->sv != DEFSV) { SAVE_DEFSV; - DEFSV = PL_reg_sv; + DEFSV = reginfo->sv; } - if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv) - && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) { + if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv) + && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) { /* prepare for quick setting of pos */ #ifdef PERL_OLD_COPY_ON_WRITE if (SvIsCOW(sv)) sv_force_normal_flags(sv, 0); #endif - mg = sv_magicext(PL_reg_sv, (SV*)0, PERL_MAGIC_regex_global, + mg = sv_magicext(reginfo->sv, (SV*)0, PERL_MAGIC_regex_global, &PL_vtbl_mglob, NULL, 0); mg->mg_len = -1; } @@ -2184,7 +2191,7 @@ S_regtry(pTHX_ regexp *prog, char *startpos) } #endif REGCP_SET(lastcp); - if (regmatch(prog, prog->program + 1)) { + if (regmatch(reginfo, prog->program + 1)) { prog->endp[0] = PL_reginput - PL_bostr; return 1; } @@ -2211,6 +2218,7 @@ typedef struct { regnode *next; char *locinput; I32 nextchr; + int minmod; #ifdef DEBUGGING int regindent; #endif @@ -2226,7 +2234,6 @@ typedef union re_unwind_t { #define sayNO goto no #define sayNO_ANYOF goto no_anyof #define sayYES_FINAL goto yes_final -#define sayYES_LOUD goto yes_loud #define sayNO_FINAL goto no_final #define sayNO_SILENT goto do_no #define saySAME(x) if (x) goto yes; else goto no @@ -2234,11 +2241,17 @@ typedef union re_unwind_t { #define POSCACHE_SUCCESS 0 /* caching success rather than failure */ #define POSCACHE_SEEN 1 /* we know what we're caching */ #define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */ + #define CACHEsayYES STMT_START { \ if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \ - if (!(PL_reg_poscache[0] & (1<u.whilem.cache_offset] |= (1<u.whilem.cache_bit); \ + } \ + else if (PL_reg_poscache[0] & (1<u.whilem.cache_offset] |= (1<u.whilem.cache_bit); \ + } \ + else { \ /* cache records failure, but this is success */ \ DEBUG_r( \ PerlIO_printf(Perl_debug_log, \ @@ -2250,11 +2263,17 @@ typedef union re_unwind_t { } \ sayYES; \ } STMT_END + #define CACHEsayNO STMT_START { \ if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \ - if (!(PL_reg_poscache[0] & (1<u.whilem.cache_offset] |= (1<u.whilem.cache_bit); \ + } \ + else if (!(PL_reg_poscache[0] & (1<u.whilem.cache_offset] |= (1<u.whilem.cache_bit); \ + } \ + else { \ /* cache records success, but this is failure */ \ DEBUG_r( \ PerlIO_printf(Perl_debug_log, \ @@ -2279,6 +2298,9 @@ typedef union re_unwind_t { /* Make sure there is a test for this +1 options in re_tests */ #define TRIE_INITAL_ACCEPT_BUFFLEN 4; +#define SLAB_FIRST(s) (&(s)->states[0]) +#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1]) + /* grab a new slab and return the first slot in it */ STATIC regmatch_state * @@ -2292,7 +2314,7 @@ S_push_slab(pTHX) PL_regmatch_slab->next = s; } PL_regmatch_slab = s; - return &s->states[0]; + return SLAB_FIRST(s); } /* simulate a recursive call to regmatch */ @@ -2316,7 +2338,7 @@ S_push_slab(pTHX) st->locinput = locinput; \ st->resume_state = resume; \ newst = st+1; \ - if (newst > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1])) \ + if (newst > SLAB_LAST(PL_regmatch_slab)) \ newst = S_push_slab(aTHX); \ PL_regmatch_state = newst; \ newst->cc = 0; \ @@ -2331,9 +2353,9 @@ S_push_slab(pTHX) DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth)); \ depth--; \ st--; \ - if (st < &PL_regmatch_slab->states[0]) { \ + if (st < SLAB_FIRST(PL_regmatch_slab)) { \ PL_regmatch_slab = PL_regmatch_slab->prev; \ - st = &PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]; \ + st = SLAB_LAST(PL_regmatch_slab); \ } \ PL_regmatch_state = st; \ scan = st->scan; \ @@ -2449,12 +2471,14 @@ S_push_slab(pTHX) STATIC I32 /* 0 failure, 1 success */ -S_regmatch(pTHX_ regexp *rex, regnode *prog) +S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) { dVAR; register const bool do_utf8 = PL_reg_match_utf8; const U32 uniflags = UTF8_ALLOW_DEFAULT; + regexp *rex = reginfo->prog; + regmatch_slab *orig_slab; regmatch_state *orig_state; @@ -2473,7 +2497,8 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) regnode *inner; /* Next node in internal branch. */ int depth = 0; /* depth of recursion */ regmatch_state *newst; /* when pushing a state, this is the new one */ - regmatch_state *cur_eval = NULL; /* most recent (??{}) state */ + regmatch_state *yes_state = NULL; /* state to pop to on success of + subpattern */ #ifdef DEBUGGING SV *re_debug_flags = NULL; @@ -2486,7 +2511,7 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) Newx(PL_regmatch_slab, 1, regmatch_slab); PL_regmatch_slab->prev = NULL; PL_regmatch_slab->next = NULL; - PL_regmatch_state = &PL_regmatch_slab->states[0] - 1; + PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab); } /* remember current high-water mark for exit */ @@ -2496,7 +2521,7 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) /* grab next free state slot */ st = ++PL_regmatch_state; - if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1])) + if (st > SLAB_LAST(PL_regmatch_slab)) st = PL_regmatch_state = S_push_slab(aTHX); st->minmod = 0; @@ -2536,7 +2561,7 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) pref0_len = 0; if (pref0_len > pref_len) pref0_len = pref_len; - regprop(prop, scan); + regprop(rex, prop, scan); { const char * const s0 = do_utf8 && OP(scan) != CANY ? @@ -2583,7 +2608,7 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) case BOL: if (locinput == PL_bostr) { - /* regtill = regbol; */ + /* reginfo->till = reginfo->bol; */ break; } sayNO; @@ -2599,7 +2624,7 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) break; sayNO; case GPOS: - if (locinput == PL_reg_ganch) + if (locinput == reginfo->ganch) break; sayNO; case EOL: @@ -2678,7 +2703,7 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) /* what trie are we using right now */ reg_trie_data *trie - = (reg_trie_data*)PL_reg_re->data->data[ ARG( scan ) ]; + = (reg_trie_data*)rex->data->data[ ARG( scan ) ]; st->u.trie.accepted = 0; /* how many accepting states we have seen */ result = 0; @@ -2843,7 +2868,7 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) } DEBUG_EXECUTE_r({ reg_trie_data * const trie = (reg_trie_data*) - PL_reg_re->data->data[ARG(scan)]; + rex->data->data[ARG(scan)]; SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ best ].wordnum - 1, 0 ); PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n", REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], @@ -2985,7 +3010,7 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) if (do_utf8) { STRLEN inclasslen = PL_regeol - locinput; - if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8)) + if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8)) sayNO_ANYOF; if (locinput >= PL_regeol) sayNO; @@ -2996,7 +3021,7 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) else { if (nextchr < 0) nextchr = UCHARAT(locinput); - if (!REGINCLASS(scan, (U8*)locinput)) + if (!REGINCLASS(rex, scan, (U8*)locinput)) sayNO_ANYOF; if (!nextchr && locinput >= PL_regeol) sayNO; @@ -3298,12 +3323,11 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) OP_4tree * const oop = PL_op; COP * const ocurcop = PL_curcop; PAD *old_comppad; - struct regexp * const oreg = PL_reg_re; n = ARG(scan); - PL_op = (OP_4tree*)PL_reg_re->data->data[n]; + PL_op = (OP_4tree*)rex->data->data[n]; DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) ); - PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_reg_re->data->data[n + 2]); + PAD_SAVE_LOCAL(old_comppad, (PAD*)rex->data->data[n + 2]); PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr; CALLRUNOPS(aTHX); /* Scalar context. */ @@ -3318,7 +3342,6 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) PL_op = oop; PAD_RESTORE_LOCAL(old_comppad); PL_curcop = ocurcop; - PL_reg_re = oreg; if (!st->logical) { /* /(?{...})/ */ sv_setsv(save_scalar(PL_replgv), ret); @@ -3389,14 +3412,11 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) ((re->reganch & ROPT_UTF8) != 0); if (st->u.eval.toggleutf) PL_reg_flags ^= RF_utf8; st->u.eval.prev_rex = rex; - assert(rex == PL_reg_re); /* XXX */ rex = re; - PL_reg_re = rex; /* XXX */ - st->u.eval.prev_eval = cur_eval; - st->u.eval.prev_slab = PL_regmatch_slab; - st->u.eval.depth = depth; - cur_eval = st; + /* resume to current state on success */ + st->u.yes.prev_yes_state = yes_state; + yes_state = st; PUSH_STATE(newst, resume_EVAL); st = newst; @@ -3522,6 +3542,18 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) /* No need to save/restore up to this paren */ I32 parenfloor = scan->flags; + /* Dave says: + + CURLYX and WHILEM are always paired: they're the moral + equivalent of pp_enteriter anbd pp_iter. + + The only time next could be null is if the node tree is + corrupt. This was mentioned on p5p a few days ago. + + See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html + So we'll assert that this is true: + */ + assert(next); if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */ next += ARG(next); /* XXXX Probably it is better to teach regpush to support @@ -3561,6 +3593,16 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) * that we can try again after backing off. */ + /* Dave says: + + st->cc gets initialised by CURLYX ready for use by WHILEM. + So again, unless somethings been corrupted, st->cc cannot + be null at that point in WHILEM. + + See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html + So we'll assert that this is true: + */ + assert(st->cc); st->u.whilem.lastloc = st->cc->u.curlyx.lastloc; /* Detection of 0-len. */ st->u.whilem.cache_offset = 0; st->u.whilem.cache_bit = 0; @@ -3659,7 +3701,6 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) /* cache records failure */ sayNO_SILENT; } - PL_reg_poscache[st->u.whilem.cache_offset] |= (1<u.whilem.cache_bit); } } @@ -3778,7 +3819,7 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) { I32 type; type = OP(scan); - if (OP(next) != type) /* No choice. */ + if (!next || OP(next) != type) /* No choice. */ next = inner; /* Avoid recursion. */ else { const I32 lastparen = *PL_reglastparen; @@ -3795,6 +3836,7 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) uw->next = next; uw->locinput = locinput; uw->nextchr = nextchr; + uw->minmod = st->minmod; #ifdef DEBUGGING uw->regindent = ++PL_regindent; #endif @@ -3832,7 +3874,11 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) st->u.curlym.maxwanted = st->minmod ? st->ln : n; if (st->u.curlym.maxwanted) { while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) { + /* resume to current state on success */ + st->u.yes.prev_yes_state = yes_state; + yes_state = st; REGMATCH(scan, CURLYM1); + yes_state = st->u.yes.prev_yes_state; /*** all unsaved local vars undefined at this point */ if (!result) break; @@ -3902,15 +3948,24 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) else PL_regendp[st->u.curlym.paren] = -1; } + /* resume to current state on success */ + st->u.yes.prev_yes_state = yes_state; + yes_state = st; REGMATCH(next, CURLYM2); + yes_state = st->u.yes.prev_yes_state; /*** all unsaved local vars undefined at this point */ if (result) - sayYES; + /* XXX tmp sayYES; */ + sayYES_FINAL; REGCP_UNWIND(st->u.curlym.lastcp); } /* Couldn't or didn't -- move forward. */ PL_reginput = locinput; + /* resume to current state on success */ + st->u.yes.prev_yes_state = yes_state; + yes_state = st; REGMATCH(scan, CURLYM3); + yes_state = st->u.yes.prev_yes_state; /*** all unsaved local vars undefined at this point */ if (result) { st->ln++; @@ -3975,10 +4030,15 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) else PL_regendp[st->u.curlym.paren] = -1; } + /* resume to current state on success */ + st->u.yes.prev_yes_state = yes_state; + yes_state = st; REGMATCH(next, CURLYM4); + yes_state = st->u.yes.prev_yes_state; /*** all unsaved local vars undefined at this point */ if (result) - sayYES; + /* XXX tmp sayYES; */ + sayYES_FINAL; REGCP_UNWIND(st->u.curlym.lastcp); } /* Couldn't or didn't -- back up. */ @@ -4078,7 +4138,7 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) PL_reginput = locinput; if (st->minmod) { st->minmod = 0; - if (st->ln && regrepeat(scan, st->ln) < st->ln) + if (st->ln && regrepeat(rex, scan, st->ln) < st->ln) sayNO; locinput = PL_reginput; REGCP_SET(st->u.plus.lastcp); @@ -4150,7 +4210,7 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) /* PL_reginput == old now */ if (locinput != st->u.plus.old) { st->ln = 1; /* Did some */ - if (regrepeat(scan, st->u.plus.count) < st->u.plus.count) + if (regrepeat(rex, scan, st->u.plus.count) < st->u.plus.count) sayNO; } /* PL_reginput == locinput now */ @@ -4194,7 +4254,7 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) } /* Couldn't or didn't -- move forward. */ PL_reginput = locinput; - if (regrepeat(scan, 1)) { + if (regrepeat(rex, scan, 1)) { st->ln++; locinput = PL_reginput; } @@ -4203,7 +4263,7 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) } } else { - n = regrepeat(scan, n); + n = regrepeat(rex, scan, n); locinput = PL_reginput; if (st->ln < n && PL_regkind[(U8)OP(next)] == EOL && (OP(next) != MEOL || @@ -4244,114 +4304,68 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) sayNO; break; case END: - if (cur_eval) { - /* we have successfully completed the execution of a - * postponed re. Pop all states back to the last EVAL - * then continue with the node following the (??{...}) - */ - - /* this simulates a POP_STATE, except that it pops several - * levels, and doesn't restore locinput */ - - st = cur_eval; - PL_regmatch_slab = st->u.eval.prev_slab; - cur_eval = st->u.eval.prev_eval; - depth = st->u.eval.depth; - - PL_regmatch_state = st; - scan = st->scan; - next = st->next; - n = st->n; - - if (st->u.eval.toggleutf) - PL_reg_flags ^= RF_utf8; - ReREFCNT_dec(rex); - rex = st->u.eval.prev_rex; - PL_reg_re = rex; /* XXX */ - /* XXXX This is too dramatic a measure... */ - PL_reg_maxiter = 0; - - /* Restore parens of the caller without popping the - * savestack */ - { - I32 tmp = PL_savestack_ix; - PL_savestack_ix = st->u.eval.lastcp; - regcppop(rex); - PL_savestack_ix = tmp; - } - - - PL_reginput = locinput; - /* resume at node following the (??{...}) */ - break; - - } - - if (locinput < PL_regtill) { + if (locinput < reginfo->till) { DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n", PL_colors[4], (long)(locinput - PL_reg_starttry), - (long)(PL_regtill - PL_reg_starttry), + (long)(reginfo->till - PL_reg_starttry), PL_colors[5])); sayNO_FINAL; /* Cannot match: too short. */ } PL_reginput = locinput; /* put where regtry can find it */ sayYES_FINAL; /* Success! */ - case SUCCEED: + + case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */ + DEBUG_EXECUTE_r( + PerlIO_printf(Perl_debug_log, + "%*s %ssubpattern success...%s\n", + REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])); PL_reginput = locinput; /* put where regtry can find it */ - sayYES_LOUD; /* Success! */ - case SUSPEND: - n = 1; + sayYES_FINAL; /* Success! */ + + case SUSPEND: /* (?>FOO) */ + st->u.ifmatch.wanted = 1; PL_reginput = locinput; goto do_ifmatch; - case UNLESSM: - n = 0; - if (scan->flags) { - char * const s = HOPBACKc(locinput, scan->flags); - if (!s) - goto say_yes; - PL_reginput = s; - } - else - PL_reginput = locinput; - goto do_ifmatch; - case IFMATCH: - n = 1; + + case UNLESSM: /* -ve lookaround: (?!FOO), or with flags, (?u.ifmatch.wanted = 0; + goto ifmatch_trivial_fail_test; + + case IFMATCH: /* +ve lookaround: (?=FOO), or with flags, (?<=foo) */ + st->u.ifmatch.wanted = 1; + ifmatch_trivial_fail_test: if (scan->flags) { char * const s = HOPBACKc(locinput, scan->flags); - if (!s) - goto say_no; + if (!s) { + /* trivial fail */ + if (st->logical) { + st->logical = 0; + st->sw = 1 - st->u.ifmatch.wanted; + } + else if (st->u.ifmatch.wanted) + sayNO; + next = scan + ARG(scan); + if (next == scan) + next = NULL; + break; + } PL_reginput = s; } else PL_reginput = locinput; do_ifmatch: - REGMATCH(NEXTOPER(NEXTOPER(scan)), IFMATCH); - /*** all unsaved local vars undefined at this point */ - if (result != n) { - say_no: - if (st->logical) { - st->logical = 0; - st->sw = 0; - goto do_longjump; - } - else - sayNO; - } - say_yes: - if (st->logical) { - st->logical = 0; - st->sw = 1; - } - if (OP(scan) == SUSPEND) { - locinput = PL_reginput; - nextchr = UCHARAT(locinput); - } - /* FALL THROUGH. */ + /* resume to current state on success */ + st->u.yes.prev_yes_state = yes_state; + yes_state = st; + PUSH_STATE(newst, resume_IFMATCH); + st = newst; + next = NEXTOPER(NEXTOPER(scan)); + break; + case LONGJMP: - do_longjump: next = scan + ARG(scan); if (next == scan) next = NULL; @@ -4381,7 +4395,7 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) /* grab the next free state slot */ st++; - if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1])) + if (st > SLAB_LAST(PL_regmatch_slab)) st = S_push_slab(aTHX); PL_regmatch_state = st; @@ -4412,14 +4426,90 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) /*NOTREACHED*/ sayNO; -yes_loud: - DEBUG_EXECUTE_r( - PerlIO_printf(Perl_debug_log, - "%*s %scould match...%s\n", - REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5]) - ); - goto yes; yes_final: + + if (yes_state) { + /* we have successfully completed a subexpression, but we must now + * pop to the state marked by yes_state and continue from there */ + + /*XXX tmp for CURLYM*/ + regmatch_slab *oslab = PL_regmatch_slab; + regmatch_state *ost = st, *oys=yes_state; + int odepth = depth; + + assert(st != yes_state); + while (yes_state < SLAB_FIRST(PL_regmatch_slab) + || yes_state > SLAB_LAST(PL_regmatch_slab)) + { + /* not in this slab, pop slab */ + depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1); + PL_regmatch_slab = PL_regmatch_slab->prev; + st = SLAB_LAST(PL_regmatch_slab); + } + depth -= (st - yes_state); + DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE TO (%d)\n", depth)); + st = yes_state; + yes_state = st->u.yes.prev_yes_state; + PL_regmatch_state = st; + + switch (st->resume_state) { + case resume_EVAL: + if (st->u.eval.toggleutf) + PL_reg_flags ^= RF_utf8; + ReREFCNT_dec(rex); + rex = st->u.eval.prev_rex; + /* XXXX This is too dramatic a measure... */ + PL_reg_maxiter = 0; + /* Restore parens of the caller without popping the + * savestack */ + { + I32 tmp = PL_savestack_ix; + PL_savestack_ix = st->u.eval.lastcp; + regcppop(rex); + PL_savestack_ix = tmp; + } + PL_reginput = locinput; + /* continue at the node following the (??{...}) */ + next = st->next; + goto reenter; + + case resume_IFMATCH: + if (st->logical) { + st->logical = 0; + st->sw = st->u.ifmatch.wanted; + } + else if (!st->u.ifmatch.wanted) + sayNO; + + if (OP(st->scan) == SUSPEND) + locinput = PL_reginput; + else { + locinput = PL_reginput = st->locinput; + nextchr = UCHARAT(locinput); + } + next = st->scan + ARG(st->scan); + if (next == st->scan) + next = NULL; + goto reenter; + + /* XXX tmp don't handle yes_state yet */ + case resume_CURLYM1: + case resume_CURLYM2: + case resume_CURLYM3: + case resume_CURLYM4: + PL_regmatch_slab =oslab; + st = ost; + PL_regmatch_state = st; + depth = odepth; + yes_state = oys; + DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "XXX revering a CURLYM\n")); + goto yes; + + default: + Perl_croak(aTHX_ "unexpected yes reume state"); + } + } + DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n", PL_colors[4], PL_colors[5])); yes: @@ -4429,7 +4519,8 @@ yes: result = 1; /* XXX this is duplicate(ish) code to that in the do_no section. - * eventually a yes should just pop the whole stack */ + * eventually a yes should just pop the stack back to the current + * yes_state */ if (depth) { /* restore previous state and re-enter */ POP_STATE; @@ -4439,8 +4530,6 @@ yes: goto resume_point_TRIE1; case resume_TRIE2: goto resume_point_TRIE2; - case resume_EVAL: - break; case resume_CURLYX: goto resume_point_CURLYX; case resume_WHILEM1: @@ -4463,8 +4552,6 @@ yes: goto resume_point_CURLYM3; case resume_CURLYM4: goto resume_point_CURLYM4; - case resume_IFMATCH: - goto resume_point_IFMATCH; case resume_PLUS1: goto resume_point_PLUS1; case resume_PLUS2: @@ -4473,6 +4560,9 @@ yes: goto resume_point_PLUS3; case resume_PLUS4: goto resume_point_PLUS4; + + case resume_IFMATCH: + case resume_EVAL: default: Perl_croak(aTHX_ "regexp resume memory corruption"); } @@ -4503,6 +4593,7 @@ do_no: PL_regendp[n] = -1; *PL_reglastparen = n; scan = next = uwb->next; + st->minmod = uwb->minmod; if ( !scan || OP(scan) != (uwb->type == RE_UNWIND_BRANCH ? BRANCH : BRANCHJ) ) { /* Failure */ @@ -4557,8 +4648,7 @@ do_no: PL_reg_flags ^= RF_utf8; ReREFCNT_dec(rex); rex = st->u.eval.prev_rex; - PL_reg_re = rex; /* XXX */ - cur_eval = st->u.eval.prev_eval; + yes_state = st->u.yes.prev_yes_state; /* XXXX This is too dramatic a measure... */ PL_reg_maxiter = 0; @@ -4591,7 +4681,22 @@ do_no: case resume_CURLYM4: goto resume_point_CURLYM4; case resume_IFMATCH: - goto resume_point_IFMATCH; + yes_state = st->u.yes.prev_yes_state; + if (st->logical) { + st->logical = 0; + st->sw = !st->u.ifmatch.wanted; + } + else if (st->u.ifmatch.wanted) + sayNO; + + assert(OP(scan) != SUSPEND); /* XXX DAPM tmp */ + locinput = PL_reginput = st->locinput; + nextchr = UCHARAT(locinput); + next = scan + ARG(scan); + if (next == scan) + next = NULL; + goto reenter; + case resume_PLUS1: goto resume_point_PLUS1; case resume_PLUS2: @@ -4635,7 +4740,7 @@ final_exit: * rather than incrementing count on every character. [Er, except utf8.]] */ STATIC I32 -S_regrepeat(pTHX_ const regnode *p, I32 max) +S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max) { dVAR; register char *scan; @@ -4698,12 +4803,12 @@ S_regrepeat(pTHX_ const regnode *p, I32 max) if (do_utf8) { loceol = PL_regeol; while (hardcount < max && scan < loceol && - reginclass(p, (U8*)scan, 0, do_utf8)) { + reginclass(prog, p, (U8*)scan, 0, do_utf8)) { scan += UTF8SKIP(scan); hardcount++; } } else { - while (scan < loceol && REGINCLASS(p, (U8*)scan)) + while (scan < loceol && REGINCLASS(prog, p, (U8*)scan)) scan++; } break; @@ -4864,7 +4969,7 @@ S_regrepeat(pTHX_ const regnode *p, I32 max) SV * const prop = sv_newmortal(); GET_RE_DEBUG_FLAGS; DEBUG_EXECUTE_r({ - regprop(prop, p); + regprop(prog, prop, p); PerlIO_printf(Perl_debug_log, "%*s %s can match %"IVdf" times out of %"IVdf"...\n", REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max); @@ -4880,13 +4985,13 @@ S_regrepeat(pTHX_ const regnode *p, I32 max) */ SV * -Perl_regclass_swash(pTHX_ register const regnode* node, bool doinit, SV** listsvp, SV **altsvp) +Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp) { dVAR; SV *sw = NULL; SV *si = NULL; SV *alt = NULL; - const struct reg_data *data = PL_reg_re ? PL_reg_re->data : NULL; + const struct reg_data *data = prog ? prog->data : NULL; if (data && data->count) { const U32 n = ARG(node); @@ -4934,7 +5039,7 @@ Perl_regclass_swash(pTHX_ register const regnode* node, bool doinit, SV** listsv */ STATIC bool -S_reginclass(pTHX_ register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8) +S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8) { dVAR; const char flags = ANYOF_FLAGS(n); @@ -4963,7 +5068,7 @@ S_reginclass(pTHX_ register const regnode *n, register const U8* p, STRLEN* lenp match = TRUE; if (!match) { AV *av; - SV * const sw = regclass_swash(n, TRUE, 0, (SV**)&av); + SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av); if (sw) { if (swash_fetch(sw, p, do_utf8))