X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regexec.c;h=747b61be3bfb6383d24d20a75045700907255474;hb=ce5e94717f361c3fd6b9b0fb704412d30f3ccf66;hp=87c0e993e1f784e0d856cc9cd6b9f60961fa4c65;hpb=30b2893d4b20c43880f1a64192bda137a40ee39f;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regexec.c b/regexec.c index 87c0e99..747b61b 100644 --- a/regexec.c +++ b/regexec.c @@ -31,28 +31,7 @@ */ #ifdef PERL_EXT_RE_BUILD -/* need to replace pregcomp et al, so enable that */ -# ifndef PERL_IN_XSUB_RE -# define PERL_IN_XSUB_RE -# endif -/* need access to debugger hooks */ -# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING) -# define DEBUGGING -# endif -#endif - -#ifdef PERL_IN_XSUB_RE -/* We *really* need to overwrite these symbols: */ -# define Perl_regexec_flags my_regexec -# define Perl_regdump my_regdump -# define Perl_regprop my_regprop -# define Perl_re_intuit_start my_re_intuit_start -/* *These* symbols are masked to allow static link. */ -# define Perl_pregexec my_pregexec -# define Perl_reginitcolors my_reginitcolors -# define Perl_regclass_swash my_regclass_swash - -# define PERL_NO_GET_CONTEXT +#include "re_top.h" #endif /* @@ -91,7 +70,11 @@ #define PERL_IN_REGEXEC_C #include "perl.h" -#include "regcomp.h" +#ifdef PERL_IN_XSUB_RE +# include "re_comp.h" +#else +# include "regcomp.h" +#endif #define RF_tainted 1 /* tainted information used? */ #define RF_warned 2 /* warned about big count? */ @@ -299,6 +282,7 @@ S_regcppop(pTHX_ const regexp *rex) * pregexec and friends */ +#ifndef PERL_IN_XSUB_RE /* - pregexec - match a regexp against a string */ @@ -314,7 +298,7 @@ Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *stren regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL, nosave ? 0 : REXEC_COPY_STR); } - +#endif /* * Need to implement the following flags for reg_anch: @@ -2234,7 +2218,6 @@ typedef union re_unwind_t { #define sayNO goto no #define sayNO_ANYOF goto no_anyof #define sayYES_FINAL goto yes_final -#define sayYES_LOUD goto yes_loud #define sayNO_FINAL goto no_final #define sayNO_SILENT goto do_no #define saySAME(x) if (x) goto yes; else goto no @@ -2242,11 +2225,17 @@ typedef union re_unwind_t { #define POSCACHE_SUCCESS 0 /* caching success rather than failure */ #define POSCACHE_SEEN 1 /* we know what we're caching */ #define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */ + #define CACHEsayYES STMT_START { \ if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \ - if (!(PL_reg_poscache[0] & (1<u.whilem.cache_offset] |= (1<u.whilem.cache_bit); \ + } \ + else if (PL_reg_poscache[0] & (1<u.whilem.cache_offset] |= (1<u.whilem.cache_bit); \ + } \ + else { \ /* cache records failure, but this is success */ \ DEBUG_r( \ PerlIO_printf(Perl_debug_log, \ @@ -2258,11 +2247,17 @@ typedef union re_unwind_t { } \ sayYES; \ } STMT_END + #define CACHEsayNO STMT_START { \ if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \ - if (!(PL_reg_poscache[0] & (1<u.whilem.cache_offset] |= (1<u.whilem.cache_bit); \ + } \ + else if (!(PL_reg_poscache[0] & (1<u.whilem.cache_offset] |= (1<u.whilem.cache_bit); \ + } \ + else { \ /* cache records success, but this is failure */ \ DEBUG_r( \ PerlIO_printf(Perl_debug_log, \ @@ -2287,11 +2282,20 @@ typedef union re_unwind_t { /* Make sure there is a test for this +1 options in re_tests */ #define TRIE_INITAL_ACCEPT_BUFFLEN 4; +/* this value indiciates that the c1/c2 "next char" test should be skipped */ +#define CHRTEST_VOID -1000 + +#define SLAB_FIRST(s) (&(s)->states[0]) +#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1]) + /* grab a new slab and return the first slot in it */ STATIC regmatch_state * S_push_slab(pTHX) { +#if PERL_VERSION < 9 + dMY_CXT; +#endif regmatch_slab *s = PL_regmatch_slab->next; if (!s) { Newx(s, 1, regmatch_slab); @@ -2300,7 +2304,7 @@ S_push_slab(pTHX) PL_regmatch_slab->next = s; } PL_regmatch_slab = s; - return &s->states[0]; + return SLAB_FIRST(s); } /* simulate a recursive call to regmatch */ @@ -2324,7 +2328,7 @@ S_push_slab(pTHX) st->locinput = locinput; \ st->resume_state = resume; \ newst = st+1; \ - if (newst > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1])) \ + if (newst > SLAB_LAST(PL_regmatch_slab)) \ newst = S_push_slab(aTHX); \ PL_regmatch_state = newst; \ newst->cc = 0; \ @@ -2339,9 +2343,9 @@ S_push_slab(pTHX) DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth)); \ depth--; \ st--; \ - if (st < &PL_regmatch_slab->states[0]) { \ + if (st < SLAB_FIRST(PL_regmatch_slab)) { \ PL_regmatch_slab = PL_regmatch_slab->prev; \ - st = &PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]; \ + st = SLAB_LAST(PL_regmatch_slab); \ } \ PL_regmatch_state = st; \ scan = st->scan; \ @@ -2459,6 +2463,9 @@ S_push_slab(pTHX) STATIC I32 /* 0 failure, 1 success */ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) { +#if PERL_VERSION < 9 + dMY_CXT; +#endif dVAR; register const bool do_utf8 = PL_reg_match_utf8; const U32 uniflags = UTF8_ALLOW_DEFAULT; @@ -2483,7 +2490,8 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) regnode *inner; /* Next node in internal branch. */ int depth = 0; /* depth of recursion */ regmatch_state *newst; /* when pushing a state, this is the new one */ - regmatch_state *cur_eval = NULL; /* most recent (??{}) state */ + regmatch_state *yes_state = NULL; /* state to pop to on success of + subpattern */ #ifdef DEBUGGING SV *re_debug_flags = NULL; @@ -2496,7 +2504,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) Newx(PL_regmatch_slab, 1, regmatch_slab); PL_regmatch_slab->prev = NULL; PL_regmatch_slab->next = NULL; - PL_regmatch_state = &PL_regmatch_slab->states[0] - 1; + PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab); } /* remember current high-water mark for exit */ @@ -2506,7 +2514,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) /* grab next free state slot */ st = ++PL_regmatch_state; - if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1])) + if (st > SLAB_LAST(PL_regmatch_slab)) st = PL_regmatch_state = S_push_slab(aTHX); st->minmod = 0; @@ -3399,10 +3407,9 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) st->u.eval.prev_rex = rex; rex = re; - st->u.eval.prev_eval = cur_eval; - st->u.eval.prev_slab = PL_regmatch_slab; - st->u.eval.depth = depth; - cur_eval = st; + /* resume to current state on success */ + st->u.yes.prev_yes_state = yes_state; + yes_state = st; PUSH_STATE(newst, resume_EVAL); st = newst; @@ -3528,6 +3535,18 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) /* No need to save/restore up to this paren */ I32 parenfloor = scan->flags; + /* Dave says: + + CURLYX and WHILEM are always paired: they're the moral + equivalent of pp_enteriter anbd pp_iter. + + The only time next could be null is if the node tree is + corrupt. This was mentioned on p5p a few days ago. + + See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html + So we'll assert that this is true: + */ + assert(next); if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */ next += ARG(next); /* XXXX Probably it is better to teach regpush to support @@ -3567,6 +3586,16 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) * that we can try again after backing off. */ + /* Dave says: + + st->cc gets initialised by CURLYX ready for use by WHILEM. + So again, unless somethings been corrupted, st->cc cannot + be null at that point in WHILEM. + + See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html + So we'll assert that this is true: + */ + assert(st->cc); st->u.whilem.lastloc = st->cc->u.curlyx.lastloc; /* Detection of 0-len. */ st->u.whilem.cache_offset = 0; st->u.whilem.cache_bit = 0; @@ -3665,7 +3694,6 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) /* cache records failure */ sayNO_SILENT; } - PL_reg_poscache[st->u.whilem.cache_offset] |= (1<u.whilem.cache_bit); } } @@ -3837,158 +3865,124 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) scan += NEXT_OFF(scan); /* Skip former OPEN. */ PL_reginput = locinput; st->u.curlym.maxwanted = st->minmod ? st->ln : n; - if (st->u.curlym.maxwanted) { - while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) { - REGMATCH(scan, CURLYM1); - /*** all unsaved local vars undefined at this point */ - if (!result) - break; - /* on first match, determine length, u.curlym.l */ - if (!st->u.curlym.matches++) { - if (PL_reg_match_utf8) { - char *s = locinput; - while (s < PL_reginput) { - st->u.curlym.l++; - s += UTF8SKIP(s); - } - } - else { - st->u.curlym.l = PL_reginput - locinput; - } - if (st->u.curlym.l == 0) { - st->u.curlym.matches = st->u.curlym.maxwanted; - break; + while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) { + /* resume to current state on success */ + st->u.yes.prev_yes_state = yes_state; + yes_state = st; + REGMATCH(scan, CURLYM1); + yes_state = st->u.yes.prev_yes_state; + /*** all unsaved local vars undefined at this point */ + if (!result) + break; + /* on first match, determine length, u.curlym.l */ + if (!st->u.curlym.matches++) { + if (PL_reg_match_utf8) { + char *s = locinput; + while (s < PL_reginput) { + st->u.curlym.l++; + s += UTF8SKIP(s); } } - locinput = PL_reginput; + else { + st->u.curlym.l = PL_reginput - locinput; + } + if (st->u.curlym.l == 0) { + st->u.curlym.matches = st->u.curlym.maxwanted; + break; + } } + locinput = PL_reginput; } PL_reginput = locinput; - - if (st->minmod) { + if (st->u.curlym.matches < st->ln) { st->minmod = 0; - if (st->ln && st->u.curlym.matches < st->ln) - sayNO; - if (HAS_TEXT(next) || JUMPABLE(next)) { - regnode *text_node = next; + sayNO; + } - if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node); + DEBUG_EXECUTE_r( + PerlIO_printf(Perl_debug_log, + "%*s matched %"IVdf" times, len=%"IVdf"...\n", + (int)(REPORT_CODE_OFF+PL_regindent*2), "", + (IV) st->u.curlym.matches, (IV)st->u.curlym.l) + ); + + /* calculate c1 and c1 for possible match of 1st char + * following curly */ + st->u.curlym.c1 = st->u.curlym.c2 = CHRTEST_VOID; + if (HAS_TEXT(next) || JUMPABLE(next)) { + regnode *text_node = next; + if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node); + if (HAS_TEXT(text_node) + && PL_regkind[(U8)OP(text_node)] != REF) + { + st->u.curlym.c1 = (U8)*STRING(text_node); + st->u.curlym.c2 = + (OP(text_node) == EXACTF || OP(text_node) == REFF) + ? PL_fold[st->u.curlym.c1] + : (OP(text_node) == EXACTFL || OP(text_node) == REFFL) + ? PL_fold_locale[st->u.curlym.c1] + : st->u.curlym.c1; + } + } - if (! HAS_TEXT(text_node)) st->u.curlym.c1 = st->u.curlym.c2 = -1000; - else { - if (PL_regkind[(U8)OP(text_node)] == REF) { - st->u.curlym.c1 = st->u.curlym.c2 = -1000; - goto assume_ok_MM; + REGCP_SET(st->u.curlym.lastcp); + + st->u.curlym.minmod = st->minmod; + st->minmod = 0; + while (st->u.curlym.matches >= st->ln + && (st->u.curlym.matches <= n + /* for REG_INFTY, ln could overflow to negative */ + || (n == REG_INFTY && st->u.curlym.matches >= 0))) + { + /* If it could work, try it. */ + if (st->u.curlym.c1 == CHRTEST_VOID || + UCHARAT(PL_reginput) == st->u.curlym.c1 || + UCHARAT(PL_reginput) == st->u.curlym.c2) + { + DEBUG_EXECUTE_r( + PerlIO_printf(Perl_debug_log, + "%*s trying tail with matches=%"IVdf"...\n", + (int)(REPORT_CODE_OFF+PL_regindent*2), + "", (IV)st->u.curlym.matches) + ); + if (st->u.curlym.paren) { + if (st->u.curlym.matches) { + PL_regstartp[st->u.curlym.paren] + = HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr; + PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr; } - else { st->u.curlym.c1 = (U8)*STRING(text_node); } - if (OP(text_node) == EXACTF || OP(text_node) == REFF) - st->u.curlym.c2 = PL_fold[st->u.curlym.c1]; - else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL) - st->u.curlym.c2 = PL_fold_locale[st->u.curlym.c1]; else - st->u.curlym.c2 = st->u.curlym.c1; + PL_regendp[st->u.curlym.paren] = -1; } + /* resume to current state on success */ + st->u.yes.prev_yes_state = yes_state; + yes_state = st; + REGMATCH(next, CURLYM2); + yes_state = st->u.yes.prev_yes_state; + /*** all unsaved local vars undefined at this point */ + if (result) + /* XXX tmp sayYES; */ + sayYES_FINAL; + REGCP_UNWIND(st->u.curlym.lastcp); } - else - st->u.curlym.c1 = st->u.curlym.c2 = -1000; - assume_ok_MM: - REGCP_SET(st->u.curlym.lastcp); - while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */ - /* If it could work, try it. */ - if (st->u.curlym.c1 == -1000 || - UCHARAT(PL_reginput) == st->u.curlym.c1 || - UCHARAT(PL_reginput) == st->u.curlym.c2) - { - if (st->u.curlym.paren) { - if (st->ln) { - PL_regstartp[st->u.curlym.paren] = - HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr; - PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr; - } - else - PL_regendp[st->u.curlym.paren] = -1; - } - REGMATCH(next, CURLYM2); - /*** all unsaved local vars undefined at this point */ - if (result) - sayYES; - REGCP_UNWIND(st->u.curlym.lastcp); - } - /* Couldn't or didn't -- move forward. */ + /* Couldn't or didn't -- move forward/backward. */ + if (st->u.curlym.minmod) { PL_reginput = locinput; + /* resume to current state on success */ + st->u.yes.prev_yes_state = yes_state; + yes_state = st; REGMATCH(scan, CURLYM3); + yes_state = st->u.yes.prev_yes_state; /*** all unsaved local vars undefined at this point */ if (result) { - st->ln++; + st->u.curlym.matches++; locinput = PL_reginput; } else sayNO; } - } - else { - DEBUG_EXECUTE_r( - PerlIO_printf(Perl_debug_log, - "%*s matched %"IVdf" times, len=%"IVdf"...\n", - (int)(REPORT_CODE_OFF+PL_regindent*2), "", - (IV) st->u.curlym.matches, (IV)st->u.curlym.l) - ); - if (st->u.curlym.matches >= st->ln) { - if (HAS_TEXT(next) || JUMPABLE(next)) { - regnode *text_node = next; - - if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node); - - if (! HAS_TEXT(text_node)) st->u.curlym.c1 = st->u.curlym.c2 = -1000; - else { - if (PL_regkind[(U8)OP(text_node)] == REF) { - st->u.curlym.c1 = st->u.curlym.c2 = -1000; - goto assume_ok_REG; - } - else { st->u.curlym.c1 = (U8)*STRING(text_node); } - - if (OP(text_node) == EXACTF || OP(text_node) == REFF) - st->u.curlym.c2 = PL_fold[st->u.curlym.c1]; - else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL) - st->u.curlym.c2 = PL_fold_locale[st->u.curlym.c1]; - else - st->u.curlym.c2 = st->u.curlym.c1; - } - } - else - st->u.curlym.c1 = st->u.curlym.c2 = -1000; - } - assume_ok_REG: - REGCP_SET(st->u.curlym.lastcp); - while (st->u.curlym.matches >= st->ln) { - /* If it could work, try it. */ - if (st->u.curlym.c1 == -1000 || - UCHARAT(PL_reginput) == st->u.curlym.c1 || - UCHARAT(PL_reginput) == st->u.curlym.c2) - { - DEBUG_EXECUTE_r( - PerlIO_printf(Perl_debug_log, - "%*s trying tail with matches=%"IVdf"...\n", - (int)(REPORT_CODE_OFF+PL_regindent*2), - "", (IV)st->u.curlym.matches) - ); - if (st->u.curlym.paren) { - if (st->u.curlym.matches) { - PL_regstartp[st->u.curlym.paren] - = HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr; - PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr; - } - else - PL_regendp[st->u.curlym.paren] = -1; - } - REGMATCH(next, CURLYM4); - /*** all unsaved local vars undefined at this point */ - if (result) - sayYES; - REGCP_UNWIND(st->u.curlym.lastcp); - } - /* Couldn't or didn't -- back up. */ + else { st->u.curlym.matches--; locinput = HOPc(locinput, -st->u.curlym.l); PL_reginput = locinput; @@ -4043,10 +4037,11 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node); - if (! HAS_TEXT(text_node)) st->u.plus.c1 = st->u.plus.c2 = -1000; + if (! HAS_TEXT(text_node)) + st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID; else { if (PL_regkind[(U8)OP(text_node)] == REF) { - st->u.plus.c1 = st->u.plus.c2 = -1000; + st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID; goto assume_ok_easy; } else { s = (U8*)STRING(text_node); } @@ -4080,7 +4075,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) } } else - st->u.plus.c1 = st->u.plus.c2 = -1000; + st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID; assume_ok_easy: PL_reginput = locinput; if (st->minmod) { @@ -4089,7 +4084,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) sayNO; locinput = PL_reginput; REGCP_SET(st->u.plus.lastcp); - if (st->u.plus.c1 != -1000) { + if (st->u.plus.c1 != CHRTEST_VOID) { st->u.plus.old = locinput; st->u.plus.count = 0; @@ -4138,11 +4133,11 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) st->u.plus.count++; } } else { - STRLEN len; /* count initialised to * utf8_distance(old, locinput) */ while (locinput <= st->u.plus.e) { - UV c = utf8n_to_uvchr((U8*)locinput, + STRLEN len; + const UV c = utf8n_to_uvchr((U8*)locinput, UTF8_MAXBYTES, &len, uniflags); if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2) @@ -4177,7 +4172,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) else while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */ UV c; - if (st->u.plus.c1 != -1000) { + if (st->u.plus.c1 != CHRTEST_VOID) { if (do_utf8) c = utf8n_to_uvchr((U8*)PL_reginput, UTF8_MAXBYTES, 0, @@ -4193,7 +4188,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) } } /* If it could work, try it. */ - else if (st->u.plus.c1 == -1000) + else if (st->u.plus.c1 == CHRTEST_VOID) { TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS3); /*** all unsaved local vars undefined at this point */ @@ -4227,7 +4222,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) { UV c = 0; while (n >= st->ln) { - if (st->u.plus.c1 != -1000) { + if (st->u.plus.c1 != CHRTEST_VOID) { if (do_utf8) c = utf8n_to_uvchr((U8*)PL_reginput, UTF8_MAXBYTES, 0, @@ -4236,7 +4231,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) c = UCHARAT(PL_reginput); } /* If it could work, try it. */ - if (st->u.plus.c1 == -1000 || c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2) + if (st->u.plus.c1 == CHRTEST_VOID || c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2) { TRYPAREN(st->u.plus.paren, n, PL_reginput, PLUS4); /*** all unsaved local vars undefined at this point */ @@ -4251,48 +4246,6 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) sayNO; break; case END: - if (cur_eval) { - /* we have successfully completed the execution of a - * postponed re. Pop all states back to the last EVAL - * then continue with the node following the (??{...}) - */ - - /* this simulates a POP_STATE, except that it pops several - * levels, and doesn't restore locinput */ - - st = cur_eval; - PL_regmatch_slab = st->u.eval.prev_slab; - cur_eval = st->u.eval.prev_eval; - depth = st->u.eval.depth; - - PL_regmatch_state = st; - scan = st->scan; - next = st->next; - n = st->n; - - if (st->u.eval.toggleutf) - PL_reg_flags ^= RF_utf8; - ReREFCNT_dec(rex); - rex = st->u.eval.prev_rex; - /* XXXX This is too dramatic a measure... */ - PL_reg_maxiter = 0; - - /* Restore parens of the caller without popping the - * savestack */ - { - I32 tmp = PL_savestack_ix; - PL_savestack_ix = st->u.eval.lastcp; - regcppop(rex); - PL_savestack_ix = tmp; - } - - - PL_reginput = locinput; - /* resume at node following the (??{...}) */ - break; - - } - if (locinput < reginfo->till) { DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n", @@ -4304,60 +4257,57 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) } PL_reginput = locinput; /* put where regtry can find it */ sayYES_FINAL; /* Success! */ - case SUCCEED: + + case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */ + DEBUG_EXECUTE_r( + PerlIO_printf(Perl_debug_log, + "%*s %ssubpattern success...%s\n", + REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])); PL_reginput = locinput; /* put where regtry can find it */ - sayYES_LOUD; /* Success! */ - case SUSPEND: - n = 1; + sayYES_FINAL; /* Success! */ + + case SUSPEND: /* (?>FOO) */ + st->u.ifmatch.wanted = 1; PL_reginput = locinput; goto do_ifmatch; - case UNLESSM: - n = 0; - if (scan->flags) { - char * const s = HOPBACKc(locinput, scan->flags); - if (!s) - goto say_yes; - PL_reginput = s; - } - else - PL_reginput = locinput; - goto do_ifmatch; - case IFMATCH: - n = 1; + + case UNLESSM: /* -ve lookaround: (?!FOO), or with flags, (?u.ifmatch.wanted = 0; + goto ifmatch_trivial_fail_test; + + case IFMATCH: /* +ve lookaround: (?=FOO), or with flags, (?<=foo) */ + st->u.ifmatch.wanted = 1; + ifmatch_trivial_fail_test: if (scan->flags) { char * const s = HOPBACKc(locinput, scan->flags); - if (!s) - goto say_no; + if (!s) { + /* trivial fail */ + if (st->logical) { + st->logical = 0; + st->sw = 1 - st->u.ifmatch.wanted; + } + else if (st->u.ifmatch.wanted) + sayNO; + next = scan + ARG(scan); + if (next == scan) + next = NULL; + break; + } PL_reginput = s; } else PL_reginput = locinput; do_ifmatch: - REGMATCH(NEXTOPER(NEXTOPER(scan)), IFMATCH); - /*** all unsaved local vars undefined at this point */ - if (result != n) { - say_no: - if (st->logical) { - st->logical = 0; - st->sw = 0; - goto do_longjump; - } - else - sayNO; - } - say_yes: - if (st->logical) { - st->logical = 0; - st->sw = 1; - } - if (OP(scan) == SUSPEND) { - locinput = PL_reginput; - nextchr = UCHARAT(locinput); - } - /* FALL THROUGH. */ + /* resume to current state on success */ + st->u.yes.prev_yes_state = yes_state; + yes_state = st; + PUSH_STATE(newst, resume_IFMATCH); + st = newst; + next = NEXTOPER(NEXTOPER(scan)); + break; + case LONGJMP: - do_longjump: next = scan + ARG(scan); if (next == scan) next = NULL; @@ -4387,7 +4337,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) /* grab the next free state slot */ st++; - if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1])) + if (st > SLAB_LAST(PL_regmatch_slab)) st = S_push_slab(aTHX); PL_regmatch_state = st; @@ -4418,14 +4368,90 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) /*NOTREACHED*/ sayNO; -yes_loud: - DEBUG_EXECUTE_r( - PerlIO_printf(Perl_debug_log, - "%*s %scould match...%s\n", - REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5]) - ); - goto yes; yes_final: + + if (yes_state) { + /* we have successfully completed a subexpression, but we must now + * pop to the state marked by yes_state and continue from there */ + + /*XXX tmp for CURLYM*/ + regmatch_slab * const oslab = PL_regmatch_slab; + regmatch_state * const ost = st; + regmatch_state * const oys = yes_state; + int odepth = depth; + + assert(st != yes_state); + while (yes_state < SLAB_FIRST(PL_regmatch_slab) + || yes_state > SLAB_LAST(PL_regmatch_slab)) + { + /* not in this slab, pop slab */ + depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1); + PL_regmatch_slab = PL_regmatch_slab->prev; + st = SLAB_LAST(PL_regmatch_slab); + } + depth -= (st - yes_state); + DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE TO (%d)\n", depth)); + st = yes_state; + yes_state = st->u.yes.prev_yes_state; + PL_regmatch_state = st; + + switch (st->resume_state) { + case resume_EVAL: + if (st->u.eval.toggleutf) + PL_reg_flags ^= RF_utf8; + ReREFCNT_dec(rex); + rex = st->u.eval.prev_rex; + /* XXXX This is too dramatic a measure... */ + PL_reg_maxiter = 0; + /* Restore parens of the caller without popping the + * savestack */ + { + const I32 tmp = PL_savestack_ix; + PL_savestack_ix = st->u.eval.lastcp; + regcppop(rex); + PL_savestack_ix = tmp; + } + PL_reginput = locinput; + /* continue at the node following the (??{...}) */ + next = st->next; + goto reenter; + + case resume_IFMATCH: + if (st->logical) { + st->logical = 0; + st->sw = st->u.ifmatch.wanted; + } + else if (!st->u.ifmatch.wanted) + sayNO; + + if (OP(st->scan) == SUSPEND) + locinput = PL_reginput; + else { + locinput = PL_reginput = st->locinput; + nextchr = UCHARAT(locinput); + } + next = st->scan + ARG(st->scan); + if (next == st->scan) + next = NULL; + goto reenter; + + /* XXX tmp don't handle yes_state yet */ + case resume_CURLYM1: + case resume_CURLYM2: + case resume_CURLYM3: + PL_regmatch_slab =oslab; + st = ost; + PL_regmatch_state = st; + depth = odepth; + yes_state = oys; + DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "XXX revering a CURLYM\n")); + goto yes; + + default: + Perl_croak(aTHX_ "unexpected yes reume state"); + } + } + DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n", PL_colors[4], PL_colors[5])); yes: @@ -4435,7 +4461,8 @@ yes: result = 1; /* XXX this is duplicate(ish) code to that in the do_no section. - * eventually a yes should just pop the whole stack */ + * eventually a yes should just pop the stack back to the current + * yes_state */ if (depth) { /* restore previous state and re-enter */ POP_STATE; @@ -4445,8 +4472,6 @@ yes: goto resume_point_TRIE1; case resume_TRIE2: goto resume_point_TRIE2; - case resume_EVAL: - break; case resume_CURLYX: goto resume_point_CURLYX; case resume_WHILEM1: @@ -4467,10 +4492,6 @@ yes: goto resume_point_CURLYM2; case resume_CURLYM3: goto resume_point_CURLYM3; - case resume_CURLYM4: - goto resume_point_CURLYM4; - case resume_IFMATCH: - goto resume_point_IFMATCH; case resume_PLUS1: goto resume_point_PLUS1; case resume_PLUS2: @@ -4479,6 +4500,9 @@ yes: goto resume_point_PLUS3; case resume_PLUS4: goto resume_point_PLUS4; + + case resume_IFMATCH: + case resume_EVAL: default: Perl_croak(aTHX_ "regexp resume memory corruption"); } @@ -4564,7 +4588,7 @@ do_no: PL_reg_flags ^= RF_utf8; ReREFCNT_dec(rex); rex = st->u.eval.prev_rex; - cur_eval = st->u.eval.prev_eval; + yes_state = st->u.yes.prev_yes_state; /* XXXX This is too dramatic a measure... */ PL_reg_maxiter = 0; @@ -4594,10 +4618,23 @@ do_no: goto resume_point_CURLYM2; case resume_CURLYM3: goto resume_point_CURLYM3; - case resume_CURLYM4: - goto resume_point_CURLYM4; case resume_IFMATCH: - goto resume_point_IFMATCH; + yes_state = st->u.yes.prev_yes_state; + if (st->logical) { + st->logical = 0; + st->sw = !st->u.ifmatch.wanted; + } + else if (st->u.ifmatch.wanted) + sayNO; + + assert(OP(scan) != SUSPEND); /* XXX DAPM tmp */ + locinput = PL_reginput = st->locinput; + nextchr = UCHARAT(locinput); + next = scan + ARG(scan); + if (next == scan) + next = NULL; + goto reenter; + case resume_PLUS1: goto resume_point_PLUS1; case resume_PLUS2: @@ -4619,10 +4656,10 @@ final_exit: /* free all slabs above current one */ if (orig_slab->next) { - regmatch_slab *osl, *sl = orig_slab->next; + regmatch_slab *sl = orig_slab->next; orig_slab->next = NULL; while (sl) { - osl = sl; + regmatch_slab * const osl = sl; sl = sl->next; Safefree(osl); } @@ -4881,6 +4918,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max) } +#ifndef PERL_IN_XSUB_RE /* - regclass_swash - prepare the utf8 swash */ @@ -4928,6 +4966,7 @@ Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool return sw; } +#endif /* - reginclass - determine if a character falls into a character class