From: Dave Mitchell Date: Thu, 20 Apr 2006 00:30:53 +0000 (+0000) Subject: regmatch: merge the greedy and non-greedy branches of CURLYM X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=0cadcf80572b885a4a6b7680439582b19fd09887;p=p5sagit%2Fp5-mst-13.2.git regmatch: merge the greedy and non-greedy branches of CURLYM p4raw-id: //depot/perl@27907 --- diff --git a/regexec.c b/regexec.c index 890736c..2c91f11 100644 --- a/regexec.c +++ b/regexec.c @@ -3872,94 +3872,109 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) scan += NEXT_OFF(scan); /* Skip former OPEN. */ PL_reginput = locinput; st->u.curlym.maxwanted = st->minmod ? st->ln : n; - if (st->u.curlym.maxwanted) { - while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) { - /* resume to current state on success */ - st->u.yes.prev_yes_state = yes_state; - yes_state = st; - REGMATCH(scan, CURLYM1); - yes_state = st->u.yes.prev_yes_state; - /*** all unsaved local vars undefined at this point */ - if (!result) - break; - /* on first match, determine length, u.curlym.l */ - if (!st->u.curlym.matches++) { - if (PL_reg_match_utf8) { - char *s = locinput; - while (s < PL_reginput) { - st->u.curlym.l++; - s += UTF8SKIP(s); - } - } - else { - st->u.curlym.l = PL_reginput - locinput; - } - if (st->u.curlym.l == 0) { - st->u.curlym.matches = st->u.curlym.maxwanted; - break; + while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) { + /* resume to current state on success */ + st->u.yes.prev_yes_state = yes_state; + yes_state = st; + REGMATCH(scan, CURLYM1); + yes_state = st->u.yes.prev_yes_state; + /*** all unsaved local vars undefined at this point */ + if (!result) + break; + /* on first match, determine length, u.curlym.l */ + if (!st->u.curlym.matches++) { + if (PL_reg_match_utf8) { + char *s = locinput; + while (s < PL_reginput) { + st->u.curlym.l++; + s += UTF8SKIP(s); } } - locinput = PL_reginput; + else { + st->u.curlym.l = PL_reginput - locinput; + } + if (st->u.curlym.l == 0) { + st->u.curlym.matches = st->u.curlym.maxwanted; + break; + } } + locinput = PL_reginput; } PL_reginput = locinput; - - if (st->minmod) { + if (st->u.curlym.matches < st->ln) { st->minmod = 0; - if (st->ln && st->u.curlym.matches < st->ln) - sayNO; - if (HAS_TEXT(next) || JUMPABLE(next)) { - regnode *text_node = next; + sayNO; + } - if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node); + DEBUG_EXECUTE_r( + PerlIO_printf(Perl_debug_log, + "%*s matched %"IVdf" times, len=%"IVdf"...\n", + (int)(REPORT_CODE_OFF+PL_regindent*2), "", + (IV) st->u.curlym.matches, (IV)st->u.curlym.l) + ); + + /* calculate c1 and c1 for possible match of 1st char + * following curly */ + st->u.curlym.c1 = st->u.curlym.c2 = -1000; + if (HAS_TEXT(next) || JUMPABLE(next)) { + regnode *text_node = next; + if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node); + if (HAS_TEXT(text_node) + && PL_regkind[(U8)OP(text_node)] != REF) + { + st->u.curlym.c1 = (U8)*STRING(text_node); + st->u.curlym.c2 = + (OP(text_node) == EXACTF || OP(text_node) == REFF) + ? PL_fold[st->u.curlym.c1] + : (OP(text_node) == EXACTFL || OP(text_node) == REFFL) + ? PL_fold_locale[st->u.curlym.c1] + : st->u.curlym.c1; + } + } - if (! HAS_TEXT(text_node)) st->u.curlym.c1 = st->u.curlym.c2 = -1000; - else { - if (PL_regkind[(U8)OP(text_node)] == REF) { - st->u.curlym.c1 = st->u.curlym.c2 = -1000; - goto assume_ok_MM; + REGCP_SET(st->u.curlym.lastcp); + + st->u.curlym.minmod = st->minmod; + st->minmod = 0; + while (st->u.curlym.matches >= st->ln + && (st->u.curlym.matches <= n + /* for REG_INFTY, ln could overflow to negative */ + || (n == REG_INFTY && st->u.curlym.matches >= 0))) + { + /* If it could work, try it. */ + if (st->u.curlym.c1 == -1000 || + UCHARAT(PL_reginput) == st->u.curlym.c1 || + UCHARAT(PL_reginput) == st->u.curlym.c2) + { + DEBUG_EXECUTE_r( + PerlIO_printf(Perl_debug_log, + "%*s trying tail with matches=%"IVdf"...\n", + (int)(REPORT_CODE_OFF+PL_regindent*2), + "", (IV)st->u.curlym.matches) + ); + if (st->u.curlym.paren) { + if (st->u.curlym.matches) { + PL_regstartp[st->u.curlym.paren] + = HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr; + PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr; } - else { st->u.curlym.c1 = (U8)*STRING(text_node); } - if (OP(text_node) == EXACTF || OP(text_node) == REFF) - st->u.curlym.c2 = PL_fold[st->u.curlym.c1]; - else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL) - st->u.curlym.c2 = PL_fold_locale[st->u.curlym.c1]; else - st->u.curlym.c2 = st->u.curlym.c1; + PL_regendp[st->u.curlym.paren] = -1; } + /* resume to current state on success */ + st->u.yes.prev_yes_state = yes_state; + yes_state = st; + REGMATCH(next, CURLYM2); + yes_state = st->u.yes.prev_yes_state; + /*** all unsaved local vars undefined at this point */ + if (result) + /* XXX tmp sayYES; */ + sayYES_FINAL; + REGCP_UNWIND(st->u.curlym.lastcp); } - else - st->u.curlym.c1 = st->u.curlym.c2 = -1000; - assume_ok_MM: - REGCP_SET(st->u.curlym.lastcp); - while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */ - /* If it could work, try it. */ - if (st->u.curlym.c1 == -1000 || - UCHARAT(PL_reginput) == st->u.curlym.c1 || - UCHARAT(PL_reginput) == st->u.curlym.c2) - { - if (st->u.curlym.paren) { - if (st->ln) { - PL_regstartp[st->u.curlym.paren] = - HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr; - PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr; - } - else - PL_regendp[st->u.curlym.paren] = -1; - } - /* resume to current state on success */ - st->u.yes.prev_yes_state = yes_state; - yes_state = st; - REGMATCH(next, CURLYM2); - yes_state = st->u.yes.prev_yes_state; - /*** all unsaved local vars undefined at this point */ - if (result) - /* XXX tmp sayYES; */ - sayYES_FINAL; - REGCP_UNWIND(st->u.curlym.lastcp); - } - /* Couldn't or didn't -- move forward. */ + /* Couldn't or didn't -- move forward/backward. */ + if (st->u.curlym.minmod) { PL_reginput = locinput; /* resume to current state on success */ st->u.yes.prev_yes_state = yes_state; @@ -3968,80 +3983,13 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) yes_state = st->u.yes.prev_yes_state; /*** all unsaved local vars undefined at this point */ if (result) { - st->ln++; + st->u.curlym.matches++; locinput = PL_reginput; } else sayNO; } - } - else { - DEBUG_EXECUTE_r( - PerlIO_printf(Perl_debug_log, - "%*s matched %"IVdf" times, len=%"IVdf"...\n", - (int)(REPORT_CODE_OFF+PL_regindent*2), "", - (IV) st->u.curlym.matches, (IV)st->u.curlym.l) - ); - if (st->u.curlym.matches >= st->ln) { - if (HAS_TEXT(next) || JUMPABLE(next)) { - regnode *text_node = next; - - if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node); - - if (! HAS_TEXT(text_node)) st->u.curlym.c1 = st->u.curlym.c2 = -1000; - else { - if (PL_regkind[(U8)OP(text_node)] == REF) { - st->u.curlym.c1 = st->u.curlym.c2 = -1000; - goto assume_ok_REG; - } - else { st->u.curlym.c1 = (U8)*STRING(text_node); } - - if (OP(text_node) == EXACTF || OP(text_node) == REFF) - st->u.curlym.c2 = PL_fold[st->u.curlym.c1]; - else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL) - st->u.curlym.c2 = PL_fold_locale[st->u.curlym.c1]; - else - st->u.curlym.c2 = st->u.curlym.c1; - } - } - else - st->u.curlym.c1 = st->u.curlym.c2 = -1000; - } - assume_ok_REG: - REGCP_SET(st->u.curlym.lastcp); - while (st->u.curlym.matches >= st->ln) { - /* If it could work, try it. */ - if (st->u.curlym.c1 == -1000 || - UCHARAT(PL_reginput) == st->u.curlym.c1 || - UCHARAT(PL_reginput) == st->u.curlym.c2) - { - DEBUG_EXECUTE_r( - PerlIO_printf(Perl_debug_log, - "%*s trying tail with matches=%"IVdf"...\n", - (int)(REPORT_CODE_OFF+PL_regindent*2), - "", (IV)st->u.curlym.matches) - ); - if (st->u.curlym.paren) { - if (st->u.curlym.matches) { - PL_regstartp[st->u.curlym.paren] - = HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr; - PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr; - } - else - PL_regendp[st->u.curlym.paren] = -1; - } - /* resume to current state on success */ - st->u.yes.prev_yes_state = yes_state; - yes_state = st; - REGMATCH(next, CURLYM4); - yes_state = st->u.yes.prev_yes_state; - /*** all unsaved local vars undefined at this point */ - if (result) - /* XXX tmp sayYES; */ - sayYES_FINAL; - REGCP_UNWIND(st->u.curlym.lastcp); - } - /* Couldn't or didn't -- back up. */ + else { st->u.curlym.matches--; locinput = HOPc(locinput, -st->u.curlym.l); PL_reginput = locinput; @@ -4496,7 +4444,6 @@ yes_final: case resume_CURLYM1: case resume_CURLYM2: case resume_CURLYM3: - case resume_CURLYM4: PL_regmatch_slab =oslab; st = ost; PL_regmatch_state = st; @@ -4550,8 +4497,6 @@ yes: goto resume_point_CURLYM2; case resume_CURLYM3: goto resume_point_CURLYM3; - case resume_CURLYM4: - goto resume_point_CURLYM4; case resume_PLUS1: goto resume_point_PLUS1; case resume_PLUS2: @@ -4678,8 +4623,6 @@ do_no: goto resume_point_CURLYM2; case resume_CURLYM3: goto resume_point_CURLYM3; - case resume_CURLYM4: - goto resume_point_CURLYM4; case resume_IFMATCH: yes_state = st->u.yes.prev_yes_state; if (st->logical) { diff --git a/regexp.h b/regexp.h index 5667d1f..0493267 100644 --- a/regexp.h +++ b/regexp.h @@ -184,7 +184,6 @@ typedef enum { resume_CURLYM1, resume_CURLYM2, resume_CURLYM3, - resume_CURLYM4, resume_IFMATCH, resume_PLUS1, resume_PLUS2, @@ -268,6 +267,7 @@ typedef struct regmatch_state { I32 l; I32 matches; I32 maxwanted; + bool minmod; } curlym; struct {