X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regexec.c;h=efdd8df7abe2b84d4a7e8312803837b008ae61eb;hb=52a55424e4624fc79eb8894fb91c5e2f4a9018ab;hp=60d93f7ad782340f8cc3ea37bbf61f674655fef1;hpb=a72c75842468bcd2a7cf17032844c4040a5a31e2;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regexec.c b/regexec.c index 60d93f7..efdd8df 100644 --- a/regexec.c +++ b/regexec.c @@ -131,15 +131,22 @@ /* for use after a quantifier and before an EXACT-like node -- japhy */ #define JUMPABLE(rn) ( \ OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \ - OP(rn) == SUSPEND || OP(rn) == IFMATCH \ + OP(rn) == SUSPEND || OP(rn) == IFMATCH || \ + OP(rn) == PLUS || OP(rn) == MINMOD || \ + (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \ ) -#define NEAR_EXACT(rn) (PL_regkind[(U8)OP(rn)] == EXACT || JUMPABLE(rn)) +#define HAS_TEXT(rn) ( \ + PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \ +) -#define NEXT_IMPT(rn) STMT_START { \ +#define FIND_NEXT_IMPT(rn) STMT_START { \ while (JUMPABLE(rn)) \ - if (OP(rn) == SUSPEND || OP(rn) == IFMATCH) \ + if (OP(rn) == SUSPEND || OP(rn) == IFMATCH || \ + PL_regkind[(U8)OP(rn)] == CURLY) \ rn = NEXTOPER(NEXTOPER(rn)); \ + else if (OP(rn) == PLUS) \ + rn = NEXTOPER(rn); \ else rn += NEXT_OFF(rn); \ } STMT_END @@ -383,7 +390,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, char *check_at = Nullch; /* check substr found at this pos */ #ifdef DEBUGGING char *i_strpos = strpos; - SV *dsv = sv_2mortal(newSVpvn("", 0)); + SV *dsv = PERL_DEBUG_PAD_ZERO(0); #endif DEBUG_r({ @@ -1458,7 +1465,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * SV* oreplsv = GvSV(PL_replgv); bool do_utf8 = DO_UTF8(sv); #ifdef DEBUGGING - SV *dsv = sv_2mortal(newSVpvn("", 0)); + SV *dsv = PERL_DEBUG_PAD_ZERO(0); #endif PL_regcc = 0; @@ -1936,6 +1943,12 @@ S_regtry(pTHX_ regexp *prog, char *startpos) New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*); } +#ifdef DEBUGGING + sv_setpvn(PERL_DEBUG_PAD(0), "", 0); + sv_setpvn(PERL_DEBUG_PAD(1), "", 0); + sv_setpvn(PERL_DEBUG_PAD(2), "", 0); +#endif + /* XXXX What this code is doing here?!!! There should be no need to do this again and again, PL_reglastparen should take care of this! --ilya*/ @@ -2043,9 +2056,9 @@ S_regmatch(pTHX_ regnode *prog) #endif register bool do_utf8 = PL_reg_match_utf8; #ifdef DEBUGGING - SV *dsv0 = sv_2mortal(newSVpvn("", 0)); - SV *dsv1 = sv_2mortal(newSVpvn("", 0)); - SV *dsv2 = sv_2mortal(newSVpvn("", 0)); + SV *dsv0 = PERL_DEBUG_PAD_ZERO(0); + SV *dsv1 = PERL_DEBUG_PAD_ZERO(1); + SV *dsv2 = PERL_DEBUG_PAD_ZERO(2); #endif #ifdef DEBUGGING @@ -2204,43 +2217,40 @@ S_regmatch(pTHX_ regnode *prog) s = STRING(scan); ln = STR_LEN(scan); if (do_utf8 != (UTF!=0)) { + /* The target and the pattern have differing "utf8ness". */ char *l = locinput; char *e = s + ln; STRLEN len; - if (do_utf8) + if (do_utf8) { + /* The target is utf8, the pattern is not utf8. */ while (s < e) { - UV uv; - if (l >= PL_regeol) - sayNO; - uv = NATIVE_TO_UNI(*(U8*)s); - if (UTF8_IS_START(uv)) { - len = UTF8SKIP(s); - if (memNE(s, l, len)) - sayNO; - l += len; - s += len; - } else { - if (uv != utf8_to_uvchr((U8*)l, &len)) - sayNO; - l += len; - s ++; - } + sayNO; + if (NATIVE_TO_UNI(*(U8*)s) != + utf8_to_uvchr((U8*)l, &len)) + sayNO; + l += len; + s ++; } - else + } + else { + /* The target is not utf8, the pattern is utf8. */ while (s < e) { if (l >= PL_regeol) sayNO; - if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len)) + if (NATIVE_TO_UNI(*((U8*)l)) != + utf8_to_uvchr((U8*)s, &len)) sayNO; s += len; l ++; } + } locinput = l; nextchr = UCHARAT(locinput); break; } + /* The target and the pattern have the same "utf8ness". */ /* Inline the first character, for speed. */ if (UCHARAT(s) != nextchr) sayNO; @@ -3116,17 +3126,29 @@ S_regmatch(pTHX_ regnode *prog) if (ln && l == 0) n = ln; /* don't backtrack */ locinput = PL_reginput; - if (NEAR_EXACT(next)) { + if (HAS_TEXT(next) || JUMPABLE(next)) { regnode *text_node = next; - if (PL_regkind[(U8)OP(next)] != EXACT) - NEXT_IMPT(text_node); + if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node); - if (PL_regkind[(U8)OP(text_node)] != EXACT) { - c1 = c2 = -1000; - } + if (! HAS_TEXT(text_node)) c1 = c2 = -1000; else { - c1 = (U8)*STRING(text_node); + if (PL_regkind[(U8)OP(text_node)] == REF) { + I32 n, ln; + n = ARG(text_node); /* which paren pair */ + ln = PL_regstartp[n]; + /* assume yes if we haven't seen CLOSEn */ + if ( + *PL_reglastparen < n || + ln == -1 || + ln == PL_regendp[n] + ) { + c1 = c2 = -1000; + goto assume_ok_MM; + } + c1 = *(PL_bostr + ln); + } + else { c1 = (U8)*STRING(text_node); } if (OP(next) == EXACTF) c2 = PL_fold[c1]; else if (OP(text_node) == EXACTFL) @@ -3137,6 +3159,7 @@ S_regmatch(pTHX_ regnode *prog) } else c1 = c2 = -1000; + assume_ok_MM: REGCP_SET(lastcp); /* This may be improved if l == 0. */ while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */ @@ -3185,17 +3208,30 @@ S_regmatch(pTHX_ regnode *prog) (IV) n, (IV)l) ); if (n >= ln) { - if (NEAR_EXACT(next)) { + if (HAS_TEXT(next) || JUMPABLE(next)) { regnode *text_node = next; - if (PL_regkind[(U8)OP(next)] != EXACT) - NEXT_IMPT(text_node); + if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node); - if (PL_regkind[(U8)OP(text_node)] != EXACT) { - c1 = c2 = -1000; - } + if (! HAS_TEXT(text_node)) c1 = c2 = -1000; else { - c1 = (U8)*STRING(text_node); + if (PL_regkind[(U8)OP(text_node)] == REF) { + I32 n, ln; + n = ARG(text_node); /* which paren pair */ + ln = PL_regstartp[n]; + /* assume yes if we haven't seen CLOSEn */ + if ( + *PL_reglastparen < n || + ln == -1 || + ln == PL_regendp[n] + ) { + c1 = c2 = -1000; + goto assume_ok_REG; + } + c1 = *(PL_bostr + ln); + } + else { c1 = (U8)*STRING(text_node); } + if (OP(text_node) == EXACTF) c2 = PL_fold[c1]; else if (OP(text_node) == EXACTFL) @@ -3207,6 +3243,7 @@ S_regmatch(pTHX_ regnode *prog) else c1 = c2 = -1000; } + assume_ok_REG: REGCP_SET(lastcp); while (n >= ln) { /* If it could work, try it. */ @@ -3279,18 +3316,30 @@ S_regmatch(pTHX_ regnode *prog) * of the quantifier and the EXACT-like node. -- japhy */ - if (NEAR_EXACT(next)) { + if (HAS_TEXT(next) || JUMPABLE(next)) { U8 *s; regnode *text_node = next; - if (PL_regkind[(U8)OP(next)] != EXACT) - NEXT_IMPT(text_node); + if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node); - if (PL_regkind[(U8)OP(text_node)] != EXACT) { - c1 = c2 = -1000; - } + if (! HAS_TEXT(text_node)) c1 = c2 = -1000; else { - s = (U8*)STRING(text_node); + if (PL_regkind[(U8)OP(text_node)] == REF) { + I32 n, ln; + n = ARG(text_node); /* which paren pair */ + ln = PL_regstartp[n]; + /* assume yes if we haven't seen CLOSEn */ + if ( + *PL_reglastparen < n || + ln == -1 || + ln == PL_regendp[n] + ) { + c1 = c2 = -1000; + goto assume_ok_easy; + } + s = (U8*)PL_bostr + ln; + } + else { s = (U8*)STRING(text_node); } if (!UTF) { c2 = c1 = *s; @@ -3319,6 +3368,7 @@ S_regmatch(pTHX_ regnode *prog) } else c1 = c2 = -1000; + assume_ok_easy: PL_reginput = locinput; if (minmod) { CHECKPOINT lastcp;