X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regexec.c;h=d3f2065cbc09e53f09a53fc016a3d234336b84a5;hb=22d4bb9ccb8701e68f9243547d7e3a3c55f70908;hp=002b66ac85ae93dcd4f6bfffb835261e4dcfe546;hpb=4b19af017623bfa3bb72bb164598a517f586e0d3;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regexec.c b/regexec.c index 002b66a..d3f2065 100644 --- a/regexec.c +++ b/regexec.c @@ -357,17 +357,18 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, || ( (prog->reganch & ROPT_ANCH_BOL) && !PL_multiline ) ); /* Check after \n? */ - if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) { + if (!ml_anch) { + if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */ + /* SvCUR is not set on references: SvRV and SvPVX overlap */ + && sv && !SvROK(sv) + && (strpos + SvCUR(sv) != strend)) { + DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n")); + goto fail; + } + if (prog->check_offset_min == prog->check_offset_max) { /* Substring at constant offset from beg-of-str... */ I32 slen; - if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */ - /* SvCUR is not set on references: SvRV and SvPVX overlap */ - && sv && !SvROK(sv) - && (strpos + SvCUR(sv) != strend)) { - DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n")); - goto fail; - } PL_regeol = strend; /* Used in HOP() */ s = HOPc(strpos, prog->check_offset_min); if (SvTAIL(check)) { @@ -393,6 +394,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, && memNE(SvPVX(check), s, slen))) goto report_neq; goto success_at_start; + } } /* Match is anchored, but substr is not anchored wrt beg-of-str. */ s = strpos; @@ -690,6 +692,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, SvREFCNT_dec(prog->check_substr); prog->check_substr = Nullsv; /* disable */ prog->float_substr = Nullsv; /* clear */ + check = Nullsv; /* abort */ s = strpos; /* XXXX This is a remnant of the old implementation. It looks wasteful, since now INTUIT can use many @@ -752,6 +755,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, "Could not match STCLASS...\n") ); goto fail; } + if (!check) + goto giveup; DEBUG_r( PerlIO_printf(Perl_debug_log, "Looking for %s substr starting at offset %ld...\n", what, (long)(s + start_shift - i_strpos)) ); @@ -762,6 +767,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, goto retry_floating_check; /* Recheck anchored substring, but not floating... */ s = check_at; + if (!check) + goto giveup; DEBUG_r( PerlIO_printf(Perl_debug_log, "Looking for anchored substr starting at offset %ld...\n", (long)(other_last - i_strpos)) ); @@ -771,6 +778,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, current position only: */ if (ml_anch) { s = t = t + 1; + if (!check) + goto giveup; DEBUG_r( PerlIO_printf(Perl_debug_log, "Looking for /%s^%s/m starting at offset %ld...\n", PL_colors[0],PL_colors[1], (long)(t - i_strpos)) ); @@ -792,8 +801,10 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, PerlIO_printf(Perl_debug_log, "Does not contradict STCLASS...\n") ); } - DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n", - PL_colors[4], PL_colors[5], (long)(s - i_strpos)) ); + giveup: + DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n", + PL_colors[4], (check ? "Guessed" : "Giving up"), + PL_colors[5], (long)(s - i_strpos)) ); return s; fail_finish: /* Substring not found */ @@ -905,7 +916,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case BOUNDUTF8: - tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n'; + tmp = (I32)(s != startpos) ? utf8_to_uv_chk(reghop((U8*)s, -1), 0, 0) : '\n'; tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0); while (s < strend) { if (tmp == !(OP(c) == BOUNDUTF8 ? @@ -941,7 +952,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case NBOUNDUTF8: - tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n'; + tmp = (I32)(s != startpos) ? utf8_to_uv_chk(reghop((U8*)s, -1), 0, 0) : '\n'; tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0); while (s < strend) { if (tmp == !(OP(c) == NBOUNDUTF8 ? @@ -1986,7 +1997,7 @@ S_regmatch(pTHX_ regnode *prog) while (s < e) { if (l >= PL_regeol) sayNO; - if (utf8_to_uv((U8*)s, 0) != (c1 ? + if (utf8_to_uv_chk((U8*)s, 0, 0) != (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l))) { @@ -2124,7 +2135,7 @@ S_regmatch(pTHX_ regnode *prog) case NBOUNDUTF8: /* was last char in word? */ ln = (locinput != PL_regbol) - ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev; + ? utf8_to_uv_chk(reghop((U8*)locinput, -1), 0, 0) : PL_regprev; if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) { ln = isALNUM_uni(ln); n = swash_fetch(PL_utf8_alnum, (U8*)locinput); @@ -3616,7 +3627,11 @@ S_reginclass(pTHX_ register regnode *p, register I32 c) (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) || (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) || (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c)) + (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c)) || + (ANYOF_CLASS_TEST(p, ANYOF_PSXSPC) && isPSXSPC(c)) || + (ANYOF_CLASS_TEST(p, ANYOF_NPSXSPC) && !isPSXSPC(c)) || + (ANYOF_CLASS_TEST(p, ANYOF_BLANK) && isBLANK(c)) || + (ANYOF_CLASS_TEST(p, ANYOF_NBLANK) && !isBLANK(c)) ) /* How's that for a conditional? */ { match = TRUE; @@ -3722,4 +3737,3 @@ restore_pos(pTHXo_ void *arg) PL_curpm = PL_reg_oldcurpm; } } -