X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regexec.c;h=c0d0097fa42ea9e5d94cafeb77dfe9658d3dfa39;hb=b76347f2eb34c85a0a38543b2f57ca474fedab4d;hp=990791bd6b3e26abfab6630039e2ab0b3b2086d2;hpb=67e989fb549091286d76fd8d29f1ec03b9da175d;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regexec.c b/regexec.c index 990791b..c0d0097 100644 --- a/regexec.c +++ b/regexec.c @@ -325,6 +325,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, register I32 end_shift; register char *s; register SV *check; + char *strbeg; char *t; I32 ml_anch; char *tmp; @@ -351,23 +352,25 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n")); goto fail; } + strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos; check = prog->check_substr; if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */ ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE) || ( (prog->reganch & ROPT_ANCH_BOL) && !PL_multiline ) ); /* Check after \n? */ - if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) { + if (!ml_anch) { + if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */ + /* SvCUR is not set on references: SvRV and SvPVX overlap */ + && sv && !SvROK(sv) + && (strpos != strbeg)) { + DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n")); + goto fail; + } + if (prog->check_offset_min == prog->check_offset_max) { /* Substring at constant offset from beg-of-str... */ I32 slen; - if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */ - /* SvCUR is not set on references: SvRV and SvPVX overlap */ - && sv && !SvROK(sv) - && (strpos + SvCUR(sv) != strend)) { - DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n")); - goto fail; - } PL_regeol = strend; /* Used in HOP() */ s = HOPc(strpos, prog->check_offset_min); if (SvTAIL(check)) { @@ -393,6 +396,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, && memNE(SvPVX(check), s, slen))) goto report_neq; goto success_at_start; + } } /* Match is anchored, but substr is not anchored wrt beg-of-str. */ s = strpos; @@ -426,7 +430,6 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, /* Find a possible match in the region s..strend by looking for the "check" substring in the region corrected by start/end_shift. */ if (flags & REXEC_SCREAM) { - char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */ I32 p = -1; /* Internal iterator of scream. */ I32 *pp = data ? data->scream_pos : &p; @@ -668,7 +671,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, /* Even in this situation we may use MBOL flag if strpos is offset wrt the start of the string. */ if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */ - && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n' + && (strpos != strbeg) && strpos[-1] != '\n' /* May be due to an implicit anchor of m{.*foo} */ && !(prog->reganch & ROPT_IMPLICIT)) { @@ -719,7 +722,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, ? s + (prog->minlen? cl_l : 0) : (prog->float_substr ? check_at - start_shift + cl_l : strend) ; - char *startpos = sv && SvPOK(sv) ? strend - SvCUR(sv) : s; + char *startpos = strbeg; t = s; if (prog->reganch & ROPT_UTF8) { @@ -914,7 +917,9 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case BOUNDUTF8: - tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0, 0) : '\n'; + tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), + strend - s, + 0, 0) : '\n'; tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0); while (s < strend) { if (tmp == !(OP(c) == BOUNDUTF8 ? @@ -950,7 +955,9 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case NBOUNDUTF8: - tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0, 0) : '\n'; + tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), + strend - s, + 0, 0) : '\n'; tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0); while (s < strend) { if (tmp == !(OP(c) == NBOUNDUTF8 ? @@ -1995,7 +2002,7 @@ S_regmatch(pTHX_ regnode *prog) while (s < e) { if (l >= PL_regeol) sayNO; - if (utf8_to_uv((U8*)s, 0, 0) != (c1 ? + if (utf8_to_uv((U8*)s, e - s, 0, 0) != (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l))) { @@ -2133,7 +2140,8 @@ S_regmatch(pTHX_ regnode *prog) case NBOUNDUTF8: /* was last char in word? */ ln = (locinput != PL_regbol) - ? utf8_to_uv(reghop((U8*)locinput, -1), 0, 0) : PL_regprev; + ? utf8_to_uv(reghop((U8*)locinput, -1), + PL_regeol - locinput, 0, 0) : PL_regprev; if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) { ln = isALNUM_uni(ln); n = swash_fetch(PL_utf8_alnum, (U8*)locinput); @@ -2588,12 +2596,18 @@ S_regmatch(pTHX_ regnode *prog) case CURLYX: { CURCUR cc; CHECKPOINT cp = PL_savestack_ix; + /* No need to save/restore up to this paren */ + I32 parenfloor = scan->flags; if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */ next += ARG(next); cc.oldcc = PL_regcc; PL_regcc = &cc; - cc.parenfloor = *PL_reglastparen; + /* XXXX Probably it is better to teach regpush to support + parenfloor > PL_regsize... */ + if (parenfloor > *PL_reglastparen) + parenfloor = *PL_reglastparen; /* Pessimization... */ + cc.parenfloor = parenfloor; cc.cur = -1; cc.min = ARG1(scan); cc.max = ARG2(scan);