X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regexec.c;h=3b6d8577a7b8431ef7cc3feb901c8f79fc0682ed;hb=7cfcdf208eb7eff0b6f8313f1a5b14880681119d;hp=e63fa6f07073dc5f0e86c300024dd96ced3a7475;hpb=9442cb0ec25041ea5b061c40868e0a3c8bfbb2ab;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regexec.c b/regexec.c index e63fa6f..3b6d857 100644 --- a/regexec.c +++ b/regexec.c @@ -688,7 +688,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, ? s + (prog->minlen? cl_l : 0) : (prog->float_substr ? check_at - start_shift + cl_l : strend) ; - char *startpos = sv ? strend - SvCUR(sv) : s; + char *startpos = sv && SvPOK(sv) ? strend - SvCUR(sv) : s; t = s; if (prog->reganch & ROPT_UTF8) { @@ -781,9 +781,9 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta { I32 doevery = (prog->reganch & ROPT_SKIP) == 0; char *m; - int ln; - int c1; - int c2; + STRLEN ln; + unsigned int c1; + unsigned int c2; char *e; register I32 tmp = 1; /* Scratch variable? */ @@ -804,7 +804,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta break; case ANYOF: while (s < strend) { - if (REGINCLASS(c, *s)) { + if (REGINCLASS(c, *(U8*)s)) { if (tmp && (norun || regtry(prog, s))) goto got_it; else @@ -818,13 +818,13 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta case EXACTF: m = STRING(c); ln = STR_LEN(c); - c1 = *m; + c1 = *(U8*)m; c2 = PL_fold[c1]; goto do_exactf; case EXACTFL: m = STRING(c); ln = STR_LEN(c); - c1 = *m; + c1 = *(U8*)m; c2 = PL_fold_locale[c1]; do_exactf: e = strend - ln; @@ -834,7 +834,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta /* Here it is NOT UTF! */ if (c1 == c2) { while (s <= e) { - if ( *s == c1 + if ( *(U8*)s == c1 && (ln == 1 || !(OP(c) == EXACTF ? ibcmp(s, m, ln) : ibcmp_locale(s, m, ln))) @@ -844,7 +844,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta } } else { while (s <= e) { - if ( (*s == c1 || *s == c2) + if ( (*(U8*)s == c1 || *(U8*)s == c2) && (ln == 1 || !(OP(c) == EXACTF ? ibcmp(s, m, ln) : ibcmp_locale(s, m, ln))) @@ -1274,10 +1274,9 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * register char *s; register regnode *c; register char *startpos = stringarg; - register I32 tmp; I32 minlen; /* must match at least this many chars */ I32 dontbother = 0; /* how many characters not to try at end */ - I32 start_shift = 0; /* Offset of the start to find + /* I32 start_shift = 0; */ /* Offset of the start to find constant substr. */ /* CC */ I32 end_shift = 0; /* Same for the end. */ /* CC */ I32 scream_pos = -1; /* Internal iterator of scream. */ @@ -1466,7 +1465,6 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset; I32 back_min = prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset; - I32 delta = back_max - back_min; char *last = HOPc(strend, /* Cannot start after this */ -(I32)(CHR_SVLEN(must) - (SvTAIL(must) != 0) + back_min)); @@ -1516,7 +1514,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * } goto phooey; } - else if (c = prog->regstclass) { + else if ((c = prog->regstclass)) { if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT) /* don't bother with what can't match */ strend = HOPc(strend, -(minlen - 1)); @@ -1527,7 +1525,6 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * dontbother = 0; if (prog->float_substr != Nullsv) { /* Trim the end. */ char *last; - I32 oldpos = scream_pos; if (flags & REXEC_SCREAM) { last = screaminstr(sv, prog->float_substr, s - strbeg, @@ -1894,12 +1891,12 @@ S_regmatch(pTHX_ regnode *prog) nextchr = UCHARAT(locinput); break; } - if (!nextchr && locinput >= PL_regeol || nextchr == '\n') + if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n') sayNO; nextchr = UCHARAT(++locinput); break; case REG_ANY: - if (!nextchr && locinput >= PL_regeol || nextchr == '\n') + if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n') sayNO; nextchr = UCHARAT(++locinput); break; @@ -2663,10 +2660,10 @@ S_regmatch(pTHX_ regnode *prog) PL_regcc = cc; if (n >= cc->max) { /* Maximum greed exceeded? */ - if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY + if (ckWARN(WARN_REGEXP) && n >= REG_INFTY && !(PL_reg_flags & RF_warned)) { PL_reg_flags |= RF_warned; - Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded", + Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded", "Complex regular subexpression recursion", REG_INFTY - 1); } @@ -2715,10 +2712,10 @@ S_regmatch(pTHX_ regnode *prog) REPORT_CODE_OFF+PL_regindent*2, "") ); } - if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY + if (ckWARN(WARN_REGEXP) && n >= REG_INFTY && !(PL_reg_flags & RF_warned)) { PL_reg_flags |= RF_warned; - Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded", + Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded", "Complex regular subexpression recursion", REG_INFTY - 1); } @@ -2765,7 +2762,7 @@ S_regmatch(pTHX_ regnode *prog) *PL_reglastparen = n; scan = next; /*SUPPRESS 560*/ - if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next))) + if ((n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))) next += n; else next = NULL; @@ -3045,8 +3042,14 @@ S_regmatch(pTHX_ regnode *prog) n = regrepeat(scan, n); locinput = PL_reginput; if (ln < n && PL_regkind[(U8)OP(next)] == EOL && - (!PL_multiline || OP(next) == SEOL)) + (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) { ln = n; /* why back off? */ + /* ...because $ and \Z can match before *and* after + newline at the end. Consider "\n\n" =~ /\n+\Z\n/. + We should back off by one in this case. */ + if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS) + ln--; + } REGCP_SET; if (paren) { while (n >= ln) { @@ -3603,8 +3606,7 @@ S_reginclassutf8(pTHX_ regnode *f, U8 *p) if (swash_fetch(sv, p)) match = TRUE; else if (flags & ANYOF_FOLD) { - I32 cf; - U8 tmpbuf[10]; + U8 tmpbuf[UTF8_MAXLEN]; if (flags & ANYOF_LOCALE) { PL_reg_flags |= RF_tainted; uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));