X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regexec.c;h=cd3df4798635cb479a9ec559d34eec713c6b86f1;hb=765e9edb2de192ef033766d867f9bd290e9935e9;hp=c65624b21651f4624effdcfa8ea6399cb8d6bb5e;hpb=806e72015c0c16d1c66452a28aa8fd6c6cc967c3;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regexec.c b/regexec.c index c65624b..cd3df47 100644 --- a/regexec.c +++ b/regexec.c @@ -346,7 +346,9 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, I32 slen; if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */ - && (sv && (strpos + SvCUR(sv) != strend)) ) { + /* SvCUR is not set on references: SvRV and SvPVX overlap */ + && sv && !SvROK(sv) + && (strpos + SvCUR(sv) != strend)) { DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n")); goto fail; } @@ -638,7 +640,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, try_at_start: /* Even in this situation we may use MBOL flag if strpos is offset wrt the start of the string. */ - if (ml_anch && sv + if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */ && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n' /* May be due to an implicit anchor of m{.*foo} */ && !(prog->reganch & ROPT_IMPLICIT)) @@ -781,9 +783,9 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta { I32 doevery = (prog->reganch & ROPT_SKIP) == 0; char *m; - int ln; - int c1; - int c2; + STRLEN ln; + unsigned int c1; + unsigned int c2; char *e; register I32 tmp = 1; /* Scratch variable? */ @@ -804,7 +806,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta break; case ANYOF: while (s < strend) { - if (REGINCLASS(c, *s)) { + if (REGINCLASS(c, *(U8*)s)) { if (tmp && (norun || regtry(prog, s))) goto got_it; else @@ -818,13 +820,13 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta case EXACTF: m = STRING(c); ln = STR_LEN(c); - c1 = *m; + c1 = *(U8*)m; c2 = PL_fold[c1]; goto do_exactf; case EXACTFL: m = STRING(c); ln = STR_LEN(c); - c1 = *m; + c1 = *(U8*)m; c2 = PL_fold_locale[c1]; do_exactf: e = strend - ln; @@ -834,7 +836,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta /* Here it is NOT UTF! */ if (c1 == c2) { while (s <= e) { - if ( *s == c1 + if ( *(U8*)s == c1 && (ln == 1 || !(OP(c) == EXACTF ? ibcmp(s, m, ln) : ibcmp_locale(s, m, ln))) @@ -844,7 +846,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta } } else { while (s <= e) { - if ( (*s == c1 || *s == c2) + if ( (*(U8*)s == c1 || *(U8*)s == c2) && (ln == 1 || !(OP(c) == EXACTF ? ibcmp(s, m, ln) : ibcmp_locale(s, m, ln))) @@ -1274,10 +1276,9 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * register char *s; register regnode *c; register char *startpos = stringarg; - register I32 tmp; I32 minlen; /* must match at least this many chars */ I32 dontbother = 0; /* how many characters not to try at end */ - I32 start_shift = 0; /* Offset of the start to find + /* I32 start_shift = 0; */ /* Offset of the start to find constant substr. */ /* CC */ I32 end_shift = 0; /* Same for the end. */ /* CC */ I32 scream_pos = -1; /* Internal iterator of scream. */ @@ -1433,9 +1434,14 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * /* we have /x+whatever/ */ /* it must be a one character string (XXXX Except UTF?) */ char ch = SvPVX(prog->anchored_substr)[0]; +#ifdef DEBUGGING + int did_match = 0; +#endif + if (UTF) { while (s < strend) { if (*s == ch) { + DEBUG_r( did_match = 1 ); if (regtry(prog, s)) goto got_it; s += UTF8SKIP(s); while (s < strend && *s == ch) @@ -1447,6 +1453,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * else { while (s < strend) { if (*s == ch) { + DEBUG_r( did_match = 1 ); if (regtry(prog, s)) goto got_it; s++; while (s < strend && *s == ch) @@ -1455,6 +1462,9 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * s++; } } + DEBUG_r(did_match || + PerlIO_printf(Perl_debug_log, + "Did not find anchored character...\n")); } /*SUPPRESS 560*/ else if (prog->anchored_substr != Nullsv @@ -1466,11 +1476,13 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset; I32 back_min = prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset; - I32 delta = back_max - back_min; char *last = HOPc(strend, /* Cannot start after this */ -(I32)(CHR_SVLEN(must) - (SvTAIL(must) != 0) + back_min)); char *last1; /* Last position checked before */ +#ifdef DEBUGGING + int did_match = 0; +#endif if (s > PL_bostr) last1 = HOPc(s, -1); @@ -1489,6 +1501,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * : (s = fbm_instr((unsigned char*)HOP(s, back_min), (unsigned char*)strend, must, PL_multiline ? FBMrf_MULTILINE : 0))) ) { + DEBUG_r( did_match = 1 ); if (HOPc(s, -back_max) > last1) { last1 = HOPc(s, -back_min); s = HOPc(s, -back_max); @@ -1514,20 +1527,28 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * } } } + DEBUG_r(did_match || + PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n", + ((must == prog->anchored_substr) + ? "anchored" : "floating"), + PL_colors[0], + (int)(SvCUR(must) - (SvTAIL(must)!=0)), + SvPVX(must), + PL_colors[1], (SvTAIL(must) ? "$" : ""))); goto phooey; } - else if (c = prog->regstclass) { + else if ((c = prog->regstclass)) { if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT) /* don't bother with what can't match */ strend = HOPc(strend, -(minlen - 1)); if (find_byclass(prog, c, s, strend, startpos, 0)) goto got_it; + DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n")); } else { dontbother = 0; if (prog->float_substr != Nullsv) { /* Trim the end. */ char *last; - I32 oldpos = scream_pos; if (flags & REXEC_SCREAM) { last = screaminstr(sv, prog->float_substr, s - strbeg, @@ -1555,7 +1576,12 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * last = strend; /* matching `$' */ } } - if (last == NULL) goto phooey; /* Should not happen! */ + if (last == NULL) { + DEBUG_r(PerlIO_printf(Perl_debug_log, + "%sCan't trim the tail, match fails (should not happen)%s\n", + PL_colors[4],PL_colors[5])); + goto phooey; /* Should not happen! */ + } dontbother = strend - last + prog->float_min_offset; } if (minlen && (dontbother < minlen)) @@ -1617,6 +1643,8 @@ got_it: return 1; phooey: + DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n", + PL_colors[4],PL_colors[5])); if (PL_reg_eval_set) restore_pos(aTHXo_ 0); return 0; @@ -1894,12 +1922,12 @@ S_regmatch(pTHX_ regnode *prog) nextchr = UCHARAT(locinput); break; } - if (!nextchr && locinput >= PL_regeol || nextchr == '\n') + if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n') sayNO; nextchr = UCHARAT(++locinput); break; case REG_ANY: - if (!nextchr && locinput >= PL_regeol || nextchr == '\n') + if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n') sayNO; nextchr = UCHARAT(++locinput); break; @@ -2765,7 +2793,7 @@ S_regmatch(pTHX_ regnode *prog) *PL_reglastparen = n; scan = next; /*SUPPRESS 560*/ - if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next))) + if ((n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))) next += n; else next = NULL; @@ -3609,7 +3637,6 @@ S_reginclassutf8(pTHX_ regnode *f, U8 *p) if (swash_fetch(sv, p)) match = TRUE; else if (flags & ANYOF_FOLD) { - I32 cf; U8 tmpbuf[UTF8_MAXLEN]; if (flags & ANYOF_LOCALE) { PL_reg_flags |= RF_tainted;