X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regexec.c;h=65a3b90e74db07078119d6f4b168be9cf5b53bf3;hb=54e82ce5cfd72fcdc60806373e0c4d6890b68a3c;hp=54f5e22253b7c87db609346a865f26f7205d1705;hpb=2ab053812751d8a65a6b8eda6829e97c0876564b;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regexec.c b/regexec.c index 54f5e22..65a3b90 100644 --- a/regexec.c +++ b/regexec.c @@ -278,7 +278,16 @@ S_cache_re(pTHX_ regexp *prog) /* A failure to find a constant substring means that there is no need to make an expensive call to REx engine, thus we celebrate a failure. Similarly, finding a substring too deep into the string means that less calls to - regtry() should be needed. */ + regtry() should be needed. + + REx compiler's optimizer found 4 possible hints: + a) Anchored substring; + b) Fixed substring; + c) Whether we are anchored (beginning-of-line or \G); + d) First node (of those at offset 0) which may distingush positions; + We use 'a', 'b', multiline-part of 'c', and try to find a position in the + string which does not contradict any of them. + */ char * Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, @@ -293,6 +302,9 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, I32 ml_anch; char *tmp; register char *other_last = Nullch; +#ifdef DEBUGGING + char *i_strpos = strpos; +#endif DEBUG_r( if (!PL_colorset) reginitcolors() ); DEBUG_r(PerlIO_printf(Perl_debug_log, @@ -420,7 +432,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, goto fail_finish; /* Finish the diagnostic message */ - DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - strpos)) ); + DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) ); /* Got a candidate. Check MBOL anchoring, and the *other* substr. Start with the other substr. @@ -431,11 +443,13 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, */ if (prog->float_substr && prog->anchored_substr) { - /* Take into account the anchored substring. */ + /* Take into account the "other" substring. */ /* XXXX May be hopelessly wrong for UTF... */ if (!other_last) other_last = strpos - 1; if (check == prog->float_substr) { + do_other_anchored: + { char *last = s - start_shift, *last1, *last2; char *s1 = s; @@ -446,7 +460,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, || (PL_bostr = strpos, /* Used in regcopmaybe() */ (t = reghopmaybe_c(s, -(prog->check_offset_max))) && t > strpos))) - ; + /* EMPTY */; else t = strpos; t += prog->anchored_offset; @@ -478,7 +492,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, } DEBUG_r(PerlIO_printf(Perl_debug_log, ", trying floating at offset %ld...\n", - (long)(s1 + 1 - strpos))); + (long)(s1 + 1 - i_strpos))); PL_regeol = strend; /* Used in HOP() */ other_last = last1 + prog->anchored_offset; s = HOPc(last, 1); @@ -486,14 +500,15 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, } else { DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n", - (long)(s - strpos))); + (long)(s - i_strpos))); t = s - prog->anchored_offset; other_last = s - 1; + s = s1; if (t == strpos) goto try_at_start; - s = s1; goto try_at_offset; } + } } else { /* Take into account the floating substring. */ char *last, *last1; @@ -529,7 +544,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, } DEBUG_r(PerlIO_printf(Perl_debug_log, ", trying anchored starting at offset %ld...\n", - (long)(s1 + 1 - strpos))); + (long)(s1 + 1 - i_strpos))); other_last = last; PL_regeol = strend; /* Used in HOP() */ s = HOPc(t, 1); @@ -537,11 +552,11 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, } else { DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n", - (long)(s - strpos))); + (long)(s - i_strpos))); other_last = s - 1; + s = s1; if (t == strpos) goto try_at_start; - s = s1; goto try_at_offset; } } @@ -559,18 +574,36 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, cannot start at strpos. */ try_at_offset: if (ml_anch && t[-1] != '\n') { - find_anchor: /* Eventually fbm_*() should handle this */ + /* Eventually fbm_*() should handle this, but often + anchored_offset is not 0, so this check will not be wasted. */ + /* XXXX In the code below we prefer to look for "^" even in + presence of anchored substrings. And we search even + beyond the found float position. These pessimizations + are historical artefacts only. */ + find_anchor: while (t < strend - prog->minlen) { if (*t == '\n') { if (t < s - prog->check_offset_min) { + if (prog->anchored_substr) { + /* We definitely contradict the found anchored + substr. Due to the above check we do not + contradict "check" substr. + Thus we can arrive here only if check substr + is float. Redo checking for "other"=="fixed". + */ + strpos = t + 1; + DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n", + PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset))); + goto do_other_anchored; + } s = t + 1; DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n", - PL_colors[0],PL_colors[1], (long)(s - strpos))); + PL_colors[0],PL_colors[1], (long)(s - i_strpos))); goto set_useful; } DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n", - PL_colors[0],PL_colors[1], (long)(t + 1 - strpos))); - s = t + 1; + PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos))); + strpos = s = t + 1; goto restart; } t++; @@ -596,8 +629,12 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, t = strpos; goto find_anchor; } + DEBUG_r( if (ml_anch) + PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n", + PL_colors[0],PL_colors[1]); + ); success_at_start: - if (!(prog->reganch & ROPT_NAUGHTY) + if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */ && --BmUSEFUL(prog->check_substr) < 0 && prog->check_substr == prog->float_substr) { /* boo */ /* If flags & SOMETHING - do not do it many times on the same match */ @@ -612,7 +649,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, } DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n", - PL_colors[4], PL_colors[5], (long)(s - strpos)) ); + PL_colors[4], PL_colors[5], (long)(s - i_strpos)) ); return s; fail_finish: /* Substring not found */ @@ -702,19 +739,23 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * /* If there is a "must appear" string, look for it. */ s = startpos; - if (prog->reganch & ROPT_GPOS_SEEN) { + if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */ MAGIC *mg; - if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG - && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) - PL_reg_ganch = strbeg + mg->mg_len; - else + if (flags & REXEC_IGNOREPOS) /* Means: check only at start */ PL_reg_ganch = startpos; - if (prog->reganch & ROPT_ANCH_GPOS) { - if (s > PL_reg_ganch) - goto phooey; - s = PL_reg_ganch; + else if (sv && SvTYPE(sv) >= SVt_PVMG + && SvMAGIC(sv) + && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) { + PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */ + if (prog->reganch & ROPT_ANCH_GPOS) { + if (s > PL_reg_ganch) + goto phooey; + s = PL_reg_ganch; + } } + else /* pos() not defined */ + PL_reg_ganch = strbeg; } if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) { @@ -755,9 +796,12 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * end = HOPc(strend, -dontbother) - 1; /* for multiline we only have to try after newlines */ if (prog->check_substr) { + if (s == startpos) + goto after_try; while (1) { if (regtry(prog, s)) goto got_it; + after_try: if (s >= end) goto phooey; s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL); @@ -1465,7 +1509,7 @@ S_regtry(pTHX_ regexp *prog, char *startpos) } PL_reg_magic = mg; PL_reg_oldpos = mg->mg_len; - SAVEDESTRUCTOR(restore_pos, 0); + SAVEDESTRUCTOR_X(restore_pos, 0); } if (!PL_reg_curpm) New(22,PL_reg_curpm, 1, PMOP); @@ -1562,11 +1606,19 @@ S_regmatch(pTHX_ regnode *prog) #ifdef DEBUGGING # define sayYES goto yes # define sayNO goto no +# define sayYES_FINAL goto yes_final +# define sayYES_LOUD goto yes_loud +# define sayNO_FINAL goto no_final +# define sayNO_SILENT goto do_no # define saySAME(x) if (x) goto yes; else goto no # define REPORT_CODE_OFF 24 #else # define sayYES return 1 # define sayNO return 0 +# define sayYES_FINAL return 1 +# define sayYES_LOUD return 1 +# define sayNO_FINAL return 0 +# define sayNO_SILENT return 0 # define saySAME(x) return x #endif DEBUG_r( { @@ -2162,15 +2214,24 @@ S_regmatch(pTHX_ regnode *prog) PL_reg_maxiter = 0; if (regmatch(re->program + 1)) { + /* Even though we succeeded, we need to restore + global variables, since we may be wrapped inside + SUSPEND, thus the match may be not finished yet. */ + + /* XXXX Do this only if SUSPENDed? */ + PL_reg_call_cc = state.prev; + PL_regcc = state.cc; + PL_reg_re = state.re; + cache_re(PL_reg_re); + + /* XXXX This is too dramatic a measure... */ + PL_reg_maxiter = 0; + + /* These are needed even if not SUSPEND. */ ReREFCNT_dec(re); regcpblow(cp); sayYES; } - DEBUG_r( - PerlIO_printf(Perl_debug_log, - "%*s failed...\n", - REPORT_CODE_OFF+PL_regindent*2, "") - ); ReREFCNT_dec(re); REGCP_UNWIND; regcppop(); @@ -2357,11 +2418,6 @@ S_regmatch(pTHX_ regnode *prog) ); if (regmatch(cc->next)) sayYES; - DEBUG_r( - PerlIO_printf(Perl_debug_log, - "%*s failed...\n", - REPORT_CODE_OFF+PL_regindent*2, "") - ); if (PL_regcc) PL_regcc->cur = ln; PL_regcc = cc; @@ -2377,11 +2433,6 @@ S_regmatch(pTHX_ regnode *prog) sayYES; cc->cur = n - 1; cc->lastloc = lastloc; - DEBUG_r( - PerlIO_printf(Perl_debug_log, - "%*s failed...\n", - REPORT_CODE_OFF+PL_regindent*2, "") - ); sayNO; } @@ -2424,7 +2475,7 @@ S_regmatch(pTHX_ regnode *prog) "%*s already tried at this position...\n", REPORT_CODE_OFF+PL_regindent*2, "") ); - sayNO; + sayNO_SILENT; } PL_reg_poscache[o] |= (1<cur = n - 1; @@ -2520,10 +2566,6 @@ S_regmatch(pTHX_ regnode *prog) ln = PL_regcc->cur; if (regmatch(cc->next)) sayYES; - DEBUG_r( - PerlIO_printf(Perl_debug_log, "%*s failed...\n", - REPORT_CODE_OFF+PL_regindent*2, "") - ); if (PL_regcc) PL_regcc->cur = ln; PL_regcc = cc; @@ -2918,14 +2960,22 @@ S_regmatch(pTHX_ regnode *prog) "%*s continuation failed...\n", REPORT_CODE_OFF+PL_regindent*2, "") ); - sayNO; + sayNO_SILENT; + } + if (locinput < PL_regtill) { + DEBUG_r(PerlIO_printf(Perl_debug_log, + "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n", + PL_colors[4], + (long)(locinput - PL_reg_starttry), + (long)(PL_regtill - PL_reg_starttry), + PL_colors[5])); + sayNO_FINAL; /* Cannot match: too short. */ } - if (locinput < PL_regtill) - sayNO; /* Cannot match: too short. */ - /* Fall through */ + PL_reginput = locinput; /* put where regtry can find it */ + sayYES_FINAL; /* Success! */ case SUCCEED: PL_reginput = locinput; /* put where regtry can find it */ - sayYES; /* Success! */ + sayYES_LOUD; /* Success! */ case SUSPEND: n = 1; PL_reginput = locinput; @@ -3001,7 +3051,7 @@ S_regmatch(pTHX_ regnode *prog) next = NULL; break; default: - PerlIO_printf(PerlIO_stderr(), "%lx %d\n", + PerlIO_printf(Perl_error_log, "%lx %d\n", (unsigned long)scan, OP(scan)); Perl_croak(aTHX_ "regexp memory corruption"); } @@ -3016,6 +3066,16 @@ S_regmatch(pTHX_ regnode *prog) /*NOTREACHED*/ sayNO; +yes_loud: + DEBUG_r( + PerlIO_printf(Perl_debug_log, + "%*s %scould match...%s\n", + REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5]) + ); + goto yes; +yes_final: + DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n", + PL_colors[4],PL_colors[5])); yes: #ifdef DEBUGGING PL_regindent--; @@ -3023,6 +3083,14 @@ yes: return 1; no: + DEBUG_r( + PerlIO_printf(Perl_debug_log, + "%*s %sfailed...%s\n", + REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5]) + ); + goto do_no; +no_final: +do_no: #ifdef DEBUGGING PL_regindent--; #endif