/* A failure to find a constant substring means that there is no need to make
an expensive call to REx engine, thus we celebrate a failure. Similarly,
finding a substring too deep into the string means that less calls to
- regtry() should be needed. */
+ regtry() should be needed.
+
+ REx compiler's optimizer found 4 possible hints:
+ a) Anchored substring;
+ b) Fixed substring;
+ c) Whether we are anchored (beginning-of-line or \G);
+ d) First node (of those at offset 0) which may distingush positions;
+ We use 'a', 'b', multiline-part of 'c', and try to find a position in the
+ string which does not contradict any of them.
+ */
char *
Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
I32 ml_anch;
char *tmp;
register char *other_last = Nullch;
+#ifdef DEBUGGING
+ char *i_strpos = strpos;
+#endif
DEBUG_r( if (!PL_colorset) reginitcolors() );
DEBUG_r(PerlIO_printf(Perl_debug_log,
goto fail_finish;
/* Finish the diagnostic message */
- DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - strpos)) );
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
/* Got a candidate. Check MBOL anchoring, and the *other* substr.
Start with the other substr.
*/
if (prog->float_substr && prog->anchored_substr) {
- /* Take into account the anchored substring. */
+ /* Take into account the "other" substring. */
/* XXXX May be hopelessly wrong for UTF... */
if (!other_last)
other_last = strpos - 1;
if (check == prog->float_substr) {
+ do_other_anchored:
+ {
char *last = s - start_shift, *last1, *last2;
char *s1 = s;
|| (PL_bostr = strpos, /* Used in regcopmaybe() */
(t = reghopmaybe_c(s, -(prog->check_offset_max)))
&& t > strpos)))
- ;
+ /* EMPTY */;
else
t = strpos;
t += prog->anchored_offset;
}
DEBUG_r(PerlIO_printf(Perl_debug_log,
", trying floating at offset %ld...\n",
- (long)(s1 + 1 - strpos)));
+ (long)(s1 + 1 - i_strpos)));
PL_regeol = strend; /* Used in HOP() */
other_last = last1 + prog->anchored_offset;
s = HOPc(last, 1);
}
else {
DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
- (long)(s - strpos)));
+ (long)(s - i_strpos)));
t = s - prog->anchored_offset;
other_last = s - 1;
+ s = s1;
if (t == strpos)
goto try_at_start;
- s = s1;
goto try_at_offset;
}
+ }
}
else { /* Take into account the floating substring. */
char *last, *last1;
}
DEBUG_r(PerlIO_printf(Perl_debug_log,
", trying anchored starting at offset %ld...\n",
- (long)(s1 + 1 - strpos)));
+ (long)(s1 + 1 - i_strpos)));
other_last = last;
PL_regeol = strend; /* Used in HOP() */
s = HOPc(t, 1);
}
else {
DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
- (long)(s - strpos)));
+ (long)(s - i_strpos)));
other_last = s - 1;
+ s = s1;
if (t == strpos)
goto try_at_start;
- s = s1;
goto try_at_offset;
}
}
cannot start at strpos. */
try_at_offset:
if (ml_anch && t[-1] != '\n') {
- find_anchor: /* Eventually fbm_*() should handle this */
+ /* Eventually fbm_*() should handle this, but often
+ anchored_offset is not 0, so this check will not be wasted. */
+ /* XXXX In the code below we prefer to look for "^" even in
+ presence of anchored substrings. And we search even
+ beyond the found float position. These pessimizations
+ are historical artefacts only. */
+ find_anchor:
while (t < strend - prog->minlen) {
if (*t == '\n') {
if (t < s - prog->check_offset_min) {
+ if (prog->anchored_substr) {
+ /* We definitely contradict the found anchored
+ substr. Due to the above check we do not
+ contradict "check" substr.
+ Thus we can arrive here only if check substr
+ is float. Redo checking for "other"=="fixed".
+ */
+ strpos = t + 1;
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
+ PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
+ goto do_other_anchored;
+ }
s = t + 1;
DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
- PL_colors[0],PL_colors[1], (long)(s - strpos)));
+ PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
goto set_useful;
}
DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
- PL_colors[0],PL_colors[1], (long)(t + 1 - strpos)));
- s = t + 1;
+ PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
+ strpos = s = t + 1;
goto restart;
}
t++;
t = strpos;
goto find_anchor;
}
+ DEBUG_r( if (ml_anch)
+ PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
+ PL_colors[0],PL_colors[1]);
+ );
success_at_start:
- if (!(prog->reganch & ROPT_NAUGHTY)
+ if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
&& --BmUSEFUL(prog->check_substr) < 0
&& prog->check_substr == prog->float_substr) { /* boo */
/* If flags & SOMETHING - do not do it many times on the same match */
}
DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
- PL_colors[4], PL_colors[5], (long)(s - strpos)) );
+ PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
return s;
fail_finish: /* Substring not found */
/* If there is a "must appear" string, look for it. */
s = startpos;
- if (prog->reganch & ROPT_GPOS_SEEN) {
+ if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
MAGIC *mg;
- if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG
- && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0)
- PL_reg_ganch = strbeg + mg->mg_len;
- else
+ if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
PL_reg_ganch = startpos;
- if (prog->reganch & ROPT_ANCH_GPOS) {
- if (s > PL_reg_ganch)
- goto phooey;
- s = PL_reg_ganch;
+ else if (sv && SvTYPE(sv) >= SVt_PVMG
+ && SvMAGIC(sv)
+ && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
+ PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
+ if (prog->reganch & ROPT_ANCH_GPOS) {
+ if (s > PL_reg_ganch)
+ goto phooey;
+ s = PL_reg_ganch;
+ }
}
+ else /* pos() not defined */
+ PL_reg_ganch = strbeg;
}
if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
end = HOPc(strend, -dontbother) - 1;
/* for multiline we only have to try after newlines */
if (prog->check_substr) {
+ if (s == startpos)
+ goto after_try;
while (1) {
if (regtry(prog, s))
goto got_it;
+ after_try:
if (s >= end)
goto phooey;
s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
}
PL_reg_magic = mg;
PL_reg_oldpos = mg->mg_len;
- SAVEDESTRUCTOR(restore_pos, 0);
+ SAVEDESTRUCTOR_X(restore_pos, 0);
}
if (!PL_reg_curpm)
New(22,PL_reg_curpm, 1, PMOP);
#ifdef DEBUGGING
# define sayYES goto yes
# define sayNO goto no
+# define sayYES_FINAL goto yes_final
+# define sayYES_LOUD goto yes_loud
+# define sayNO_FINAL goto no_final
+# define sayNO_SILENT goto do_no
# define saySAME(x) if (x) goto yes; else goto no
# define REPORT_CODE_OFF 24
#else
# define sayYES return 1
# define sayNO return 0
+# define sayYES_FINAL return 1
+# define sayYES_LOUD return 1
+# define sayNO_FINAL return 0
+# define sayNO_SILENT return 0
# define saySAME(x) return x
#endif
DEBUG_r( {
regcpblow(cp);
sayYES;
}
- DEBUG_r(
- PerlIO_printf(Perl_debug_log,
- "%*s failed...\n",
- REPORT_CODE_OFF+PL_regindent*2, "")
- );
ReREFCNT_dec(re);
REGCP_UNWIND;
regcppop();
);
if (regmatch(cc->next))
sayYES;
- DEBUG_r(
- PerlIO_printf(Perl_debug_log,
- "%*s failed...\n",
- REPORT_CODE_OFF+PL_regindent*2, "")
- );
if (PL_regcc)
PL_regcc->cur = ln;
PL_regcc = cc;
sayYES;
cc->cur = n - 1;
cc->lastloc = lastloc;
- DEBUG_r(
- PerlIO_printf(Perl_debug_log,
- "%*s failed...\n",
- REPORT_CODE_OFF+PL_regindent*2, "")
- );
sayNO;
}
"%*s already tried at this position...\n",
REPORT_CODE_OFF+PL_regindent*2, "")
);
- sayNO;
+ sayNO_SILENT;
}
PL_reg_poscache[o] |= (1<<b);
}
regcpblow(cp);
sayYES;
}
- DEBUG_r(
- PerlIO_printf(Perl_debug_log,
- "%*s failed...\n",
- REPORT_CODE_OFF+PL_regindent*2, "")
- );
REGCP_UNWIND;
regcppop();
cc->cur = n - 1;
ln = PL_regcc->cur;
if (regmatch(cc->next))
sayYES;
- DEBUG_r(
- PerlIO_printf(Perl_debug_log, "%*s failed...\n",
- REPORT_CODE_OFF+PL_regindent*2, "")
- );
if (PL_regcc)
PL_regcc->cur = ln;
PL_regcc = cc;
"%*s continuation failed...\n",
REPORT_CODE_OFF+PL_regindent*2, "")
);
- sayNO;
+ sayNO_SILENT;
}
- if (locinput < PL_regtill)
- sayNO; /* Cannot match: too short. */
- /* Fall through */
+ if (locinput < PL_regtill) {
+ DEBUG_r(PerlIO_printf(Perl_debug_log,
+ "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
+ PL_colors[4],
+ (long)(locinput - PL_reg_starttry),
+ (long)(PL_regtill - PL_reg_starttry),
+ PL_colors[5]));
+ sayNO_FINAL; /* Cannot match: too short. */
+ }
+ PL_reginput = locinput; /* put where regtry can find it */
+ sayYES_FINAL; /* Success! */
case SUCCEED:
PL_reginput = locinput; /* put where regtry can find it */
- sayYES; /* Success! */
+ sayYES_LOUD; /* Success! */
case SUSPEND:
n = 1;
PL_reginput = locinput;
next = NULL;
break;
default:
- PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
+ PerlIO_printf(Perl_error_log, "%lx %d\n",
(unsigned long)scan, OP(scan));
Perl_croak(aTHX_ "regexp memory corruption");
}
/*NOTREACHED*/
sayNO;
+yes_loud:
+ DEBUG_r(
+ PerlIO_printf(Perl_debug_log,
+ "%*s %scould match...%s\n",
+ REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
+ );
+ goto yes;
+yes_final:
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
+ PL_colors[4],PL_colors[5]));
yes:
#ifdef DEBUGGING
PL_regindent--;
return 1;
no:
+ DEBUG_r(
+ PerlIO_printf(Perl_debug_log,
+ "%*s %sfailed...%s\n",
+ REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
+ );
+ goto do_no;
+no_final:
+do_no:
#ifdef DEBUGGING
PL_regindent--;
#endif