PL_regendp[paren] = tmps;
DEBUG_r(
PerlIO_printf(Perl_debug_log,
- " restoring \\%d to %d(%d)..%d%s\n",
- paren, PL_regstartp[paren],
- PL_reg_start_tmp[paren] - PL_bostr,
- PL_regendp[paren],
+ " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
+ (UV)paren, (IV)PL_regstartp[paren],
+ (IV)(PL_reg_start_tmp[paren] - PL_bostr),
+ (IV)PL_regendp[paren],
(paren > *PL_reglastparen ? "(no)" : ""));
);
}
/* A failure to find a constant substring means that there is no need to make
an expensive call to REx engine, thus we celebrate a failure. Similarly,
finding a substring too deep into the string means that less calls to
- regtry() should be needed. */
+ regtry() should be needed.
+
+ REx compiler's optimizer found 4 possible hints:
+ a) Anchored substring;
+ b) Fixed substring;
+ c) Whether we are anchored (beginning-of-line or \G);
+ d) First node (of those at offset 0) which may distinguish positions;
+ We use 'a', 'b', multiline-part of 'c', and try to find a position in the
+ string which does not contradict any of them.
+ */
char *
Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
I32 ml_anch;
char *tmp;
register char *other_last = Nullch;
+#ifdef DEBUGGING
+ char *i_strpos = strpos;
+#endif
DEBUG_r( if (!PL_colorset) reginitcolors() );
DEBUG_r(PerlIO_printf(Perl_debug_log,
PL_colors[1],
(strlen(prog->precomp) > 60 ? "..." : ""),
PL_colors[0],
- (strend - strpos > 60 ? 60 : strend - strpos),
+ (int)(strend - strpos > 60 ? 60 : strend - strpos),
strpos, PL_colors[1],
(strend - strpos > 60 ? "..." : ""))
);
(s ? "Found" : "Did not find"),
((check == prog->anchored_substr) ? "anchored" : "floating"),
PL_colors[0],
- SvCUR(check) - (SvTAIL(check)!=0), SvPVX(check),
+ (int)(SvCUR(check) - (SvTAIL(check)!=0)),
+ SvPVX(check),
PL_colors[1], (SvTAIL(check) ? "$" : ""),
(s ? " at offset " : "...\n") ) );
goto fail_finish;
/* Finish the diagnostic message */
- DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - strpos)) );
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
/* Got a candidate. Check MBOL anchoring, and the *other* substr.
Start with the other substr.
*/
if (prog->float_substr && prog->anchored_substr) {
- /* Take into account the anchored substring. */
+ /* Take into account the "other" substring. */
/* XXXX May be hopelessly wrong for UTF... */
if (!other_last)
other_last = strpos - 1;
if (check == prog->float_substr) {
+ do_other_anchored:
+ {
char *last = s - start_shift, *last1, *last2;
char *s1 = s;
|| (PL_bostr = strpos, /* Used in regcopmaybe() */
(t = reghopmaybe_c(s, -(prog->check_offset_max)))
&& t > strpos)))
- ;
+ /* EMPTY */;
else
t = strpos;
t += prog->anchored_offset;
DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
(s ? "Found" : "Contradicts"),
PL_colors[0],
- SvCUR(prog->anchored_substr)
- - (SvTAIL(prog->anchored_substr)!=0),
+ (int)(SvCUR(prog->anchored_substr)
+ - (SvTAIL(prog->anchored_substr)!=0)),
SvPVX(prog->anchored_substr),
PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
if (!s) {
}
DEBUG_r(PerlIO_printf(Perl_debug_log,
", trying floating at offset %ld...\n",
- (long)(s1 + 1 - strpos)));
+ (long)(s1 + 1 - i_strpos)));
PL_regeol = strend; /* Used in HOP() */
other_last = last1 + prog->anchored_offset;
s = HOPc(last, 1);
}
else {
DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
- (long)(s - strpos)));
+ (long)(s - i_strpos)));
t = s - prog->anchored_offset;
other_last = s - 1;
+ s = s1;
if (t == strpos)
goto try_at_start;
- s = s1;
goto try_at_offset;
}
+ }
}
else { /* Take into account the floating substring. */
char *last, *last1;
DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
(s ? "Found" : "Contradicts"),
PL_colors[0],
- SvCUR(prog->float_substr)
- - (SvTAIL(prog->float_substr)!=0),
+ (int)(SvCUR(prog->float_substr)
+ - (SvTAIL(prog->float_substr)!=0)),
SvPVX(prog->float_substr),
PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
if (!s) {
}
DEBUG_r(PerlIO_printf(Perl_debug_log,
", trying anchored starting at offset %ld...\n",
- (long)(s1 + 1 - strpos)));
+ (long)(s1 + 1 - i_strpos)));
other_last = last;
PL_regeol = strend; /* Used in HOP() */
s = HOPc(t, 1);
}
else {
DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
- (long)(s - strpos)));
+ (long)(s - i_strpos)));
other_last = s - 1;
+ s = s1;
if (t == strpos)
goto try_at_start;
- s = s1;
goto try_at_offset;
}
}
cannot start at strpos. */
try_at_offset:
if (ml_anch && t[-1] != '\n') {
- find_anchor: /* Eventually fbm_*() should handle this */
+ /* Eventually fbm_*() should handle this, but often
+ anchored_offset is not 0, so this check will not be wasted. */
+ /* XXXX In the code below we prefer to look for "^" even in
+ presence of anchored substrings. And we search even
+ beyond the found float position. These pessimizations
+ are historical artefacts only. */
+ find_anchor:
while (t < strend - prog->minlen) {
if (*t == '\n') {
if (t < s - prog->check_offset_min) {
+ if (prog->anchored_substr) {
+ /* We definitely contradict the found anchored
+ substr. Due to the above check we do not
+ contradict "check" substr.
+ Thus we can arrive here only if check substr
+ is float. Redo checking for "other"=="fixed".
+ */
+ strpos = t + 1;
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
+ PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
+ goto do_other_anchored;
+ }
s = t + 1;
DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
- PL_colors[0],PL_colors[1], (long)(s - strpos)));
+ PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
goto set_useful;
}
DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
- PL_colors[0],PL_colors[1], (long)(t + 1 - strpos)));
- s = t + 1;
+ PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
+ strpos = s = t + 1;
goto restart;
}
t++;
t = strpos;
goto find_anchor;
}
+ DEBUG_r( if (ml_anch)
+ PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
+ PL_colors[0],PL_colors[1]);
+ );
success_at_start:
- if (!(prog->reganch & ROPT_NAUGHTY)
+ if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
&& --BmUSEFUL(prog->check_substr) < 0
&& prog->check_substr == prog->float_substr) { /* boo */
/* If flags & SOMETHING - do not do it many times on the same match */
}
DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
- PL_colors[4], PL_colors[5], (long)(s - strpos)) );
+ PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
return s;
fail_finish: /* Substring not found */
/* If there is a "must appear" string, look for it. */
s = startpos;
- if (prog->reganch & ROPT_GPOS_SEEN) {
+ if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
MAGIC *mg;
- if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG
- && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0)
- PL_reg_ganch = strbeg + mg->mg_len;
- else
+ if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
PL_reg_ganch = startpos;
- if (prog->reganch & ROPT_ANCH_GPOS) {
- if (s > PL_reg_ganch)
- goto phooey;
- s = PL_reg_ganch;
+ else if (sv && SvTYPE(sv) >= SVt_PVMG
+ && SvMAGIC(sv)
+ && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
+ PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
+ if (prog->reganch & ROPT_ANCH_GPOS) {
+ if (s > PL_reg_ganch)
+ goto phooey;
+ s = PL_reg_ganch;
+ }
}
+ else /* pos() not defined */
+ PL_reg_ganch = strbeg;
}
if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
PL_colors[1],
(strlen(prog->precomp) > 60 ? "..." : ""),
PL_colors[0],
- (strend - startpos > 60 ? 60 : strend - startpos),
+ (int)(strend - startpos > 60 ? 60 : strend - startpos),
startpos, PL_colors[1],
(strend - startpos > 60 ? "..." : ""))
);
end = HOPc(strend, -dontbother) - 1;
/* for multiline we only have to try after newlines */
if (prog->check_substr) {
+ if (s == startpos)
+ goto after_try;
while (1) {
if (regtry(prog, s))
goto got_it;
+ after_try:
if (s >= end)
goto phooey;
s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
}
else if (c = prog->regstclass) {
I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
- char *cc;
+ char *m;
+ int ln;
+ int c1;
+ int c2;
+ char *e;
if (minlen)
dontbother = minlen - 1;
/* We know what class it must start with. */
switch (OP(c)) {
case ANYOFUTF8:
- cc = MASK(c);
while (s < strend) {
if (REGINCLASSUTF8(c, (U8*)s)) {
if (tmp && regtry(prog, s))
}
break;
case ANYOF:
- cc = MASK(c);
while (s < strend) {
- if (REGINCLASS(cc, *s)) {
+ if (REGINCLASS(c, *s)) {
if (tmp && regtry(prog, s))
goto got_it;
else
s++;
}
break;
+ case EXACTF:
+ m = STRING(c);
+ ln = STR_LEN(c);
+ c1 = *m;
+ c2 = PL_fold[c1];
+ goto do_exactf;
+ case EXACTFL:
+ m = STRING(c);
+ ln = STR_LEN(c);
+ c1 = *m;
+ c2 = PL_fold_locale[c1];
+ do_exactf:
+ e = strend - ln;
+
+ /* Here it is NOT UTF! */
+ if (c1 == c2) {
+ while (s <= e) {
+ if ( *s == c1
+ && (ln == 1 || (OP(c) == EXACTF
+ ? ibcmp(s, m, ln)
+ : ibcmp_locale(s, m, ln)))
+ && regtry(prog, s) )
+ goto got_it;
+ s++;
+ }
+ } else {
+ while (s <= e) {
+ if ( (*s == c1 || *s == c2)
+ && (ln == 1 || (OP(c) == EXACTF
+ ? ibcmp(s, m, ln)
+ : ibcmp_locale(s, m, ln)))
+ && regtry(prog, s) )
+ goto got_it;
+ s++;
+ }
+ }
+ break;
case BOUNDL:
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
s += UTF8SKIP(s);
}
break;
+ default:
+ croak("panic: unknown regstclass %d", (int)OP(c));
+ break;
}
}
else {
PL_reg_eval_set = RS_init;
DEBUG_r(DEBUG_s(
- PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n",
- PL_stack_sp - PL_stack_base);
+ PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
+ (IV)(PL_stack_sp - PL_stack_base));
));
SAVEINT(cxstack[cxstack_ix].blk_oldsp);
cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
}
PL_reg_magic = mg;
PL_reg_oldpos = mg->mg_len;
- SAVEDESTRUCTOR(restore_pos, 0);
+ SAVEDESTRUCTOR_X(restore_pos, 0);
}
if (!PL_reg_curpm)
New(22,PL_reg_curpm, 1, PMOP);
#ifdef DEBUGGING
# define sayYES goto yes
# define sayNO goto no
+# define sayYES_FINAL goto yes_final
+# define sayYES_LOUD goto yes_loud
+# define sayNO_FINAL goto no_final
+# define sayNO_SILENT goto do_no
# define saySAME(x) if (x) goto yes; else goto no
# define REPORT_CODE_OFF 24
#else
# define sayYES return 1
# define sayNO return 0
+# define sayYES_FINAL return 1
+# define sayYES_LOUD return 1
+# define sayNO_FINAL return 0
+# define sayNO_SILENT return 0
# define saySAME(x) return x
#endif
DEBUG_r( {
pref0_len = pref_len;
regprop(prop, scan);
PerlIO_printf(Perl_debug_log,
- "%4i <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
- locinput - PL_bostr,
+ "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
+ (IV)(locinput - PL_bostr),
PL_colors[4], pref0_len,
locinput - pref_len, PL_colors[5],
PL_colors[2], pref_len - pref0_len,
PL_colors[0], l, locinput, PL_colors[1],
15 - l - pref_len + 1,
"",
- scan - PL_regprogram, PL_regindent*2, "",
+ (IV)(scan - PL_regprogram), PL_regindent*2, "",
SvPVX(prop));
} );
nextchr = UCHARAT(locinput);
break;
case ANYOFUTF8:
- s = MASK(scan);
if (!REGINCLASSUTF8(scan, (U8*)locinput))
sayNO;
if (locinput >= PL_regeol)
nextchr = UCHARAT(locinput);
break;
case ANYOF:
- s = MASK(scan);
if (nextchr < 0)
nextchr = UCHARAT(locinput);
- if (!REGINCLASS(s, nextchr))
+ if (!REGINCLASS(scan, nextchr))
sayNO;
if (!nextchr && locinput >= PL_regeol)
sayNO;
n = ARG(scan);
PL_op = (OP_4tree*)PL_regdata->data[n];
- DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", PL_op) );
+ DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", (UV)PL_op) );
PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
regcpblow(cp);
sayYES;
}
- DEBUG_r(
- PerlIO_printf(Perl_debug_log,
- "%*s failed...\n",
- REPORT_CODE_OFF+PL_regindent*2, "")
- );
ReREFCNT_dec(re);
REGCP_UNWIND;
regcppop();
);
if (regmatch(cc->next))
sayYES;
- DEBUG_r(
- PerlIO_printf(Perl_debug_log,
- "%*s failed...\n",
- REPORT_CODE_OFF+PL_regindent*2, "")
- );
if (PL_regcc)
PL_regcc->cur = ln;
PL_regcc = cc;
sayYES;
cc->cur = n - 1;
cc->lastloc = lastloc;
- DEBUG_r(
- PerlIO_printf(Perl_debug_log,
- "%*s failed...\n",
- REPORT_CODE_OFF+PL_regindent*2, "")
- );
sayNO;
}
"%*s already tried at this position...\n",
REPORT_CODE_OFF+PL_regindent*2, "")
);
- sayNO;
+ sayNO_SILENT;
}
PL_reg_poscache[o] |= (1<<b);
}
regcpblow(cp);
sayYES;
}
- DEBUG_r(
- PerlIO_printf(Perl_debug_log,
- "%*s failed...\n",
- REPORT_CODE_OFF+PL_regindent*2, "")
- );
REGCP_UNWIND;
regcppop();
cc->cur = n - 1;
ln = PL_regcc->cur;
if (regmatch(cc->next))
sayYES;
- DEBUG_r(
- PerlIO_printf(Perl_debug_log, "%*s failed...\n",
- REPORT_CODE_OFF+PL_regindent*2, "")
- );
if (PL_regcc)
PL_regcc->cur = ln;
PL_regcc = cc;
locinput = PL_reginput;
DEBUG_r(
PerlIO_printf(Perl_debug_log,
- "%*s matched %ld times, len=%ld...\n",
- REPORT_CODE_OFF+PL_regindent*2, "", n, l)
+ "%*s matched %d times, len=%"IVdf"...\n",
+ (int)(REPORT_CODE_OFF+PL_regindent*2), "",
+ n, (IV)l)
);
if (n >= ln) {
if (PL_regkind[(U8)OP(next)] == EXACT) {
{
DEBUG_r(
PerlIO_printf(Perl_debug_log,
- "%*s trying tail with n=%ld...\n",
- REPORT_CODE_OFF+PL_regindent*2, "", n)
+ "%*s trying tail with n=%"IVdf"...\n",
+ (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
);
if (paren) {
if (n) {
"%*s continuation failed...\n",
REPORT_CODE_OFF+PL_regindent*2, "")
);
- sayNO;
+ sayNO_SILENT;
+ }
+ if (locinput < PL_regtill) {
+ DEBUG_r(PerlIO_printf(Perl_debug_log,
+ "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
+ PL_colors[4],
+ (long)(locinput - PL_reg_starttry),
+ (long)(PL_regtill - PL_reg_starttry),
+ PL_colors[5]));
+ sayNO_FINAL; /* Cannot match: too short. */
}
- if (locinput < PL_regtill)
- sayNO; /* Cannot match: too short. */
- /* Fall through */
+ PL_reginput = locinput; /* put where regtry can find it */
+ sayYES_FINAL; /* Success! */
case SUCCEED:
PL_reginput = locinput; /* put where regtry can find it */
- sayYES; /* Success! */
+ sayYES_LOUD; /* Success! */
case SUSPEND:
n = 1;
PL_reginput = locinput;
next = NULL;
break;
default:
- PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
- (unsigned long)scan, OP(scan));
+ PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
+ (UV)scan, OP(scan));
Perl_croak(aTHX_ "regexp memory corruption");
}
scan = next;
/*NOTREACHED*/
sayNO;
+yes_loud:
+ DEBUG_r(
+ PerlIO_printf(Perl_debug_log,
+ "%*s %scould match...%s\n",
+ REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
+ );
+ goto yes;
+yes_final:
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
+ PL_colors[4],PL_colors[5]));
yes:
#ifdef DEBUGGING
PL_regindent--;
return 1;
no:
+ DEBUG_r(
+ PerlIO_printf(Perl_debug_log,
+ "%*s %sfailed...%s\n",
+ REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
+ );
+ goto do_no;
+no_final:
+do_no:
#ifdef DEBUGGING
PL_regindent--;
#endif
{
dTHR;
register char *scan;
- register char *opnd;
register I32 c;
register char *loceol = PL_regeol;
register I32 hardcount = 0;
}
break;
case ANYOF:
- opnd = MASK(p);
- while (scan < loceol && REGINCLASS(opnd, *scan))
+ while (scan < loceol && REGINCLASS(p, *scan))
scan++;
break;
case ALNUM:
regprop(prop, p);
PerlIO_printf(Perl_debug_log,
- "%*s %s can match %ld times out of %ld...\n",
- REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
+ "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
+ REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
});
return(c);
*/
STATIC bool
-S_reginclass(pTHX_ register char *p, register I32 c)
+S_reginclass(pTHX_ register regnode *p, register I32 c)
{
dTHR;
char flags = ANYOF_FLAGS(p);