#define regcpblow(cp) LEAVE_SCOPE(cp)
+#define TRYPAREN(paren, n, input) { \
+ if (paren) { \
+ if (n) { \
+ PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
+ PL_regendp[paren] = input - PL_bostr; \
+ } \
+ else \
+ PL_regendp[paren] = -1; \
+ } \
+ if (regmatch(next)) \
+ sayYES; \
+ if (paren && n) \
+ PL_regendp[paren] = -1; \
+}
+
+
/*
* pregexec and friends
*/
register I32 end_shift;
register char *s;
register SV *check;
+ char *strbeg;
char *t;
I32 ml_anch;
char *tmp;
DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
goto fail;
}
+ strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
check = prog->check_substr;
if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
|| ( (prog->reganch & ROPT_ANCH_BOL)
&& !PL_multiline ) ); /* Check after \n? */
- if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
+ if (!ml_anch) {
+ if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
+ /* SvCUR is not set on references: SvRV and SvPVX overlap */
+ && sv && !SvROK(sv)
+ && (strpos != strbeg)) {
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
+ goto fail;
+ }
+ if (prog->check_offset_min == prog->check_offset_max) {
/* Substring at constant offset from beg-of-str... */
I32 slen;
- if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
- /* SvCUR is not set on references: SvRV and SvPVX overlap */
- && sv && !SvROK(sv)
- && (strpos + SvCUR(sv) != strend)) {
- DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
- goto fail;
- }
PL_regeol = strend; /* Used in HOP() */
s = HOPc(strpos, prog->check_offset_min);
if (SvTAIL(check)) {
&& memNE(SvPVX(check), s, slen)))
goto report_neq;
goto success_at_start;
+ }
}
/* Match is anchored, but substr is not anchored wrt beg-of-str. */
s = strpos;
/* Find a possible match in the region s..strend by looking for
the "check" substring in the region corrected by start/end_shift. */
if (flags & REXEC_SCREAM) {
- char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
I32 p = -1; /* Internal iterator of scream. */
I32 *pp = data ? data->scream_pos : &p;
find_anchor:
while (t < strend - prog->minlen) {
if (*t == '\n') {
- if (t < s - prog->check_offset_min) {
+ if (t < check_at - prog->check_offset_min) {
if (prog->anchored_substr) {
- /* We definitely contradict the found anchored
+ /* Since we moved from the found position,
+ we definitely contradict the found anchored
substr. Due to the above check we do not
contradict "check" substr.
Thus we can arrive here only if check substr
PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
goto do_other_anchored;
}
+ /* We don't contradict the found floating substring. */
+ /* XXXX Why not check for STCLASS? */
s = t + 1;
DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
goto set_useful;
}
- DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
+ /* Position contradicts check-string */
+ /* XXXX probably better to look for check-string
+ than for "\n", so one should lower the limit for t? */
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
- strpos = s = t + 1;
+ other_last = strpos = s = t + 1;
goto restart;
}
t++;
PL_colors[0],PL_colors[1]));
goto fail_finish;
}
+ else {
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
+ PL_colors[0],PL_colors[1]));
+ }
s = t;
set_useful:
++BmUSEFUL(prog->check_substr); /* hooray/5 */
}
else {
PL_bostr = tmp;
- /* The found string does not prohibit matching at beg-of-str
+ /* The found string does not prohibit matching at strpos,
- no optimization of calling REx engine can be performed,
- unless it was an MBOL and we are not after MBOL. */
+ unless it was an MBOL and we are not after MBOL,
+ or a future STCLASS check will fail this. */
try_at_start:
/* Even in this situation we may use MBOL flag if strpos is offset
wrt the start of the string. */
if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
- && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n'
+ && (strpos != strbeg) && strpos[-1] != '\n'
/* May be due to an implicit anchor of m{.*foo} */
&& !(prog->reganch & ROPT_IMPLICIT))
{
goto find_anchor;
}
DEBUG_r( if (ml_anch)
- PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
- PL_colors[0],PL_colors[1]);
+ PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
+ (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
);
success_at_start:
if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
&& prog->check_substr == prog->float_substr)
{
/* If flags & SOMETHING - do not do it many times on the same match */
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
SvREFCNT_dec(prog->check_substr);
prog->check_substr = Nullsv; /* disable */
prog->float_substr = Nullsv; /* clear */
+ check = Nullsv; /* abort */
s = strpos;
/* XXXX This is a remnant of the old implementation. It
looks wasteful, since now INTUIT can use many
? s + (prog->minlen? cl_l : 0)
: (prog->float_substr ? check_at - start_shift + cl_l
: strend) ;
- char *startpos = sv && SvPOK(sv) ? strend - SvCUR(sv) : s;
+ char *startpos = strbeg;
t = s;
if (prog->reganch & ROPT_UTF8) {
"Could not match STCLASS...\n") );
goto fail;
}
+ if (!check)
+ goto giveup;
DEBUG_r( PerlIO_printf(Perl_debug_log,
- "Trying %s substr starting at offset %ld...\n",
+ "Looking for %s substr starting at offset %ld...\n",
what, (long)(s + start_shift - i_strpos)) );
goto restart;
}
goto retry_floating_check;
/* Recheck anchored substring, but not floating... */
s = check_at;
+ if (!check)
+ goto giveup;
DEBUG_r( PerlIO_printf(Perl_debug_log,
- "Trying anchored substr starting at offset %ld...\n",
+ "Looking for anchored substr starting at offset %ld...\n",
(long)(other_last - i_strpos)) );
goto do_other_anchored;
}
current position only: */
if (ml_anch) {
s = t = t + 1;
+ if (!check)
+ goto giveup;
DEBUG_r( PerlIO_printf(Perl_debug_log,
- "Trying /^/m starting at offset %ld...\n",
- (long)(t - i_strpos)) );
+ "Looking for /%s^%s/m starting at offset %ld...\n",
+ PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
goto try_at_offset;
}
if (!prog->float_substr) /* Could have been deleted */
PerlIO_printf(Perl_debug_log,
"Does not contradict STCLASS...\n") );
}
- DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
- PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
+ giveup:
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
+ PL_colors[4], (check ? "Guessed" : "Giving up"),
+ PL_colors[5], (long)(s - i_strpos)) );
return s;
fail_finish: /* Substring not found */
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
case BOUNDUTF8:
- tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
+ tmp = (I32)(s != startpos) ? utf8_to_uv_chk(reghop((U8*)s, -1), 0, 0) : '\n';
tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
while (s < strend) {
if (tmp == !(OP(c) == BOUNDUTF8 ?
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
case NBOUNDUTF8:
- tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
+ tmp = (I32)(s != startpos) ? utf8_to_uv_chk(reghop((U8*)s, -1), 0, 0) : '\n';
tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
while (s < strend) {
if (tmp == !(OP(c) == NBOUNDUTF8 ?
}
sayNO;
case SBOL:
- if (locinput == PL_regbol && PL_regprev == '\n')
+ if (locinput == PL_bostr)
break;
sayNO;
case GPOS:
while (s < e) {
if (l >= PL_regeol)
sayNO;
- if (utf8_to_uv((U8*)s, 0) != (c1 ?
+ if (utf8_to_uv_chk((U8*)s, 0, 0) != (c1 ?
toLOWER_utf8((U8*)l) :
toLOWER_LC_utf8((U8*)l)))
{
case NBOUNDUTF8:
/* was last char in word? */
ln = (locinput != PL_regbol)
- ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
+ ? utf8_to_uv_chk(reghop((U8*)locinput, -1), 0, 0) : PL_regprev;
if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
ln = isALNUM_uni(ln);
n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
sayNO;
}
/* PL_reginput == locinput now */
- if (paren) {
- if (ln) {
- PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
- PL_regendp[paren] = locinput - PL_bostr;
- }
- else
- PL_regendp[paren] = -1;
- }
- if (regmatch(next))
- sayYES;
+ TRYPAREN(paren, ln, locinput);
PL_reginput = locinput; /* Could be reset... */
REGCP_UNWIND;
/* Couldn't or didn't -- move forward. */
UCHARAT(PL_reginput) == c1 ||
UCHARAT(PL_reginput) == c2)
{
- if (paren) {
- if (n) {
- PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
- PL_regendp[paren] = PL_reginput - PL_bostr;
- }
- else
- PL_regendp[paren] = -1;
- }
- if (regmatch(next))
- sayYES;
+ TRYPAREN(paren, n, PL_reginput);
REGCP_UNWIND;
}
/* Couldn't or didn't -- move forward. */
UCHARAT(PL_reginput) == c1 ||
UCHARAT(PL_reginput) == c2)
{
- if (paren && n) {
- if (n) {
- PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
- PL_regendp[paren] = PL_reginput - PL_bostr;
- }
- else
- PL_regendp[paren] = -1;
- }
- if (regmatch(next))
- sayYES;
+ TRYPAREN(paren, n, PL_reginput);
REGCP_UNWIND;
}
/* Couldn't or didn't -- back up. */
UCHARAT(PL_reginput) == c1 ||
UCHARAT(PL_reginput) == c2)
{
- if (regmatch(next))
- sayYES;
+ TRYPAREN(paren, n, PL_reginput);
REGCP_UNWIND;
}
/* Couldn't or didn't -- back up. */
(ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
(ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
(ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
- (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
+ (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
+ (ANYOF_CLASS_TEST(p, ANYOF_PSXSPC) && isPSXSPC(c)) ||
+ (ANYOF_CLASS_TEST(p, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
+ (ANYOF_CLASS_TEST(p, ANYOF_BLANK) && isBLANK(c)) ||
+ (ANYOF_CLASS_TEST(p, ANYOF_NBLANK) && !isBLANK(c))
) /* How's that for a conditional? */
{
match = TRUE;
PL_curpm = PL_reg_oldcurpm;
}
}
-