*/
#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
-#define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b)
+#define CHR_DIST(a,b) (PL_reg_sv_utf8 ? utf8_distance(a,b) : a - b)
#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
-#define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off))
-#define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
+#define HOP(pos,off) (PL_reg_sv_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
+#define HOPMAYBE(pos,off) (PL_reg_sv_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
#define HOPc(pos,off) ((char*)HOP(pos,off))
#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
#define HOPBACK(pos, off) ( \
- (UTF && DO_UTF8(PL_reg_sv)) \
+ (UTF && PL_reg_sv_utf8) \
? reghopmaybe((U8*)pos, -off) \
: (pos - off >= PL_bostr) \
? (U8*)(pos - off) \
#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
-#define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
-#define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
+#define HOP3(pos,off,lim) (PL_reg_sv_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
+#define HOPMAYBE3(pos,off,lim) (PL_reg_sv_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
goto fail;
}
if (prog->check_offset_min == prog->check_offset_max &&
- !(prog->reganch & ROPT_SANY_SEEN)) {
+ !(prog->reganch & ROPT_CANY_SEEN)) {
/* Substring at constant offset from beg-of-str... */
I32 slen;
if (data)
*data->scream_olds = s;
}
- else if (prog->reganch & ROPT_SANY_SEEN)
+ else if (prog->reganch & ROPT_CANY_SEEN)
s = fbm_instr((U8*)(s + start_shift),
(U8*)(strend - end_shift),
check, PL_multiline ? FBMrf_MULTILINE : 0);
PL_regdata = prog->data;
PL_bostr = startpos;
}
- s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
+ s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
if (!s) {
#ifdef DEBUGGING
char *what = 0;
unsigned int c2;
char *e;
register I32 tmp = 1; /* Scratch variable? */
- register bool do_utf8 = DO_UTF8(PL_reg_sv);
+ register bool do_utf8 = PL_reg_sv_utf8;
/* We know what class it must start with. */
switch (OP(c)) {
s += do_utf8 ? UTF8SKIP(s) : 1;
}
break;
+ case CANY:
+ while (s < strend) {
+ if (tmp && (norun || regtry(prog, s)))
+ goto got_it;
+ else
+ tmp = doevery;
+ s++;
+ }
+ break;
case EXACTF:
m = STRING(c);
ln = STR_LEN(c);
}
minlen = prog->minlen;
- if (do_utf8) {
- if (!(prog->reganch & ROPT_SANY_SEEN))
+ if (do_utf8 && !(prog->reganch & ROPT_CANY_SEEN)) {
if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
}
else {
- if (strend - startpos < minlen) goto phooey;
+ if (strend - startpos < minlen) goto phooey;
}
/* Check validity of program. */
&& mg->mg_len >= 0) {
PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
if (prog->reganch & ROPT_ANCH_GPOS) {
- if (s > PL_reg_ganch)
+ if (s > PL_reg_ganch)
goto phooey;
s = PL_reg_ganch;
}
#ifdef USE_ITHREADS
{
SV* repointer = newSViv(0);
+ /* so we know which PL_regex_padav element is PL_reg_curpm */
+ SvFLAGS(repointer) |= SVf_BREAK;
av_push(PL_regex_padav,repointer);
PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
PL_regex_pad = AvARRAY(PL_regex_padav);
re_unwind_branch_t branch;
} re_unwind_t;
+#define sayYES goto yes
+#define sayNO goto no
+#define sayYES_FINAL goto yes_final
+#define sayYES_LOUD goto yes_loud
+#define sayNO_FINAL goto no_final
+#define sayNO_SILENT goto do_no
+#define saySAME(x) if (x) goto yes; else goto no
+
+#define REPORT_CODE_OFF 24
+
/*
- regmatch - main matching routine
*
#if 0
I32 firstcp = PL_savestack_ix;
#endif
- register bool do_utf8 = DO_UTF8(PL_reg_sv);
+ register bool do_utf8 = PL_reg_sv_utf8;
#ifdef DEBUGGING
PL_regindent++;
nextchr = UCHARAT(locinput);
scan = prog;
while (scan != NULL) {
-#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
-#if 1
-# define sayYES goto yes
-# define sayNO goto no
-# define sayYES_FINAL goto yes_final
-# define sayYES_LOUD goto yes_loud
-# define sayNO_FINAL goto no_final
-# define sayNO_SILENT goto do_no
-# define saySAME(x) if (x) goto yes; else goto no
-# define REPORT_CODE_OFF 24
-#else
-# define sayYES return 1
-# define sayNO return 0
-# define sayYES_FINAL return 1
-# define sayYES_LOUD return 1
-# define sayNO_FINAL return 0
-# define sayNO_SILENT return 0
-# define saySAME(x) return x
-#endif
+
DEBUG_r( {
SV *prop = sv_newmortal();
int docolor = *PL_colors[0];
case SANY:
if (!nextchr && locinput >= PL_regeol)
sayNO;
+ if (do_utf8) {
+ locinput += PL_utf8skip[nextchr];
+ if (locinput > PL_regeol)
+ sayNO;
+ nextchr = UCHARAT(locinput);
+ }
+ else
+ nextchr = UCHARAT(++locinput);
+ break;
+ case CANY:
+ if (!nextchr && locinput >= PL_regeol)
+ sayNO;
nextchr = UCHARAT(++locinput);
break;
case REG_ANY:
/* XXXX This is too dramatic a measure... */
PL_reg_maxiter = 0;
+ logical = 0;
sayNO;
}
sw = SvTRUE(ret);
minmod = 0;
if (ln && regrepeat_hard(scan, ln, &l) < ln)
sayNO;
- if (ln && l == 0 && n >= ln
- /* In fact, this is tricky. If paren, then the
- fact that we did/didnot match may influence
- future execution. */
- && !(paren && ln == 0))
- ln = n;
+ /* if we matched something zero-length we don't need to
+ backtrack - capturing parens are already defined, so
+ the caveat in the maximal case doesn't apply
+
+ XXXX if ln == 0, we can redo this check first time
+ through the following loop
+ */
+ if (ln && l == 0)
+ n = ln; /* don't backtrack */
locinput = PL_reginput;
if (PL_regkind[(U8)OP(next)] == EXACT) {
c1 = (U8)*STRING(next);
UCHARAT(PL_reginput) == c2)
{
if (paren) {
- if (n) {
+ if (ln) {
PL_regstartp[paren] =
HOPc(PL_reginput, -l) - PL_bostr;
PL_regendp[paren] = PL_reginput - PL_bostr;
}
else {
n = regrepeat_hard(scan, n, &l);
- if (n != 0 && l == 0
- /* In fact, this is tricky. If paren, then the
- fact that we did/didnot match may influence
- future execution. */
- && !(paren && ln == 0))
- ln = n;
+ /* if we matched something zero-length we don't need to
+ backtrack, unless the minimum count is zero and we
+ are capturing the result - in that case the capture
+ being defined or not may affect later execution
+ */
+ if (n != 0 && l == 0 && !(paren && ln == 0))
+ ln = n; /* don't backtrack */
locinput = PL_reginput;
DEBUG_r(
PerlIO_printf(Perl_debug_log,
register I32 c;
register char *loceol = PL_regeol;
register I32 hardcount = 0;
- register bool do_utf8 = DO_UTF8(PL_reg_sv);
+ register bool do_utf8 = PL_reg_sv_utf8;
scan = PL_reginput;
if (max != REG_INFTY && max < loceol - scan)
case SANY:
scan = loceol;
break;
+ case CANY:
+ scan = loceol;
+ break;
case EXACT: /* length of string is 1 */
c = (U8)*STRING(p);
while (scan < loceol && UCHARAT(scan) == c)
return 0;
start = PL_reginput;
- if (DO_UTF8(PL_reg_sv)) {
+ if (PL_reg_sv_utf8) {
while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
if (!count++) {
l = 0;