*/
#ifdef PERL_EXT_RE_BUILD
-/* need to replace pregcomp et al, so enable that */
-# ifndef PERL_IN_XSUB_RE
-# define PERL_IN_XSUB_RE
-# endif
-/* need access to debugger hooks */
-# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
-# define DEBUGGING
-# endif
-#endif
-
-#ifdef PERL_IN_XSUB_RE
-/* We *really* need to overwrite these symbols: */
-# define Perl_regexec_flags my_regexec
-# define Perl_regdump my_regdump
-# define Perl_regprop my_regprop
-# define Perl_re_intuit_start my_re_intuit_start
-/* *These* symbols are masked to allow static link. */
-# define Perl_pregexec my_pregexec
-# define Perl_reginitcolors my_reginitcolors
-# define Perl_regclass_swash my_regclass_swash
-
-# define PERL_NO_GET_CONTEXT
+#include "re_top.h"
#endif
/*
#define PERL_IN_REGEXEC_C
#include "perl.h"
-#include "regcomp.h"
+#ifdef PERL_IN_XSUB_RE
+# include "re_comp.h"
+#else
+# include "regcomp.h"
+#endif
#define RF_tainted 1 /* tainted information used? */
#define RF_warned 2 /* warned about big count? */
#define STATIC static
#endif
-#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
+#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
/*
* Forwards.
#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
-#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
-#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
-#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
-#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
-#define HOPc(pos,off) ((char*)HOP(pos,off))
-#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
-
-#define HOPBACK(pos, off) ( \
- (PL_reg_match_utf8) \
- ? reghopmaybe((U8*)pos, -off) \
+#define HOPc(pos,off) ((char *)(PL_reg_match_utf8 \
+ ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
+ : (U8*)(pos + off)))
+#define HOPBACKc(pos, off) ((char*) \
+ ((PL_reg_match_utf8) \
+ ? reghopmaybe3((U8*)pos, -off, ((U8*)(off < 0 ? PL_regeol : PL_bostr))) \
: (pos - off >= PL_bostr) \
? (U8*)(pos - off) \
- : (U8*)NULL \
+ : (U8*)NULL) \
)
-#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
-#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
-#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
-#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
(IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
STATIC char *
-S_regcppop(pTHX)
+S_regcppop(pTHX_ const regexp *rex)
{
dVAR;
I32 i;
- U32 paren = 0;
char *input;
GET_RE_DEBUG_FLAGS_DECL;
for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
i > 0; i -= REGCP_PAREN_ELEMS) {
I32 tmps;
- paren = (U32)SSPOPINT;
+ U32 paren = (U32)SSPOPINT;
PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
PL_regstartp[paren] = SSPOPINT;
tmps = SSPOPINT;
);
}
DEBUG_EXECUTE_r(
- if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
+ if ((I32)(*PL_reglastparen + 1) <= rex->nparens) {
PerlIO_printf(Perl_debug_log,
" restoring \\%"IVdf"..\\%"IVdf" to undef\n",
- (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
+ (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
}
);
#if 1
* building DynaLoader will fail:
* "Error: '*' not in typemap in DynaLoader.xs, line 164"
* --jhi */
- for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
- if ((I32)paren > PL_regsize)
- PL_regstartp[paren] = -1;
- PL_regendp[paren] = -1;
+ for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
+ if (i > PL_regsize)
+ PL_regstartp[i] = -1;
+ PL_regendp[i] = -1;
}
#endif
return input;
}
-typedef struct re_cc_state
-{
- I32 ss;
- regnode *node;
- struct re_cc_state *prev;
- CURCUR *cc;
- regexp *re;
-} re_cc_state;
-
#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
-#define TRYPAREN(paren, n, input) { \
+#define TRYPAREN(paren, n, input, where) { \
if (paren) { \
if (n) { \
PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
else \
PL_regendp[paren] = -1; \
} \
- if (regmatch(next)) \
+ REGMATCH(next, where); \
+ if (result) \
sayYES; \
if (paren && n) \
PL_regendp[paren] = -1; \
* pregexec and friends
*/
+#ifndef PERL_IN_XSUB_RE
/*
- pregexec - match a regexp against a string
*/
regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
nosave ? 0 : REXEC_COPY_STR);
}
-
-STATIC void
-S_cache_re(pTHX_ regexp *prog)
-{
- dVAR;
- PL_regprecomp = prog->precomp; /* Needed for FAIL. */
-#ifdef DEBUGGING
- PL_regprogram = prog->program;
#endif
- PL_regnpar = prog->nparens;
- PL_regdata = prog->data;
- PL_reg_re = prog;
-}
/*
* Need to implement the following flags for reg_anch:
: strend);
t = s;
- cache_re(prog);
- s = find_byclass(prog, prog->regstclass, s, endpos, 1);
+ s = find_byclass(prog, prog->regstclass, s, endpos, NULL);
if (!s) {
#ifdef DEBUGGING
const char *what = NULL;
}
/* We know what class REx starts with. Try to find this position... */
+/* if reginfo is NULL, it's a dry run (regtry() is not called) */
+
STATIC char *
-S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32 norun)
+S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char
+*strend, const regmatch_info *reginfo)
{
dVAR;
const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
!UTF8_IS_INVARIANT((U8)s[0]) ?
- reginclass(c, (U8*)s, 0, do_utf8) :
- REGINCLASS(c, (U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ reginclass(prog, c, (U8*)s, 0, do_utf8) :
+ REGINCLASS(prog, c, (U8*)s)) {
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
while (s < strend) {
STRLEN skip = 1;
- if (REGINCLASS(c, (U8*)s) ||
+ if (REGINCLASS(prog, c, (U8*)s) ||
(ANYOF_FOLD_SHARP_S(c, s, strend) &&
/* The assignment of 2 is intentional:
* for the folded sharp s, the skip is 2. */
(skip = SHARP_S_SKIP))) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
break;
case CANY:
while (s < strend) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
U8 *sm = (U8 *) m;
U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
- const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
+ const U32 uniflags = UTF8_ALLOW_DEFAULT;
to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
do_exactf:
e = HOP3c(strend, -((I32)lnc), s);
- if (norun && e < s)
+ if (!reginfo && e < s)
e = s; /* Due to minlen logic of intuit() */
/* The idea in the EXACTF* cases is to first find the
UV c, f;
U8 tmpbuf [UTF8_MAXBYTES+1];
STRLEN len, foldlen;
- const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
+ const U32 uniflags = UTF8_ALLOW_DEFAULT;
if (c1 == c2) {
/* Upper and lower of 1st char are equal -
* probably not a "letter". */
&& (ln == len ||
ibcmp_utf8(s, (char **)0, 0, do_utf8,
m, (char **)0, ln, (bool)UTF))
- && (norun || regtry(prog, s)) )
+ && (!reginfo || regtry(reginfo, s)) )
goto got_it;
else {
U8 foldbuf[UTF8_MAXBYTES_CASE+1];
(char **)0, foldlen, do_utf8,
m,
(char **)0, ln, (bool)UTF))
- && (norun || regtry(prog, s)) )
+ && (!reginfo || regtry(reginfo, s)) )
goto got_it;
}
s += len;
&& (ln == len ||
ibcmp_utf8(s, (char **)0, 0, do_utf8,
m, (char **)0, ln, (bool)UTF))
- && (norun || regtry(prog, s)) )
+ && (!reginfo || regtry(reginfo, s)) )
goto got_it;
else {
U8 foldbuf[UTF8_MAXBYTES_CASE+1];
(char **)0, foldlen, do_utf8,
m,
(char **)0, ln, (bool)UTF))
- && (norun || regtry(prog, s)) )
+ && (!reginfo || regtry(reginfo, s)) )
goto got_it;
}
s += len;
&& (ln == 1 || !(OP(c) == EXACTF
? ibcmp(s, m, ln)
: ibcmp_locale(s, m, ln)))
- && (norun || regtry(prog, s)) )
+ && (!reginfo || regtry(reginfo, s)) )
goto got_it;
s++;
}
&& (ln == 1 || !(OP(c) == EXACTF
? ibcmp(s, m, ln)
: ibcmp_locale(s, m, ln)))
- && (norun || regtry(prog, s)) )
+ && (!reginfo || regtry(reginfo, s)) )
goto got_it;
s++;
}
tmp = '\n';
else {
U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
- tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
+ tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
}
tmp = ((OP(c) == BOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
isALNUM_LC_utf8((U8*)s)))
{
tmp = !tmp;
- if ((norun || regtry(prog, s)))
+ if ((!reginfo || regtry(reginfo, s)))
goto got_it;
}
s += uskip;
if (tmp ==
!(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
tmp = !tmp;
- if ((norun || regtry(prog, s)))
+ if ((!reginfo || regtry(reginfo, s)))
goto got_it;
}
s++;
}
}
- if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
+ if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, s)))
goto got_it;
break;
case NBOUNDL:
tmp = '\n';
else {
U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
- tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
+ tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
}
tmp = ((OP(c) == NBOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
isALNUM_LC_utf8((U8*)s)))
tmp = !tmp;
- else if ((norun || regtry(prog, s)))
+ else if ((!reginfo || regtry(reginfo, s)))
goto got_it;
s += uskip;
}
if (tmp ==
!(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
tmp = !tmp;
- else if ((norun || regtry(prog, s)))
+ else if ((!reginfo || regtry(reginfo, s)))
goto got_it;
s++;
}
}
- if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
+ if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, s)))
goto got_it;
break;
case ALNUM:
LOAD_UTF8_CHARCLASS_ALNUM();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (isALNUM(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
if (do_utf8) {
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (isALNUM_LC_utf8((U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (isALNUM_LC(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
LOAD_UTF8_CHARCLASS_ALNUM();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (!isALNUM(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
if (do_utf8) {
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!isALNUM_LC_utf8((U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (!isALNUM_LC(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
LOAD_UTF8_CHARCLASS_SPACE();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (isSPACE(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
if (do_utf8) {
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (isSPACE_LC(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
LOAD_UTF8_CHARCLASS_SPACE();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (!isSPACE(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
if (do_utf8) {
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (!isSPACE_LC(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
LOAD_UTF8_CHARCLASS_DIGIT();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (isDIGIT(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
if (do_utf8) {
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (isDIGIT_LC_utf8((U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (isDIGIT_LC(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
LOAD_UTF8_CHARCLASS_DIGIT();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (!isDIGIT(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
if (do_utf8) {
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!isDIGIT_LC_utf8((U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (!isDIGIT_LC(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
char *scream_olds = NULL;
SV* oreplsv = GvSV(PL_replgv);
const bool do_utf8 = DO_UTF8(sv);
- const I32 multiline = prog->reganch & PMf_MULTILINE;
+ I32 multiline;
#ifdef DEBUGGING
- SV * const dsv0 = PERL_DEBUG_PAD_ZERO(0);
- SV * const dsv1 = PERL_DEBUG_PAD_ZERO(1);
+ SV* dsv0;
+ SV* dsv1;
#endif
+ regmatch_info reginfo; /* create some info to pass to regtry etc */
GET_RE_DEBUG_FLAGS_DECL;
PERL_UNUSED_ARG(data);
- RX_MATCH_UTF8_set(prog,do_utf8);
-
- PL_regcc = 0;
-
- cache_re(prog);
-#ifdef DEBUGGING
- PL_regnarrate = DEBUG_r_TEST;
-#endif
/* Be paranoid... */
if (prog == NULL || startpos == NULL) {
return 0;
}
+ multiline = prog->reganch & PMf_MULTILINE;
+ reginfo.prog = prog;
+
+#ifdef DEBUGGING
+ dsv0 = PERL_DEBUG_PAD_ZERO(0);
+ dsv1 = PERL_DEBUG_PAD_ZERO(1);
+#endif
+
+ RX_MATCH_UTF8_set(prog, do_utf8);
+
minlen = prog->minlen;
if (strend - startpos < minlen) {
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
PL_reg_flags |= RF_utf8;
/* Mark beginning of line for ^ and lookbehind. */
- PL_regbol = startpos;
+ reginfo.bol = startpos; /* XXX not used ??? */
PL_bostr = strbeg;
- PL_reg_sv = sv;
+ reginfo.sv = sv;
/* Mark end of line for $ (and such) */
PL_regeol = strend;
/* see how far we have to get to not match where we matched before */
- PL_regtill = startpos+minend;
-
- /* We start without call_cc context. */
- PL_reg_call_cc = 0;
+ reginfo.till = startpos+minend;
/* If there is a "must appear" string, look for it. */
s = startpos;
- if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
+ if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to set reginfo->ganch */
MAGIC *mg;
if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
- PL_reg_ganch = startpos;
+ reginfo.ganch = startpos;
else if (sv && SvTYPE(sv) >= SVt_PVMG
&& SvMAGIC(sv)
&& (mg = mg_find(sv, PERL_MAGIC_regex_global))
&& mg->mg_len >= 0) {
- PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
+ reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
if (prog->reganch & ROPT_ANCH_GPOS) {
- if (s > PL_reg_ganch)
+ if (s > reginfo.ganch)
goto phooey;
- s = PL_reg_ganch;
+ s = reginfo.ganch;
}
}
else /* pos() not defined */
- PL_reg_ganch = strbeg;
+ reginfo.ganch = strbeg;
}
if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
/* Simplest case: anchored match need be tried only once. */
/* [unless only anchor is BOL and multiline is set] */
if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
- if (s == startpos && regtry(prog, startpos))
+ if (s == startpos && regtry(&reginfo, startpos))
goto got_it;
else if (multiline || (prog->reganch & ROPT_IMPLICIT)
|| (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
if (s == startpos)
goto after_try;
while (1) {
- if (regtry(prog, s))
+ if (regtry(&reginfo, s))
goto got_it;
after_try:
if (s >= end)
s--;
while (s < end) {
if (*s++ == '\n') { /* don't need PL_utf8skip here */
- if (regtry(prog, s))
+ if (regtry(&reginfo, s))
goto got_it;
}
}
}
goto phooey;
} else if (prog->reganch & ROPT_ANCH_GPOS) {
- if (regtry(prog, PL_reg_ganch))
+ if (regtry(&reginfo, reginfo.ganch))
goto got_it;
goto phooey;
}
while (s < strend) {
if (*s == ch) {
DEBUG_EXECUTE_r( did_match = 1 );
- if (regtry(prog, s)) goto got_it;
+ if (regtry(&reginfo, s)) goto got_it;
s += UTF8SKIP(s);
while (s < strend && *s == ch)
s += UTF8SKIP(s);
while (s < strend) {
if (*s == ch) {
DEBUG_EXECUTE_r( did_match = 1 );
- if (regtry(prog, s)) goto got_it;
+ if (regtry(&reginfo, s)) goto got_it;
s++;
while (s < strend && *s == ch)
s++;
s = HOPc(s, -back_max);
}
else {
- char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
+ char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
last1 = HOPc(s, -back_min);
- s = t;
+ s = t;
}
if (do_utf8) {
while (s <= last1) {
- if (regtry(prog, s))
+ if (regtry(&reginfo, s))
goto got_it;
s += UTF8SKIP(s);
}
}
else {
while (s <= last1) {
- if (regtry(prog, s))
+ if (regtry(&reginfo, s))
goto got_it;
s++;
}
int len0;
int len1;
- regprop(prop, c);
+ regprop(prog, prop, c);
s0 = UTF ?
pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
UNI_DISPLAY_REGEX) :
len0, len0, s0,
len1, len1, s1);
});
- if (find_byclass(prog, c, s, strend, 0))
+ if (find_byclass(prog, c, s, strend, &reginfo))
goto got_it;
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
}
/* We don't know much -- general case. */
if (do_utf8) {
for (;;) {
- if (regtry(prog, s))
+ if (regtry(&reginfo, s))
goto got_it;
if (s >= strend)
break;
}
else {
do {
- if (regtry(prog, s))
+ if (regtry(&reginfo, s))
goto got_it;
} while (s++ < strend);
}
sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
restored, the value remains
the same. */
- restore_pos(aTHX_ 0);
+ restore_pos(aTHX_ prog);
}
/* make sure $`, $&, $', and $digit will work later */
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
PL_colors[4], PL_colors[5]));
if (PL_reg_eval_set)
- restore_pos(aTHX_ 0);
+ restore_pos(aTHX_ prog);
return 0;
}
- regtry - try match at specific point
*/
STATIC I32 /* 0 failure, 1 success */
-S_regtry(pTHX_ regexp *prog, char *startpos)
+S_regtry(pTHX_ const regmatch_info *reginfo, char *startpos)
{
dVAR;
- register I32 i;
register I32 *sp;
register I32 *ep;
CHECKPOINT lastcp;
+ regexp *prog = reginfo->prog;
GET_RE_DEBUG_FLAGS_DECL;
#ifdef DEBUGGING
/* SAVEI8(cxstack[cxstack_ix].blk_gimme);
cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
- if (PL_reg_sv) {
+ if (reginfo->sv) {
/* Make $_ available to executed code. */
- if (PL_reg_sv != DEFSV) {
+ if (reginfo->sv != DEFSV) {
SAVE_DEFSV;
- DEFSV = PL_reg_sv;
+ DEFSV = reginfo->sv;
}
- if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
- && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
+ if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
+ && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
/* prepare for quick setting of pos */
#ifdef PERL_OLD_COPY_ON_WRITE
if (SvIsCOW(sv))
sv_force_normal_flags(sv, 0);
#endif
- mg = sv_magicext(PL_reg_sv, (SV*)0, PERL_MAGIC_regex_global,
+ mg = sv_magicext(reginfo->sv, (SV*)0, PERL_MAGIC_regex_global,
&PL_vtbl_mglob, NULL, 0);
mg->mg_len = -1;
}
PL_reg_magic = mg;
PL_reg_oldpos = mg->mg_len;
- SAVEDESTRUCTOR_X(restore_pos, 0);
+ SAVEDESTRUCTOR_X(restore_pos, prog);
}
if (!PL_reg_curpm) {
Newxz(PL_reg_curpm, 1, PMOP);
sp = prog->startp;
ep = prog->endp;
if (prog->nparens) {
+ register I32 i;
for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
*++sp = -1;
*++ep = -1;
}
#endif
REGCP_SET(lastcp);
- if (regmatch(prog->program + 1)) {
+ if (regmatch(reginfo, prog->program + 1)) {
prog->endp[0] = PL_reginput - PL_bostr;
return 1;
}
regnode *next;
char *locinput;
I32 nextchr;
+ int minmod;
#ifdef DEBUGGING
int regindent;
#endif
#define sayNO goto no
#define sayNO_ANYOF goto no_anyof
#define sayYES_FINAL goto yes_final
-#define sayYES_LOUD goto yes_loud
#define sayNO_FINAL goto no_final
#define sayNO_SILENT goto do_no
#define saySAME(x) if (x) goto yes; else goto no
#define POSCACHE_SUCCESS 0 /* caching success rather than failure */
#define POSCACHE_SEEN 1 /* we know what we're caching */
#define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */
+
#define CACHEsayYES STMT_START { \
- if (cache_offset | cache_bit) { \
- if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
- PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) || (1<<POSCACHE_SEEN); \
- else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
+ if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
+ if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) { \
+ PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) | (1<<POSCACHE_SEEN); \
+ PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
+ } \
+ else if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS)) { \
+ PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
+ } \
+ else { \
/* cache records failure, but this is success */ \
DEBUG_r( \
PerlIO_printf(Perl_debug_log, \
"%*s (remove success from failure cache)\n", \
REPORT_CODE_OFF+PL_regindent*2, "") \
); \
- PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
+ PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
} \
} \
sayYES; \
} STMT_END
+
#define CACHEsayNO STMT_START { \
- if (cache_offset | cache_bit) { \
- if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
+ if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
+ if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) { \
PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
- else if ((PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
+ PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
+ } \
+ else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
+ PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
+ } \
+ else { \
/* cache records success, but this is failure */ \
DEBUG_r( \
PerlIO_printf(Perl_debug_log, \
"%*s (remove failure from success cache)\n", \
REPORT_CODE_OFF+PL_regindent*2, "") \
); \
- PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
+ PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
} \
} \
sayNO; \
/* Make sure there is a test for this +1 options in re_tests */
#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
+/* this value indicates that the c1/c2 "next char" test should be skipped */
+#define CHRTEST_VOID -1000
+
+#define SLAB_FIRST(s) (&(s)->states[0])
+#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
+/* Advance PL_regmatch_slab to the next slab in the chain and return a
+ * pointer to that slab's first state slot.  If the current slab already
+ * has a successor it is reused; otherwise a new slab is allocated with
+ * Newx() and linked in after the current one.
+ * PL_regmatch_slab is dereferenced unconditionally, so it must be non-NULL
+ * on entry.  Side effect: PL_regmatch_slab points at the returned slab. */
+
+STATIC regmatch_state *
+S_push_slab(pTHX)
+{
+#if PERL_VERSION < 9
+ dMY_CXT;
+#endif
+ regmatch_slab *s = PL_regmatch_slab->next; /* existing successor, if any */
+ if (!s) {
+ Newx(s, 1, regmatch_slab); /* no successor yet: allocate one slab */
+ s->prev = PL_regmatch_slab; /* chain it after the current slab */
+ s->next = NULL;
+ PL_regmatch_slab->next = s;
+ }
+ PL_regmatch_slab = s; /* the reused/new slab becomes the current one */
+ return SLAB_FIRST(s);
+}
+
+/* Simulate a recursive call to regmatch() without using the C stack:
+ * save the current node in st->scan, make (ns) the node to match next,
+ * record resume_<where> in st->resume_state and jump to start_recurse.
+ * The macro ends by defining the label resume_point_<where>; the
+ * S_regmatch header comment describes how resume_state is used to jump
+ * back to that label once the simulated call has finished.
+ * It expands to several bare statements plus a label, so it must not be
+ * used as the body of an unbraced if/else, and each <where> can appear
+ * only once per function (labels must be unique). */
+
+#define REGMATCH(ns, where) \
+ st->scan = scan; \
+ scan = (ns); \
+ st->resume_state = resume_##where; \
+ goto start_recurse; \
+ resume_point_##where:
+
+
+/* Push a new regex state and set newst to point to it.
+ * Saves the cached scan, next, n and locinput into the current state st
+ * together with the given resume value, then takes the slot after st (or
+ * the first slot of the slab returned by S_push_slab() when that would be
+ * past the last slot of the current slab), publishes it in
+ * PL_regmatch_state and clears its cc, minmod, sw, logical and unwind
+ * fields.  locinput is reloaded from PL_reginput and nextchr recomputed.
+ * Note that st itself is not changed here; only newst is assigned. */
+
+#define PUSH_STATE(newst, resume) \
+ depth++; \
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "PUSH STATE(%d)\n", depth)); \
+ st->scan = scan; \
+ st->next = next; \
+ st->n = n; \
+ st->locinput = locinput; \
+ st->resume_state = resume; \
+ newst = st+1; \
+ if (newst > SLAB_LAST(PL_regmatch_slab)) \
+ newst = S_push_slab(aTHX); \
+ PL_regmatch_state = newst; \
+ newst->cc = 0; \
+ newst->minmod = 0; \
+ newst->sw = 0; \
+ newst->logical = 0; \
+ newst->unwind = 0; \
+ locinput = PL_reginput; \
+ nextchr = UCHARAT(locinput);
+/* Pop back to the previous regex state: step st back one slot (moving to the last slot of the previous slab when st falls off the front of the current one), publish it in PL_regmatch_state, and restore the cached scan, next, n and locinput from it; nextchr is recomputed from locinput. */
+#define POP_STATE \
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth)); \
+ depth--; \
+ st--; \
+ if (st < SLAB_FIRST(PL_regmatch_slab)) { \
+ PL_regmatch_slab = PL_regmatch_slab->prev; \
+ st = SLAB_LAST(PL_regmatch_slab); \
+ } \
+ PL_regmatch_state = st; \
+ scan = st->scan; \
+ next = st->next; \
+ n = st->n; \
+ locinput = st->locinput; \
+ nextchr = UCHARAT(locinput);
/*
- regmatch - main matching routine
/* [lwall] I've hoisted the register declarations to the outer block in order to
* maybe save a little bit of pushing and popping on the stack. It also takes
* advantage of machines that use a register save mask on subroutine entry.
+ *
+ * This function used to be heavily recursive, but since this had the
+ * effect of blowing the CPU stack on complex regexes, it has been
+ * restructured to be iterative, and to save state onto the heap rather
+ * than the stack. Essentially wherever regmatch() used to be called, it
+ * pushes the current state, notes where to return, then jumps back into
+ * the main loop.
+ *
+ * Originally the structure of this function used to look something like
+
+ S_regmatch() {
+ int a = 1, b = 2;
+ ...
+ while (scan != NULL) {
+ a++; // do stuff with a and b
+ ...
+ switch (OP(scan)) {
+ case FOO: {
+ int local = 3;
+ ...
+ if (regmatch(...)) // recurse
+ goto yes;
+ }
+ ...
+ }
+ }
+ yes:
+ return 1;
+ }
+
+ * Now it looks something like this:
+
+ typedef struct {
+ int a, b, local;
+ int resume_state;
+ } regmatch_state;
+
+ S_regmatch() {
+ regmatch_state *st = new();
+ int depth=0;
+ st->a++; // do stuff with a and b
+ ...
+ while (scan != NULL) {
+ ...
+ switch (OP(scan)) {
+ case FOO: {
+ st->local = 3;
+ ...
+ st->scan = scan;
+ scan = ...;
+ st->resume_state = resume_FOO;
+ goto start_recurse; // recurse
+
+ resume_point_FOO:
+ if (result)
+ goto yes;
+ }
+ ...
+ }
+ start_recurse:
+ st = new(); // push a new state
+ st->a = 1; st->b = 2;
+ depth++;
+ }
+ yes:
+ result = 1;
+ if (depth--) {
+ st = pop();
+ switch (resume_state) {
+ case resume_FOO:
+ goto resume_point_FOO;
+ ...
+ }
+ }
+ return result
+ }
+
+ * WARNING: this means that any line in this function that contains a
+ * REGMATCH() or TRYPAREN() is actually simulating a recursive call to
+ * regmatch() using gotos instead. Thus the values of any local variables
+ * not saved in the regmatch_state structure will have been lost when
+ * execution resumes on the next line.
+ *
+ * States (ie the st pointer) are allocated in slabs of about 4K in size.
+ * PL_regmatch_state always points to the currently active state, and
+ * PL_regmatch_slab points to the slab currently containing PL_regmatch_state.
+ * The first time regmatch is called, the first slab is allocated, and is
+ * never freed until interpreter destruction. When the slab is full,
+ * a new one is allocated chained to the end. At exit from regmatch, slabs
+ * allocated since entry are freed.
*/
+
+
STATIC I32 /* 0 failure, 1 success */
-S_regmatch(pTHX_ regnode *prog)
+S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
{
- dVAR;
- register regnode *scan; /* Current node. */
- regnode *next; /* Next node. */
- regnode *inner; /* Next node in internal branch. */
- register I32 nextchr; /* renamed nextchr - nextchar colides with
- function of same name */
- register I32 n; /* no or next */
- register I32 ln = 0; /* len or last */
- register char *s = NULL; /* operand or save */
- register char *locinput = PL_reginput;
- register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
- int minmod = 0, sw = 0, logical = 0;
- I32 unwind = 0;
-
-#if 0
- I32 firstcp = PL_savestack_ix;
+#if PERL_VERSION < 9
+ dMY_CXT;
#endif
+ dVAR;
register const bool do_utf8 = PL_reg_match_utf8;
-#ifdef DEBUGGING
- SV * const dsv0 = PERL_DEBUG_PAD_ZERO(0);
- SV * const dsv1 = PERL_DEBUG_PAD_ZERO(1);
- SV * const dsv2 = PERL_DEBUG_PAD_ZERO(2);
+ const U32 uniflags = UTF8_ALLOW_DEFAULT;
- SV *re_debug_flags = NULL;
-#endif
- U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
+ regexp *rex = reginfo->prog;
- GET_RE_DEBUG_FLAGS;
+ regmatch_slab *orig_slab;
+ regmatch_state *orig_state;
+ /* the current state. This is a cached copy of PL_regmatch_state */
+ register regmatch_state *st;
+
+ /* cache heavily used fields of st in registers */
+ register regnode *scan;
+ register regnode *next;
+ register I32 n = 0; /* initialize to shut up compiler warning */
+ register char *locinput = PL_reginput;
+
+ /* these variables are NOT saved during a recursive REGMATCH: */
+ register I32 nextchr; /* is always set to UCHARAT(locinput) */
+ bool result; /* return value of S_regmatch */
+ regnode *inner; /* Next node in internal branch. */
+ int depth = 0; /* depth of recursion */
+ regmatch_state *newst; /* when pushing a state, this is the new one */
+ regmatch_state *yes_state = NULL; /* state to pop to on success of
+ subpattern */
+
#ifdef DEBUGGING
+ SV *re_debug_flags = NULL;
+ GET_RE_DEBUG_FLAGS;
PL_regindent++;
#endif
+ /* on first ever call to regmatch, allocate first slab */
+ if (!PL_regmatch_slab) {
+ Newx(PL_regmatch_slab, 1, regmatch_slab);
+ PL_regmatch_slab->prev = NULL;
+ PL_regmatch_slab->next = NULL;
+ PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
+ }
+ /* remember current high-water mark for exit */
+ /* XXX this should be done with SAVE* instead */
+ orig_slab = PL_regmatch_slab;
+ orig_state = PL_regmatch_state;
+
+ /* grab next free state slot */
+ st = ++PL_regmatch_state;
+ if (st > SLAB_LAST(PL_regmatch_slab))
+ st = PL_regmatch_state = S_push_slab(aTHX);
+
+ st->minmod = 0;
+ st->sw = 0;
+ st->logical = 0;
+ st->unwind = 0;
+ st->cc = NULL;
/* Note that nextchr is a byte even in UTF */
nextchr = UCHARAT(locinput);
scan = prog;
pref0_len = 0;
if (pref0_len > pref_len)
pref0_len = pref_len;
- regprop(prop, scan);
+ regprop(rex, prop, scan);
{
const char * const s0 =
do_utf8 && OP(scan) != CANY ?
- pv_uni_display(dsv0, (U8*)(locinput - pref_len),
+ pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
pref0_len, 60, UNI_DISPLAY_REGEX) :
locinput - pref_len;
const int len0 = do_utf8 ? strlen(s0) : pref0_len;
const char * const s1 = do_utf8 && OP(scan) != CANY ?
- pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
+ pv_uni_display(PERL_DEBUG_PAD(1),
+ (U8*)(locinput - pref_len + pref0_len),
pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
locinput - pref_len + pref0_len;
const int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
const char * const s2 = do_utf8 && OP(scan) != CANY ?
- pv_uni_display(dsv2, (U8*)locinput,
+ pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput,
PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
locinput;
const int len2 = do_utf8 ? strlen(s2) : l;
PL_colors[1],
15 - l - pref_len + 1,
"",
- (IV)(scan - PL_regprogram), PL_regindent*2, "",
+ (IV)(scan - rex->program), PL_regindent*2, "",
SvPVX_const(prop));
}
});
case BOL:
if (locinput == PL_bostr)
{
- /* regtill = regbol; */
+ /* reginfo->till = reginfo->bol; */
break;
}
sayNO;
break;
sayNO;
case GPOS:
- if (locinput == PL_reg_ganch)
+ if (locinput == reginfo->ganch)
break;
sayNO;
case EOL:
STRLEN foldlen = 0;
U8 *uscan = (U8*)NULL;
STRLEN bufflen=0;
+ SV *sv_accept_buff = NULL;
const enum { trie_plain, trie_utf8, trie_uft8_fold }
trie_type = do_utf8 ?
(OP(scan) == TRIE ? trie_utf8 : trie_uft8_fold)
: trie_plain;
- int gotit = 0;
- /* accepting states we have traversed */
- SV *sv_accept_buff = NULL;
- reg_trie_accepted *accept_buff = NULL;
- reg_trie_data *trie; /* what trie are we using right now */
- U32 accepted = 0; /* how many accepting states we have seen */
-
- trie = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
+ /* what trie are we using right now */
+ reg_trie_data *trie
+ = (reg_trie_data*)rex->data->data[ ARG( scan ) ];
+ st->u.trie.accepted = 0; /* how many accepting states we have seen */
+ result = 0;
while ( state && uc <= (U8*)PL_regeol ) {
if (trie->states[ state ].wordnum) {
- if (!accepted ) {
+ if (!st->u.trie.accepted ) {
ENTER;
SAVETMPS;
bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
sizeof(reg_trie_accepted));
SvPOK_on(sv_accept_buff);
sv_2mortal(sv_accept_buff);
- accept_buff =
+ st->u.trie.accept_buff =
(reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
}
else {
- if (accepted >= bufflen) {
+ if (st->u.trie.accepted >= bufflen) {
bufflen *= 2;
- accept_buff =(reg_trie_accepted*)
+ st->u.trie.accept_buff =(reg_trie_accepted*)
SvGROW(sv_accept_buff,
bufflen * sizeof(reg_trie_accepted));
}
SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
+ sizeof(reg_trie_accepted));
}
- accept_buff[accepted].wordnum = trie->states[state].wordnum;
- accept_buff[accepted].endpos = uc;
- ++accepted;
+ st->u.trie.accept_buff[st->u.trie.accepted].wordnum = trie->states[state].wordnum;
+ st->u.trie.accept_buff[st->u.trie.accepted].endpos = uc;
+ ++st->u.trie.accepted;
}
base = trie->states[ state ].trans.base;
PerlIO_printf( Perl_debug_log,
"%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
- (UV)state, (UV)base, (UV)accepted );
+ (UV)state, (UV)base, (UV)st->u.trie.accepted );
);
if ( base ) {
charid, uvc, (UV)state, PL_colors[5] );
);
}
- if (!accepted )
+ if (!st->u.trie.accepted )
sayNO;
/*
have been tried.
*/
- if ( accepted == 1 ) {
+ if ( st->u.trie.accepted == 1 ) {
DEBUG_EXECUTE_r({
- SV **tmp = av_fetch( trie->words, accept_buff[ 0 ].wordnum-1, 0 );
+ SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ 0 ].wordnum-1, 0 );
PerlIO_printf( Perl_debug_log,
"%*s %sonly one match : #%d <%s>%s\n",
REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
- accept_buff[ 0 ].wordnum,
+ st->u.trie.accept_buff[ 0 ].wordnum,
tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
PL_colors[5] );
});
- PL_reginput = (char *)accept_buff[ 0 ].endpos;
+ PL_reginput = (char *)st->u.trie.accept_buff[ 0 ].endpos;
/* in this case we free tmps/leave before we call regmatch
as we wont be using accept_buff again. */
FREETMPS;
LEAVE;
- gotit = regmatch( scan + NEXT_OFF( scan ) );
+ REGMATCH(scan + NEXT_OFF(scan), TRIE1);
+ /*** all unsaved local vars undefined at this point */
} else {
DEBUG_EXECUTE_r(
PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n",
- REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)accepted,
+ REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)st->u.trie.accepted,
PL_colors[5] );
);
- while ( !gotit && accepted-- ) {
+ while ( !result && st->u.trie.accepted-- ) {
U32 best = 0;
U32 cur;
- for( cur = 1 ; cur <= accepted ; cur++ ) {
+ for( cur = 1 ; cur <= st->u.trie.accepted ; cur++ ) {
DEBUG_TRIE_EXECUTE_r(
PerlIO_printf( Perl_debug_log,
"%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
- (IV)best, accept_buff[ best ].wordnum, (IV)cur,
- accept_buff[ cur ].wordnum, PL_colors[5] );
+ (IV)best, st->u.trie.accept_buff[ best ].wordnum, (IV)cur,
+ st->u.trie.accept_buff[ cur ].wordnum, PL_colors[5] );
);
- if ( accept_buff[ cur ].wordnum < accept_buff[ best ].wordnum )
- best = cur;
+ if (st->u.trie.accept_buff[cur].wordnum <
+ st->u.trie.accept_buff[best].wordnum)
+ best = cur;
}
DEBUG_EXECUTE_r({
- SV ** const tmp = av_fetch( trie->words, accept_buff[ best ].wordnum - 1, 0 );
+ reg_trie_data * const trie = (reg_trie_data*)
+ rex->data->data[ARG(scan)];
+ SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ best ].wordnum - 1, 0 );
PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n",
REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
- accept_buff[best].wordnum,
+ st->u.trie.accept_buff[best].wordnum,
tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",scan,
PL_colors[5] );
});
- if ( best<accepted ) {
- reg_trie_accepted tmp = accept_buff[ best ];
- accept_buff[ best ] = accept_buff[ accepted ];
- accept_buff[ accepted ] = tmp;
- best = accepted;
+ if ( best<st->u.trie.accepted ) {
+ reg_trie_accepted tmp = st->u.trie.accept_buff[ best ];
+ st->u.trie.accept_buff[ best ] = st->u.trie.accept_buff[ st->u.trie.accepted ];
+ st->u.trie.accept_buff[ st->u.trie.accepted ] = tmp;
+ best = st->u.trie.accepted;
}
- PL_reginput = (char *)accept_buff[ best ].endpos;
+ PL_reginput = (char *)st->u.trie.accept_buff[ best ].endpos;
/*
as far as I can tell we only need the SAVETMPS/FREETMPS
all until I can be sure.
*/
SAVETMPS;
- gotit = regmatch( scan + NEXT_OFF( scan ) ) ;
+ REGMATCH(scan + NEXT_OFF(scan), TRIE2);
+ /*** all unsaved local vars undefined at this point */
FREETMPS;
}
FREETMPS;
LEAVE;
}
- if ( gotit ) {
+ if (result) {
sayYES;
} else {
sayNO;
}
}
/* unreached codepoint */
- case EXACT:
- s = STRING(scan);
- ln = STR_LEN(scan);
+ case EXACT: {
+ char *s = STRING(scan);
+ st->ln = STR_LEN(scan);
if (do_utf8 != UTF) {
/* The target and the pattern have differing utf8ness. */
char *l = locinput;
- const char *e = s + ln;
+ const char *e = s + st->ln;
if (do_utf8) {
/* The target is utf8, the pattern is not utf8. */
/* Inline the first character, for speed. */
if (UCHARAT(s) != nextchr)
sayNO;
- if (PL_regeol - locinput < ln)
+ if (PL_regeol - locinput < st->ln)
sayNO;
- if (ln > 1 && memNE(s, locinput, ln))
+ if (st->ln > 1 && memNE(s, locinput, st->ln))
sayNO;
- locinput += ln;
+ locinput += st->ln;
nextchr = UCHARAT(locinput);
break;
+ }
case EXACTFL:
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
- case EXACTF:
- s = STRING(scan);
- ln = STR_LEN(scan);
+ case EXACTF: {
+ char *s = STRING(scan);
+ st->ln = STR_LEN(scan);
if (do_utf8 || UTF) {
/* Either target or the pattern are utf8. */
char *l = locinput;
char *e = PL_regeol;
- if (ibcmp_utf8(s, 0, ln, (bool)UTF,
+ if (ibcmp_utf8(s, 0, st->ln, (bool)UTF,
l, &e, 0, do_utf8)) {
/* One more case for the sharp s:
* pack("U0U*", 0xDF) =~ /ss/i,
* byte sequence for the U+00DF. */
if (!(do_utf8 &&
toLOWER(s[0]) == 's' &&
- ln >= 2 &&
+ st->ln >= 2 &&
toLOWER(s[1]) == 's' &&
(U8)l[0] == 0xC3 &&
e - l >= 2 &&
UCHARAT(s) != ((OP(scan) == EXACTF)
? PL_fold : PL_fold_locale)[nextchr])
sayNO;
- if (PL_regeol - locinput < ln)
+ if (PL_regeol - locinput < st->ln)
sayNO;
- if (ln > 1 && (OP(scan) == EXACTF
- ? ibcmp(s, locinput, ln)
- : ibcmp_locale(s, locinput, ln)))
+ if (st->ln > 1 && (OP(scan) == EXACTF
+ ? ibcmp(s, locinput, st->ln)
+ : ibcmp_locale(s, locinput, st->ln)))
sayNO;
- locinput += ln;
+ locinput += st->ln;
nextchr = UCHARAT(locinput);
break;
+ }
case ANYOF:
if (do_utf8) {
STRLEN inclasslen = PL_regeol - locinput;
- if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
+ if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
sayNO_ANYOF;
if (locinput >= PL_regeol)
sayNO;
else {
if (nextchr < 0)
nextchr = UCHARAT(locinput);
- if (!REGINCLASS(scan, (U8*)locinput))
+ if (!REGINCLASS(rex, scan, (U8*)locinput))
sayNO_ANYOF;
if (!nextchr && locinput >= PL_regeol)
sayNO;
/* was last char in word? */
if (do_utf8) {
if (locinput == PL_bostr)
- ln = '\n';
+ st->ln = '\n';
else {
const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
- ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
+ st->ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
}
if (OP(scan) == BOUND || OP(scan) == NBOUND) {
- ln = isALNUM_uni(ln);
+ st->ln = isALNUM_uni(st->ln);
LOAD_UTF8_CHARCLASS_ALNUM();
n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
}
else {
- ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
+ st->ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(st->ln));
n = isALNUM_LC_utf8((U8*)locinput);
}
}
else {
- ln = (locinput != PL_bostr) ?
+ st->ln = (locinput != PL_bostr) ?
UCHARAT(locinput - 1) : '\n';
if (OP(scan) == BOUND || OP(scan) == NBOUND) {
- ln = isALNUM(ln);
+ st->ln = isALNUM(st->ln);
n = isALNUM(nextchr);
}
else {
- ln = isALNUM_LC(ln);
+ st->ln = isALNUM_LC(st->ln);
n = isALNUM_LC(nextchr);
}
}
- if (((!ln) == (!n)) == (OP(scan) == BOUND ||
+ if (((!st->ln) == (!n)) == (OP(scan) == BOUND ||
OP(scan) == BOUNDL))
sayNO;
break;
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
case REF:
- case REFF:
+ case REFF: {
+ char *s;
n = ARG(scan); /* which paren pair */
- ln = PL_regstartp[n];
+ st->ln = PL_regstartp[n];
PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
- if ((I32)*PL_reglastparen < n || ln == -1)
+ if ((I32)*PL_reglastparen < n || st->ln == -1)
sayNO; /* Do not match unless seen CLOSEn. */
- if (ln == PL_regendp[n])
+ if (st->ln == PL_regendp[n])
break;
- s = PL_bostr + ln;
+ s = PL_bostr + st->ln;
if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
char *l = locinput;
const char *e = PL_bostr + PL_regendp[n];
(UCHARAT(s) != ((OP(scan) == REFF
? PL_fold : PL_fold_locale)[nextchr]))))
sayNO;
- ln = PL_regendp[n] - ln;
- if (locinput + ln > PL_regeol)
+ st->ln = PL_regendp[n] - st->ln;
+ if (locinput + st->ln > PL_regeol)
sayNO;
- if (ln > 1 && (OP(scan) == REF
- ? memNE(s, locinput, ln)
+ if (st->ln > 1 && (OP(scan) == REF
+ ? memNE(s, locinput, st->ln)
: (OP(scan) == REFF
- ? ibcmp(s, locinput, ln)
- : ibcmp_locale(s, locinput, ln))))
+ ? ibcmp(s, locinput, st->ln)
+ : ibcmp_locale(s, locinput, st->ln))))
sayNO;
- locinput += ln;
+ locinput += st->ln;
nextchr = UCHARAT(locinput);
break;
+ }
case NOTHING:
case TAIL:
break;
case EVAL:
{
- dSP;
- OP_4tree * const oop = PL_op;
- COP * const ocurcop = PL_curcop;
- PAD *old_comppad;
SV *ret;
- struct regexp * const oreg = PL_reg_re;
-
- n = ARG(scan);
- PL_op = (OP_4tree*)PL_regdata->data[n];
- DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
- PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
- PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
-
{
+ /* execute the code in the {...} */
+ dSP;
SV ** const before = SP;
+ OP_4tree * const oop = PL_op;
+ COP * const ocurcop = PL_curcop;
+ PAD *old_comppad;
+
+ n = ARG(scan);
+ PL_op = (OP_4tree*)rex->data->data[n];
+ DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
+ PAD_SAVE_LOCAL(old_comppad, (PAD*)rex->data->data[n + 2]);
+ PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
+
CALLRUNOPS(aTHX); /* Scalar context. */
SPAGAIN;
if (SP == before)
ret = POPs;
PUTBACK;
}
+
+ PL_op = oop;
+ PAD_RESTORE_LOCAL(old_comppad);
+ PL_curcop = ocurcop;
+ if (!st->logical) {
+ /* /(?{...})/ */
+ sv_setsv(save_scalar(PL_replgv), ret);
+ break;
+ }
}
+ if (st->logical == 2) { /* Postponed subexpression: /(??{...})/ */
+ regexp *re;
+ {
+ /* extract RE object from returned value; compiling if
+ * necessary */
- PL_op = oop;
- PAD_RESTORE_LOCAL(old_comppad);
- PL_curcop = ocurcop;
- if (logical) {
- if (logical == 2) { /* Postponed subexpression. */
- regexp *re;
MAGIC *mg = NULL;
- re_cc_state state;
- CHECKPOINT cp, lastcp;
- int toggleutf;
- register SV *sv;
-
+ SV *sv;
if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
mg = mg_find(sv, PERL_MAGIC_qr);
else if (SvSMAGICAL(ret)) {
STRLEN len;
const char * const t = SvPV_const(ret, len);
PMOP pm;
- char * const oprecomp = PL_regprecomp;
const I32 osize = PL_regsize;
- const I32 onpar = PL_regnpar;
Zero(&pm, 1, PMOP);
- if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
+ if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
re = CALLREGCOMP(aTHX_ (char*)t, (char*)t + len, &pm);
if (!(SvFLAGS(ret)
& (SVs_TEMP | SVs_PADTMP | SVf_READONLY
| SVs_GMG)))
sv_magic(ret,(SV*)ReREFCNT_inc(re),
PERL_MAGIC_qr,0,0);
- PL_regprecomp = oprecomp;
PL_regsize = osize;
- PL_regnpar = onpar;
- }
- DEBUG_EXECUTE_r(
- PerlIO_printf(Perl_debug_log,
- "Entering embedded \"%s%.60s%s%s\"\n",
- PL_colors[0],
- re->precomp,
- PL_colors[1],
- (strlen(re->precomp) > 60 ? "..." : ""))
- );
- state.node = next;
- state.prev = PL_reg_call_cc;
- state.cc = PL_regcc;
- state.re = PL_reg_re;
-
- PL_regcc = 0;
-
- cp = regcppush(0); /* Save *all* the positions. */
- REGCP_SET(lastcp);
- cache_re(re);
- state.ss = PL_savestack_ix;
- *PL_reglastparen = 0;
- *PL_reglastcloseparen = 0;
- PL_reg_call_cc = &state;
- PL_reginput = locinput;
- toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
- ((re->reganch & ROPT_UTF8) != 0);
- if (toggleutf) PL_reg_flags ^= RF_utf8;
-
- /* XXXX This is too dramatic a measure... */
- PL_reg_maxiter = 0;
-
- if (regmatch(re->program + 1)) {
- /* Even though we succeeded, we need to restore
- global variables, since we may be wrapped inside
- SUSPEND, thus the match may be not finished yet. */
-
- /* XXXX Do this only if SUSPENDed? */
- PL_reg_call_cc = state.prev;
- PL_regcc = state.cc;
- PL_reg_re = state.re;
- cache_re(PL_reg_re);
- if (toggleutf) PL_reg_flags ^= RF_utf8;
-
- /* XXXX This is too dramatic a measure... */
- PL_reg_maxiter = 0;
-
- /* These are needed even if not SUSPEND. */
- ReREFCNT_dec(re);
- regcpblow(cp);
- sayYES;
}
- ReREFCNT_dec(re);
- REGCP_UNWIND(lastcp);
- regcppop();
- PL_reg_call_cc = state.prev;
- PL_regcc = state.cc;
- PL_reg_re = state.re;
- cache_re(PL_reg_re);
- if (toggleutf) PL_reg_flags ^= RF_utf8;
-
- /* XXXX This is too dramatic a measure... */
- PL_reg_maxiter = 0;
-
- logical = 0;
- sayNO;
}
- sw = SvTRUE(ret);
- logical = 0;
- }
- else {
- sv_setsv(save_scalar(PL_replgv), ret);
- cache_re(oreg);
+
+ /* run the pattern returned from (??{...}) */
+
+ DEBUG_EXECUTE_r(
+ PerlIO_printf(Perl_debug_log,
+ "Entering embedded \"%s%.60s%s%s\"\n",
+ PL_colors[0],
+ re->precomp,
+ PL_colors[1],
+ (strlen(re->precomp) > 60 ? "..." : ""))
+ );
+
+ st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
+ REGCP_SET(st->u.eval.lastcp);
+ *PL_reglastparen = 0;
+ *PL_reglastcloseparen = 0;
+ PL_reginput = locinput;
+
+ /* XXXX This is too dramatic a measure... */
+ PL_reg_maxiter = 0;
+
+ st->logical = 0;
+ st->u.eval.toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
+ ((re->reganch & ROPT_UTF8) != 0);
+ if (st->u.eval.toggleutf) PL_reg_flags ^= RF_utf8;
+ st->u.eval.prev_rex = rex;
+ rex = re;
+
+ /* resume to current state on success */
+ st->u.yes.prev_yes_state = yes_state;
+ yes_state = st;
+ PUSH_STATE(newst, resume_EVAL);
+ st = newst;
+
+ /* now continue from first node in postponed RE */
+ next = re->program + 1;
+ break;
+ /* NOTREACHED */
}
+ /* /(?(?{...})X|Y)/ */
+ st->sw = SvTRUE(ret);
+ st->logical = 0;
break;
}
case OPEN:
break;
case GROUPP:
n = ARG(scan); /* which paren pair */
- sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
+ st->sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
break;
case IFTHEN:
PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
- if (sw)
+ if (st->sw)
next = NEXTOPER(NEXTOPER(scan));
else {
next = scan + ARG(scan);
}
break;
case LOGICAL:
- logical = scan->flags;
+ st->logical = scan->flags;
break;
/*******************************************************************
- PL_regcc contains infoblock about the innermost (...)* loop, and
- a pointer to the next outer infoblock.
+ cc points to the regmatch_state associated with the most recent CURLYX.
+ This struct contains info about the innermost (...)* loop (an
+ "infoblock"), and a pointer to the next outer cc.
Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
- 1) After matching X, regnode for CURLYX is processed;
+ 1) After matching Y, regnode for CURLYX is processed;
- 2) This regnode creates infoblock on the stack, and calls
- regmatch() recursively with the starting point at WHILEM node;
+ 2) This regnode populates cc, and calls regmatch() recursively
+ with the starting point at WHILEM node;
3) Each hit of WHILEM node tries to match A and Z (in the order
depending on the current iteration, min/max of {min,max} and
greediness). The information about where are nodes for "A"
- and "Z" is read from the infoblock, as is info on how many times "A"
+ and "Z" is read from cc, as is info on how many times "A"
was already matched, and greediness.
4) After A matches, the same WHILEM node is hit again.
- 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
+ 5) Each time WHILEM is hit, cc is the infoblock created by CURLYX
of the same pair. Thus when WHILEM tries to match Z, it temporarily
- resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
+ resets cc, since this Y(A)*Z can be a part of some other loop:
as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
of the external loop.
- Currently present infoblocks form a tree with a stem formed by PL_curcc
+ Currently present infoblocks form a tree with a stem formed by st->cc
and whatever it mentions via ->next, and additional attached trees
corresponding to temporarily unset infoblocks as in "5" above.
- In the following picture infoblocks for outer loop of
+ In the following picture, infoblocks for outer loop of
(Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
infoblocks are drawn below the "reset" infoblock.
O
I,I
*******************************************************************/
+
case CURLYX: {
- CURCUR cc;
- CHECKPOINT cp = PL_savestack_ix;
/* No need to save/restore up to this paren */
I32 parenfloor = scan->flags;
+ /* Dave says:
+
+ CURLYX and WHILEM are always paired: they're the moral
+ equivalent of pp_enteriter and pp_iter.
+
+ The only time next could be null is if the node tree is
+ corrupt. This was mentioned on p5p a few days ago.
+
+ See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html
+ So we'll assert that this is true:
+ */
+ assert(next);
if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
next += ARG(next);
- cc.oldcc = PL_regcc;
- PL_regcc = &cc;
/* XXXX Probably it is better to teach regpush to support
parenfloor > PL_regsize... */
if (parenfloor > (I32)*PL_reglastparen)
parenfloor = *PL_reglastparen; /* Pessimization... */
- cc.parenfloor = parenfloor;
- cc.cur = -1;
- cc.min = ARG1(scan);
- cc.max = ARG2(scan);
- cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
- cc.next = next;
- cc.minmod = minmod;
- cc.lastloc = 0;
+
+ st->u.curlyx.cp = PL_savestack_ix;
+ st->u.curlyx.outercc = st->cc;
+ st->cc = st;
+ /* these fields contain the state of the current curly.
+ * they are accessed by subsequent WHILEMs;
+ * cur and lastloc are also updated by WHILEM */
+ st->u.curlyx.parenfloor = parenfloor;
+ st->u.curlyx.cur = -1; /* this will be updated by WHILEM */
+ st->u.curlyx.min = ARG1(scan);
+ st->u.curlyx.max = ARG2(scan);
+ st->u.curlyx.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
+ st->u.curlyx.lastloc = 0;
+ /* st->next and st->minmod are also read by WHILEM */
+
PL_reginput = locinput;
- n = regmatch(PREVOPER(next)); /* start on the WHILEM */
- regcpblow(cp);
- PL_regcc = cc.oldcc;
- saySAME(n);
+ REGMATCH(PREVOPER(next), CURLYX); /* start on the WHILEM */
+ /*** all unsaved local vars undefined at this point */
+ regcpblow(st->u.curlyx.cp);
+ st->cc = st->u.curlyx.outercc;
+ saySAME(result);
}
/* NOTREACHED */
case WHILEM: {
* that we can try again after backing off.
*/
- CHECKPOINT cp, lastcp;
- CURCUR* cc = PL_regcc;
- char * const lastloc = cc->lastloc; /* Detection of 0-len. */
- I32 cache_offset = 0, cache_bit = 0;
+ /* Dave says:
+
+ st->cc gets initialised by CURLYX ready for use by WHILEM.
+ So again, unless something's been corrupted, st->cc cannot
+ be null at that point in WHILEM.
+
+ See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html
+ So we'll assert that this is true:
+ */
+ assert(st->cc);
+ st->u.whilem.lastloc = st->cc->u.curlyx.lastloc; /* Detection of 0-len. */
+ st->u.whilem.cache_offset = 0;
+ st->u.whilem.cache_bit = 0;
- n = cc->cur + 1; /* how many we know we matched */
+ n = st->cc->u.curlyx.cur + 1; /* how many we know we matched */
PL_reginput = locinput;
DEBUG_EXECUTE_r(
PerlIO_printf(Perl_debug_log,
"%*s %ld out of %ld..%ld cc=%"UVxf"\n",
REPORT_CODE_OFF+PL_regindent*2, "",
- (long)n, (long)cc->min,
- (long)cc->max, PTR2UV(cc))
+ (long)n, (long)st->cc->u.curlyx.min,
+ (long)st->cc->u.curlyx.max, PTR2UV(st->cc))
);
/* If degenerate scan matches "", assume scan done. */
- if (locinput == cc->lastloc && n >= cc->min) {
- PL_regcc = cc->oldcc;
- if (PL_regcc)
- ln = PL_regcc->cur;
+ if (locinput == st->cc->u.curlyx.lastloc && n >= st->cc->u.curlyx.min) {
+ st->u.whilem.savecc = st->cc;
+ st->cc = st->cc->u.curlyx.outercc;
+ if (st->cc)
+ st->ln = st->cc->u.curlyx.cur;
DEBUG_EXECUTE_r(
PerlIO_printf(Perl_debug_log,
"%*s empty match detected, try continuation...\n",
REPORT_CODE_OFF+PL_regindent*2, "")
);
- if (regmatch(cc->next))
+ REGMATCH(st->u.whilem.savecc->next, WHILEM1);
+ /*** all unsaved local vars undefined at this point */
+ st->cc = st->u.whilem.savecc;
+ if (result)
sayYES;
- if (PL_regcc)
- PL_regcc->cur = ln;
- PL_regcc = cc;
+ if (st->cc->u.curlyx.outercc)
+ st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
sayNO;
}
/* First just match a string of min scans. */
- if (n < cc->min) {
- cc->cur = n;
- cc->lastloc = locinput;
- if (regmatch(cc->scan))
+ if (n < st->cc->u.curlyx.min) {
+ st->cc->u.curlyx.cur = n;
+ st->cc->u.curlyx.lastloc = locinput;
+ REGMATCH(st->cc->u.curlyx.scan, WHILEM2);
+ /*** all unsaved local vars undefined at this point */
+ if (result)
sayYES;
- cc->cur = n - 1;
- cc->lastloc = lastloc;
+ st->cc->u.curlyx.cur = n - 1;
+ st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
sayNO;
}
);
}
if (PL_reg_leftiter < 0) {
- cache_offset = locinput - PL_bostr;
+ st->u.whilem.cache_offset = locinput - PL_bostr;
- cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
- + cache_offset * (scan->flags>>4);
- cache_bit = cache_offset % 8;
- cache_offset /= 8;
- if (PL_reg_poscache[cache_offset] & (1<<cache_bit)) {
+ st->u.whilem.cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
+ + st->u.whilem.cache_offset * (scan->flags>>4);
+ st->u.whilem.cache_bit = st->u.whilem.cache_offset % 8;
+ st->u.whilem.cache_offset /= 8;
+ if (PL_reg_poscache[st->u.whilem.cache_offset] & (1<<st->u.whilem.cache_bit)) {
DEBUG_EXECUTE_r(
PerlIO_printf(Perl_debug_log,
"%*s already tried at this position...\n",
/* cache records failure */
sayNO_SILENT;
}
- PL_reg_poscache[cache_offset] |= (1<<cache_bit);
}
}
/* Prefer next over scan for minimal matching. */
- if (cc->minmod) {
- PL_regcc = cc->oldcc;
- if (PL_regcc)
- ln = PL_regcc->cur;
- cp = regcppush(cc->parenfloor);
- REGCP_SET(lastcp);
- if (regmatch(cc->next)) {
- regcpblow(cp);
+ if (st->cc->minmod) {
+ st->u.whilem.savecc = st->cc;
+ st->cc = st->cc->u.curlyx.outercc;
+ if (st->cc)
+ st->ln = st->cc->u.curlyx.cur;
+ st->u.whilem.cp = regcppush(st->u.whilem.savecc->u.curlyx.parenfloor);
+ REGCP_SET(st->u.whilem.lastcp);
+ REGMATCH(st->u.whilem.savecc->next, WHILEM3);
+ /*** all unsaved local vars undefined at this point */
+ st->cc = st->u.whilem.savecc;
+ if (result) {
+ regcpblow(st->u.whilem.cp);
CACHEsayYES; /* All done. */
}
- REGCP_UNWIND(lastcp);
- regcppop();
- if (PL_regcc)
- PL_regcc->cur = ln;
- PL_regcc = cc;
+ REGCP_UNWIND(st->u.whilem.lastcp);
+ regcppop(rex);
+ if (st->cc->u.curlyx.outercc)
+ st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
- if (n >= cc->max) { /* Maximum greed exceeded? */
+ if (n >= st->cc->u.curlyx.max) { /* Maximum greed exceeded? */
if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
&& !(PL_reg_flags & RF_warned)) {
PL_reg_flags |= RF_warned;
);
/* Try scanning more and see if it helps. */
PL_reginput = locinput;
- cc->cur = n;
- cc->lastloc = locinput;
- cp = regcppush(cc->parenfloor);
- REGCP_SET(lastcp);
- if (regmatch(cc->scan)) {
- regcpblow(cp);
+ st->cc->u.curlyx.cur = n;
+ st->cc->u.curlyx.lastloc = locinput;
+ st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
+ REGCP_SET(st->u.whilem.lastcp);
+ REGMATCH(st->cc->u.curlyx.scan, WHILEM4);
+ /*** all unsaved local vars undefined at this point */
+ if (result) {
+ regcpblow(st->u.whilem.cp);
CACHEsayYES;
}
- REGCP_UNWIND(lastcp);
- regcppop();
- cc->cur = n - 1;
- cc->lastloc = lastloc;
+ REGCP_UNWIND(st->u.whilem.lastcp);
+ regcppop(rex);
+ st->cc->u.curlyx.cur = n - 1;
+ st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
CACHEsayNO;
}
/* Prefer scan over next for maximal matching. */
- if (n < cc->max) { /* More greed allowed? */
- cp = regcppush(cc->parenfloor);
- cc->cur = n;
- cc->lastloc = locinput;
- REGCP_SET(lastcp);
- if (regmatch(cc->scan)) {
- regcpblow(cp);
+ if (n < st->cc->u.curlyx.max) { /* More greed allowed? */
+ st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
+ st->cc->u.curlyx.cur = n;
+ st->cc->u.curlyx.lastloc = locinput;
+ REGCP_SET(st->u.whilem.lastcp);
+ REGMATCH(st->cc->u.curlyx.scan, WHILEM5);
+ /*** all unsaved local vars undefined at this point */
+ if (result) {
+ regcpblow(st->u.whilem.cp);
CACHEsayYES;
}
- REGCP_UNWIND(lastcp);
- regcppop(); /* Restore some previous $<digit>s? */
+ REGCP_UNWIND(st->u.whilem.lastcp);
+ regcppop(rex); /* Restore some previous $<digit>s? */
PL_reginput = locinput;
DEBUG_EXECUTE_r(
PerlIO_printf(Perl_debug_log,
}
/* Failed deeper matches of scan, so see if this one works. */
- PL_regcc = cc->oldcc;
- if (PL_regcc)
- ln = PL_regcc->cur;
- if (regmatch(cc->next))
+ st->u.whilem.savecc = st->cc;
+ st->cc = st->cc->u.curlyx.outercc;
+ if (st->cc)
+ st->ln = st->cc->u.curlyx.cur;
+ REGMATCH(st->u.whilem.savecc->next, WHILEM6);
+ /*** all unsaved local vars undefined at this point */
+ st->cc = st->u.whilem.savecc;
+ if (result)
CACHEsayYES;
- if (PL_regcc)
- PL_regcc->cur = ln;
- PL_regcc = cc;
- cc->cur = n - 1;
- cc->lastloc = lastloc;
+ if (st->cc->u.curlyx.outercc)
+ st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
+ st->cc->u.curlyx.cur = n - 1;
+ st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
CACHEsayNO;
}
/* NOTREACHED */
inner = NEXTOPER(scan);
do_branch:
{
- c1 = OP(scan);
- if (OP(next) != c1) /* No choice. */
+ I32 type;
+ type = OP(scan);
+ if (!next || OP(next) != type) /* No choice. */
next = inner; /* Avoid recursion. */
else {
const I32 lastparen = *PL_reglastparen;
const I32 unwind1 = SSNEWt(1,re_unwind_branch_t);
re_unwind_branch_t * const uw = SSPTRt(unwind1,re_unwind_branch_t);
- uw->prev = unwind;
- unwind = unwind1;
- uw->type = ((c1 == BRANCH)
+ uw->prev = st->unwind;
+ st->unwind = unwind1;
+ uw->type = ((type == BRANCH)
? RE_UNWIND_BRANCH
: RE_UNWIND_BRANCHJ);
uw->lastparen = lastparen;
uw->next = next;
uw->locinput = locinput;
uw->nextchr = nextchr;
+ uw->minmod = st->minmod;
#ifdef DEBUGGING
uw->regindent = ++PL_regindent;
#endif
}
break;
case MINMOD:
- minmod = 1;
+ st->minmod = 1;
break;
case CURLYM:
{
- I32 l = 0;
- I32 matches = 0;
- CHECKPOINT lastcp;
- I32 maxwanted;
+ st->u.curlym.l = st->u.curlym.matches = 0;
/* We suppose that the next guy does not need
backtracking: in particular, it is of constant non-zero length,
and has no parenths to influence future backrefs. */
- ln = ARG1(scan); /* min to match */
+ st->ln = ARG1(scan); /* min to match */
n = ARG2(scan); /* max to match */
- paren = scan->flags;
- if (paren) {
- if (paren > PL_regsize)
- PL_regsize = paren;
- if (paren > (I32)*PL_reglastparen)
- *PL_reglastparen = paren;
+ st->u.curlym.paren = scan->flags;
+ if (st->u.curlym.paren) {
+ if (st->u.curlym.paren > PL_regsize)
+ PL_regsize = st->u.curlym.paren;
+ if (st->u.curlym.paren > (I32)*PL_reglastparen)
+ *PL_reglastparen = st->u.curlym.paren;
}
scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
- if (paren)
+ if (st->u.curlym.paren)
scan += NEXT_OFF(scan); /* Skip former OPEN. */
PL_reginput = locinput;
- maxwanted = minmod ? ln : n;
- if (maxwanted) {
- while (PL_reginput < PL_regeol && matches < maxwanted) {
- if (!regmatch(scan))
- break;
- /* on first match, determine length, l */
- if (!matches++) {
- if (PL_reg_match_utf8) {
- char *s = locinput;
- while (s < PL_reginput) {
- l++;
- s += UTF8SKIP(s);
- }
- }
- else {
- l = PL_reginput - locinput;
- }
- if (l == 0) {
- matches = maxwanted;
- break;
+ st->u.curlym.maxwanted = st->minmod ? st->ln : n;
+ while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) {
+ /* resume to current state on success */
+ st->u.yes.prev_yes_state = yes_state;
+ yes_state = st;
+ REGMATCH(scan, CURLYM1);
+ yes_state = st->u.yes.prev_yes_state;
+ /*** all unsaved local vars undefined at this point */
+ if (!result)
+ break;
+ /* on first match, determine length, u.curlym.l */
+ if (!st->u.curlym.matches++) {
+ if (PL_reg_match_utf8) {
+ char *s = locinput;
+ while (s < PL_reginput) {
+ st->u.curlym.l++;
+ s += UTF8SKIP(s);
}
}
- locinput = PL_reginput;
+ else {
+ st->u.curlym.l = PL_reginput - locinput;
+ }
+ if (st->u.curlym.l == 0) {
+ st->u.curlym.matches = st->u.curlym.maxwanted;
+ break;
+ }
}
+ locinput = PL_reginput;
}
PL_reginput = locinput;
+ if (st->u.curlym.matches < st->ln) {
+ st->minmod = 0;
+ sayNO;
+ }
- if (minmod) {
- minmod = 0;
- if (ln && matches < ln)
- sayNO;
- if (HAS_TEXT(next) || JUMPABLE(next)) {
- regnode *text_node = next;
-
- if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
+ DEBUG_EXECUTE_r(
+ PerlIO_printf(Perl_debug_log,
+ "%*s matched %"IVdf" times, len=%"IVdf"...\n",
+ (int)(REPORT_CODE_OFF+PL_regindent*2), "",
+ (IV) st->u.curlym.matches, (IV)st->u.curlym.l)
+ );
+
+ /* calculate c1 and c2 for possible match of 1st char
+ * following curly */
+ st->u.curlym.c1 = st->u.curlym.c2 = CHRTEST_VOID;
+ if (HAS_TEXT(next) || JUMPABLE(next)) {
+ regnode *text_node = next;
+ if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
+ if (HAS_TEXT(text_node)
+ && PL_regkind[(U8)OP(text_node)] != REF)
+ {
+ st->u.curlym.c1 = (U8)*STRING(text_node);
+ st->u.curlym.c2 =
+ (OP(text_node) == EXACTF || OP(text_node) == REFF)
+ ? PL_fold[st->u.curlym.c1]
+ : (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
+ ? PL_fold_locale[st->u.curlym.c1]
+ : st->u.curlym.c1;
+ }
+ }
- if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
- else {
- if (PL_regkind[(U8)OP(text_node)] == REF) {
- c1 = c2 = -1000;
- goto assume_ok_MM;
+ REGCP_SET(st->u.curlym.lastcp);
+
+ st->u.curlym.minmod = st->minmod;
+ st->minmod = 0;
+ while (st->u.curlym.matches >= st->ln
+ && (st->u.curlym.matches <= n
+ /* for REG_INFTY, matches could overflow to negative */
+ || (n == REG_INFTY && st->u.curlym.matches >= 0)))
+ {
+ /* If it could work, try it. */
+ if (st->u.curlym.c1 == CHRTEST_VOID ||
+ UCHARAT(PL_reginput) == st->u.curlym.c1 ||
+ UCHARAT(PL_reginput) == st->u.curlym.c2)
+ {
+ DEBUG_EXECUTE_r(
+ PerlIO_printf(Perl_debug_log,
+ "%*s trying tail with matches=%"IVdf"...\n",
+ (int)(REPORT_CODE_OFF+PL_regindent*2),
+ "", (IV)st->u.curlym.matches)
+ );
+ if (st->u.curlym.paren) {
+ if (st->u.curlym.matches) {
+ PL_regstartp[st->u.curlym.paren]
+ = HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
+ PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
}
- else { c1 = (U8)*STRING(text_node); }
- if (OP(text_node) == EXACTF || OP(text_node) == REFF)
- c2 = PL_fold[c1];
- else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
- c2 = PL_fold_locale[c1];
else
- c2 = c1;
+ PL_regendp[st->u.curlym.paren] = -1;
}
+ /* resume to current state on success */
+ st->u.yes.prev_yes_state = yes_state;
+ yes_state = st;
+ REGMATCH(next, CURLYM2);
+ yes_state = st->u.yes.prev_yes_state;
+ /*** all unsaved local vars undefined at this point */
+ if (result)
+ /* XXX tmp sayYES; */
+ sayYES_FINAL;
+ REGCP_UNWIND(st->u.curlym.lastcp);
}
- else
- c1 = c2 = -1000;
- assume_ok_MM:
- REGCP_SET(lastcp);
- while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
- /* If it could work, try it. */
- if (c1 == -1000 ||
- UCHARAT(PL_reginput) == c1 ||
- UCHARAT(PL_reginput) == c2)
- {
- if (paren) {
- if (ln) {
- PL_regstartp[paren] =
- HOPc(PL_reginput, -l) - PL_bostr;
- PL_regendp[paren] = PL_reginput - PL_bostr;
- }
- else
- PL_regendp[paren] = -1;
- }
- if (regmatch(next))
- sayYES;
- REGCP_UNWIND(lastcp);
- }
- /* Couldn't or didn't -- move forward. */
+ /* Couldn't or didn't -- move forward/backward. */
+ if (st->u.curlym.minmod) {
PL_reginput = locinput;
- if (regmatch(scan)) {
- ln++;
+ /* resume to current state on success */
+ st->u.yes.prev_yes_state = yes_state;
+ yes_state = st;
+ REGMATCH(scan, CURLYM3);
+ yes_state = st->u.yes.prev_yes_state;
+ /*** all unsaved local vars undefined at this point */
+ if (result) {
+ st->u.curlym.matches++;
locinput = PL_reginput;
}
else
sayNO;
}
- }
- else {
- DEBUG_EXECUTE_r(
- PerlIO_printf(Perl_debug_log,
- "%*s matched %"IVdf" times, len=%"IVdf"...\n",
- (int)(REPORT_CODE_OFF+PL_regindent*2), "",
- (IV) matches, (IV)l)
- );
- if (matches >= ln) {
- if (HAS_TEXT(next) || JUMPABLE(next)) {
- regnode *text_node = next;
-
- if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
-
- if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
- else {
- if (PL_regkind[(U8)OP(text_node)] == REF) {
- c1 = c2 = -1000;
- goto assume_ok_REG;
- }
- else { c1 = (U8)*STRING(text_node); }
-
- if (OP(text_node) == EXACTF || OP(text_node) == REFF)
- c2 = PL_fold[c1];
- else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
- c2 = PL_fold_locale[c1];
- else
- c2 = c1;
- }
- }
- else
- c1 = c2 = -1000;
- }
- assume_ok_REG:
- REGCP_SET(lastcp);
- while (matches >= ln) {
- /* If it could work, try it. */
- if (c1 == -1000 ||
- UCHARAT(PL_reginput) == c1 ||
- UCHARAT(PL_reginput) == c2)
- {
- DEBUG_EXECUTE_r(
- PerlIO_printf(Perl_debug_log,
- "%*s trying tail with matches=%"IVdf"...\n",
- (int)(REPORT_CODE_OFF+PL_regindent*2),
- "", (IV)matches)
- );
- if (paren) {
- if (matches) {
- PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
- PL_regendp[paren] = PL_reginput - PL_bostr;
- }
- else
- PL_regendp[paren] = -1;
- }
- if (regmatch(next))
- sayYES;
- REGCP_UNWIND(lastcp);
- }
- /* Couldn't or didn't -- back up. */
- matches--;
- locinput = HOPc(locinput, -l);
+ else {
+ st->u.curlym.matches--;
+ locinput = HOPc(locinput, -st->u.curlym.l);
PL_reginput = locinput;
}
}
break;
}
case CURLYN:
- paren = scan->flags; /* Which paren to set */
- if (paren > PL_regsize)
- PL_regsize = paren;
- if (paren > (I32)*PL_reglastparen)
- *PL_reglastparen = paren;
- ln = ARG1(scan); /* min to match */
+ st->u.plus.paren = scan->flags; /* Which paren to set */
+ if (st->u.plus.paren > PL_regsize)
+ PL_regsize = st->u.plus.paren;
+ if (st->u.plus.paren > (I32)*PL_reglastparen)
+ *PL_reglastparen = st->u.plus.paren;
+ st->ln = ARG1(scan); /* min to match */
n = ARG2(scan); /* max to match */
scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
goto repeat;
case CURLY:
- paren = 0;
- ln = ARG1(scan); /* min to match */
+ st->u.plus.paren = 0;
+ st->ln = ARG1(scan); /* min to match */
n = ARG2(scan); /* max to match */
scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
goto repeat;
case STAR:
- ln = 0;
+ st->ln = 0;
n = REG_INFTY;
scan = NEXTOPER(scan);
- paren = 0;
+ st->u.plus.paren = 0;
goto repeat;
case PLUS:
- ln = 1;
+ st->ln = 1;
n = REG_INFTY;
scan = NEXTOPER(scan);
- paren = 0;
+ st->u.plus.paren = 0;
repeat:
/*
* Lookahead to avoid useless match attempts
if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
- if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
+ if (! HAS_TEXT(text_node))
+ st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
else {
if (PL_regkind[(U8)OP(text_node)] == REF) {
- c1 = c2 = -1000;
+ st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
goto assume_ok_easy;
}
else { s = (U8*)STRING(text_node); }
if (!UTF) {
- c2 = c1 = *s;
+ st->u.plus.c2 = st->u.plus.c1 = *s;
if (OP(text_node) == EXACTF || OP(text_node) == REFF)
- c2 = PL_fold[c1];
+ st->u.plus.c2 = PL_fold[st->u.plus.c1];
else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
- c2 = PL_fold_locale[c1];
+ st->u.plus.c2 = PL_fold_locale[st->u.plus.c1];
}
else { /* UTF */
if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
- c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
+ st->u.plus.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
uniflags);
- c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
+ st->u.plus.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
uniflags);
}
else {
- c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
+ st->u.plus.c2 = st->u.plus.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
uniflags);
}
}
}
}
else
- c1 = c2 = -1000;
+ st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
assume_ok_easy:
PL_reginput = locinput;
- if (minmod) {
- CHECKPOINT lastcp;
- minmod = 0;
- if (ln && regrepeat(scan, ln) < ln)
+ if (st->minmod) {
+ st->minmod = 0;
+ if (st->ln && regrepeat(rex, scan, st->ln) < st->ln)
sayNO;
locinput = PL_reginput;
- REGCP_SET(lastcp);
- if (c1 != -1000) {
- char *e; /* Should not check after this */
- char *old = locinput;
- int count = 0;
+ REGCP_SET(st->u.plus.lastcp);
+ if (st->u.plus.c1 != CHRTEST_VOID) {
+ st->u.plus.old = locinput;
+ st->u.plus.count = 0;
if (n == REG_INFTY) {
- e = PL_regeol - 1;
+ st->u.plus.e = PL_regeol - 1;
if (do_utf8)
- while (UTF8_IS_CONTINUATION(*(U8*)e))
- e--;
+ while (UTF8_IS_CONTINUATION(*(U8*)st->u.plus.e))
+ st->u.plus.e--;
}
else if (do_utf8) {
- int m = n - ln;
- for (e = locinput;
- m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
- e += UTF8SKIP(e);
+ int m = n - st->ln;
+ for (st->u.plus.e = locinput;
+ m >0 && st->u.plus.e + UTF8SKIP(st->u.plus.e) <= PL_regeol; m--)
+ st->u.plus.e += UTF8SKIP(st->u.plus.e);
}
else {
- e = locinput + n - ln;
- if (e >= PL_regeol)
- e = PL_regeol - 1;
+ st->u.plus.e = locinput + n - st->ln;
+ if (st->u.plus.e >= PL_regeol)
+ st->u.plus.e = PL_regeol - 1;
}
while (1) {
/* Find place 'next' could work */
if (!do_utf8) {
- if (c1 == c2) {
- while (locinput <= e &&
- UCHARAT(locinput) != c1)
+ if (st->u.plus.c1 == st->u.plus.c2) {
+ while (locinput <= st->u.plus.e &&
+ UCHARAT(locinput) != st->u.plus.c1)
locinput++;
} else {
- while (locinput <= e
- && UCHARAT(locinput) != c1
- && UCHARAT(locinput) != c2)
+ while (locinput <= st->u.plus.e
+ && UCHARAT(locinput) != st->u.plus.c1
+ && UCHARAT(locinput) != st->u.plus.c2)
locinput++;
}
- count = locinput - old;
+ st->u.plus.count = locinput - st->u.plus.old;
}
else {
- if (c1 == c2) {
+ if (st->u.plus.c1 == st->u.plus.c2) {
STRLEN len;
/* count initialised to
* utf8_distance(old, locinput) */
- while (locinput <= e &&
+ while (locinput <= st->u.plus.e &&
utf8n_to_uvchr((U8*)locinput,
UTF8_MAXBYTES, &len,
- uniflags) != (UV)c1) {
+ uniflags) != (UV)st->u.plus.c1) {
locinput += len;
- count++;
+ st->u.plus.count++;
}
} else {
- STRLEN len;
/* count initialised to
* utf8_distance(old, locinput) */
- while (locinput <= e) {
- UV c = utf8n_to_uvchr((U8*)locinput,
+ while (locinput <= st->u.plus.e) {
+ STRLEN len;
+ const UV c = utf8n_to_uvchr((U8*)locinput,
UTF8_MAXBYTES, &len,
uniflags);
- if (c == (UV)c1 || c == (UV)c2)
+ if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
break;
locinput += len;
- count++;
+ st->u.plus.count++;
}
}
}
- if (locinput > e)
+ if (locinput > st->u.plus.e)
sayNO;
/* PL_reginput == old now */
- if (locinput != old) {
- ln = 1; /* Did some */
- if (regrepeat(scan, count) < count)
+ if (locinput != st->u.plus.old) {
+ st->ln = 1; /* Did some */
+ if (regrepeat(rex, scan, st->u.plus.count) < st->u.plus.count)
sayNO;
}
/* PL_reginput == locinput now */
- TRYPAREN(paren, ln, locinput);
+ TRYPAREN(st->u.plus.paren, st->ln, locinput, PLUS1);
+ /*** all unsaved local vars undefined at this point */
PL_reginput = locinput; /* Could be reset... */
- REGCP_UNWIND(lastcp);
+ REGCP_UNWIND(st->u.plus.lastcp);
/* Couldn't or didn't -- move forward. */
- old = locinput;
+ st->u.plus.old = locinput;
if (do_utf8)
locinput += UTF8SKIP(locinput);
else
locinput++;
- count = 1;
+ st->u.plus.count = 1;
}
}
else
- while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
+ while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
UV c;
- if (c1 != -1000) {
+ if (st->u.plus.c1 != CHRTEST_VOID) {
if (do_utf8)
c = utf8n_to_uvchr((U8*)PL_reginput,
UTF8_MAXBYTES, 0,
else
c = UCHARAT(PL_reginput);
/* If it could work, try it. */
- if (c == (UV)c1 || c == (UV)c2)
+ if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
{
- TRYPAREN(paren, ln, PL_reginput);
- REGCP_UNWIND(lastcp);
+ TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS2);
+ /*** all unsaved local vars undefined at this point */
+ REGCP_UNWIND(st->u.plus.lastcp);
}
}
/* If it could work, try it. */
- else if (c1 == -1000)
+ else if (st->u.plus.c1 == CHRTEST_VOID)
{
- TRYPAREN(paren, ln, PL_reginput);
- REGCP_UNWIND(lastcp);
+ TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS3);
+ /*** all unsaved local vars undefined at this point */
+ REGCP_UNWIND(st->u.plus.lastcp);
}
/* Couldn't or didn't -- move forward. */
PL_reginput = locinput;
- if (regrepeat(scan, 1)) {
- ln++;
+ if (regrepeat(rex, scan, 1)) {
+ st->ln++;
locinput = PL_reginput;
}
else
}
}
else {
- CHECKPOINT lastcp;
- n = regrepeat(scan, n);
+ n = regrepeat(rex, scan, n);
locinput = PL_reginput;
- if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
+ if (st->ln < n && PL_regkind[(U8)OP(next)] == EOL &&
(OP(next) != MEOL ||
OP(next) == SEOL || OP(next) == EOS))
{
- ln = n; /* why back off? */
+ st->ln = n; /* why back off? */
/* ...because $ and \Z can match before *and* after
newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
We should back off by one in this case. */
if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
- ln--;
+ st->ln--;
}
- REGCP_SET(lastcp);
+ REGCP_SET(st->u.plus.lastcp);
{
UV c = 0;
- while (n >= ln) {
- if (c1 != -1000) {
+ while (n >= st->ln) {
+ if (st->u.plus.c1 != CHRTEST_VOID) {
if (do_utf8)
c = utf8n_to_uvchr((U8*)PL_reginput,
UTF8_MAXBYTES, 0,
c = UCHARAT(PL_reginput);
}
/* If it could work, try it. */
- if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
+ if (st->u.plus.c1 == CHRTEST_VOID || c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
{
- TRYPAREN(paren, n, PL_reginput);
- REGCP_UNWIND(lastcp);
+ TRYPAREN(st->u.plus.paren, n, PL_reginput, PLUS4);
+ /*** all unsaved local vars undefined at this point */
+ REGCP_UNWIND(st->u.plus.lastcp);
}
/* Couldn't or didn't -- back up. */
n--;
sayNO;
break;
case END:
- if (PL_reg_call_cc) {
- re_cc_state *cur_call_cc = PL_reg_call_cc;
- CURCUR *cctmp = PL_regcc;
- regexp *re = PL_reg_re;
- CHECKPOINT lastcp;
- I32 tmp;
-
- /* Save *all* the positions. */
- const CHECKPOINT cp = regcppush(0);
- REGCP_SET(lastcp);
-
- /* Restore parens of the caller. */
- tmp = PL_savestack_ix;
- PL_savestack_ix = PL_reg_call_cc->ss;
- regcppop();
- PL_savestack_ix = tmp;
-
- /* Make position available to the callcc. */
- PL_reginput = locinput;
-
- cache_re(PL_reg_call_cc->re);
- PL_regcc = PL_reg_call_cc->cc;
- PL_reg_call_cc = PL_reg_call_cc->prev;
- if (regmatch(cur_call_cc->node)) {
- PL_reg_call_cc = cur_call_cc;
- regcpblow(cp);
- sayYES;
- }
- REGCP_UNWIND(lastcp);
- regcppop();
- PL_reg_call_cc = cur_call_cc;
- PL_regcc = cctmp;
- PL_reg_re = re;
- cache_re(re);
-
- DEBUG_EXECUTE_r(
- PerlIO_printf(Perl_debug_log,
- "%*s continuation failed...\n",
- REPORT_CODE_OFF+PL_regindent*2, "")
- );
- sayNO_SILENT;
- }
- if (locinput < PL_regtill) {
+ if (locinput < reginfo->till) {
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
"%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
PL_colors[4],
(long)(locinput - PL_reg_starttry),
- (long)(PL_regtill - PL_reg_starttry),
+ (long)(reginfo->till - PL_reg_starttry),
PL_colors[5]));
sayNO_FINAL; /* Cannot match: too short. */
}
PL_reginput = locinput; /* put where regtry can find it */
sayYES_FINAL; /* Success! */
- case SUCCEED:
+
+ case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
+ DEBUG_EXECUTE_r(
+ PerlIO_printf(Perl_debug_log,
+ "%*s %ssubpattern success...%s\n",
+ REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5]));
PL_reginput = locinput; /* put where regtry can find it */
- sayYES_LOUD; /* Success! */
- case SUSPEND:
- n = 1;
+ sayYES_FINAL; /* Success! */
+
+ case SUSPEND: /* (?>FOO) */
+ st->u.ifmatch.wanted = 1;
PL_reginput = locinput;
goto do_ifmatch;
- case UNLESSM:
- n = 0;
- if (scan->flags) {
- s = HOPBACKc(locinput, scan->flags);
- if (!s)
- goto say_yes;
- PL_reginput = s;
- }
- else
- PL_reginput = locinput;
- goto do_ifmatch;
- case IFMATCH:
- n = 1;
+
+ case UNLESSM: /* -ve lookaround: (?!FOO), or with flags, (?<!foo) */
+ st->u.ifmatch.wanted = 0;
+ goto ifmatch_trivial_fail_test;
+
+ case IFMATCH: /* +ve lookaround: (?=FOO), or with flags, (?<=foo) */
+ st->u.ifmatch.wanted = 1;
+ ifmatch_trivial_fail_test:
if (scan->flags) {
- s = HOPBACKc(locinput, scan->flags);
- if (!s)
- goto say_no;
+ char * const s = HOPBACKc(locinput, scan->flags);
+ if (!s) {
+ /* trivial fail */
+ if (st->logical) {
+ st->logical = 0;
+ st->sw = 1 - st->u.ifmatch.wanted;
+ }
+ else if (st->u.ifmatch.wanted)
+ sayNO;
+ next = scan + ARG(scan);
+ if (next == scan)
+ next = NULL;
+ break;
+ }
PL_reginput = s;
}
else
PL_reginput = locinput;
do_ifmatch:
- inner = NEXTOPER(NEXTOPER(scan));
- if (regmatch(inner) != n) {
- say_no:
- if (logical) {
- logical = 0;
- sw = 0;
- goto do_longjump;
- }
- else
- sayNO;
- }
- say_yes:
- if (logical) {
- logical = 0;
- sw = 1;
- }
- if (OP(scan) == SUSPEND) {
- locinput = PL_reginput;
- nextchr = UCHARAT(locinput);
- }
- /* FALL THROUGH. */
+ /* resume to current state on success */
+ st->u.yes.prev_yes_state = yes_state;
+ yes_state = st;
+ PUSH_STATE(newst, resume_IFMATCH);
+ st = newst;
+ next = NEXTOPER(NEXTOPER(scan));
+ break;
+
case LONGJMP:
- do_longjump:
next = scan + ARG(scan);
if (next == scan)
next = NULL;
PTR2UV(scan), OP(scan));
Perl_croak(aTHX_ "regexp memory corruption");
}
+
reenter:
scan = next;
+ continue;
+ /* NOTREACHED */
+
+ /* simulate recursively calling regmatch(), but without actually
+ * recursing - ie save the current state on the heap rather than on
+ * the stack, then re-enter the loop. This avoids complex regexes
+ * blowing the processor stack */
+
+ start_recurse:
+ {
+ /* push new state */
+ regmatch_state *oldst = st;
+
+ depth++;
+
+ /* grab the next free state slot */
+ st++;
+ if (st > SLAB_LAST(PL_regmatch_slab))
+ st = S_push_slab(aTHX);
+ PL_regmatch_state = st;
+
+ oldst->next = next;
+ oldst->n = n;
+ oldst->locinput = locinput;
+
+ st->cc = oldst->cc;
+ locinput = PL_reginput;
+ nextchr = UCHARAT(locinput);
+ st->minmod = 0;
+ st->sw = 0;
+ st->logical = 0;
+ st->unwind = 0;
+#ifdef DEBUGGING
+ PL_regindent++;
+#endif
+ }
}
+
+
/*
* We get here only if there's trouble -- normally "case END" is
* the terminating point.
/*NOTREACHED*/
sayNO;
-yes_loud:
- DEBUG_EXECUTE_r(
- PerlIO_printf(Perl_debug_log,
- "%*s %scould match...%s\n",
- REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
- );
- goto yes;
yes_final:
+
+ if (yes_state) {
+ /* we have successfully completed a subexpression, but we must now
+ * pop to the state marked by yes_state and continue from there */
+
+ /*XXX tmp for CURLYM*/
+ regmatch_slab * const oslab = PL_regmatch_slab;
+ regmatch_state * const ost = st;
+ regmatch_state * const oys = yes_state;
+ int odepth = depth;
+
+ assert(st != yes_state);
+ while (yes_state < SLAB_FIRST(PL_regmatch_slab)
+ || yes_state > SLAB_LAST(PL_regmatch_slab))
+ {
+ /* not in this slab, pop slab */
+ depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
+ PL_regmatch_slab = PL_regmatch_slab->prev;
+ st = SLAB_LAST(PL_regmatch_slab);
+ }
+ depth -= (st - yes_state);
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE TO (%d)\n", depth));
+ st = yes_state;
+ yes_state = st->u.yes.prev_yes_state;
+ PL_regmatch_state = st;
+
+ switch (st->resume_state) {
+ case resume_EVAL:
+ if (st->u.eval.toggleutf)
+ PL_reg_flags ^= RF_utf8;
+ ReREFCNT_dec(rex);
+ rex = st->u.eval.prev_rex;
+ /* XXXX This is too dramatic a measure... */
+ PL_reg_maxiter = 0;
+ /* Restore parens of the caller without popping the
+ * savestack */
+ {
+ const I32 tmp = PL_savestack_ix;
+ PL_savestack_ix = st->u.eval.lastcp;
+ regcppop(rex);
+ PL_savestack_ix = tmp;
+ }
+ PL_reginput = locinput;
+ /* continue at the node following the (??{...}) */
+ next = st->next;
+ goto reenter;
+
+ case resume_IFMATCH:
+ if (st->logical) {
+ st->logical = 0;
+ st->sw = st->u.ifmatch.wanted;
+ }
+ else if (!st->u.ifmatch.wanted)
+ sayNO;
+
+ if (OP(st->scan) == SUSPEND)
+ locinput = PL_reginput;
+ else {
+ locinput = PL_reginput = st->locinput;
+ nextchr = UCHARAT(locinput);
+ }
+ next = st->scan + ARG(st->scan);
+ if (next == st->scan)
+ next = NULL;
+ goto reenter;
+
+ /* XXX tmp don't handle yes_state yet */
+ case resume_CURLYM1:
+ case resume_CURLYM2:
+ case resume_CURLYM3:
+ PL_regmatch_slab =oslab;
+ st = ost;
+ PL_regmatch_state = st;
+ depth = odepth;
+ yes_state = oys;
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "XXX revering a CURLYM\n"));
+ goto yes;
+
+ default:
+ Perl_croak(aTHX_ "unexpected yes reume state");
+ }
+ }
+
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
PL_colors[4], PL_colors[5]));
yes:
PL_regindent--;
#endif
-#if 0 /* Breaks $^R */
- if (unwind)
- regcpblow(firstcp);
-#endif
- return 1;
+ result = 1;
+ /* XXX this is duplicate(ish) code to that in the do_no section.
+ * eventually a yes should just pop the stack back to the current
+ * yes_state */
+ if (depth) {
+ /* restore previous state and re-enter */
+ POP_STATE;
+
+ switch (st->resume_state) {
+ case resume_TRIE1:
+ goto resume_point_TRIE1;
+ case resume_TRIE2:
+ goto resume_point_TRIE2;
+ case resume_CURLYX:
+ goto resume_point_CURLYX;
+ case resume_WHILEM1:
+ goto resume_point_WHILEM1;
+ case resume_WHILEM2:
+ goto resume_point_WHILEM2;
+ case resume_WHILEM3:
+ goto resume_point_WHILEM3;
+ case resume_WHILEM4:
+ goto resume_point_WHILEM4;
+ case resume_WHILEM5:
+ goto resume_point_WHILEM5;
+ case resume_WHILEM6:
+ goto resume_point_WHILEM6;
+ case resume_CURLYM1:
+ goto resume_point_CURLYM1;
+ case resume_CURLYM2:
+ goto resume_point_CURLYM2;
+ case resume_CURLYM3:
+ goto resume_point_CURLYM3;
+ case resume_PLUS1:
+ goto resume_point_PLUS1;
+ case resume_PLUS2:
+ goto resume_point_PLUS2;
+ case resume_PLUS3:
+ goto resume_point_PLUS3;
+ case resume_PLUS4:
+ goto resume_point_PLUS4;
+
+ case resume_IFMATCH:
+ case resume_EVAL:
+ default:
+ Perl_croak(aTHX_ "regexp resume memory corruption");
+ }
+ }
+ goto final_exit;
no:
DEBUG_EXECUTE_r(
goto do_no;
no_final:
do_no:
- if (unwind) {
- re_unwind_t * const uw = SSPTRt(unwind,re_unwind_t);
+ if (st->unwind) {
+ re_unwind_t * const uw = SSPTRt(st->unwind,re_unwind_t);
switch (uw->type) {
case RE_UNWIND_BRANCH:
PL_regendp[n] = -1;
*PL_reglastparen = n;
scan = next = uwb->next;
+ st->minmod = uwb->minmod;
if ( !scan ||
OP(scan) != (uwb->type == RE_UNWIND_BRANCH
? BRANCH : BRANCHJ) ) { /* Failure */
- unwind = uwb->prev;
+ st->unwind = uwb->prev;
#ifdef DEBUGGING
PL_regindent--;
#endif
}
/* NOTREACHED */
}
+
#ifdef DEBUGGING
PL_regindent--;
#endif
- return 0;
+ result = 0;
+
+ if (depth) {
+ /* there's a previous state to backtrack to */
+ POP_STATE;
+ switch (st->resume_state) {
+ case resume_TRIE1:
+ goto resume_point_TRIE1;
+ case resume_TRIE2:
+ goto resume_point_TRIE2;
+ case resume_EVAL:
+ /* we have failed an (??{...}). Restore state to the outer re
+ * then re-throw the failure */
+ if (st->u.eval.toggleutf)
+ PL_reg_flags ^= RF_utf8;
+ ReREFCNT_dec(rex);
+ rex = st->u.eval.prev_rex;
+ yes_state = st->u.yes.prev_yes_state;
+
+ /* XXXX This is too dramatic a measure... */
+ PL_reg_maxiter = 0;
+
+ PL_reginput = locinput;
+ REGCP_UNWIND(st->u.eval.lastcp);
+ regcppop(rex);
+ goto do_no;
+
+ case resume_CURLYX:
+ goto resume_point_CURLYX;
+ case resume_WHILEM1:
+ goto resume_point_WHILEM1;
+ case resume_WHILEM2:
+ goto resume_point_WHILEM2;
+ case resume_WHILEM3:
+ goto resume_point_WHILEM3;
+ case resume_WHILEM4:
+ goto resume_point_WHILEM4;
+ case resume_WHILEM5:
+ goto resume_point_WHILEM5;
+ case resume_WHILEM6:
+ goto resume_point_WHILEM6;
+ case resume_CURLYM1:
+ goto resume_point_CURLYM1;
+ case resume_CURLYM2:
+ goto resume_point_CURLYM2;
+ case resume_CURLYM3:
+ goto resume_point_CURLYM3;
+ case resume_IFMATCH:
+ yes_state = st->u.yes.prev_yes_state;
+ if (st->logical) {
+ st->logical = 0;
+ st->sw = !st->u.ifmatch.wanted;
+ }
+ else if (st->u.ifmatch.wanted)
+ sayNO;
+
+ assert(OP(scan) != SUSPEND); /* XXX DAPM tmp */
+ locinput = PL_reginput = st->locinput;
+ nextchr = UCHARAT(locinput);
+ next = scan + ARG(scan);
+ if (next == scan)
+ next = NULL;
+ goto reenter;
+
+ case resume_PLUS1:
+ goto resume_point_PLUS1;
+ case resume_PLUS2:
+ goto resume_point_PLUS2;
+ case resume_PLUS3:
+ goto resume_point_PLUS3;
+ case resume_PLUS4:
+ goto resume_point_PLUS4;
+ default:
+ Perl_croak(aTHX_ "regexp resume memory corruption");
+ }
+ }
+
+final_exit:
+
+ /* restore original high-water mark */
+ PL_regmatch_slab = orig_slab;
+ PL_regmatch_state = orig_state;
+
+ /* free all slabs above current one */
+ if (orig_slab->next) {
+ regmatch_slab *sl = orig_slab->next;
+ orig_slab->next = NULL;
+ while (sl) {
+ regmatch_slab * const osl = sl;
+ sl = sl->next;
+ Safefree(osl);
+ }
+ }
+
+ return result;
+
}
/*
* rather than incrementing count on every character. [Er, except utf8.]]
*/
STATIC I32
-S_regrepeat(pTHX_ const regnode *p, I32 max)
+S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max)
{
dVAR;
register char *scan;
if (max == REG_INFTY)
max = I32_MAX;
else if (max < loceol - scan)
- loceol = scan + max;
+ loceol = scan + max;
switch (OP(p)) {
case REG_ANY:
if (do_utf8) {
if (do_utf8) {
loceol = PL_regeol;
while (hardcount < max && scan < loceol &&
- reginclass(p, (U8*)scan, 0, do_utf8)) {
+ reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
scan += UTF8SKIP(scan);
hardcount++;
}
} else {
- while (scan < loceol && REGINCLASS(p, (U8*)scan))
+ while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
scan++;
}
break;
SV * const prop = sv_newmortal();
GET_RE_DEBUG_FLAGS;
DEBUG_EXECUTE_r({
- regprop(prop, p);
+ regprop(prog, prop, p);
PerlIO_printf(Perl_debug_log,
"%*s %s can match %"IVdf" times out of %"IVdf"...\n",
REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
}
+#ifndef PERL_IN_XSUB_RE
/*
- regclass_swash - prepare the utf8 swash
*/
SV *
-Perl_regclass_swash(pTHX_ register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
+Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
{
dVAR;
SV *sw = NULL;
SV *si = NULL;
SV *alt = NULL;
+ const struct reg_data *data = prog ? prog->data : NULL;
- if (PL_regdata && PL_regdata->count) {
+ if (data && data->count) {
const U32 n = ARG(node);
- if (PL_regdata->what[n] == 's') {
- SV * const rv = (SV*)PL_regdata->data[n];
+ if (data->what[n] == 's') {
+ SV * const rv = (SV*)data->data[n];
AV * const av = (AV*)SvRV((SV*)rv);
SV **const ary = AvARRAY(av);
SV **a, **b;
return sw;
}
+#endif
/*
- reginclass - determine if a character falls into a character class
*/
STATIC bool
-S_reginclass(pTHX_ register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
+S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
{
dVAR;
const char flags = ANYOF_FLAGS(n);
if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
- ckWARN(WARN_UTF8) ? UTF8_CHECK_ONLY :
- UTF8_ALLOW_ANYUV|UTF8_CHECK_ONLY);
+ (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
+ /* see [perl #37836] for UTF8_ALLOW_ANYUV */
if (len == (STRLEN)-1)
Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
}
match = TRUE;
if (!match) {
AV *av;
- SV * const sw = regclass_swash(n, TRUE, 0, (SV**)&av);
+ SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
if (sw) {
if (swash_fetch(sw, p, do_utf8))
}
STATIC U8 *
-S_reghop(pTHX_ U8 *s, I32 off)
-{
- dVAR;
- return S_reghop3(s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
-}
-
-STATIC U8 *
S_reghop3(U8 *s, I32 off, U8* lim)
{
dVAR;
}
STATIC U8 *
-S_reghopmaybe(pTHX_ U8 *s, I32 off)
-{
- dVAR;
- return S_reghopmaybe3(s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
-}
-
-STATIC U8 *
S_reghopmaybe3(U8* s, I32 off, U8* lim)
{
dVAR;
restore_pos(pTHX_ void *arg)
{
dVAR;
- PERL_UNUSED_ARG(arg);
+ regexp * const rex = (regexp *)arg;
if (PL_reg_eval_set) {
if (PL_reg_oldsaved) {
- PL_reg_re->subbeg = PL_reg_oldsaved;
- PL_reg_re->sublen = PL_reg_oldsavedlen;
+ rex->subbeg = PL_reg_oldsaved;
+ rex->sublen = PL_reg_oldsavedlen;
#ifdef PERL_OLD_COPY_ON_WRITE
- PL_reg_re->saved_copy = PL_nrs;
+ rex->saved_copy = PL_nrs;
#endif
- RX_MATCH_COPIED_on(PL_reg_re);
+ RX_MATCH_COPIED_on(rex);
}
PL_reg_magic->mg_len = PL_reg_oldpos;
PL_reg_eval_set = 0;
S_to_utf8_substr(pTHX_ register regexp *prog)
{
if (prog->float_substr && !prog->float_utf8) {
- SV* sv;
- prog->float_utf8 = sv = newSVsv(prog->float_substr);
+ SV* const sv = newSVsv(prog->float_substr);
+ prog->float_utf8 = sv;
sv_utf8_upgrade(sv);
if (SvTAIL(prog->float_substr))
SvTAIL_on(sv);
prog->check_utf8 = sv;
}
if (prog->anchored_substr && !prog->anchored_utf8) {
- SV* sv;
- prog->anchored_utf8 = sv = newSVsv(prog->anchored_substr);
+ SV* const sv = newSVsv(prog->anchored_substr);
+ prog->anchored_utf8 = sv;
sv_utf8_upgrade(sv);
if (SvTAIL(prog->anchored_substr))
SvTAIL_on(sv);
{
dVAR;
if (prog->float_utf8 && !prog->float_substr) {
- SV* sv;
- prog->float_substr = sv = newSVsv(prog->float_utf8);
+ SV* sv = newSVsv(prog->float_utf8);
+ prog->float_substr = sv;
if (sv_utf8_downgrade(sv, TRUE)) {
if (SvTAIL(prog->float_utf8))
SvTAIL_on(sv);
prog->check_substr = sv;
}
if (prog->anchored_utf8 && !prog->anchored_substr) {
- SV* sv;
- prog->anchored_substr = sv = newSVsv(prog->anchored_utf8);
+ SV* sv = newSVsv(prog->anchored_utf8);
+ prog->anchored_substr = sv;
if (sv_utf8_downgrade(sv, TRUE)) {
if (SvTAIL(prog->anchored_utf8))
SvTAIL_on(sv);