* it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
* This causes the main functions to be compiled under new names and with
* debugging support added, which makes "use re 'debug'" work.
-
*/
/* NOTE: this is derived from Henry Spencer's regexp code, and should not
#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
-#define HOPc(pos,off) ((char *)(PL_reg_match_utf8 \
+/* HOPc(pos,off): yields a char*.  When PL_reg_match_utf8 is set it calls
+ * reghop3() on pos with off, using PL_regeol as the limit for off >= 0 and
+ * PL_bostr otherwise; when it is not set the result is simply pos + off.
+ * The whole expansion is parenthesized so that the leading cast cannot
+ * mis-bind when the macro is used next to postfix or other
+ * high-precedence operators. */
+#define HOPc(pos,off) \
+ ((char *)(PL_reg_match_utf8 \
? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
- : (U8*)(pos + off)))
-#define HOPBACKc(pos, off) ((char*) \
- ((PL_reg_match_utf8) \
- ? reghopmaybe3((U8*)pos, -off, ((U8*)(off < 0 ? PL_regeol : PL_bostr))) \
- : (pos - off >= PL_bostr) \
- ? (U8*)(pos - off) \
- : (U8*)NULL) \
-)
+ : (U8*)(pos + off)))
+/* HOPBACKc(pos,off): yields a char*.  When PL_reg_match_utf8 is set it
+ * calls reghopmaybe3() on pos with -off and PL_bostr as the limit;
+ * otherwise it yields pos - off, or NULL when pos - off would lie before
+ * PL_bostr.  Fully parenthesized for the same reason as HOPc. */
+#define HOPBACKc(pos, off) \
+ ((char*)(PL_reg_match_utf8\
+ ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
+ : (pos - off >= PL_bostr) \
+ ? (U8*)pos - off \
+ : NULL))
-#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
+/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
+
/* for use after a quantifier and before an EXACT-like node -- japhy */
#define JUMPABLE(rn) ( \
OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
OP(rn) == PLUS || OP(rn) == MINMOD || \
- (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
+ (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
)
#define HAS_TEXT(rn) ( \
- PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
+ PL_regkind[OP(rn)] == EXACT || PL_regkind[OP(rn)] == REF \
)
/*
follow but for lookbehind (rn->flags != 0) we skip to the next step.
*/
#define FIND_NEXT_IMPT(rn) STMT_START { \
- while (JUMPABLE(rn)) \
- if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
+ while (JUMPABLE(rn)) { \
+ const OPCODE type = OP(rn); \
+ if (type == SUSPEND || PL_regkind[type] == CURLY) \
rn = NEXTOPER(NEXTOPER(rn)); \
- else if (OP(rn) == PLUS) \
+ else if (type == PLUS) \
rn = NEXTOPER(rn); \
- else if (OP(rn) == IFMATCH) \
+ else if (type == IFMATCH) \
rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
else rn += NEXT_OFF(rn); \
+ } \
} STMT_END
static void restore_pos(pTHX_ void *arg);
#define REGCP_PAREN_ELEMS 4
const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
int p;
+ GET_RE_DEBUG_FLAGS_DECL;
if (paren_elems_to_push < 0)
Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
SSPUSHINT(PL_regstartp[p]);
SSPUSHPTR(PL_reg_start_tmp[p]);
SSPUSHINT(p);
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
+ " saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
+ (UV)p, (IV)PL_regstartp[p],
+ (IV)(PL_reg_start_tmp[p] - PL_bostr),
+ (IV)PL_regendp[p]
+ ));
}
/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
SSPUSHINT(PL_regsize);
#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
-#define TRYPAREN(paren, n, input, where) { \
- if (paren) { \
- if (n) { \
- PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
- PL_regendp[paren] = input - PL_bostr; \
- } \
- else \
- PL_regendp[paren] = -1; \
- } \
- REGMATCH(next, where); \
- if (result) \
- sayYES; \
- if (paren && n) \
- PL_regendp[paren] = -1; \
-}
-
-
/*
* pregexec and friends
*/
const I32 multiline = prog->reganch & PMf_MULTILINE;
#ifdef DEBUGGING
const char * const i_strpos = strpos;
- SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
#endif
GET_RE_DEBUG_FLAGS_DECL;
}
DEBUG_EXECUTE_r({
- const char *s = PL_reg_match_utf8 ?
- sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
- strpos;
- const int len = PL_reg_match_utf8 ?
- (int)strlen(s) : strend - strpos;
+ RE_PV_DISPLAY_DECL(s, len, PL_reg_match_utf8,
+ PERL_DEBUG_PAD_ZERO(0), strpos, strend - strpos, 60);
+
if (!PL_colorset)
reginitcolors();
if (PL_reg_match_utf8)
{
char * const last = HOP3c(s, -start_shift, strbeg);
char *last1, *last2;
- char *s1 = s;
+ char * const saved_s = s;
SV* must;
t = s - prog->check_offset_max;
if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
&& (!do_utf8
- || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
+ || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
&& t > strpos)))
NOOP;
else
}
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
", trying floating at offset %ld...\n",
- (long)(HOP3c(s1, 1, strend) - i_strpos)));
+ (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
other_last = HOP3c(last1, prog->anchored_offset+1, strend);
s = HOP3c(last, 1, strend);
goto restart;
(long)(s - i_strpos)));
t = HOP3c(s, -prog->anchored_offset, strbeg);
other_last = HOP3c(s, 1, strend);
- s = s1;
+ s = saved_s;
if (t == strpos)
goto try_at_start;
goto try_at_offset;
}
else { /* Take into account the floating substring. */
char *last, *last1;
- char *s1 = s;
+ char * const saved_s = s;
SV* must;
t = HOP3c(s, -start_shift, strbeg);
}
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
", trying anchored starting at offset %ld...\n",
- (long)(s1 + 1 - i_strpos)));
+ (long)(saved_s + 1 - i_strpos)));
other_last = last;
s = HOP3c(t, 1, strend);
goto restart;
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
(long)(s - i_strpos)));
other_last = s; /* Fix this later. --Hugo */
- s = s1;
+ s = saved_s;
if (t == strpos)
goto try_at_start;
goto try_at_offset;
t = s - prog->check_offset_max;
if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
&& (!do_utf8
- || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
+ || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*)strpos))
&& t > strpos))) {
/* Fixed substring is found far enough so that the match
cannot start at strpos. */
/* Last resort... */
/* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
- if (prog->regstclass) {
+ if (prog->regstclass && OP(prog->regstclass)!=TRIE) {
/* minlen == 0 is possible if regstclass is \b or \B,
and the fixed substr is ''$.
Since minlen is already taken into account, s+1 is before strend;
/* If regstclass takes bytelength more than 1: If charlength==1, OK.
This leaves EXACTF only, which is dealt with in find_byclass(). */
const U8* const str = (U8*)STRING(prog->regstclass);
- const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
+ const int cl_l = (PL_regkind[OP(prog->regstclass)] == EXACT
? CHR_DIST(str+STR_LEN(prog->regstclass), str)
: 1);
- const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
+ const char * endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
: (prog->float_substr || prog->float_utf8
? HOP3c(HOP3c(check_at, -start_shift, strbeg),
cl_l, strend)
: strend);
-
+ /*if (OP(prog->regstclass) == TRIE)
+ endpos++;*/
t = s;
s = find_byclass(prog, prog->regstclass, s, endpos, NULL);
if (!s) {
/* We know what class REx starts with. Try to find this position... */
/* if reginfo is NULL, its a dryrun */
+/* Annoyingly, all the variables in this routine have different names from their
+ counterparts in regmatch. /grrr */
+/* REXEC_TRIE_READ_CHAR: fetch the next character for trie matching.
+ * Sets uvc and len according to trie_type:
+ *   trie_utf8_fold - while foldlen > 0 the character is read through uscan
+ *                    (foldlen and uscan are advanced, len is set to 0);
+ *                    otherwise it is decoded from uc, folded into foldbuf
+ *                    with to_uni_fold(), and foldlen/uscan are positioned
+ *                    past the first folded character;
+ *   trie_utf8      - the character is decoded from uc;
+ *   trie_plain     - uvc is the byte at uc and len is 1.
+ * uvc is then mapped to charid: through trie->charmap when uvc < 256,
+ * otherwise through a trie->widecharmap lookup, with charid left at 0 when
+ * there is no widecharmap or no entry for uvc.
+ * The macro does not advance uc itself.
+ */
+#define REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len, uvc, charid, \
+foldlen, foldbuf, uniflags) STMT_START { \
+ switch (trie_type) { \
+ case trie_utf8_fold: \
+ if ( foldlen>0 ) { \
+ uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
+ foldlen -= len; \
+ uscan += len; \
+ len=0; \
+ } else { \
+ uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
+ uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
+ foldlen -= UNISKIP( uvc ); \
+ uscan = foldbuf + UNISKIP( uvc ); \
+ } \
+ break; \
+ case trie_utf8: \
+ uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
+ break; \
+ case trie_plain: \
+ uvc = (UV)*uc; \
+ len = 1; \
+ } \
+ \
+ if (uvc < 256) { \
+ charid = trie->charmap[ uvc ]; \
+ } \
+ else { \
+ charid = 0; \
+ if (trie->widecharmap) { \
+ SV** const svpp = hv_fetch(trie->widecharmap, \
+ (char*)&uvc, sizeof(UV), 0); \
+ if (svpp) \
+ charid = (U16)SvIV(*svpp); \
+ } \
+ } \
+} STMT_END
+/* REXEC_FBC_EXACTISH_CHECK(CoNd): one step of a case-insensitive scan.
+ * If CoNd holds, the length/ibcmp_utf8() test on s against m passes, and s
+ * is accepted (regtry() is called unless reginfo is NULL), jump to got_it.
+ * Otherwise c is folded via to_utf8_fold() into a local foldbuf; if the
+ * fold f differs from c and equals c1 or c2, the comparison is repeated on
+ * the folded bytes and, on acceptance, control jumps to got_it.
+ * Expands to bare statements ending in "s += len" with no trailing
+ * semicolon, and relies on the caller's c, c1, c2, f, ln, len, foldlen,
+ * tmpbuf, m, s, do_utf8 and reginfo.
+ * NOTE(review): the first ibcmp_utf8() call is not negated while the
+ * second one is -- confirm against ibcmp_utf8()'s return convention.
+ */
+#define REXEC_FBC_EXACTISH_CHECK(CoNd) \
+ if ( (CoNd) \
+ && (ln == len || \
+ ibcmp_utf8(s, NULL, 0, do_utf8, \
+ m, NULL, ln, (bool)UTF)) \
+ && (!reginfo || regtry(reginfo, s)) ) \
+ goto got_it; \
+ else { \
+ U8 foldbuf[UTF8_MAXBYTES_CASE+1]; \
+ uvchr_to_utf8(tmpbuf, c); \
+ f = to_utf8_fold(tmpbuf, foldbuf, &foldlen); \
+ if ( f != c \
+ && (f == c1 || f == c2) \
+ && (ln == foldlen || \
+ !ibcmp_utf8((char *) foldbuf, \
+ NULL, foldlen, do_utf8, \
+ m, \
+ NULL, ln, (bool)UTF)) \
+ && (!reginfo || regtry(reginfo, s)) ) \
+ goto got_it; \
+ } \
+ s += len
+/* REXEC_FBC_EXACTISH_SCAN(CoNd): byte-at-a-time scan while s <= e.
+ * At each position, if CoNd holds, and either ln == 1 or the ln-byte
+ * comparison of s with m returns 0 (ibcmp() when OP(c) is EXACTF,
+ * ibcmp_locale() otherwise), and s is accepted (regtry() is called unless
+ * reginfo is NULL), control jumps to got_it.  Otherwise s advances by one.
+ * Relies on the caller's s, e, m, ln, c and reginfo.
+ */
+#define REXEC_FBC_EXACTISH_SCAN(CoNd) \
+STMT_START { \
+ while (s <= e) { \
+ if ( (CoNd) \
+ && (ln == 1 || !(OP(c) == EXACTF \
+ ? ibcmp(s, m, ln) \
+ : ibcmp_locale(s, m, ln))) \
+ && (!reginfo || regtry(reginfo, s)) ) \
+ goto got_it; \
+ s++; \
+ } \
+} STMT_END
+/* REXEC_FBC_UTF8_SCAN(CoDe): run CoDe at each position s, advancing s by
+ * uskip = UTF8SKIP(s) after every iteration, for as long as
+ * s + uskip <= strend.  Relies on the caller's s, uskip and strend.
+ */
+#define REXEC_FBC_UTF8_SCAN(CoDe) \
+STMT_START { \
+ while (s + (uskip = UTF8SKIP(s)) <= strend) { \
+ CoDe \
+ s += uskip; \
+ } \
+} STMT_END
+/* REXEC_FBC_SCAN(CoDe): run CoDe at each position s, advancing s by one
+ * byte after every iteration, while s < strend.  Relies on the caller's
+ * s and strend.
+ */
+#define REXEC_FBC_SCAN(CoDe) \
+STMT_START { \
+ while (s < strend) { \
+ CoDe \
+ s++; \
+ } \
+} STMT_END
+/* REXEC_FBC_UTF8_CLASS_SCAN(CoNd): REXEC_FBC_UTF8_SCAN loop testing CoNd at
+ * each position.  When CoNd is true: if tmp is set and s is accepted
+ * (regtry() is called unless reginfo is NULL) control jumps to got_it; in
+ * every other case tmp is set to doevery.  When CoNd is false tmp is set
+ * to 1.  Relies on the caller's tmp, doevery, reginfo and s.
+ */
+#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd) \
+REXEC_FBC_UTF8_SCAN( \
+ if (CoNd) { \
+ if (tmp && (!reginfo || regtry(reginfo, s))) \
+ goto got_it; \
+ else \
+ tmp = doevery; \
+ } \
+ else \
+ tmp = 1; \
+)
+/* REXEC_FBC_CLASS_SCAN(CoNd): REXEC_FBC_SCAN (byte-wise) loop testing CoNd
+ * at each position.  When CoNd is true: if tmp is set and s is accepted
+ * (regtry() is called unless reginfo is NULL) control jumps to got_it; in
+ * every other case tmp is set to doevery.  When CoNd is false tmp is set
+ * to 1.  Relies on the caller's tmp, doevery, reginfo and s.
+ */
+#define REXEC_FBC_CLASS_SCAN(CoNd) \
+REXEC_FBC_SCAN( \
+ if (CoNd) { \
+ if (tmp && (!reginfo || regtry(reginfo, s))) \
+ goto got_it; \
+ else \
+ tmp = doevery; \
+ } \
+ else \
+ tmp = 1; \
+)
+/* REXEC_FBC_TRYIT: jump to got_it when reginfo is NULL or regtry(reginfo, s)
+ * succeeds.  Expands to an unbraced `if` with no trailing semicolon, so
+ * the caller supplies the ';'.
+ */
+#define REXEC_FBC_TRYIT \
+if ((!reginfo || regtry(reginfo, s))) \
+ goto got_it
+/* REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd, CoNdUtF8, CoNd): complete body for a
+ * switch case.  When do_utf8 is set it executes UtFpReLoAd and then runs
+ * REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); otherwise it runs
+ * REXEC_FBC_CLASS_SCAN(CoNd).  The expansion ends in `break` with no
+ * trailing semicolon, so the caller supplies the ';'.
+ */
+#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
+ if (do_utf8) { \
+ UtFpReLoAd; \
+ REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
+ } \
+ else { \
+ REXEC_FBC_CLASS_SCAN(CoNd); \
+ } \
+ break
+/* REXEC_FBC_CSCAN_TAINT(CoNdUtF8, CoNd): complete body for a switch case.
+ * Sets RF_tainted in PL_reg_flags, then runs
+ * REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8) when do_utf8 is set and
+ * REXEC_FBC_CLASS_SCAN(CoNd) otherwise.  The expansion ends in `break`
+ * with no trailing semicolon, so the caller supplies the ';'.
+ */
+#define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd) \
+ PL_reg_flags |= RF_tainted; \
+ if (do_utf8) { \
+ REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
+ } \
+ else { \
+ REXEC_FBC_CLASS_SCAN(CoNd); \
+ } \
+ break
STATIC char *
-S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char
-*strend, const regmatch_info *reginfo)
+S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
+ const char *strend, const regmatch_info *reginfo)
{
dVAR;
const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
switch (OP(c)) {
case ANYOF:
if (do_utf8) {
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
+ REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
!UTF8_IS_INVARIANT((U8)s[0]) ?
reginclass(prog, c, (U8*)s, 0, do_utf8) :
- REGINCLASS(prog, c, (U8*)s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
+ REGINCLASS(prog, c, (U8*)s));
}
else {
while (s < strend) {
}
break;
case CANY:
- while (s < strend) {
+ REXEC_FBC_SCAN(
if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
- s++;
- }
+ );
break;
case EXACTF:
m = STRING(c);
while (s <= e) {
c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
uniflags);
- if ( c == c1
- && (ln == len ||
- ibcmp_utf8(s, (char **)0, 0, do_utf8,
- m, (char **)0, ln, (bool)UTF))
- && (!reginfo || regtry(reginfo, s)) )
- goto got_it;
- else {
- U8 foldbuf[UTF8_MAXBYTES_CASE+1];
- uvchr_to_utf8(tmpbuf, c);
- f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
- if ( f != c
- && (f == c1 || f == c2)
- && (ln == foldlen ||
- !ibcmp_utf8((char *) foldbuf,
- (char **)0, foldlen, do_utf8,
- m,
- (char **)0, ln, (bool)UTF))
- && (!reginfo || regtry(reginfo, s)) )
- goto got_it;
- }
- s += len;
+ REXEC_FBC_EXACTISH_CHECK(c == c1);
}
}
else {
c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
- if ( (c == c1 || c == c2)
- && (ln == len ||
- ibcmp_utf8(s, (char **)0, 0, do_utf8,
- m, (char **)0, ln, (bool)UTF))
- && (!reginfo || regtry(reginfo, s)) )
- goto got_it;
- else {
- U8 foldbuf[UTF8_MAXBYTES_CASE+1];
- uvchr_to_utf8(tmpbuf, c);
- f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
- if ( f != c
- && (f == c1 || f == c2)
- && (ln == foldlen ||
- !ibcmp_utf8((char *) foldbuf,
- (char **)0, foldlen, do_utf8,
- m,
- (char **)0, ln, (bool)UTF))
- && (!reginfo || regtry(reginfo, s)) )
- goto got_it;
- }
- s += len;
+ REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
}
}
}
else {
if (c1 == c2)
- while (s <= e) {
- if ( *(U8*)s == c1
- && (ln == 1 || !(OP(c) == EXACTF
- ? ibcmp(s, m, ln)
- : ibcmp_locale(s, m, ln)))
- && (!reginfo || regtry(reginfo, s)) )
- goto got_it;
- s++;
- }
+ REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
else
- while (s <= e) {
- if ( (*(U8*)s == c1 || *(U8*)s == c2)
- && (ln == 1 || !(OP(c) == EXACTF
- ? ibcmp(s, m, ln)
- : ibcmp_locale(s, m, ln)))
- && (!reginfo || regtry(reginfo, s)) )
- goto got_it;
- s++;
- }
+ REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
}
break;
case BOUNDL:
tmp = ((OP(c) == BOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
LOAD_UTF8_CHARCLASS_ALNUM();
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
+ REXEC_FBC_UTF8_SCAN(
if (tmp == !(OP(c) == BOUND ?
(bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
isALNUM_LC_utf8((U8*)s)))
{
tmp = !tmp;
- if ((!reginfo || regtry(reginfo, s)))
- goto got_it;
- }
- s += uskip;
+ REXEC_FBC_TRYIT;
}
+ );
}
else {
tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
- while (s < strend) {
+ REXEC_FBC_SCAN(
if (tmp ==
!(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
tmp = !tmp;
- if ((!reginfo || regtry(reginfo, s)))
- goto got_it;
- }
- s++;
+ REXEC_FBC_TRYIT;
}
+ );
}
if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, s)))
goto got_it;
tmp = ((OP(c) == NBOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
LOAD_UTF8_CHARCLASS_ALNUM();
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
+ REXEC_FBC_UTF8_SCAN(
if (tmp == !(OP(c) == NBOUND ?
(bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
isALNUM_LC_utf8((U8*)s)))
tmp = !tmp;
- else if ((!reginfo || regtry(reginfo, s)))
- goto got_it;
- s += uskip;
- }
+ else REXEC_FBC_TRYIT;
+ );
}
else {
tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
tmp = ((OP(c) == NBOUND ?
isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
- while (s < strend) {
+ REXEC_FBC_SCAN(
if (tmp ==
!(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
tmp = !tmp;
- else if ((!reginfo || regtry(reginfo, s)))
- goto got_it;
- s++;
- }
+ else REXEC_FBC_TRYIT;
+ );
}
if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, s)))
goto got_it;
break;
case ALNUM:
- if (do_utf8) {
- LOAD_UTF8_CHARCLASS_ALNUM();
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (isALNUM(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_PRELOAD(
+ LOAD_UTF8_CHARCLASS_ALNUM(),
+ swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
+ isALNUM(*s)
+ );
case ALNUML:
- PL_reg_flags |= RF_tainted;
- if (do_utf8) {
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (isALNUM_LC_utf8((U8*)s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (isALNUM_LC(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_TAINT(
+ isALNUM_LC_utf8((U8*)s),
+ isALNUM_LC(*s)
+ );
case NALNUM:
- if (do_utf8) {
- LOAD_UTF8_CHARCLASS_ALNUM();
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (!isALNUM(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_PRELOAD(
+ LOAD_UTF8_CHARCLASS_ALNUM(),
+ !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
+ !isALNUM(*s)
+ );
case NALNUML:
- PL_reg_flags |= RF_tainted;
- if (do_utf8) {
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (!isALNUM_LC_utf8((U8*)s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (!isALNUM_LC(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_TAINT(
+ !isALNUM_LC_utf8((U8*)s),
+ !isALNUM_LC(*s)
+ );
case SPACE:
- if (do_utf8) {
- LOAD_UTF8_CHARCLASS_SPACE();
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (isSPACE(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_PRELOAD(
+ LOAD_UTF8_CHARCLASS_SPACE(),
+ *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
+ isSPACE(*s)
+ );
case SPACEL:
- PL_reg_flags |= RF_tainted;
- if (do_utf8) {
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (isSPACE_LC(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_TAINT(
+ *s == ' ' || isSPACE_LC_utf8((U8*)s),
+ isSPACE_LC(*s)
+ );
case NSPACE:
- if (do_utf8) {
- LOAD_UTF8_CHARCLASS_SPACE();
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (!isSPACE(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_PRELOAD(
+ LOAD_UTF8_CHARCLASS_SPACE(),
+ !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
+ !isSPACE(*s)
+ );
case NSPACEL:
- PL_reg_flags |= RF_tainted;
- if (do_utf8) {
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (!isSPACE_LC(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_TAINT(
+ !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
+ !isSPACE_LC(*s)
+ );
case DIGIT:
- if (do_utf8) {
- LOAD_UTF8_CHARCLASS_DIGIT();
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (isDIGIT(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_PRELOAD(
+ LOAD_UTF8_CHARCLASS_DIGIT(),
+ swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
+ isDIGIT(*s)
+ );
case DIGITL:
- PL_reg_flags |= RF_tainted;
- if (do_utf8) {
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (isDIGIT_LC_utf8((U8*)s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (isDIGIT_LC(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_TAINT(
+ isDIGIT_LC_utf8((U8*)s),
+ isDIGIT_LC(*s)
+ );
case NDIGIT:
- if (do_utf8) {
- LOAD_UTF8_CHARCLASS_DIGIT();
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (!isDIGIT(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_PRELOAD(
+ LOAD_UTF8_CHARCLASS_DIGIT(),
+ !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
+ !isDIGIT(*s)
+ );
case NDIGITL:
- PL_reg_flags |= RF_tainted;
- if (do_utf8) {
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (!isDIGIT_LC_utf8((U8*)s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (!isDIGIT_LC(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
+ REXEC_FBC_CSCAN_TAINT(
+ !isDIGIT_LC_utf8((U8*)s),
+ !isDIGIT_LC(*s)
+ );
+ case TRIE:
+ /*Perl_croak(aTHX_ "panic: unknown regstclass TRIE");*/
+ {
+ const enum { trie_plain, trie_utf8, trie_utf8_fold }
+ trie_type = do_utf8 ?
+ (c->flags == EXACT ? trie_utf8 : trie_utf8_fold)
+ : trie_plain;
+ /* what trie are we using right now */
+ reg_ac_data *aho
+ = (reg_ac_data*)prog->data->data[ ARG( c ) ];
+ reg_trie_data *trie=aho->trie;
+
+ const char *last_start = strend - trie->minlen;
+ const char *real_start = s;
+ STRLEN maxlen = trie->maxlen;
+ SV *sv_points;
+ U8 **points; /* map of where we were in the input string
+ when reading a given string. For ASCII this
+ is unnecessary overhead as the relationship
+ is always 1:1, but for unicode, especially
+ case folded unicode this is not true. */
+ U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
+
+ GET_RE_DEBUG_FLAGS_DECL;
+
+ /* We can't just allocate points here. We need to wrap it in
+ * an SV so it gets freed properly if there is a croak while
+ * running the match */
+ ENTER;
+ SAVETMPS;
+ sv_points=newSV(maxlen * sizeof(U8 *));
+ SvCUR_set(sv_points,
+ maxlen * sizeof(U8 *));
+ SvPOK_on(sv_points);
+ sv_2mortal(sv_points);
+ points=(U8**)SvPV_nolen(sv_points );
+
+ if (trie->bitmap && trie_type != trie_utf8_fold) {
+ while (s <= last_start && !TRIE_BITMAP_TEST(trie,*s) ) {
+ s++;
+ }
+ }
+
+ while (s <= last_start) {
+ const U32 uniflags = UTF8_ALLOW_DEFAULT;
+ U8 *uc = (U8*)s;
+ U16 charid = 0;
+ U32 base = 1;
+ U32 state = 1;
+ UV uvc = 0;
+ STRLEN len = 0;
+ STRLEN foldlen = 0;
+ U8 *uscan = (U8*)NULL;
+ U8 *leftmost = NULL;
+
+ U32 pointpos = 0;
+
+ while ( state && uc <= (U8*)strend ) {
+ int failed=0;
+ if (aho->states[ state ].wordnum) {
+ U8 *lpos= points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1] ) % maxlen ];
+ if (!leftmost || lpos < leftmost)
+ leftmost= lpos;
+ if (base==0) break;
+ }
+ points[pointpos++ % maxlen]= uc;
+ REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len,
+ uvc, charid, foldlen, foldbuf, uniflags);
+ DEBUG_TRIE_EXECUTE_r(
+ PerlIO_printf(Perl_debug_log,
+ "Pos: %d Charid:%3x CV:%4"UVxf" ",
+ (int)((const char*)uc - real_start), charid, uvc)
+ );
+ uc += len;
+
+ do {
+ U32 word = aho->states[ state ].wordnum;
+ base = aho->states[ state ].trans.base;
+
+ DEBUG_TRIE_EXECUTE_r(
+ PerlIO_printf( Perl_debug_log,
+ "%sState: %4"UVxf", Base: 0x%-4"UVxf" uvc=%"UVxf" word=%"UVxf"\n",
+ failed ? "Fail transition to " : "",
+ state, base, uvc, word)
+ );
+ if ( base ) {
+ U32 tmp;
+ if (charid &&
+ (base + charid > trie->uniquecharcount )
+ && (base + charid - 1 - trie->uniquecharcount
+ < trie->lasttrans)
+ && trie->trans[base + charid - 1 -
+ trie->uniquecharcount].check == state
+ && (tmp=trie->trans[base + charid - 1 -
+ trie->uniquecharcount ].next))
+ {
+ state = tmp;
+ break;
+ }
+ else {
+ failed++;
+ if ( state == 1 )
+ break;
+ else
+ state = aho->fail[state];
+ }
+ }
+ else {
+ /* we must be accepting here */
+ failed++;
+ break;
+ }
+ } while(state);
+ if (failed) {
+ if (leftmost)
+ break;
+ else if (!charid && trie->bitmap && trie_type != trie_utf8_fold) {
+ while ( uc <= (U8*)last_start && !TRIE_BITMAP_TEST(trie,*uc) ) {
+ uc++;
+ }
+ }
+ }
+ }
+ if ( aho->states[ state ].wordnum ) {
+ U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
+ if (!leftmost || lpos < leftmost)
+ leftmost = lpos;
+ }
+ DEBUG_TRIE_EXECUTE_r(
+ PerlIO_printf( Perl_debug_log,
+ "%sState: %4"UVxf", Base: 0x%-4"UVxf" uvc=%"UVxf"\n",
+ "All done: ",
+ state, base, uvc)
+ );
+ if (leftmost) {
+ s = (char*)leftmost;
+ if (!reginfo || regtry(reginfo, s)) {
+ FREETMPS;
+ LEAVE;
+ goto got_it;
+ }
+ s = HOPc(s,1);
+ } else {
+ break;
+ }
+ }
+ FREETMPS;
+ LEAVE;
}
break;
default:
I32 end_shift = 0; /* Same for the end. */ /* CC */
I32 scream_pos = -1; /* Internal iterator of scream. */
char *scream_olds = NULL;
- SV* oreplsv = GvSV(PL_replgv);
+ SV* const oreplsv = GvSV(PL_replgv);
const bool do_utf8 = DO_UTF8(sv);
I32 multiline;
-#ifdef DEBUGGING
- SV* dsv0;
- SV* dsv1;
-#endif
+
regmatch_info reginfo; /* create some info to pass to regtry etc */
GET_RE_DEBUG_FLAGS_DECL;
multiline = prog->reganch & PMf_MULTILINE;
reginfo.prog = prog;
-#ifdef DEBUGGING
- dsv0 = PERL_DEBUG_PAD_ZERO(0);
- dsv1 = PERL_DEBUG_PAD_ZERO(1);
-#endif
-
RX_MATCH_UTF8_set(prog, do_utf8);
minlen = prog->minlen;
}
DEBUG_EXECUTE_r({
- const char * const s0 = UTF
- ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
- UNI_DISPLAY_REGEX)
- : prog->precomp;
- const int len0 = UTF ? (int)SvCUR(dsv0) : prog->prelen;
- const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
- UNI_DISPLAY_REGEX) : startpos;
- const int len1 = do_utf8 ? (int)SvCUR(dsv1) : strend - startpos;
+ RE_PV_DISPLAY_DECL(s0, len0, UTF,
+ PERL_DEBUG_PAD_ZERO(0), prog->precomp, prog->prelen, 60);
+ RE_PV_DISPLAY_DECL(s1, len1, do_utf8,
+ PERL_DEBUG_PAD_ZERO(1), startpos, strend - startpos, 60);
+
if (!PL_colorset)
reginitcolors();
PerlIO_printf(Perl_debug_log,
ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
if (do_utf8) {
- while (s < strend) {
+ REXEC_FBC_SCAN(
if (*s == ch) {
DEBUG_EXECUTE_r( did_match = 1 );
if (regtry(&reginfo, s)) goto got_it;
while (s < strend && *s == ch)
s += UTF8SKIP(s);
}
- s += UTF8SKIP(s);
- }
+ );
}
else {
- while (s < strend) {
+ REXEC_FBC_SCAN(
if (*s == ch) {
DEBUG_EXECUTE_r( did_match = 1 );
if (regtry(&reginfo, s)) goto got_it;
while (s < strend && *s == ch)
s++;
}
- s++;
- }
+ );
}
DEBUG_EXECUTE_r(if (!did_match)
PerlIO_printf(Perl_debug_log,
}
else if ((c = prog->regstclass)) {
if (minlen) {
- I32 op = (U8)OP(prog->regstclass);
+ const OPCODE op = OP(prog->regstclass);
/* don't bother with what can't match */
- if (PL_regkind[op] != EXACT && op != CANY)
+ if (PL_regkind[op] != EXACT && op != CANY && op != TRIE)
strend = HOPc(strend, -(minlen - 1));
}
DEBUG_EXECUTE_r({
- SV *prop = sv_newmortal();
- const char *s0;
- const char *s1;
- int len0;
- int len1;
-
+ SV * const prop = sv_newmortal();
regprop(prog, prop, c);
- s0 = UTF ?
- pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
- UNI_DISPLAY_REGEX) :
- SvPVX_const(prop);
- len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
- s1 = UTF ?
- sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
- len1 = UTF ? (int)SvCUR(dsv1) : strend - s;
- PerlIO_printf(Perl_debug_log,
- "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
- len0, len0, s0,
- len1, len1, s1);
+ {
+ RE_PV_DISPLAY_DECL(s0,len0,UTF,
+ PERL_DEBUG_PAD_ZERO(0),SvPVX_const(prop),SvCUR(prop),60);
+ RE_PV_DISPLAY_DECL(s1,len1,UTF,
+ PERL_DEBUG_PAD_ZERO(1),s,strend-s,60);
+ PerlIO_printf(Perl_debug_log,
+ "Matching stclass \"%*.*s\" against \"%*.*s\" (%d chars)\n",
+ len0, len0, s0,
+ len1, len1, s1, (int)(strend - s));
+ }
});
if (find_byclass(prog, c, s, strend, &reginfo))
goto got_it;
- DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
}
else {
dontbother = 0;
if ( !(flags & REXEC_NOT_FIRST) ) {
RX_MATCH_COPY_FREE(prog);
if (flags & REXEC_COPY_STR) {
- I32 i = PL_regeol - startpos + (stringarg - strbeg);
+ const I32 i = PL_regeol - startpos + (stringarg - strbeg);
#ifdef PERL_OLD_COPY_ON_WRITE
if ((SvIsCOW(sv)
|| (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
if (SvIsCOW(sv))
sv_force_normal_flags(sv, 0);
#endif
- mg = sv_magicext(reginfo->sv, (SV*)0, PERL_MAGIC_regex_global,
+ mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
&PL_vtbl_mglob, NULL, 0);
mg->mg_len = -1;
}
Newxz(PL_reg_curpm, 1, PMOP);
#ifdef USE_ITHREADS
{
- SV* repointer = newSViv(0);
+ SV* const repointer = newSViv(0);
/* so we know which PL_regex_padav element is PL_reg_curpm */
SvFLAGS(repointer) |= SVf_BREAK;
av_push(PL_regex_padav,repointer);
return 0;
}
-#define RE_UNWIND_BRANCH 1
-#define RE_UNWIND_BRANCHJ 2
-
-union re_unwind_t;
-
-typedef struct { /* XX: makes sense to enlarge it... */
- I32 type;
- I32 prev;
- CHECKPOINT lastcp;
-} re_unwind_generic_t;
-
-typedef struct {
- I32 type;
- I32 prev;
- CHECKPOINT lastcp;
- I32 lastparen;
- regnode *next;
- char *locinput;
- I32 nextchr;
- int minmod;
-#ifdef DEBUGGING
- int regindent;
-#endif
-} re_unwind_branch_t;
-
-typedef union re_unwind_t {
- I32 type;
- re_unwind_generic_t generic;
- re_unwind_branch_t branch;
-} re_unwind_t;
#define sayYES goto yes
#define sayNO goto no
/* Make sure there is a test for this +1 options in re_tests */
#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
-/* this value indiciates that the c1/c2 "next char" test should be skipped */
-#define CHRTEST_VOID -1000
+#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
+#define CHRTEST_VOID -1000 /* the c1/c2 "next char" test should be skipped */
#define SLAB_FIRST(s) (&(s)->states[0])
#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
goto start_recurse; \
resume_point_##where:
+/* push a new state then goto it:
+ * points scan at node, records state in st->resume_state, and jumps to
+ * the push_state label. The expansion is several statements ending in a
+ * goto that carries its own semicolon, so use it only where a statement
+ * sequence is valid (never as the body of an unbraced if/else). */
+
+#define PUSH_STATE_GOTO(state, node) \
+ scan = node; \
+ st->resume_state = state; \
+ goto push_state;
+
+/* push a new state with success backtracking, then goto it:
+ * identical to PUSH_STATE_GOTO except that control is transferred to the
+ * push_yes_state label. The same multi-statement caveat applies. */
+
+#define PUSH_YES_STATE_GOTO(state, node) \
+ scan = node; \
+ st->resume_state = state; \
+ goto push_yes_state;
+
-/* push a new regex state. Set newst to point to it */
-
-#define PUSH_STATE(newst, resume) \
- depth++; \
- DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "PUSH STATE(%d)\n", depth)); \
- st->scan = scan; \
- st->next = next; \
- st->n = n; \
- st->locinput = locinput; \
- st->resume_state = resume; \
- newst = st+1; \
- if (newst > SLAB_LAST(PL_regmatch_slab)) \
- newst = S_push_slab(aTHX); \
- PL_regmatch_state = newst; \
- newst->cc = 0; \
- newst->minmod = 0; \
- newst->sw = 0; \
- newst->logical = 0; \
- newst->unwind = 0; \
- locinput = PL_reginput; \
- nextchr = UCHARAT(locinput);
-
-#define POP_STATE \
- DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth)); \
- depth--; \
- st--; \
- if (st < SLAB_FIRST(PL_regmatch_slab)) { \
- PL_regmatch_slab = PL_regmatch_slab->prev; \
- st = SLAB_LAST(PL_regmatch_slab); \
- } \
- PL_regmatch_state = st; \
- scan = st->scan; \
- next = st->next; \
- n = st->n; \
- locinput = st->locinput; \
- nextchr = UCHARAT(locinput);
/*
- regmatch - main matching routine
* allocated since entry are freed.
*/
+/* Extra state numbers, allocated just above REGNODE_MAX. They are stored
+ * in st->resume_state by PUSH_STATE_GOTO / PUSH_YES_STATE_GOTO and appear
+ * as case labels in the main switch of S_regmatch next to the ordinary
+ * regnode opcodes.
+ *
+ * *** every FOO_fail should = FOO+1
+ * (presumably so that the failure case can be reached from a saved resume
+ * state by adding one -- confirm against the backtracking code before
+ * renumbering). The resume_* entries have no _fail counterpart. */
+#define TRIE_next (REGNODE_MAX+1)
+#define TRIE_next_fail (REGNODE_MAX+2)
+#define EVAL_A (REGNODE_MAX+3)
+#define EVAL_A_fail (REGNODE_MAX+4)
+#define resume_CURLYX (REGNODE_MAX+5)
+#define resume_WHILEM1 (REGNODE_MAX+6)
+#define resume_WHILEM2 (REGNODE_MAX+7)
+#define resume_WHILEM3 (REGNODE_MAX+8)
+#define resume_WHILEM4 (REGNODE_MAX+9)
+#define resume_WHILEM5 (REGNODE_MAX+10)
+#define resume_WHILEM6 (REGNODE_MAX+11)
+#define BRANCH_next (REGNODE_MAX+12)
+#define BRANCH_next_fail (REGNODE_MAX+13)
+#define CURLYM_A (REGNODE_MAX+14)
+#define CURLYM_A_fail (REGNODE_MAX+15)
+#define CURLYM_B (REGNODE_MAX+16)
+#define CURLYM_B_fail (REGNODE_MAX+17)
+#define IFMATCH_A (REGNODE_MAX+18)
+#define IFMATCH_A_fail (REGNODE_MAX+19)
+#define CURLY_B_min_known (REGNODE_MAX+20)
+#define CURLY_B_min_known_fail (REGNODE_MAX+21)
+#define CURLY_B_min (REGNODE_MAX+22)
+#define CURLY_B_min_fail (REGNODE_MAX+23)
+#define CURLY_B_max (REGNODE_MAX+24)
+#define CURLY_B_max_fail (REGNODE_MAX+25)
+
+/* REG_NODE_NUM(x): index of regnode x relative to prog, as an int, or -1
+ * when x is NULL. Relies on a variable named prog being in scope at the
+ * point of expansion; evaluates x twice. */
+#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
+
+#ifdef DEBUGGING
+/*
+ * S_dump_exec_pos - debugging aid (compiled only under DEBUGGING) that
+ * prints, to Perl_debug_log, a fixed-width picture of where the matcher
+ * currently stands in the target string.
+ *
+ * locinput - current position in the target string
+ * scan     - current regnode; only its opcode is inspected (CANY check)
+ * do_utf8  - true when the target is being walked as UTF-8; enables the
+ *            continuation-byte adjustments and, unless the node is CANY,
+ *            is_uni for the display macro
+ *
+ * Output is "<offset> <before-starttry|starttry..here|here..>" followed by
+ * padding and a '|'. No newline is written: the call site in S_regmatch
+ * prints the node description and the "\n" afterwards.
+ *
+ * NOTE(review): RE_PV_DISPLAY_DECL is defined elsewhere; it presumably
+ * declares the sN string and lenN length used below -- confirm against
+ * its definition.
+ */
+STATIC void
+S_dump_exec_pos(pTHX_ const char *locinput, const regnode *scan, const bool do_utf8)
+{
+ const int docolor = *PL_colors[0]; /* non-zero when the first colour string is non-empty */
+ const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
+ int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput); /* tail width: at most taill, at most what is left */
+ /* The part of the string before starttry has one color
+ (pref0_len chars), between starttry and current
+ position another one (pref_len - pref0_len chars),
+ after the current position the third one.
+ We assume that pref0_len <= pref_len, otherwise we
+ decrease pref0_len.
+ pref_len itself is capped both by the room left on the line,
+ (5 + taill) - l, and by the distance back to PL_bostr. */
+ int pref_len = (locinput - PL_bostr) > (5 + taill) - l
+ ? (5 + taill) - l : locinput - PL_bostr;
+ int pref0_len; /* part of the prefix lying before PL_reg_starttry */
+
+ while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
+ pref_len++; /* widen until the prefix starts on a character boundary */
+ pref0_len = pref_len - (locinput - PL_reg_starttry);
+ if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
+ l = ( PL_regeol - locinput > (5 + taill) - pref_len
+ ? (5 + taill) - pref_len : PL_regeol - locinput); /* prefix came up short: give the spare width to the tail */
+ while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
+ l--; /* do not end the tail inside a UTF-8 character */
+ if (pref0_len < 0)
+ pref0_len = 0;
+ if (pref0_len > pref_len)
+ pref0_len = pref_len;
+ {
+ const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
+/* s0/len0: the prefix bytes that precede PL_reg_starttry */
+ RE_PV_DISPLAY_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
+ (locinput - pref_len),pref0_len, 60);
+/* s1/len1: from PL_reg_starttry (or the prefix start) up to locinput */
+ RE_PV_DISPLAY_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
+ (locinput - pref_len + pref0_len),
+ pref_len - pref0_len, 60);
+/* s2/len2: text from locinput on; note the length passed is everything up
+ * to PL_regeol, not l -- l only feeds the padding width below */
+ RE_PV_DISPLAY_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
+ locinput, PL_regeol - locinput, 60);
+/* offset, then the three segments wrapped in colour pairs 4/5, 2/3 and
+ * 0/1, with a literal "> <" before the third when colours are off */
+ PerlIO_printf(Perl_debug_log,
+ "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|",
+ (IV)(locinput - PL_bostr),
+ PL_colors[4],
+ len0, s0,
+ PL_colors[5],
+ PL_colors[2],
+ len1, s1,
+ PL_colors[3],
+ (docolor ? "" : "> <"),
+ PL_colors[0],
+ len2, s2,
+ PL_colors[1],
+ 15 - l - pref_len + 1, /* pad so the '|' column lines up */
+ "");
+ }
+}
+
+#endif
STATIC I32 /* 0 failure, 1 success */
S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
/* these variables are NOT saved during a recusive RFEGMATCH: */
register I32 nextchr; /* is always set to UCHARAT(locinput) */
bool result; /* return value of S_regmatch */
- regnode *inner; /* Next node in internal branch. */
int depth = 0; /* depth of recursion */
- regmatch_state *newst; /* when pushing a state, this is the new one */
regmatch_state *yes_state = NULL; /* state to pop to on success of
subpattern */
+ U32 state_num;
#ifdef DEBUGGING
- SV *re_debug_flags = NULL;
- GET_RE_DEBUG_FLAGS;
+ GET_RE_DEBUG_FLAGS_DECL;
PL_regindent++;
#endif
st->minmod = 0;
st->sw = 0;
st->logical = 0;
- st->unwind = 0;
st->cc = NULL;
/* Note that nextchr is a byte even in UTF */
nextchr = UCHARAT(locinput);
DEBUG_EXECUTE_r( {
SV * const prop = sv_newmortal();
- const int docolor = *PL_colors[0];
- const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
- int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
- /* The part of the string before starttry has one color
- (pref0_len chars), between starttry and current
- position another one (pref_len - pref0_len chars),
- after the current position the third one.
- We assume that pref0_len <= pref_len, otherwise we
- decrease pref0_len. */
- int pref_len = (locinput - PL_bostr) > (5 + taill) - l
- ? (5 + taill) - l : locinput - PL_bostr;
- int pref0_len;
-
- while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
- pref_len++;
- pref0_len = pref_len - (locinput - PL_reg_starttry);
- if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
- l = ( PL_regeol - locinput > (5 + taill) - pref_len
- ? (5 + taill) - pref_len : PL_regeol - locinput);
- while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
- l--;
- if (pref0_len < 0)
- pref0_len = 0;
- if (pref0_len > pref_len)
- pref0_len = pref_len;
+ dump_exec_pos( locinput, scan, do_utf8 );
regprop(rex, prop, scan);
- {
- const char * const s0 =
- do_utf8 && OP(scan) != CANY ?
- pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
- pref0_len, 60, UNI_DISPLAY_REGEX) :
- locinput - pref_len;
- const int len0 = do_utf8 ? (int)strlen(s0) : pref0_len;
- const char * const s1 = do_utf8 && OP(scan) != CANY ?
- pv_uni_display(PERL_DEBUG_PAD(1),
- (U8*)(locinput - pref_len + pref0_len),
- pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
- locinput - pref_len + pref0_len;
- const int len1 = do_utf8 ? (int)strlen(s1) : pref_len - pref0_len;
- const char * const s2 = do_utf8 && OP(scan) != CANY ?
- pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput,
- PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
- locinput;
- const int len2 = do_utf8 ? (int)strlen(s2) : l;
- PerlIO_printf(Perl_debug_log,
- "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
- (IV)(locinput - PL_bostr),
- PL_colors[4],
- len0, s0,
- PL_colors[5],
- PL_colors[2],
- len1, s1,
- PL_colors[3],
- (docolor ? "" : "> <"),
- PL_colors[0],
- len2, s2,
- PL_colors[1],
- 15 - l - pref_len + 1,
- "",
- (IV)(scan - rex->program), PL_regindent*2, "",
- SvPVX_const(prop));
- }
+
+ PerlIO_printf(Perl_debug_log,
+ "%3"IVdf":%*s%s(%"IVdf")\n",
+ (IV)(scan - rex->program), PL_regindent*2, "",
+ SvPVX_const(prop),
+ PL_regkind[OP(scan)] == END ? 0 : (IV)(regnext(scan) - rex->program));
});
next = scan + NEXT_OFF(scan);
if (next == scan)
next = NULL;
+ state_num = OP(scan);
- switch (OP(scan)) {
+ reenter_switch:
+ switch (state_num) {
case BOL:
if (locinput == PL_bostr)
{
nextchr = UCHARAT(++locinput);
break;
+#undef ST
+#define ST st->u.trie
-
- /*
- traverse the TRIE keeping track of all accepting states
- we transition through until we get to a failing node.
-
-
- */
case TRIE:
- case TRIEF:
- case TRIEFL:
{
+ /* what type of TRIE am I? (utf8 makes this contextual) */
+ const enum { trie_plain, trie_utf8, trie_utf8_fold }
+ trie_type = do_utf8 ?
+ (scan->flags == EXACT ? trie_utf8 : trie_utf8_fold)
+ : trie_plain;
+
+ /* what trie are we using right now */
+ reg_trie_data * const trie
+ = (reg_trie_data*)rex->data->data[ ARG( scan ) ];
+ U32 state = trie->startstate;
+
U8 *uc = ( U8* )locinput;
- U32 state = 1;
U16 charid = 0;
U32 base = 0;
UV uvc = 0;
U8 *uscan = (U8*)NULL;
STRLEN bufflen=0;
SV *sv_accept_buff = NULL;
- const enum { trie_plain, trie_utf8, trie_uft8_fold }
- trie_type = do_utf8 ?
- (OP(scan) == TRIE ? trie_utf8 : trie_uft8_fold)
- : trie_plain;
+ U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
- /* what trie are we using right now */
- reg_trie_data *trie
- = (reg_trie_data*)rex->data->data[ ARG( scan ) ];
- st->u.trie.accepted = 0; /* how many accepting states we have seen */
- result = 0;
+ ST.accepted = 0; /* how many accepting states we have seen */
+ ST.B = next;
+#ifdef DEBUGGING
+ ST.me = scan;
+#endif
+
+ if (trie->bitmap && trie_type != trie_utf8_fold &&
+ !TRIE_BITMAP_TEST(trie,*locinput)
+ ) {
+ if (trie->states[ state ].wordnum) {
+ DEBUG_EXECUTE_r(
+ PerlIO_printf(Perl_debug_log,
+ "%*s %smatched empty string...%s\n",
+ REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
+ );
+ break;
+ } else {
+ DEBUG_EXECUTE_r(
+ PerlIO_printf(Perl_debug_log,
+ "%*s %sfailed to match start class...%s\n",
+ REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
+ );
+ sayNO_SILENT;
+ }
+ }
+
+ /*
+ traverse the TRIE keeping track of all accepting states
+ we transition through until we get to a failing node.
+ */
while ( state && uc <= (U8*)PL_regeol ) {
if (trie->states[ state ].wordnum) {
- if (!st->u.trie.accepted ) {
+ if (!ST.accepted ) {
ENTER;
SAVETMPS;
bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
sizeof(reg_trie_accepted));
SvPOK_on(sv_accept_buff);
sv_2mortal(sv_accept_buff);
- st->u.trie.accept_buff =
+ SAVETMPS;
+ ST.accept_buff =
(reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
}
else {
- if (st->u.trie.accepted >= bufflen) {
+ if (ST.accepted >= bufflen) {
bufflen *= 2;
- st->u.trie.accept_buff =(reg_trie_accepted*)
+ ST.accept_buff =(reg_trie_accepted*)
SvGROW(sv_accept_buff,
bufflen * sizeof(reg_trie_accepted));
}
SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
+ sizeof(reg_trie_accepted));
}
- st->u.trie.accept_buff[st->u.trie.accepted].wordnum = trie->states[state].wordnum;
- st->u.trie.accept_buff[st->u.trie.accepted].endpos = uc;
- ++st->u.trie.accepted;
+ ST.accept_buff[ST.accepted].wordnum = trie->states[state].wordnum;
+ ST.accept_buff[ST.accepted].endpos = uc;
+ ++ST.accepted;
}
base = trie->states[ state ].trans.base;
- DEBUG_TRIE_EXECUTE_r(
+ DEBUG_TRIE_EXECUTE_r({
+ dump_exec_pos( (char *)uc, scan, do_utf8 );
PerlIO_printf( Perl_debug_log,
"%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
- REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
- (UV)state, (UV)base, (UV)st->u.trie.accepted );
- );
+ 2+PL_regindent * 2, "", PL_colors[4],
+ (UV)state, (UV)base, (UV)ST.accepted );
+ });
if ( base ) {
- switch (trie_type) {
- case trie_uft8_fold:
- if ( foldlen>0 ) {
- uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
- foldlen -= len;
- uscan += len;
- len=0;
- } else {
- U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
- uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
- uvc = to_uni_fold( uvc, foldbuf, &foldlen );
- foldlen -= UNISKIP( uvc );
- uscan = foldbuf + UNISKIP( uvc );
- }
- break;
- case trie_utf8:
- uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN,
- &len, uniflags );
- break;
- case trie_plain:
- uvc = (UV)*uc;
- len = 1;
- }
-
- if (uvc < 256) {
- charid = trie->charmap[ uvc ];
- }
- else {
- charid = 0;
- if (trie->widecharmap) {
- SV** svpp = (SV**)NULL;
- svpp = hv_fetch(trie->widecharmap,
- (char*)&uvc, sizeof(UV), 0);
- if (svpp)
- charid = (U16)SvIV(*svpp);
- }
- }
+ REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len,
+ uvc, charid, foldlen, foldbuf, uniflags);
if (charid &&
(base + charid > trie->uniquecharcount )
charid, uvc, (UV)state, PL_colors[5] );
);
}
- if (!st->u.trie.accepted )
+ if (!ST.accepted )
sayNO;
+ DEBUG_EXECUTE_r(
+ PerlIO_printf( Perl_debug_log,
+ "%*s %sgot %"IVdf" possible matches%s\n",
+ REPORT_CODE_OFF + PL_regindent * 2, "",
+ PL_colors[4], (IV)ST.accepted, PL_colors[5] );
+ );
+ }
+
+ /* FALL THROUGH */
+
+ case TRIE_next_fail: /* we failed - try next alternative */
+
+ if ( ST.accepted == 1 ) {
+ /* only one choice left - just continue */
+ DEBUG_EXECUTE_r({
+ reg_trie_data * const trie
+ = (reg_trie_data*)rex->data->data[ ARG(ST.me) ];
+ SV ** const tmp = RX_DEBUG(reginfo->prog)
+ ? av_fetch( trie->words, ST.accept_buff[ 0 ].wordnum-1, 0 )
+ : NULL;
+ PerlIO_printf( Perl_debug_log,
+ "%*s %sonly one match left: #%d <%s>%s\n",
+ REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
+ ST.accept_buff[ 0 ].wordnum,
+ tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
+ PL_colors[5] );
+ });
+ PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
+ /* in this case we free tmps/leave before we call regmatch
+ as we won't be using accept_buff again. */
+ FREETMPS;
+ LEAVE;
+ locinput = PL_reginput;
+ nextchr = UCHARAT(locinput);
+ scan = ST.B;
+ continue; /* execute rest of RE */
+ }
+
+ if (!ST.accepted-- ) {
+ FREETMPS;
+ LEAVE;
+ sayNO;
+ }
+
/*
- There was at least one accepting state that we
- transitioned through. Presumably the number of accepting
- states is going to be low, typically one or two. So we
- simply scan through to find the one with lowest wordnum.
- Once we find it, we swap the last state into its place
- and decrement the size. We then try to match the rest of
- the pattern at the point where the word ends, if we
- succeed then we end the loop, otherwise the loop
- eventually terminates once all of the accepting states
- have been tried.
- */
+ There are at least two accepting states left. Presumably
+ the number of accepting states is going to be low,
+ typically two. So we simply scan through to find the one
+ with lowest wordnum. Once we find it, we swap the last
+ state into its place and decrement the size. We then try to
+ match the rest of the pattern at the point where the word
+ ends. If we succeed, control just continues along the
+ regex; if we fail we return here to try the next accepting
+ state
+ */
- if ( st->u.trie.accepted == 1 ) {
- DEBUG_EXECUTE_r({
- SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ 0 ].wordnum-1, 0 );
- PerlIO_printf( Perl_debug_log,
- "%*s %sonly one match : #%d <%s>%s\n",
- REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
- st->u.trie.accept_buff[ 0 ].wordnum,
- tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
- PL_colors[5] );
- });
- PL_reginput = (char *)st->u.trie.accept_buff[ 0 ].endpos;
- /* in this case we free tmps/leave before we call regmatch
- as we wont be using accept_buff again. */
- FREETMPS;
- LEAVE;
- REGMATCH(scan + NEXT_OFF(scan), TRIE1);
- /*** all unsaved local vars undefined at this point */
- } else {
- DEBUG_EXECUTE_r(
- PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n",
- REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)st->u.trie.accepted,
- PL_colors[5] );
- );
- while ( !result && st->u.trie.accepted-- ) {
- U32 best = 0;
- U32 cur;
- for( cur = 1 ; cur <= st->u.trie.accepted ; cur++ ) {
- DEBUG_TRIE_EXECUTE_r(
- PerlIO_printf( Perl_debug_log,
- "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
- REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
- (IV)best, st->u.trie.accept_buff[ best ].wordnum, (IV)cur,
- st->u.trie.accept_buff[ cur ].wordnum, PL_colors[5] );
- );
-
- if (st->u.trie.accept_buff[cur].wordnum <
- st->u.trie.accept_buff[best].wordnum)
- best = cur;
- }
- DEBUG_EXECUTE_r({
- reg_trie_data * const trie = (reg_trie_data*)
- rex->data->data[ARG(scan)];
- SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ best ].wordnum - 1, 0 );
- PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n",
- REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
- st->u.trie.accept_buff[best].wordnum,
- tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr", (void*)scan,
- PL_colors[5] );
- });
- if ( best<st->u.trie.accepted ) {
- reg_trie_accepted tmp = st->u.trie.accept_buff[ best ];
- st->u.trie.accept_buff[ best ] = st->u.trie.accept_buff[ st->u.trie.accepted ];
- st->u.trie.accept_buff[ st->u.trie.accepted ] = tmp;
- best = st->u.trie.accepted;
- }
- PL_reginput = (char *)st->u.trie.accept_buff[ best ].endpos;
-
- /*
- as far as I can tell we only need the SAVETMPS/FREETMPS
- for re's with EVAL in them but I'm leaving them in for
- all until I can be sure.
- */
- SAVETMPS;
- REGMATCH(scan + NEXT_OFF(scan), TRIE2);
- /*** all unsaved local vars undefined at this point */
- FREETMPS;
- }
- FREETMPS;
- LEAVE;
+ {
+ U32 best = 0;
+ U32 cur;
+ for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
+ DEBUG_TRIE_EXECUTE_r(
+ PerlIO_printf( Perl_debug_log,
+ "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
+ REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
+ (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
+ ST.accept_buff[ cur ].wordnum, PL_colors[5] );
+ );
+
+ if (ST.accept_buff[cur].wordnum <
+ ST.accept_buff[best].wordnum)
+ best = cur;
}
-
- if (result) {
- sayYES;
- } else {
- sayNO;
+
+ DEBUG_EXECUTE_r({
+ reg_trie_data * const trie
+ = (reg_trie_data*)rex->data->data[ ARG(ST.me) ];
+ SV ** const tmp = RX_DEBUG(reginfo->prog)
+ ? av_fetch( trie->words, ST.accept_buff[ best ].wordnum - 1, 0 )
+ : NULL;
+ PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at node #%d %s\n",
+ REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
+ ST.accept_buff[best].wordnum,
+ tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr", REG_NODE_NUM(scan),
+ PL_colors[5] );
+ });
+
+ if ( best<ST.accepted ) {
+ reg_trie_accepted tmp = ST.accept_buff[ best ];
+ ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
+ ST.accept_buff[ ST.accepted ] = tmp;
+ best = ST.accepted;
}
+ PL_reginput = (char *)ST.accept_buff[ best ].endpos;
}
- /* unreached codepoint */
+ PUSH_STATE_GOTO(TRIE_next, ST.B);
+ /* NOTREACHED */
+
+#undef ST
+
case EXACT: {
char *s = STRING(scan);
st->ln = STR_LEN(scan);
if (do_utf8 != UTF) {
/* The target and the pattern have differing utf8ness. */
char *l = locinput;
- const char *e = s + st->ln;
+ const char * const e = s + st->ln;
if (do_utf8) {
/* The target is utf8, the pattern is not utf8. */
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
case EXACTF: {
- char *s = STRING(scan);
+ char * const s = STRING(scan);
st->ln = STR_LEN(scan);
if (do_utf8 || UTF) {
/* Either target or the pattern are utf8. */
- char *l = locinput;
+ const char * const l = locinput;
char *e = PL_regeol;
if (ibcmp_utf8(s, 0, st->ln, (bool)UTF,
break;
case BACK:
break;
- case EVAL:
+
+#undef ST
+#define ST st->u.eval
+
+ case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X|Y)B/ */
{
SV *ret;
{
* necessary */
MAGIC *mg = NULL;
- SV *sv;
+ const SV *sv;
if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
mg = mg_find(sv, PERL_MAGIC_qr);
else if (SvSMAGICAL(ret)) {
(strlen(re->precomp) > 60 ? "..." : ""))
);
- st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
- REGCP_SET(st->u.eval.lastcp);
+ ST.cp = regcppush(0); /* Save *all* the positions. */
+ REGCP_SET(ST.lastcp);
*PL_reglastparen = 0;
*PL_reglastcloseparen = 0;
PL_reginput = locinput;
PL_reg_maxiter = 0;
st->logical = 0;
- st->u.eval.toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
+ ST.toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
((re->reganch & ROPT_UTF8) != 0);
- if (st->u.eval.toggleutf) PL_reg_flags ^= RF_utf8;
- st->u.eval.prev_rex = rex;
+ if (ST.toggleutf) PL_reg_flags ^= RF_utf8;
+ ST.prev_rex = rex;
rex = re;
- /* resume to current state on success */
- st->u.yes.prev_yes_state = yes_state;
- yes_state = st;
- PUSH_STATE(newst, resume_EVAL);
- st = newst;
-
+ ST.B = next;
/* now continue from first node in postoned RE */
- next = re->program + 1;
- break;
+ PUSH_YES_STATE_GOTO(EVAL_A, re->program + 1);
/* NOTREACHED */
}
/* /(?(?{...})X|Y)/ */
st->logical = 0;
break;
}
+
+ case EVAL_A: /* successfully ran inner rex (??{rex}) */
+ if (ST.toggleutf)
+ PL_reg_flags ^= RF_utf8;
+ ReREFCNT_dec(rex);
+ rex = ST.prev_rex;
+ /* XXXX This is too dramatic a measure... */
+ PL_reg_maxiter = 0;
+ /* Restore parens of the caller without popping the
+ * savestack */
+ {
+ const I32 tmp = PL_savestack_ix;
+ PL_savestack_ix = ST.lastcp;
+ regcppop(rex);
+ PL_savestack_ix = tmp;
+ }
+ PL_reginput = locinput;
+ /* continue at the node following the (??{...}) */
+ scan = ST.B;
+ continue;
+
+ case EVAL_A_fail: /* unsuccessfully ran inner rex (??{rex}) */
+ /* Restore state to the outer re then re-throw the failure */
+ if (ST.toggleutf)
+ PL_reg_flags ^= RF_utf8;
+ ReREFCNT_dec(rex);
+ rex = ST.prev_rex;
+
+ /* XXXX This is too dramatic a measure... */
+ PL_reg_maxiter = 0;
+
+ PL_reginput = locinput;
+ REGCP_UNWIND(ST.lastcp);
+ regcppop(rex);
+ sayNO_SILENT;
+
+#undef ST
+
case OPEN:
n = ARG(scan); /* which paren pair */
PL_reg_start_tmp[n] = locinput;
*that* much linear. */
if (!PL_reg_maxiter) {
PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
+ /* possible overflow for long strings and many CURLYX's */
+ if (PL_reg_maxiter < 0)
+ PL_reg_maxiter = I32_MAX;
PL_reg_leftiter = PL_reg_maxiter;
}
if (PL_reg_leftiter-- == 0) {
CACHEsayNO;
}
/* NOTREACHED */
- case BRANCHJ:
+
+#undef ST
+#define ST st->u.branch
+
+ case BRANCHJ: /* /(...|A|...)/ with long next pointer */
next = scan + ARG(scan);
if (next == scan)
next = NULL;
- inner = NEXTOPER(NEXTOPER(scan));
- goto do_branch;
- case BRANCH:
- inner = NEXTOPER(scan);
- do_branch:
- {
- I32 type;
- type = OP(scan);
- if (!next || OP(next) != type) /* No choice. */
- next = inner; /* Avoid recursion. */
- else {
- const I32 lastparen = *PL_reglastparen;
- /* Put unwinding data on stack */
- const I32 unwind1 = SSNEWt(1,re_unwind_branch_t);
- re_unwind_branch_t * const uw = SSPTRt(unwind1,re_unwind_branch_t);
-
- uw->prev = st->unwind;
- st->unwind = unwind1;
- uw->type = ((type == BRANCH)
- ? RE_UNWIND_BRANCH
- : RE_UNWIND_BRANCHJ);
- uw->lastparen = lastparen;
- uw->next = next;
- uw->locinput = locinput;
- uw->nextchr = nextchr;
- uw->minmod = st->minmod;
-#ifdef DEBUGGING
- uw->regindent = ++PL_regindent;
-#endif
+ scan = NEXTOPER(scan);
+ /* FALL THROUGH */
- REGCP_SET(uw->lastcp);
+ case BRANCH: /* /(...|A|...)/ */
+ scan = NEXTOPER(scan); /* scan now points to inner node */
+ if (!next || (OP(next) != BRANCH && OP(next) != BRANCHJ))
+ /* last branch; skip state push and jump direct to node */
+ continue;
+ ST.lastparen = *PL_reglastparen;
+ ST.next_branch = next;
+ REGCP_SET(ST.cp);
+ PL_reginput = locinput;
- /* Now go into the first branch */
- next = inner;
- }
- }
- break;
+ /* Now go into the branch */
+ PUSH_STATE_GOTO(BRANCH_next, scan);
+ /* NOTREACHED */
+
+ case BRANCH_next_fail: /* that branch failed; try the next, if any */
+ REGCP_UNWIND(ST.cp);
+ for (n = *PL_reglastparen; n > ST.lastparen; n--)
+ PL_regendp[n] = -1;
+ *PL_reglastparen = n;
+ scan = ST.next_branch;
+ /* no more branches? */
+ if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ))
+ sayNO;
+ continue; /* execute next BRANCH[J] op */
+ /* NOTREACHED */
+
case MINMOD:
st->minmod = 1;
break;
- case CURLYM:
- {
- st->u.curlym.l = st->u.curlym.matches = 0;
-
- /* We suppose that the next guy does not need
- backtracking: in particular, it is of constant non-zero length,
- and has no parenths to influence future backrefs. */
- st->ln = ARG1(scan); /* min to match */
- n = ARG2(scan); /* max to match */
- st->u.curlym.paren = scan->flags;
- if (st->u.curlym.paren) {
- if (st->u.curlym.paren > PL_regsize)
- PL_regsize = st->u.curlym.paren;
- if (st->u.curlym.paren > (I32)*PL_reglastparen)
- *PL_reglastparen = st->u.curlym.paren;
- }
+
+#undef ST
+#define ST st->u.curlym
+
+ case CURLYM: /* /A{m,n}B/ where A is fixed-length */
+
+ /* This is an optimisation of CURLYX that enables us to push
+ * only a single backtracking state, no matter how many matches
+ * there are in {m,n}. It relies on the pattern being constant
+ * length, with no parens to influence future backrefs
+ */
+
+ ST.me = scan;
scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
- if (st->u.curlym.paren)
+
+ /* if paren positive, emulate an OPEN/CLOSE around A */
+ if (ST.me->flags) {
+ I32 paren = ST.me->flags;
+ if (paren > PL_regsize)
+ PL_regsize = paren;
+ if (paren > (I32)*PL_reglastparen)
+ *PL_reglastparen = paren;
scan += NEXT_OFF(scan); /* Skip former OPEN. */
- PL_reginput = locinput;
- st->u.curlym.maxwanted = st->minmod ? st->ln : n;
- while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) {
- /* resume to current state on success */
- st->u.yes.prev_yes_state = yes_state;
- yes_state = st;
- REGMATCH(scan, CURLYM1);
- yes_state = st->u.yes.prev_yes_state;
- /*** all unsaved local vars undefined at this point */
- if (!result)
- break;
- /* on first match, determine length, u.curlym.l */
- if (!st->u.curlym.matches++) {
- if (PL_reg_match_utf8) {
- char *s = locinput;
- while (s < PL_reginput) {
- st->u.curlym.l++;
- s += UTF8SKIP(s);
- }
- }
- else {
- st->u.curlym.l = PL_reginput - locinput;
- }
- if (st->u.curlym.l == 0) {
- st->u.curlym.matches = st->u.curlym.maxwanted;
- break;
- }
- }
- locinput = PL_reginput;
}
+ ST.A = scan;
+ ST.B = next;
+ ST.alen = 0;
+ ST.count = 0;
+ ST.minmod = st->minmod;
+ st->minmod = 0;
+ ST.c1 = CHRTEST_UNINIT;
+ REGCP_SET(ST.cp);
+
+ if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
+ goto curlym_do_B;
+ curlym_do_A: /* execute the A in /A{m,n}B/ */
PL_reginput = locinput;
- if (st->u.curlym.matches < st->ln) {
- st->minmod = 0;
- sayNO;
- }
+ PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
+ /* NOTREACHED */
+
+ case CURLYM_A: /* we've just matched an A */
+ locinput = st->locinput;
+ nextchr = UCHARAT(locinput);
+ ST.count++;
+ /* after first match, determine A's length: u.curlym.alen */
+ if (ST.count == 1) {
+ if (PL_reg_match_utf8) {
+ char *s = locinput;
+ while (s < PL_reginput) {
+ ST.alen++;
+ s += UTF8SKIP(s);
+ }
+ }
+ else {
+ ST.alen = PL_reginput - locinput;
+ }
+ if (ST.alen == 0)
+ ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
+ }
DEBUG_EXECUTE_r(
PerlIO_printf(Perl_debug_log,
- "%*s matched %"IVdf" times, len=%"IVdf"...\n",
+ "%*s CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
(int)(REPORT_CODE_OFF+PL_regindent*2), "",
- (IV) st->u.curlym.matches, (IV)st->u.curlym.l)
+ (IV) ST.count, (IV)ST.alen)
);
- /* calculate c1 and c1 for possible match of 1st char
- * following curly */
- st->u.curlym.c1 = st->u.curlym.c2 = CHRTEST_VOID;
- if (HAS_TEXT(next) || JUMPABLE(next)) {
- regnode *text_node = next;
- if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
- if (HAS_TEXT(text_node)
- && PL_regkind[(U8)OP(text_node)] != REF)
- {
- st->u.curlym.c1 = (U8)*STRING(text_node);
- st->u.curlym.c2 =
- (OP(text_node) == EXACTF || OP(text_node) == REFF)
- ? PL_fold[st->u.curlym.c1]
- : (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
- ? PL_fold_locale[st->u.curlym.c1]
- : st->u.curlym.c1;
- }
- }
+ locinput = PL_reginput;
+ if (ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)))
+ goto curlym_do_A; /* try to match another A */
+ goto curlym_do_B; /* try to match B */
- REGCP_SET(st->u.curlym.lastcp);
+ case CURLYM_A_fail: /* just failed to match an A */
+ REGCP_UNWIND(ST.cp);
+ if (ST.minmod || ST.count < ARG1(ST.me) /* min*/ )
+ sayNO;
- st->u.curlym.minmod = st->minmod;
- st->minmod = 0;
- while (st->u.curlym.matches >= st->ln
- && (st->u.curlym.matches <= n
- /* for REG_INFTY, ln could overflow to negative */
- || (n == REG_INFTY && st->u.curlym.matches >= 0)))
- {
- /* If it could work, try it. */
- if (st->u.curlym.c1 == CHRTEST_VOID ||
- UCHARAT(PL_reginput) == st->u.curlym.c1 ||
- UCHARAT(PL_reginput) == st->u.curlym.c2)
- {
- DEBUG_EXECUTE_r(
- PerlIO_printf(Perl_debug_log,
- "%*s trying tail with matches=%"IVdf"...\n",
- (int)(REPORT_CODE_OFF+PL_regindent*2),
- "", (IV)st->u.curlym.matches)
- );
- if (st->u.curlym.paren) {
- if (st->u.curlym.matches) {
- PL_regstartp[st->u.curlym.paren]
- = HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
- PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
- }
- else
- PL_regendp[st->u.curlym.paren] = -1;
- }
- /* resume to current state on success */
- st->u.yes.prev_yes_state = yes_state;
- yes_state = st;
- REGMATCH(next, CURLYM2);
- yes_state = st->u.yes.prev_yes_state;
- /*** all unsaved local vars undefined at this point */
- if (result)
- /* XXX tmp sayYES; */
- sayYES_FINAL;
- REGCP_UNWIND(st->u.curlym.lastcp);
- }
- /* Couldn't or didn't -- move forward/backward. */
- if (st->u.curlym.minmod) {
- PL_reginput = locinput;
- /* resume to current state on success */
- st->u.yes.prev_yes_state = yes_state;
- yes_state = st;
- REGMATCH(scan, CURLYM3);
- yes_state = st->u.yes.prev_yes_state;
- /*** all unsaved local vars undefined at this point */
- if (result) {
- st->u.curlym.matches++;
- locinput = PL_reginput;
+ curlym_do_B: /* execute the B in /A{m,n}B/ */
+ PL_reginput = locinput;
+ if (ST.c1 == CHRTEST_UNINIT) {
+ /* calculate c1 and c2 for possible match of 1st char
+ * following curly */
+ ST.c1 = ST.c2 = CHRTEST_VOID;
+ if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
+ regnode *text_node = ST.B;
+ if (! HAS_TEXT(text_node))
+ FIND_NEXT_IMPT(text_node);
+ if (HAS_TEXT(text_node)
+ && PL_regkind[OP(text_node)] != REF)
+ {
+ ST.c1 = (U8)*STRING(text_node);
+ ST.c2 =
+ (OP(text_node) == EXACTF || OP(text_node) == REFF)
+ ? PL_fold[ST.c1]
+ : (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
+ ? PL_fold_locale[ST.c1]
+ : ST.c1;
}
- else
- sayNO;
}
- else {
- st->u.curlym.matches--;
- locinput = HOPc(locinput, -st->u.curlym.l);
- PL_reginput = locinput;
+ }
+
+ DEBUG_EXECUTE_r(
+ PerlIO_printf(Perl_debug_log,
+ "%*s CURLYM trying tail with matches=%"IVdf"...\n",
+ (int)(REPORT_CODE_OFF+PL_regindent*2),
+ "", (IV)ST.count)
+ );
+ if (ST.c1 != CHRTEST_VOID
+ && UCHARAT(PL_reginput) != ST.c1
+ && UCHARAT(PL_reginput) != ST.c2)
+ {
+ /* simulate B failing */
+ state_num = CURLYM_B_fail;
+ goto reenter_switch;
+ }
+
+ if (ST.me->flags) {
+ /* mark current A as captured */
+ I32 paren = ST.me->flags;
+ if (ST.count) {
+ PL_regstartp[paren]
+ = HOPc(PL_reginput, -ST.alen) - PL_bostr;
+ PL_regendp[paren] = PL_reginput - PL_bostr;
}
+ else
+ PL_regendp[paren] = -1;
}
- sayNO;
+ PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
/* NOTREACHED */
- break;
- }
- case CURLYN:
- st->u.plus.paren = scan->flags; /* Which paren to set */
- if (st->u.plus.paren > PL_regsize)
- PL_regsize = st->u.plus.paren;
- if (st->u.plus.paren > (I32)*PL_reglastparen)
- *PL_reglastparen = st->u.plus.paren;
- st->ln = ARG1(scan); /* min to match */
- n = ARG2(scan); /* max to match */
- scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
- goto repeat;
- case CURLY:
- st->u.plus.paren = 0;
- st->ln = ARG1(scan); /* min to match */
- n = ARG2(scan); /* max to match */
- scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
- goto repeat;
- case STAR:
- st->ln = 0;
- n = REG_INFTY;
+
+ case CURLYM_B_fail: /* just failed to match a B */
+ REGCP_UNWIND(ST.cp);
+ if (ST.minmod) {
+ if (ST.count == ARG2(ST.me) /* max */)
+ sayNO;
+ goto curlym_do_A; /* try to match a further A */
+ }
+ /* backtrack one A */
+ if (ST.count == ARG1(ST.me) /* min */)
+ sayNO;
+ ST.count--;
+ locinput = HOPc(locinput, -ST.alen);
+ goto curlym_do_B; /* try to match B */
+
+#undef ST
+#define ST st->u.curly
+/* CURLY_SETPAREN(paren, success): when paren is non-zero, update the
+ * capture offsets for that group. On success the group covers the one
+ * position ending at locinput (start = HOPc(locinput, -1), end = locinput,
+ * both stored relative to PL_bostr); otherwise the group's end is set to
+ * -1. Does nothing when paren is 0. Expands to a bare if statement, takes
+ * locinput from the expansion site, and evaluates paren more than once. */
+#define CURLY_SETPAREN(paren, success) \
+ if (paren) { \
+ if (success) { \
+ PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr; \
+ PL_regendp[paren] = locinput - PL_bostr; \
+ } \
+ else \
+ PL_regendp[paren] = -1; \
+ }
+
+ case STAR: /* /A*B/ where A is width 1 */
+ ST.paren = 0;
+ ST.min = 0;
+ ST.max = REG_INFTY;
scan = NEXTOPER(scan);
- st->u.plus.paren = 0;
goto repeat;
- case PLUS:
- st->ln = 1;
- n = REG_INFTY;
+ case PLUS: /* /A+B/ where A is width 1 */
+ ST.paren = 0;
+ ST.min = 1;
+ ST.max = REG_INFTY;
scan = NEXTOPER(scan);
- st->u.plus.paren = 0;
+ goto repeat;
+ case CURLYN: /* /(A){m,n}B/ where A is width 1 */
+ ST.paren = scan->flags; /* Which paren to set */
+ if (ST.paren > PL_regsize)
+ PL_regsize = ST.paren;
+ if (ST.paren > (I32)*PL_reglastparen)
+ *PL_reglastparen = ST.paren;
+ ST.min = ARG1(scan); /* min to match */
+ ST.max = ARG2(scan); /* max to match */
+ scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
+ goto repeat;
+ case CURLY: /* /A{m,n}B/ where A is width 1 */
+ ST.paren = 0;
+ ST.min = ARG1(scan); /* min to match */
+ ST.max = ARG2(scan); /* max to match */
+ scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
repeat:
/*
* Lookahead to avoid useless match attempts
* when we know what character comes next.
- */
-
- /*
+ *
* Used to only do .*x and .*?x, but now it allows
* for )'s, ('s and (?{ ... })'s to be in the way
* of the quantifier and the EXACT-like node. -- japhy
*/
+ if (ST.min > ST.max) /* XXX make this a compile-time check? */
+ sayNO;
if (HAS_TEXT(next) || JUMPABLE(next)) {
U8 *s;
regnode *text_node = next;
- if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
+ if (! HAS_TEXT(text_node))
+ FIND_NEXT_IMPT(text_node);
if (! HAS_TEXT(text_node))
- st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
+ ST.c1 = ST.c2 = CHRTEST_VOID;
else {
- if (PL_regkind[(U8)OP(text_node)] == REF) {
- st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
+ if (PL_regkind[OP(text_node)] == REF) {
+ ST.c1 = ST.c2 = CHRTEST_VOID;
goto assume_ok_easy;
}
- else { s = (U8*)STRING(text_node); }
+ else
+ s = (U8*)STRING(text_node);
if (!UTF) {
- st->u.plus.c2 = st->u.plus.c1 = *s;
+ ST.c2 = ST.c1 = *s;
if (OP(text_node) == EXACTF || OP(text_node) == REFF)
- st->u.plus.c2 = PL_fold[st->u.plus.c1];
+ ST.c2 = PL_fold[ST.c1];
else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
- st->u.plus.c2 = PL_fold_locale[st->u.plus.c1];
+ ST.c2 = PL_fold_locale[ST.c1];
}
else { /* UTF */
if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
- st->u.plus.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
+ ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
uniflags);
- st->u.plus.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
+ ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
uniflags);
}
else {
- st->u.plus.c2 = st->u.plus.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
+ ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
uniflags);
}
}
}
}
else
- st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
+ ST.c1 = ST.c2 = CHRTEST_VOID;
assume_ok_easy:
+
+ ST.A = scan;
+ ST.B = next;
PL_reginput = locinput;
if (st->minmod) {
st->minmod = 0;
- if (st->ln && regrepeat(rex, scan, st->ln) < st->ln)
+ if (ST.min && regrepeat(rex, ST.A, ST.min) < ST.min)
sayNO;
+ ST.count = ST.min;
locinput = PL_reginput;
- REGCP_SET(st->u.plus.lastcp);
- if (st->u.plus.c1 != CHRTEST_VOID) {
- st->u.plus.old = locinput;
- st->u.plus.count = 0;
-
- if (n == REG_INFTY) {
- st->u.plus.e = PL_regeol - 1;
- if (do_utf8)
- while (UTF8_IS_CONTINUATION(*(U8*)st->u.plus.e))
- st->u.plus.e--;
- }
- else if (do_utf8) {
- int m = n - st->ln;
- for (st->u.plus.e = locinput;
- m >0 && st->u.plus.e + UTF8SKIP(st->u.plus.e) <= PL_regeol; m--)
- st->u.plus.e += UTF8SKIP(st->u.plus.e);
+ REGCP_SET(ST.cp);
+ if (ST.c1 == CHRTEST_VOID)
+ goto curly_try_B_min;
+
+ ST.oldloc = locinput;
+
+ /* set ST.maxpos to the furthest point along the
+ * string that could possibly match */
+ if (ST.max == REG_INFTY) {
+ ST.maxpos = PL_regeol - 1;
+ if (do_utf8)
+ while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
+ ST.maxpos--;
+ }
+ else if (do_utf8) {
+ int m = ST.max - ST.min;
+ for (ST.maxpos = locinput;
+ m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
+ ST.maxpos += UTF8SKIP(ST.maxpos);
+ }
+ else {
+ ST.maxpos = locinput + ST.max - ST.min;
+ if (ST.maxpos >= PL_regeol)
+ ST.maxpos = PL_regeol - 1;
+ }
+ goto curly_try_B_min_known;
+
+ }
+ else {
+ ST.count = regrepeat(rex, ST.A, ST.max);
+ locinput = PL_reginput;
+ if (ST.count < ST.min)
+ sayNO;
+ if ((ST.count > ST.min)
+ && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
+ {
+ /* A{m,n} must come at the end of the string, there's
+ * no point in backing off ... */
+ ST.min = ST.count;
+ /* ...except that $ and \Z can match before *and* after
+ newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
+ We may back off by one in this case. */
+ if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
+ ST.min--;
+ }
+ REGCP_SET(ST.cp);
+ goto curly_try_B_max;
+ }
+ /* NOTREACHED */
+
+
+ case CURLY_B_min_known_fail:
+ /* failed to find B in a non-greedy match where c1,c2 valid */
+ if (ST.paren && ST.count)
+ PL_regendp[ST.paren] = -1;
+
+ PL_reginput = locinput; /* Could be reset... */
+ REGCP_UNWIND(ST.cp);
+ /* Couldn't or didn't -- move forward. */
+ ST.oldloc = locinput;
+ if (do_utf8)
+ locinput += UTF8SKIP(locinput);
+ else
+ locinput++;
+ ST.count++;
+ curly_try_B_min_known:
+ /* find the next place where 'B' could work, then call B */
+ {
+ int n;
+ if (do_utf8) {
+ n = (ST.oldloc == locinput) ? 0 : 1;
+ if (ST.c1 == ST.c2) {
+ STRLEN len;
+ /* set n to utf8_distance(oldloc, locinput) */
+ while (locinput <= ST.maxpos &&
+ utf8n_to_uvchr((U8*)locinput,
+ UTF8_MAXBYTES, &len,
+ uniflags) != (UV)ST.c1) {
+ locinput += len;
+ n++;
+ }
}
else {
- st->u.plus.e = locinput + n - st->ln;
- if (st->u.plus.e >= PL_regeol)
- st->u.plus.e = PL_regeol - 1;
- }
- while (1) {
- /* Find place 'next' could work */
- if (!do_utf8) {
- if (st->u.plus.c1 == st->u.plus.c2) {
- while (locinput <= st->u.plus.e &&
- UCHARAT(locinput) != st->u.plus.c1)
- locinput++;
- } else {
- while (locinput <= st->u.plus.e
- && UCHARAT(locinput) != st->u.plus.c1
- && UCHARAT(locinput) != st->u.plus.c2)
- locinput++;
- }
- st->u.plus.count = locinput - st->u.plus.old;
- }
- else {
- if (st->u.plus.c1 == st->u.plus.c2) {
- STRLEN len;
- /* count initialised to
- * utf8_distance(old, locinput) */
- while (locinput <= st->u.plus.e &&
- utf8n_to_uvchr((U8*)locinput,
- UTF8_MAXBYTES, &len,
- uniflags) != (UV)st->u.plus.c1) {
- locinput += len;
- st->u.plus.count++;
- }
- } else {
- /* count initialised to
- * utf8_distance(old, locinput) */
- while (locinput <= st->u.plus.e) {
- STRLEN len;
- const UV c = utf8n_to_uvchr((U8*)locinput,
- UTF8_MAXBYTES, &len,
- uniflags);
- if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
- break;
- locinput += len;
- st->u.plus.count++;
- }
- }
- }
- if (locinput > st->u.plus.e)
- sayNO;
- /* PL_reginput == old now */
- if (locinput != st->u.plus.old) {
- st->ln = 1; /* Did some */
- if (regrepeat(rex, scan, st->u.plus.count) < st->u.plus.count)
- sayNO;
+ /* set n to utf8_distance(oldloc, locinput) */
+ while (locinput <= ST.maxpos) {
+ STRLEN len;
+ const UV c = utf8n_to_uvchr((U8*)locinput,
+ UTF8_MAXBYTES, &len,
+ uniflags);
+ if (c == (UV)ST.c1 || c == (UV)ST.c2)
+ break;
+ locinput += len;
+ n++;
}
- /* PL_reginput == locinput now */
- TRYPAREN(st->u.plus.paren, st->ln, locinput, PLUS1);
- /*** all unsaved local vars undefined at this point */
- PL_reginput = locinput; /* Could be reset... */
- REGCP_UNWIND(st->u.plus.lastcp);
- /* Couldn't or didn't -- move forward. */
- st->u.plus.old = locinput;
- if (do_utf8)
- locinput += UTF8SKIP(locinput);
- else
- locinput++;
- st->u.plus.count = 1;
}
}
- else
- while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
- UV c;
- if (st->u.plus.c1 != CHRTEST_VOID) {
- if (do_utf8)
- c = utf8n_to_uvchr((U8*)PL_reginput,
- UTF8_MAXBYTES, 0,
- uniflags);
- else
- c = UCHARAT(PL_reginput);
- /* If it could work, try it. */
- if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
- {
- TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS2);
- /*** all unsaved local vars undefined at this point */
- REGCP_UNWIND(st->u.plus.lastcp);
- }
- }
- /* If it could work, try it. */
- else if (st->u.plus.c1 == CHRTEST_VOID)
- {
- TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS3);
- /*** all unsaved local vars undefined at this point */
- REGCP_UNWIND(st->u.plus.lastcp);
+ else {
+ if (ST.c1 == ST.c2) {
+ while (locinput <= ST.maxpos &&
+ UCHARAT(locinput) != ST.c1)
+ locinput++;
}
- /* Couldn't or didn't -- move forward. */
- PL_reginput = locinput;
- if (regrepeat(rex, scan, 1)) {
- st->ln++;
- locinput = PL_reginput;
+ else {
+ while (locinput <= ST.maxpos
+ && UCHARAT(locinput) != ST.c1
+ && UCHARAT(locinput) != ST.c2)
+ locinput++;
}
- else
+ n = locinput - ST.oldloc;
+ }
+ if (locinput > ST.maxpos)
+ sayNO;
+ /* PL_reginput == oldloc now */
+ if (n) {
+ ST.count += n;
+ if (regrepeat(rex, ST.A, n) < n)
sayNO;
}
+ PL_reginput = locinput;
+ CURLY_SETPAREN(ST.paren, ST.count);
+ PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
}
- else {
- n = regrepeat(rex, scan, n);
+ /* NOTREACHED */
+
+
+ case CURLY_B_min_fail:
+ /* failed to find B in a non-greedy match where c1,c2 invalid */
+ if (ST.paren && ST.count)
+ PL_regendp[ST.paren] = -1;
+
+ REGCP_UNWIND(ST.cp);
+ /* failed -- move forward one */
+ PL_reginput = locinput;
+ if (regrepeat(rex, ST.A, 1)) {
+ ST.count++;
locinput = PL_reginput;
- if (st->ln < n && PL_regkind[(U8)OP(next)] == EOL &&
- (OP(next) != MEOL ||
- OP(next) == SEOL || OP(next) == EOS))
+ if (ST.count <= ST.max || (ST.max == REG_INFTY &&
+ ST.count > 0)) /* count overflow ? */
{
- st->ln = n; /* why back off? */
- /* ...because $ and \Z can match before *and* after
- newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
- We should back off by one in this case. */
- if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
- st->ln--;
- }
- REGCP_SET(st->u.plus.lastcp);
- {
- UV c = 0;
- while (n >= st->ln) {
- if (st->u.plus.c1 != CHRTEST_VOID) {
- if (do_utf8)
- c = utf8n_to_uvchr((U8*)PL_reginput,
- UTF8_MAXBYTES, 0,
- uniflags);
- else
- c = UCHARAT(PL_reginput);
- }
- /* If it could work, try it. */
- if (st->u.plus.c1 == CHRTEST_VOID || c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
- {
- TRYPAREN(st->u.plus.paren, n, PL_reginput, PLUS4);
- /*** all unsaved local vars undefined at this point */
- REGCP_UNWIND(st->u.plus.lastcp);
- }
- /* Couldn't or didn't -- back up. */
- n--;
- PL_reginput = locinput = HOPc(locinput, -1);
- }
+ curly_try_B_min:
+ CURLY_SETPAREN(ST.paren, ST.count);
+ PUSH_STATE_GOTO(CURLY_B_min, ST.B);
}
}
sayNO;
- break;
+ /* NOTREACHED */
+
+
+ curly_try_B_max:
+ /* a successful greedy match: now try to match B */
+ {
+ UV c = 0;
+ if (ST.c1 != CHRTEST_VOID)
+ c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
+ UTF8_MAXBYTES, 0, uniflags)
+ : (UV) UCHARAT(PL_reginput);
+ /* If it could work, try it. */
+ if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
+ CURLY_SETPAREN(ST.paren, ST.count);
+ PUSH_STATE_GOTO(CURLY_B_max, ST.B);
+ /* NOTREACHED */
+ }
+ }
+ /* FALL THROUGH */
+ case CURLY_B_max_fail:
+ /* failed to find B in a greedy match */
+ if (ST.paren && ST.count)
+ PL_regendp[ST.paren] = -1;
+
+ REGCP_UNWIND(ST.cp);
+ /* back up. */
+ if (--ST.count < ST.min)
+ sayNO;
+ PL_reginput = locinput = HOPc(locinput, -1);
+ goto curly_try_B_max;
+
+#undef ST
+
+
case END:
if (locinput < reginfo->till) {
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
PL_reginput = locinput; /* put where regtry can find it */
sayYES_FINAL; /* Success! */
- case SUSPEND: /* (?>FOO) */
- st->u.ifmatch.wanted = 1;
+#undef ST
+#define ST st->u.ifmatch
+
+ case SUSPEND: /* (?>A) */
+ ST.wanted = 1;
PL_reginput = locinput;
goto do_ifmatch;
- case UNLESSM: /* -ve lookaround: (?!FOO), or with flags, (?<!foo) */
- st->u.ifmatch.wanted = 0;
+ case UNLESSM: /* -ve lookaround: (?!A), or with flags, (?<!A) */
+ ST.wanted = 0;
goto ifmatch_trivial_fail_test;
- case IFMATCH: /* +ve lookaround: (?=FOO), or with flags, (?<=foo) */
- st->u.ifmatch.wanted = 1;
+ case IFMATCH: /* +ve lookaround: (?=A), or with flags, (?<=A) */
+ ST.wanted = 1;
ifmatch_trivial_fail_test:
if (scan->flags) {
char * const s = HOPBACKc(locinput, scan->flags);
/* trivial fail */
if (st->logical) {
st->logical = 0;
- st->sw = 1 - st->u.ifmatch.wanted;
+ st->sw = 1 - (bool)ST.wanted;
}
- else if (st->u.ifmatch.wanted)
+ else if (ST.wanted)
sayNO;
next = scan + ARG(scan);
if (next == scan)
PL_reginput = locinput;
do_ifmatch:
- /* resume to current state on success */
- st->u.yes.prev_yes_state = yes_state;
- yes_state = st;
- PUSH_STATE(newst, resume_IFMATCH);
- st = newst;
- next = NEXTOPER(NEXTOPER(scan));
- break;
+ ST.me = scan;
+ /* execute body of (?...A) */
+ PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
+ /* NOTREACHED */
+
+ case IFMATCH_A_fail: /* body of (?...A) failed */
+ ST.wanted = !ST.wanted;
+ /* FALL THROUGH */
+
+ case IFMATCH_A: /* body of (?...A) succeeded */
+ if (st->logical) {
+ st->logical = 0;
+ st->sw = (bool)ST.wanted;
+ }
+ else if (!ST.wanted)
+ sayNO;
+
+ if (OP(ST.me) == SUSPEND)
+ locinput = PL_reginput;
+ else {
+ locinput = PL_reginput = st->locinput;
+ nextchr = UCHARAT(locinput);
+ }
+ scan = ST.me + ARG(ST.me);
+ if (scan == ST.me)
+ scan = NULL;
+ continue; /* execute B */
+
+#undef ST
case LONGJMP:
next = scan + ARG(scan);
Perl_croak(aTHX_ "regexp memory corruption");
}
- reenter:
scan = next;
continue;
/* NOTREACHED */
+ push_yes_state:
+ /* push a state that backtracks on success */
+ st->u.yes.prev_yes_state = yes_state;
+ yes_state = st;
+ /* FALL THROUGH */
+ push_state:
+ /* push a new regex state, then continue at scan */
+ {
+ regmatch_state *newst;
+
+ depth++;
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
+ "PUSH STATE(%d)\n", depth));
+ st->locinput = locinput;
+ newst = st+1;
+ if (newst > SLAB_LAST(PL_regmatch_slab))
+ newst = S_push_slab(aTHX);
+ PL_regmatch_state = newst;
+ newst->cc = st->cc;
+ /* XXX probably don't need to initialise these */
+ newst->minmod = 0;
+ newst->sw = 0;
+ newst->logical = 0;
+
+ locinput = PL_reginput;
+ nextchr = UCHARAT(locinput);
+ st = newst;
+ continue;
+ /* NOTREACHED */
+ }
+
/* simulate recursively calling regmatch(), but without actually
* recursing - ie save the current state on the heap rather than on
* the stack, then re-enter the loop. This avoids complex regexes
regmatch_state *oldst = st;
depth++;
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "PUSH RECURSE STATE(%d)\n", depth));
/* grab the next free state slot */
st++;
st->minmod = 0;
st->sw = 0;
st->logical = 0;
- st->unwind = 0;
#ifdef DEBUGGING
PL_regindent++;
#endif
/* we have successfully completed a subexpression, but we must now
* pop to the state marked by yes_state and continue from there */
- /*XXX tmp for CURLYM*/
- regmatch_slab * const oslab = PL_regmatch_slab;
- regmatch_state * const ost = st;
- regmatch_state * const oys = yes_state;
- int odepth = depth;
-
assert(st != yes_state);
while (yes_state < SLAB_FIRST(PL_regmatch_slab)
|| yes_state > SLAB_LAST(PL_regmatch_slab))
st = SLAB_LAST(PL_regmatch_slab);
}
depth -= (st - yes_state);
- DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE TO (%d)\n", depth));
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATES (%d..%d)\n",
+ depth+1, depth+(st - yes_state)));
st = yes_state;
yes_state = st->u.yes.prev_yes_state;
PL_regmatch_state = st;
switch (st->resume_state) {
- case resume_EVAL:
- if (st->u.eval.toggleutf)
- PL_reg_flags ^= RF_utf8;
- ReREFCNT_dec(rex);
- rex = st->u.eval.prev_rex;
- /* XXXX This is too dramatic a measure... */
- PL_reg_maxiter = 0;
- /* Restore parens of the caller without popping the
- * savestack */
- {
- const I32 tmp = PL_savestack_ix;
- PL_savestack_ix = st->u.eval.lastcp;
- regcppop(rex);
- PL_savestack_ix = tmp;
- }
- PL_reginput = locinput;
- /* continue at the node following the (??{...}) */
- next = st->next;
- goto reenter;
-
- case resume_IFMATCH:
- if (st->logical) {
- st->logical = 0;
- st->sw = st->u.ifmatch.wanted;
- }
- else if (!st->u.ifmatch.wanted)
- sayNO;
-
- if (OP(st->scan) == SUSPEND)
- locinput = PL_reginput;
- else {
- locinput = PL_reginput = st->locinput;
- nextchr = UCHARAT(locinput);
- }
- next = st->scan + ARG(st->scan);
- if (next == st->scan)
- next = NULL;
- goto reenter;
-
- /* XXX tmp don't handle yes_state yet */
- case resume_CURLYM1:
- case resume_CURLYM2:
- case resume_CURLYM3:
- PL_regmatch_slab =oslab;
- st = ost;
- PL_regmatch_state = st;
- depth = odepth;
- yes_state = oys;
- DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "XXX revering a CURLYM\n"));
- goto yes;
-
+ case IFMATCH_A:
+ case CURLYM_A:
+ case EVAL_A:
+ state_num = st->resume_state;
+ goto reenter_switch;
+
+ case CURLYM_B:
+ case BRANCH_next:
+ case TRIE_next:
+ case CURLY_B_max:
default:
- Perl_croak(aTHX_ "unexpected yes reume state");
+ Perl_croak(aTHX_ "unexpected yes resume state");
}
}
result = 1;
/* XXX this is duplicate(ish) code to that in the do_no section.
- * eventually a yes should just pop the stack back to the current
- * yes_state */
+ * will disappear when REGFMATCH goes */
if (depth) {
/* restore previous state and re-enter */
- POP_STATE;
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth));
+ depth--;
+ st--;
+ if (st < SLAB_FIRST(PL_regmatch_slab)) {
+ PL_regmatch_slab = PL_regmatch_slab->prev;
+ st = SLAB_LAST(PL_regmatch_slab);
+ }
+ PL_regmatch_state = st;
+ scan = st->scan;
+ next = st->next;
+ n = st->n;
+ locinput= st->locinput;
+ nextchr = UCHARAT(locinput);
switch (st->resume_state) {
- case resume_TRIE1:
- goto resume_point_TRIE1;
- case resume_TRIE2:
- goto resume_point_TRIE2;
case resume_CURLYX:
goto resume_point_CURLYX;
case resume_WHILEM1:
goto resume_point_WHILEM5;
case resume_WHILEM6:
goto resume_point_WHILEM6;
- case resume_CURLYM1:
- goto resume_point_CURLYM1;
- case resume_CURLYM2:
- goto resume_point_CURLYM2;
- case resume_CURLYM3:
- goto resume_point_CURLYM3;
- case resume_PLUS1:
- goto resume_point_PLUS1;
- case resume_PLUS2:
- goto resume_point_PLUS2;
- case resume_PLUS3:
- goto resume_point_PLUS3;
- case resume_PLUS4:
- goto resume_point_PLUS4;
-
- case resume_IFMATCH:
- case resume_EVAL:
+
+ case TRIE_next:
+ case CURLYM_A:
+ case CURLYM_B:
+ case EVAL_A:
+ case IFMATCH_A:
+ case BRANCH_next:
+ case CURLY_B_max:
+ case CURLY_B_min:
+ case CURLY_B_min_known:
+ break;
+
default:
Perl_croak(aTHX_ "regexp resume memory corruption");
}
"%*s %sfailed...%s\n",
REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
);
- goto do_no;
no_final:
do_no:
- if (st->unwind) {
- re_unwind_t * const uw = SSPTRt(st->unwind,re_unwind_t);
-
- switch (uw->type) {
- case RE_UNWIND_BRANCH:
- case RE_UNWIND_BRANCHJ:
- {
- re_unwind_branch_t * const uwb = &(uw->branch);
- const I32 lastparen = uwb->lastparen;
-
- REGCP_UNWIND(uwb->lastcp);
- for (n = *PL_reglastparen; n > lastparen; n--)
- PL_regendp[n] = -1;
- *PL_reglastparen = n;
- scan = next = uwb->next;
- st->minmod = uwb->minmod;
- if ( !scan ||
- OP(scan) != (uwb->type == RE_UNWIND_BRANCH
- ? BRANCH : BRANCHJ) ) { /* Failure */
- st->unwind = uwb->prev;
-#ifdef DEBUGGING
- PL_regindent--;
-#endif
- goto do_no;
- }
- /* Have more choice yet. Reuse the same uwb. */
- if ((n = (uwb->type == RE_UNWIND_BRANCH
- ? NEXT_OFF(next) : ARG(next))))
- next += n;
- else
- next = NULL; /* XXXX Needn't unwinding in this case... */
- uwb->next = next;
- next = NEXTOPER(scan);
- if (uwb->type == RE_UNWIND_BRANCHJ)
- next = NEXTOPER(next);
- locinput = uwb->locinput;
- nextchr = uwb->nextchr;
-#ifdef DEBUGGING
- PL_regindent = uwb->regindent;
-#endif
-
- goto reenter;
- }
- /* NOTREACHED */
- default:
- Perl_croak(aTHX_ "regexp unwind memory corruption");
- }
- /* NOTREACHED */
- }
#ifdef DEBUGGING
PL_regindent--;
if (depth) {
/* there's a previous state to backtrack to */
- POP_STATE;
- switch (st->resume_state) {
- case resume_TRIE1:
- goto resume_point_TRIE1;
- case resume_TRIE2:
- goto resume_point_TRIE2;
- case resume_EVAL:
- /* we have failed an (??{...}). Restore state to the outer re
- * then re-throw the failure */
- if (st->u.eval.toggleutf)
- PL_reg_flags ^= RF_utf8;
- ReREFCNT_dec(rex);
- rex = st->u.eval.prev_rex;
- yes_state = st->u.yes.prev_yes_state;
-
- /* XXXX This is too dramatic a measure... */
- PL_reg_maxiter = 0;
-
- PL_reginput = locinput;
- REGCP_UNWIND(st->u.eval.lastcp);
- regcppop(rex);
- goto do_no;
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth));
+ depth--;
+ st--;
+ if (st < SLAB_FIRST(PL_regmatch_slab)) {
+ PL_regmatch_slab = PL_regmatch_slab->prev;
+ st = SLAB_LAST(PL_regmatch_slab);
+ }
+ PL_regmatch_state = st;
+ scan = st->scan;
+ next = st->next;
+ n = st->n;
+ locinput= st->locinput;
+ nextchr = UCHARAT(locinput);
+ switch (st->resume_state) {
case resume_CURLYX:
goto resume_point_CURLYX;
case resume_WHILEM1:
goto resume_point_WHILEM5;
case resume_WHILEM6:
goto resume_point_WHILEM6;
- case resume_CURLYM1:
- goto resume_point_CURLYM1;
- case resume_CURLYM2:
- goto resume_point_CURLYM2;
- case resume_CURLYM3:
- goto resume_point_CURLYM3;
- case resume_IFMATCH:
- yes_state = st->u.yes.prev_yes_state;
- if (st->logical) {
- st->logical = 0;
- st->sw = !st->u.ifmatch.wanted;
- }
- else if (st->u.ifmatch.wanted)
- sayNO;
- assert(OP(scan) != SUSPEND); /* XXX DAPM tmp */
- locinput = PL_reginput = st->locinput;
- nextchr = UCHARAT(locinput);
- next = scan + ARG(scan);
- if (next == scan)
- next = NULL;
- goto reenter;
-
- case resume_PLUS1:
- goto resume_point_PLUS1;
- case resume_PLUS2:
- goto resume_point_PLUS2;
- case resume_PLUS3:
- goto resume_point_PLUS3;
- case resume_PLUS4:
- goto resume_point_PLUS4;
+ case TRIE_next:
+ case EVAL_A:
+ case BRANCH_next:
+ case CURLYM_A:
+ case CURLYM_B:
+ case IFMATCH_A:
+ case CURLY_B_max:
+ case CURLY_B_min:
+ case CURLY_B_min_known:
+ if (yes_state == st)
+ yes_state = st->u.yes.prev_yes_state;
+ state_num = st->resume_state + 1; /* failure = success + 1 */
+ goto reenter_switch;
+
default:
Perl_croak(aTHX_ "regexp resume memory corruption");
}
PL_reginput = scan;
DEBUG_r({
- SV *re_debug_flags = NULL;
- SV * const prop = sv_newmortal();
- GET_RE_DEBUG_FLAGS;
- DEBUG_EXECUTE_r({
- regprop(prog, prop, p);
- PerlIO_printf(Perl_debug_log,
- "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
- REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
- });
+ GET_RE_DEBUG_FLAGS_DECL;
+ DEBUG_EXECUTE_r({
+ SV * const prop = sv_newmortal();
+ regprop(prog, prop, p);
+ PerlIO_printf(Perl_debug_log,
+ "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
+ REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
});
+ });
return(c);
}
-#ifndef PERL_IN_XSUB_RE
+#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
/*
- regclass_swash - prepare the utf8 swash
*/
SV *sw = NULL;
SV *si = NULL;
SV *alt = NULL;
- const struct reg_data *data = prog ? prog->data : NULL;
+ const struct reg_data * const data = prog ? prog->data : NULL;
if (data && data->count) {
const U32 n = ARG(node);
}
STATIC U8 *
-S_reghop3(U8 *s, I32 off, U8* lim)
+S_reghop3(U8 *s, I32 off, const U8* lim)
{
dVAR;
if (off >= 0) {
}
STATIC U8 *
-S_reghopmaybe3(U8* s, I32 off, U8* lim)
+S_reghopmaybe3(U8* s, I32 off, const U8* lim)
{
dVAR;
if (off >= 0) {
s += UTF8SKIP(s);
}
if (off >= 0)
- return 0;
+ return NULL;
}
else {
while (off++) {
break;
}
if (off <= 0)
- return 0;
+ return NULL;
}
return s;
}