#endif
#if defined(PERL_IN_REGEXEC_C) || defined(PERL_DECL_PROT)
-ERs |I32 |regmatch |NN const regmatch_info *reginfo|NN regnode *prog
+ERs |I32 |regmatch |NN regmatch_info *reginfo|NN regnode *prog
ERs |I32 |regrepeat |NN const regexp *prog|NN const regnode *p|I32 max
-ERs |I32 |regtry |NN const regmatch_info *reginfo|NN char *startpos
+ERs |I32 |regtry |NN regmatch_info *reginfo|NN char **startpos
ERs |bool |reginclass |NULLOK const regexp *prog|NN const regnode *n|NN const U8 *p|NULLOK STRLEN *lenp\
|bool do_utf8sv_is_utf8
Es |CHECKPOINT|regcppush |I32 parenfloor
ERsn |U8* |reghop4 |NN U8 *pos|I32 off|NN const U8 *llim|NN const U8 *rlim
#endif
ERsn |U8* |reghopmaybe3 |NN U8 *pos|I32 off|NN const U8 *lim
-ERs |char* |find_byclass |NN regexp * prog|NN const regnode *c|NN char *s|NN const char *strend|NULLOK const regmatch_info *reginfo
+ERs |char* |find_byclass |NN regexp * prog|NN const regnode *c|NN char *s|NN const char *strend|NULLOK regmatch_info *reginfo
Es |void |to_utf8_substr |NN regexp * prog
Es |void |to_byte_substr |NN regexp * prog
ERs |I32 |reg_check_named_buff_matched |NN const regexp *rex|NN const regnode *prog
Enables output related to the optimisation phase of compilation.
-=item TRIE_COMPILE
+=item TRIEC
Detailed info about trie compilation.
Dump the final program out after it is compiled and optimised.
-=item OFFSETS
-
-Dump offset information. This can be used to see how regops correlate
-to the pattern. Output format is
-
- NODENUM:POSITION[LENGTH]
-
-Where 1 is the position of the first char in the string. Note that position
-can be 0, or larger than the actual length of the pattern, likewise length
-can be zero.
=back
Turns on debugging of the main matching loop.
-=item TRIE_EXECUTE
+=item TRIEE
Extra debugging of how tries execute.
Turns on all "extra" debugging options.
-=item TRIE_MORE
+=item TRIEM
+
+Enable enhanced TRIE debugging. Enhances both TRIEE
+and TRIEC.
+
+=item STATE
+
+Enable debugging of states in the engine.
+
+=item STACK
-Enable enhanced TRIE debugging. Enhances both TRIE_EXECUTE
-and TRIE_COMPILE.
+Enable debugging of the recursion stack in the engine. Enabling
+or disabling this option automatically does the same for debugging
+states as well. This output from this can be quite large.
+
+=item OPTIMISEM
+
+Enable enhanced optimisation debugging and start point optimisations.
+Probably not useful except when debugging the regex engine itself.
+
+=item OFFSETS
+
+Dump offset information. This can be used to see how regops correlate
+to the pattern. Output format is
+
+ NODENUM:POSITION[LENGTH]
+
+Where 1 is the position of the first char in the string. Note that position
+can be 0, or larger than the actual length of the pattern, likewise length
+can be zero.
-=item OFFSETS_DEBUG
+=item OFFSETSDBG
Enable debugging of offsets information. This emits copious
amounts of trace information and doesn't mesh well with other
=item More
-Enable TRIE_MORE and all execute compile and execute options.
+Enable TRIEM and all execute compile and execute options.
=back
OFFSETSDBG => 0x040000,
STATE => 0x080000,
OPTIMISEM => 0x100000,
+ STACK => 0x280000,
);
$flags{ALL} = -1;
$flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE};
the '+' is used. Thus C<?+>, C<*+>, C<++>, C<{min,max}+> are now legal
quantifiers. (Yves Orton)
+=item Backtracking control verbs
+
+The regex engine now supports a number of special purpose backtrack
+control verbs: (?COMMIT), (?CUT), (?ERROR) and (?FAIL). See L<perlre>
+for their descriptions.
+
=back
=head2 The C<_> prototype
See also C<< (?>pattern) >> and possessive quantifiers for other
ways to control backtracking.
+=item C<(?CUT)>
+X<(?CUT)>
+
+This zero-width pattern is similar to C<(?COMMIT)>, except that on
+failure it also signifies that whatever text that was matched leading
+up to the C<(?CUT)> pattern cannot match, I<even from another
+starting point>.
+
+Compare the following to the examples in C<(?COMMIT)>, note the string
+is twice as long:
+
+ 'aaabaaab'=~/a+b?(?CUT)(?{print "$&\n"; $count++})(?FAIL)/;
+ print "Count=$count\n";
+
+outputs
+
+ aaab
+ aaab
+ Count=2
+
+Once the 'aaab' at the start of the string has matched and the C<(?CUT)>
+executed the next startpoint will be where the cursor was when the
+C<(?CUT)> was executed.
+
+=item C<(?ERROR)>
+X<(?ERROR)>
+
+This zero-width pattern is similar to C<(?CUT)> except that it causes
+the match to fail outright. No attempts to match will occur again.
+
+ 'aaabaaab'=~/a+b?(?ERROR)(?{print "$&\n"; $count++})(?FAIL)/;
+ print "Count=$count\n";
+
+outputs
+
+ aaab
+ Count=1
+
+In other words, once the C<(?ERROR)> has been entered and then pattern
+does not match then the regex engine will not try any further matching at
+all on the rest of the string.
+
=item C<(?(condition)yes-pattern|no-pattern)>
X<(?()>
#endif
#if defined(PERL_IN_REGEXEC_C) || defined(PERL_DECL_PROT)
-STATIC I32 S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
+STATIC I32 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2);
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2);
-STATIC I32 S_regtry(pTHX_ const regmatch_info *reginfo, char *startpos)
+STATIC I32 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2);
__attribute__nonnull__(1)
__attribute__nonnull__(3);
-STATIC char* S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, const regmatch_info *reginfo)
+STATIC char* S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, regmatch_info *reginfo)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2)
case ':': /* (?:...) */
case '>': /* (?>...) */
break;
- case 'C':
+ case 'C': /* (?CUT) and (?COMMIT) */
if (RExC_parse[0] == 'O' &&
RExC_parse[1] == 'M' &&
RExC_parse[2] == 'M' &&
{
RExC_parse+=5;
ret = reg_node(pRExC_state, COMMIT);
+ } else if (
+ RExC_parse[0] == 'U' &&
+ RExC_parse[1] == 'T' &&
+ RExC_parse[2] == ')')
+ {
+ RExC_parse+=2;
+ ret = reg_node(pRExC_state, CUT);
} else {
vFAIL("Sequence (?C... not terminated");
}
nextchar(pRExC_state);
return ret;
break;
+ case 'E': /* (?ERROR) */
+ if (RExC_parse[0] == 'R' &&
+ RExC_parse[1] == 'R' &&
+ RExC_parse[2] == 'O' &&
+ RExC_parse[3] == 'R' &&
+ RExC_parse[4] == ')')
+ {
+ RExC_parse+=4;
+ ret = reg_node(pRExC_state, OPERROR);
+ } else {
+ vFAIL("Sequence (?E... not terminated");
+ }
+ nextchar(pRExC_state);
+ return ret;
+ break;
case 'F':
if (RExC_parse[0] == 'A' &&
RExC_parse[1] == 'I' &&
(dist ? this_trie + dist : next) - start);
if (dist) {
if (!nextbranch)
- nextbranch = this_trie + trie->jump[0];
+ nextbranch= this_trie + trie->jump[0];
DUMPUNTIL(this_trie + dist, nextbranch);
}
if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
#define RE_DEBUG_EXTRA_OFFDEBUG 0x040000
#define RE_DEBUG_EXTRA_STATE 0x080000
#define RE_DEBUG_EXTRA_OPTIMISE 0x100000
+/* combined */
+#define RE_DEBUG_EXTRA_STACK 0x280000
#define RE_DEBUG_FLAG(x) (re_debug_flags & x)
/* Compile */
if (re_debug_flags & RE_DEBUG_EXTRA_OFFSETS) x )
#define DEBUG_STATE_r(x) DEBUG_r( \
if (re_debug_flags & RE_DEBUG_EXTRA_STATE) x )
+#define DEBUG_STACK_r(x) DEBUG_r( \
+ if (re_debug_flags & RE_DEBUG_EXTRA_STACK) x )
#define DEBUG_OPTIMISE_MORE_r(x) DEBUG_r( \
if ((RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE) == \
(re_debug_flags & (RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE)) ) x )
$ind++;
$name[$ind]="$real$suffix";
$type[$ind]=$type;
- $rest[$ind]="Regmatch state for $type";
+ $rest[$ind]="state for $type";
}
}
}
-$width, REGMATCH_STATE_MAX => $tot - 1
;
-$ind = 0;
-while (++$ind <= $tot) {
+
+for ($ind=1; $ind <= $lastregop ; $ind++) {
my $oind = $ind - 1;
printf OUT "#define\t%*s\t%d\t/* %#04x %s */\n",
-$width, $name[$ind], $ind-1, $ind-1, $rest[$ind];
- print OUT "\n\t/* ------------ States ------------- */\n\n"
- if $ind == $lastregop and $lastregop != $tot;
+}
+print OUT "\t/* ------------ States ------------- */\n";
+for ( ; $ind <= $tot ; $ind++) {
+ printf OUT "#define\t%*s\t(REGNODE_MAX + %d)\t/* %s */\n",
+ -$width, $name[$ind], $ind - $lastregop, $rest[$ind];
}
print OUT <<EOP;
EOP
$ind = 0;
+my $ofs = 1;
+my $sym = "";
while (++$ind <= $tot) {
my $size = $longj[$ind] || 0;
- printf OUT "\t%*s\t/* %#04x */\n",
- -3-$width,qq("$name[$ind]",),$ind-1;
- print OUT "\t/* ------------ States ------------- */\n"
- if $ind == $lastregop and $lastregop != $tot;
+ printf OUT "\t%*s\t/* $sym%#04x */\n",
+ -3-$width,qq("$name[$ind]",), $ind - $ofs;
+ if ($ind == $lastregop and $lastregop != $tot) {
+ print OUT "\t/* ------------ States ------------- */\n";
+ $ofs = $lastregop;
+ $sym = 'REGNODE_MAX +';
+ }
+
}
print OUT <<EOP;
#*Bactracking
OPFAIL OPFAIL, none Same as (?!)
-COMMIT COMMIT, node Pattern fails if backtracking through this
+COMMIT COMMIT, none Pattern fails if backtracking through this
+CUT COMMIT, none ... and restarts at the cursor point
+OPERROR OPERROR,none Pattern fails outright if backtracking through this
# NEW STUFF ABOVE THIS LINE -- Please update counts below.
CURLYM A,B:FAIL
IFMATCH A:FAIL
CURLY B_min_known,B_min,B_max:FAIL
-COMMIT next:FAIL
+COMMIT next:FAIL
+
&& (ln == len || \
ibcmp_utf8(s, NULL, 0, do_utf8, \
m, NULL, ln, (bool)UTF)) \
- && (!reginfo || regtry(reginfo, s)) ) \
+ && (!reginfo || regtry(reginfo, &s)) ) \
goto got_it; \
else { \
U8 foldbuf[UTF8_MAXBYTES_CASE+1]; \
NULL, foldlen, do_utf8, \
m, \
NULL, ln, (bool)UTF)) \
- && (!reginfo || regtry(reginfo, s)) ) \
+ && (!reginfo || regtry(reginfo, &s)) ) \
goto got_it; \
} \
s += len
&& (ln == 1 || !(OP(c) == EXACTF \
? ibcmp(s, m, ln) \
: ibcmp_locale(s, m, ln))) \
- && (!reginfo || regtry(reginfo, s)) ) \
+ && (!reginfo || regtry(reginfo, &s)) ) \
goto got_it; \
s++; \
} \
#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd) \
REXEC_FBC_UTF8_SCAN( \
if (CoNd) { \
- if (tmp && (!reginfo || regtry(reginfo, s))) \
+ if (tmp && (!reginfo || regtry(reginfo, &s))) \
goto got_it; \
else \
tmp = doevery; \
#define REXEC_FBC_CLASS_SCAN(CoNd) \
REXEC_FBC_SCAN( \
if (CoNd) { \
- if (tmp && (!reginfo || regtry(reginfo, s))) \
+ if (tmp && (!reginfo || regtry(reginfo, &s))) \
goto got_it; \
else \
tmp = doevery; \
)
#define REXEC_FBC_TRYIT \
-if ((!reginfo || regtry(reginfo, s))) \
+if ((!reginfo || regtry(reginfo, &s))) \
goto got_it
#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
STATIC char *
S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
- const char *strend, const regmatch_info *reginfo)
+ const char *strend, regmatch_info *reginfo)
{
dVAR;
const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
/* The assignment of 2 is intentional:
* for the folded sharp s, the skip is 2. */
(skip = SHARP_S_SKIP))) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
+ if (tmp && (!reginfo || regtry(reginfo, &s)))
goto got_it;
else
tmp = doevery;
break;
case CANY:
REXEC_FBC_SCAN(
- if (tmp && (!reginfo || regtry(reginfo, s)))
+ if (tmp && (!reginfo || regtry(reginfo, &s)))
goto got_it;
else
tmp = doevery;
}
);
}
- if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, s)))
+ if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
goto got_it;
break;
case NBOUNDL:
else REXEC_FBC_TRYIT;
);
}
- if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, s)))
+ if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
goto got_it;
break;
case ALNUM:
(UV)accepted_word, s - real_start
);
});
- if (!reginfo || regtry(reginfo, s)) {
+ if (!reginfo || regtry(reginfo, &s)) {
FREETMPS;
LEAVE;
goto got_it;
/* nosave: For optimizations. */
{
dVAR;
- register char *s;
+ /*register*/ char *s;
register regnode *c;
- register char *startpos = stringarg;
+ /*register*/ char *startpos = stringarg;
I32 minlen; /* must match at least this many chars */
I32 dontbother = 0; /* how many characters not to try at end */
I32 end_shift = 0; /* Same for the end. */ /* CC */
/* Simplest case: anchored match need be tried only once. */
/* [unless only anchor is BOL and multiline is set] */
if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
- if (s == startpos && regtry(®info, startpos))
+ if (s == startpos && regtry(®info, &startpos))
goto got_it;
else if (multiline || (prog->reganch & ROPT_IMPLICIT)
|| (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
if (s == startpos)
goto after_try;
while (1) {
- if (regtry(®info, s))
+ if (regtry(®info, &s))
goto got_it;
after_try:
if (s >= end)
s--;
while (s < end) {
if (*s++ == '\n') { /* don't need PL_utf8skip here */
- if (regtry(®info, s))
+ if (regtry(®info, &s))
goto got_it;
}
}
/* the warning about reginfo.ganch being used without intialization
is bogus -- we set it above, when prog->reganch & ROPT_GPOS_SEEN
and we only enter this block when the same bit is set. */
- if (regtry(®info, reginfo.ganch))
+ if (regtry(®info, ®info.ganch))
goto got_it;
goto phooey;
}
REXEC_FBC_SCAN(
if (*s == ch) {
DEBUG_EXECUTE_r( did_match = 1 );
- if (regtry(®info, s)) goto got_it;
+ if (regtry(®info, &s)) goto got_it;
s += UTF8SKIP(s);
while (s < strend && *s == ch)
s += UTF8SKIP(s);
REXEC_FBC_SCAN(
if (*s == ch) {
DEBUG_EXECUTE_r( did_match = 1 );
- if (regtry(®info, s)) goto got_it;
+ if (regtry(®info, &s)) goto got_it;
s++;
while (s < strend && *s == ch)
s++;
}
if (do_utf8) {
while (s <= last1) {
- if (regtry(®info, s))
+ if (regtry(®info, &s))
goto got_it;
s += UTF8SKIP(s);
}
}
else {
while (s <= last1) {
- if (regtry(®info, s))
+ if (regtry(®info, &s))
goto got_it;
s++;
}
/* We don't know much -- general case. */
if (do_utf8) {
for (;;) {
- if (regtry(®info, s))
+ if (regtry(®info, &s))
goto got_it;
if (s >= strend)
break;
}
else {
do {
- if (regtry(®info, s))
+ if (regtry(®info, &s))
goto got_it;
} while (s++ < strend);
}
- regtry - try match at specific point
*/
STATIC I32 /* 0 failure, 1 success */
-S_regtry(pTHX_ const regmatch_info *reginfo, char *startpos)
+S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
{
dVAR;
register I32 *sp;
CHECKPOINT lastcp;
regexp *prog = reginfo->prog;
GET_RE_DEBUG_FLAGS_DECL;
+ reginfo->cutpoint=NULL;
if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
MAGIC *mg;
prog->subbeg = PL_bostr;
prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
}
- DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
- prog->startp[0] = startpos - PL_bostr;
- PL_reginput = startpos;
+ DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
+ prog->startp[0] = *startpos - PL_bostr;
+ PL_reginput = *startpos;
PL_reglastparen = &prog->lastparen;
PL_reglastcloseparen = &prog->lastcloseparen;
prog->lastparen = 0;
prog->endp[0] = PL_reginput - PL_bostr;
return 1;
}
+ if (reginfo->cutpoint)
+ *startpos= reginfo->cutpoint;
REGCP_UNWIND(lastcp);
return 0;
}
}
STATIC I32 /* 0 failure, 1 success */
-S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
+S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
{
#if PERL_VERSION < 9
dMY_CXT;
regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
struct regmatch_state *cur_curlyx = NULL; /* most recent curlyx */
U32 state_num;
- bool no_final = 0; /* if true then we dont backtrack on failure */
+ bool no_final = 0;
/* these three flags are set by various ops to signal information to
* the very next op. They have a useful lifetime of exactly one loop
GET_RE_DEBUG_FLAGS_DECL;
#endif
+ DEBUG_STACK_r( {
+ PerlIO_printf(Perl_debug_log,"regmatch start\n");
+ });
/* on first ever call to regmatch, allocate first slab */
if (!PL_regmatch_slab) {
Newx(PL_regmatch_slab, 1, regmatch_slab);
case GOSTART:
case GOSUB: /* /(...(?1))/ */
if (cur_eval && cur_eval->locinput==locinput) {
- if (cur_eval->u.eval.close_paren == ARG(scan))
+ if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
Perl_croak(aTHX_ "Infinite recursion in regex");
if ( ++nochange_depth > MAX_RECURSE_EVAL_NOCHANGE_DEPTH )
Perl_croak(aTHX_
n = ARG(scan);
PL_op = (OP_4tree*)rex->data->data[n];
- DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
+ DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
+ " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
PAD_SAVE_LOCAL(old_comppad, (PAD*)rex->data->data[n + 2]);
PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
locinput = PL_reginput;
if (cur_eval && cur_eval->u.eval.close_paren &&
- cur_eval->u.eval.close_paren == ST.me->flags)
+ cur_eval->u.eval.close_paren == (U32)ST.me->flags)
goto fake_end;
if ( ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)) )
if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
|| (cur_eval && cur_eval->u.eval.close_paren &&
- cur_eval->u.eval.close_paren == ST.me->flags))
+ cur_eval->u.eval.close_paren == (U32)ST.me->flags))
sayNO;
curlym_do_B: /* execute the B in /A{m,n}B/ */
else
PL_regendp[paren] = -1;
if (cur_eval && cur_eval->u.eval.close_paren &&
- cur_eval->u.eval.close_paren == ST.me->flags)
+ cur_eval->u.eval.close_paren == (U32)ST.me->flags)
{
if (ST.count)
goto fake_end;
if (next == scan)
next = NULL;
break;
+ case OPERROR:
+ reginfo->cutpoint=PL_regeol;
+ goto do_commit;
+ /* NOTREACHED */
+ case CUT:
+ if ( locinput > reginfo->bol )
+ reginfo->cutpoint = HOPBACKc(locinput, 1);
+ /* FALLTHROUGH */
case COMMIT:
+ do_commit:
+ PL_reginput = locinput;
PUSH_STATE_GOTO(COMMIT_next,next);
/* NOTREACHED */
case COMMIT_next_fail:
{
regmatch_state *newst;
- DEBUG_STATE_pp("push");
+ DEBUG_STACK_r({
+ regmatch_state *cur = st;
+ regmatch_state *curyes = yes_state;
+ int curd = depth;
+ regmatch_slab *slab = PL_regmatch_slab;
+ for (;curd > -1;cur--,curd--) {
+ if (cur < SLAB_FIRST(slab)) {
+ slab = slab->prev;
+ cur = SLAB_LAST(slab);
+ }
+ PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
+ REPORT_CODE_OFF + 2 + depth * 2,"",
+ curd, reg_name[cur->resume_state],
+ (curyes == cur) ? "yes" : ""
+ );
+ if (curyes == cur)
+ curyes = cur->u.yes.prev_yes_state;
+ }
+ } else
+ DEBUG_STATE_pp("push")
+ );
depth++;
st->locinput = locinput;
newst = st+1;
st = yes_state;
yes_state = st->u.yes.prev_yes_state;
PL_regmatch_state = st;
+
state_num = st->resume_state + no_final;
goto reenter_switch;
char *till;
SV *sv;
char *ganch;
+ char *cutpoint;
} regmatch_info;
/* Regops and State definitions */
-#define REGNODE_MAX 76
-#define REGMATCH_STATE_MAX 108
+#define REGNODE_MAX 78
+#define REGMATCH_STATE_MAX 110
#define END 0 /* 0000 End of program. */
#define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
#define DEFINEP 72 /* 0x48 Never execute directly. */
#define OPFAIL 73 /* 0x49 Same as (?!) */
#define COMMIT 74 /* 0x4a Pattern fails if backtracking through this */
-#define OPTIMIZED 75 /* 0x4b Placeholder for dump. */
-#define PSEUDO 76 /* 0x4c Pseudo opcode for internal use. */
-
+#define CUT 75 /* 0x4b ... and restarts at the cursor point */
+#define OPERROR 76 /* 0x4c Pattern fails outright if backtracking through this */
+#define OPTIMIZED 77 /* 0x4d Placeholder for dump. */
+#define PSEUDO 78 /* 0x4e Pseudo opcode for internal use. */
/* ------------ States ------------- */
-
-#define TRIE_next 77 /* 0x4d Regmatch state for TRIE */
-#define TRIE_next_fail 78 /* 0x4e Regmatch state for TRIE */
-#define EVAL_AB 79 /* 0x4f Regmatch state for EVAL */
-#define EVAL_AB_fail 80 /* 0x50 Regmatch state for EVAL */
-#define CURLYX_end 81 /* 0x51 Regmatch state for CURLYX */
-#define CURLYX_end_fail 82 /* 0x52 Regmatch state for CURLYX */
-#define WHILEM_A_pre 83 /* 0x53 Regmatch state for WHILEM */
-#define WHILEM_A_pre_fail 84 /* 0x54 Regmatch state for WHILEM */
-#define WHILEM_A_min 85 /* 0x55 Regmatch state for WHILEM */
-#define WHILEM_A_min_fail 86 /* 0x56 Regmatch state for WHILEM */
-#define WHILEM_A_max 87 /* 0x57 Regmatch state for WHILEM */
-#define WHILEM_A_max_fail 88 /* 0x58 Regmatch state for WHILEM */
-#define WHILEM_B_min 89 /* 0x59 Regmatch state for WHILEM */
-#define WHILEM_B_min_fail 90 /* 0x5a Regmatch state for WHILEM */
-#define WHILEM_B_max 91 /* 0x5b Regmatch state for WHILEM */
-#define WHILEM_B_max_fail 92 /* 0x5c Regmatch state for WHILEM */
-#define BRANCH_next 93 /* 0x5d Regmatch state for BRANCH */
-#define BRANCH_next_fail 94 /* 0x5e Regmatch state for BRANCH */
-#define CURLYM_A 95 /* 0x5f Regmatch state for CURLYM */
-#define CURLYM_A_fail 96 /* 0x60 Regmatch state for CURLYM */
-#define CURLYM_B 97 /* 0x61 Regmatch state for CURLYM */
-#define CURLYM_B_fail 98 /* 0x62 Regmatch state for CURLYM */
-#define IFMATCH_A 99 /* 0x63 Regmatch state for IFMATCH */
-#define IFMATCH_A_fail 100 /* 0x64 Regmatch state for IFMATCH */
-#define CURLY_B_min_known 101 /* 0x65 Regmatch state for CURLY */
-#define CURLY_B_min_known_fail 102 /* 0x66 Regmatch state for CURLY */
-#define CURLY_B_min 103 /* 0x67 Regmatch state for CURLY */
-#define CURLY_B_min_fail 104 /* 0x68 Regmatch state for CURLY */
-#define CURLY_B_max 105 /* 0x69 Regmatch state for CURLY */
-#define CURLY_B_max_fail 106 /* 0x6a Regmatch state for CURLY */
-#define COMMIT_next 107 /* 0x6b Regmatch state for COMMIT */
-#define COMMIT_next_fail 108 /* 0x6c Regmatch state for COMMIT */
+#define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */
+#define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */
+#define EVAL_AB (REGNODE_MAX + 3) /* state for EVAL */
+#define EVAL_AB_fail (REGNODE_MAX + 4) /* state for EVAL */
+#define CURLYX_end (REGNODE_MAX + 5) /* state for CURLYX */
+#define CURLYX_end_fail (REGNODE_MAX + 6) /* state for CURLYX */
+#define WHILEM_A_pre (REGNODE_MAX + 7) /* state for WHILEM */
+#define WHILEM_A_pre_fail (REGNODE_MAX + 8) /* state for WHILEM */
+#define WHILEM_A_min (REGNODE_MAX + 9) /* state for WHILEM */
+#define WHILEM_A_min_fail (REGNODE_MAX + 10) /* state for WHILEM */
+#define WHILEM_A_max (REGNODE_MAX + 11) /* state for WHILEM */
+#define WHILEM_A_max_fail (REGNODE_MAX + 12) /* state for WHILEM */
+#define WHILEM_B_min (REGNODE_MAX + 13) /* state for WHILEM */
+#define WHILEM_B_min_fail (REGNODE_MAX + 14) /* state for WHILEM */
+#define WHILEM_B_max (REGNODE_MAX + 15) /* state for WHILEM */
+#define WHILEM_B_max_fail (REGNODE_MAX + 16) /* state for WHILEM */
+#define BRANCH_next (REGNODE_MAX + 17) /* state for BRANCH */
+#define BRANCH_next_fail (REGNODE_MAX + 18) /* state for BRANCH */
+#define CURLYM_A (REGNODE_MAX + 19) /* state for CURLYM */
+#define CURLYM_A_fail (REGNODE_MAX + 20) /* state for CURLYM */
+#define CURLYM_B (REGNODE_MAX + 21) /* state for CURLYM */
+#define CURLYM_B_fail (REGNODE_MAX + 22) /* state for CURLYM */
+#define IFMATCH_A (REGNODE_MAX + 23) /* state for IFMATCH */
+#define IFMATCH_A_fail (REGNODE_MAX + 24) /* state for IFMATCH */
+#define CURLY_B_min_known (REGNODE_MAX + 25) /* state for CURLY */
+#define CURLY_B_min_known_fail (REGNODE_MAX + 26) /* state for CURLY */
+#define CURLY_B_min (REGNODE_MAX + 27) /* state for CURLY */
+#define CURLY_B_min_fail (REGNODE_MAX + 28) /* state for CURLY */
+#define CURLY_B_max (REGNODE_MAX + 29) /* state for CURLY */
+#define CURLY_B_max_fail (REGNODE_MAX + 30) /* state for CURLY */
+#define COMMIT_next (REGNODE_MAX + 31) /* state for COMMIT */
+#define COMMIT_next_fail (REGNODE_MAX + 32) /* state for COMMIT */
/* PL_regkind[] What type of regop or state is this. */
DEFINEP, /* DEFINEP */
OPFAIL, /* OPFAIL */
COMMIT, /* COMMIT */
+ COMMIT, /* CUT */
+ OPERROR, /* OPERROR */
NOTHING, /* OPTIMIZED */
PSEUDO, /* PSEUDO */
/* ------------ States ------------- */
EXTRA_SIZE(struct regnode_1), /* DEFINEP */
0, /* OPFAIL */
0, /* COMMIT */
+ 0, /* CUT */
+ 0, /* OPERROR */
0, /* OPTIMIZED */
0, /* PSEUDO */
};
0, /* DEFINEP */
0, /* OPFAIL */
0, /* COMMIT */
+ 0, /* CUT */
+ 0, /* OPERROR */
0, /* OPTIMIZED */
0, /* PSEUDO */
};
"DEFINEP", /* 0x48 */
"OPFAIL", /* 0x49 */
"COMMIT", /* 0x4a */
- "OPTIMIZED", /* 0x4b */
- "PSEUDO", /* 0x4c */
+ "CUT", /* 0x4b */
+ "OPERROR", /* 0x4c */
+ "OPTIMIZED", /* 0x4d */
+ "PSEUDO", /* 0x4e */
/* ------------ States ------------- */
- "TRIE_next", /* 0x4d */
- "TRIE_next_fail", /* 0x4e */
- "EVAL_AB", /* 0x4f */
- "EVAL_AB_fail", /* 0x50 */
- "CURLYX_end", /* 0x51 */
- "CURLYX_end_fail", /* 0x52 */
- "WHILEM_A_pre", /* 0x53 */
- "WHILEM_A_pre_fail", /* 0x54 */
- "WHILEM_A_min", /* 0x55 */
- "WHILEM_A_min_fail", /* 0x56 */
- "WHILEM_A_max", /* 0x57 */
- "WHILEM_A_max_fail", /* 0x58 */
- "WHILEM_B_min", /* 0x59 */
- "WHILEM_B_min_fail", /* 0x5a */
- "WHILEM_B_max", /* 0x5b */
- "WHILEM_B_max_fail", /* 0x5c */
- "BRANCH_next", /* 0x5d */
- "BRANCH_next_fail", /* 0x5e */
- "CURLYM_A", /* 0x5f */
- "CURLYM_A_fail", /* 0x60 */
- "CURLYM_B", /* 0x61 */
- "CURLYM_B_fail", /* 0x62 */
- "IFMATCH_A", /* 0x63 */
- "IFMATCH_A_fail", /* 0x64 */
- "CURLY_B_min_known", /* 0x65 */
- "CURLY_B_min_known_fail", /* 0x66 */
- "CURLY_B_min", /* 0x67 */
- "CURLY_B_min_fail", /* 0x68 */
- "CURLY_B_max", /* 0x69 */
- "CURLY_B_max_fail", /* 0x6a */
- "COMMIT_next", /* 0x6b */
- "COMMIT_next_fail", /* 0x6c */
+ "TRIE_next", /* REGNODE_MAX +0x01 */
+ "TRIE_next_fail", /* REGNODE_MAX +0x02 */
+ "EVAL_AB", /* REGNODE_MAX +0x03 */
+ "EVAL_AB_fail", /* REGNODE_MAX +0x04 */
+ "CURLYX_end", /* REGNODE_MAX +0x05 */
+ "CURLYX_end_fail", /* REGNODE_MAX +0x06 */
+ "WHILEM_A_pre", /* REGNODE_MAX +0x07 */
+ "WHILEM_A_pre_fail", /* REGNODE_MAX +0x08 */
+ "WHILEM_A_min", /* REGNODE_MAX +0x09 */
+ "WHILEM_A_min_fail", /* REGNODE_MAX +0x0a */
+ "WHILEM_A_max", /* REGNODE_MAX +0x0b */
+ "WHILEM_A_max_fail", /* REGNODE_MAX +0x0c */
+ "WHILEM_B_min", /* REGNODE_MAX +0x0d */
+ "WHILEM_B_min_fail", /* REGNODE_MAX +0x0e */
+ "WHILEM_B_max", /* REGNODE_MAX +0x0f */
+ "WHILEM_B_max_fail", /* REGNODE_MAX +0x10 */
+ "BRANCH_next", /* REGNODE_MAX +0x11 */
+ "BRANCH_next_fail", /* REGNODE_MAX +0x12 */
+ "CURLYM_A", /* REGNODE_MAX +0x13 */
+ "CURLYM_A_fail", /* REGNODE_MAX +0x14 */
+ "CURLYM_B", /* REGNODE_MAX +0x15 */
+ "CURLYM_B_fail", /* REGNODE_MAX +0x16 */
+ "IFMATCH_A", /* REGNODE_MAX +0x17 */
+ "IFMATCH_A_fail", /* REGNODE_MAX +0x18 */
+ "CURLY_B_min_known", /* REGNODE_MAX +0x19 */
+ "CURLY_B_min_known_fail", /* REGNODE_MAX +0x1a */
+ "CURLY_B_min", /* REGNODE_MAX +0x1b */
+ "CURLY_B_min_fail", /* REGNODE_MAX +0x1c */
+ "CURLY_B_max", /* REGNODE_MAX +0x1d */
+ "CURLY_B_max_fail", /* REGNODE_MAX +0x1e */
+ "COMMIT_next", /* REGNODE_MAX +0x1f */
+ "COMMIT_next_fail", /* REGNODE_MAX +0x20 */
};
#endif /* DEBUGGING */
#else
';
ok(!$@,'lvalue $+{...} should not throw an exception');
}
-{
- our $count = 0;
- 'aaab'=~/a+b?(?{$count++})(?FAIL)/;
- iseq($count,9,"expect 9 for no (?COMMIT)");
- $count = 0;
- 'aaab'=~/a+b?(?COMMIT)(?{$count++})(?FAIL)/;
- iseq($count,3,"expect 3 with (?COMMIT)");
-}
+
# stress test CURLYX/WHILEM.
#
# This test includes varying levels of nesting, and according to
# CURLYX and WHILEM blocks, except those related to LONGJMP, the
# super-linear cache and warnings. It executes about 0.5M regexes
-{
+if ($ENV{PERL_SKIP_PSYCHO_TEST}){
+ printf "ok %d Skip: No psycho tests\n", $test++;
+} else {
my $r = qr/^
(?:
( (?:a|z+)+ )
iseq($count,1,"should have matched once only [RT#36046]");
}
+{ # Test the (?COMMIT) pattern
+ our $count = 0;
+ 'aaab'=~/a+b?(?{$count++})(?FAIL)/;
+ iseq($count,9,"expect 9 for no (?COMMIT)");
+ $count = 0;
+ 'aaab'=~/a+b?(?COMMIT)(?{$count++})(?FAIL)/;
+ iseq($count,3,"expect 3 with (?COMMIT)");
+ local $_='aaab';
+ $count=0;
+ 1 while /.(?COMMIT)(?{$count++})(?FAIL)/g;
+ iseq($count,4,"/.(?COMMIT)/");
+ $count = 0;
+ 'aaab'=~/a+b?(??{'(?COMMIT)'})(?{$count++})(?FAIL)/;
+ iseq($count,3,"expect 3 with (?COMMIT)");
+ local $_='aaab';
+ $count=0;
+ 1 while /.(??{'(?COMMIT)'})(?{$count++})(?FAIL)/g;
+ iseq($count,4,"/.(?COMMIT)/");
+}
+{ # Test the (?CUT) pattern
+ our $count = 0;
+ 'aaab'=~/a+b?(?CUT)(?{$count++})(?FAIL)/;
+ iseq($count,1,"expect 1 with (?CUT)");
+ local $_='aaab';
+ $count=0;
+ 1 while /.(?CUT)(?{$count++})(?FAIL)/g;
+ iseq($count,4,"/.(?CUT)/");
+ $_='aaabaaab';
+ $count=0;
+ our @res=();
+ 1 while /(a+b?)(?CUT)(?{$count++; push @res,$1})(?FAIL)/g;
+ iseq($count,2,"Expect 2 with (?CUT)" );
+ iseq("@res","aaab aaab","adjacent (?CUT) works as expected" );
+}
+{ # Test the (?ERROR) pattern
+ our $count = 0;
+ 'aaabaaab'=~/a+b?(?ERROR)(?{$count++})(?FAIL)/;
+ iseq($count,1,"expect 1 with (?ERROR)");
+ local $_='aaab';
+ $count=0;
+ 1 while /.(?ERROR)(?{$count++})(?FAIL)/g;
+ iseq($count,1,"/.(?ERROR)/");
+ $_='aaabaaab';
+ $count=0;
+ our @res=();
+ 1 while /(a+b?)(?ERROR)(?{$count++; push @res,$1})(?FAIL)/g;
+ iseq($count,1,"Expect 1 with (?ERROR)" );
+ iseq("@res","aaab","adjacent (?ERROR) works as expected" );
+}
+#-------------------------------------------------------------------
+
# Keep the following tests last -- they may crash perl
ok(("a" x (2**15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker: #24274")
"Regexp /^(??{'(.)'x 100})/ crashes older perls")
or print "# Unexpected outcome: should pass or crash perl\n";
+# Put new tests above the line, not here.
+
# Don't forget to update this!
-BEGIN{print "1..1289\n"};
+BEGIN{print "1..1300\n"};