register I32 tmp;
I32 minlen; /* must match at least this many chars */
I32 dontbother = 0; /* how many characters not to try at end */
- CURCUR cc;
I32 start_shift = 0; /* Offset of the start to find
constant substr. */ /* CC */
I32 end_shift = 0; /* Same for the end. */ /* CC */
char *scream_olds;
SV* oreplsv = GvSV(PL_replgv);
- cc.cur = 0;
- cc.oldcc = 0;
- PL_regcc = &cc;
+ PL_regcc = 0;
cache_re(prog);
#ifdef DEBUGGING
regexp *re;
MAGIC *mg = Null(MAGIC*);
re_cc_state state;
- CURCUR cctmp;
CHECKPOINT cp, lastcp;
if(SvROK(ret) || SvRMAGICAL(ret)) {
state.cc = PL_regcc;
state.re = PL_reg_re;
- cctmp.cur = 0;
- cctmp.oldcc = 0;
- PL_regcc = &cctmp;
+ PL_regcc = 0;
cp = regcppush(0); /* Save *all* the positions. */
REGCP_SET;
case LOGICAL:
logical = scan->flags;
break;
+/*******************************************************************
+ PL_regcc contains infoblock about the innermost (...)* loop, and
+ a pointer to the next outer infoblock.
+
+ Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
+
+ 1) After matching X, regnode for CURLYX is processed;
+
+ 2) This regnode creates infoblock on the stack, and calls
+ regmatch() recursively with the starting point at WHILEM node;
+
+ 3) Each hit of WHILEM node tries to match A and Z (in the order
+ depending on the current iteration, min/max of {min,max} and
+ greediness). The information about where are nodes for "A"
+ and "Z" is read from the infoblock, as is info on how many times "A"
+ was already matched, and greediness.
+
+ 4) After A matches, the same WHILEM node is hit again.
+
+ 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
+ of the same pair. Thus when WHILEM tries to match Z, it temporarily
+ resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
+ as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
+ of the external loop.
+
+ Currently present infoblocks form a tree with a stem formed by PL_curcc
+ and whatever it mentions via ->next, and additional attached trees
+ corresponding to temporarily unset infoblocks as in "5" above.
+
+ In the following picture infoblocks for outer loop of
+ (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
+ is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
+ infoblocks are drawn below the "reset" infoblock.
+
+ In fact in the picture below we do not show failed matches for Z and T
+ by WHILEM blocks. [We illustrate minimal matches, since for them it is
+ more obvious *why* one needs to *temporary* unset infoblocks.]
+
+ Matched REx position InfoBlocks Comment
+ (Y(A)*?Z)*?T x
+ Y(A)*?Z)*?T x <- O
+ Y (A)*?Z)*?T x <- O
+ Y A)*?Z)*?T x <- O <- I
+ YA )*?Z)*?T x <- O <- I
+ YA A)*?Z)*?T x <- O <- I
+ YAA )*?Z)*?T x <- O <- I
+ YAA Z)*?T x <- O # Temporary unset I
+ I
+
+ YAAZ Y(A)*?Z)*?T x <- O
+ I
+
+ YAAZY (A)*?Z)*?T x <- O
+ I
+
+ YAAZY A)*?Z)*?T x <- O <- I
+ I
+
+ YAAZYA )*?Z)*?T x <- O <- I
+ I
+
+ YAAZYA Z)*?T x <- O # Temporary unset I
+ I,I
+
+ YAAZYAZ )*?T x <- O
+ I,I
+
+ YAAZYAZ T x # Temporary unset O
+ O
+ I,I
+
+ YAAZYAZT x
+ O
+ I,I
+ *******************************************************************/
case CURLYX: {
CURCUR cc;
CHECKPOINT cp = PL_savestack_ix;
if (locinput == cc->lastloc && n >= cc->min) {
PL_regcc = cc->oldcc;
- ln = PL_regcc->cur;
+ if (PL_regcc)
+ ln = PL_regcc->cur;
DEBUG_r(
PerlIO_printf(Perl_debug_log,
"%*s empty match detected, try continuation...\n",
"%*s failed...\n",
REPORT_CODE_OFF+PL_regindent*2, "")
);
- PL_regcc->cur = ln;
+ if (PL_regcc)
+ PL_regcc->cur = ln;
PL_regcc = cc;
sayNO;
}
if (cc->minmod) {
PL_regcc = cc->oldcc;
- ln = PL_regcc->cur;
+ if (PL_regcc)
+ ln = PL_regcc->cur;
cp = regcppush(cc->parenfloor);
REGCP_SET;
if (regmatch(cc->next)) {
}
REGCP_UNWIND;
regcppop();
- PL_regcc->cur = ln;
+ if (PL_regcc)
+ PL_regcc->cur = ln;
PL_regcc = cc;
if (n >= cc->max) { /* Maximum greed exceeded? */
/* Failed deeper matches of scan, so see if this one works. */
PL_regcc = cc->oldcc;
- ln = PL_regcc->cur;
+ if (PL_regcc)
+ ln = PL_regcc->cur;
if (regmatch(cc->next))
sayYES;
DEBUG_r(
PerlIO_printf(Perl_debug_log, "%*s failed...\n",
REPORT_CODE_OFF+PL_regindent*2, "")
);
- PL_regcc->cur = ln;
+ if (PL_regcc)
+ PL_regcc->cur = ln;
PL_regcc = cc;
cc->cur = n - 1;
cc->lastloc = lastloc;