I32 sawback; /* Did we see \1, ...? */
U32 seen;
I32 size; /* Code size. */
- I32 npar; /* () count. */
+ I32 npar; /* Capture buffer count, (OPEN). */
+ I32 cpar; /* Capture buffer count, (CLOSE). */
I32 nestroot; /* root parens we are in - used by accept */
I32 extralen;
I32 seen_zerolen;
#define RExC_seen (pRExC_state->seen)
#define RExC_size (pRExC_state->size)
#define RExC_npar (pRExC_state->npar)
+#define RExC_cpar (pRExC_state->cpar)
#define RExC_nestroot (pRExC_state->nestroot)
#define RExC_extralen (pRExC_state->extralen)
#define RExC_seen_zerolen (pRExC_state->seen_zerolen)
#define DEBUG_STUDYDATA(data,depth) \
DEBUG_OPTIMISE_MORE_r(if(data){ \
PerlIO_printf(Perl_debug_log, \
- "%*s"/* Len:%"IVdf"/%"IVdf" */" Pos:%"IVdf"/%"IVdf \
+ "%*s"/* Len:%"IVdf"/%"IVdf" */"Pos:%"IVdf"/%"IVdf \
" Flags: %"IVdf" Whilem_c: %"IVdf" Lcp: %"IVdf" ", \
(int)(depth)*2, "", \
(IV)((data)->pos_min), \
}
if ( count == 1 ) {
SV **tmp = av_fetch( TRIE_REVCHARMAP(trie), idx, 0);
- const char *ch = SvPV_nolen_const( *tmp );
- DEBUG_OPTIMISE_r(
+ char *ch = SvPV_nolen( *tmp );
+ DEBUG_OPTIMISE_r({
+ SV *sv=sv_newmortal();
PerlIO_printf( Perl_debug_log,
"%*sPrefix State: %"UVuf" Idx:%"UVuf" Char='%s'\n",
(int)depth * 2 + 2, "",
- (UV)state, (UV)idx, ch)
- );
+ (UV)state, (UV)idx,
+ pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 6,
+ PL_colors[0], PL_colors[1],
+ (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
+ PERL_PV_ESCAPE_FIRSTCHAR
+ )
+ );
+ });
if ( state==1 ) {
OP( convert ) = nodetype;
str=STRING(convert);
STR_LEN(convert)=0;
}
- *str++=*ch;
- STR_LEN(convert)++;
-
+ while (*ch) {
+ *str++ = *ch++;
+ STR_LEN(convert)++;
+ }
+
} else {
#ifdef DEBUGGING
if (state>1)
trie->maxlen -= (state - 1);
DEBUG_r({
regnode *fix = convert;
+ U32 word = trie->wordcount;
mjd_nodelen++;
Set_Node_Offset_Length(convert, mjd_offset, state - 1);
while( ++fix < n ) {
Set_Node_Offset_Length(fix, 0, 0);
}
+ while (word--) {
+ SV ** const tmp = av_fetch( trie->words, word, 0 );
+ if (tmp) {
+ if ( STR_LEN(convert) <= SvCUR(*tmp) )
+ sv_chop(*tmp, SvPV_nolen(*tmp) + STR_LEN(convert));
+ else
+ sv_chop(*tmp, SvPV_nolen(*tmp) + SvCUR(*tmp));
+ }
+ }
});
if (trie->maxlen) {
convert = n;
#endif
#define DEBUG_PEEP(str,scan,depth) \
- DEBUG_OPTIMISE_r({ \
+ DEBUG_OPTIMISE_r({if (scan){ \
SV * const mysv=sv_newmortal(); \
regnode *Next = regnext(scan); \
regprop(RExC_rx, mysv, scan); \
PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s (%d)\n", \
(int)depth*2, "", REG_NODE_NUM(scan), SvPV_nolen_const(mysv),\
Next ? (REG_NODE_NUM(Next)) : 0 ); \
- });
+ }});
Newx(and_withp,1,struct regnode_charclass_class); \
SAVEFREEPV(and_withp)
+/* this is a chain of data about sub patterns we are processing that
+ need to be handled seperately/specially in study_chunk. Its so
+ we can simulate recursion without losing state. */
+struct scan_frame;
+typedef struct scan_frame {
+ regnode *last; /* last node to process in this frame */
+ regnode *next; /* next node to process when last is reached */
+ struct scan_frame *prev; /*previous frame*/
+ I32 stop; /* what stopparen do we use */
+} scan_frame;
+
STATIC I32
S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
I32 *minlenp, I32 *deltap,
SV *re_trie_maxbuff = NULL;
regnode *first_non_open = scan;
I32 stopmin = I32_MAX;
+ scan_frame *frame = NULL;
+
GET_RE_DEBUG_FLAGS_DECL;
+
#ifdef DEBUGGING
StructCopy(&zero_scan_data, &data_fake, scan_data_t);
#endif
}
- while (scan && OP(scan) != END && scan < last) {
+ fake_study_recurse:
+ while ( scan && OP(scan) != END && scan < last ){
/* Peephole optimizer: */
DEBUG_STUDYDATA(data,depth);
DEBUG_PEEP("Peep",scan,depth);
/* The principal pseudo-switch. Cannot be a switch, since we
look into several different things. */
if (OP(scan) == BRANCH || OP(scan) == BRANCHJ
- || OP(scan) == IFTHEN || OP(scan) == SUSPEND) {
+ || OP(scan) == IFTHEN) {
next = regnext(scan);
code = OP(scan);
/* demq: the op(next)==code check is to see if we have "branch-branch" AFAICT */
- if (OP(next) == code || code == IFTHEN || code == SUSPEND) {
+ if (OP(next) == code || code == IFTHEN) {
/* NOTE - There is similar code to this block below for handling
TRIE nodes on a re-study. If you change stuff here check there
too. */
struct regnode_charclass_class accum;
regnode * const startbranch=scan;
- if (flags & SCF_DO_SUBSTR) /* XXXX Add !SUSPEND? */
+ if (flags & SCF_DO_SUBSTR)
scan_commit(pRExC_state, data, minlenp); /* Cannot merge strings after this. */
if (flags & SCF_DO_STCLASS)
cl_init_zero(pRExC_state, &accum);
}
if (flags & SCF_DO_STCLASS)
cl_or(pRExC_state, &accum, &this_class);
- if (code == SUSPEND)
- break;
}
if (code == IFTHEN && num < 2) /* Empty ELSE branch */
min1 = 0;
} else /* single branch is optimized. */
scan = NEXTOPER(scan);
continue;
+ } else if (OP(scan) == SUSPEND || OP(scan) == GOSUB || OP(scan) == GOSTART) {
+ scan_frame *newframe = NULL;
+ I32 paren;
+ regnode *start;
+ regnode *end;
+
+ if (OP(scan) != SUSPEND) {
+ /* set the pointer */
+ if (OP(scan) == GOSUB) {
+ paren = ARG(scan);
+ RExC_recurse[ARG2L(scan)] = scan;
+ start = RExC_open_parens[paren-1];
+ end = RExC_close_parens[paren-1];
+ } else {
+ paren = 0;
+ start = RExC_rx->program + 1;
+ end = RExC_opend;
+ }
+ if (!recursed) {
+ Newxz(recursed, (((RExC_npar)>>3) +1), U8);
+ SAVEFREEPV(recursed);
+ }
+ if (!PAREN_TEST(recursed,paren+1)) {
+ PAREN_SET(recursed,paren+1);
+ Newx(newframe,1,scan_frame);
+ } else {
+ if (flags & SCF_DO_SUBSTR) {
+ scan_commit(pRExC_state,data,minlenp);
+ data->longest = &(data->longest_float);
+ }
+ is_inf = is_inf_internal = 1;
+ if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
+ cl_anything(pRExC_state, data->start_class);
+ flags &= ~SCF_DO_STCLASS;
+ }
+ } else {
+ Newx(newframe,1,scan_frame);
+ paren = stopparen;
+ start = scan+2;
+ end = regnext(scan);
+ }
+ if (newframe) {
+ assert(start);
+ assert(end);
+ SAVEFREEPV(newframe);
+ newframe->next = regnext(scan);
+ newframe->last = last;
+ newframe->stop = stopparen;
+ newframe->prev = frame;
+
+ frame = newframe;
+ scan = start;
+ stopparen = paren;
+ last = end;
+
+ continue;
+ }
}
else if (OP(scan) == EXACT) {
I32 l = STR_LEN(scan);
if (data)
*(data->last_closep) = ARG(scan);
}
- else if (OP(scan) == GOSUB || OP(scan) == GOSTART) {
- /* set the pointer */
- I32 paren;
- regnode *start;
- regnode *end;
- if (OP(scan) == GOSUB) {
- paren = ARG(scan);
- RExC_recurse[ARG2L(scan)] = scan;
- start = RExC_open_parens[paren-1];
- end = RExC_close_parens[paren-1];
- } else {
- paren = 0;
- start = RExC_rx->program + 1;
- end = RExC_opend;
- }
- assert(start);
- assert(end);
- if (!recursed) {
- Newxz(recursed, (((RExC_npar)>>3) +1), U8);
- SAVEFREEPV(recursed);
- }
- if (!PAREN_TEST(recursed,paren+1)) {
- I32 deltanext = 0;
- PAREN_SET(recursed,paren+1);
-
- DEBUG_PEEP("goto",start,depth);
- min += study_chunk(
- pRExC_state,
- &start,
- minlenp,
- &deltanext,
- end+1,
- data,
- paren,
- recursed,
- and_withp,
- flags,depth+1);
- delta+=deltanext;
- if (deltanext == I32_MAX) {
- is_inf = is_inf_internal = 1;
- delta=deltanext;
- }
- DEBUG_PEEP("rtrn",end,depth);
- PAREN_UNSET(recursed,paren+1);
- } else {
- if (flags & SCF_DO_SUBSTR) {
- scan_commit(pRExC_state,data,minlenp);
- data->longest = &(data->longest_float);
- }
- is_inf = is_inf_internal = 1;
- if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
- cl_anything(pRExC_state, data->start_class);
- flags &= ~SCF_DO_STCLASS;
- }
- }
else if (OP(scan) == EVAL) {
if (data)
data->flags |= SF_HAS_EVAL;
/* Else: zero-length, ignore. */
scan = regnext(scan);
}
+ if (frame) {
+ last = frame->last;
+ scan = frame->next;
+ stopparen = frame->stop;
+ frame = frame->prev;
+ goto fake_study_recurse;
+ }
finish:
+ assert(!frame);
+
*scanp = scan;
*deltap = is_inf_internal ? I32_MAX : delta;
if (flags & SCF_DO_SUBSTR && is_inf)
RExC_end = xend;
RExC_naughty = 0;
RExC_npar = 1;
+ RExC_cpar = 1;
RExC_nestroot = 0;
RExC_size = 0L;
RExC_emit = &PL_regdummy;
r->substrs = 0; /* Useful during FAIL. */
r->startp = 0; /* Useful during FAIL. */
r->endp = 0;
+ r->swap = NULL;
r->paren_names = 0;
if (RExC_seen & REG_SEEN_RECURSE) {
RExC_end = xend;
RExC_naughty = 0;
RExC_npar = 1;
+ RExC_cpar = 1;
RExC_emit_start = r->program;
RExC_emit = r->program;
#ifdef DEBUGGING
first = NEXTOPER(first);
goto again;
}
- else if (!sawopen && (OP(first) == STAR &&
+ else if ((!sawopen || !RExC_sawback) &&
+ (OP(first) == STAR &&
PL_regkind[OP(NEXTOPER(first))] == REG_ANY) &&
!(r->reganch & ROPT_ANCH) )
{
}
Newxz(r->startp, RExC_npar, I32);
Newxz(r->endp, RExC_npar, I32);
-
- DEBUG_r( RX_DEBUG_on(r) );
+ /* assume we don't need to swap parens around before we match */
+
DEBUG_DUMP_r({
PerlIO_printf(Perl_debug_log,"Final program:\n");
regdump(r);
}
goto gen_recurse_regop;
/* NOT REACHED */
+ case '+':
+ if (!(RExC_parse[0] >= '1' && RExC_parse[0] <= '9')) {
+ RExC_parse++;
+ vFAIL("Illegal pattern");
+ }
+ goto parse_recursion;
+ /* NOT REACHED*/
+ case '-': /* (?-1) */
+ if (!(RExC_parse[0] >= '1' && RExC_parse[0] <= '9')) {
+ RExC_parse--; /* rewind to let it be handled later */
+ goto parse_flags;
+ }
+ /*FALLTHROUGH */
case '1': case '2': case '3': case '4': /* (?1) */
case '5': case '6': case '7': case '8': case '9':
RExC_parse--;
+ parse_recursion:
num = atoi(RExC_parse);
parse_start = RExC_parse - 1; /* MJD */
+ if (*RExC_parse == '-')
+ RExC_parse++;
while (isDIGIT(*RExC_parse))
RExC_parse++;
if (*RExC_parse!=')')
vFAIL("Expecting close bracket");
gen_recurse_regop:
+ if ( paren == '-' ) {
+ /*
+ Diagram of capture buffer numbering.
+ Top line is the normal capture buffer numbers
+ Botton line is the negative indexing as from
+ the X (the (?-2))
+
+ + 1 2 3 4 5 X 6 7
+ /(a(x)y)(a(b(c(?-2)d)e)f)(g(h))/
+ - 5 4 3 2 1 X x x
+
+ */
+ num = RExC_npar + num;
+ if (num < 1) {
+ RExC_parse++;
+ vFAIL("Reference to nonexistent group");
+ }
+ } else if ( paren == '+' ) {
+ num = RExC_npar + num - 1;
+ }
+
ret = reganode(pRExC_state, GOSUB, num);
if (!SIZE_ONLY) {
if (num > (I32)RExC_rx->nparens) {
ender = reg_node(pRExC_state, TAIL);
break;
case 1:
+ RExC_cpar++;
ender = reganode(pRExC_state, CLOSE, parno);
if (!SIZE_ONLY && RExC_seen & REG_SEEN_RECURSE) {
DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
case 'c':
case '0':
goto defchar;
+ case 'R':
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
{
- const I32 num = atoi(RExC_parse);
-
+ I32 num;
+ bool isrel=(*RExC_parse=='R');
+ if (isrel)
+ RExC_parse++;
+ num = atoi(RExC_parse);
+ if (isrel) {
+ num = RExC_cpar - num;
+ if (num < 1)
+ vFAIL("Reference to nonexistent or unclosed group");
+ }
if (num > 9 && num >= RExC_npar)
goto defchar;
else {
while (isDIGIT(*RExC_parse))
RExC_parse++;
- if (!SIZE_ONLY && num > (I32)RExC_rx->nparens)
- vFAIL("Reference to nonexistent group");
+ if (!SIZE_ONLY) {
+ if (num > (I32)RExC_rx->nparens)
+ vFAIL("Reference to nonexistent group");
+ /* People make this error all the time apparently.
+ So we cant fail on it, even though we should
+
+ else if (num >= RExC_cpar)
+ vFAIL("Reference to unclosed group will always match");
+ */
+ }
RExC_sawback = 1;
ret = reganode(pRExC_state,
(U8)(FOLD ? (LOC ? REFFL : REFF) : REF),
case 'p':
case 'P':
case 'N':
+ case 'R':
--p;
goto loopdone;
case 'n':
GET_RE_DEBUG_FLAGS_DECL;
sv_setpvn(sv, "", 0);
+
if (OP(o) > REGNODE_MAX) /* regnode.type is unsigned */
/* It would be nice to FAIL() here, but this may be called from
regexec.c, and it would be hard to supply pRExC_state. */
DEBUG_COMPILE_r({
if (!PL_colorset)
reginitcolors();
- if (RX_DEBUG(r)){
- SV *dsv= sv_newmortal();
+ {
+ SV *dsv= sv_newmortal();
RE_PV_QUOTED_DECL(s, (r->reganch & ROPT_UTF8),
dsv, r->precomp, r->prelen, 60);
PerlIO_printf(Perl_debug_log,"%sFreeing REx:%s %s\n",
if (trie->nextword)
Safefree(trie->nextword);
#ifdef DEBUGGING
- if (RX_DEBUG(r)) {
- if (trie->words)
- SvREFCNT_dec((SV*)trie->words);
- if (trie->revcharmap)
- SvREFCNT_dec((SV*)trie->revcharmap);
- }
+ if (trie->words)
+ SvREFCNT_dec((SV*)trie->words);
+ if (trie->revcharmap)
+ SvREFCNT_dec((SV*)trie->revcharmap);
#endif
Safefree(r->data->data[n]); /* do this last!!!! */
}
}
Safefree(r->startp);
Safefree(r->endp);
+ if (r->swap) {
+ Safefree(r->swap->startp);
+ Safefree(r->swap->endp);
+ Safefree(r->swap);
+ }
Safefree(r);
}
Copy(r->startp, ret->startp, npar, I32);
Newx(ret->endp, npar, I32);
Copy(r->startp, ret->startp, npar, I32);
+ if(r->swap) {
+ Newx(ret->swap, 1, regexp_paren_ofs);
+ /* no need to copy these */
+ Newx(ret->swap->startp, npar, I32);
+ Newx(ret->swap->endp, npar, I32);
+ } else {
+ ret->swap = NULL;
+ }
Newx(ret->substrs, 1, struct reg_substr_data);
for (s = ret->substrs->data, i = 0; i < 3; i++, s++) {
NODE_ALIGN(node);
op = OP(node);
- if (op == CLOSE)
+ if (op == CLOSE || op == WHILEM)
indent--;
next = regnext((regnode *)node);
}
if (op == CURLYX || op == OPEN)
indent++;
- else if (op == WHILEM)
- indent--;
}
CLEAR_OPTSTART;
#ifdef DEBUG_DUMPUNTIL