char *parse; /* Input-scan pointer. */
I32 whilem_seen; /* number of WHILEM in this expr */
regnode *emit_start; /* Start of emitted-code area */
+ regnode *emit_bound; /* First regnode outside of the allocated space */
regnode *emit; /* Code-emit pointer; &regdummy = don't = compiling */
I32 naughty; /* How bad is this pattern? */
I32 sawback; /* Did we see \1, ...? */
#endif
#define RExC_emit (pRExC_state->emit)
#define RExC_emit_start (pRExC_state->emit_start)
+#define RExC_emit_bound (pRExC_state->emit_bound)
#define RExC_naughty (pRExC_state->naughty)
#define RExC_sawback (pRExC_state->sawback)
#define RExC_seen (pRExC_state->seen)
* Flags to be passed up and down.
*/
#define WORST 0 /* Worst case. */
-#define HASWIDTH 0x1 /* Known to match non-null strings. */
-#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
-#define SPSTART 0x4 /* Starts with * or +. */
-#define TRYAGAIN 0x8 /* Weeded out a declaration. */
+#define HASWIDTH 0x01 /* Known to match non-null strings. */
+#define SIMPLE 0x02 /* Simple enough to be STAR/PLUS operand. */
+#define SPSTART 0x04 /* Starts with * or +. */
+#define TRYAGAIN 0x08 /* Weeded out a declaration. */
+#define POSTPONED 0x10 /* (?1),(?&name), (??{...}) or similar */
#define REG_NODE_NUM(x) ((x) ? (int)((x)-RExC_emit_start) : -1)
if (RExC_whilem_seen > 15)
RExC_whilem_seen = 15;
-#ifdef DEBUGGING
- /* Make room for a sentinel value at the end of the program */
- RExC_size++;
-#endif
-
/* Allocate space and zero-initialize. Note, the two step process
of zeroing when in debug mode, thus anything assigned has to
happen after that */
RExC_npar = 1;
RExC_emit_start = ri->program;
RExC_emit = ri->program;
-#ifdef DEBUGGING
- /* put a sentinal on the end of the program so we can check for
- overwrites */
- ri->program[RExC_size].type = 255;
-#endif
+ RExC_emit_bound = ri->program + RExC_size + 1;
+
/* Store the count of eval-groups for security checks: */
RExC_rx->seen_evals = RExC_seen_evals;
REGC((U8)REG_MAGIC, (char*) RExC_emit++);
return(r);
}
-#undef CORE_ONLY_BLOCK
#undef RE_ENGINE_PTR
-#ifndef PERL_IN_XSUB_RE
+/* Perl_reg_named_buff_get: fetch the capture value(s) registered under the
+ * name namesv in the paren_names hash of the regexp rx.
+ *
+ * The hash entry's PV buffer is read as an array of I32 group numbers and
+ * its IV slot as the number of entries.  Each group number is considered
+ * "matched" when it is within rx->nparens and both rx->startp[] and
+ * rx->endp[] for it differ from -1; matched groups are fetched through
+ * CALLREG_NUMBUF, unmatched ones are represented by a copy of PL_sv_undef.
+ *
+ * flags & 1 clear: return the value of the first matched group.
+ * flags & 1 set:   return a new AV (cast to SV*) holding one element per
+ *                  group number, undef copies included.
+ *
+ * Returns NULL when rx is NULL, rx has no paren_names, the name is not in
+ * the hash, or (flags & 1 clear) none of the named groups matched.
+ *
+ * NOTE(review): with flags & 1 set, the AV is allocated before the lookup
+ * and is not released on the NULL-returning paths -- looks like a leak,
+ * confirm.  Likewise, with flags & 1 clear the undef copies made for
+ * unmatched groups are neither returned nor freed here.
+ */
SV*
-Perl_reg_named_buff_get(pTHX_ SV* namesv, const REGEXP * const from_re, U32 flags)
+Perl_reg_named_buff_get(pTHX_ const REGEXP * const rx, SV* namesv, U32 flags)
{
AV *retarray = NULL;
SV *ret;
if (flags & 1)
retarray=newAV();
-
- if (from_re || PL_curpm) {
- const REGEXP * const rx = from_re ? from_re : PM_GETRE(PL_curpm);
- if (rx && rx->paren_names) {
- HE *he_str = hv_fetch_ent( rx->paren_names, namesv, 0, 0 );
- if (he_str) {
- IV i;
- SV* sv_dat=HeVAL(he_str);
- I32 *nums=(I32*)SvPVX(sv_dat);
- for ( i=0; i<SvIVX(sv_dat); i++ ) {
- if ((I32)(rx->nparens) >= nums[i]
- && rx->startp[nums[i]] != -1
- && rx->endp[nums[i]] != -1)
- {
- ret = reg_numbered_buff_get(nums[i],rx,NULL,0);
- if (!retarray)
- return ret;
- } else {
- ret = newSVsv(&PL_sv_undef);
- }
- if (retarray) {
- SvREFCNT_inc(ret);
- av_push(retarray, ret);
- }
+
+ if (rx && rx->paren_names) {
+ HE *he_str = hv_fetch_ent( rx->paren_names, namesv, 0, 0 );
+ if (he_str) {
+ IV i;
+ SV* sv_dat=HeVAL(he_str);
+ I32 *nums=(I32*)SvPVX(sv_dat); /* PV buffer viewed as I32 group numbers */
+ for ( i=0; i<SvIVX(sv_dat); i++ ) { /* IV slot is used as the entry count */
+ if ((I32)(rx->nparens) >= nums[i]
+ && rx->startp[nums[i]] != -1
+ && rx->endp[nums[i]] != -1)
+ {
+ ret = CALLREG_NUMBUF(rx,nums[i],NULL); /* NULL: no caller-supplied SV to reuse */
+ if (!retarray)
+ return ret;
+ } else {
+ ret = newSVsv(&PL_sv_undef);
+ }
+ if (retarray) {
+ SvREFCNT_inc(ret); /* NOTE(review): extra count before av_push -- confirm intended ownership */
+ av_push(retarray, ret);
}
- if (retarray)
- return (SV*)retarray;
}
+ if (retarray)
+ return (SV*)retarray;
}
}
return NULL;
}
SV*
-Perl_reg_numbered_buff_get(pTHX_ I32 paren, const REGEXP * const rx, SV* usesv, U32 flags)
+Perl_reg_numbered_buff_get(pTHX_ const REGEXP * const rx, I32 paren, SV* usesv)
{
char *s = NULL;
I32 i = 0;
I32 s1, t1;
SV *sv = usesv ? usesv : newSVpvs("");
- PERL_UNUSED_ARG(flags);
if (!rx->subbeg) {
sv_setsv(sv,&PL_sv_undef);
}
return sv;
}
-#endif
+
/* Scans the name of a named buffer from the pattern.
* If flags is REG_RSN_RETURN_NULL returns null.
PerlIO_printf(Perl_debug_log,"%16s",""); \
\
if (SIZE_ONLY) \
- num=RExC_size; \
+ num = RExC_size + 1; \
else \
num=REG_NODE_NUM(RExC_emit); \
if (RExC_lastnum!=num) \
Perl_croak(aTHX_
"panic: paren_name hash element allocation failed");
} else if ( SvPOK(sv_dat) ) {
- IV count=SvIV(sv_dat);
- I32 *pv=(I32*)SvGROW(sv_dat,SvCUR(sv_dat)+sizeof(I32)+1);
- SvCUR_set(sv_dat,SvCUR(sv_dat)+sizeof(I32));
- pv[count]=RExC_npar;
- SvIVX(sv_dat)++;
+ /* (?|...) can mean we have dupes so scan to check
+ whether it's already been stored. Maybe a flag indicating
+ we are inside such a construct would be useful,
+ but the arrays are likely to be quite small, so
+ for now we punt -- dmq */
+ IV count = SvIV(sv_dat);
+ I32 *pv = (I32*)SvPVX(sv_dat);
+ IV i;
+ for ( i = 0 ; i < count ; i++ ) {
+ if ( pv[i] == RExC_npar ) {
+ count = 0;
+ break;
+ }
+ }
+ if ( count ) {
+ pv = (I32*)SvGROW(sv_dat, SvCUR(sv_dat) + sizeof(I32)+1);
+ SvCUR_set(sv_dat, SvCUR(sv_dat) + sizeof(I32));
+ pv[count] = RExC_npar;
+ SvIVX(sv_dat)++;
+ }
} else {
(void)SvUPGRADE(sv_dat,SVt_PVNV);
sv_setpvn(sv_dat, (char *)&(RExC_npar), sizeof(I32));
if (*RExC_parse != ')')
FAIL("Sequence (?R) not terminated");
ret = reg_node(pRExC_state, GOSTART);
+ *flagp |= POSTPONED;
nextchar(pRExC_state);
return ret;
/*notreached*/
Set_Node_Length(ret, 1 + regarglen[OP(ret)]); /* MJD */
Set_Node_Offset(ret, parse_start); /* MJD */
+ *flagp |= POSTPONED;
nextchar(pRExC_state);
return ret;
} /* named and numeric backreferences */
vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
/*NOTREACHED*/
}
+ *flagp |= POSTPONED;
paren = *RExC_parse++;
/* FALL THROUGH */
case '{': /* (?{...}) */
}
else
REGTAIL(pRExC_state, ret, ender);
+ RExC_size++; /* XXX WHY do we need this?!!
+ For large programs it seems to be required
+ but I can't figure out why. -- dmq*/
return ret;
}
else {
if (!SIZE_ONLY ){
if (!RExC_nestroot)
RExC_nestroot = parno;
- if (RExC_seen & REG_SEEN_RECURSE) {
+ if (RExC_seen & REG_SEEN_RECURSE
+ && !RExC_open_parens[parno-1])
+ {
DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
"Setting open paren #%"IVdf" to %d\n",
(IV)parno, REG_NODE_NUM(ret)));
}
else if (paren != '?') /* Not Conditional */
ret = br;
- *flagp |= flags & (SPSTART | HASWIDTH);
+ *flagp |= flags & (SPSTART | HASWIDTH | POSTPONED);
lastbr = br;
while (*RExC_parse == '|') {
if (!SIZE_ONLY && RExC_extralen) {
return(NULL);
REGTAIL(pRExC_state, lastbr, br); /* BRANCH -> BRANCH. */
lastbr = br;
- if (flags&HASWIDTH)
- *flagp |= HASWIDTH;
- *flagp |= flags&SPSTART;
+ *flagp |= flags & (SPSTART | HASWIDTH | POSTPONED);
}
if (have_branch || paren != ':') {
}
else if (ret == NULL)
ret = latest;
- *flagp |= flags&HASWIDTH;
+ *flagp |= flags&(HASWIDTH|POSTPONED);
if (chain == NULL) /* First piece. */
*flagp |= flags&SPSTART;
else {
goto do_curly;
}
nest_check:
- if (!SIZE_ONLY && !(flags&HASWIDTH) && max > REG_INFTY/3 && ckWARN(WARN_REGEXP)) {
+ if (!SIZE_ONLY && !(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3 && ckWARN(WARN_REGEXP)) {
vWARN3(RExC_parse,
"%.*s matches null string many times",
(int)(RExC_parse >= origparse ? RExC_parse - origparse : 0),
}
return(NULL);
}
- *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE);
+ *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
break;
case '|':
case ')':
return ret;
/****** !SIZE_ONLY AFTER HERE *********/
- if( stored == 1 && value < 256
+ if( stored == 1 && (value < 128 || (value < 256 && !UTF))
&& !( ANYOF_FLAGS(ret) & ( ANYOF_FLAGS_ALL ^ ANYOF_FOLD ) )
) {
/* optimize single char class to an EXACT node
RExC_size += 1;
return(ret);
}
-#ifdef DEBUGGING
- if (OP(RExC_emit) == 255)
- Perl_croak(aTHX_ "panic: reg_node overrun trying to emit %s: %d ",
- reg_name[op], OP(RExC_emit));
-#endif
+ if (RExC_emit >= RExC_emit_bound)
+ Perl_croak(aTHX_ "panic: reg_node overrun trying to emit %d", op);
+
NODE_ALIGN_FILL(ret);
ptr = ret;
FILL_ADVANCE_NODE(ptr, op);
*/
return(ret);
}
-#ifdef DEBUGGING
- if (OP(RExC_emit) == 255)
- Perl_croak(aTHX_ "panic: reganode overwriting end of allocated program space");
-#endif
+ if (RExC_emit >= RExC_emit_bound)
+ Perl_croak(aTHX_ "panic: reg_node overrun trying to emit %d", op);
+
NODE_ALIGN_FILL(ret);
ptr = ret;
FILL_ADVANCE_NODE_ARG(ptr, op, arg);
const int offset = regarglen[(U8)op];
const int size = NODE_STEP_REGNODE + offset;
GET_RE_DEBUG_FLAGS_DECL;
+ PERL_UNUSED_ARG(depth);
/* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
DEBUG_PARSE_FMT("inst"," - %s",reg_name[op]);
if (SIZE_ONLY) {
dst = RExC_emit;
if (RExC_open_parens) {
int paren;
- DEBUG_PARSE_FMT("inst"," - %"IVdf, (IV)RExC_npar);
+ /*DEBUG_PARSE_FMT("inst"," - %"IVdf, (IV)RExC_npar);*/
for ( paren=0 ; paren < RExC_npar ; paren++ ) {
if ( RExC_open_parens[paren] >= opnd ) {
- DEBUG_PARSE_FMT("open"," - %d",size);
+ /*DEBUG_PARSE_FMT("open"," - %d",size);*/
RExC_open_parens[paren] += size;
} else {
- DEBUG_PARSE_FMT("open"," - %s","ok");
+ /*DEBUG_PARSE_FMT("open"," - %s","ok");*/
}
if ( RExC_close_parens[paren] >= opnd ) {
- DEBUG_PARSE_FMT("close"," - %d",size);
+ /*DEBUG_PARSE_FMT("close"," - %d",size);*/
RExC_close_parens[paren] += size;
} else {
- DEBUG_PARSE_FMT("close"," - %s","ok");
+ /*DEBUG_PARSE_FMT("close"," - %s","ok");*/
}
}
}