#include "perl.h"
#include "regcomp.h"
-static char * reginput; /* String-input pointer. */
-static char * regbol; /* Beginning of input, for ^ check. */
-static char * regeol; /* End of input, for $ check. */
-static char ** regstartp; /* Pointer to startp array. */
-static char ** regendp; /* Ditto for endp. */
-static U32 * reglastparen; /* Similarly for lastparen. */
-static char * regtill; /* How far we are required to go. */
-static char regprev; /* char before regbol, \n if none */
-
-static char * regprecomp; /* uncompiled string. */
-static I32 regnpar; /* () count. */
-static I32 regsize; /* Largest OPEN seens. */
-static char ** reg_start_tmp;
-static U32 reg_start_tmpl;
-static struct reg_data *data;
-static char *bostr;
-
-static U32 reg_flags; /* tainted/warned */
-static I32 reg_eval_set;
-
#define RF_tainted 1 /* tainted information used? */
#define RF_warned 2 /* warned about big count? */
-#define RF_evaled 4 /* Did an EVAL? */
+#define RF_evaled 4 /* Did an EVAL with setting? */
+
+#define RS_init 1 /* eval environment created */
+#define RS_set 2 /* replsv value is set */
#ifndef STATIC
#define STATIC static
#endif
-#ifdef DEBUGGING
-static I32 regnarrate = 0;
-static regnode* regprogram = 0;
-#endif
-
-/* Current curly descriptor */
-typedef struct curcur CURCUR;
-struct curcur {
- int parenfloor; /* how far back to strip paren data */
- int cur; /* how many instances of scan we've matched */
- int min; /* the minimal number of scans to match */
- int max; /* the maximal number of scans to match */
- int minmod; /* whether to work our way up or down */
- regnode * scan; /* the thing to match */
- regnode * next; /* what has to match after it */
- char * lastloc; /* where we started matching this scan */
- CURCUR * oldcc; /* current curly before we started this one */
-};
-
-static CURCUR* regcc;
-
+#ifndef PERL_OBJECT
typedef I32 CHECKPOINT;
/*
static I32 regrepeat _((regnode *p, I32 max));
static I32 regrepeat_hard _((regnode *p, I32 max, I32 *lp));
static I32 regtry _((regexp *prog, char *startpos));
+
static bool reginclass _((char *p, I32 c));
static CHECKPOINT regcppush _((I32 parenfloor));
static char * regcppop _((void));
+#endif
+#define REGINCLASS(p,c) (*(p) ? reginclass(p,c) : ANYOF_TEST(p,c))
-static CHECKPOINT
+STATIC CHECKPOINT
regcppush(I32 parenfloor)
{
dTHR;
# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, " Setting an EVAL scope, savestack=%i\n", savestack_ix)); lastcp = savestack_ix
# define REGCP_UNWIND DEBUG_r(lastcp != savestack_ix ? PerlIO_printf(Perl_debug_log," Clearing an EVAL scope, savestack=%i..%i\n", lastcp, savestack_ix) : 0); regcpblow(lastcp)
-static char *
+STATIC char *
regcppop(void)
{
dTHR;
I32 end_shift = 0; /* Same for the end. */
I32 scream_pos = -1; /* Internal iterator of scream. */
char *scream_olds;
+ SV* oreplsv = GvSV(replgv);
cc.cur = 0;
cc.oldcc = 0;
case ANYOF:
Class = (char *) OPERAND(c);
while (s < strend) {
- if (reginclass(Class, *s)) {
+ if (REGINCLASS(Class, *s)) {
if (tmp && regtry(prog, s))
goto got_it;
else
}
}
}
+ /* Preserve the current value of $^R */
+ if (oreplsv != GvSV(replgv)) {
+ sv_setsv(oreplsv, GvSV(replgv));/* So that when GvSV(replgv) is
+ restored, the value remains
+ the same. */
+ }
return 1;
phooey:
/*
- regtry - try match at specific point
*/
-static I32 /* 0 failure, 1 success */
+STATIC I32 /* 0 failure, 1 success */
regtry(regexp *prog, char *startpos)
{
dTHR;
register char **ep;
CHECKPOINT lastcp;
+ if ((prog->reganch & ROPT_EVAL_SEEN) && !reg_eval_set) {
+ reg_eval_set = RS_init;
+ DEBUG_r(DEBUG_s(
+ PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n", stack_sp - stack_base);
+ ));
+ SAVEINT(cxstack[cxstack_ix].blk_oldsp);
+ cxstack[cxstack_ix].blk_oldsp = stack_sp - stack_base;
+ /* Otherwise OP_NEXTSTATE will free whatever is on the stack now. */
+ SAVETMPS;
+ /* Apparently this is not needed, judging by wantarray. */
+ /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
+ cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
+ }
reginput = startpos;
regstartp = prog->startp;
regendp = prog->endp;
sp = prog->startp;
ep = prog->endp;
- data = prog->data;
+ regdata = prog->data;
if (prog->nparens) {
for (i = prog->nparens; i >= 0; i--) {
*sp++ = NULL;
}
}
REGCP_SET;
- if (regmatch(prog->program + 1) && reginput >= regtill) {
+ if (regmatch(prog->program + 1)) {
prog->startp[0] = startpos;
prog->endp[0] = reginput;
return 1;
* maybe save a little bit of pushing and popping on the stack. It also takes
* advantage of machines that use a register save mask on subroutine entry.
*/
-static I32 /* 0 failure, 1 success */
+STATIC I32 /* 0 failure, 1 success */
regmatch(regnode *prog)
{
dTHR;
register regnode *scan; /* Current node. */
regnode *next; /* Next node. */
regnode *inner; /* Next node in internal branch. */
- register I32 nextchar;
+ register I32 nextchr; /* renamed to nextchr - nextchar collides with function of same name */
register I32 n; /* no or next */
register I32 ln; /* len or last */
register char *s; /* operand or save */
register I32 c1, c2, paren; /* case fold search, parenth */
int minmod = 0, sw = 0, logical = 0;
#ifdef DEBUGGING
- static int regindent = 0;
regindent++;
#endif
- nextchar = UCHARAT(locinput);
+ nextchr = UCHARAT(locinput);
scan = prog;
while (scan != NULL) {
#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
SvPVX(prop));
} );
-#ifdef REGALIGN
next = scan + NEXT_OFF(scan);
if (next == scan)
next = NULL;
-#else
- next = regnext(scan);
-#endif
switch (OP(scan)) {
case BOL:
if (locinput == regbol
? regprev == '\n'
: (multiline &&
- (nextchar || locinput < regeol) && locinput[-1] == '\n') )
+ (nextchr || locinput < regeol) && locinput[-1] == '\n') )
{
/* regtill = regbol; */
break;
case MBOL:
if (locinput == regbol
? regprev == '\n'
- : ((nextchar || locinput < regeol) && locinput[-1] == '\n') )
+ : ((nextchr || locinput < regeol) && locinput[-1] == '\n') )
{
break;
}
goto seol;
case MEOL:
meol:
- if ((nextchar || locinput < regeol) && nextchar != '\n')
+ if ((nextchr || locinput < regeol) && nextchr != '\n')
sayNO;
break;
case SEOL:
seol:
- if ((nextchar || locinput < regeol) && nextchar != '\n')
+ if ((nextchr || locinput < regeol) && nextchr != '\n')
sayNO;
if (regeol - locinput > 1)
sayNO;
break;
case SANY:
- if (!nextchar && locinput >= regeol)
+ if (!nextchr && locinput >= regeol)
sayNO;
- nextchar = UCHARAT(++locinput);
+ nextchr = UCHARAT(++locinput);
break;
case ANY:
- if (!nextchar && locinput >= regeol || nextchar == '\n')
+ if (!nextchr && locinput >= regeol || nextchr == '\n')
sayNO;
- nextchar = UCHARAT(++locinput);
+ nextchr = UCHARAT(++locinput);
break;
case EXACT:
s = (char *) OPERAND(scan);
ln = UCHARAT(s++);
/* Inline the first character, for speed. */
- if (UCHARAT(s) != nextchar)
+ if (UCHARAT(s) != nextchr)
sayNO;
if (regeol - locinput < ln)
sayNO;
if (ln > 1 && memNE(s, locinput, ln))
sayNO;
locinput += ln;
- nextchar = UCHARAT(locinput);
+ nextchr = UCHARAT(locinput);
break;
case EXACTFL:
reg_flags |= RF_tainted;
s = (char *) OPERAND(scan);
ln = UCHARAT(s++);
/* Inline the first character, for speed. */
- if (UCHARAT(s) != nextchar &&
+ if (UCHARAT(s) != nextchr &&
UCHARAT(s) != ((OP(scan) == EXACTF)
- ? fold : fold_locale)[nextchar])
+ ? fold : fold_locale)[nextchr])
sayNO;
if (regeol - locinput < ln)
sayNO;
: ibcmp_locale(s, locinput, ln)))
sayNO;
locinput += ln;
- nextchar = UCHARAT(locinput);
+ nextchr = UCHARAT(locinput);
break;
case ANYOF:
s = (char *) OPERAND(scan);
- if (nextchar < 0)
- nextchar = UCHARAT(locinput);
- if (!reginclass(s, nextchar))
+ if (nextchr < 0)
+ nextchr = UCHARAT(locinput);
+ if (!REGINCLASS(s, nextchr))
sayNO;
- if (!nextchar && locinput >= regeol)
+ if (!nextchr && locinput >= regeol)
sayNO;
- nextchar = UCHARAT(++locinput);
+ nextchr = UCHARAT(++locinput);
break;
case ALNUML:
reg_flags |= RF_tainted;
/* FALL THROUGH */
case ALNUM:
- if (!nextchar)
+ if (!nextchr)
sayNO;
if (!(OP(scan) == ALNUM
- ? isALNUM(nextchar) : isALNUM_LC(nextchar)))
+ ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
sayNO;
- nextchar = UCHARAT(++locinput);
+ nextchr = UCHARAT(++locinput);
break;
case NALNUML:
reg_flags |= RF_tainted;
/* FALL THROUGH */
case NALNUM:
- if (!nextchar && locinput >= regeol)
+ if (!nextchr && locinput >= regeol)
sayNO;
if (OP(scan) == NALNUM
- ? isALNUM(nextchar) : isALNUM_LC(nextchar))
+ ? isALNUM(nextchr) : isALNUM_LC(nextchr))
sayNO;
- nextchar = UCHARAT(++locinput);
+ nextchr = UCHARAT(++locinput);
break;
case BOUNDL:
case NBOUNDL:
ln = (locinput != regbol) ? UCHARAT(locinput - 1) : regprev;
if (OP(scan) == BOUND || OP(scan) == NBOUND) {
ln = isALNUM(ln);
- n = isALNUM(nextchar);
+ n = isALNUM(nextchr);
}
else {
ln = isALNUM_LC(ln);
- n = isALNUM_LC(nextchar);
+ n = isALNUM_LC(nextchr);
}
if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
sayNO;
reg_flags |= RF_tainted;
/* FALL THROUGH */
case SPACE:
- if (!nextchar && locinput >= regeol)
+ if (!nextchr && locinput >= regeol)
sayNO;
if (!(OP(scan) == SPACE
- ? isSPACE(nextchar) : isSPACE_LC(nextchar)))
+ ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
sayNO;
- nextchar = UCHARAT(++locinput);
+ nextchr = UCHARAT(++locinput);
break;
case NSPACEL:
reg_flags |= RF_tainted;
/* FALL THROUGH */
case NSPACE:
- if (!nextchar)
+ if (!nextchr)
sayNO;
if (OP(scan) == SPACE
- ? isSPACE(nextchar) : isSPACE_LC(nextchar))
+ ? isSPACE(nextchr) : isSPACE_LC(nextchr))
sayNO;
- nextchar = UCHARAT(++locinput);
+ nextchr = UCHARAT(++locinput);
break;
case DIGIT:
- if (!isDIGIT(nextchar))
+ if (!isDIGIT(nextchr))
sayNO;
- nextchar = UCHARAT(++locinput);
+ nextchr = UCHARAT(++locinput);
break;
case NDIGIT:
- if (!nextchar && locinput >= regeol)
+ if (!nextchr && locinput >= regeol)
sayNO;
- if (isDIGIT(nextchar))
+ if (isDIGIT(nextchr))
sayNO;
- nextchar = UCHARAT(++locinput);
+ nextchr = UCHARAT(++locinput);
break;
case REFFL:
reg_flags |= RF_tainted;
if (s == regendp[n])
break;
/* Inline the first character, for speed. */
- if (UCHARAT(s) != nextchar &&
+ if (UCHARAT(s) != nextchr &&
(OP(scan) == REF ||
(UCHARAT(s) != ((OP(scan) == REFF
- ? fold : fold_locale)[nextchar]))))
+ ? fold : fold_locale)[nextchr]))))
sayNO;
ln = regendp[n] - s;
if (locinput + ln > regeol)
: ibcmp_locale(s, locinput, ln))))
sayNO;
locinput += ln;
- nextchar = UCHARAT(locinput);
+ nextchr = UCHARAT(locinput);
break;
case NOTHING:
SV *ret;
n = ARG(scan);
- op = (OP_4tree*)data->data[n];
+ op = (OP_4tree*)regdata->data[n];
DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", op) );
- curpad = AvARRAY((AV*)data->data[n + 1]);
- if (!reg_eval_set) {
- /* Preserve whatever is on stack now, otherwise
- OP_NEXTSTATE will overwrite it. */
- SAVEINT(reg_eval_set); /* Protect against unwinding. */
- reg_eval_set = 1;
- DEBUG_r(DEBUG_s(
- PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n", stack_sp - stack_base);
- ));
- SAVEINT(cxstack[cxstack_ix].blk_oldsp);
- cxstack[cxstack_ix].blk_oldsp = stack_sp - stack_base;
- /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
- SAVETMPS;
- /* Apparently this is not needed, judging by wantarray. */
- /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
- cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
- }
+ curpad = AvARRAY((AV*)regdata->data[n + 1]);
- runops(); /* Scalar context. */
+ CALLRUNOPS(); /* Scalar context. */
SPAGAIN;
ret = POPs;
PUTBACK;
if (logical) {
logical = 0;
sw = SvTRUE(ret);
- }
+ } else
+ sv_setsv(save_scalar(replgv), ret);
op = oop;
curpad = ocurpad;
curcop = ocurcop;
regendp[n] = 0;
*reglastparen = n;
scan = next;
-#ifdef REGALIGN
/*SUPPRESS 560*/
if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
next += n;
else
next = NULL;
-#else
- next = regnext(next);
-#endif
inner = NEXTOPER(scan);
if (c1 == BRANCHJ) {
inner = NEXTOPER(inner);
break;
case CURLYM:
{
- I32 l;
+ I32 l = 0;
CHECKPOINT lastcp;
/* We suppose that the next guy does not need
and has no parenths to influence future backrefs. */
ln = ARG1(scan); /* min to match */
n = ARG2(scan); /* max to match */
-#ifdef REGALIGN_STRUCT
paren = scan->flags;
if (paren) {
if (paren > regsize)
if (paren > *reglastparen)
*reglastparen = paren;
}
-#endif
- scan = NEXTOPER(scan) + 4/sizeof(regnode);
+ scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
if (paren)
scan += NEXT_OFF(scan); /* Skip former OPEN. */
reginput = locinput;
minmod = 0;
if (ln && regrepeat_hard(scan, ln, &l) < ln)
sayNO;
- if (l == 0 && n >= ln
+ if (ln && l == 0 && n >= ln
/* In fact, this is tricky. If paren, then the
fact that we did/didnot match may influence
future execution. */
} else
c1 = c2 = -1000;
REGCP_SET;
+ /* This may be improved if l == 0. */
while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
/* If it could work, try it. */
if (c1 == -1000 ||
*reglastparen = paren;
ln = ARG1(scan); /* min to match */
n = ARG2(scan); /* max to match */
- scan = regnext(NEXTOPER(scan) + 4/sizeof(regnode));
+ scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
goto repeat;
case CURLY:
paren = 0;
ln = ARG1(scan); /* min to match */
n = ARG2(scan); /* max to match */
- scan = NEXTOPER(scan) + 4/sizeof(regnode);
+ scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
goto repeat;
case STAR:
ln = 0;
}
sayNO;
break;
- case SUCCEED:
case END:
+ if (locinput < regtill)
+ sayNO; /* Cannot match: too short. */
+ /* Fall through */
+ case SUCCEED:
reginput = locinput; /* put where regtry can find it */
sayYES; /* Success! */
case SUSPEND:
}
if (OP(scan) == SUSPEND) {
locinput = reginput;
- nextchar = UCHARAT(locinput);
+ nextchr = UCHARAT(locinput);
}
/* FALL THROUGH. */
case LONGJMP:
* That was true before, but now we assume scan - reginput is the count,
* rather than incrementing count on every character.]
*/
-static I32
+STATIC I32
regrepeat(regnode *p, I32 max)
{
register char *scan;
scan++;
break;
case ANYOF:
- while (scan < loceol && reginclass(opnd, *scan))
+ while (scan < loceol && REGINCLASS(opnd, *scan))
scan++;
break;
case ALNUM:
* The repeater is supposed to have constant length.
*/
-static I32
+STATIC I32
regrepeat_hard(regnode *p, I32 max, I32 *lp)
{
register char *scan;
- regclass - determine if a character falls into a character class
*/
-static bool
+STATIC bool
reginclass(register char *p, register I32 c)
{
char flags = *p;
bool match = FALSE;
c &= 0xFF;
- if (p[1 + (c >> 3)] & (1 << (c & 7)))
+ if (ANYOF_TEST(p, c))
match = TRUE;
else if (flags & ANYOF_FOLD) {
I32 cf;
}
else
cf = fold[c];
- if (p[1 + (cf >> 3)] & (1 << (cf & 7)))
+ if (ANYOF_TEST(p, cf))
match = TRUE;
}
}
}
- return match ^ ((flags & ANYOF_INVERT) != 0);
+ return (flags & ANYOF_INVERT) ? !match : match;
}