#undef op
#endif /* op */
-static regnode regdummy;
-static char * regparse; /* Input-scan pointer. */
-static char * regxend; /* End of input for compile */
-static regnode * regcode; /* Code-emit pointer; &regdummy = don't. */
-static I32 regnaughty; /* How bad is this pattern? */
-static I32 regsawback; /* Did we see \1, ...? */
-
-/* This guys appear both in regcomp.c and regexec.c, but there is no
- other reason to have them global. */
-static char * regprecomp; /* uncompiled string. */
-static I32 regnpar; /* () count. */
-static I32 regsize; /* Code size. */
-static U16 regflags; /* are we folding, multilining? */
-
#ifdef MSDOS
# if defined(BUGGY_MSC6)
/* MSC 6.00A breaks on op/regexp.t test 85 unless we turn this off */
* Forward declarations for pregcomp()'s friends.
*/
+#ifndef PERL_OBJECT
static regnode *reg _((I32, I32 *));
static regnode *reganode _((U8, U32));
static regnode *regatom _((I32 *));
static regnode *regpiece _((I32 *));
static void reginsert _((U8, regnode *));
static void regoptail _((regnode *, regnode *));
-static void regset _((char *, I32));
static void regtail _((regnode *, regnode *));
static char* regwhite _((char *, char *));
static char* nextchar _((void));
-
-static U32 regseen;
-static I32 seen_zerolen;
-static regexp *rx;
-static I32 extralen;
-
-#ifdef DEBUGGING
-static int colorset;
-char *colors[4];
-#endif
+static void re_croak2 _((const char* pat1,const char* pat2,...)) __attribute__((noreturn));
+#endif
/* Length of a variant. */
+#ifndef PERL_OBJECT
typedef struct {
I32 len_min;
I32 len_delta;
I32 offset_float_max;
I32 flags;
} scan_data_t;
+#endif
static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
#define SF_FIX_BEFORE_EOL (SF_FIX_BEFORE_SEOL|SF_FIX_BEFORE_MEOL)
#define SF_FL_BEFORE_EOL (SF_FL_BEFORE_SEOL|SF_FL_BEFORE_MEOL)
-#define SF_FIX_SHIFT_EOL (+2)
-#define SF_FL_SHIFT_EOL (+4)
+#ifdef NO_UNARY_PLUS
+# define SF_FIX_SHIFT_EOL (0+2)
+# define SF_FL_SHIFT_EOL (0+4)
+#else
+# define SF_FIX_SHIFT_EOL (+2)
+# define SF_FL_SHIFT_EOL (+4)
+#endif
#define SF_FIX_BEFORE_SEOL (SF_BEFORE_SEOL << SF_FIX_SHIFT_EOL)
#define SF_FIX_BEFORE_MEOL (SF_BEFORE_MEOL << SF_FIX_SHIFT_EOL)
#define SF_HAS_PAR 0x80
#define SF_IN_PAR 0x100
#define SF_HAS_EVAL 0x200
+#define SCF_DO_SUBSTR 0x400
-static void
+STATIC void
scan_commit(scan_data_t *data)
{
STRLEN l = SvCUR(data->last_found);
data->flags &= ~SF_BEFORE_EOL;
}
-#define SCF_DO_SUBSTR 1
-
/* Stops at toplevel WHILEM as well as at `last'. At end *scanp is set
to the position after last scanned or to NULL. */
-static I32
+STATIC I32
study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 flags)
/* scanp: Start here (read-write). */
/* deltap: Write maxlen-minlen here. */
scan = next;
if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
pars++;
- if (data_fake.flags & SF_HAS_EVAL)
+ if (data && (data_fake.flags & SF_HAS_EVAL))
data->flags |= SF_HAS_EVAL;
if (code == SUSPEND)
break;
data->longest = &(data->longest_float);
}
}
- if (fl & SF_HAS_EVAL)
+ if (data && (fl & SF_HAS_EVAL))
data->flags |= SF_HAS_EVAL;
optimize_curly_tail:
#ifdef REGALIGN
}
if (data && data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
pars++;
- if (data_fake.flags & SF_HAS_EVAL)
+ if (data && (data_fake.flags & SF_HAS_EVAL))
data->flags |= SF_HAS_EVAL;
} else if (OP(scan) == OPEN) {
pars++;
return min;
}
-static I32
+STATIC I32
add_data(I32 n, char *s)
{
if (rx->data) {
r->regstclass = NULL;
r->naughty = regnaughty >= 10; /* Probably an expensive pattern. */
scan = r->program + 1; /* First BRANCH. */
+
+ /* XXXX To minimize changes to RE engine we always allocate
+ 3-units-long substrs field. */
+ Newz(1004, r->substrs, 1, struct reg_substr_data);
+
if (OP(scan) != BRANCH) { /* Only one top-level choice. */
scan_data_t data;
I32 fake;
r->float_substr = data.longest_float;
r->float_min_offset = data.offset_float_min;
r->float_max_offset = data.offset_float_max;
- fbm_compile(r->float_substr);
+ fbm_compile(r->float_substr, 0);
BmUSEFUL(r->float_substr) = 100;
if (data.flags & SF_FL_BEFORE_EOL /* Cannot have SEOL and MULTI */
&& (!(data.flags & SF_FL_BEFORE_MEOL)
|| (regflags & PMf_MULTILINE)))) {
r->anchored_substr = data.longest_fixed;
r->anchored_offset = data.offset_fixed;
- fbm_compile(r->anchored_substr);
+ fbm_compile(r->anchored_substr, 0);
BmUSEFUL(r->anchored_substr) = 100;
if (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */
&& (!(data.flags & SF_FIX_BEFORE_MEOL)
* is a trifle forced, but the need to tie the tails of the branches to what
* follows makes it hard to avoid.
*/
-static regnode *
+STATIC regnode *
reg(I32 paren, I32 *flagp)
/* paren: Parenthesized? 0=top, 1=(, inside: changed to letter. */
{
rx->data->data[n+1] = (void*)av;
rx->data->data[n+2] = (void*)sop;
SvREFCNT_dec(sv);
+ } else { /* First pass */
+ if (tainted)
+ FAIL("Eval-group in insecure regular expression");
}
nextchar();
- if (tainted)
- FAIL("Eval-group in insecure regular expression");
return reganode(EVAL, n);
}
case '(':
break;
default:
--regparse;
- while (*regparse && strchr("iogcmsx", *regparse))
- pmflag(&regflags, *regparse++);
+ while (*regparse && strchr("iogcmsx", *regparse)) {
+ if (*regparse != 'o')
+ pmflag(&regflags, *regparse);
+ ++regparse;
+ }
unknown:
if (*regparse != ')')
FAIL2("Sequence (?%c...) not recognized", *regparse);
*
* Implements the concatenation operator.
*/
-static regnode *
+STATIC regnode *
regbranch(I32 *flagp, I32 first)
{
register regnode *ret;
* It might seem that this node could be dispensed with entirely, but the
* endmarker role is not redundant.
*/
-static regnode *
+STATIC regnode *
regpiece(I32 *flagp)
{
register regnode *ret;
*
* [Yes, it is worth fixing, some scripts can run twice the speed.]
*/
-static regnode *
+STATIC regnode *
regatom(I32 *flagp)
{
register regnode *ret = 0;
return(ret);
}
-static char *
+STATIC char *
regwhite(char *p, char *e)
{
while (p < e) {
return p;
}
-static void
-regset(char *opnd, register I32 c)
-{
- if (SIZE_ONLY)
- return;
- c &= 0xFF;
- opnd[1 + (c >> 3)] |= (1 << (c & 7));
-}
-
-static regnode *
+STATIC regnode *
regclass(void)
{
register char *opnd, *s;
while (regparse < regxend && *regparse != ']') {
skipcond:
Class = UCHARAT(regparse++);
+ if (Class == '[' && regparse + 1 < regxend &&
+ /* I smell either [: or [= or [. -- POSIX has been here, right? */
+ (*regparse == ':' || *regparse == '=' || *regparse == '.')) {
+ char posixccc = *regparse;
+ char* posixccs = regparse++;
+
+ while (regparse < regxend && *regparse != posixccc)
+ regparse++;
+ if (regparse == regxend)
+ /* Grandfather lone [:, [=, [. */
+ regparse = posixccs;
+ else {
+ regparse++; /* skip over the posixccc */
+ if (*regparse == ']') {
+ /* Not Implemented Yet.
+ * (POSIX Extended Character Classes, that is)
+ * The text between e.g. [: and :] would start
+ * at posixccs + 1 and stop at regparse - 2. */
+ if (dowarn && !SIZE_ONLY)
+ warn("Character class syntax [%c %c] is reserved for future extensions", posixccc, posixccc);
+ regparse++; /* skip over the ending ] */
+ }
+ }
+ }
if (Class == '\\') {
Class = UCHARAT(regparse++);
switch (Class) {
case 'w':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_ALNUML;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (isALNUM(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (isALNUM(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 'W':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_NALNUML;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (!isALNUM(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (!isALNUM(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 's':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_SPACEL;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (isSPACE(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (isSPACE(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 'S':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_NSPACEL;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (!isSPACE(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (!isSPACE(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 'd':
- for (Class = '0'; Class <= '9'; Class++)
- regset(opnd, Class);
+ if (!SIZE_ONLY) {
+ for (Class = '0'; Class <= '9'; Class++)
+ ANYOF_SET(opnd, Class);
+ }
lastclass = 1234;
continue;
case 'D':
- for (Class = 0; Class < '0'; Class++)
- regset(opnd, Class);
- for (Class = '9' + 1; Class < 256; Class++)
- regset(opnd, Class);
+ if (!SIZE_ONLY) {
+ for (Class = 0; Class < '0'; Class++)
+ ANYOF_SET(opnd, Class);
+ for (Class = '9' + 1; Class < 256; Class++)
+ ANYOF_SET(opnd, Class);
+ }
lastclass = 1234;
continue;
case 'n':
continue; /* do it next time */
}
}
- for ( ; lastclass <= Class; lastclass++)
- regset(opnd, lastclass);
+ if (!SIZE_ONLY) {
+ for ( ; lastclass <= Class; lastclass++)
+ ANYOF_SET(opnd, lastclass);
+ }
lastclass = Class;
}
if (*regparse != ']')
FAIL("unmatched [] in regexp");
nextchar();
+ /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
+ if (!SIZE_ONLY && (*opnd & (0xFF ^ ANYOF_INVERT)) == ANYOF_FOLD) {
+ for (Class = 0; Class < 256; ++Class) {
+ if (ANYOF_TEST(opnd, Class)) {
+ I32 cf = fold[Class];
+ ANYOF_SET(opnd, cf);
+ }
+ }
+ *opnd &= ~ANYOF_FOLD;
+ }
+ /* optimize inverted simple patterns (e.g. [^a-z]) */
+ if (!SIZE_ONLY && (*opnd & 0xFF) == ANYOF_INVERT) {
+ for (Class = 0; Class < 32; ++Class)
+ opnd[1 + Class] ^= 0xFF;
+ *opnd = 0;
+ }
return ret;
}
-static char*
+STATIC char*
nextchar(void)
{
char* retval = regparse++;
/*
- reg_node - emit a node
*/
-static regnode * /* Location. */
-#ifdef CAN_PROTOTYPE
+STATIC regnode * /* Location. */
reg_node(U8 op)
-#else
-reg_node(op)
-U8 op;
-#endif
{
register regnode *ret;
register regnode *ptr;
/*
- reganode - emit a node with an argument
*/
-static regnode * /* Location. */
-#ifdef CAN_PROTOTYPE
+STATIC regnode * /* Location. */
reganode(U8 op, U32 arg)
-#else
-reganode(op, arg)
-U8 op;
-U32 arg;
-#endif
{
register regnode *ret;
register regnode *ptr;
/*
- regc - emit (if appropriate) a byte of code
*/
-#ifdef CAN_PROTOTYPE
-static void
+STATIC void
regc(U8 b, char* s)
-#else
-static void
-regc(b, s)
-U8 b;
-char *s;
-#endif
{
if (!SIZE_ONLY)
*s = b;
*
* Means relocating the operand.
*/
-#ifdef CAN_PROTOTYPE
-static void
+STATIC void
reginsert(U8 op, regnode *opnd)
-#else
-static void
-reginsert(op, opnd)
-U8 op;
-regnode *opnd;
-#endif
{
register regnode *src;
register regnode *dst;
/*
- regtail - set the next-pointer at the end of a node chain of p to val.
*/
-static void
+STATIC void
regtail(regnode *p, regnode *val)
{
register regnode *scan;
/*
- regoptail - regtail on operand of first argument; nop if operandless
*/
-static void
+STATIC void
regoptail(regnode *p, regnode *val)
{
/* "Operandless" and "op != BRANCH" are synonymous in practice. */
return TRUE;
}
-#ifdef DEBUGGING
-static regnode *
+STATIC regnode *
dumpuntil(regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
{
+#ifdef DEBUGGING
register char op = EXACT; /* Arbitrary non-END op. */
register regnode *next, *onode;
else if (op == WHILEM)
l--;
}
+#endif /* DEBUGGING */
return node;
}
void
regdump(regexp *r)
{
+#ifdef DEBUGGING
SV *sv = sv_newmortal();
(void)dumpuntil(r->program, r->program + 1, NULL, sv, 0);
PerlIO_printf(Perl_debug_log, "implicit ");
PerlIO_printf(Perl_debug_log, "minlen %ld ", (long) r->minlen);
PerlIO_printf(Perl_debug_log, "\n");
+#endif /* DEBUGGING */
}
/*
void
regprop(SV *sv, regnode *o)
{
+#ifdef DEBUGGING
register char *p = 0;
sv_setpv(sv, ":");
}
if (p)
sv_catpv(sv, p);
+#endif /* DEBUGGING */
}
-#endif /* DEBUGGING */
void
pregfree(struct regexp *r)
Safefree(r->precomp);
if (r->subbase)
Safefree(r->subbase);
- if (r->anchored_substr)
- SvREFCNT_dec(r->anchored_substr);
- if (r->float_substr)
- SvREFCNT_dec(r->float_substr);
+ if (r->substrs) {
+ if (r->anchored_substr)
+ SvREFCNT_dec(r->anchored_substr);
+ if (r->float_substr)
+ SvREFCNT_dec(r->float_substr);
+ Safefree(r->substrs);
+ }
if (r->data) {
int n = r->data->count;
while (--n >= 0) {
#endif
}
-#ifdef I_STDARG
-void
+STATIC void
re_croak2(const char* pat1,const char* pat2,...)
-#else
-/*VARARGS0*/
-void
-re_croak2(const char* pat1,const char* pat2, va_alist)
- const char* pat1;
- const char* pat2;
- va_dcl
-#endif
{
va_list args;
STRLEN l1 = strlen(pat1);
Copy(pat2, buf + l1, l2 , char);
buf[l1 + l2 + 1] = '\n';
buf[l1 + l2 + 2] = '\0';
-#ifdef I_STDARG
va_start(args, pat2);
-#else
- va_start(args);
-#endif
message = mess(buf, &args);
va_end(args);
l1 = strlen(message);
buf[l1] = '\0'; /* Overwrite \n */
croak("%s", buf);
}
-
-
-