static regnode *regpiece _((I32 *));
static void reginsert _((U8, regnode *));
static void regoptail _((regnode *, regnode *));
-static void regset _((char *, I32));
static void regtail _((regnode *, regnode *));
static char* regwhite _((char *, char *));
static char* nextchar _((void));
+static void re_croak2 _((const char* pat1,const char* pat2,...)) __attribute__((noreturn));
static U32 regseen;
static I32 seen_zerolen;
#ifdef DEBUGGING
static int colorset;
-char *colors[4];
#endif
/* Length of a variant. */
#define SF_FIX_BEFORE_EOL (SF_FIX_BEFORE_SEOL|SF_FIX_BEFORE_MEOL)
#define SF_FL_BEFORE_EOL (SF_FL_BEFORE_SEOL|SF_FL_BEFORE_MEOL)
-#define SF_FIX_SHIFT_EOL (+2)
-#define SF_FL_SHIFT_EOL (+4)
+#ifdef NO_UNARY_PLUS
+# define SF_FIX_SHIFT_EOL (0+2)
+# define SF_FL_SHIFT_EOL (0+4)
+#else
+# define SF_FIX_SHIFT_EOL (+2)
+# define SF_FL_SHIFT_EOL (+4)
+#endif
#define SF_FIX_BEFORE_SEOL (SF_BEFORE_SEOL << SF_FIX_SHIFT_EOL)
#define SF_FIX_BEFORE_MEOL (SF_BEFORE_MEOL << SF_FIX_SHIFT_EOL)
r->regstclass = NULL;
r->naughty = regnaughty >= 10; /* Probably an expensive pattern. */
scan = r->program + 1; /* First BRANCH. */
+
+ /* XXXX To minimize changes to the RE engine, we always allocate
+ a 3-units-long substrs field. */
+ Newz(1004, r->substrs, 1, struct reg_substr_data);
+
if (OP(scan) != BRANCH) { /* Only one top-level choice. */
scan_data_t data;
I32 fake;
r->float_substr = data.longest_float;
r->float_min_offset = data.offset_float_min;
r->float_max_offset = data.offset_float_max;
- fbm_compile(r->float_substr);
+ fbm_compile(r->float_substr, 0);
BmUSEFUL(r->float_substr) = 100;
if (data.flags & SF_FL_BEFORE_EOL /* Cannot have SEOL and MULTI */
&& (!(data.flags & SF_FL_BEFORE_MEOL)
|| (regflags & PMf_MULTILINE)))) {
r->anchored_substr = data.longest_fixed;
r->anchored_offset = data.offset_fixed;
- fbm_compile(r->anchored_substr);
+ fbm_compile(r->anchored_substr, 0);
BmUSEFUL(r->anchored_substr) = 100;
if (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */
&& (!(data.flags & SF_FIX_BEFORE_MEOL)
return p;
}
-static void
-regset(char *opnd, register I32 c)
-{
- if (SIZE_ONLY)
- return;
- c &= 0xFF;
- opnd[1 + (c >> 3)] |= (1 << (c & 7));
-}
-
static regnode *
regclass(void)
{
Class = UCHARAT(regparse++);
switch (Class) {
case 'w':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_ALNUML;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (isALNUM(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (isALNUM(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 'W':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_NALNUML;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (!isALNUM(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (!isALNUM(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 's':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_SPACEL;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (isSPACE(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (isSPACE(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 'S':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_NSPACEL;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (!isSPACE(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (!isSPACE(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 'd':
- for (Class = '0'; Class <= '9'; Class++)
- regset(opnd, Class);
+ if (!SIZE_ONLY) {
+ for (Class = '0'; Class <= '9'; Class++)
+ ANYOF_SET(opnd, Class);
+ }
lastclass = 1234;
continue;
case 'D':
- for (Class = 0; Class < '0'; Class++)
- regset(opnd, Class);
- for (Class = '9' + 1; Class < 256; Class++)
- regset(opnd, Class);
+ if (!SIZE_ONLY) {
+ for (Class = 0; Class < '0'; Class++)
+ ANYOF_SET(opnd, Class);
+ for (Class = '9' + 1; Class < 256; Class++)
+ ANYOF_SET(opnd, Class);
+ }
lastclass = 1234;
continue;
case 'n':
continue; /* do it next time */
}
}
- for ( ; lastclass <= Class; lastclass++)
- regset(opnd, lastclass);
+ if (!SIZE_ONLY) {
+ for ( ; lastclass <= Class; lastclass++)
+ ANYOF_SET(opnd, lastclass);
+ }
lastclass = Class;
}
if (*regparse != ']')
FAIL("unmatched [] in regexp");
nextchar();
+ /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
+ if (!SIZE_ONLY && (*opnd & (0xFF ^ ANYOF_INVERT)) == ANYOF_FOLD) {
+ for (Class = 0; Class < 256; ++Class) {
+ if (ANYOF_TEST(opnd, Class)) {
+ I32 cf = fold[Class];
+ ANYOF_SET(opnd, cf);
+ }
+ }
+ *opnd &= ~ANYOF_FOLD;
+ }
+ /* optimize inverted simple patterns (e.g. [^a-z]) */
+ if (!SIZE_ONLY && (*opnd & 0xFF) == ANYOF_INVERT) {
+ for (Class = 0; Class < 32; ++Class)
+ opnd[1 + Class] ^= 0xFF;
+ *opnd = 0;
+ }
return ret;
}
Safefree(r->precomp);
if (r->subbase)
Safefree(r->subbase);
- if (r->anchored_substr)
- SvREFCNT_dec(r->anchored_substr);
- if (r->float_substr)
- SvREFCNT_dec(r->float_substr);
+ if (r->substrs) {
+ if (r->anchored_substr)
+ SvREFCNT_dec(r->anchored_substr);
+ if (r->float_substr)
+ SvREFCNT_dec(r->float_substr);
+ Safefree(r->substrs);
+ }
if (r->data) {
int n = r->data->count;
while (--n >= 0) {
}
#ifdef I_STDARG
-void
+static void
re_croak2(const char* pat1,const char* pat2,...)
#else
/*VARARGS0*/
-void
+static void
re_croak2(const char* pat1,const char* pat2, va_alist)
const char* pat1;
const char* pat2;