static regnode *regpiece _((I32 *));
static void reginsert _((U8, regnode *));
static void regoptail _((regnode *, regnode *));
-static void regset _((char *, I32));
static void regtail _((regnode *, regnode *));
static char* regwhite _((char *, char *));
static char* nextchar _((void));
return p;
}
-static void
-regset(char *opnd, register I32 c)
-{
- if (SIZE_ONLY)
- return;
- c &= 0xFF;
- opnd[1 + (c >> 3)] |= (1 << (c & 7));
-}
-
static regnode *
regclass(void)
{
Class = UCHARAT(regparse++);
switch (Class) {
case 'w':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_ALNUML;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (isALNUM(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (isALNUM(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 'W':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_NALNUML;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (!isALNUM(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (!isALNUM(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 's':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_SPACEL;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (isSPACE(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (isSPACE(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 'S':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_NSPACEL;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (!isSPACE(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (!isSPACE(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 'd':
- for (Class = '0'; Class <= '9'; Class++)
- regset(opnd, Class);
+ if (!SIZE_ONLY) {
+ for (Class = '0'; Class <= '9'; Class++)
+ ANYOF_SET(opnd, Class);
+ }
lastclass = 1234;
continue;
case 'D':
- for (Class = 0; Class < '0'; Class++)
- regset(opnd, Class);
- for (Class = '9' + 1; Class < 256; Class++)
- regset(opnd, Class);
+ if (!SIZE_ONLY) {
+ for (Class = 0; Class < '0'; Class++)
+ ANYOF_SET(opnd, Class);
+ for (Class = '9' + 1; Class < 256; Class++)
+ ANYOF_SET(opnd, Class);
+ }
lastclass = 1234;
continue;
case 'n':
continue; /* do it next time */
}
}
- for ( ; lastclass <= Class; lastclass++)
- regset(opnd, lastclass);
+ if (!SIZE_ONLY) {
+ for ( ; lastclass <= Class; lastclass++)
+ ANYOF_SET(opnd, lastclass);
+ }
lastclass = Class;
}
if (*regparse != ']')
FAIL("unmatched [] in regexp");
nextchar();
+ /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
+ if (!SIZE_ONLY && (*opnd & (0xFF ^ ANYOF_INVERT)) == ANYOF_FOLD) {
+ for (Class = 0; Class < 256; ++Class) {
+ if (ANYOF_TEST(opnd, Class)) {
+ I32 cf = fold[Class];
+ ANYOF_SET(opnd, cf);
+ }
+ }
+ *opnd &= ~ANYOF_FOLD;
+ }
+ /* optimize inverted simple patterns (e.g. [^a-z]) */
+ if (!SIZE_ONLY && (*opnd & 0xFF) == ANYOF_INVERT) {
+ for (Class = 0; Class < 32; ++Class)
+ opnd[1 + Class] ^= 0xFF;
+ *opnd = 0;
+ }
return ret;
}
#define ANYOF_SPACEL 0x02
#define ANYOF_NSPACEL 0x01
+/* Utility macros for the bitmap of an ANYOF node.
+ *
+ * p points at the node's operand bytes.  The macros never touch p[0];
+ * they address the bitmap starting at p[1], using only the low 8 bits
+ * of c to pick a bit.
+ *
+ *   ANYOF_BYTE(p,c)   the bitmap byte holding c's bit (expands to an
+ *                     lvalue); the "& 31" confines the byte index to
+ *                     0..31 whatever value c has
+ *   ANYOF_BIT(c)      mask selecting c's bit inside that byte
+ *   ANYOF_SET(p,c)    turn c's bit on
+ *   ANYOF_CLEAR(p,c)  turn c's bit off
+ *   ANYOF_TEST(p,c)   non-zero (the masked bit itself, not necessarily 1)
+ *                     when c's bit is on, 0 otherwise
+ *
+ * SET, CLEAR and TEST expand c twice, so c must be free of side effects.
+ */
+#define ANYOF_BYTE(p,c) (p)[1 + (((c) >> 3) & 31)]
+#define ANYOF_BIT(c) (1 << ((c) & 7))
+#define ANYOF_SET(p,c) (ANYOF_BYTE(p,c) |= ANYOF_BIT(c))
+#define ANYOF_CLEAR(p,c) (ANYOF_BYTE(p,c) &= ~ANYOF_BIT(c))
+#define ANYOF_TEST(p,c) (ANYOF_BYTE(p,c) & ANYOF_BIT(c))
+
#ifdef REGALIGN_STRUCT
#define ANY_SKIP ((33 - 1)/sizeof(regnode) + 1)
#else
static I32 regrepeat _((regnode *p, I32 max));
static I32 regrepeat_hard _((regnode *p, I32 max, I32 *lp));
static I32 regtry _((regexp *prog, char *startpos));
+
static bool reginclass _((char *p, I32 c));
static CHECKPOINT regcppush _((I32 parenfloor));
static char * regcppop _((void));
+#define REGINCLASS(p,c) (*(p) ? reginclass(p,c) : ANYOF_TEST(p,c)) /* call reginclass() only when the leading byte *(p) is non-zero; otherwise test c's bit in the bitmap inline. c can be expanded more than once, so it must have no side effects. */
static CHECKPOINT
regcppush(I32 parenfloor)
case ANYOF:
Class = (char *) OPERAND(c);
while (s < strend) {
- if (reginclass(Class, *s)) {
+ if (REGINCLASS(Class, *s)) {
if (tmp && regtry(prog, s))
goto got_it;
else
s = (char *) OPERAND(scan);
if (nextchar < 0)
nextchar = UCHARAT(locinput);
- if (!reginclass(s, nextchar))
+ if (!REGINCLASS(s, nextchar))
sayNO;
if (!nextchar && locinput >= regeol)
sayNO;
scan++;
break;
case ANYOF:
- while (scan < loceol && reginclass(opnd, *scan))
+ while (scan < loceol && REGINCLASS(opnd, *scan))
scan++;
break;
case ALNUM:
bool match = FALSE;
c &= 0xFF;
- if (p[1 + (c >> 3)] & (1 << (c & 7)))
+ if (ANYOF_TEST(p, c))
match = TRUE;
else if (flags & ANYOF_FOLD) {
I32 cf;
}
else
cf = fold[c];
- if (p[1 + (cf >> 3)] & (1 << (cf & 7)))
+ if (ANYOF_TEST(p, cf))
match = TRUE;
}
}
}
- return match ^ ((flags & ANYOF_INVERT) != 0);
+ return (flags & ANYOF_INVERT) ? !match : match;
}