* Forward declarations for pregcomp()'s friends.
*/
-static char* regwhite _((char *, char *));
#ifndef PERL_OBJECT
static regnode *reg _((I32, I32 *));
static regnode *reganode _((U8, U32));
static regnode *regpiece _((I32 *));
static void reginsert _((U8, regnode *));
static void regoptail _((regnode *, regnode *));
-static void regset _((char *, I32));
static void regtail _((regnode *, regnode *));
+static char* regwhite _((char *, char *));
static char* nextchar _((void));
+static void re_croak2 _((const char* pat1,const char* pat2,...)) __attribute__((noreturn));
#endif
/* Length of a variant. */
#define SF_FIX_BEFORE_EOL (SF_FIX_BEFORE_SEOL|SF_FIX_BEFORE_MEOL)
#define SF_FL_BEFORE_EOL (SF_FL_BEFORE_SEOL|SF_FL_BEFORE_MEOL)
-#define SF_FIX_SHIFT_EOL (+2)
-#define SF_FL_SHIFT_EOL (+4)
+#ifdef NO_UNARY_PLUS
+# define SF_FIX_SHIFT_EOL (0+2)
+# define SF_FL_SHIFT_EOL (0+4)
+#else
+# define SF_FIX_SHIFT_EOL (+2)
+# define SF_FL_SHIFT_EOL (+4)
+#endif
#define SF_FIX_BEFORE_SEOL (SF_BEFORE_SEOL << SF_FIX_SHIFT_EOL)
#define SF_FIX_BEFORE_MEOL (SF_BEFORE_MEOL << SF_FIX_SHIFT_EOL)
r->regstclass = NULL;
r->naughty = regnaughty >= 10; /* Probably an expensive pattern. */
scan = r->program + 1; /* First BRANCH. */
+
+ /* XXXX To minimize changes to RE engine we always allocate
+ 3-units-long substrs field. */
+ Newz(1004, r->substrs, 1, struct reg_substr_data);
+
if (OP(scan) != BRANCH) { /* Only one top-level choice. */
scan_data_t data;
I32 fake;
r->float_substr = data.longest_float;
r->float_min_offset = data.offset_float_min;
r->float_max_offset = data.offset_float_max;
- fbm_compile(r->float_substr);
+ fbm_compile(r->float_substr, 0);
BmUSEFUL(r->float_substr) = 100;
if (data.flags & SF_FL_BEFORE_EOL /* Cannot have SEOL and MULTI */
&& (!(data.flags & SF_FL_BEFORE_MEOL)
|| (regflags & PMf_MULTILINE)))) {
r->anchored_substr = data.longest_fixed;
r->anchored_offset = data.offset_fixed;
- fbm_compile(r->anchored_substr);
+ fbm_compile(r->anchored_substr, 0);
BmUSEFUL(r->anchored_substr) = 100;
if (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */
&& (!(data.flags & SF_FIX_BEFORE_MEOL)
break;
default:
--regparse;
- while (*regparse && strchr("iogcmsx", *regparse))
- pmflag(&regflags, *regparse++);
+ while (*regparse && strchr("iogcmsx", *regparse)) {
+ if (*regparse != 'o')
+ pmflag(&regflags, *regparse);
+ ++regparse;
+ }
unknown:
if (*regparse != ')')
FAIL2("Sequence (?%c...) not recognized", *regparse);
return(ret);
}
-static char *
+STATIC char *
regwhite(char *p, char *e)
{
while (p < e) {
return p;
}
-STATIC void
-regset(char *opnd, register I32 c)
-{
- if (SIZE_ONLY)
- return;
- c &= 0xFF;
- opnd[1 + (c >> 3)] |= (1 << (c & 7));
-}
-
STATIC regnode *
regclass(void)
{
Class = UCHARAT(regparse++);
switch (Class) {
case 'w':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_ALNUML;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (isALNUM(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (isALNUM(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 'W':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_NALNUML;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (!isALNUM(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (!isALNUM(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 's':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_SPACEL;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (isSPACE(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (isSPACE(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 'S':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_NSPACEL;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (!isSPACE(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (!isSPACE(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 'd':
- for (Class = '0'; Class <= '9'; Class++)
- regset(opnd, Class);
+ if (!SIZE_ONLY) {
+ for (Class = '0'; Class <= '9'; Class++)
+ ANYOF_SET(opnd, Class);
+ }
lastclass = 1234;
continue;
case 'D':
- for (Class = 0; Class < '0'; Class++)
- regset(opnd, Class);
- for (Class = '9' + 1; Class < 256; Class++)
- regset(opnd, Class);
+ if (!SIZE_ONLY) {
+ for (Class = 0; Class < '0'; Class++)
+ ANYOF_SET(opnd, Class);
+ for (Class = '9' + 1; Class < 256; Class++)
+ ANYOF_SET(opnd, Class);
+ }
lastclass = 1234;
continue;
case 'n':
continue; /* do it next time */
}
}
- for ( ; lastclass <= Class; lastclass++)
- regset(opnd, lastclass);
+ if (!SIZE_ONLY) {
+ for ( ; lastclass <= Class; lastclass++)
+ ANYOF_SET(opnd, lastclass);
+ }
lastclass = Class;
}
if (*regparse != ']')
FAIL("unmatched [] in regexp");
nextchar();
+ /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
+ if (!SIZE_ONLY && (*opnd & (0xFF ^ ANYOF_INVERT)) == ANYOF_FOLD) {
+ for (Class = 0; Class < 256; ++Class) {
+ if (ANYOF_TEST(opnd, Class)) {
+ I32 cf = fold[Class];
+ ANYOF_SET(opnd, cf);
+ }
+ }
+ *opnd &= ~ANYOF_FOLD;
+ }
+ /* optimize inverted simple patterns (e.g. [^a-z]) */
+ if (!SIZE_ONLY && (*opnd & 0xFF) == ANYOF_INVERT) {
+ for (Class = 0; Class < 32; ++Class)
+ opnd[1 + Class] ^= 0xFF;
+ *opnd = 0;
+ }
return ret;
}
return TRUE;
}
-#ifdef DEBUGGING
STATIC regnode *
dumpuntil(regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
{
+#ifdef DEBUGGING
register char op = EXACT; /* Arbitrary non-END op. */
register regnode *next, *onode;
else if (op == WHILEM)
l--;
}
+#endif /* DEBUGGING */
return node;
}
void
regdump(regexp *r)
{
+#ifdef DEBUGGING
SV *sv = sv_newmortal();
(void)dumpuntil(r->program, r->program + 1, NULL, sv, 0);
PerlIO_printf(Perl_debug_log, "implicit ");
PerlIO_printf(Perl_debug_log, "minlen %ld ", (long) r->minlen);
PerlIO_printf(Perl_debug_log, "\n");
+#endif /* DEBUGGING */
}
/*
void
regprop(SV *sv, regnode *o)
{
+#ifdef DEBUGGING
register char *p = 0;
sv_setpv(sv, ":");
}
if (p)
sv_catpv(sv, p);
+#endif /* DEBUGGING */
}
-#endif /* DEBUGGING */
void
pregfree(struct regexp *r)
Safefree(r->precomp);
if (r->subbase)
Safefree(r->subbase);
- if (r->anchored_substr)
- SvREFCNT_dec(r->anchored_substr);
- if (r->float_substr)
- SvREFCNT_dec(r->float_substr);
+ if (r->substrs) {
+ if (r->anchored_substr)
+ SvREFCNT_dec(r->anchored_substr);
+ if (r->float_substr)
+ SvREFCNT_dec(r->float_substr);
+ Safefree(r->substrs);
+ }
if (r->data) {
int n = r->data->count;
while (--n >= 0) {
#endif
}
-#ifdef I_STDARG
-void
+STATIC void
re_croak2(const char* pat1,const char* pat2,...)
-#else
-/*VARARGS0*/
-void
-re_croak2(const char* pat1,const char* pat2, va_alist)
- const char* pat1;
- const char* pat2;
- va_dcl
-#endif
{
va_list args;
STRLEN l1 = strlen(pat1);
Copy(pat2, buf + l1, l2 , char);
buf[l1 + l2 + 1] = '\n';
buf[l1 + l2 + 2] = '\0';
-#ifdef I_STDARG
va_start(args, pat2);
-#else
- va_start(args);
-#endif
message = mess(buf, &args);
va_end(args);
l1 = strlen(message);