X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=b6d8b01c3fef450895223f83b90e6bc5fa576e55;hb=9e5a0491dfd6742194347115bac72d779c95ac13;hp=ae2b02ee75e06108fa57ad3b71378fa62d172d5c;hpb=fbbbcc485c1d03c76a91f998e1e4492c8ad56b38;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.c b/regcomp.c index ae2b02e..b6d8b01 100644 --- a/regcomp.c +++ b/regcomp.c @@ -102,7 +102,6 @@ * Forward declarations for pregcomp()'s friends. */ -static char* regwhite _((char *, char *)); #ifndef PERL_OBJECT static regnode *reg _((I32, I32 *)); static regnode *reganode _((U8, U32)); @@ -115,9 +114,10 @@ static regnode *reg_node _((U8)); static regnode *regpiece _((I32 *)); static void reginsert _((U8, regnode *)); static void regoptail _((regnode *, regnode *)); -static void regset _((char *, I32)); static void regtail _((regnode *, regnode *)); +static char* regwhite _((char *, char *)); static char* nextchar _((void)); +static void re_croak2 _((const char* pat1,const char* pat2,...)) __attribute__((noreturn)); #endif /* Length of a variant. */ @@ -150,8 +150,13 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; #define SF_FIX_BEFORE_EOL (SF_FIX_BEFORE_SEOL|SF_FIX_BEFORE_MEOL) #define SF_FL_BEFORE_EOL (SF_FL_BEFORE_SEOL|SF_FL_BEFORE_MEOL) -#define SF_FIX_SHIFT_EOL (+2) -#define SF_FL_SHIFT_EOL (+4) +#ifdef NO_UNARY_PLUS +# define SF_FIX_SHIFT_EOL (0+2) +# define SF_FL_SHIFT_EOL (0+4) +#else +# define SF_FIX_SHIFT_EOL (+2) +# define SF_FL_SHIFT_EOL (+4) +#endif #define SF_FIX_BEFORE_SEOL (SF_BEFORE_SEOL << SF_FIX_SHIFT_EOL) #define SF_FIX_BEFORE_MEOL (SF_BEFORE_MEOL << SF_FIX_SHIFT_EOL) @@ -791,6 +796,11 @@ pregcomp(char *exp, char *xend, PMOP *pm) r->regstclass = NULL; r->naughty = regnaughty >= 10; /* Probably an expensive pattern. */ scan = r->program + 1; /* First BRANCH. */ + + /* XXXX To minimize changes to RE engine we always allocate + 3-units-long substrs field. */ + Newz(1004, r->substrs, 1, struct reg_substr_data); + if (OP(scan) != BRANCH) { /* Only one top-level choice. */ scan_data_t data; I32 fake; @@ -885,7 +895,7 @@ pregcomp(char *exp, char *xend, PMOP *pm) r->float_substr = data.longest_float; r->float_min_offset = data.offset_float_min; r->float_max_offset = data.offset_float_max; - fbm_compile(r->float_substr); + fbm_compile(r->float_substr, 0); BmUSEFUL(r->float_substr) = 100; if (data.flags & SF_FL_BEFORE_EOL /* Cannot have SEOL and MULTI */ && (!(data.flags & SF_FL_BEFORE_MEOL) @@ -905,7 +915,7 @@ pregcomp(char *exp, char *xend, PMOP *pm) || (regflags & PMf_MULTILINE)))) { r->anchored_substr = data.longest_fixed; r->anchored_offset = data.offset_fixed; - fbm_compile(r->anchored_substr); + fbm_compile(r->anchored_substr, 0); BmUSEFUL(r->anchored_substr) = 100; if (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */ && (!(data.flags & SF_FIX_BEFORE_MEOL) @@ -1106,8 +1116,11 @@ reg(I32 paren, I32 *flagp) break; default: --regparse; - while (*regparse && strchr("iogcmsx", *regparse)) - pmflag(®flags, *regparse++); + while (*regparse && strchr("iogcmsx", *regparse)) { + if (*regparse != 'o') + pmflag(®flags, *regparse); + ++regparse; + } unknown: if (*regparse != ')') FAIL2("Sequence (?%c...) not recognized", *regparse); @@ -1784,7 +1797,7 @@ tryagain: return(ret); } -static char * +STATIC char * regwhite(char *p, char *e) { while (p < e) { @@ -1801,15 +1814,6 @@ regwhite(char *p, char *e) return p; } -STATIC void -regset(char *opnd, register I32 c) -{ - if (SIZE_ONLY) - return; - c &= 0xFF; - opnd[1 + (c >> 3)] |= (1 << (c & 7)); -} - STATIC regnode * regclass(void) { @@ -1873,63 +1877,67 @@ regclass(void) Class = UCHARAT(regparse++); switch (Class) { case 'w': - if (regflags & PMf_LOCALE) { - if (!SIZE_ONLY) + if (!SIZE_ONLY) { + if (regflags & PMf_LOCALE) *opnd |= ANYOF_ALNUML; - } - else { - for (Class = 0; Class < 256; Class++) - if (isALNUM(Class)) - regset(opnd, Class); + else { + for (Class = 0; Class < 256; Class++) + if (isALNUM(Class)) + ANYOF_SET(opnd, Class); + } } lastclass = 1234; continue; case 'W': - if (regflags & PMf_LOCALE) { - if (!SIZE_ONLY) + if (!SIZE_ONLY) { + if (regflags & PMf_LOCALE) *opnd |= ANYOF_NALNUML; - } - else { - for (Class = 0; Class < 256; Class++) - if (!isALNUM(Class)) - regset(opnd, Class); + else { + for (Class = 0; Class < 256; Class++) + if (!isALNUM(Class)) + ANYOF_SET(opnd, Class); + } } lastclass = 1234; continue; case 's': - if (regflags & PMf_LOCALE) { - if (!SIZE_ONLY) + if (!SIZE_ONLY) { + if (regflags & PMf_LOCALE) *opnd |= ANYOF_SPACEL; - } - else { - for (Class = 0; Class < 256; Class++) - if (isSPACE(Class)) - regset(opnd, Class); + else { + for (Class = 0; Class < 256; Class++) + if (isSPACE(Class)) + ANYOF_SET(opnd, Class); + } } lastclass = 1234; continue; case 'S': - if (regflags & PMf_LOCALE) { - if (!SIZE_ONLY) + if (!SIZE_ONLY) { + if (regflags & PMf_LOCALE) *opnd |= ANYOF_NSPACEL; - } - else { - for (Class = 0; Class < 256; Class++) - if (!isSPACE(Class)) - regset(opnd, Class); + else { + for (Class = 0; Class < 256; Class++) + if (!isSPACE(Class)) + ANYOF_SET(opnd, Class); + } } lastclass = 1234; continue; case 'd': - for (Class = '0'; Class <= '9'; Class++) - regset(opnd, Class); + if (!SIZE_ONLY) { + for (Class = '0'; Class <= '9'; Class++) + ANYOF_SET(opnd, Class); + } lastclass = 1234; continue; case 'D': - for (Class = 0; Class < '0'; Class++) - regset(opnd, Class); - for (Class = '9' + 1; Class < 256; Class++) - regset(opnd, Class); + if (!SIZE_ONLY) { + for (Class = 0; Class < '0'; Class++) + ANYOF_SET(opnd, Class); + for (Class = '9' + 1; Class < 256; Class++) + ANYOF_SET(opnd, Class); + } lastclass = 1234; continue; case 'n': @@ -1982,13 +1990,31 @@ regclass(void) continue; /* do it next time */ } } - for ( ; lastclass <= Class; lastclass++) - regset(opnd, lastclass); + if (!SIZE_ONLY) { + for ( ; lastclass <= Class; lastclass++) + ANYOF_SET(opnd, lastclass); + } lastclass = Class; } if (*regparse != ']') FAIL("unmatched [] in regexp"); nextchar(); + /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */ + if (!SIZE_ONLY && (*opnd & (0xFF ^ ANYOF_INVERT)) == ANYOF_FOLD) { + for (Class = 0; Class < 256; ++Class) { + if (ANYOF_TEST(opnd, Class)) { + I32 cf = fold[Class]; + ANYOF_SET(opnd, cf); + } + } + *opnd &= ~ANYOF_FOLD; + } + /* optimize inverted simple patterns (e.g. [^a-z]) */ + if (!SIZE_ONLY && (*opnd & 0xFF) == ANYOF_INVERT) { + for (Class = 0; Class < 32; ++Class) + opnd[1 + Class] ^= 0xFF; + *opnd = 0; + } return ret; } @@ -2205,11 +2231,11 @@ regcurly(register char *s) return TRUE; } -#ifdef DEBUGGING STATIC regnode * dumpuntil(regnode *start, regnode *node, regnode *last, SV* sv, I32 l) { +#ifdef DEBUGGING register char op = EXACT; /* Arbitrary non-END op. */ register regnode *next, *onode; @@ -2266,6 +2292,7 @@ dumpuntil(regnode *start, regnode *node, regnode *last, SV* sv, I32 l) else if (op == WHILEM) l--; } +#endif /* DEBUGGING */ return node; } @@ -2275,6 +2302,7 @@ dumpuntil(regnode *start, regnode *node, regnode *last, SV* sv, I32 l) void regdump(regexp *r) { +#ifdef DEBUGGING SV *sv = sv_newmortal(); (void)dumpuntil(r->program, r->program + 1, NULL, sv, 0); @@ -2327,6 +2355,7 @@ regdump(regexp *r) PerlIO_printf(Perl_debug_log, "implicit "); PerlIO_printf(Perl_debug_log, "minlen %ld ", (long) r->minlen); PerlIO_printf(Perl_debug_log, "\n"); +#endif /* DEBUGGING */ } /* @@ -2335,6 +2364,7 @@ regdump(regexp *r) void regprop(SV *sv, regnode *o) { +#ifdef DEBUGGING register char *p = 0; sv_setpv(sv, ":"); @@ -2532,8 +2562,8 @@ regprop(SV *sv, regnode *o) } if (p) sv_catpv(sv, p); +#endif /* DEBUGGING */ } -#endif /* DEBUGGING */ void pregfree(struct regexp *r) @@ -2544,10 +2574,13 @@ pregfree(struct regexp *r) Safefree(r->precomp); if (r->subbase) Safefree(r->subbase); - if (r->anchored_substr) - SvREFCNT_dec(r->anchored_substr); - if (r->float_substr) - SvREFCNT_dec(r->float_substr); + if (r->substrs) { + if (r->anchored_substr) + SvREFCNT_dec(r->anchored_substr); + if (r->float_substr) + SvREFCNT_dec(r->float_substr); + Safefree(r->substrs); + } if (r->data) { int n = r->data->count; while (--n >= 0) { @@ -2600,17 +2633,8 @@ regnext(register regnode *p) #endif } -#ifdef I_STDARG -void +STATIC void re_croak2(const char* pat1,const char* pat2,...) -#else -/*VARARGS0*/ -void -re_croak2(const char* pat1,const char* pat2, va_alist) - const char* pat1; - const char* pat2; - va_dcl -#endif { va_list args; STRLEN l1 = strlen(pat1); @@ -2626,11 +2650,7 @@ re_croak2(const char* pat1,const char* pat2, va_alist) Copy(pat2, buf + l1, l2 , char); buf[l1 + l2 + 1] = '\n'; buf[l1 + l2 + 2] = '\0'; -#ifdef I_STDARG va_start(args, pat2); -#else - va_start(args); -#endif message = mess(buf, &args); va_end(args); l1 = strlen(message);