X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=0f48976db8f0e94347c1758f9839af49b8c3d7cd;hb=8803afc236dca2c2990fc3236c7c43e710a099fb;hp=603a421bd034d15fbff6053ba99577411d11816b;hpb=c5254dd600e35509100e643867b417e514781043;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.c b/regcomp.c index 603a421..0f48976 100644 --- a/regcomp.c +++ b/regcomp.c @@ -60,9 +60,9 @@ #define REG_COMP_C #include "regcomp.h" -#ifdef USE_THREADS +#ifdef op #undef op -#endif /* USE_THREADS */ +#endif /* op */ static regnode regdummy; static char * regparse; /* Input-scan pointer. */ @@ -131,6 +131,7 @@ static void regset _((char *, I32)); static void regtail _((regnode *, regnode *)); static char* regwhite _((char *, char *)); static char* nextchar _((void)); +static void re_croak2 _((const char* pat1,const char* pat2,...)) __attribute__((noreturn)); static U32 regseen; static I32 seen_zerolen; @@ -139,7 +140,6 @@ static I32 extralen; #ifdef DEBUGGING static int colorset; -char *colors[4]; #endif /* Length of a variant. */ @@ -170,8 +170,13 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; #define SF_FIX_BEFORE_EOL (SF_FIX_BEFORE_SEOL|SF_FIX_BEFORE_MEOL) #define SF_FL_BEFORE_EOL (SF_FL_BEFORE_SEOL|SF_FL_BEFORE_MEOL) -#define SF_FIX_SHIFT_EOL (+2) -#define SF_FL_SHIFT_EOL (+4) +#ifdef NO_UNARY_PLUS +# define SF_FIX_SHIFT_EOL (0+2) +# define SF_FL_SHIFT_EOL (0+4) +#else +# define SF_FIX_SHIFT_EOL (+2) +# define SF_FL_SHIFT_EOL (+4) +#endif #define SF_FIX_BEFORE_SEOL (SF_BEFORE_SEOL << SF_FIX_SHIFT_EOL) #define SF_FIX_BEFORE_MEOL (SF_BEFORE_MEOL << SF_FIX_SHIFT_EOL) @@ -182,6 +187,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; #define SF_HAS_PAR 0x80 #define SF_IN_PAR 0x100 #define SF_HAS_EVAL 0x200 +#define SCF_DO_SUBSTR 0x400 static void scan_commit(scan_data_t *data) @@ -215,8 +221,6 @@ scan_commit(scan_data_t *data) data->flags &= ~SF_BEFORE_EOL; } -#define SCF_DO_SUBSTR 1 - /* Stops at toplevel WHILEM as well as at `last'. At end *scanp is set to the position after last scanned or to NULL. */ @@ -340,7 +344,7 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 scan = next; if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR)) pars++; - if (data_fake.flags & SF_HAS_EVAL) + if (data && (data_fake.flags & SF_HAS_EVAL)) data->flags |= SF_HAS_EVAL; if (code == SUSPEND) break; @@ -372,7 +376,7 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 data->last_start_max = is_inf ? I32_MAX : data->pos_min + data->pos_delta; } - sv_catpvn(data->last_found, OPERAND(scan)+1, l); + sv_catpvn(data->last_found, (char *)(OPERAND(scan)+1), l); data->last_end = data->pos_min + l; data->pos_min += l; /* As in the first entry. */ data->flags &= ~SF_BEFORE_EOL; @@ -586,7 +590,7 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 data->longest = &(data->longest_float); } } - if (fl & SF_HAS_EVAL) + if (data && (fl & SF_HAS_EVAL)) data->flags |= SF_HAS_EVAL; optimize_curly_tail: #ifdef REGALIGN @@ -635,7 +639,7 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 } if (data && data_fake.flags & (SF_HAS_PAR|SF_IN_PAR)) pars++; - if (data_fake.flags & SF_HAS_EVAL) + if (data && (data_fake.flags & SF_HAS_EVAL)) data->flags |= SF_HAS_EVAL; } else if (OP(scan) == OPEN) { pars++; @@ -750,7 +754,7 @@ pregcomp(char *exp, char *xend, PMOP *pm) DEBUG_r( if (!colorset) { int i = 0; - char *s = getenv("TERMCAP_COLORS"); + char *s = PerlEnv_getenv("TERMCAP_COLORS"); colorset = 1; if (s) { @@ -812,6 +816,11 @@ pregcomp(char *exp, char *xend, PMOP *pm) r->regstclass = NULL; r->naughty = regnaughty >= 10; /* Probably an expensive pattern. */ scan = r->program + 1; /* First BRANCH. */ + + /* XXXX To minimize changes to RE engine we always allocate + 3-units-long substrs field. */ + Newz(1004, r->substrs, 1, struct reg_substr_data); + if (OP(scan) != BRANCH) { /* Only one top-level choice. */ scan_data_t data; I32 fake; @@ -906,7 +915,7 @@ pregcomp(char *exp, char *xend, PMOP *pm) r->float_substr = data.longest_float; r->float_min_offset = data.offset_float_min; r->float_max_offset = data.offset_float_max; - fbm_compile(r->float_substr); + fbm_compile(r->float_substr, 0); BmUSEFUL(r->float_substr) = 100; if (data.flags & SF_FL_BEFORE_EOL /* Cannot have SEOL and MULTI */ && (!(data.flags & SF_FL_BEFORE_MEOL) @@ -926,7 +935,7 @@ pregcomp(char *exp, char *xend, PMOP *pm) || (regflags & PMf_MULTILINE)))) { r->anchored_substr = data.longest_fixed; r->anchored_offset = data.offset_fixed; - fbm_compile(r->anchored_substr); + fbm_compile(r->anchored_substr, 0); BmUSEFUL(r->anchored_substr) = 100; if (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */ && (!(data.flags & SF_FIX_BEFORE_MEOL) @@ -1065,11 +1074,12 @@ reg(I32 paren, I32 *flagp) rx->data->data[n+1] = (void*)av; rx->data->data[n+2] = (void*)sop; SvREFCNT_dec(sv); + } else { /* First pass */ + if (tainted) + FAIL("Eval-group in insecure regular expression"); } nextchar(); - if (tainted) - FAIL("Eval-group in insecure regular expression"); return reganode(EVAL, n); } case '(': @@ -1673,7 +1683,7 @@ tryagain: ret = reg_node((regflags & PMf_FOLD) ? ((regflags & PMf_LOCALE) ? EXACTFL : EXACTF) : EXACT); - s = OPERAND(ret); + s = (char *) OPERAND(ret); regc(0, s++); /* save spot for len */ for (len = 0, p = regparse - 1; len < 127 && p < regxend; @@ -1841,7 +1851,7 @@ regclass(void) register I32 def; I32 numlen; - s = opnd = OPERAND(regcode); + s = opnd = (char *) OPERAND(regcode); ret = reg_node(ANYOF); for (Class = 0; Class < 33; Class++) regc(0, s++); @@ -1865,6 +1875,30 @@ regclass(void) while (regparse < regxend && *regparse != ']') { skipcond: Class = UCHARAT(regparse++); + if (Class == '[' && regparse + 1 < regxend && + /* I smell either [: or [= or [. -- POSIX has been here, right? */ + (*regparse == ':' || *regparse == '=' || *regparse == '.')) { + char posixccc = *regparse; + char* posixccs = regparse++; + + while (regparse < regxend && *regparse != posixccc) + regparse++; + if (regparse == regxend) + /* Grandfather lone [:, [=, [. */ + regparse = posixccs; + else { + regparse++; /* skip over the posixccc */ + if (*regparse == ']') { + /* Not Implemented Yet. + * (POSIX Extended Character Classes, that is) + * The text between e.g. [: and :] would start + * at posixccs + 1 and stop at regparse - 2. */ + if (dowarn && !SIZE_ONLY) + warn("Character class syntax [%c %c] is reserved for future extensions", posixccc, posixccc); + regparse++; /* skip over the ending ] */ + } + } + } if (Class == '\\') { Class = UCHARAT(regparse++); switch (Class) { @@ -2021,12 +2055,7 @@ nextchar(void) - reg_node - emit a node */ static regnode * /* Location. */ -#ifdef CAN_PROTOTYPE reg_node(U8 op) -#else -reg_node(op) -U8 op; -#endif { register regnode *ret; register regnode *ptr; @@ -2054,13 +2083,7 @@ U8 op; - reganode - emit a node with an argument */ static regnode * /* Location. */ -#ifdef CAN_PROTOTYPE reganode(U8 op, U32 arg) -#else -reganode(op, arg) -U8 op; -U32 arg; -#endif { register regnode *ret; register regnode *ptr; @@ -2087,15 +2110,8 @@ U32 arg; /* - regc - emit (if appropriate) a byte of code */ -#ifdef CAN_PROTOTYPE static void regc(U8 b, char* s) -#else -static void -regc(b, s) -U8 b; -char *s; -#endif { if (!SIZE_ONLY) *s = b; @@ -2106,15 +2122,8 @@ char *s; * * Means relocating the operand. */ -#ifdef CAN_PROTOTYPE static void reginsert(U8 op, regnode *opnd) -#else -static void -reginsert(op, opnd) -U8 op; -regnode *opnd; -#endif { register regnode *src; register regnode *dst; @@ -2565,10 +2574,13 @@ pregfree(struct regexp *r) Safefree(r->precomp); if (r->subbase) Safefree(r->subbase); - if (r->anchored_substr) - SvREFCNT_dec(r->anchored_substr); - if (r->float_substr) - SvREFCNT_dec(r->float_substr); + if (r->substrs) { + if (r->anchored_substr) + SvREFCNT_dec(r->anchored_substr); + if (r->float_substr) + SvREFCNT_dec(r->float_substr); + Safefree(r->substrs); + } if (r->data) { int n = r->data->count; while (--n >= 0) { @@ -2622,11 +2634,11 @@ regnext(register regnode *p) } #ifdef I_STDARG -void +static void re_croak2(const char* pat1,const char* pat2,...) #else /*VARARGS0*/ -void +static void re_croak2(const char* pat1,const char* pat2, va_alist) const char* pat1; const char* pat2; @@ -2661,4 +2673,3 @@ re_croak2(const char* pat1,const char* pat2, va_alist) buf[l1] = '\0'; /* Overwrite \n */ croak("%s", buf); } -