X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=32414445d97ca6b6952589dcd90c614ba2dfbae7;hb=3f939f220e50adb6f28f2dd14f06461c7cebfe14;hp=34d5b37ec321907ecb270843b73767590090fbb7;hpb=12d33761ed16a63d0b1b4afd6443ea446d40b38e;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.c b/regcomp.c index 34d5b37..3241444 100644 --- a/regcomp.c +++ b/regcomp.c @@ -250,10 +250,10 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, * of t/op/regmesg.t, the tests in t/op/re_tests, and those in * op/pragma/warn/regcomp. */ -#define MARKER1 "HERE" /* marker as it appears in the description */ -#define MARKER2 " << HERE " /* marker as it appears within the regex */ +#define MARKER1 "<-- HERE" /* marker as it appears in the description */ +#define MARKER2 " <-- HERE " /* marker as it appears within the regex */ -#define REPORT_LOCATION " before " MARKER1 " mark in regex m/%.*s" MARKER2 "%s/" +#define REPORT_LOCATION " in regex; marked by " MARKER1 " in m/%.*s" MARKER2 "%s/" /* * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given @@ -393,6 +393,14 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, m, (int)offset, RExC_precomp, RExC_precomp + offset); \ } STMT_END \ +#define vWARNdep(loc,m) \ + STMT_START { \ + unsigned offset = strlen(RExC_precomp)-(RExC_end-(loc)); \ + int warn_cat = ckWARN(WARN_REGEXP) ? WARN_REGEXP : WARN_DEPRECATED; \ + Perl_warner(aTHX_ warn_cat, "%s" REPORT_LOCATION,\ + m, (int)offset, RExC_precomp, RExC_precomp + offset); \ + } STMT_END \ + #define vWARN2(loc, m, a1) \ STMT_START { \ @@ -418,9 +426,18 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (int)offset, RExC_precomp, RExC_precomp + offset); \ } STMT_END +/* used for the parse_flags section for (?c) -- japhy */ +#define vWARN5(loc, m, a1, a2, a3, a4) \ + STMT_START { \ + unsigned offset = strlen(RExC_precomp)-(RExC_end-(loc)); \ + Perl_warner(aTHX_ WARN_REGEXP, m REPORT_LOCATION, \ + a1, a2, a3, a4, \ + (int)offset, RExC_precomp, RExC_precomp + offset); \ + } STMT_END + /* Allow for side effects in s */ -#define REGC(c,s) STMT_START { if (!SIZE_ONLY) *(s) = (c); else (s);} STMT_END +#define REGC(c,s) STMT_START { if (!SIZE_ONLY) *(s) = (c); else (void)(s);} STMT_END /* Macros for recording node offsets. 20001227 mjd@plover.com * Nodes are numbered 1, 2, 3, 4. Node #n's position is recorded in @@ -877,10 +894,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg int compat = 1; if (uc >= 0x100 || - !(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE)) + (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE)) && !ANYOF_BITMAP_TEST(data->start_class, uc) && (!(data->start_class->flags & ANYOF_FOLD) || !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc]))) + ) compat = 0; ANYOF_CLASS_ZERO(data->start_class); ANYOF_BITMAP_ZERO(data->start_class); @@ -921,9 +939,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg int compat = 1; if (uc >= 0x100 || - !(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE)) + (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE)) && !ANYOF_BITMAP_TEST(data->start_class, uc) - && !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc])) + && !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc]))) compat = 0; ANYOF_CLASS_ZERO(data->start_class); ANYOF_BITMAP_ZERO(data->start_class); @@ -948,7 +966,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg flags &= ~SCF_DO_STCLASS; } else if (strchr((char*)PL_varies,OP(scan))) { - I32 mincount, maxcount, minnext, deltanext, fl; + I32 mincount, maxcount, minnext, deltanext, fl = 0; I32 f = flags, pos_before = 0; regnode *oscan = scan; struct regnode_charclass_class this_class; @@ -1059,7 +1077,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg if (!scan) /* It was not CURLYX, but CURLY. */ scan = next; if (ckWARN(WARN_REGEXP) && (minnext + deltanext == 0) - && !(data->flags & (SF_HAS_PAR|SF_IN_PAR)) + && data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR)) && maxcount <= REG_INFTY/3) /* Complement check for big count */ { vWARN(RExC_parse, @@ -1259,7 +1277,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg } } else if (strchr((char*)PL_simple,OP(scan))) { - int value; + int value = 0; if (flags & SCF_DO_SUBSTR) { scan_commit(pRExC_state,data); @@ -1715,9 +1733,9 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) r->offsets[0] = RExC_size; } DEBUG_r(PerlIO_printf(Perl_debug_log, - "%s %u bytes for offset annotations.\n", + "%s %"UVuf" bytes for offset annotations.\n", r->offsets ? "Got" : "Couldn't get", - (2*RExC_size+1) * sizeof(U32))); + (UV)((2*RExC_size+1) * sizeof(U32)))); RExC_rx = r; @@ -1913,7 +1931,6 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) if ((!r->anchored_substr || r->anchored_offset) && stclass_flag && !(data.start_class->flags & ANYOF_EOS) && !cl_is_anything(data.start_class)) { - SV *sv; I32 n = add_data(pRExC_state, 1, "f"); New(1006, RExC_rx->data->data[n], 1, @@ -1924,10 +1941,11 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) r->regstclass = (regnode*)RExC_rx->data->data[n]; r->reganch &= ~ROPT_SKIP; /* Used in find_byclass(). */ PL_regdata = r->data; /* for regprop() */ - DEBUG_r((sv = sv_newmortal(), - regprop(sv, (regnode*)data.start_class), - PerlIO_printf(Perl_debug_log, "synthetic stclass `%s'.\n", - SvPVX(sv)))); + DEBUG_r({ SV *sv = sv_newmortal(); + regprop(sv, (regnode*)data.start_class); + PerlIO_printf(Perl_debug_log, + "synthetic stclass `%s'.\n", + SvPVX(sv));}); } /* A temporary algorithm prefers floated substr to fixed one to dig more info. */ @@ -1965,7 +1983,6 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) r->check_substr = r->anchored_substr = r->float_substr = Nullsv; if (!(data.start_class->flags & ANYOF_EOS) && !cl_is_anything(data.start_class)) { - SV *sv; I32 n = add_data(pRExC_state, 1, "f"); New(1006, RExC_rx->data->data[n], 1, @@ -1975,10 +1992,11 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) struct regnode_charclass_class); r->regstclass = (regnode*)RExC_rx->data->data[n]; r->reganch &= ~ROPT_SKIP; /* Used in find_byclass(). */ - DEBUG_r((sv = sv_newmortal(), - regprop(sv, (regnode*)data.start_class), - PerlIO_printf(Perl_debug_log, "synthetic stclass `%s'.\n", - SvPVX(sv)))); + DEBUG_r({ SV* sv = sv_newmortal(); + regprop(sv, (regnode*)data.start_class); + PerlIO_printf(Perl_debug_log, + "synthetic stclass `%s'.\n", + SvPVX(sv));}); } } @@ -2017,12 +2035,23 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) register regnode *ender = 0; register I32 parno = 0; I32 flags, oregflags = RExC_flags16, have_branch = 0, open = 0; + + /* for (?g), (?gc), and (?o) warnings; warning + about (?c) will warn about (?g) -- japhy */ + + I32 wastedflags = 0x00, + wasted_o = 0x01, + wasted_g = 0x02, + wasted_gc = 0x02 | 0x04, + wasted_c = 0x04; + char * parse_start = RExC_parse; /* MJD */ char *oregcomp_parse = RExC_parse; char c; *flagp = 0; /* Tentatively. */ + /* Make an OPEN node, if parenthesized. */ if (paren) { if (*RExC_parse == '?') { /* (?...) */ @@ -2061,8 +2090,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) *flagp = TRYAGAIN; return NULL; case 'p': /* (?p...) */ - if (SIZE_ONLY) - vWARN(RExC_parse, "(?p{}) is deprecated - use (??{})"); + if (SIZE_ONLY && ckWARN2(WARN_DEPRECATED, WARN_REGEXP)) + vWARNdep(RExC_parse, "(?p{}) is deprecated - use (??{})"); /* FALL THROUGH*/ case '?': /* (??...) */ logical = 1; @@ -2117,7 +2146,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) /* No compiled RE interpolated, has runtime components ===> unsafe. */ FAIL("Eval-group not allowed at runtime, use re 'eval'"); - if (PL_tainted) + if (PL_tainting && PL_tainted) FAIL("Eval-group in insecure regular expression"); } @@ -2201,12 +2230,45 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) --RExC_parse; parse_flags: /* (?i) */ while (*RExC_parse && strchr("iogcmsx", *RExC_parse)) { - if (*RExC_parse != 'o') - pmflag(flagsp, *RExC_parse); + /* (?g), (?gc) and (?o) are useless here + and must be globally applied -- japhy */ + + if (*RExC_parse == 'o' || *RExC_parse == 'g') { + if (SIZE_ONLY && ckWARN(WARN_REGEXP)) { + I32 wflagbit = *RExC_parse == 'o' ? wasted_o : wasted_g; + if (! (wastedflags & wflagbit) ) { + wastedflags |= wflagbit; + vWARN5( + RExC_parse + 1, + "Useless (%s%c) - %suse /%c modifier", + flagsp == &negflags ? "?-" : "?", + *RExC_parse, + flagsp == &negflags ? "don't " : "", + *RExC_parse + ); + } + } + } + else if (*RExC_parse == 'c') { + if (SIZE_ONLY && ckWARN(WARN_REGEXP)) { + if (! (wastedflags & wasted_c) ) { + wastedflags |= wasted_gc; + vWARN3( + RExC_parse + 1, + "Useless (%sc) - %suse /gc modifier", + flagsp == &negflags ? "?-" : "?", + flagsp == &negflags ? "don't " : "" + ); + } + } + } + else { pmflag(flagsp, *RExC_parse); } + ++RExC_parse; } if (*RExC_parse == '-') { flagsp = &negflags; + wastedflags = 0; /* reset so (?g-c) warns twice */ ++RExC_parse; goto parse_flags; } @@ -2615,7 +2677,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp) { register regnode *ret = 0; I32 flags; - char *parse_start = RExC_parse; *flagp = WORST; /* Tentatively. */ @@ -3265,11 +3326,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) register regnode *ret; STRLEN numlen; IV namedclass; - char *rangebegin; + char *rangebegin = 0; bool need_class = 0; - SV *listsv; + SV *listsv = Nullsv; register char *e; - char *parse_start = RExC_parse; /* MJD */ UV n; bool optimize_invert = TRUE; @@ -3515,14 +3575,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) ANYOF_BITMAP_SET(ret, value); #else /* EBCDIC */ for (value = 0; value < 256; value++) { - if (PL_hints & HINT_RE_ASCIIR) { - if (NATIVE_TO_ASCII(value) < 128) - ANYOF_BITMAP_SET(ret, value); - } - else { - if (isASCII(value)) - ANYOF_BITMAP_SET(ret, value); - } + if (isASCII(value)) + ANYOF_BITMAP_SET(ret, value); } #endif /* EBCDIC */ } @@ -3537,14 +3591,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) ANYOF_BITMAP_SET(ret, value); #else /* EBCDIC */ for (value = 0; value < 256; value++) { - if (PL_hints & HINT_RE_ASCIIR) { - if (NATIVE_TO_ASCII(value) >= 128) - ANYOF_BITMAP_SET(ret, value); - } - else { - if (!isASCII(value)) - ANYOF_BITMAP_SET(ret, value); - } + if (!isASCII(value)) + ANYOF_BITMAP_SET(ret, value); } #endif /* EBCDIC */ } @@ -3783,9 +3831,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) } /* end of namedclass \blah */ if (range) { - if (((prevvalue > value) && !(PL_hints & HINT_RE_ASCIIR)) || - ((NATIVE_TO_UNI(prevvalue) > NATIVE_TO_UNI(value)) && - (PL_hints & HINT_RE_ASCIIR))) /* b-a */ { + if (prevvalue > value) /* b-a */ { Simple_vFAIL4("Invalid [] range \"%*.*s\"", RExC_parse - rangebegin, RExC_parse - rangebegin, @@ -3823,17 +3869,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) IV ceilvalue = value < 256 ? value : 255; #ifdef EBCDIC - /* New style scheme for ranges: - * use re 'asciir'; - * do ranges in ASCII/Unicode space - */ - for (i = NATIVE_TO_ASCII(prevvalue); - i <= NATIVE_TO_ASCII(ceilvalue); - i++) - ANYOF_BITMAP_SET(ret, ASCII_TO_NATIVE(i)); - } - else if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) || - (isUPPER(prevvalue) && isUPPER(ceilvalue))) + if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) || + (isUPPER(prevvalue) && isUPPER(ceilvalue))) { if (isLOWER(prevvalue)) { for (i = prevvalue; i <= ceilvalue; i++) @@ -3874,9 +3911,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */ if (!SIZE_ONLY && - (ANYOF_FLAGS(ret) & /* If the only flag is folding (plus possibly inversion). */ - (ANYOF_FLAGS_ALL ^ ANYOF_INVERT) == ANYOF_FOLD)) { + ((ANYOF_FLAGS(ret) & (ANYOF_FLAGS_ALL ^ ANYOF_INVERT)) == ANYOF_FOLD) + ) { for (value = 0; value < 256; ++value) { if (ANYOF_BITMAP_TEST(ret, value)) { IV fold = PL_fold[value]; @@ -4146,10 +4183,11 @@ S_regcurly(pTHX_ register char *s) } +#ifdef DEBUGGING + STATIC regnode * S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l) { -#ifdef DEBUGGING register U8 op = EXACT; /* Arbitrary non-END op. */ register regnode *next; @@ -4215,10 +4253,11 @@ S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l) else if (op == WHILEM) l--; } -#endif /* DEBUGGING */ return node; } +#endif /* DEBUGGING */ + /* - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form */ @@ -4289,16 +4328,18 @@ Perl_regdump(pTHX_ regexp *r) if (r->offsets) { U32 i; U32 len = r->offsets[0]; - PerlIO_printf(Perl_debug_log, "Offsets: [%u]\n\t", r->offsets[0]); + PerlIO_printf(Perl_debug_log, "Offsets: [%"UVuf"]\n\t", (UV)r->offsets[0]); for (i = 1; i <= len; i++) - PerlIO_printf(Perl_debug_log, "%u[%u] ", - r->offsets[i*2-1], - r->offsets[i*2]); + PerlIO_printf(Perl_debug_log, "%"UVuf"[%"UVuf"] ", + (UV)r->offsets[i*2-1], + (UV)r->offsets[i*2]); PerlIO_printf(Perl_debug_log, "\n"); } #endif /* DEBUGGING */ } +#ifdef DEBUGGING + STATIC void S_put_byte(pTHX_ SV *sv, int c) { @@ -4310,6 +4351,8 @@ S_put_byte(pTHX_ SV *sv, int c) Perl_sv_catpvf(aTHX_ sv, "%c", c); } +#endif /* DEBUGGING */ + /* - regprop - printable representation of opcode */ @@ -4680,6 +4723,7 @@ Perl_save_re_context(pTHX) SAVEVPTR(PL_reg_oldcurpm); /* from regexec.c */ SAVEVPTR(PL_reg_curpm); /* from regexec.c */ SAVEI32(PL_regnpar); /* () count. */ + SAVEI32(PL_regsize); /* from regexec.c */ #ifdef DEBUGGING SAVEPPTR(PL_reg_starttry); /* from regexec.c */ #endif