X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=b3c31b753d7602a7bc43ce92e4a2b9f90f61c698;hb=6a28abbc8c08ff5da570415ad3f8a343b51e103d;hp=83b9015373b6ab9b61a79237ada7615cfde9d584;hpb=97aff369fa5580e7a888d4fa4c86be74ab000409;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.c b/regcomp.c index 83b9015..b3c31b7 100644 --- a/regcomp.c +++ b/regcomp.c @@ -282,27 +282,6 @@ static const scan_data_t zero_scan_data = } STMT_END /* - * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given - * args. Show regex, up to a maximum length. If it's too long, chop and add - * "...". - */ -#define FAIL2(pat,msg) STMT_START { \ - const char *ellipses = ""; \ - IV len = RExC_end - RExC_precomp; \ - \ - if (!SIZE_ONLY) \ - SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx); \ - if (len > RegexLengthToShowInErrorMessages) { \ - /* chop 10 shorter than the max, to ensure meaning of "..." */ \ - len = RegexLengthToShowInErrorMessages - 10; \ - ellipses = "..."; \ - } \ - S_re_croak2(aTHX_ pat, " in regex m/%.*s%s/", \ - msg, (int)len, RExC_precomp, ellipses); \ -} STMT_END - - -/* * Simple_vFAIL -- like FAIL, but marks the current location in the scan */ #define Simple_vFAIL(m) STMT_START { \ @@ -508,7 +487,7 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data) /* Can match anything (initialization) */ STATIC void -S_cl_anything(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl) +S_cl_anything(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl) { ANYOF_CLASS_ZERO(cl); ANYOF_BITMAP_SETALL(cl); @@ -519,7 +498,7 @@ S_cl_anything(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *c /* Can match anything (initialization) */ STATIC int -S_cl_is_anything(pTHX_ const struct regnode_charclass_class *cl) +S_cl_is_anything(const struct regnode_charclass_class *cl) { int value; @@ -535,7 +514,7 @@ S_cl_is_anything(pTHX_ const struct regnode_charclass_class *cl) /* Can match anything (initialization) */ STATIC void -S_cl_init(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl) +S_cl_init(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl) { Zero(cl, 1, struct regnode_charclass_class); cl->type = ANYOF; @@ -543,7 +522,7 @@ S_cl_init(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl) } STATIC void -S_cl_init_zero(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl) +S_cl_init_zero(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl) { Zero(cl, 1, struct regnode_charclass_class); cl->type = ANYOF; @@ -555,7 +534,7 @@ S_cl_init_zero(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class * /* 'And' a given class with another one. Can create false positives */ /* We assume that cl is not inverted */ STATIC void -S_cl_and(pTHX_ struct regnode_charclass_class *cl, +S_cl_and(struct regnode_charclass_class *cl, const struct regnode_charclass_class *and_with) { if (!(and_with->flags & ANYOF_CLASS) @@ -592,7 +571,7 @@ S_cl_and(pTHX_ struct regnode_charclass_class *cl, /* 'OR' a given class with another one. Can create false positives */ /* We assume that cl is not inverted */ STATIC void -S_cl_or(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with) +S_cl_or(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with) { if (or_with->flags & ANYOF_INVERT) { /* We do not use @@ -765,7 +744,7 @@ and would end up looking like: DEBUG_TRIE_COMPILE_r({ \ SV *tmp; \ if ( UTF ) { \ - tmp = newSVpvn( "", 0 ); \ + tmp = newSVpvs( "" ); \ pv_uni_display( tmp, uc, len, 60, UNI_DISPLAY_REGEX ); \ } else { \ tmp = Perl_newSVpvf_nocontext( "%c", (int)uvc ); \ @@ -1036,7 +1015,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs }); } else { - /* Its a dupe. So ignore it. */ + /*EMPTY*/; /* It's a dupe. So ignore it. */ } } /* end second pass */ @@ -1242,7 +1221,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs }); } else { - /* Its a dupe. So ignore it. */ + /*EMPTY*/; /* Its a dupe. So ignore it. */ } } /* end second pass */ @@ -2051,7 +2030,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, if (flags & SCF_DO_SUBSTR) scan_commit(pRExC_state, data); if (UTF) { - U8 *s = (U8 *)STRING(scan); + const U8 * const s = (U8 *)STRING(scan); l = utf8_length(s, s + l); uc = utf8_to_uvchr(s, NULL); } @@ -2746,7 +2725,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, } STATIC I32 -S_add_data(pTHX_ RExC_state_t *pRExC_state, I32 n, const char *s) +S_add_data(RExC_state_t *pRExC_state, I32 n, const char *s) { if (RExC_rx->data) { Renewc(RExC_rx->data, @@ -2891,6 +2870,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) #endif r->reganch = pm->op_pmflags & PMf_COMPILETIME; r->nparens = RExC_npar - 1; /* set early to validate backrefs */ + r->lastparen = 0; /* mg.c reads this. */ r->substrs = 0; /* Useful during FAIL. */ r->startp = 0; /* Useful during FAIL. */ @@ -2966,7 +2946,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) again: if (PL_regkind[(U8)OP(first)] == EXACT) { if (OP(first) == EXACT) - ; /* Empty, get anchored substr later. */ + /*EMPTY*/; /* Empty, get anchored substr later. */ else if ((OP(first) == EXACTF || OP(first) == EXACTFL)) r->regstclass = first; } @@ -3222,21 +3202,20 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) register I32 parno = 0; I32 flags; const I32 oregflags = RExC_flags; - I32 have_branch = 0; - I32 open = 0; + bool have_branch = 0; + bool is_open = 0; /* for (?g), (?gc), and (?o) warnings; warning about (?c) will warn about (?g) -- japhy */ +#define WASTED_O 0x01 +#define WASTED_G 0x02 +#define WASTED_C 0x04 +#define WASTED_GC (0x02|0x04) I32 wastedflags = 0x00; - const I32 wasted_o = 0x01; - const I32 wasted_g = 0x02; - const I32 wasted_gc = 0x02 | 0x04; - const I32 wasted_c = 0x04; char * parse_start = RExC_parse; /* MJD */ char * const oregcomp_parse = RExC_parse; - char c; *flagp = 0; /* Tentatively. */ @@ -3246,7 +3225,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) if (*RExC_parse == '?') { /* (?...) */ U32 posflags = 0, negflags = 0; U32 *flagsp = &posflags; - int logical = 0; + bool is_logical = 0; const char * const seqstart = RExC_parse; RExC_parse++; @@ -3283,7 +3262,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) vWARNdep(RExC_parse, "(?p{}) is deprecated - use (??{})"); /* FALL THROUGH*/ case '?': /* (??...) */ - logical = 1; + is_logical = 1; if (*RExC_parse != '{') goto unknown; paren = *RExC_parse++; @@ -3293,32 +3272,28 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) I32 count = 1, n = 0; char c; char *s = RExC_parse; - SV *sv; - OP_4tree *sop, *rop; RExC_seen_zerolen++; RExC_seen |= REG_SEEN_EVAL; while (count && (c = *RExC_parse)) { - if (c == '\\' && RExC_parse[1]) - RExC_parse++; + if (c == '\\') { + if (RExC_parse[1]) + RExC_parse++; + } else if (c == '{') count++; else if (c == '}') count--; RExC_parse++; } - if (*RExC_parse != ')') - { + if (*RExC_parse != ')') { RExC_parse = s; vFAIL("Sequence (?{...}) not terminated or not {}-balanced"); } if (!SIZE_ONLY) { PAD *pad; - - if (RExC_parse - 1 - s) - sv = newSVpvn(s, RExC_parse - 1 - s); - else - sv = newSVpvs(""); + OP_4tree *sop, *rop; + SV * const sv = newSVpvn(s, RExC_parse - 1 - s); ENTER; Perl_save_re_context(aTHX); @@ -3347,7 +3322,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) } nextchar(pRExC_state); - if (logical) { + if (is_logical) { ret = reg_node(pRExC_state, LOGICAL); if (!SIZE_ONLY) ret->flags = 2; @@ -3377,6 +3352,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) } else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) { /* (?(1)...) */ + char c; parno = atoi(RExC_parse++); while (isDIGIT(*RExC_parse)) @@ -3434,7 +3410,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) if (*RExC_parse == 'o' || *RExC_parse == 'g') { if (SIZE_ONLY && ckWARN(WARN_REGEXP)) { - I32 wflagbit = *RExC_parse == 'o' ? wasted_o : wasted_g; + const I32 wflagbit = *RExC_parse == 'o' ? WASTED_O : WASTED_G; if (! (wastedflags & wflagbit) ) { wastedflags |= wflagbit; vWARN5( @@ -3450,8 +3426,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) } else if (*RExC_parse == 'c') { if (SIZE_ONLY && ckWARN(WARN_REGEXP)) { - if (! (wastedflags & wasted_c) ) { - wastedflags |= wasted_gc; + if (! (wastedflags & WASTED_C) ) { + wastedflags |= WASTED_GC; vWARN3( RExC_parse + 1, "Useless (%sc) - %suse /gc modifier", @@ -3494,7 +3470,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) ret = reganode(pRExC_state, OPEN, parno); Set_Node_Length(ret, 1); /* MJD */ Set_Node_Offset(ret, RExC_parse); /* MJD */ - open = 1; + is_open = 1; } } else /* ! paren */ @@ -3523,7 +3499,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) else if (paren == ':') { *flagp |= flags&SIMPLE; } - if (open) { /* Starts with OPEN. */ + if (is_open) { /* Starts with OPEN. */ regtail(pRExC_state, ret, br); /* OPEN -> first. */ } else if (paren != '?') /* Not Conditional */ @@ -3847,7 +3823,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp) if (!SIZE_ONLY && !(flags&HASWIDTH) && max > REG_INFTY/3 && ckWARN(WARN_REGEXP)) { vWARN3(RExC_parse, "%.*s matches null string many times", - RExC_parse - origparse, + (int)(RExC_parse >= origparse ? RExC_parse - origparse : 0), origparse); } @@ -4437,7 +4413,7 @@ tryagain: } STATIC char * -S_regwhite(pTHX_ char *p, const char *e) +S_regwhite(char *p, const char *e) { while (p < e) { if (isSPACE(*p)) @@ -4850,12 +4826,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) /* a bad range like a-\d, a-[:digit:] ? */ if (range) { if (!SIZE_ONLY) { - if (ckWARN(WARN_REGEXP)) + if (ckWARN(WARN_REGEXP)) { + int w = + RExC_parse >= rangebegin ? + RExC_parse - rangebegin : 0; vWARN4(RExC_parse, "False [] range \"%*.*s\"", - RExC_parse - rangebegin, - RExC_parse - rangebegin, + w, + w, rangebegin); + } if (prevvalue < 256) { ANYOF_BITMAP_SET(ret, prevvalue); ANYOF_BITMAP_SET(ret, '-'); @@ -5259,12 +5239,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) /* a bad range like \w-, [:word:]- ? */ if (namedclass > OOB_NAMEDCLASS) { - if (ckWARN(WARN_REGEXP)) + if (ckWARN(WARN_REGEXP)) { + int w = + RExC_parse >= rangebegin ? + RExC_parse - rangebegin : 0; vWARN4(RExC_parse, "False [] range \"%*.*s\"", - RExC_parse - rangebegin, - RExC_parse - rangebegin, + w, + w, rangebegin); + } if (!SIZE_ONLY) ANYOF_BITMAP_SET(ret, '-'); } else @@ -5431,7 +5415,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) STATIC char* S_nextchar(pTHX_ RExC_state_t *pRExC_state) { - dVAR; char* retval = RExC_parse++; for (;;) { @@ -5657,7 +5640,7 @@ S_regoptail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val) - regcurly - a little FSA that accepts {\d+,?\d*} */ STATIC I32 -S_regcurly(pTHX_ register const char *s) +S_regcurly(register const char *s) { if (*s++ != '{') return FALSE; @@ -5775,6 +5758,7 @@ Perl_regdump(pTHX_ regexp *r) }); } #else + PERL_UNUSED_CONTEXT; PERL_UNUSED_ARG(r); #endif /* DEBUGGING */ } @@ -5815,7 +5799,9 @@ Perl_regprop(pTHX_ SV *sv, const regnode *o) PL_colors[0], len, s, PL_colors[1]); - } else if (k == TRIE) {/* + } else if (k == TRIE) { + /*EMPTY*/; + /* this isn't always safe, as Pl_regdata may not be for this regex yet (depending on where its called from) so its being moved to dumpuntil I32 n = ARG(o); @@ -5978,6 +5964,7 @@ Perl_regprop(pTHX_ SV *sv, const regnode *o) else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH)) Perl_sv_catpvf(aTHX_ sv, "[-%d]", o->flags); #else + PERL_UNUSED_CONTEXT; PERL_UNUSED_ARG(sv); PERL_UNUSED_ARG(o); #endif /* DEBUGGING */ @@ -5988,6 +5975,8 @@ Perl_re_intuit_string(pTHX_ regexp *prog) { /* Assume that RE_INTUIT is set */ dVAR; GET_RE_DEBUG_FLAGS_DECL; + PERL_UNUSED_CONTEXT; + DEBUG_COMPILE_r( { const char * const s = SvPV_nolen_const(prog->check_substr @@ -6243,9 +6232,14 @@ Perl_save_re_context(pTHX) for (i = 1; i <= rx->nparens; i++) { char digits[TYPE_CHARS(long)]; const STRLEN len = my_sprintf(digits, "%lu", (long)i); - GV * const mgv = gv_fetchpvn_flags(digits, len, 0, SVt_PV); - if (mgv) - save_scalar(mgv); + GV *const *const gvp + = (GV**)hv_fetch(PL_defstash, digits, len, 0); + + if (gvp) { + GV * const gv = *gvp; + if (SvTYPE(gv) == SVt_PVGV && GvSV(gv)) + save_scalar(gv); + } } } }