From: Andy Lester Date: Mon, 10 Apr 2006 02:15:58 +0000 (-0500) Subject: True consting goodness in regexes X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=097eb12cc39d98a6d964631d26d4a7e23e180985;p=p5sagit%2Fp5-mst-13.2.git True consting goodness in regexes Message-Id: <20060410071558.GA24756@petdance.com> p4raw-id: //depot/perl@27753 --- diff --git a/embed.fnc b/embed.fnc index fe1246d..c7e6495 100644 --- a/embed.fnc +++ b/embed.fnc @@ -656,7 +656,7 @@ p |OP* |prepend_elem |I32 optype|NULLOK OP* head|NULLOK OP* tail Ap |void |push_scope Amb |OP* |ref |NULLOK OP* o|I32 type p |OP* |refkids |NULLOK OP* o|I32 type -Ap |void |regdump |NN regexp* r +Ap |void |regdump |NN const regexp* r Ap |SV* |regclass_swash |NN const struct regnode *n|bool doinit|NULLOK SV **listsvp|NULLOK SV **altsvp Ap |I32 |pregexec |NN regexp* prog|NN char* stringarg \ |NN char* strend|NN char* strbeg|I32 minend \ @@ -1288,23 +1288,23 @@ ERsn |I32 |regcurly |NN const char * Es |regnode*|reg_node |NN struct RExC_state_t *state|U8 op Es |regnode*|regpiece |NN struct RExC_state_t *state|NN I32 *flagp Es |void |reginsert |NN struct RExC_state_t *state|U8 op|NN regnode *opnd -Es |void |regoptail |NN struct RExC_state_t *state|NN regnode *p|NN regnode *val -Es |void |regtail |NN struct RExC_state_t *state|NN regnode *p|NN regnode *val +Es |void |regoptail |NN const struct RExC_state_t *state|NN regnode *p|NN const regnode *val +Es |void |regtail |NN const struct RExC_state_t *state|NN regnode *p|NN const regnode *val EsRn |char* |regwhite |NN char *p|NN const char *e Es |char* |nextchar |NN struct RExC_state_t *state # ifdef DEBUGGING -Es |regnode*|dumpuntil |NN regexp *r|NN regnode *start|NN regnode *node \ +Es |regnode*|dumpuntil |NN const regexp *r|NN regnode *start|NN regnode *node \ |NULLOK regnode *last|NN SV* sv|I32 l Es |void |put_byte |NN SV* sv|int c # endif -Es |void |scan_commit |NN struct RExC_state_t* state|NN struct scan_data_t *data -Esn |void |cl_anything |NN struct RExC_state_t* state|NN struct regnode_charclass_class *cl +Es |void |scan_commit |NN const struct RExC_state_t* state|NN struct scan_data_t *data +Esn |void |cl_anything |NN const struct RExC_state_t* state|NN struct regnode_charclass_class *cl EsRn |int |cl_is_anything |NN const struct regnode_charclass_class *cl -Esn |void |cl_init |NN struct RExC_state_t* state|NN struct regnode_charclass_class *cl -Esn |void |cl_init_zero |NN struct RExC_state_t* state|NN struct regnode_charclass_class *cl +Esn |void |cl_init |NN const struct RExC_state_t* state|NN struct regnode_charclass_class *cl +Esn |void |cl_init_zero |NN const struct RExC_state_t* state|NN struct regnode_charclass_class *cl Esn |void |cl_and |NN struct regnode_charclass_class *cl \ |NN const struct regnode_charclass_class *and_with -Esn |void |cl_or |NN struct RExC_state_t* state|NN struct regnode_charclass_class *cl \ +Esn |void |cl_or |NN const struct RExC_state_t* state|NN struct regnode_charclass_class *cl \ |NN const struct regnode_charclass_class *or_with Es |I32 |study_chunk |NN struct RExC_state_t* state|NN regnode **scanp \ |NN I32 *deltap|NN regnode *last|NULLOK struct scan_data_t *data \ @@ -1326,10 +1326,10 @@ ERs |I32 |regtry |NN regexp *prog|NN char *startpos ERs |bool |reginclass |NN const regnode *n|NN const U8 *p|NULLOK STRLEN *lenp\ |bool do_utf8sv_is_utf8 Es |CHECKPOINT|regcppush |I32 parenfloor -Es |char* |regcppop |NN regexp *rex +Es |char* |regcppop |NN const regexp *rex ERsn |U8* |reghop3 |NN U8 *pos|I32 off|NN U8 *lim ERsn |U8* |reghopmaybe3 |NN U8 *pos|I32 off|NN U8 *lim -ERs |char* |find_byclass |NN regexp * prog|NN regnode *c|NN char *s|NN const char *strend|I32 norun +ERs |char* |find_byclass |NN regexp * prog|NN const regnode *c|NN char *s|NN const char *strend|I32 norun Es |void |to_utf8_substr |NN regexp * prog Es |void |to_byte_substr |NN regexp * prog #endif diff --git a/proto.h b/proto.h index 96c664c..736d35c 100644 --- a/proto.h +++ b/proto.h @@ -1808,7 +1808,7 @@ PERL_CALLCONV OP* Perl_prepend_elem(pTHX_ I32 optype, OP* head, OP* tail); PERL_CALLCONV void Perl_push_scope(pTHX); /* PERL_CALLCONV OP* ref(pTHX_ OP* o, I32 type); */ PERL_CALLCONV OP* Perl_refkids(pTHX_ OP* o, I32 type); -PERL_CALLCONV void Perl_regdump(pTHX_ regexp* r) +PERL_CALLCONV void Perl_regdump(pTHX_ const regexp* r) __attribute__nonnull__(pTHX_1); PERL_CALLCONV SV* Perl_regclass_swash(pTHX_ const struct regnode *n, bool doinit, SV **listsvp, SV **altsvp) @@ -3529,12 +3529,12 @@ STATIC void S_reginsert(pTHX_ struct RExC_state_t *state, U8 op, regnode *opnd) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_3); -STATIC void S_regoptail(pTHX_ struct RExC_state_t *state, regnode *p, regnode *val) +STATIC void S_regoptail(pTHX_ const struct RExC_state_t *state, regnode *p, const regnode *val) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3); -STATIC void S_regtail(pTHX_ struct RExC_state_t *state, regnode *p, regnode *val) +STATIC void S_regtail(pTHX_ const struct RExC_state_t *state, regnode *p, const regnode *val) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3); @@ -3548,7 +3548,7 @@ STATIC char* S_nextchar(pTHX_ struct RExC_state_t *state) __attribute__nonnull__(pTHX_1); # ifdef DEBUGGING -STATIC regnode* S_dumpuntil(pTHX_ regexp *r, regnode *start, regnode *node, regnode *last, SV* sv, I32 l) +STATIC regnode* S_dumpuntil(pTHX_ const regexp *r, regnode *start, regnode *node, regnode *last, SV* sv, I32 l) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3) @@ -3558,11 +3558,11 @@ STATIC void S_put_byte(pTHX_ SV* sv, int c) __attribute__nonnull__(pTHX_1); # endif -STATIC void S_scan_commit(pTHX_ struct RExC_state_t* state, struct scan_data_t *data) +STATIC void S_scan_commit(pTHX_ const struct RExC_state_t* state, struct scan_data_t *data) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2); -STATIC void S_cl_anything(struct RExC_state_t* state, struct regnode_charclass_class *cl) +STATIC void S_cl_anything(const struct RExC_state_t* state, struct regnode_charclass_class *cl) __attribute__nonnull__(1) __attribute__nonnull__(2); @@ -3570,11 +3570,11 @@ STATIC int S_cl_is_anything(const struct regnode_charclass_class *cl) __attribute__warn_unused_result__ __attribute__nonnull__(1); -STATIC void S_cl_init(struct RExC_state_t* state, struct regnode_charclass_class *cl) +STATIC void S_cl_init(const struct RExC_state_t* state, struct regnode_charclass_class *cl) __attribute__nonnull__(1) __attribute__nonnull__(2); -STATIC void S_cl_init_zero(struct RExC_state_t* state, struct regnode_charclass_class *cl) +STATIC void S_cl_init_zero(const struct RExC_state_t* state, struct regnode_charclass_class *cl) __attribute__nonnull__(1) __attribute__nonnull__(2); @@ -3582,7 +3582,7 @@ STATIC void S_cl_and(struct regnode_charclass_class *cl, const struct regnode_ch __attribute__nonnull__(1) __attribute__nonnull__(2); -STATIC void S_cl_or(struct RExC_state_t* state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with) +STATIC void S_cl_or(const struct RExC_state_t* state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with) __attribute__nonnull__(1) __attribute__nonnull__(2) __attribute__nonnull__(3); @@ -3640,7 +3640,7 @@ STATIC bool S_reginclass(pTHX_ const regnode *n, const U8 *p, STRLEN *lenp, bool __attribute__nonnull__(pTHX_2); STATIC CHECKPOINT S_regcppush(pTHX_ I32 parenfloor); -STATIC char* S_regcppop(pTHX_ regexp *rex) +STATIC char* S_regcppop(pTHX_ const regexp *rex) __attribute__nonnull__(pTHX_1); STATIC U8* S_reghop3(U8 *pos, I32 off, U8 *lim) @@ -3653,7 +3653,7 @@ STATIC U8* S_reghopmaybe3(U8 *pos, I32 off, U8 *lim) __attribute__nonnull__(1) __attribute__nonnull__(3); -STATIC char* S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32 norun) +STATIC char* S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, I32 norun) __attribute__warn_unused_result__ __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) diff --git a/regcomp.c b/regcomp.c index 9ec5784..fe32ce2 100644 --- a/regcomp.c +++ b/regcomp.c @@ -444,7 +444,7 @@ static void clear_re(pTHX_ void *r); floating substrings if needed. */ STATIC void -S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data) +S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data) { const STRLEN l = CHR_SVLEN(data->last_found); const STRLEN old_l = CHR_SVLEN(*data->longest); @@ -476,10 +476,11 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data) SvCUR_set(data->last_found, 0); { SV * const sv = data->last_found; - MAGIC * const mg = - SvUTF8(sv) && SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL; - if (mg) - mg->mg_len = 0; + if (SvUTF8(sv) && SvMAGICAL(sv)) { + MAGIC * const mg = mg_find(sv, PERL_MAGIC_utf8); + if (mg) + mg->mg_len = 0; + } } data->last_end = -1; data->flags &= ~SF_BEFORE_EOL; @@ -487,7 +488,7 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data) /* Can match anything (initialization) */ STATIC void -S_cl_anything(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl) +S_cl_anything(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl) { ANYOF_CLASS_ZERO(cl); ANYOF_BITMAP_SETALL(cl); @@ -514,7 +515,7 @@ S_cl_is_anything(const struct regnode_charclass_class *cl) /* Can match anything (initialization) */ STATIC void -S_cl_init(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl) +S_cl_init(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl) { Zero(cl, 1, struct regnode_charclass_class); cl->type = ANYOF; @@ -522,7 +523,7 @@ S_cl_init(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl) } STATIC void -S_cl_init_zero(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl) +S_cl_init_zero(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl) { Zero(cl, 1, struct regnode_charclass_class); cl->type = ANYOF; @@ -571,7 +572,7 @@ S_cl_and(struct regnode_charclass_class *cl, /* 'OR' a given class with another one. Can create false positives */ /* We assume that cl is not inverted */ STATIC void -S_cl_or(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with) +S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with) { if (or_with->flags & ANYOF_INVERT) { /* We do not use @@ -4453,7 +4454,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value) /* I smell either [: or [= or [. -- POSIX has been here, right? */ POSIXCC(UCHARAT(RExC_parse))) { const char c = UCHARAT(RExC_parse); - char* s = RExC_parse++; + char* const s = RExC_parse++; while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != c) RExC_parse++; @@ -4757,12 +4758,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) n--; } } - if (value == 'p') - Perl_sv_catpvf(aTHX_ listsv, - "+utf8::%.*s\n", (int)n, RExC_parse); - else - Perl_sv_catpvf(aTHX_ listsv, - "!utf8::%.*s\n", (int)n, RExC_parse); + Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%.*s\n", + (value=='p' ? '+' : '!'), (int)n, RExC_parse); } RExC_parse = e + 1; ANYOF_FLAGS(ret) |= ANYOF_UNICODE; @@ -4831,14 +4828,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (range) { if (!SIZE_ONLY) { if (ckWARN(WARN_REGEXP)) { - int w = + const int w = RExC_parse >= rangebegin ? RExC_parse - rangebegin : 0; vWARN4(RExC_parse, "False [] range \"%*.*s\"", - w, - w, - rangebegin); + w, w, rangebegin); } if (prevvalue < 256) { ANYOF_BITMAP_SET(ret, prevvalue); @@ -5247,9 +5242,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) RExC_parse - rangebegin : 0; vWARN4(RExC_parse, "False [] range \"%*.*s\"", - w, - w, - rangebegin); + w, w, rangebegin); } if (!SIZE_ONLY) ANYOF_BITMAP_SET(ret, '-'); @@ -5393,7 +5386,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) } if (!SIZE_ONLY) { - AV *av = newAV(); + AV * const av = newAV(); SV *rv; /* The 0th element stores the character class description @@ -5417,7 +5410,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) STATIC char* S_nextchar(pTHX_ RExC_state_t *pRExC_state) { - char* retval = RExC_parse++; + char* const retval = RExC_parse++; for (;;) { if (*RExC_parse == '(' && RExC_parse[1] == '?' && @@ -5592,8 +5585,9 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd) /* - regtail - set the next-pointer at the end of a node chain of p to val. */ +/* TODO: All three parms should be const */ STATIC void -S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val) +S_regtail(pTHX_ const RExC_state_t *pRExC_state, regnode *p, const regnode *val) { dVAR; register regnode *scan; @@ -5621,8 +5615,9 @@ S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val) /* - regoptail - regtail on operand of first argument; nop if operandless */ +/* TODO: All three parms should be const */ STATIC void -S_regoptail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val) +S_regoptail(pTHX_ const RExC_state_t *pRExC_state, regnode *p, const regnode *val) { dVAR; /* "Operandless" and "op != BRANCH" are synonymous in practice. */ @@ -5664,7 +5659,7 @@ S_regcurly(register const char *s) - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form */ void -Perl_regdump(pTHX_ regexp *r) +Perl_regdump(pTHX_ const regexp *r) { #ifdef DEBUGGING dVAR; @@ -6234,7 +6229,7 @@ S_put_byte(pTHX_ SV *sv, int c) STATIC regnode * -S_dumpuntil(pTHX_ regexp *r, regnode *start, regnode *node, regnode *last, +S_dumpuntil(pTHX_ const regexp *r, regnode *start, regnode *node, regnode *last, SV* sv, I32 l) { dVAR; @@ -6288,7 +6283,7 @@ S_dumpuntil(pTHX_ regexp *r, regnode *start, regnode *node, regnode *last, node->flags ? " EVAL mode" : ""); for (word_idx=0; word_idx < arry_len; word_idx++) { - SV **elem_ptr=av_fetch(trie->words,word_idx,0); + SV ** const elem_ptr = av_fetch(trie->words,word_idx,0); if (elem_ptr) { PerlIO_printf(Perl_debug_log, "%*s<%s%s%s>\n", (int)(2*(l+4)), "", diff --git a/regexec.c b/regexec.c index 2028e8d..2f4d8a7 100644 --- a/regexec.c +++ b/regexec.c @@ -213,11 +213,10 @@ S_regcppush(pTHX_ I32 parenfloor) (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp) STATIC char * -S_regcppop(pTHX_ regexp *rex) +S_regcppop(pTHX_ const regexp *rex) { dVAR; I32 i; - U32 paren = 0; char *input; GET_RE_DEBUG_FLAGS_DECL; @@ -235,7 +234,7 @@ S_regcppop(pTHX_ regexp *rex) for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS); i > 0; i -= REGCP_PAREN_ELEMS) { I32 tmps; - paren = (U32)SSPOPINT; + U32 paren = (U32)SSPOPINT; PL_reg_start_tmp[paren] = (char *) SSPOPPTR; PL_regstartp[paren] = SSPOPINT; tmps = SSPOPINT; @@ -268,10 +267,10 @@ S_regcppop(pTHX_ regexp *rex) * building DynaLoader will fail: * "Error: '*' not in typemap in DynaLoader.xs, line 164" * --jhi */ - for (paren = *PL_reglastparen + 1; (I32)paren <= rex->nparens; paren++) { - if ((I32)paren > PL_regsize) - PL_regstartp[paren] = -1; - PL_regendp[paren] = -1; + for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) { + if (i > PL_regsize) + PL_regstartp[i] = -1; + PL_regendp[i] = -1; } #endif return input; @@ -934,7 +933,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, /* We know what class REx starts with. Try to find this position... */ STATIC char * -S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32 norun) +S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, I32 norun) { dVAR; const I32 doevery = (prog->reganch & ROPT_SKIP) == 0; @@ -2065,7 +2064,6 @@ STATIC I32 /* 0 failure, 1 success */ S_regtry(pTHX_ regexp *prog, char *startpos) { dVAR; - register I32 i; register I32 *sp; register I32 *ep; CHECKPOINT lastcp; @@ -2180,6 +2178,7 @@ S_regtry(pTHX_ regexp *prog, char *startpos) sp = prog->startp; ep = prog->endp; if (prog->nparens) { + register I32 i; for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) { *++sp = -1; *++ep = -1; @@ -2767,7 +2766,7 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog) if ( st->u.trie.accepted == 1 ) { DEBUG_EXECUTE_r({ - SV **tmp = av_fetch( trie->words, st->u.trie.accept_buff[ 0 ].wordnum-1, 0 ); + SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ 0 ].wordnum-1, 0 ); PerlIO_printf( Perl_debug_log, "%*s %sonly one match : #%d <%s>%s\n", REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], @@ -5051,7 +5050,7 @@ static void restore_pos(pTHX_ void *arg) { dVAR; - regexp *rex = (regexp *)arg; + regexp * const rex = (regexp *)arg; if (PL_reg_eval_set) { if (PL_reg_oldsaved) { rex->subbeg = PL_reg_oldsaved; @@ -5071,8 +5070,8 @@ STATIC void S_to_utf8_substr(pTHX_ register regexp *prog) { if (prog->float_substr && !prog->float_utf8) { - SV* sv; - prog->float_utf8 = sv = newSVsv(prog->float_substr); + SV* const sv = newSVsv(prog->float_substr); + prog->float_utf8 = sv; sv_utf8_upgrade(sv); if (SvTAIL(prog->float_substr)) SvTAIL_on(sv); @@ -5080,8 +5079,8 @@ S_to_utf8_substr(pTHX_ register regexp *prog) prog->check_utf8 = sv; } if (prog->anchored_substr && !prog->anchored_utf8) { - SV* sv; - prog->anchored_utf8 = sv = newSVsv(prog->anchored_substr); + SV* const sv = newSVsv(prog->anchored_substr); + prog->anchored_utf8 = sv; sv_utf8_upgrade(sv); if (SvTAIL(prog->anchored_substr)) SvTAIL_on(sv); @@ -5095,8 +5094,8 @@ S_to_byte_substr(pTHX_ register regexp *prog) { dVAR; if (prog->float_utf8 && !prog->float_substr) { - SV* sv; - prog->float_substr = sv = newSVsv(prog->float_utf8); + SV* sv = newSVsv(prog->float_utf8); + prog->float_substr = sv; if (sv_utf8_downgrade(sv, TRUE)) { if (SvTAIL(prog->float_utf8)) SvTAIL_on(sv); @@ -5108,8 +5107,8 @@ S_to_byte_substr(pTHX_ register regexp *prog) prog->check_substr = sv; } if (prog->anchored_utf8 && !prog->anchored_substr) { - SV* sv; - prog->anchored_substr = sv = newSVsv(prog->anchored_utf8); + SV* sv = newSVsv(prog->anchored_utf8); + prog->anchored_substr = sv; if (sv_utf8_downgrade(sv, TRUE)) { if (SvTAIL(prog->anchored_utf8)) SvTAIL_on(sv);