From: Ilya Zakharevich Date: Mon, 2 Aug 1999 21:53:36 +0000 (-0400) Subject: Clarify RE engine code; inline a static function to a macro; X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=cd439c502eb7c742eb6cf76c8359a4d3384ae0f5;p=p5sagit%2Fp5-mst-13.2.git Clarify RE engine code; inline a static function to a macro; make exact string nodes smaller. To: perl5-porters@perl.org (Mailing list Perl5) Subject: [PATCH 5.005_60] Cosmetic change to REx engine Message-Id: <199908030153.VAA10542@monk.mps.ohio-state.edu> p4raw-id: //depot/cfgperl@3908 --- diff --git a/Makefile.SH b/Makefile.SH index 6c6fcc4..1567e2f 100644 --- a/Makefile.SH +++ b/Makefile.SH @@ -548,6 +548,7 @@ SYMH = perlvars.h intrpvar.h thrdvar.h # ext/ByteLoader/byterun.h: bytecode.pl # ext/ByteLoader/byterun.c: bytecode.pl # ext/B/Asmdata.pm: bytecode.pl +# global.sym: embed.pl # regnodes.h: regcomp.pl # warning.h lib/warning.pm: warning.pl # The correct versions should be already supplied with the perl kit, diff --git a/embed.pl b/embed.pl index 09035e8..09df177 100755 --- a/embed.pl +++ b/embed.pl @@ -1920,7 +1920,6 @@ s |regnode*|reg |I32|I32 * s |regnode*|reganode |U8|U32 s |regnode*|regatom |I32 * s |regnode*|regbranch |I32 *|I32 -s |void |regc |U8|char * s |void |reguni |UV|char *|I32* s |regnode*|regclass s |regnode*|regclassutf8 diff --git a/regcomp.c b/regcomp.c index 3fc298f..b06077b 100644 --- a/regcomp.c +++ b/regcomp.c @@ -196,6 +196,9 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv)) #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b) +/* Allow for side effects in s */ +#define REGC(c,s) STMT_START { if (!SIZE_ONLY) *(s) = (c); else (s);} STMT_END + static void clear_re(pTHXo_ void *r); STATIC void @@ -260,7 +263,7 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da regnode *stop = scan; #endif - next = scan + (*OPERAND(scan) + 2 - 1)/sizeof(regnode) + 2; + next = scan + NODE_SZ_STR(scan); /* Skip NOTHING, merge EXACT*. */ while (n && ( PL_regkind[(U8)OP(n)] == NOTHING || @@ -279,17 +282,17 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da n = regnext(n); } else { - int oldl = *OPERAND(scan); + int oldl = STR_LEN(scan); regnode *nnext = regnext(n); - if (oldl + *OPERAND(n) > U8_MAX) + if (oldl + STR_LEN(n) > U8_MAX) break; NEXT_OFF(scan) += NEXT_OFF(n); - *OPERAND(scan) += *OPERAND(n); - next = n + (*OPERAND(n) + 2 - 1)/sizeof(regnode) + 2; + STR_LEN(scan) += STR_LEN(n); + next = n + NODE_SZ_STR(n); /* Now we can overwrite *n : */ - Move(OPERAND(n) + 1, OPERAND(scan) + oldl + 1, - *OPERAND(n) + 1, char); + Move(STRING(n), STRING(scan) + oldl, + STR_LEN(n), char); #ifdef DEBUGGING if (stringok) stop = next - 1; @@ -299,7 +302,7 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da } #ifdef DEBUGGING /* Allow dumping */ - n = scan + (*OPERAND(scan) + 2 - 1)/sizeof(regnode) + 2; + n = scan + NODE_SZ_STR(scan); while (n <= stop) { /* Purify reports a benign UMR here sometimes, because we * don't initialize the OP() slot of a node when that node @@ -392,9 +395,9 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da continue; } else if (OP(scan) == EXACT) { - I32 l = *OPERAND(scan); + I32 l = STR_LEN(scan); if (UTF) { - unsigned char *s = (unsigned char *)(OPERAND(scan)+1); + unsigned char *s = (unsigned char *)STRING(scan); unsigned char *e = s + l; I32 newl = 0; while (s < e) { @@ -412,18 +415,18 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da data->last_start_max = is_inf ? I32_MAX : data->pos_min + data->pos_delta; } - sv_catpvn(data->last_found, (char *)(OPERAND(scan)+1), *OPERAND(scan)); + sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan)); data->last_end = data->pos_min + l; data->pos_min += l; /* As in the first entry. */ data->flags &= ~SF_BEFORE_EOL; } } else if (PL_regkind[(U8)OP(scan)] == EXACT) { - I32 l = *OPERAND(scan); + I32 l = STR_LEN(scan); if (flags & SCF_DO_SUBSTR) scan_commit(data); if (UTF) { - unsigned char *s = (unsigned char *)(OPERAND(scan)+1); + unsigned char *s = (unsigned char *)STRING(scan); unsigned char *e = s + l; I32 newl = 0; while (s < e) { @@ -514,7 +517,7 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da nxt = regnext(nxt); if (!strchr((char*)PL_simple,OP(nxt)) && !(PL_regkind[(U8)OP(nxt)] == EXACT - && *OPERAND(nxt) == 1)) + && STR_LEN(nxt) == 1)) goto nogo; nxt2 = nxt; nxt = regnext(nxt); @@ -861,7 +864,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) PL_regsize = 0L; PL_regcode = &PL_regdummy; PL_reg_whilem_seen = 0; - regc((U8)REG_MAGIC, (char*)PL_regcode); + REGC((U8)REG_MAGIC, (char*)PL_regcode); if (reg(0, &flags) == NULL) { Safefree(PL_regprecomp); PL_regprecomp = Nullch; @@ -904,7 +907,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) PL_regcode = r->program; /* Store the count of eval-groups for security checks: */ PL_regcode->next_off = ((PL_seen_evals > U16_MAX) ? U16_MAX : PL_seen_evals); - regc((U8)REG_MAGIC, (char*) PL_regcode++); + REGC((U8)REG_MAGIC, (char*) PL_regcode++); r->data = 0; if (reg(0, &flags) == NULL) return(NULL); @@ -1953,8 +1956,7 @@ tryagain: ret = reg_node(FOLD ? (LOC ? EXACTFL : EXACTF) : EXACT); - s = (char *) OPERAND(ret); - regc(0, s++); /* save spot for len */ + s = STRING(ret); for (len = 0, p = PL_regcomp_parse - 1; len < 127 && p < PL_regxend; len++) @@ -2094,7 +2096,7 @@ tryagain: } else { len++; - regc(ender, s++); + REGC(ender, s++); } break; } @@ -2104,7 +2106,7 @@ tryagain: len += numlen - 1; } else - regc(ender, s++); + REGC(ender, s++); } loopdone: PL_regcomp_parse = p - 1; @@ -2116,14 +2118,11 @@ tryagain: if (len == 1) *flagp |= SIMPLE; if (!SIZE_ONLY) - *OPERAND(ret) = len; - regc('\0', s++); - if (SIZE_ONLY) { - PL_regsize += (len + 2 + sizeof(regnode) - 1) / sizeof(regnode); - } - else { - PL_regcode += (len + 2 + sizeof(regnode) - 1) / sizeof(regnode); - } + STR_LEN(ret) = len; + if (SIZE_ONLY) + PL_regsize += STR_SZ(len); + else + PL_regcode += STR_SZ(len); } break; } @@ -2303,10 +2302,10 @@ S_regclass(pTHX) I32 numlen; I32 namedclass; - s = opnd = (char *) OPERAND(PL_regcode); + s = opnd = MASK(PL_regcode); ret = reg_node(ANYOF); for (value = 0; value < ANYOF_SIZE; value++) - regc(0, s++); + REGC(0, s++); if (*PL_regcomp_parse == '^') { /* Complement of range. */ PL_regnaughty++; PL_regcomp_parse++; @@ -2957,7 +2956,7 @@ S_reganode(pTHX_ U8 op, U32 arg) } /* -- regc - emit (if appropriate) a Unicode character +- reguni - emit (if appropriate) a Unicode character */ STATIC void S_reguni(pTHX_ UV uv, char* s, I32* lenp) @@ -2973,17 +2972,6 @@ S_reguni(pTHX_ UV uv, char* s, I32* lenp) } /* -- regc - emit (if appropriate) a byte of code -*/ -STATIC void -S_regc(pTHX_ U8 b, char* s) -{ - dTHR; - if (!SIZE_ONLY) - *s = b; -} - -/* - reginsert - insert an operator in front of already-emitted operand * * Means relocating the operand. @@ -3144,7 +3132,7 @@ S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l) } else if (PL_regkind[(U8)op] == EXACT) { /* Literal string, where present. */ - node += ((*OPERAND(node)) + 2 + sizeof(regnode) - 1) / sizeof(regnode); + node += NODE_SZ_STR(node) - 1; node = NEXTOPER(node); } else { @@ -3247,7 +3235,8 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) k = PL_regkind[(U8)OP(o)]; if (k == EXACT) - Perl_sv_catpvf(aTHX_ sv, " <%s%s%s>", PL_colors[0], OPERAND(o) + 1, PL_colors[1]); + Perl_sv_catpvf(aTHX_ sv, " <%s%*s%s>", PL_colors[0], + STR_LEN(o), STRING(o), PL_colors[1]); else if (k == CURLY) { if (OP(o) == CURLYM || OP(o) == CURLYN) Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */ diff --git a/regcomp.h b/regcomp.h index 7bddc1b..e30c8e7 100644 --- a/regcomp.h +++ b/regcomp.h @@ -66,10 +66,10 @@ typedef OP OP_4tree; /* Will be redefined later. */ */ struct regnode_string { - U8 flags; + U8 str_len; U8 type; U16 next_off; - U8 string[1]; + char string[1]; }; struct regnode_1 { @@ -133,6 +133,12 @@ struct regnode_2 { #define OP(p) ((p)->type) #define OPERAND(p) (((struct regnode_string *)p)->string) +#define MASK(p) ((char*)OPERAND(p)) +#define STR_LEN(p) (((struct regnode_string *)p)->str_len) +#define STRING(p) (((struct regnode_string *)p)->string) +#define STR_SZ(l) ((l + sizeof(regnode) - 1) / sizeof(regnode)) +#define NODE_SZ_STR(p) (STR_SZ(STR_LEN(p))+1) + #define NODE_ALIGN(node) #define ARG_LOC(p) (((struct regnode_1 *)p)->arg1) #define ARG1_LOC(p) (((struct regnode_2 *)p)->arg1) diff --git a/regexec.c b/regexec.c index 6be77cc..3fb1826 100644 --- a/regexec.c +++ b/regexec.c @@ -884,7 +884,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * /* We know what class it must start with. */ switch (OP(c)) { case ANYOFUTF8: - cc = (char *) OPERAND(c); + cc = MASK(c); while (s < strend) { if (REGINCLASSUTF8(c, (U8*)s)) { if (tmp && regtry(prog, s)) @@ -898,7 +898,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * } break; case ANYOF: - cc = (char *) OPERAND(c); + cc = MASK(c); while (s < strend) { if (REGINCLASS(cc, *s)) { if (tmp && regtry(prog, s)) @@ -1697,8 +1697,8 @@ S_regmatch(pTHX_ regnode *prog) nextchr = UCHARAT(++locinput); break; case EXACT: - s = (char *) OPERAND(scan); - ln = UCHARAT(s++); + s = STRING(scan); + ln = STR_LEN(scan); /* Inline the first character, for speed. */ if (UCHARAT(s) != nextchr) sayNO; @@ -1713,8 +1713,8 @@ S_regmatch(pTHX_ regnode *prog) PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case EXACTF: - s = (char *) OPERAND(scan); - ln = UCHARAT(s++); + s = STRING(scan); + ln = STR_LEN(scan); if (UTF) { char *l = locinput; @@ -1752,7 +1752,7 @@ S_regmatch(pTHX_ regnode *prog) nextchr = UCHARAT(locinput); break; case ANYOFUTF8: - s = (char *) OPERAND(scan); + s = MASK(scan); if (!REGINCLASSUTF8(scan, (U8*)locinput)) sayNO; if (locinput >= PL_regeol) @@ -1761,7 +1761,7 @@ S_regmatch(pTHX_ regnode *prog) nextchr = UCHARAT(locinput); break; case ANYOF: - s = (char *) OPERAND(scan); + s = MASK(scan); if (nextchr < 0) nextchr = UCHARAT(locinput); if (!REGINCLASS(s, nextchr)) @@ -2535,7 +2535,7 @@ S_regmatch(pTHX_ regnode *prog) ln = n; locinput = PL_reginput; if (PL_regkind[(U8)OP(next)] == EXACT) { - c1 = UCHARAT(OPERAND(next) + 1); + c1 = (U8)*STRING(next); if (OP(next) == EXACTF) c2 = PL_fold[c1]; else if (OP(next) == EXACTFL) @@ -2592,7 +2592,7 @@ S_regmatch(pTHX_ regnode *prog) ); if (n >= ln) { if (PL_regkind[(U8)OP(next)] == EXACT) { - c1 = UCHARAT(OPERAND(next) + 1); + c1 = (U8)*STRING(next); if (OP(next) == EXACTF) c2 = PL_fold[c1]; else if (OP(next) == EXACTFL) @@ -2669,7 +2669,7 @@ S_regmatch(pTHX_ regnode *prog) * when we know what character comes next. */ if (PL_regkind[(U8)OP(next)] == EXACT) { - c1 = UCHARAT(OPERAND(next) + 1); + c1 = (U8)*STRING(next); if (OP(next) == EXACTF) c2 = PL_fold[c1]; else if (OP(next) == EXACTFL) @@ -2975,7 +2975,6 @@ S_regrepeat(pTHX_ regnode *p, I32 max) scan = PL_reginput; if (max != REG_INFTY && max < loceol - scan) loceol = scan + max; - opnd = (char *) OPERAND(p); switch (OP(p)) { case REG_ANY: while (scan < loceol && *scan != '\n') @@ -2999,19 +2998,19 @@ S_regrepeat(pTHX_ regnode *p, I32 max) } break; case EXACT: /* length of string is 1 */ - c = UCHARAT(++opnd); + c = (U8)*STRING(p); while (scan < loceol && UCHARAT(scan) == c) scan++; break; case EXACTF: /* length of string is 1 */ - c = UCHARAT(++opnd); + c = (U8)*STRING(p); while (scan < loceol && (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c])) scan++; break; case EXACTFL: /* length of string is 1 */ PL_reg_flags |= RF_tainted; - c = UCHARAT(++opnd); + c = (U8)*STRING(p); while (scan < loceol && (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c])) scan++; @@ -3024,6 +3023,7 @@ S_regrepeat(pTHX_ regnode *p, I32 max) } break; case ANYOF: + opnd = MASK(p); while (scan < loceol && REGINCLASS(opnd, *scan)) scan++; break;