X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=d3788c8f16fede5d566e4c84e584d0cbb8d000e8;hb=a9581ec21a2686ca09657757555fcd66435bb205;hp=bbb7c8e4445c845b04cff7e0b4cf92d3b8de64f2;hpb=bbce6d69784bf43b0e69e8d312042d65f258af23;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.c b/regcomp.c index bbb7c8e..d3788c8 100644 --- a/regcomp.c +++ b/regcomp.c @@ -43,7 +43,7 @@ * **** Alterations to Henry's code are... **** - **** Copyright (c) 1991-1994, Larry Wall + **** Copyright (c) 1991-1997, Larry Wall **** **** You may distribute under the terms of either the GNU General Public **** License or the Artistic License, as specified in the README file. @@ -109,6 +109,7 @@ static void reginsert _((char, char *)); static void regoptail _((char *, char *)); static void regset _((char *, I32)); static void regtail _((char *, char *)); +static char* regwhite _((char *, char *)); static char* nextchar _((void)); /* @@ -145,6 +146,13 @@ PMOP* pm; I32 minlen = 0; I32 sawplus = 0; I32 sawopen = 0; +#define MAX_REPEAT_DEPTH 12 + struct { + char *opcode; + I32 count; + } repeat_stack[MAX_REPEAT_DEPTH]; + I32 repeat_depth = 0; + I32 repeat_count = 1; /* We start unmultiplied. */ if (exp == NULL) croak("NULL regexp argument"); @@ -228,18 +236,23 @@ PMOP* pm; regkind[(U8)OP(first)] == NBOUND) r->regstclass = first; else if (regkind[(U8)OP(first)] == BOL) { - r->reganch = ROPT_ANCH; + r->reganch |= ROPT_ANCH_BOL; first = NEXTOPER(first); - goto again; + goto again; + } + else if (OP(first) == GPOS) { + r->reganch |= ROPT_ANCH_GPOS; + first = NEXTOPER(first); + goto again; } else if ((OP(first) == STAR && regkind[(U8)OP(NEXTOPER(first))] == ANY) && !(r->reganch & ROPT_ANCH) ) { /* turn .* into ^.* with an implied $*=1 */ - r->reganch = ROPT_ANCH | ROPT_IMPLICIT; + r->reganch |= ROPT_ANCH_BOL | ROPT_IMPLICIT; first = NEXTOPER(first); - goto again; + goto again; } if (sawplus && (!sawopen || !regsawback)) r->reganch |= ROPT_SKIP; /* x+ must match 1st of run */ @@ -284,9 +297,9 @@ PMOP* pm; char *t; first = scan; - while (OP(t = regnext(scan)) == CLOSE) + while ((t = regnext(scan)) && OP(t) == CLOSE) scan = t; - minlen += *OPERAND(first); + minlen += *OPERAND(first) * repeat_count; if (curback - backish == len) { sv_catpvn(longish, OPERAND(first)+1, *OPERAND(first)); @@ -305,22 +318,57 @@ PMOP* pm; curback += *OPERAND(first); } else if (strchr(varies,OP(scan))) { - curback = -30000; + int tcount; + char *next; + + if (repeat_depth < MAX_REPEAT_DEPTH + && ((OP(scan) == PLUS + && (tcount = 1) + && (next = NEXTOPER(scan))) + || (regkind[(U8)OP(scan)] == CURLY + && (tcount = ARG1(scan)) + && (next = NEXTOPER(scan)+4)))) + { + /* We treat (abc)+ as (abc)(abc)*. */ + + /* Mark the place to return back. */ + repeat_stack[repeat_depth].opcode = regnext(scan); + repeat_stack[repeat_depth].count = repeat_count; + repeat_depth++; + repeat_count *= tcount; + + /* Go deeper: */ + scan = next; + continue; + } + else { + curback = -30000; + len = 0; + if (SvCUR(longish) > SvCUR(longest)) { + sv_setsv(longest,longish); + backest = backish; + } + sv_setpvn(longish,"",0); + } + } + else if (strchr(simple,OP(scan))) { + curback++; + minlen += repeat_count; len = 0; if (SvCUR(longish) > SvCUR(longest)) { sv_setsv(longest,longish); backest = backish; } sv_setpvn(longish,"",0); - if (OP(scan) == PLUS && strchr(simple,OP(NEXTOPER(scan)))) - minlen++; - else if (regkind[(U8)OP(scan)] == CURLY && - strchr(simple,OP(NEXTOPER(scan)+4))) - minlen += ARG1(scan); } - else if (strchr(simple,OP(scan))) { - curback++; - minlen++; + scan = regnext(scan); + if (!scan) { /* Go up PLUS or CURLY. */ + if (!repeat_depth--) + croak("panic: re scan"); + scan = repeat_stack[repeat_depth].opcode; + repeat_count = repeat_stack[repeat_depth].count; + /* Need to submit the longest string found: */ + curback = -30000; len = 0; if (SvCUR(longish) > SvCUR(longest)) { sv_setsv(longest,longish); @@ -328,12 +376,11 @@ PMOP* pm; } sv_setpvn(longish,"",0); } - scan = regnext(scan); } /* Prefer earlier on tie, unless we can tail match latter */ - if (SvCUR(longish) + (regkind[(U8)OP(first)] == EOL) + if (SvCUR(longish) + (first && regkind[(U8)OP(first)] == EOL) > SvCUR(longest)) { sv_setsv(longest,longish); @@ -352,11 +399,12 @@ PMOP* pm; if (backest < 0) backest = -1; r->regback = backest; - if (SvCUR(longest) > !(sawstudy || regkind[(U8)OP(first)] == EOL)) + if (SvCUR(longest) > !(sawstudy || + (first && regkind[(U8)OP(first)] == EOL))) fbm_compile(r->regmust); (void)SvUPGRADE(r->regmust, SVt_PVBM); BmUSEFUL(r->regmust) = 100; - if (regkind[(U8)OP(first)] == EOL && SvCUR(longish)) + if (first && regkind[(U8)OP(first)] == EOL && SvCUR(longish)) SvTAIL_on(r->regmust); } else { @@ -409,7 +457,7 @@ I32 *flagp; break; case '$': case '@': - croak("Sequence (?%c...) not implemented", paren); + croak("Sequence (?%c...) not implemented", (int)paren); break; case '#': while (*regparse && *regparse != ')') @@ -421,7 +469,7 @@ I32 *flagp; return NULL; default: --regparse; - while (*regparse && strchr("iogmsx", *regparse)) + while (*regparse && strchr("iogcmsx", *regparse)) pmflag(®flags, *regparse++); if (*regparse != ')') croak("Sequence (?%c...) not recognized", *regparse); @@ -649,6 +697,10 @@ I32 *flagp; *flagp = flags; return(ret); } + + if (!(flags&HASWIDTH) && op != '?') + FAIL("regexp *+ operand could be empty"); + nextchar(); *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH); @@ -764,10 +816,16 @@ tryagain: croak("internal urp in regexp at /%s/", regparse); /* Supposed to be caught earlier. */ break; + case '{': + if (!regcurly(regparse)) { + regparse++; + goto defchar; + } + /* FALL THROUGH */ case '?': case '+': case '*': - FAIL("?+* follows nothing in regexp"); + FAIL("?+*{} follows nothing in regexp"); break; case '\\': switch (*++regparse) { @@ -777,7 +835,7 @@ tryagain: nextchar(); break; case 'G': - ret = regnode(GBOL); + ret = regnode(GPOS); *flagp |= SIMPLE; nextchar(); break; @@ -890,6 +948,9 @@ tryagain: len++) { oldp = p; + + if (regflags & PMf_EXTENDED) + p = regwhite(p, regxend); switch (*p) { case '^': case '$': @@ -968,22 +1029,12 @@ tryagain: break; } break; - case '#': - if (regflags & PMf_EXTENDED) { - while (p < regxend && *p != '\n') p++; - } - /* FALL THROUGH */ - case ' ': case '\t': case '\n': case '\r': case '\f': case '\v': - if (regflags & PMf_EXTENDED) { - p++; - len--; - continue; - } - /* FALL THROUGH */ default: ender = *p++; break; } + if (regflags & PMf_EXTENDED) + p = regwhite(p, regxend); if (ISMULT2(p)) { /* Back off on ?+*. */ if (len) p = oldp; @@ -1014,6 +1065,25 @@ tryagain: return(ret); } +static char * +regwhite(p, e) +char *p; +char *e; +{ + while (p < e) { + if (isSPACE(*p)) + ++p; + else if (*p == '#') { + do { + p++; + } while (p < e && *p != '\n'); + } + else + break; + } + return p; +} + static void regset(opnd, c) char *opnd; @@ -1453,7 +1523,7 @@ regexp *r; register char *s; register char op = EXACT; /* Arbitrary non-END op. */ register char *next; - + SV *sv = sv_newmortal(); s = r->program + 1; while (op != END) { /* While that wasn't END last time... */ @@ -1462,7 +1532,9 @@ regexp *r; s++; #endif op = OP(s); - PerlIO_printf(Perl_debug_log, "%2d%s", s-r->program, regprop(s)); /* Where, what. */ + /* where, what */ + regprop(sv, s); + PerlIO_printf(Perl_debug_log, "%2d%s", s - r->program, SvPVX(sv)); next = regnext(s); s += regarglen[(U8)op]; if (next == NULL) /* Next ptr. */ @@ -1491,10 +1563,18 @@ regexp *r; /* Header fields of interest. */ if (r->regstart) PerlIO_printf(Perl_debug_log, "start `%s' ", SvPVX(r->regstart)); - if (r->regstclass) - PerlIO_printf(Perl_debug_log, "stclass `%s' ", regprop(r->regstclass)); - if (r->reganch & ROPT_ANCH) - PerlIO_printf(Perl_debug_log, "anchored "); + if (r->regstclass) { + regprop(sv, r->regstclass); + PerlIO_printf(Perl_debug_log, "stclass `%s' ", SvPVX(sv)); + } + if (r->reganch & ROPT_ANCH) { + PerlIO_printf(Perl_debug_log, "anchored"); + if (r->reganch & ROPT_ANCH_BOL) + PerlIO_printf(Perl_debug_log, "(BOL)"); + if (r->reganch & ROPT_ANCH_GPOS) + PerlIO_printf(Perl_debug_log, "(GPOS)"); + PerlIO_putc(Perl_debug_log, ' '); + } if (r->reganch & ROPT_SKIP) PerlIO_printf(Perl_debug_log, "plus "); if (r->reganch & ROPT_IMPLICIT) @@ -1509,14 +1589,14 @@ regexp *r; /* - regprop - printable representation of opcode */ -char * -regprop(op) +void +regprop(sv, op) +SV *sv; char *op; { register char *p = 0; - (void) strcpy(buf, ":"); - + sv_setpv(sv, ":"); switch (OP(op)) { case BOL: p = "BOL"; @@ -1579,23 +1659,19 @@ char *op; p = "NBOUNDL"; break; case CURLY: - (void)sprintf(buf+strlen(buf), "CURLY {%d,%d}", ARG1(op),ARG2(op)); - p = NULL; + sv_catpvf(sv, "CURLY {%d,%d}", ARG1(op), ARG2(op)); break; case CURLYX: - (void)sprintf(buf+strlen(buf), "CURLYX {%d,%d}", ARG1(op),ARG2(op)); - p = NULL; + sv_catpvf(sv, "CURLYX {%d,%d}", ARG1(op), ARG2(op)); break; case REF: - (void)sprintf(buf+strlen(buf), "REF%d", ARG1(op)); - p = NULL; + sv_catpvf(sv, "REF%d", ARG1(op)); break; case OPEN: - (void)sprintf(buf+strlen(buf), "OPEN%d", ARG1(op)); - p = NULL; + sv_catpvf(sv, "OPEN%d", ARG1(op)); break; case CLOSE: - (void)sprintf(buf+strlen(buf), "CLOSE%d", ARG1(op)); + sv_catpvf(sv, "CLOSE%d", ARG1(op)); p = NULL; break; case STAR: @@ -1607,8 +1683,8 @@ char *op; case MINMOD: p = "MINMOD"; break; - case GBOL: - p = "GBOL"; + case GPOS: + p = "GPOS"; break; case UNLESSM: p = "UNLESSM"; @@ -1655,9 +1731,8 @@ char *op; default: FAIL("corrupted regexp opcode"); } - if (p != NULL) - (void) strcat(buf, p); - return(buf); + if (p) + sv_catpv(sv, p); } #endif /* DEBUGGING */