Fix tcsh hack in Configure
[p5sagit/p5-mst-13.2.git] / regcomp.c
index 2f3fb40..5dad7d7 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -43,7 +43,7 @@
  *
  ****    Alterations to Henry's code are...
  ****
- ****    Copyright (c) 1991-1994, Larry Wall
+ ****    Copyright (c) 1991-1997, Larry Wall
  ****
  ****    You may distribute under the terms of either the GNU General Public
  ****    License or the Artistic License, as specified in the README file.
@@ -109,6 +109,7 @@ static void reginsert _((char, char *));
 static void regoptail _((char *, char *));
 static void regset _((char *, I32));
 static void regtail _((char *, char *));
+static char* regwhite _((char *, char *));
 static char* nextchar _((void));
 
 /*
@@ -145,6 +146,13 @@ PMOP* pm;
     I32 minlen = 0;
     I32 sawplus = 0;
     I32 sawopen = 0;
+#define MAX_REPEAT_DEPTH 12
+    struct {
+       char *opcode;
+       I32 count;
+    } repeat_stack[MAX_REPEAT_DEPTH];
+    I32 repeat_depth = 0;
+    I32 repeat_count = 1;      /* We start unmultiplied. */
 
     if (exp == NULL)
        croak("NULL regexp argument");
@@ -228,18 +236,23 @@ PMOP* pm;
                 regkind[(U8)OP(first)] == NBOUND)
            r->regstclass = first;
        else if (regkind[(U8)OP(first)] == BOL) {
-           r->reganch = ROPT_ANCH;
+           r->reganch |= ROPT_ANCH_BOL;
            first = NEXTOPER(first);
-               goto again;
+           goto again;
+       }
+       else if (OP(first) == GPOS) {
+           r->reganch |= ROPT_ANCH_GPOS;
+           first = NEXTOPER(first);
+           goto again;
        }
        else if ((OP(first) == STAR &&
            regkind[(U8)OP(NEXTOPER(first))] == ANY) &&
            !(r->reganch & ROPT_ANCH) )
        {
            /* turn .* into ^.* with an implied $*=1 */
-           r->reganch = ROPT_ANCH | ROPT_IMPLICIT;
+           r->reganch |= ROPT_ANCH_BOL | ROPT_IMPLICIT;
            first = NEXTOPER(first);
-               goto again;
+           goto again;
        }
        if (sawplus && (!sawopen || !regsawback))
            r->reganch |= ROPT_SKIP;    /* x+ must match 1st of run */
@@ -284,9 +297,9 @@ PMOP* pm;
                char *t;
 
                first = scan;
-               while (OP(t = regnext(scan)) == CLOSE)
+               while ((t = regnext(scan)) && OP(t) == CLOSE)
                    scan = t;
-               minlen += *OPERAND(first);
+               minlen += *OPERAND(first) * repeat_count;
                if (curback - backish == len) {
                    sv_catpvn(longish, OPERAND(first)+1,
                        *OPERAND(first));
@@ -305,22 +318,57 @@ PMOP* pm;
                    curback += *OPERAND(first);
            }
            else if (strchr(varies,OP(scan))) {
-               curback = -30000;
+               int tcount;
+               char *next;
+
+               if (repeat_depth < MAX_REPEAT_DEPTH
+                   && ((OP(scan) == PLUS
+                        && (tcount = 1)
+                        && (next = NEXTOPER(scan)))
+                       || (regkind[(U8)OP(scan)] == CURLY
+                           && (tcount = ARG1(scan))
+                           && (next = NEXTOPER(scan)+4))))
+               {
+                   /* We treat (abc)+ as (abc)(abc)*. */
+
+                   /* Mark the place to return back. */
+                   repeat_stack[repeat_depth].opcode = regnext(scan);
+                   repeat_stack[repeat_depth].count = repeat_count;
+                   repeat_depth++;
+                   repeat_count *= tcount;
+
+                   /* Go deeper: */
+                   scan = next;
+                   continue;
+               }
+               else {
+                   curback = -30000;
+                   len = 0;
+                   if (SvCUR(longish) > SvCUR(longest)) {
+                       sv_setsv(longest,longish);
+                       backest = backish;
+                   }
+                   sv_setpvn(longish,"",0);
+               }
+           }
+           else if (strchr(simple,OP(scan))) {
+               curback++;
+               minlen += repeat_count;
                len = 0;
                if (SvCUR(longish) > SvCUR(longest)) {
                    sv_setsv(longest,longish);
                    backest = backish;
                }
                sv_setpvn(longish,"",0);
-               if (OP(scan) == PLUS && strchr(simple,OP(NEXTOPER(scan))))
-                   minlen++;
-               else if (regkind[(U8)OP(scan)] == CURLY &&
-                 strchr(simple,OP(NEXTOPER(scan)+4)))
-                   minlen += ARG1(scan);
            }
-           else if (strchr(simple,OP(scan))) {
-               curback++;
-               minlen++;
+           scan = regnext(scan);
+           if (!scan) {                /* Go up PLUS or CURLY. */
+               if (!repeat_depth--)
+                   croak("panic: re scan");
+               scan = repeat_stack[repeat_depth].opcode;
+               repeat_count = repeat_stack[repeat_depth].count;
+               /* Need to submit the longest string found: */
+               curback = -30000;
                len = 0;
                if (SvCUR(longish) > SvCUR(longest)) {
                    sv_setsv(longest,longish);
@@ -328,12 +376,11 @@ PMOP* pm;
                }
                sv_setpvn(longish,"",0);
            }
-           scan = regnext(scan);
        }
 
        /* Prefer earlier on tie, unless we can tail match latter */
 
-       if (SvCUR(longish) + (regkind[(U8)OP(first)] == EOL)
+       if (SvCUR(longish) + (first && regkind[(U8)OP(first)] == EOL)
                > SvCUR(longest))
        {
            sv_setsv(longest,longish);
@@ -352,11 +399,12 @@ PMOP* pm;
            if (backest < 0)
                backest = -1;
            r->regback = backest;
-           if (SvCUR(longest) > !(sawstudy || regkind[(U8)OP(first)] == EOL))
+           if (SvCUR(longest) > !(sawstudy || 
+                                  (first && regkind[(U8)OP(first)] == EOL)))
                fbm_compile(r->regmust);
            (void)SvUPGRADE(r->regmust, SVt_PVBM);
            BmUSEFUL(r->regmust) = 100;
-           if (regkind[(U8)OP(first)] == EOL && SvCUR(longish))
+           if (first && regkind[(U8)OP(first)] == EOL && SvCUR(longish))
                SvTAIL_on(r->regmust);
        }
        else {
@@ -409,7 +457,7 @@ I32 *flagp;
                break;
            case '$':
            case '@':
-               croak("Sequence (?%c...) not implemented", paren);
+               croak("Sequence (?%c...) not implemented", (int)paren);
                break;
            case '#':
                while (*regparse && *regparse != ')')
@@ -649,6 +697,10 @@ I32 *flagp;
        *flagp = flags;
        return(ret);
     }
+
+    if (!(flags&HASWIDTH) && op != '?')
+      FAIL("regexp *+ operand could be empty");
+
     nextchar();
 
     *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
@@ -764,10 +816,15 @@ tryagain:
        croak("internal urp in regexp at /%s/", regparse);
                                /* Supposed to be caught earlier. */
        break;
+    case '{':
+       if (!regcurly(regparse)) {
+           regparse++;
+           goto defchar;
+       }
+       /* FALL THROUGH */
     case '?':
     case '+':
     case '*':
-    case '{':
        FAIL("?+*{} follows nothing in regexp");
        break;
     case '\\':
@@ -778,7 +835,7 @@ tryagain:
            nextchar();
            break;
        case 'G':
-           ret = regnode(GBOL);
+           ret = regnode(GPOS);
            *flagp |= SIMPLE;
            nextchar();
            break;
@@ -891,6 +948,9 @@ tryagain:
              len++)
            {
                oldp = p;
+
+               if (regflags & PMf_EXTENDED)
+                   p = regwhite(p, regxend);
                switch (*p) {
                case '^':
                case '$':
@@ -969,22 +1029,12 @@ tryagain:
                        break;
                    }
                    break;
-               case '#':
-                   if (regflags & PMf_EXTENDED) {
-                       while (p < regxend && *p != '\n') p++;
-                   }
-                   /* FALL THROUGH */
-               case ' ': case '\t': case '\n': case '\r': case '\f': case '\v':
-                   if (regflags & PMf_EXTENDED) {
-                       p++;
-                       len--;
-                       continue;
-                   }
-                   /* FALL THROUGH */
                default:
                    ender = *p++;
                    break;
                }
+               if (regflags & PMf_EXTENDED)
+                   p = regwhite(p, regxend);
                if (ISMULT2(p)) { /* Back off on ?+*. */
                    if (len)
                        p = oldp;
@@ -1015,6 +1065,25 @@ tryagain:
     return(ret);
 }
 
+static char *
+regwhite(p, e)
+char *p;
+char *e;
+{
+    while (p < e) {
+       if (isSPACE(*p))
+           ++p;
+       else if (*p == '#') {
+           do {
+               p++;
+           } while (p < e && *p != '\n');
+       }
+       else
+           break;
+    }
+    return p;
+}
+
 static void
 regset(opnd, c)
 char *opnd;
@@ -1494,8 +1563,14 @@ regexp *r;
        PerlIO_printf(Perl_debug_log, "start `%s' ", SvPVX(r->regstart));
     if (r->regstclass)
        PerlIO_printf(Perl_debug_log, "stclass `%s' ", regprop(r->regstclass));
-    if (r->reganch & ROPT_ANCH)
-       PerlIO_printf(Perl_debug_log, "anchored ");
+    if (r->reganch & ROPT_ANCH) {
+       PerlIO_printf(Perl_debug_log, "anchored");
+       if (r->reganch & ROPT_ANCH_BOL)
+           PerlIO_printf(Perl_debug_log, "(BOL)");
+       if (r->reganch & ROPT_ANCH_GPOS)
+           PerlIO_printf(Perl_debug_log, "(GPOS)");
+       PerlIO_putc(Perl_debug_log, ' ');
+    }
     if (r->reganch & ROPT_SKIP)
        PerlIO_printf(Perl_debug_log, "plus ");
     if (r->reganch & ROPT_IMPLICIT)
@@ -1608,8 +1683,8 @@ char *op;
     case MINMOD:
        p = "MINMOD";
        break;
-    case GBOL:
-       p = "GBOL";
+    case GPOS:
+       p = "GPOS";
        break;
     case UNLESSM:
        p = "UNLESSM";