perl 5.003_03: hints/sunos_4_1.sh
[p5sagit/p5-mst-13.2.git] / regexec.c
index 1267065..4119dfc 100644 (file)
--- a/regexec.c
+++ b/regexec.c
  * blame Henry for some of the lack of readability.
  */
 
+/* The names of the functions have been changed from regcomp and
+ * regexec to pregcomp and pregexec in order to avoid conflicts
+ * with the POSIX routines of the same names.
+ */
+
 /*SUPPRESS 112*/
 /*
- * regcomp and regexec -- regsub and regerror are not used in perl
+ * pregcomp and pregexec -- regsub and regerror are not used in perl
  *
  *     Copyright (c) 1986 by University of Toronto.
  *     Written by Henry Spencer.  Not derived from licensed software.
@@ -132,7 +137,7 @@ regcppop()
 #define regcpblow(cp) leave_scope(cp)
 
 /*
- * regexec and friends
+ * pregexec and friends
  */
 
 /*
@@ -144,10 +149,10 @@ static I32 regrepeat _((char *p, I32 max));
 static I32 regtry _((regexp *prog, char *startpos));
 
 /*
- - regexec - match a regexp against a string
+ - pregexec - match a regexp against a string
  */
 I32
-regexec(prog, stringarg, strend, strbeg, minend, screamer, safebase)
+pregexec(prog, stringarg, strend, strbeg, minend, screamer, safebase)
 register regexp *prog;
 char *stringarg;
 register char *strend; /* pointer to null at end of string */
@@ -166,6 +171,7 @@ I32 safebase;       /* no need to remember string in subbase */
     CURCUR cc;
 
     cc.cur = 0;
+    cc.oldcc = 0;
     regcc = &cc;
 
 #ifdef DEBUGGING
@@ -471,10 +477,10 @@ I32 safebase;     /* no need to remember string in subbase */
     goto phooey;
 
 got_it:
+    strend += dontbother;      /* uncheat */
     prog->subbeg = strbeg;
     prog->subend = strend;
     if ((!safebase && (prog->nparens || sawampersand)) || prog->do_folding) {
-       strend += dontbother;   /* uncheat */
        i = strend - startpos + (stringarg - strbeg);
        if (safebase) {                 /* no need for $digit later */
            s = strbeg;
@@ -571,14 +577,26 @@ char *prog;
     register char *s;          /* operand or save */
     register char *locinput = reginput;
     int minmod = 0;
+#ifdef DEBUGGING
+    static int regindent = 0;
+    regindent++;
+#endif
 
     nextchar = *locinput;
     scan = prog;
     while (scan != NULL) {
 #ifdef DEBUGGING
-       if (regnarrate)
-           fprintf(stderr, "%2d%-8.8s\t<%.10s>\n",
+#define sayYES goto yes
+#define sayNO goto no
+#define saySAME(x) if (x) goto yes; else goto no
+       if (regnarrate) {
+           PerlIO_printf(Perl_debug_log, "%*s%2d%-8.8s\t<%.10s>\n", regindent*2, "",
                scan - regprogram, regprop(scan), locinput);
+       }
+#else
+#define sayYES return 1
+#define sayNO return 0
+#define saySAME(x) return x
 #endif
 
 #ifdef REGALIGN
@@ -598,7 +616,7 @@ char *prog;
                /* regtill = regbol; */
                break;
            }
-           return 0;
+           sayNO;
        case MBOL:
            if (locinput == regbol
                ? regprev == '\n'
@@ -606,15 +624,15 @@ char *prog;
            {
                break;
            }
-           return 0;
+           sayNO;
        case SBOL:
            if (locinput == regbol && regprev == '\n')
                break;
-           return 0;
+           sayNO;
        case GBOL:
            if (locinput == regbol)
                break;
-           return 0;
+           sayNO;
        case EOL:
            if (multiline)
                goto meol;
@@ -623,23 +641,23 @@ char *prog;
        case MEOL:
          meol:
            if ((nextchar || locinput < regeol) && nextchar != '\n')
-               return 0;
+               sayNO;
            break;
        case SEOL:
          seol:
            if ((nextchar || locinput < regeol) && nextchar != '\n')
-               return 0;
+               sayNO;
            if (regeol - locinput > 1)
-               return 0;
+               sayNO;
            break;
        case SANY:
            if (!nextchar && locinput >= regeol)
-               return 0;
+               sayNO;
            nextchar = *++locinput;
            break;
        case ANY:
            if (!nextchar && locinput >= regeol || nextchar == '\n')
-               return 0;
+               sayNO;
            nextchar = *++locinput;
            break;
        case EXACTLY:
@@ -647,11 +665,11 @@ char *prog;
            ln = *s++;
            /* Inline the first character, for speed. */
            if (*s != nextchar)
-               return 0;
+               sayNO;
            if (regeol - locinput < ln)
-               return 0;
+               sayNO;
            if (ln > 1 && bcmp(s, locinput, ln) != 0)
-               return 0;
+               sayNO;
            locinput += ln;
            nextchar = *locinput;
            break;
@@ -660,23 +678,23 @@ char *prog;
            if (nextchar < 0)
                nextchar = UCHARAT(locinput);
            if (s[nextchar >> 3] & (1 << (nextchar&7)))
-               return 0;
+               sayNO;
            if (!nextchar && locinput >= regeol)
-               return 0;
+               sayNO;
            nextchar = *++locinput;
            break;
        case ALNUM:
            if (!nextchar)
-               return 0;
+               sayNO;
            if (!isALNUM(nextchar))
-               return 0;
+               sayNO;
            nextchar = *++locinput;
            break;
        case NALNUM:
            if (!nextchar && locinput >= regeol)
-               return 0;
+               sayNO;
            if (isALNUM(nextchar))
-               return 0;
+               sayNO;
            nextchar = *++locinput;
            break;
        case NBOUND:
@@ -687,51 +705,51 @@ char *prog;
                ln = isALNUM(locinput[-1]);
            n = isALNUM(nextchar); /* is next char in word? */
            if ((ln == n) == (OP(scan) == BOUND))
-               return 0;
+               sayNO;
            break;
        case SPACE:
            if (!nextchar && locinput >= regeol)
-               return 0;
+               sayNO;
            if (!isSPACE(nextchar))
-               return 0;
+               sayNO;
            nextchar = *++locinput;
            break;
        case NSPACE:
            if (!nextchar)
-               return 0;
+               sayNO;
            if (isSPACE(nextchar))
-               return 0;
+               sayNO;
            nextchar = *++locinput;
            break;
        case DIGIT:
            if (!isDIGIT(nextchar))
-               return 0;
+               sayNO;
            nextchar = *++locinput;
            break;
        case NDIGIT:
            if (!nextchar && locinput >= regeol)
-               return 0;
+               sayNO;
            if (isDIGIT(nextchar))
-               return 0;
+               sayNO;
            nextchar = *++locinput;
            break;
        case REF:
            n = ARG1(scan);  /* which paren pair */
            s = regstartp[n];
            if (!s)
-               return 0;
+               sayNO;
            if (!regendp[n])
-               return 0;
+               sayNO;
            if (s == regendp[n])
                break;
            /* Inline the first character, for speed. */
            if (*s != nextchar)
-               return 0;
+               sayNO;
            ln = regendp[n] - s;
            if (locinput + ln > regeol)
-               return 0;
+               sayNO;
            if (ln > 1 && bcmp(s, locinput, ln) != 0)
-               return 0;
+               sayNO;
            locinput += ln;
            nextchar = *locinput;
            break;
@@ -769,7 +787,7 @@ char *prog;
                n = regmatch(PREVOPER(next));   /* start on the WHILEM */
                regcpblow(cp);
                regcc = cc.oldcc;
-               return n;
+               saySAME(n);
            }
            /* NOT REACHED */
        case WHILEM: {
@@ -783,19 +801,25 @@ char *prog;
                 */
 
                CURCUR* cc = regcc;
-               n = cc->cur + 1;
+               n = cc->cur + 1;        /* how many we know we matched */
                reginput = locinput;
 
+#ifdef DEBUGGING
+               if (regnarrate)
+                   PerlIO_printf(Perl_debug_log, "%*s  %d  %lx\n", regindent*2, "",
+                       n, (long)cc);
+#endif
+
                /* If degenerate scan matches "", assume scan done. */
 
                if (locinput == cc->lastloc) {
                    regcc = cc->oldcc;
                    ln = regcc->cur;
                    if (regmatch(cc->next))
-                       return TRUE;
+                       sayYES;
                    regcc->cur = ln;
                    regcc = cc;
-                   return FALSE;
+                   sayNO;
                }
 
                /* First just match a string of min scans. */
@@ -803,7 +827,10 @@ char *prog;
                if (n < cc->min) {
                    cc->cur = n;
                    cc->lastloc = locinput;
-                   return regmatch(cc->scan);
+                   if (regmatch(cc->scan))
+                       sayYES;
+                   cc->cur = n - 1;
+                   sayNO;
                }
 
                /* Prefer next over scan for minimal matching. */
@@ -812,18 +839,21 @@ char *prog;
                    regcc = cc->oldcc;
                    ln = regcc->cur;
                    if (regmatch(cc->next))
-                       return TRUE;    /* All done. */
+                       sayYES; /* All done. */
                    regcc->cur = ln;
                    regcc = cc;
 
                    if (n >= cc->max)   /* Maximum greed exceeded? */
-                       return FALSE;
+                       sayNO;
 
                    /* Try scanning more and see if it helps. */
                    reginput = locinput;
                    cc->cur = n;
                    cc->lastloc = locinput;
-                   return regmatch(cc->scan);
+                   if (regmatch(cc->scan))
+                       sayYES;
+                   cc->cur = n - 1;
+                   sayNO;
                }
 
                /* Prefer scan over next for maximal matching. */
@@ -833,7 +863,7 @@ char *prog;
                    cc->cur = n;
                    cc->lastloc = locinput;
                    if (regmatch(cc->scan))
-                       return TRUE;
+                       sayYES;
                    regcppop();         /* Restore some previous $<digit>s? */
                    reginput = locinput;
                }
@@ -842,20 +872,26 @@ char *prog;
                regcc = cc->oldcc;
                ln = regcc->cur;
                if (regmatch(cc->next))
-                   return TRUE;
+                   sayYES;
                regcc->cur = ln;
                regcc = cc;
-               return FALSE;
+               cc->cur = n - 1;
+               sayNO;
            }
            /* NOT REACHED */
        case BRANCH: {
                if (OP(next) != BRANCH)   /* No choice. */
                    next = NEXTOPER(scan);/* Avoid recursion. */
                else {
+                   int lastparen = *reglastparen;
                    do {
                        reginput = locinput;
                        if (regmatch(NEXTOPER(scan)))
-                           return 1;
+                           sayYES;
+                       for (n = *reglastparen; n > lastparen; n--)
+                           regendp[n] = 0;
+                       *reglastparen = n;
+                           
 #ifdef REGALIGN
                        /*SUPPRESS 560*/
                        if (n = NEXT(scan))
@@ -866,7 +902,7 @@ char *prog;
                        scan = regnext(scan);
 #endif
                    } while (scan != NULL && OP(scan) == BRANCH);
-                   return 0;
+                   sayNO;
                    /* NOTREACHED */
                }
            }
@@ -901,19 +937,20 @@ char *prog;
            if (minmod) {
                minmod = 0;
                if (ln && regrepeat(scan, ln) < ln)
-                   return 0;
-               while (n >= ln) {
+                   sayNO;
+               while (n >= ln || (n == 32767 && ln > 0)) { /* ln overflow ? */
                    /* If it could work, try it. */
                    if (nextchar == -1000 || *reginput == nextchar)
                        if (regmatch(next))
-                           return 1;
+                           sayYES;
                    /* Couldn't or didn't -- back up. */
+                   reginput = locinput + ln;
                    if (regrepeat(scan, 1)) {
                        ln++;
                        reginput = locinput + ln;
                    }
                    else
-                       return 0;
+                       sayNO;
                }
            }
            else {
@@ -925,31 +962,31 @@ char *prog;
                    /* If it could work, try it. */
                    if (nextchar == -1000 || *reginput == nextchar)
                        if (regmatch(next))
-                           return 1;
+                           sayYES;
                    /* Couldn't or didn't -- back up. */
                    n--;
                    reginput = locinput + n;
                }
            }
-           return 0;
+           sayNO;
        case SUCCEED:
        case END:
            reginput = locinput;        /* put where regtry can find it */
-           return 1;                   /* Success! */
+           sayYES;                     /* Success! */
        case IFMATCH:
            reginput = locinput;
            scan = NEXTOPER(scan);
            if (!regmatch(scan))
-               return 0;
+               sayNO;
            break;
        case UNLESSM:
            reginput = locinput;
            scan = NEXTOPER(scan);
            if (regmatch(scan))
-               return 0;
+               sayNO;
            break;
        default:
-           fprintf(stderr, "%x %d\n",(unsigned)scan,scan[1]);
+           PerlIO_printf(PerlIO_stderr(), "%x %d\n",(unsigned)scan,scan[1]);
            FAIL("regexp memory corruption");
        }
        scan = next;
@@ -961,6 +998,18 @@ char *prog;
     */
     FAIL("corrupted regexp pointers");
     /*NOTREACHED*/
+    sayNO;
+
+yes:
+#ifdef DEBUGGING
+    regindent--;
+#endif
+    return 1;
+
+no:
+#ifdef DEBUGGING
+    regindent--;
+#endif
     return 0;
 }