Repost of fork() debugger patch
[p5sagit/p5-mst-13.2.git] / regexec.c
index da3097e..19fdbfa 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -42,7 +42,7 @@
  *
  ****    Alterations to Henry's code are...
  ****
- ****    Copyright (c) 1991-1994, Larry Wall
+ ****    Copyright (c) 1991-1997, Larry Wall
  ****
  ****    You may distribute under the terms of either the GNU General Public
  ****    License or the Artistic License, as specified in the README file.
@@ -134,6 +134,30 @@ regcppop()
     return input;
 }
 
+static void
+regcppartblow()
+{
+    I32 i = SSPOPINT;
+    U32 paren = 0;
+    char *input;
+    char *startp;
+    char *endp;
+    int lastparen;
+    int size;
+    assert(i == SAVEt_REGCONTEXT);
+    i = SSPOPINT;
+    input = (char *) SSPOPPTR;
+    lastparen = SSPOPINT;
+    size = SSPOPINT;
+    for (i -= 3; i > 0; i -= 3) {
+       paren = (U32)SSPOPINT;
+       startp = (char *) SSPOPPTR;
+       endp = (char *) SSPOPPTR;
+       if (paren <= *reglastparen && regendp[paren] == endp)
+           regstartp[paren] = startp;
+    }
+}
+
 #define regcpblow(cp) leave_scope(cp)
 
 /*
@@ -207,7 +231,8 @@ I32 safebase;       /* no need to remember string in subbase */
     /* If there is a "must appear" string, look for it. */
     s = startpos;
     if (prog->regmust != Nullsv &&
-       (!(prog->reganch & ROPT_ANCH)
+       !(prog->reganch & ROPT_ANCH_GPOS) &&
+       (!(prog->reganch & ROPT_ANCH_BOL)
         || (multiline && prog->regback >= 0)) )
     {
        if (stringarg == strbeg && screamer) {
@@ -250,11 +275,13 @@ I32 safebase;     /* no need to remember string in subbase */
     regtill = startpos+minend;
 
     /* Simplest case:  anchored match need be tried only once. */
-    /*  [unless multiline is set] */
+    /*  [unless only anchor is BOL and multiline is set] */
     if (prog->reganch & ROPT_ANCH) {
        if (regtry(prog, startpos))
            goto got_it;
-       else if (multiline || (prog->reganch & ROPT_IMPLICIT)) {
+       else if (!(prog->reganch & ROPT_ANCH_GPOS) &&
+                (multiline || (prog->reganch & ROPT_IMPLICIT)))
+       {
            if (minlen)
                dontbother = minlen - 1;
            strend -= dontbother;
@@ -287,7 +314,7 @@ I32 safebase;       /* no need to remember string in subbase */
                s++;
            }
        }
-       else if (SvPOK(prog->regstart) == 3) {
+       else if (SvTYPE(prog->regstart) == SVt_PVBM) {
            /* We know what string it must start with. */
            while ((s = fbm_instr((unsigned char*)s,
              (unsigned char*)strend, prog->regstart)) != NULL)
@@ -297,7 +324,7 @@ I32 safebase;       /* no need to remember string in subbase */
                s++;
            }
        }
-       else {
+       else {                          /* Optimized fbm_instr: */
            c = SvPVX(prog->regstart);
            while ((s = ninstr(s, strend, c, c + SvCUR(prog->regstart))) != NULL)
            {
@@ -524,28 +551,28 @@ got_it:
     prog->subbeg = strbeg;
     prog->subend = strend;
     prog->exec_tainted = regtainted;
-    if (!safebase && (prog->nparens || sawampersand)) {
-       I32 i = strend - startpos + (stringarg - strbeg);
-       if (safebase) {                 /* no need for $digit later */
-           s = strbeg;
-           prog->subend = s+i;
-       }
-       else if (strbeg != prog->subbase) {
-           s = savepvn(strbeg,i);      /* so $digit will work later */
-           if (prog->subbase)
+
+    /* make sure $`, $&, $', and $digit will work later */
+    if (strbeg != prog->subbase) {
+       if (safebase) {
+           if (prog->subbase) {
                Safefree(prog->subbase);
-           prog->subbeg = prog->subbase = s;
-           prog->subend = s+i;
+               prog->subbase = Nullch;
+           }
        }
        else {
-           prog->subbeg = s = prog->subbase;
-           prog->subend = s+i;
-       }
-       s += (stringarg - strbeg);
-       for (i = 0; i <= prog->nparens; i++) {
-           if (prog->endp[i]) {
-               prog->startp[i] = s + (prog->startp[i] - startpos);
-               prog->endp[i] = s + (prog->endp[i] - startpos);
+           I32 i = strend - startpos + (stringarg - strbeg);
+           s = savepvn(strbeg, i);
+           Safefree(prog->subbase);
+           prog->subbase = s;
+           prog->subbeg = prog->subbase;
+           prog->subend = prog->subbase + i;
+           s = prog->subbase + (stringarg - strbeg);
+           for (i = 0; i <= prog->nparens; i++) {
+               if (prog->endp[i]) {
+                   prog->startp[i] = s + (prog->startp[i] - startpos);
+                   prog->endp[i] = s + (prog->endp[i] - startpos);
+               }
            }
        }
     }
@@ -631,8 +658,11 @@ char *prog;
 #define sayNO goto no
 #define saySAME(x) if (x) goto yes; else goto no
        if (regnarrate) {
-           PerlIO_printf(Perl_debug_log, "%*s%2d%-8.8s\t<%.10s>\n", regindent*2, "",
-               scan - regprogram, regprop(scan), locinput);
+           SV *prop = sv_newmortal();
+           regprop(prop, scan);
+           PerlIO_printf(Perl_debug_log, "%*s%2d%-8.8s\t<%.10s>\n",
+                         regindent*2, "", scan - regprogram,
+                         SvPVX(prop), locinput);
        }
 #else
 #define sayYES return 1
@@ -670,7 +700,7 @@ char *prog;
            if (locinput == regbol && regprev == '\n')
                break;
            sayNO;
-       case GBOL:
+       case GPOS:
            if (locinput == regbol)
                break;
            sayNO;
@@ -727,8 +757,9 @@ char *prog;
                sayNO;
            if (regeol - locinput < ln)
                sayNO;
-           if (ln > 1 && ((OP(scan) == EXACTF)
-                          ? ibcmp : ibcmp_locale)(s, locinput, ln) != 0)
+           if (ln > 1 && (OP(scan) == EXACTF
+                          ? ibcmp(s, locinput, ln)
+                          : ibcmp_locale(s, locinput, ln)))
                sayNO;
            locinput += ln;
            nextchar = UCHARAT(locinput);
@@ -818,7 +849,11 @@ char *prog;
                sayNO;
            nextchar = UCHARAT(++locinput);
            break;
+       case REFFL:
+           regtainted = TRUE;
+           /* FALL THROUGH */
        case REF:
+       case REFF:
            n = ARG1(scan);  /* which paren pair */
            s = regstartp[n];
            if (!s)
@@ -828,12 +863,19 @@ char *prog;
            if (s == regendp[n])
                break;
            /* Inline the first character, for speed. */
-           if (UCHARAT(s) != nextchar)
+           if (UCHARAT(s) != nextchar &&
+               (OP(scan) == REF ||
+                (UCHARAT(s) != ((OP(scan) == REFF
+                                ? fold : fold_locale)[nextchar]))))
                sayNO;
            ln = regendp[n] - s;
            if (locinput + ln > regeol)
                sayNO;
-           if (ln > 1 && memNE(s, locinput, ln))
+           if (ln > 1 && (OP(scan) == REF
+                          ? memNE(s, locinput, ln)
+                          : (OP(scan) == REFF
+                             ? ibcmp(s, locinput, ln)
+                             : ibcmp_locale(s, locinput, ln))))
                sayNO;
            locinput += ln;
            nextchar = UCHARAT(locinput);
@@ -846,6 +888,7 @@ char *prog;
        case OPEN:
            n = ARG1(scan);  /* which paren pair */
            regstartp[n] = locinput;
+           regendp[n] = 0;
            if (n > regsize)
                regsize = n;
            break;
@@ -885,19 +928,20 @@ char *prog;
                 * that we can try again after backing off.
                 */
 
+               CHECKPOINT cp;
                CURCUR* cc = regcc;
                n = cc->cur + 1;        /* how many we know we matched */
                reginput = locinput;
 
 #ifdef DEBUGGING
                if (regnarrate)
-                   PerlIO_printf(Perl_debug_log, "%*s  %d  %lx\n", regindent*2, "",
-                       n, (long)cc);
+                   PerlIO_printf(Perl_debug_log, "%*s  %ld  %lx\n", regindent*2, "",
+                       (long)n, (long)cc);
 #endif
 
                /* If degenerate scan matches "", assume scan done. */
 
-               if (locinput == cc->lastloc) {
+               if (locinput == cc->lastloc && n >= cc->min) {
                    regcc = cc->oldcc;
                    ln = regcc->cur;
                    if (regmatch(cc->next))
@@ -923,8 +967,12 @@ char *prog;
                if (cc->minmod) {
                    regcc = cc->oldcc;
                    ln = regcc->cur;
-                   if (regmatch(cc->next))
+                   cp = regcppush(cc->parenfloor);
+                   if (regmatch(cc->next)) {
+                       regcppartblow(cp);
                        sayYES; /* All done. */
+                   }
+                   regcppop();
                    regcc->cur = ln;
                    regcc = cc;
 
@@ -935,8 +983,12 @@ char *prog;
                    reginput = locinput;
                    cc->cur = n;
                    cc->lastloc = locinput;
-                   if (regmatch(cc->scan))
+                   cp = regcppush(cc->parenfloor);
+                   if (regmatch(cc->scan)) {
+                       regcppartblow(cp);
                        sayYES;
+                   }
+                   regcppop();
                    cc->cur = n - 1;
                    sayNO;
                }
@@ -944,11 +996,13 @@ char *prog;
                /* Prefer scan over next for maximal matching. */
 
                if (n < cc->max) {      /* More greed allowed? */
-                   regcppush(cc->parenfloor);
+                   cp = regcppush(cc->parenfloor);
                    cc->cur = n;
                    cc->lastloc = locinput;
-                   if (regmatch(cc->scan))
+                   if (regmatch(cc->scan)) {
+                       regcppartblow(cp);
                        sayYES;
+                   }
                    regcppop();         /* Restore some previous $<digit>s? */
                    reginput = locinput;
                }
@@ -1086,7 +1140,8 @@ char *prog;
                sayNO;
            break;
        default:
-           PerlIO_printf(PerlIO_stderr(), "%x %d\n",(unsigned)scan,scan[1]);
+           PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
+                         (unsigned long)scan, scan[1]);
            FAIL("regexp memory corruption");
        }
        scan = next;