Repost of fork() debugger patch
[p5sagit/p5-mst-13.2.git] / regexec.c
index bed5a99..19fdbfa 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -42,7 +42,7 @@
  *
  ****    Alterations to Henry's code are...
  ****
- ****    Copyright (c) 1991-1994, Larry Wall
+ ****    Copyright (c) 1991-1997, Larry Wall
  ****
  ****    You may distribute under the terms of either the GNU General Public
  ****    License or the Artistic License, as specified in the README file.
@@ -134,6 +134,30 @@ regcppop()
     return input;
 }
 
+static void
+regcppartblow()
+{
+    I32 i = SSPOPINT;
+    U32 paren = 0;
+    char *input;
+    char *startp;
+    char *endp;
+    int lastparen;
+    int size;
+    assert(i == SAVEt_REGCONTEXT);
+    i = SSPOPINT;
+    input = (char *) SSPOPPTR;
+    lastparen = SSPOPINT;
+    size = SSPOPINT;
+    for (i -= 3; i > 0; i -= 3) {
+       paren = (U32)SSPOPINT;
+       startp = (char *) SSPOPPTR;
+       endp = (char *) SSPOPPTR;
+       if (paren <= *reglastparen && regendp[paren] == endp)
+           regstartp[paren] = startp;
+    }
+}
+
 #define regcpblow(cp) leave_scope(cp)
 
 /*
@@ -207,7 +231,8 @@ I32 safebase;       /* no need to remember string in subbase */
     /* If there is a "must appear" string, look for it. */
     s = startpos;
     if (prog->regmust != Nullsv &&
-       (!(prog->reganch & ROPT_ANCH)
+       !(prog->reganch & ROPT_ANCH_GPOS) &&
+       (!(prog->reganch & ROPT_ANCH_BOL)
         || (multiline && prog->regback >= 0)) )
     {
        if (stringarg == strbeg && screamer) {
@@ -250,11 +275,13 @@ I32 safebase;     /* no need to remember string in subbase */
     regtill = startpos+minend;
 
     /* Simplest case:  anchored match need be tried only once. */
-    /*  [unless multiline is set] */
+    /*  [unless only anchor is BOL and multiline is set] */
     if (prog->reganch & ROPT_ANCH) {
        if (regtry(prog, startpos))
            goto got_it;
-       else if (multiline || (prog->reganch & ROPT_IMPLICIT)) {
+       else if (!(prog->reganch & ROPT_ANCH_GPOS) &&
+                (multiline || (prog->reganch & ROPT_IMPLICIT)))
+       {
            if (minlen)
                dontbother = minlen - 1;
            strend -= dontbother;
@@ -287,7 +314,7 @@ I32 safebase;       /* no need to remember string in subbase */
                s++;
            }
        }
-       else if (SvPOK(prog->regstart) == 3) {
+       else if (SvTYPE(prog->regstart) == SVt_PVBM) {
            /* We know what string it must start with. */
            while ((s = fbm_instr((unsigned char*)s,
              (unsigned char*)strend, prog->regstart)) != NULL)
@@ -297,7 +324,7 @@ I32 safebase;       /* no need to remember string in subbase */
                s++;
            }
        }
-       else {
+       else {                          /* Optimized fbm_instr: */
            c = SvPVX(prog->regstart);
            while ((s = ninstr(s, strend, c, c + SvCUR(prog->regstart))) != NULL)
            {
@@ -526,18 +553,26 @@ got_it:
     prog->exec_tainted = regtainted;
 
     /* make sure $`, $&, $', and $digit will work later */
-    if (!safebase && (strbeg != prog->subbase)) {
-       I32 i = strend - startpos + (stringarg - strbeg);
-       s = savepvn(strbeg, i);
-       Safefree(prog->subbase);
-       prog->subbase = s;
-       prog->subbeg = prog->subbase;
-       prog->subend = prog->subbase + i;
-       s = prog->subbase + (stringarg - strbeg);
-       for (i = 0; i <= prog->nparens; i++) {
-           if (prog->endp[i]) {
-               prog->startp[i] = s + (prog->startp[i] - startpos);
-               prog->endp[i] = s + (prog->endp[i] - startpos);
+    if (strbeg != prog->subbase) {
+       if (safebase) {
+           if (prog->subbase) {
+               Safefree(prog->subbase);
+               prog->subbase = Nullch;
+           }
+       }
+       else {
+           I32 i = strend - startpos + (stringarg - strbeg);
+           s = savepvn(strbeg, i);
+           Safefree(prog->subbase);
+           prog->subbase = s;
+           prog->subbeg = prog->subbase;
+           prog->subend = prog->subbase + i;
+           s = prog->subbase + (stringarg - strbeg);
+           for (i = 0; i <= prog->nparens; i++) {
+               if (prog->endp[i]) {
+                   prog->startp[i] = s + (prog->startp[i] - startpos);
+                   prog->endp[i] = s + (prog->endp[i] - startpos);
+               }
            }
        }
     }
@@ -623,8 +658,11 @@ char *prog;
 #define sayNO goto no
 #define saySAME(x) if (x) goto yes; else goto no
        if (regnarrate) {
-           PerlIO_printf(Perl_debug_log, "%*s%2d%-8.8s\t<%.10s>\n", regindent*2, "",
-               scan - regprogram, regprop(scan), locinput);
+           SV *prop = sv_newmortal();
+           regprop(prop, scan);
+           PerlIO_printf(Perl_debug_log, "%*s%2d%-8.8s\t<%.10s>\n",
+                         regindent*2, "", scan - regprogram,
+                         SvPVX(prop), locinput);
        }
 #else
 #define sayYES return 1
@@ -662,7 +700,7 @@ char *prog;
            if (locinput == regbol && regprev == '\n')
                break;
            sayNO;
-       case GBOL:
+       case GPOS:
            if (locinput == regbol)
                break;
            sayNO;
@@ -811,7 +849,11 @@ char *prog;
                sayNO;
            nextchar = UCHARAT(++locinput);
            break;
+       case REFFL:
+           regtainted = TRUE;
+           /* FALL THROUGH */
        case REF:
+       case REFF:
            n = ARG1(scan);  /* which paren pair */
            s = regstartp[n];
            if (!s)
@@ -821,12 +863,19 @@ char *prog;
            if (s == regendp[n])
                break;
            /* Inline the first character, for speed. */
-           if (UCHARAT(s) != nextchar)
+           if (UCHARAT(s) != nextchar &&
+               (OP(scan) == REF ||
+                (UCHARAT(s) != ((OP(scan) == REFF
+                                ? fold : fold_locale)[nextchar]))))
                sayNO;
            ln = regendp[n] - s;
            if (locinput + ln > regeol)
                sayNO;
-           if (ln > 1 && memNE(s, locinput, ln))
+           if (ln > 1 && (OP(scan) == REF
+                          ? memNE(s, locinput, ln)
+                          : (OP(scan) == REFF
+                             ? ibcmp(s, locinput, ln)
+                             : ibcmp_locale(s, locinput, ln))))
                sayNO;
            locinput += ln;
            nextchar = UCHARAT(locinput);
@@ -839,6 +888,7 @@ char *prog;
        case OPEN:
            n = ARG1(scan);  /* which paren pair */
            regstartp[n] = locinput;
+           regendp[n] = 0;
            if (n > regsize)
                regsize = n;
            break;
@@ -885,8 +935,8 @@ char *prog;
 
 #ifdef DEBUGGING
                if (regnarrate)
-                   PerlIO_printf(Perl_debug_log, "%*s  %d  %lx\n", regindent*2, "",
-                       n, (long)cc);
+                   PerlIO_printf(Perl_debug_log, "%*s  %ld  %lx\n", regindent*2, "",
+                       (long)n, (long)cc);
 #endif
 
                /* If degenerate scan matches "", assume scan done. */
@@ -919,7 +969,7 @@ char *prog;
                    ln = regcc->cur;
                    cp = regcppush(cc->parenfloor);
                    if (regmatch(cc->next)) {
-                       regcpblow(cp);
+                       regcppartblow(cp);
                        sayYES; /* All done. */
                    }
                    regcppop();
@@ -935,7 +985,7 @@ char *prog;
                    cc->lastloc = locinput;
                    cp = regcppush(cc->parenfloor);
                    if (regmatch(cc->scan)) {
-                       regcpblow(cp);
+                       regcppartblow(cp);
                        sayYES;
                    }
                    regcppop();
@@ -950,7 +1000,7 @@ char *prog;
                    cc->cur = n;
                    cc->lastloc = locinput;
                    if (regmatch(cc->scan)) {
-                       regcpblow(cp);
+                       regcppartblow(cp);
                        sayYES;
                    }
                    regcppop();         /* Restore some previous $<digit>s? */
@@ -1090,7 +1140,8 @@ char *prog;
                sayNO;
            break;
        default:
-           PerlIO_printf(PerlIO_stderr(), "%x %d\n",(unsigned)scan,scan[1]);
+           PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
+                         (unsigned long)scan, scan[1]);
            FAIL("regexp memory corruption");
        }
        scan = next;