Changes to perlfaq8 "How do I find out if I'm running interactively
[p5sagit/p5-mst-13.2.git] / regexec.c
index 87c0e99..890736c 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -2234,7 +2234,6 @@ typedef union re_unwind_t {
 #define sayNO goto no
 #define sayNO_ANYOF goto no_anyof
 #define sayYES_FINAL goto yes_final
-#define sayYES_LOUD  goto yes_loud
 #define sayNO_FINAL  goto no_final
 #define sayNO_SILENT goto do_no
 #define saySAME(x) if (x) goto yes; else goto no
@@ -2242,11 +2241,17 @@ typedef union re_unwind_t {
 #define POSCACHE_SUCCESS 0     /* caching success rather than failure */
 #define POSCACHE_SEEN 1                /* we know what we're caching */
 #define POSCACHE_START 2       /* the real cache: this bit maps to pos 0 */
+
 #define CACHEsayYES STMT_START { \
     if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
-       if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
-           PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) || (1<<POSCACHE_SEEN); \
-        else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
+       if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) { \
+           PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) | (1<<POSCACHE_SEEN); \
+           PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
+       } \
+        else if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS)) { \
+           PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
+       } \
+       else { \
            /* cache records failure, but this is success */ \
            DEBUG_r( \
                PerlIO_printf(Perl_debug_log, \
@@ -2258,11 +2263,17 @@ typedef union re_unwind_t {
     } \
     sayYES; \
 } STMT_END
+
 #define CACHEsayNO STMT_START { \
     if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
-       if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
+       if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) { \
            PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
-        else if ((PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
+           PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
+       } \
+        else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
+           PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
+       } \
+       else { \
            /* cache records success, but this is failure */ \
            DEBUG_r( \
                PerlIO_printf(Perl_debug_log, \
@@ -2287,6 +2298,9 @@ typedef union re_unwind_t {
 /* Make sure there is a test for this +1 options in re_tests */
 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
 
+#define SLAB_FIRST(s) (&(s)->states[0])
+#define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
+
 /* grab a new slab and return the first slot in it */
 
 STATIC regmatch_state *
@@ -2300,7 +2314,7 @@ S_push_slab(pTHX)
        PL_regmatch_slab->next = s;
     }
     PL_regmatch_slab = s;
-    return &s->states[0];
+    return SLAB_FIRST(s);
 }
 
 /* simulate a recursive call to regmatch */
@@ -2324,7 +2338,7 @@ S_push_slab(pTHX)
     st->locinput = locinput;   \
     st->resume_state = resume; \
     newst = st+1;   \
-    if (newst >  &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1])) \
+    if (newst >  SLAB_LAST(PL_regmatch_slab)) \
        newst = S_push_slab(aTHX);  \
     PL_regmatch_state = newst; \
     newst->cc = 0;  \
@@ -2339,9 +2353,9 @@ S_push_slab(pTHX)
     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth)); \
     depth--; \
     st--; \
-    if (st < &PL_regmatch_slab->states[0]) { \
+    if (st < SLAB_FIRST(PL_regmatch_slab)) { \
        PL_regmatch_slab = PL_regmatch_slab->prev; \
-       st = &PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]; \
+       st = SLAB_LAST(PL_regmatch_slab); \
     } \
     PL_regmatch_state = st; \
     scan       = st->scan; \
@@ -2483,7 +2497,8 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
     regnode *inner;        /* Next node in internal branch. */
     int depth = 0;         /* depth of recursion */
     regmatch_state *newst;  /* when pushing a state, this is the new one */
-    regmatch_state *cur_eval = NULL;  /* most recent (??{}) state */
+    regmatch_state *yes_state = NULL; /* state to pop to on success of
+                                                           subpattern */
     
 #ifdef DEBUGGING
     SV *re_debug_flags = NULL;
@@ -2496,7 +2511,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
        Newx(PL_regmatch_slab, 1, regmatch_slab);
        PL_regmatch_slab->prev = NULL;
        PL_regmatch_slab->next = NULL;
-       PL_regmatch_state = &PL_regmatch_slab->states[0] - 1;
+       PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
     }
 
     /* remember current high-water mark for exit */
@@ -2506,7 +2521,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
 
     /* grab next free state slot */
     st = ++PL_regmatch_state;
-    if (st >  &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]))
+    if (st >  SLAB_LAST(PL_regmatch_slab))
        st = PL_regmatch_state = S_push_slab(aTHX);
 
     st->minmod = 0;
@@ -3399,10 +3414,9 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                st->u.eval.prev_rex = rex;
                rex = re;
 
-               st->u.eval.prev_eval = cur_eval;
-               st->u.eval.prev_slab = PL_regmatch_slab;
-               st->u.eval.depth = depth;
-               cur_eval = st;
+               /* resume to current state on success */
+               st->u.yes.prev_yes_state = yes_state;
+               yes_state = st;
                PUSH_STATE(newst, resume_EVAL);
                st = newst;
 
@@ -3528,6 +3542,18 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                /* No need to save/restore up to this paren */
                I32 parenfloor = scan->flags;
 
+               /* Dave says:
+                  
+                  CURLYX and WHILEM are always paired: they're the moral
+                  equivalent of pp_enteriter anbd pp_iter.
+
+                  The only time next could be null is if the node tree is
+                  corrupt. This was mentioned on p5p a few days ago.
+
+                  See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html
+                  So we'll assert that this is true:
+               */
+               assert(next);
                if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
                    next += ARG(next);
                /* XXXX Probably it is better to teach regpush to support
@@ -3567,6 +3593,16 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                 * that we can try again after backing off.
                 */
 
+               /* Dave says:
+
+                  st->cc gets initialised by CURLYX ready for use by WHILEM.
+                  So again, unless somethings been corrupted, st->cc cannot
+                  be null at that point in WHILEM.
+                  
+                  See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html
+                  So we'll assert that this is true:
+               */
+               assert(st->cc);
                st->u.whilem.lastloc = st->cc->u.curlyx.lastloc; /* Detection of 0-len. */
                st->u.whilem.cache_offset = 0;
                st->u.whilem.cache_bit = 0;
@@ -3665,7 +3701,6 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                            /* cache records failure */
                            sayNO_SILENT;
                    }
-                   PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit);
                }
                }
 
@@ -3839,7 +3874,11 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
            st->u.curlym.maxwanted = st->minmod ? st->ln : n;
            if (st->u.curlym.maxwanted) {
                while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) {
+                   /* resume to current state on success */
+                   st->u.yes.prev_yes_state = yes_state;
+                   yes_state = st;
                    REGMATCH(scan, CURLYM1);
+                   yes_state = st->u.yes.prev_yes_state;
                    /*** all unsaved local vars undefined at this point */
                    if (!result)
                        break;
@@ -3909,15 +3948,24 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                            else
                                PL_regendp[st->u.curlym.paren] = -1;
                        }
+                       /* resume to current state on success */
+                       st->u.yes.prev_yes_state = yes_state;
+                       yes_state = st;
                        REGMATCH(next, CURLYM2);
+                       yes_state = st->u.yes.prev_yes_state;
                        /*** all unsaved local vars undefined at this point */
                        if (result)
-                           sayYES;
+                           /* XXX tmp sayYES; */
+                           sayYES_FINAL;
                        REGCP_UNWIND(st->u.curlym.lastcp);
                    }
                    /* Couldn't or didn't -- move forward. */
                    PL_reginput = locinput;
+                   /* resume to current state on success */
+                   st->u.yes.prev_yes_state = yes_state;
+                   yes_state = st;
                    REGMATCH(scan, CURLYM3);
+                   yes_state = st->u.yes.prev_yes_state;
                    /*** all unsaved local vars undefined at this point */
                    if (result) {
                        st->ln++;
@@ -3982,10 +4030,15 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                            else
                                PL_regendp[st->u.curlym.paren] = -1;
                        }
+                       /* resume to current state on success */
+                       st->u.yes.prev_yes_state = yes_state;
+                       yes_state = st;
                        REGMATCH(next, CURLYM4);
+                       yes_state = st->u.yes.prev_yes_state;
                        /*** all unsaved local vars undefined at this point */
                        if (result)
-                           sayYES;
+                           /* XXX tmp sayYES; */
+                           sayYES_FINAL;
                        REGCP_UNWIND(st->u.curlym.lastcp);
                    }
                    /* Couldn't or didn't -- back up. */
@@ -4251,48 +4304,6 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
            sayNO;
            break;
        case END:
-           if (cur_eval) {
-               /* we have successfully completed the execution of a
-                * postponed re. Pop all states back to the last EVAL
-                * then continue with the node following the (??{...})
-                */
-
-               /* this simulates a POP_STATE, except that it pops several
-                * levels, and doesn't restore locinput */
-
-               st = cur_eval;
-               PL_regmatch_slab = st->u.eval.prev_slab;
-               cur_eval = st->u.eval.prev_eval;
-               depth = st->u.eval.depth;
-
-               PL_regmatch_state = st;
-               scan    = st->scan;
-               next    = st->next;
-               n               = st->n;
-
-               if (st->u.eval.toggleutf)
-                   PL_reg_flags ^= RF_utf8;
-               ReREFCNT_dec(rex);
-               rex = st->u.eval.prev_rex;
-               /* XXXX This is too dramatic a measure... */
-               PL_reg_maxiter = 0;
-
-               /* Restore parens of the caller without popping the
-                * savestack */
-               {
-                   I32 tmp = PL_savestack_ix;
-                   PL_savestack_ix = st->u.eval.lastcp;
-                   regcppop(rex);
-                   PL_savestack_ix = tmp;
-               }
-
-
-               PL_reginput = locinput;
-               /* resume at node following the (??{...}) */
-               break;
-
-           }
-
            if (locinput < reginfo->till) {
                DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
                                      "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
@@ -4304,60 +4315,57 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
            }
            PL_reginput = locinput;     /* put where regtry can find it */
            sayYES_FINAL;               /* Success! */
-       case SUCCEED:
+
+       case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
+           DEBUG_EXECUTE_r(
+           PerlIO_printf(Perl_debug_log,
+               "%*s  %ssubpattern success...%s\n",
+               REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5]));
            PL_reginput = locinput;     /* put where regtry can find it */
-           sayYES_LOUD;                /* Success! */
-       case SUSPEND:
-           n = 1;
+           sayYES_FINAL;               /* Success! */
+
+       case SUSPEND:   /* (?>FOO) */
+           st->u.ifmatch.wanted = 1;
            PL_reginput = locinput;
            goto do_ifmatch;    
-       case UNLESSM:
-           n = 0;
-           if (scan->flags) {
-               char * const s = HOPBACKc(locinput, scan->flags);
-               if (!s)
-                   goto say_yes;
-               PL_reginput = s;
-           }
-           else
-               PL_reginput = locinput;
-           goto do_ifmatch;
-       case IFMATCH:
-           n = 1;
+
+       case UNLESSM:   /* -ve lookaround: (?!FOO), or with flags, (?<!foo) */
+           st->u.ifmatch.wanted = 0;
+           goto ifmatch_trivial_fail_test;
+
+       case IFMATCH:   /* +ve lookaround: (?=FOO), or with flags, (?<=foo) */
+           st->u.ifmatch.wanted = 1;
+         ifmatch_trivial_fail_test:
            if (scan->flags) {
                char * const s = HOPBACKc(locinput, scan->flags);
-               if (!s)
-                   goto say_no;
+               if (!s) {
+                   /* trivial fail */
+                   if (st->logical) {
+                       st->logical = 0;
+                       st->sw = 1 - st->u.ifmatch.wanted;
+                   }
+                   else if (st->u.ifmatch.wanted)
+                       sayNO;
+                   next = scan + ARG(scan);
+                   if (next == scan)
+                       next = NULL;
+                   break;
+               }
                PL_reginput = s;
            }
            else
                PL_reginput = locinput;
 
          do_ifmatch:
-           REGMATCH(NEXTOPER(NEXTOPER(scan)), IFMATCH);
-           /*** all unsaved local vars undefined at this point */
-           if (result != n) {
-             say_no:
-               if (st->logical) {
-                   st->logical = 0;
-                   st->sw = 0;
-                   goto do_longjump;
-               }
-               else
-                   sayNO;
-           }
-         say_yes:
-           if (st->logical) {
-               st->logical = 0;
-               st->sw = 1;
-           }
-           if (OP(scan) == SUSPEND) {
-               locinput = PL_reginput;
-               nextchr = UCHARAT(locinput);
-           }
-           /* FALL THROUGH. */
+           /* resume to current state on success */
+           st->u.yes.prev_yes_state = yes_state;
+           yes_state = st;
+           PUSH_STATE(newst, resume_IFMATCH);
+           st = newst;
+           next = NEXTOPER(NEXTOPER(scan));
+           break;
+
        case LONGJMP:
-         do_longjump:
            next = scan + ARG(scan);
            if (next == scan)
                next = NULL;
@@ -4387,7 +4395,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
 
            /* grab the next free state slot */
            st++;
-           if (st >  &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]))
+           if (st >  SLAB_LAST(PL_regmatch_slab))
                st = S_push_slab(aTHX);
            PL_regmatch_state = st;
 
@@ -4418,14 +4426,90 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
     /*NOTREACHED*/
     sayNO;
 
-yes_loud:
-    DEBUG_EXECUTE_r(
-       PerlIO_printf(Perl_debug_log,
-                     "%*s  %scould match...%s\n",
-                     REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
-       );
-    goto yes;
 yes_final:
+
+    if (yes_state) {
+       /* we have successfully completed a subexpression, but we must now
+        * pop to the state marked by yes_state and continue from there */
+
+       /*XXX tmp for CURLYM*/
+       regmatch_slab *oslab = PL_regmatch_slab;
+       regmatch_state *ost = st, *oys=yes_state;
+       int odepth = depth;
+
+       assert(st != yes_state);
+       while (yes_state < SLAB_FIRST(PL_regmatch_slab)
+           || yes_state > SLAB_LAST(PL_regmatch_slab))
+       {
+           /* not in this slab, pop slab */
+           depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
+           PL_regmatch_slab = PL_regmatch_slab->prev;
+           st = SLAB_LAST(PL_regmatch_slab);
+       }
+       depth -= (st - yes_state);
+       DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE TO (%d)\n", depth));
+       st = yes_state;
+       yes_state = st->u.yes.prev_yes_state;
+       PL_regmatch_state = st;
+
+       switch (st->resume_state) {
+       case resume_EVAL:
+           if (st->u.eval.toggleutf)
+               PL_reg_flags ^= RF_utf8;
+           ReREFCNT_dec(rex);
+           rex = st->u.eval.prev_rex;
+           /* XXXX This is too dramatic a measure... */
+           PL_reg_maxiter = 0;
+           /* Restore parens of the caller without popping the
+            * savestack */
+           {
+               I32 tmp = PL_savestack_ix;
+               PL_savestack_ix = st->u.eval.lastcp;
+               regcppop(rex);
+               PL_savestack_ix = tmp;
+           }
+           PL_reginput = locinput;
+            /* continue at the node following the (??{...}) */
+           next        = st->next;
+           goto reenter;
+
+       case resume_IFMATCH:
+           if (st->logical) {
+               st->logical = 0;
+               st->sw = st->u.ifmatch.wanted;
+           }
+           else if (!st->u.ifmatch.wanted)
+               sayNO;
+
+           if (OP(st->scan) == SUSPEND)
+               locinput = PL_reginput;
+           else {
+               locinput = PL_reginput = st->locinput;
+               nextchr = UCHARAT(locinput);
+           }
+           next = st->scan + ARG(st->scan);
+           if (next == st->scan)
+               next = NULL;
+           goto reenter;
+
+       /* XXX tmp  don't handle yes_state yet */
+       case resume_CURLYM1:
+       case resume_CURLYM2:
+       case resume_CURLYM3:
+       case resume_CURLYM4:
+           PL_regmatch_slab =oslab;
+           st = ost;
+           PL_regmatch_state = st;
+           depth = odepth;
+           yes_state = oys;
+           DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "XXX revering a CURLYM\n"));
+           goto yes;
+
+       default:
+           Perl_croak(aTHX_ "unexpected yes reume state");
+       }
+    }
+
     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
                          PL_colors[4], PL_colors[5]));
 yes:
@@ -4435,7 +4519,8 @@ yes:
 
     result = 1;
     /* XXX this is duplicate(ish) code to that in the do_no section.
-     * eventually a yes should just pop the whole stack */
+     * eventually a yes should just pop the stack back to the current
+     * yes_state */
     if (depth) {
        /* restore previous state and re-enter */
        POP_STATE;
@@ -4445,8 +4530,6 @@ yes:
            goto resume_point_TRIE1;
        case resume_TRIE2:
            goto resume_point_TRIE2;
-       case resume_EVAL:
-           break;
        case resume_CURLYX:
            goto resume_point_CURLYX;
        case resume_WHILEM1:
@@ -4469,8 +4552,6 @@ yes:
            goto resume_point_CURLYM3;
        case resume_CURLYM4:
            goto resume_point_CURLYM4;
-       case resume_IFMATCH:
-           goto resume_point_IFMATCH;
        case resume_PLUS1:
            goto resume_point_PLUS1;
        case resume_PLUS2:
@@ -4479,6 +4560,9 @@ yes:
            goto resume_point_PLUS3;
        case resume_PLUS4:
            goto resume_point_PLUS4;
+
+       case resume_IFMATCH:
+       case resume_EVAL:
        default:
            Perl_croak(aTHX_ "regexp resume memory corruption");
        }
@@ -4564,7 +4648,7 @@ do_no:
                PL_reg_flags ^= RF_utf8;
            ReREFCNT_dec(rex);
            rex = st->u.eval.prev_rex;
-           cur_eval = st->u.eval.prev_eval;
+           yes_state = st->u.yes.prev_yes_state;
 
            /* XXXX This is too dramatic a measure... */
            PL_reg_maxiter = 0;
@@ -4597,7 +4681,22 @@ do_no:
        case resume_CURLYM4:
            goto resume_point_CURLYM4;
        case resume_IFMATCH:
-           goto resume_point_IFMATCH;
+           yes_state = st->u.yes.prev_yes_state;
+           if (st->logical) {
+               st->logical = 0;
+               st->sw = !st->u.ifmatch.wanted;
+           }
+           else if (st->u.ifmatch.wanted)
+               sayNO;
+
+           assert(OP(scan) != SUSPEND); /* XXX DAPM tmp */
+           locinput = PL_reginput = st->locinput;
+           nextchr = UCHARAT(locinput);
+           next = scan + ARG(scan);
+           if (next == scan)
+               next = NULL;
+           goto reenter;
+
        case resume_PLUS1:
            goto resume_point_PLUS1;
        case resume_PLUS2: