Fix a problem with jump-tries, add (?FAIL) pattern.
Yves Orton [Thu, 26 Oct 2006 14:59:11 +0000 (16:59 +0200)]
Message-ID: <9b18b3110610260559k3efa98barc28987e88c581a8a@mail.gmail.com>

p4raw-id: //depot/perl@29118

regcomp.c
regcomp.sym
regexec.c
regexp.h
regnodes.h
t/op/re_tests

index 25dc17f..be8be1b 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -1174,7 +1174,7 @@ is the recommended Unicode-aware way of saying
     if ( noper_next < tail ) {                                  \
         if (!trie->jump)                                        \
             Newxz( trie->jump, word_count + 1, U16);            \
-        trie->jump[curword] = (U16)(tail - noper_next);         \
+        trie->jump[curword] = (U16)(noper_next - convert);      \
         if (!jumper)                                            \
             jumper = noper_next;                                \
         if (!nextbranch)                                        \
@@ -1225,6 +1225,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
     U32 next_alloc = 0;
     regnode *jumper = NULL;
     regnode *nextbranch = NULL;
+    regnode *convert = NULL;
     /* we just use folder as a flag in utf8 */
     const U8 * const folder = ( flags == EXACTF
                        ? PL_fold
@@ -1273,6 +1274,16 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                   REG_NODE_NUM(last), REG_NODE_NUM(tail),
                   (int)depth);
     });
+   
+   /* Find the node we are going to overwrite */
+    if ( first == startbranch && OP( last ) != BRANCH ) {
+        /* whole branch chain */
+        convert = first;
+    } else {
+        /* branch sub-chain */
+        convert = NEXTOPER( first );
+    }
+        
     /*  -- First loop and Setup --
 
        We first traverse the branches and scan each word to determine if it
@@ -1770,7 +1781,6 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
     );
 
     {   /* Modify the program and insert the new TRIE node*/ 
-        regnode *convert;
         U8 nodetype =(U8)(flags & 0xFF);
         char *str=NULL;
         
@@ -1788,23 +1798,22 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
            the whole branch sequence, including the first.
          */
         /* Find the node we are going to overwrite */
-        if ( first == startbranch && OP( last ) != BRANCH ) {
-            /* whole branch chain */
-            convert = first;
-            DEBUG_r({
-                const  regnode *nop = NEXTOPER( convert );
-                mjd_offset= Node_Offset((nop));
-                mjd_nodelen= Node_Length((nop));
-            });
-        } else {
+        if ( first != startbranch || OP( last ) == BRANCH ) {
             /* branch sub-chain */
-            convert = NEXTOPER( first );
             NEXT_OFF( first ) = (U16)(last - first);
             DEBUG_r({
                 mjd_offset= Node_Offset((convert));
                 mjd_nodelen= Node_Length((convert));
             });
+        } else {
+            /* whole branch chain */
+            DEBUG_r({
+                const  regnode *nop = NEXTOPER( convert );
+                mjd_offset= Node_Offset((nop));
+                mjd_nodelen= Node_Length((nop));
+            });
         }
+        
         DEBUG_OPTIMISE_r(
             PerlIO_printf(Perl_debug_log, "%*sMJD offset:%"UVuf" MJD length:%"UVuf"\n",
                 (int)depth * 2 + 2, "",
@@ -1917,7 +1926,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
               jump[0], which is otherwise unused by the jump logic. 
               We use this when dumping a trie and during optimisation. */
            if (trie->jump) 
-               trie->jump[0] = (U16)(tail - nextbranch);
+               trie->jump[0] = (U16)(nextbranch - convert);
             
             /* XXXX */
             if ( !trie->states[trie->startstate].wordnum && trie->bitmap && 
@@ -2091,7 +2100,7 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
        SV * const mysv=sv_newmortal(); \
        regnode *Next = regnext(scan); \
        regprop(RExC_rx, mysv, scan); \
-       PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s [%d]\n", \
+       PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s (%d)\n", \
        (int)depth*2, "", REG_NODE_NUM(scan), SvPV_nolen_const(mysv),\
        Next ? (REG_NODE_NUM(Next)) : 0 ); \
    });
@@ -3483,6 +3492,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             /* NOTE - There is similar code to this block above for handling
                BRANCH nodes on the initial study.  If you change stuff here 
                check there too. */
+            regnode *trie_node= scan;
             regnode *tail= regnext(scan);
             reg_trie_data *trie = (reg_trie_data*)RExC_rx->data->data[ ARG(scan) ];
             I32 max1 = 0, min1 = I32_MAX;
@@ -3523,8 +3533,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
     
                     if (trie->jump[word]) {
                         if (!nextbranch)
-                            nextbranch = tail - trie->jump[0];
-                        scan= tail - trie->jump[word];
+                            nextbranch = trie_node + trie->jump[0];
+                        scan= trie_node + trie->jump[word];
                         /* We go from the jump point to the branch that follows
                            it. Note this means we need the vestigal unused branches
                            even though they arent otherwise used.
@@ -3855,7 +3865,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     r->paren_names = 0;
     
     if (RExC_seen & REG_SEEN_RECURSE) {
-        Newx(RExC_parens, RExC_npar,regnode *);
+        Newxz(RExC_parens, RExC_npar,regnode *);
         SAVEFREEPV(RExC_parens);
     }
 
@@ -4568,10 +4578,24 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                RExC_parse++;
            case '=':           /* (?=...) */
            case '!':           /* (?!...) */
+               if (*RExC_parse == ')')
+                   goto do_op_fail;
                RExC_seen_zerolen++;
            case ':':           /* (?:...) */
            case '>':           /* (?>...) */
                break;
+            case 'F':
+                if (RExC_parse[0] == 'A' &&
+                    RExC_parse[1] == 'I' &&
+                    RExC_parse[2] == 'L')
+                    RExC_parse+=3;
+                if (*RExC_parse != ')')
+                   vFAIL("Sequence (?FAIL) or (?F) not terminated");
+             do_op_fail:
+               ret = reg_node(pRExC_state, OPFAIL);
+               nextchar(pRExC_state);
+               return ret;
+               break;
            case '$':           /* (?$...) */
            case '@':           /* (?@...) */
                vFAIL2("Sequence (?%c...) not implemented", (int)paren);
@@ -4588,8 +4612,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            case 'R' :           /* (?R) */
                if (*RExC_parse != ')')
                    FAIL("Sequence (?R) not terminated");
-               reg_node(pRExC_state, SRECURSE);
-               break;           /* (?PARNO) */
+               ret = reg_node(pRExC_state, SRECURSE);
+               nextchar(pRExC_state);
+               return ret;
+               /*notreached*/
             { /* named and numeric backreferences */
                 I32 num;
                 char * parse_start;
@@ -8442,6 +8468,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
            DUMPUNTIL(NEXTOPER(node), next);
        }
        else if ( PL_regkind[(U8)op]  == TRIE ) {
+           const regnode *this_trie = node;
            const char op = OP(node);
             const I32 n = ARG(node);
            const reg_ac_data * const ac = op>=AHOCORASICK ?
@@ -8462,18 +8489,19 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
                            PL_colors[0], PL_colors[1],
                            (SvUTF8(*elem_ptr) ? PERL_PV_ESCAPE_UNI : 0) |
                            PERL_PV_PRETTY_ELIPSES    |
-                           PERL_PV_PRETTY_LTGT    
+                           PERL_PV_PRETTY_LTGT
                             )
                             : "???"
                 );
                 if (trie->jump) {
-                    U16 dist= trie->jump[word_idx+1];
-                    PerlIO_printf(Perl_debug_log, "(%u)\n",(next - dist) - start);
+                    U16 dist = trie->jump[word_idx+1];
+                   PerlIO_printf(Perl_debug_log, "(%u)\n",
+                           (dist ? this_trie + dist : next) - start);
                     if (dist) {
                         if (!nextbranch)
-                            nextbranch= next - trie->jump[0];
-                        DUMPUNTIL(next - dist, nextbranch);
-                    } 
+                           nextbranch = this_trie + trie->jump[0];
+                       DUMPUNTIL(this_trie + dist, nextbranch);
+                    }
                     if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
                         nextbranch= regnext((regnode *)nextbranch);
                 } else {
index 561b25d..73e27a8 100644 (file)
@@ -164,16 +164,19 @@ NREFF             NREF,    no-sv 1        Match already matched string, folded
 NREFFL         NREF,    no-sv 1        Match already matched string, folded in loc.
 
 
-#*Special conditionals 
+#*Special conditionals  (70..72)
 NGROUPP                NGROUPP,   no-sv 1      Whether the group matched.            
 RECURSEP       RECURSEP,  num 1        Whether we are in a specific recurse.  
 DEFINEP                DEFINEP,   none 1       Never execute directly.               
 
+#*Backtracking 
+OPFAIL         OPFAIL, none            Same as (?!)
+
 # NEW STUFF ABOVE THIS LINE -- Please update counts below. 
 
 ################################################################################
 
-#*SPECIAL  REGOPS (70, 71)
+#*SPECIAL  REGOPS
 
 # This is not really a node, but an optimized away piece of a "long" node.
 # To simplify debugging output, we mark it as if it were a node
index 60ec4ff..17d0e01 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -168,6 +168,7 @@ S_regcppush(pTHX_ I32 parenfloor)
 
 #define REGCP_OTHER_ELEMS 8
     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
+    
     for (p = PL_regsize; p > parenfloor; p--) {
 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
        SSPUSHINT(PL_regendp[p]);
@@ -2763,13 +2764,8 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                ST.accepted = 0; /* how many accepting states we have seen */
                ST.B = next;
                ST.jump = trie->jump;
-               
-#ifdef DEBUGGING
                ST.me = scan;
-#endif
                 
-               
-
                /*
                   traverse the TRIE keeping track of all accepting states
                   we transition through until we get to a failing node.
@@ -2894,10 +2890,10 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                locinput = PL_reginput;
                nextchr = UCHARAT(locinput);
                
-               if ( !ST.jump ) 
+               if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum]) 
                    scan = ST.B;
                else
-                   scan = ST.B - ST.jump[ST.accept_buff[0].wordnum];
+                   scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
                
                continue; /* execute rest of RE */
            }
@@ -2943,9 +2939,9 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                    SV ** const tmp = RX_DEBUG(reginfo->prog)
                                ? av_fetch( trie->words, ST.accept_buff[ best ].wordnum - 1, 0 )
                                : NULL;
-                   regnode *nextop=!ST.jump ? 
+                   regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ? 
                                    ST.B : 
-                                   ST.B - ST.jump[ST.accept_buff[best].wordnum];    
+                                   ST.me + ST.jump[ST.accept_buff[best].wordnum];    
                    PerlIO_printf( Perl_debug_log, 
                        "%*s  %strying alternation #%d <%s> at node #%d %s\n",
                        REPORT_CODE_OFF+depth*2, "", PL_colors[4],
@@ -2962,11 +2958,11 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                    best = ST.accepted;
                }
                PL_reginput = (char *)ST.accept_buff[ best ].endpos;
-               if ( !ST.jump ) {
+               if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
                    PUSH_STATE_GOTO(TRIE_next, ST.B);
                    /* NOTREACHED */
                } else {
-                   PUSH_STATE_GOTO(TRIE_next, ST.B - ST.jump[ST.accept_buff[best].wordnum]);
+                   PUSH_STATE_GOTO(TRIE_next, ST.me + ST.jump[ST.accept_buff[best].wordnum]);
                    /* NOTREACHED */
                 }
                 /* NOTREACHED */
@@ -3601,6 +3597,8 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
            n = ARG(scan);  /* which paren pair */
            PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
            PL_regendp[n] = locinput - PL_bostr;
+           /*if (n > PL_regsize)
+               PL_regsize = n;*/
            if (n > (I32)*PL_reglastparen)
                *PL_reglastparen = n;
            *PL_reglastcloseparen = n;
@@ -4484,7 +4482,6 @@ NULL
 
 #undef ST
 
-
        case END:
            fake_end:
            if (cur_eval) {
@@ -4611,6 +4608,8 @@ NULL
            if (next == scan)
                next = NULL;
            break;
+       case OPFAIL:
+           sayNO;
        default:
            PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
                          PTR2UV(scan), OP(scan));
index faed0ee..89fcea7 100644 (file)
--- a/regexp.h
+++ b/regexp.h
@@ -227,9 +227,9 @@ typedef struct regmatch_state {
        struct {
            reg_trie_accepted *accept_buff;
            U32         accepted; /* how many accepting states we have seen */
-           U16         *jump;  /* negative offsets from B */
+           U16         *jump;  /* positive offsets from me */
            regnode     *B;     /* node following the trie */
-           regnode     *me;    /* only needed for debugging */
+           regnode     *me;    /* Which node am I - needed for jump tries*/
        } trie;
 
        struct {
index f7ebda1..d6842b5 100644 (file)
@@ -6,8 +6,8 @@
 
 /* Regops and State definitions */
 
-#define REGNODE_MAX            74
-#define REGMATCH_STATE_MAX     104
+#define REGNODE_MAX            75
+#define REGMATCH_STATE_MAX     105
 
 #define        END                     0       /* 0000 End of program. */
 #define        SUCCEED                 1       /* 0x01 Return from a subroutine, basically. */
 #define        NGROUPP                 70      /* 0x46 Whether the group matched. */
 #define        RECURSEP                71      /* 0x47 Whether we are in a specific recurse. */
 #define        DEFINEP                 72      /* 0x48 Never execute directly. */
-#define        OPTIMIZED               73      /* 0x49 Placeholder for dump. */
-#define        PSEUDO                  74      /* 0x4a Pseudo opcode for internal use. */
+#define        OPFAIL                  73      /* 0x49 Same as (?!) */
+#define        OPTIMIZED               74      /* 0x4a Placeholder for dump. */
+#define        PSEUDO                  75      /* 0x4b Pseudo opcode for internal use. */
 
        /* ------------ States ------------- */
 
-#define        TRIE_next               75      /* 0x4b Regmatch state for TRIE */
-#define        TRIE_next_fail          76      /* 0x4c Regmatch state for TRIE */
-#define        EVAL_AB                 77      /* 0x4d Regmatch state for EVAL */
-#define        EVAL_AB_fail            78      /* 0x4e Regmatch state for EVAL */
-#define        CURLYX_end              79      /* 0x4f Regmatch state for CURLYX */
-#define        CURLYX_end_fail         80      /* 0x50 Regmatch state for CURLYX */
-#define        WHILEM_A_pre            81      /* 0x51 Regmatch state for WHILEM */
-#define        WHILEM_A_pre_fail       82      /* 0x52 Regmatch state for WHILEM */
-#define        WHILEM_A_min            83      /* 0x53 Regmatch state for WHILEM */
-#define        WHILEM_A_min_fail       84      /* 0x54 Regmatch state for WHILEM */
-#define        WHILEM_A_max            85      /* 0x55 Regmatch state for WHILEM */
-#define        WHILEM_A_max_fail       86      /* 0x56 Regmatch state for WHILEM */
-#define        WHILEM_B_min            87      /* 0x57 Regmatch state for WHILEM */
-#define        WHILEM_B_min_fail       88      /* 0x58 Regmatch state for WHILEM */
-#define        WHILEM_B_max            89      /* 0x59 Regmatch state for WHILEM */
-#define        WHILEM_B_max_fail       90      /* 0x5a Regmatch state for WHILEM */
-#define        BRANCH_next             91      /* 0x5b Regmatch state for BRANCH */
-#define        BRANCH_next_fail        92      /* 0x5c Regmatch state for BRANCH */
-#define        CURLYM_A                93      /* 0x5d Regmatch state for CURLYM */
-#define        CURLYM_A_fail           94      /* 0x5e Regmatch state for CURLYM */
-#define        CURLYM_B                95      /* 0x5f Regmatch state for CURLYM */
-#define        CURLYM_B_fail           96      /* 0x60 Regmatch state for CURLYM */
-#define        IFMATCH_A               97      /* 0x61 Regmatch state for IFMATCH */
-#define        IFMATCH_A_fail          98      /* 0x62 Regmatch state for IFMATCH */
-#define        CURLY_B_min_known       99      /* 0x63 Regmatch state for CURLY */
-#define        CURLY_B_min_known_fail  100     /* 0x64 Regmatch state for CURLY */
-#define        CURLY_B_min             101     /* 0x65 Regmatch state for CURLY */
-#define        CURLY_B_min_fail        102     /* 0x66 Regmatch state for CURLY */
-#define        CURLY_B_max             103     /* 0x67 Regmatch state for CURLY */
-#define        CURLY_B_max_fail        104     /* 0x68 Regmatch state for CURLY */
+#define        TRIE_next               76      /* 0x4c Regmatch state for TRIE */
+#define        TRIE_next_fail          77      /* 0x4d Regmatch state for TRIE */
+#define        EVAL_AB                 78      /* 0x4e Regmatch state for EVAL */
+#define        EVAL_AB_fail            79      /* 0x4f Regmatch state for EVAL */
+#define        CURLYX_end              80      /* 0x50 Regmatch state for CURLYX */
+#define        CURLYX_end_fail         81      /* 0x51 Regmatch state for CURLYX */
+#define        WHILEM_A_pre            82      /* 0x52 Regmatch state for WHILEM */
+#define        WHILEM_A_pre_fail       83      /* 0x53 Regmatch state for WHILEM */
+#define        WHILEM_A_min            84      /* 0x54 Regmatch state for WHILEM */
+#define        WHILEM_A_min_fail       85      /* 0x55 Regmatch state for WHILEM */
+#define        WHILEM_A_max            86      /* 0x56 Regmatch state for WHILEM */
+#define        WHILEM_A_max_fail       87      /* 0x57 Regmatch state for WHILEM */
+#define        WHILEM_B_min            88      /* 0x58 Regmatch state for WHILEM */
+#define        WHILEM_B_min_fail       89      /* 0x59 Regmatch state for WHILEM */
+#define        WHILEM_B_max            90      /* 0x5a Regmatch state for WHILEM */
+#define        WHILEM_B_max_fail       91      /* 0x5b Regmatch state for WHILEM */
+#define        BRANCH_next             92      /* 0x5c Regmatch state for BRANCH */
+#define        BRANCH_next_fail        93      /* 0x5d Regmatch state for BRANCH */
+#define        CURLYM_A                94      /* 0x5e Regmatch state for CURLYM */
+#define        CURLYM_A_fail           95      /* 0x5f Regmatch state for CURLYM */
+#define        CURLYM_B                96      /* 0x60 Regmatch state for CURLYM */
+#define        CURLYM_B_fail           97      /* 0x61 Regmatch state for CURLYM */
+#define        IFMATCH_A               98      /* 0x62 Regmatch state for IFMATCH */
+#define        IFMATCH_A_fail          99      /* 0x63 Regmatch state for IFMATCH */
+#define        CURLY_B_min_known       100     /* 0x64 Regmatch state for CURLY */
+#define        CURLY_B_min_known_fail  101     /* 0x65 Regmatch state for CURLY */
+#define        CURLY_B_min             102     /* 0x66 Regmatch state for CURLY */
+#define        CURLY_B_min_fail        103     /* 0x67 Regmatch state for CURLY */
+#define        CURLY_B_max             104     /* 0x68 Regmatch state for CURLY */
+#define        CURLY_B_max_fail        105     /* 0x69 Regmatch state for CURLY */
 
 /* PL_regkind[] What type of regop or state is this. */
 
@@ -197,6 +198,7 @@ EXTCONST U8 PL_regkind[] = {
        NGROUPP,        /* NGROUPP                */
        RECURSEP,       /* RECURSEP               */
        DEFINEP,        /* DEFINEP                */
+       OPFAIL,         /* OPFAIL                 */
        NOTHING,        /* OPTIMIZED              */
        PSEUDO,         /* PSEUDO                 */
        /* ------------ States ------------- */
@@ -310,6 +312,7 @@ static const U8 regarglen[] = {
        EXTRA_SIZE(struct regnode_1),           /* NGROUPP      */
        EXTRA_SIZE(struct regnode_1),           /* RECURSEP     */
        EXTRA_SIZE(struct regnode_1),           /* DEFINEP      */
+       0,                                      /* OPFAIL       */
        0,                                      /* OPTIMIZED    */
        0,                                      /* PSEUDO       */
 };
@@ -390,6 +393,7 @@ static const char reg_off_by_arg[] = {
        0,      /* NGROUPP      */
        0,      /* RECURSEP     */
        0,      /* DEFINEP      */
+       0,      /* OPFAIL       */
        0,      /* OPTIMIZED    */
        0,      /* PSEUDO       */
 };
@@ -471,39 +475,40 @@ const char * reg_name[] = {
        "NGROUPP",                      /* 0x46 */
        "RECURSEP",                     /* 0x47 */
        "DEFINEP",                      /* 0x48 */
-       "OPTIMIZED",                    /* 0x49 */
-       "PSEUDO",                       /* 0x4a */
+       "OPFAIL",                       /* 0x49 */
+       "OPTIMIZED",                    /* 0x4a */
+       "PSEUDO",                       /* 0x4b */
        /* ------------ States ------------- */
-       "TRIE_next",                    /* 0x4b */
-       "TRIE_next_fail",               /* 0x4c */
-       "EVAL_AB",                      /* 0x4d */
-       "EVAL_AB_fail",                 /* 0x4e */
-       "CURLYX_end",                   /* 0x4f */
-       "CURLYX_end_fail",              /* 0x50 */
-       "WHILEM_A_pre",                 /* 0x51 */
-       "WHILEM_A_pre_fail",            /* 0x52 */
-       "WHILEM_A_min",                 /* 0x53 */
-       "WHILEM_A_min_fail",            /* 0x54 */
-       "WHILEM_A_max",                 /* 0x55 */
-       "WHILEM_A_max_fail",            /* 0x56 */
-       "WHILEM_B_min",                 /* 0x57 */
-       "WHILEM_B_min_fail",            /* 0x58 */
-       "WHILEM_B_max",                 /* 0x59 */
-       "WHILEM_B_max_fail",            /* 0x5a */
-       "BRANCH_next",                  /* 0x5b */
-       "BRANCH_next_fail",             /* 0x5c */
-       "CURLYM_A",                     /* 0x5d */
-       "CURLYM_A_fail",                /* 0x5e */
-       "CURLYM_B",                     /* 0x5f */
-       "CURLYM_B_fail",                /* 0x60 */
-       "IFMATCH_A",                    /* 0x61 */
-       "IFMATCH_A_fail",               /* 0x62 */
-       "CURLY_B_min_known",            /* 0x63 */
-       "CURLY_B_min_known_fail",       /* 0x64 */
-       "CURLY_B_min",                  /* 0x65 */
-       "CURLY_B_min_fail",             /* 0x66 */
-       "CURLY_B_max",                  /* 0x67 */
-       "CURLY_B_max_fail",             /* 0x68 */
+       "TRIE_next",                    /* 0x4c */
+       "TRIE_next_fail",               /* 0x4d */
+       "EVAL_AB",                      /* 0x4e */
+       "EVAL_AB_fail",                 /* 0x4f */
+       "CURLYX_end",                   /* 0x50 */
+       "CURLYX_end_fail",              /* 0x51 */
+       "WHILEM_A_pre",                 /* 0x52 */
+       "WHILEM_A_pre_fail",            /* 0x53 */
+       "WHILEM_A_min",                 /* 0x54 */
+       "WHILEM_A_min_fail",            /* 0x55 */
+       "WHILEM_A_max",                 /* 0x56 */
+       "WHILEM_A_max_fail",            /* 0x57 */
+       "WHILEM_B_min",                 /* 0x58 */
+       "WHILEM_B_min_fail",            /* 0x59 */
+       "WHILEM_B_max",                 /* 0x5a */
+       "WHILEM_B_max_fail",            /* 0x5b */
+       "BRANCH_next",                  /* 0x5c */
+       "BRANCH_next_fail",             /* 0x5d */
+       "CURLYM_A",                     /* 0x5e */
+       "CURLYM_A_fail",                /* 0x5f */
+       "CURLYM_B",                     /* 0x60 */
+       "CURLYM_B_fail",                /* 0x61 */
+       "IFMATCH_A",                    /* 0x62 */
+       "IFMATCH_A_fail",               /* 0x63 */
+       "CURLY_B_min_known",            /* 0x64 */
+       "CURLY_B_min_known_fail",       /* 0x65 */
+       "CURLY_B_min",                  /* 0x66 */
+       "CURLY_B_min_fail",             /* 0x67 */
+       "CURLY_B_max",                  /* 0x68 */
+       "CURLY_B_max_fail",             /* 0x69 */
 };
 #endif /* DEBUGGING */
 #else
index dbbe993..9b9e5f8 100644 (file)
@@ -1147,3 +1147,36 @@ foo(?:aA|bB)?+b  foobBb  y       $&      foobBb
 ([^()]++|\([^()]*\))+  ((abc(ade)ufh()()x      y       $&      abc(ade)ufh()()x
 round\(([^()]++)\)     _I(round(xs * sz),1)    y       $1      xs * sz
 
+(foo[1x]|bar[2x]|baz[3x])+y    foo1bar2baz3y   y       $1      baz3
+(foo[1x]|bar[2x]|baz[3x])+y    foo1bar2baz3y   y       $&      foo1bar2baz3y
+(foo[1x]|bar[2x]|baz[3x])*y    foo1bar2baz3y   y       $1      baz3
+(foo[1x]|bar[2x]|baz[3x])*y    foo1bar2baz3y   y       $&      foo1bar2baz3y
+
+([yX].|WORDS|[yX].|WORD)S      WORDS   y       $1      WORD
+(WORDS|WORLD|WORD)S    WORDS   y       $1      WORD
+([yX].|WORDS|WORD|[xY].)S      WORDS   y       $1      WORD
+(foo|fool|[zx].|money|parted)$ fool    y       $1      fool
+([zx].|foo|fool|[zq].|money|parted|[yx].)$     fool    y       $1      fool
+(foo|fool|[zx].|money|parted)$ fools   n       -       -
+([zx].|foo|fool|[qx].|money|parted|[py].)$     fools   n       -       -
+
+([yX].|WORDS|[yX].|WORD)+S     WORDS   y       $1      WORD
+(WORDS|WORLD|WORD)+S   WORDS   y       $1      WORD
+([yX].|WORDS|WORD|[xY].)+S     WORDS   y       $1      WORD
+(foo|fool|[zx].|money|parted)+$        fool    y       $1      fool
+([zx].|foo|fool|[zq].|money|parted|[yx].)+$    fool    y       $1      fool
+(foo|fool|[zx].|money|parted)+$        fools   n       -       -
+([zx].|foo|fool|[qx].|money|parted|[py].)+$    fools   n       -       -
+
+(x|y|z[QW])+(longish|loquatious|excessive|overblown[QW])+      xyzQzWlongishoverblownW y       $1-$2   zW-overblownW
+(x|y|z[QW])*(longish|loquatious|excessive|overblown[QW])*      xyzQzWlongishoverblownW y       $1-$2   zW-overblownW
+(x|y|z[QW]){1,5}(longish|loquatious|excessive|overblown[QW]){1,5}      xyzQzWlongishoverblownW y       $1-$2   zW-overblownW
+
+(x|y|z[QW])++(longish|loquatious|excessive|overblown[QW])++    xyzQzWlongishoverblownW y       $1-$2   zW-overblownW
+(x|y|z[QW])*+(longish|loquatious|excessive|overblown[QW])*+    xyzQzWlongishoverblownW y       $1-$2   zW-overblownW
+(x|y|z[QW]){1,5}+(longish|loquatious|excessive|overblown[QW]){1,5}+    xyzQzWlongishoverblownW y       $1-$2   zW-overblownW
+
+
+a*(?!) aaaab   n       -       -
+a*(?FAIL)      aaaab   n       -       -
+a*(?F) aaaab   n       -       -