From: Yves Orton <demerphq@gmail.com>
Date: Thu, 26 Oct 2006 14:59:11 +0000 (+0200)
Subject: Fix a problem with jump-tries, add (?FAIL) pattern.
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=7f69552c33ff9bd1eb6665f732d0f22956ac2f30;p=p5sagit%2Fp5-mst-13.2.git

Fix a problem with jump-tries, add (?FAIL) pattern.
Message-ID: <9b18b3110610260559k3efa98barc28987e88c581a8a@mail.gmail.com>

p4raw-id: //depot/perl@29118
---

diff --git a/regcomp.c b/regcomp.c
index 25dc17f..be8be1b 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1174,7 +1174,7 @@ is the recommended Unicode-aware way of saying
     if ( noper_next < tail ) {                                  \
         if (!trie->jump)                                        \
             Newxz( trie->jump, word_count + 1, U16);            \
-        trie->jump[curword] = (U16)(tail - noper_next);         \
+        trie->jump[curword] = (U16)(noper_next - convert);      \
         if (!jumper)                                            \
             jumper = noper_next;                                \
         if (!nextbranch)                                        \
@@ -1225,6 +1225,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
     U32 next_alloc = 0;
     regnode *jumper = NULL;
     regnode *nextbranch = NULL;
+    regnode *convert = NULL;
     /* we just use folder as a flag in utf8 */
     const U8 * const folder = ( flags == EXACTF
                        ? PL_fold
@@ -1273,6 +1274,16 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                   REG_NODE_NUM(last), REG_NODE_NUM(tail),
                   (int)depth);
     });
+   
+   /* Find the node we are going to overwrite */
+    if ( first == startbranch && OP( last ) != BRANCH ) {
+        /* whole branch chain */
+        convert = first;
+    } else {
+        /* branch sub-chain */
+        convert = NEXTOPER( first );
+    }
+        
     /*  -- First loop and Setup --
 
        We first traverse the branches and scan each word to determine if it
@@ -1770,7 +1781,6 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
     );
 
     {   /* Modify the program and insert the new TRIE node*/ 
-        regnode *convert;
         U8 nodetype =(U8)(flags & 0xFF);
         char *str=NULL;
         
@@ -1788,23 +1798,22 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
            the whole branch sequence, including the first.
          */
         /* Find the node we are going to overwrite */
-        if ( first == startbranch && OP( last ) != BRANCH ) {
-            /* whole branch chain */
-            convert = first;
-            DEBUG_r({
-                const  regnode *nop = NEXTOPER( convert );
-                mjd_offset= Node_Offset((nop));
-                mjd_nodelen= Node_Length((nop));
-            });
-        } else {
+        if ( first != startbranch || OP( last ) == BRANCH ) {
             /* branch sub-chain */
-            convert = NEXTOPER( first );
             NEXT_OFF( first ) = (U16)(last - first);
             DEBUG_r({
                 mjd_offset= Node_Offset((convert));
                 mjd_nodelen= Node_Length((convert));
             });
+            /* otherwise (else arm below): whole branch chain */
+        } else {
+            DEBUG_r({
+                const  regnode *nop = NEXTOPER( convert );
+                mjd_offset= Node_Offset((nop));
+                mjd_nodelen= Node_Length((nop));
+            });
         }
+        
         DEBUG_OPTIMISE_r(
             PerlIO_printf(Perl_debug_log, "%*sMJD offset:%"UVuf" MJD length:%"UVuf"\n",
                 (int)depth * 2 + 2, "",
@@ -1917,7 +1926,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
 	       jump[0], which is otherwise unused by the jump logic. 
 	       We use this when dumping a trie and during optimisation. */
 	    if (trie->jump) 
-	        trie->jump[0] = (U16)(tail - nextbranch);
+	        trie->jump[0] = (U16)(nextbranch - convert);
             
             /* XXXX */
             if ( !trie->states[trie->startstate].wordnum && trie->bitmap && 
@@ -2091,7 +2100,7 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
        SV * const mysv=sv_newmortal(); \
        regnode *Next = regnext(scan); \
        regprop(RExC_rx, mysv, scan); \
-       PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s [%d]\n", \
+       PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s (%d)\n", \
        (int)depth*2, "", REG_NODE_NUM(scan), SvPV_nolen_const(mysv),\
        Next ? (REG_NODE_NUM(Next)) : 0 ); \
    });
@@ -3483,6 +3492,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             /* NOTE - There is similar code to this block above for handling
                BRANCH nodes on the initial study.  If you change stuff here 
                check there too. */
+            regnode *trie_node= scan;
             regnode *tail= regnext(scan);
             reg_trie_data *trie = (reg_trie_data*)RExC_rx->data->data[ ARG(scan) ];
             I32 max1 = 0, min1 = I32_MAX;
@@ -3523,8 +3533,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
     
                     if (trie->jump[word]) {
                         if (!nextbranch)
-                            nextbranch = tail - trie->jump[0];
-                        scan= tail - trie->jump[word];
+                            nextbranch = trie_node + trie->jump[0];
+                        scan= trie_node + trie->jump[word];
                         /* We go from the jump point to the branch that follows
                            it. Note this means we need the vestigal unused branches
                            even though they arent otherwise used.
@@ -3855,7 +3865,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     r->paren_names = 0;
     
     if (RExC_seen & REG_SEEN_RECURSE) {
-        Newx(RExC_parens, RExC_npar,regnode *);
+        Newxz(RExC_parens, RExC_npar,regnode *);
         SAVEFREEPV(RExC_parens);
     }
 
@@ -4568,10 +4578,24 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 		RExC_parse++;
 	    case '=':           /* (?=...) */
 	    case '!':           /* (?!...) */
+	        if (*RExC_parse == ')')
+	            goto do_op_fail;
 		RExC_seen_zerolen++;
 	    case ':':           /* (?:...) */
 	    case '>':           /* (?>...) */
 		break;
+            case 'F':
+                if (RExC_parse[0] == 'A' &&
+                    RExC_parse[1] == 'I' &&
+                    RExC_parse[2] == 'L')
+                    RExC_parse+=3;
+                if (*RExC_parse != ')')
+	            vFAIL("Sequence (?FAIL) or (?F) not terminated");
+	      do_op_fail:
+		ret = reg_node(pRExC_state, OPFAIL);
+	        nextchar(pRExC_state);
+	        return ret;
+		break;
 	    case '$':           /* (?$...) */
 	    case '@':           /* (?@...) */
 		vFAIL2("Sequence (?%c...) not implemented", (int)paren);
@@ -4588,8 +4612,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 	    case 'R' :           /* (?R) */
 		if (*RExC_parse != ')')
 		    FAIL("Sequence (?R) not terminated");
-		reg_node(pRExC_state, SRECURSE);
-		break;           /* (?PARNO) */
+		ret = reg_node(pRExC_state, SRECURSE);
+		nextchar(pRExC_state);
+		return ret;
+		/*notreached*/
             { /* named and numeric backreferences */
                 I32 num;
                 char * parse_start;
@@ -8442,6 +8468,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
 	    DUMPUNTIL(NEXTOPER(node), next);
 	}
 	else if ( PL_regkind[(U8)op]  == TRIE ) {
+	    const regnode *this_trie = node;
 	    const char op = OP(node);
             const I32 n = ARG(node);
 	    const reg_ac_data * const ac = op>=AHOCORASICK ?
@@ -8462,18 +8489,19 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
 	                    PL_colors[0], PL_colors[1],
 	                    (SvUTF8(*elem_ptr) ? PERL_PV_ESCAPE_UNI : 0) |
 	                    PERL_PV_PRETTY_ELIPSES    |
-	                    PERL_PV_PRETTY_LTGT    
+	                    PERL_PV_PRETTY_LTGT
                             )
                             : "???"
                 );
                 if (trie->jump) {
-                    U16 dist= trie->jump[word_idx+1];
-                    PerlIO_printf(Perl_debug_log, "(%u)\n",(next - dist) - start);
+                    U16 dist = trie->jump[word_idx+1];
+		    PerlIO_printf(Perl_debug_log, "(%u)\n",
+			    (dist ? this_trie + dist : next) - start);
                     if (dist) {
                         if (!nextbranch)
-                            nextbranch= next - trie->jump[0];
-                        DUMPUNTIL(next - dist, nextbranch);
-                    } 
+			    nextbranch = this_trie + trie->jump[0];
+			DUMPUNTIL(this_trie + dist, nextbranch);
+                    }
                     if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
                         nextbranch= regnext((regnode *)nextbranch);
                 } else {
diff --git a/regcomp.sym b/regcomp.sym
index 561b25d..73e27a8 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -164,16 +164,19 @@ NREFF		NREF,    no-sv 1	Match already matched string, folded
 NREFFL		NREF,    no-sv 1	Match already matched string, folded in loc.
 
 
-#*Special conditionals 
+#*Special conditionals  (70..72)
 NGROUPP		NGROUPP,   no-sv 1	Whether the group matched.            
 RECURSEP	RECURSEP,  num 1 	Whether we are in a specific recurse.  
 DEFINEP		DEFINEP,   none 1 	Never execute directly.               
 
+#*Backtracking
+OPFAIL		OPFAIL, none 		Same as (?!)
+
 # NEW STUFF ABOVE THIS LINE -- Please update counts below. 
 
 ################################################################################
 
-#*SPECIAL  REGOPS (70, 71)
+#*SPECIAL  REGOPS
 
 # This is not really a node, but an optimized away piece of a "long" node.
 # To simplify debugging output, we mark it as if it were a node
diff --git a/regexec.c b/regexec.c
index 60ec4ff..17d0e01 100644
--- a/regexec.c
+++ b/regexec.c
@@ -168,6 +168,7 @@ S_regcppush(pTHX_ I32 parenfloor)
 
 #define REGCP_OTHER_ELEMS 8
     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
+    
     for (p = PL_regsize; p > parenfloor; p--) {
 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
 	SSPUSHINT(PL_regendp[p]);
@@ -2763,13 +2764,8 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
 	    	ST.accepted = 0; /* how many accepting states we have seen */
 		ST.B = next;
 		ST.jump = trie->jump;
-		
-#ifdef DEBUGGING
 		ST.me = scan;
-#endif
                 
-        	
-
 	        /*
         	   traverse the TRIE keeping track of all accepting states
         	   we transition through until we get to a failing node.
@@ -2894,10 +2890,10 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
 		locinput = PL_reginput;
 		nextchr = UCHARAT(locinput);
 		
-		if ( !ST.jump ) 
+		if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum]) 
 		    scan = ST.B;
 		else
-		    scan = ST.B - ST.jump[ST.accept_buff[0].wordnum];
+		    scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
 		
 		continue; /* execute rest of RE */
 	    }
@@ -2943,9 +2939,9 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
 		    SV ** const tmp = RX_DEBUG(reginfo->prog)
 				? av_fetch( trie->words, ST.accept_buff[ best ].wordnum - 1, 0 )
 				: NULL;
-		    regnode *nextop=!ST.jump ? 
+		    regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ? 
 		                    ST.B : 
-		                    ST.B - ST.jump[ST.accept_buff[best].wordnum];    
+		                    ST.me + ST.jump[ST.accept_buff[best].wordnum];    
 		    PerlIO_printf( Perl_debug_log, 
 		        "%*s  %strying alternation #%d <%s> at node #%d %s\n",
 			REPORT_CODE_OFF+depth*2, "", PL_colors[4],
@@ -2962,11 +2958,11 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
 		    best = ST.accepted;
 		}
 		PL_reginput = (char *)ST.accept_buff[ best ].endpos;
-		if ( !ST.jump ) {
+		if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
 		    PUSH_STATE_GOTO(TRIE_next, ST.B);
 		    /* NOTREACHED */
 		} else {
-		    PUSH_STATE_GOTO(TRIE_next, ST.B - ST.jump[ST.accept_buff[best].wordnum]);
+		    PUSH_STATE_GOTO(TRIE_next, ST.me + ST.jump[ST.accept_buff[best].wordnum]);
 		    /* NOTREACHED */
                 }
                 /* NOTREACHED */
@@ -3601,6 +3597,8 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
 	    n = ARG(scan);  /* which paren pair */
 	    PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
 	    PL_regendp[n] = locinput - PL_bostr;
+	    /*if (n > PL_regsize)
+		PL_regsize = n;*/
 	    if (n > (I32)*PL_reglastparen)
 		*PL_reglastparen = n;
 	    *PL_reglastcloseparen = n;
@@ -4484,7 +4482,6 @@ NULL
 
 #undef ST
 
-
 	case END:
 	    fake_end:
 	    if (cur_eval) {
@@ -4611,6 +4608,8 @@ NULL
 	    if (next == scan)
 		next = NULL;
 	    break;
+	case OPFAIL:
+	    sayNO;
 	default:
 	    PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
 			  PTR2UV(scan), OP(scan));
diff --git a/regexp.h b/regexp.h
index faed0ee..89fcea7 100644
--- a/regexp.h
+++ b/regexp.h
@@ -227,9 +227,9 @@ typedef struct regmatch_state {
 	struct {
 	    reg_trie_accepted *accept_buff;
 	    U32		accepted; /* how many accepting states we have seen */
-	    U16         *jump;  /* negative offsets from B */
+	    U16         *jump;  /* positive offsets from me */
 	    regnode	*B;	/* node following the trie */
-	    regnode	*me;	/* only needed for debugging */
+	    regnode	*me;	/* Which node am I - needed for jump tries*/
 	} trie;
 
 	struct {
diff --git a/regnodes.h b/regnodes.h
index f7ebda1..d6842b5 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -6,8 +6,8 @@
 
 /* Regops and State definitions */
 
-#define REGNODE_MAX           	74
-#define REGMATCH_STATE_MAX    	104
+#define REGNODE_MAX           	75
+#define REGMATCH_STATE_MAX    	105
 
 #define	END                   	0	/* 0000 End of program. */
 #define	SUCCEED               	1	/* 0x01 Return from a subroutine, basically. */
@@ -82,41 +82,42 @@
 #define	NGROUPP               	70	/* 0x46 Whether the group matched. */
 #define	RECURSEP              	71	/* 0x47 Whether we are in a specific recurse. */
 #define	DEFINEP               	72	/* 0x48 Never execute directly. */
-#define	OPTIMIZED             	73	/* 0x49 Placeholder for dump. */
-#define	PSEUDO                	74	/* 0x4a Pseudo opcode for internal use. */
+#define	OPFAIL                	73	/* 0x49 Same as (?!) */
+#define	OPTIMIZED             	74	/* 0x4a Placeholder for dump. */
+#define	PSEUDO                	75	/* 0x4b Pseudo opcode for internal use. */
 
 	/* ------------ States ------------- */
 
-#define	TRIE_next             	75	/* 0x4b Regmatch state for TRIE */
-#define	TRIE_next_fail        	76	/* 0x4c Regmatch state for TRIE */
-#define	EVAL_AB               	77	/* 0x4d Regmatch state for EVAL */
-#define	EVAL_AB_fail          	78	/* 0x4e Regmatch state for EVAL */
-#define	CURLYX_end            	79	/* 0x4f Regmatch state for CURLYX */
-#define	CURLYX_end_fail       	80	/* 0x50 Regmatch state for CURLYX */
-#define	WHILEM_A_pre          	81	/* 0x51 Regmatch state for WHILEM */
-#define	WHILEM_A_pre_fail     	82	/* 0x52 Regmatch state for WHILEM */
-#define	WHILEM_A_min          	83	/* 0x53 Regmatch state for WHILEM */
-#define	WHILEM_A_min_fail     	84	/* 0x54 Regmatch state for WHILEM */
-#define	WHILEM_A_max          	85	/* 0x55 Regmatch state for WHILEM */
-#define	WHILEM_A_max_fail     	86	/* 0x56 Regmatch state for WHILEM */
-#define	WHILEM_B_min          	87	/* 0x57 Regmatch state for WHILEM */
-#define	WHILEM_B_min_fail     	88	/* 0x58 Regmatch state for WHILEM */
-#define	WHILEM_B_max          	89	/* 0x59 Regmatch state for WHILEM */
-#define	WHILEM_B_max_fail     	90	/* 0x5a Regmatch state for WHILEM */
-#define	BRANCH_next           	91	/* 0x5b Regmatch state for BRANCH */
-#define	BRANCH_next_fail      	92	/* 0x5c Regmatch state for BRANCH */
-#define	CURLYM_A              	93	/* 0x5d Regmatch state for CURLYM */
-#define	CURLYM_A_fail         	94	/* 0x5e Regmatch state for CURLYM */
-#define	CURLYM_B              	95	/* 0x5f Regmatch state for CURLYM */
-#define	CURLYM_B_fail         	96	/* 0x60 Regmatch state for CURLYM */
-#define	IFMATCH_A             	97	/* 0x61 Regmatch state for IFMATCH */
-#define	IFMATCH_A_fail        	98	/* 0x62 Regmatch state for IFMATCH */
-#define	CURLY_B_min_known     	99	/* 0x63 Regmatch state for CURLY */
-#define	CURLY_B_min_known_fail	100	/* 0x64 Regmatch state for CURLY */
-#define	CURLY_B_min           	101	/* 0x65 Regmatch state for CURLY */
-#define	CURLY_B_min_fail      	102	/* 0x66 Regmatch state for CURLY */
-#define	CURLY_B_max           	103	/* 0x67 Regmatch state for CURLY */
-#define	CURLY_B_max_fail      	104	/* 0x68 Regmatch state for CURLY */
+#define	TRIE_next             	76	/* 0x4c Regmatch state for TRIE */
+#define	TRIE_next_fail        	77	/* 0x4d Regmatch state for TRIE */
+#define	EVAL_AB               	78	/* 0x4e Regmatch state for EVAL */
+#define	EVAL_AB_fail          	79	/* 0x4f Regmatch state for EVAL */
+#define	CURLYX_end            	80	/* 0x50 Regmatch state for CURLYX */
+#define	CURLYX_end_fail       	81	/* 0x51 Regmatch state for CURLYX */
+#define	WHILEM_A_pre          	82	/* 0x52 Regmatch state for WHILEM */
+#define	WHILEM_A_pre_fail     	83	/* 0x53 Regmatch state for WHILEM */
+#define	WHILEM_A_min          	84	/* 0x54 Regmatch state for WHILEM */
+#define	WHILEM_A_min_fail     	85	/* 0x55 Regmatch state for WHILEM */
+#define	WHILEM_A_max          	86	/* 0x56 Regmatch state for WHILEM */
+#define	WHILEM_A_max_fail     	87	/* 0x57 Regmatch state for WHILEM */
+#define	WHILEM_B_min          	88	/* 0x58 Regmatch state for WHILEM */
+#define	WHILEM_B_min_fail     	89	/* 0x59 Regmatch state for WHILEM */
+#define	WHILEM_B_max          	90	/* 0x5a Regmatch state for WHILEM */
+#define	WHILEM_B_max_fail     	91	/* 0x5b Regmatch state for WHILEM */
+#define	BRANCH_next           	92	/* 0x5c Regmatch state for BRANCH */
+#define	BRANCH_next_fail      	93	/* 0x5d Regmatch state for BRANCH */
+#define	CURLYM_A              	94	/* 0x5e Regmatch state for CURLYM */
+#define	CURLYM_A_fail         	95	/* 0x5f Regmatch state for CURLYM */
+#define	CURLYM_B              	96	/* 0x60 Regmatch state for CURLYM */
+#define	CURLYM_B_fail         	97	/* 0x61 Regmatch state for CURLYM */
+#define	IFMATCH_A             	98	/* 0x62 Regmatch state for IFMATCH */
+#define	IFMATCH_A_fail        	99	/* 0x63 Regmatch state for IFMATCH */
+#define	CURLY_B_min_known     	100	/* 0x64 Regmatch state for CURLY */
+#define	CURLY_B_min_known_fail	101	/* 0x65 Regmatch state for CURLY */
+#define	CURLY_B_min           	102	/* 0x66 Regmatch state for CURLY */
+#define	CURLY_B_min_fail      	103	/* 0x67 Regmatch state for CURLY */
+#define	CURLY_B_max           	104	/* 0x68 Regmatch state for CURLY */
+#define	CURLY_B_max_fail      	105	/* 0x69 Regmatch state for CURLY */
 
 /* PL_regkind[] What type of regop or state is this. */
 
@@ -197,6 +198,7 @@ EXTCONST U8 PL_regkind[] = {
 	NGROUPP, 	/* NGROUPP                */
 	RECURSEP,	/* RECURSEP               */
 	DEFINEP, 	/* DEFINEP                */
+	OPFAIL,  	/* OPFAIL                 */
 	NOTHING, 	/* OPTIMIZED              */
 	PSEUDO,  	/* PSEUDO                 */
 	/* ------------ States ------------- */
@@ -310,6 +312,7 @@ static const U8 regarglen[] = {
 	EXTRA_SIZE(struct regnode_1),        	/* NGROUPP      */
 	EXTRA_SIZE(struct regnode_1),        	/* RECURSEP     */
 	EXTRA_SIZE(struct regnode_1),        	/* DEFINEP      */
+	0,                                   	/* OPFAIL       */
 	0,                                   	/* OPTIMIZED    */
 	0,                                   	/* PSEUDO       */
 };
@@ -390,6 +393,7 @@ static const char reg_off_by_arg[] = {
 	0,	/* NGROUPP      */
 	0,	/* RECURSEP     */
 	0,	/* DEFINEP      */
+	0,	/* OPFAIL       */
 	0,	/* OPTIMIZED    */
 	0,	/* PSEUDO       */
 };
@@ -471,39 +475,40 @@ const char * reg_name[] = {
 	"NGROUPP",               	/* 0x46 */
 	"RECURSEP",              	/* 0x47 */
 	"DEFINEP",               	/* 0x48 */
-	"OPTIMIZED",             	/* 0x49 */
-	"PSEUDO",                	/* 0x4a */
+	"OPFAIL",                	/* 0x49 */
+	"OPTIMIZED",             	/* 0x4a */
+	"PSEUDO",                	/* 0x4b */
 	/* ------------ States ------------- */
-	"TRIE_next",             	/* 0x4b */
-	"TRIE_next_fail",        	/* 0x4c */
-	"EVAL_AB",               	/* 0x4d */
-	"EVAL_AB_fail",          	/* 0x4e */
-	"CURLYX_end",            	/* 0x4f */
-	"CURLYX_end_fail",       	/* 0x50 */
-	"WHILEM_A_pre",          	/* 0x51 */
-	"WHILEM_A_pre_fail",     	/* 0x52 */
-	"WHILEM_A_min",          	/* 0x53 */
-	"WHILEM_A_min_fail",     	/* 0x54 */
-	"WHILEM_A_max",          	/* 0x55 */
-	"WHILEM_A_max_fail",     	/* 0x56 */
-	"WHILEM_B_min",          	/* 0x57 */
-	"WHILEM_B_min_fail",     	/* 0x58 */
-	"WHILEM_B_max",          	/* 0x59 */
-	"WHILEM_B_max_fail",     	/* 0x5a */
-	"BRANCH_next",           	/* 0x5b */
-	"BRANCH_next_fail",      	/* 0x5c */
-	"CURLYM_A",              	/* 0x5d */
-	"CURLYM_A_fail",         	/* 0x5e */
-	"CURLYM_B",              	/* 0x5f */
-	"CURLYM_B_fail",         	/* 0x60 */
-	"IFMATCH_A",             	/* 0x61 */
-	"IFMATCH_A_fail",        	/* 0x62 */
-	"CURLY_B_min_known",     	/* 0x63 */
-	"CURLY_B_min_known_fail",	/* 0x64 */
-	"CURLY_B_min",           	/* 0x65 */
-	"CURLY_B_min_fail",      	/* 0x66 */
-	"CURLY_B_max",           	/* 0x67 */
-	"CURLY_B_max_fail",      	/* 0x68 */
+	"TRIE_next",             	/* 0x4c */
+	"TRIE_next_fail",        	/* 0x4d */
+	"EVAL_AB",               	/* 0x4e */
+	"EVAL_AB_fail",          	/* 0x4f */
+	"CURLYX_end",            	/* 0x50 */
+	"CURLYX_end_fail",       	/* 0x51 */
+	"WHILEM_A_pre",          	/* 0x52 */
+	"WHILEM_A_pre_fail",     	/* 0x53 */
+	"WHILEM_A_min",          	/* 0x54 */
+	"WHILEM_A_min_fail",     	/* 0x55 */
+	"WHILEM_A_max",          	/* 0x56 */
+	"WHILEM_A_max_fail",     	/* 0x57 */
+	"WHILEM_B_min",          	/* 0x58 */
+	"WHILEM_B_min_fail",     	/* 0x59 */
+	"WHILEM_B_max",          	/* 0x5a */
+	"WHILEM_B_max_fail",     	/* 0x5b */
+	"BRANCH_next",           	/* 0x5c */
+	"BRANCH_next_fail",      	/* 0x5d */
+	"CURLYM_A",              	/* 0x5e */
+	"CURLYM_A_fail",         	/* 0x5f */
+	"CURLYM_B",              	/* 0x60 */
+	"CURLYM_B_fail",         	/* 0x61 */
+	"IFMATCH_A",             	/* 0x62 */
+	"IFMATCH_A_fail",        	/* 0x63 */
+	"CURLY_B_min_known",     	/* 0x64 */
+	"CURLY_B_min_known_fail",	/* 0x65 */
+	"CURLY_B_min",           	/* 0x66 */
+	"CURLY_B_min_fail",      	/* 0x67 */
+	"CURLY_B_max",           	/* 0x68 */
+	"CURLY_B_max_fail",      	/* 0x69 */
 };
 #endif /* DEBUGGING */
 #else
diff --git a/t/op/re_tests b/t/op/re_tests
index dbbe993..9b9e5f8 100644
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -1147,3 +1147,36 @@ foo(?:aA|bB)?+b	foobBb	y	$&	foobBb
 ([^()]++|\([^()]*\))+	((abc(ade)ufh()()x	y	$&	abc(ade)ufh()()x
 round\(([^()]++)\)	_I(round(xs * sz),1)	y	$1	xs * sz
 
+(foo[1x]|bar[2x]|baz[3x])+y	foo1bar2baz3y	y	$1	baz3
+(foo[1x]|bar[2x]|baz[3x])+y	foo1bar2baz3y	y	$&	foo1bar2baz3y
+(foo[1x]|bar[2x]|baz[3x])*y	foo1bar2baz3y	y	$1	baz3
+(foo[1x]|bar[2x]|baz[3x])*y	foo1bar2baz3y	y	$&	foo1bar2baz3y
+
+([yX].|WORDS|[yX].|WORD)S	WORDS	y	$1	WORD
+(WORDS|WORLD|WORD)S	WORDS	y	$1	WORD
+([yX].|WORDS|WORD|[xY].)S	WORDS	y	$1	WORD
+(foo|fool|[zx].|money|parted)$	fool	y	$1	fool
+([zx].|foo|fool|[zq].|money|parted|[yx].)$	fool	y	$1	fool
+(foo|fool|[zx].|money|parted)$	fools	n	-	-
+([zx].|foo|fool|[qx].|money|parted|[py].)$	fools	n	-	-
+
+([yX].|WORDS|[yX].|WORD)+S	WORDS	y	$1	WORD
+(WORDS|WORLD|WORD)+S	WORDS	y	$1	WORD
+([yX].|WORDS|WORD|[xY].)+S	WORDS	y	$1	WORD
+(foo|fool|[zx].|money|parted)+$	fool	y	$1	fool
+([zx].|foo|fool|[zq].|money|parted|[yx].)+$	fool	y	$1	fool
+(foo|fool|[zx].|money|parted)+$	fools	n	-	-
+([zx].|foo|fool|[qx].|money|parted|[py].)+$	fools	n	-	-
+
+(x|y|z[QW])+(longish|loquatious|excessive|overblown[QW])+	xyzQzWlongishoverblownW	y	$1-$2	zW-overblownW
+(x|y|z[QW])*(longish|loquatious|excessive|overblown[QW])*	xyzQzWlongishoverblownW	y	$1-$2	zW-overblownW
+(x|y|z[QW]){1,5}(longish|loquatious|excessive|overblown[QW]){1,5}	xyzQzWlongishoverblownW	y	$1-$2	zW-overblownW
+
+(x|y|z[QW])++(longish|loquatious|excessive|overblown[QW])++	xyzQzWlongishoverblownW	y	$1-$2	zW-overblownW
+(x|y|z[QW])*+(longish|loquatious|excessive|overblown[QW])*+	xyzQzWlongishoverblownW	y	$1-$2	zW-overblownW
+(x|y|z[QW]){1,5}+(longish|loquatious|excessive|overblown[QW]){1,5}+	xyzQzWlongishoverblownW	y	$1-$2	zW-overblownW
+
+
+a*(?!)	aaaab	n	-	-
+a*(?FAIL)	aaaab	n	-	-
+a*(?F)	aaaab	n	-	-