The second patch from:
Yves Orton [Mon, 30 Oct 2006 19:15:13 +0000 (21:15 +0200)]
Subject: [PATCH] regex engine optimiser should grok subroutine patterns, and, name subroutine regops more intuitively
Message-ID: <9b18b3110610300915x3abf6cddu9c2071a70bea48e1@mail.gmail.com>

p4raw-id: //depot/perl@29162

pod/perldiag.pod
regcomp.c
regcomp.sym
regexec.c
regnodes.h

index 68df69f..f785603 100644 (file)
@@ -1231,7 +1231,7 @@ instead.
 
    unpack("H", "\x{2a1}")
 
-where the format expects to process a byte (a character with a value 
+where the format expects to process a byte (a character with a value
 below 256), but a higher value was provided instead. Perl uses the value
 modulus 256 instead, as if you had provided:
 
@@ -1243,8 +1243,8 @@ modulus 256 instead, as if you had provided:
 
    pack("u", "\x{1f3}b")
 
-where the format expects to process a sequence of bytes (character with a 
-value below 256), but some of the characters had a higher value. Perl 
+where the format expects to process a sequence of bytes (character with a
+value below 256), but some of the characters had a higher value. Perl
 uses the character values modulus 256 instead, as if you had provided:
 
    pack("u", "\x{f3}b")
@@ -1255,8 +1255,8 @@ uses the character values modulus 256 instead, as if you had provided:
 
    unpack("s", "\x{1f3}b")
 
-where the format expects to process a sequence of bytes (character with a 
-value below 256), but some of the characters had a higher value. Perl 
+where the format expects to process a sequence of bytes (character with a
+value below 256), but some of the characters had a higher value. Perl
 uses the character values modulus 256 instead, as if you had provided:
 
    unpack("s", "\x{f3}b")
@@ -1336,9 +1336,9 @@ L<overload>.
 
 =item Constant(%s)%s: %s in regex; marked by <-- HERE in m/%s/
 
-(F) The parser found inconsistencies while attempting to find 
-the character name specified in the C<\N{...}> escape.  Perhaps you 
-forgot to load the corresponding C<charnames> pragma?  
+(F) The parser found inconsistencies while attempting to find
+the character name specified in the C<\N{...}> escape.  Perhaps you
+forgot to load the corresponding C<charnames> pragma?
 See L<charnames>.
 
 
@@ -1586,6 +1586,14 @@ that in an eval().  See L<perlre/(?{ code })>.
 assertion, but that construct is only allowed when the C<use re 'eval'>
 pragma is in effect.  See L<perlre/(?{ code })>.
 
+=item EVAL without pos change exceeded limit in regex; marked by <-- HERE in m/%s/
+
+(F) You used a pattern that nested too many EVAL calls without consuming
+any text. Restructure the pattern so that text is consumed.
+
+The <-- HERE shows in the regular expression about where the problem was
+discovered.
+
 =item Excessively long <> operator
 
 (F) The contents of a <> operator may not exceed the maximum size of a
@@ -1853,7 +1861,7 @@ of Perl are likely to eliminate these arbitrary limitations.
 
 (W) Named unicode character escapes (\N{...}) may return multi-char
 or zero length sequences. When such an escape is used in a character class
-its behaviour is not well defined. Check that the correct escape has 
+its behaviour is not well defined. Check that the correct escape has
 been used, and the correct charname handler is in scope.
 
 =item Illegal binary digit %s
@@ -1959,6 +1967,15 @@ also result in this warning.  See L<perlcall/G_KEEPERR>.
 Unicode code points, and encoded in EBCDIC as UTF-EBCDIC.  The UTF-EBCDIC
 encoding is limited to code points no larger than 2147483647 (0x7FFFFFFF).
 
+=item Infinite recursion in regex; marked by <-- HERE in m/%s/
+
+(F) You used a pattern that references itself without consuming any input
+text. You should check the pattern to ensure that recursive patterns
+either consume text or fail.
+
+The <-- HERE shows in the regular expression about where the problem was
+discovered.
+
 =item Insecure dependency in %s
 
 (F) You tried to do something that the tainting mechanism didn't like.
@@ -3091,6 +3108,15 @@ to even) byte length.
 
 (P) The lexer got into a bad state while processing a case modifier.
 
+=item Pattern subroutine nesting without pos change exceeded limit in regex; marked by <-- HERE in m/%s/
+
+(F) You used a pattern that uses too many nested subpattern calls without
+consuming any text. Restructure the pattern so text is consumed before the
+nesting limit is exceeded.
+
+The <-- HERE shows in the regular expression about where the problem was
+discovered.
+
 =item Parentheses missing around "%s" list
 
 (W parenthesis) You said something like
@@ -3386,7 +3412,7 @@ by prepending "0" to your numbers.
 
 =item readdir() attempted on invalid dirhandle %s
 
-(W io) The dirhandle you're reading from is either closed or not really 
+(W io) The dirhandle you're reading from is either closed or not really
 a dirhandle.  Check your control flow.
 
 =item readline() on closed filehandle %s
@@ -3460,6 +3486,25 @@ prepend a zero to make the number at least two digits: C<\07>
 The <-- HERE shows in the regular expression about where the problem was
 discovered.
 
+=item Reference to nonexistent named group in regex; marked by <-- HERE in m/%s/
+
+(F) You used something like C<\k'NAME'> or C<< \k<NAME> >> in your regular
+expression, but there are no corresponding named capturing parentheses such
+as C<(?'NAME'...)> or C<< (?<NAME>...) >>. Check if the name has been spelled
+correctly both in the backreference and the declaration.
+
+The <-- HERE shows in the regular expression about where the problem was
+discovered.
+
+=item (?(DEFINE)....) does not allow branches in regex; marked by <-- HERE in m/%s/
+
+(F) You used something like C<(?(DEFINE)...|..)> which is illegal. The
+most likely cause of this error is that you left out a parenthesis inside
+of the C<....> part.
+
+The <-- HERE shows in the regular expression about where the problem was
+discovered.
+
 =item regexp memory corruption
 
 (P) The regular expression engine got confused by what the regular
@@ -4024,7 +4069,7 @@ See L<perlunicode/"User-Defined Character Properties">.
 
 =item Too deeply nested ()-groups
 
-(F) Your template contains ()-groups with a ridiculously deep nesting level. 
+(F) Your template contains ()-groups with a ridiculously deep nesting level.
 
 =item Too few args to syscall
 
index 8534b0c..4e933a9 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -3521,12 +3521,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
            if (data)
                *(data->last_closep) = ARG(scan);
        }
-       else if (OP(scan) == RECURSE || OP(scan) == SRECURSE) {
+       else if (OP(scan) == GOSUB || OP(scan) == GOSTART) {
            /* set the pointer */
            I32 paren;
            regnode *start;
            regnode *end;
-           if (OP(scan) == RECURSE) {
+           if (OP(scan) == GOSUB) {
                paren = ARG(scan);
                RExC_recurse[ARG2L(scan)] = scan;
                 start = RExC_open_parens[paren-1];
@@ -4745,7 +4745,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            case 'R' :           /* (?R) */
                if (*RExC_parse != ')')
                    FAIL("Sequence (?R) not terminated");
-               ret = reg_node(pRExC_state, SRECURSE);
+               ret = reg_node(pRExC_state, GOSTART);
                nextchar(pRExC_state);
                return ret;
                /*notreached*/
@@ -4772,7 +4772,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                    vFAIL("Expecting close bracket");
                        
               gen_recurse_regop:
-                ret = reganode(pRExC_state, RECURSE, num);
+                ret = reganode(pRExC_state, GOSUB, num);
                 if (!SIZE_ONLY) {
                    if (num > (I32)RExC_rx->nparens) {
                        RExC_parse++;
@@ -4936,7 +4936,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                            SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
                        parno = sv_dat ? *((I32 *)SvPVX(sv_dat)) : 0;
                    }
-                   ret = reganode(pRExC_state,RECURSEP,parno); 
+                   ret = reganode(pRExC_state,INSUBP,parno); 
                    goto insert_if_check_paren;
                }
                else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
@@ -7938,7 +7938,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
        Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
     else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP) 
        Perl_sv_catpvf(aTHX_ sv, "%d", (int)ARG(o));    /* Parenth number */
-    else if (k == RECURSE)
+    else if (k == GOSUB) 
        Perl_sv_catpvf(aTHX_ sv, "%d[%+d]", (int)ARG(o),(int)ARG2L(o)); /* Paren and offset */
     else if (k == LOGICAL)
        Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);     /* 2: embedded, otherwise 1 */
index a61f172..072b969 100644 (file)
@@ -153,9 +153,9 @@ TRIEC               TRIE,   trie charclass  Same as TRIE, but with embedded charclass data
 AHOCORASICK    TRIE,   trie 1  Aho Corasick stclass. flags==type
 AHOCORASICKC   TRIE,   trie charclass  Same as AHOCORASICK, but with embedded charclass data
 
-#*Recursion (65..66) 
-RECURSE                RECURSE,   num/ofs 2L   recurse to paren arg1 at (signed) ofs arg2
-SRECURSE       SRECURSE,   no          recurse to start of pattern
+#*Regex Subroutines (65..66) 
+GOSUB          GOSUB,   num/ofs 2L     recurse to paren arg1 at (signed) ofs arg2
+GOSTART                GOSTART,   no           recurse to start of pattern
 
 #*Named references (67..69)
 NREF           NREF,    no-sv 1        Match some already matched string
@@ -165,7 +165,7 @@ NREFFL              NREF,    no-sv 1        Match already matched string, folded in loc.
 
 #*Special conditionals  (70..72)
 NGROUPP                NGROUPP,   no-sv 1      Whether the group matched.            
-RECURSEP       RECURSEP,  num 1        Whether we are in a specific recurse.  
+INSUBP         INSUBP,    num 1        Whether we are in a specific recurse.  
 DEFINEP                DEFINEP,   none 1       Never execute directly.               
 
 #*Bactracking 
index d57fd35..7950805 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -2565,7 +2565,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
 
     bool result = 0;       /* return value of S_regmatch */
     int depth = 0;         /* depth of backtrack stack */
-    int nochange_depth = 0; /* depth of RECURSE recursion with nochange*/
+    int nochange_depth = 0; /* depth of GOSUB recursion with nochange*/
     regmatch_state *yes_state = NULL; /* state to pop to on success of
                                                            subpattern */
     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
@@ -3408,21 +3408,23 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
        {
            SV *ret;
             regexp *re;
-            regnode *startpoint;           
-            
-        case SRECURSE:
-       case RECURSE: /*    /(...(?1))/      */
+            regnode *startpoint;
+
+       case GOSTART:
+       case GOSUB: /*    /(...(?1))/      */
             if (cur_eval && cur_eval->locinput==locinput) {
                 if (cur_eval->u.eval.close_paren == ARG(scan)) 
-                    Perl_croak(aTHX_ "Infinite recursion in RECURSE in regexp");
+                    Perl_croak(aTHX_ "Infinite recursion in regex");
                 if ( ++nochange_depth > MAX_RECURSE_EVAL_NOCHANGE_DEPTH ) 
-                    Perl_croak(aTHX_ "RECURSE without pos change exceeded limit in regexp");
+                    Perl_croak(aTHX_ 
+                        "Pattern subroutine nesting without pos change"
+                        " exceeded limit in regex");
             } else {
                 nochange_depth = 0;
-            }    
+            }
             re = rex;
             (void)ReREFCNT_inc(rex);
-            if (OP(scan)==RECURSE) {
+            if (OP(scan)==GOSUB) {
                 startpoint = scan + ARG2L(scan);
                 ST.close_paren = ARG(scan);
             } else {
@@ -3434,7 +3436,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */        
             if (cur_eval && cur_eval->locinput==locinput) {
                 if ( ++nochange_depth > MAX_RECURSE_EVAL_NOCHANGE_DEPTH ) 
-                    Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regexp");
+                    Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
             } else {
                 nochange_depth = 0;
             }    
@@ -3513,7 +3515,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                         "Matching embedded");
                );              
                startpoint = re->program + 1;
-                       ST.close_paren = 0; /* only used for RECURSE */
+                       ST.close_paren = 0; /* only used for GOSUB */
                        /* borrowed from regtry */
                 if (PL_reg_start_tmpl <= re->nparens) {
                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
@@ -3523,13 +3525,13 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
                 }                      
 
-        eval_recurse_doit: /* Share code with RECURSE below this line */
+        eval_recurse_doit: /* Share code with GOSUB below this line */                         
                /* run the pattern returned from (??{...}) */
                ST.cp = regcppush(0);   /* Save *all* the positions. */
                REGCP_SET(ST.lastcp);
                
-               PL_regstartp = re->startp; /* essentially NOOP on RECURSE */
-               PL_regendp = re->endp;     /* essentially NOOP on RECURSE */
+               PL_regstartp = re->startp; /* essentially NOOP on GOSUB */
+               PL_regendp = re->endp;     /* essentially NOOP on GOSUB */
                
                *PL_reglastparen = 0;
                *PL_reglastcloseparen = 0;
@@ -3618,7 +3620,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
            /* reg_check_named_buff_matched returns 0 for no match */
            sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
            break;
-        case RECURSEP:
+        case INSUBP:
             n = ARG(scan);
             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
             break;
index e3d1837..766dcff 100644 (file)
 #define        TRIEC                   62      /* 0x3e Same as TRIE, but with embedded charclass data */
 #define        AHOCORASICK             63      /* 0x3f Aho Corasick stclass. flags==type */
 #define        AHOCORASICKC            64      /* 0x40 Same as AHOCORASICK, but with embedded charclass data */
-#define        RECURSE                 65      /* 0x41 recurse to paren arg1 at (signed) ofs arg2 */
-#define        SRECURSE                66      /* 0x42 recurse to start of pattern */
+#define        GOSUB                   65      /* 0x41 recurse to paren arg1 at (signed) ofs arg2 */
+#define        GOSTART                 66      /* 0x42 recurse to start of pattern */
 #define        NREF                    67      /* 0x43 Match some already matched string */
 #define        NREFF                   68      /* 0x44 Match already matched string, folded */
 #define        NREFFL                  69      /* 0x45 Match already matched string, folded in loc. */
 #define        NGROUPP                 70      /* 0x46 Whether the group matched. */
-#define        RECURSEP                71      /* 0x47 Whether we are in a specific recurse. */
+#define        INSUBP                  71      /* 0x47 Whether we are in a specific recurse. */
 #define        DEFINEP                 72      /* 0x48 Never execute directly. */
 #define        OPFAIL                  73      /* 0x49 Same as (?!) */
 #define        OPTIMIZED               74      /* 0x4a Placeholder for dump. */
 EXTCONST U8 PL_regkind[];
 #else
 EXTCONST U8 PL_regkind[] = {
-       END,            /* END                    */
-       END,            /* SUCCEED                */
-       BOL,            /* BOL                    */
-       BOL,            /* MBOL                   */
-       BOL,            /* SBOL                   */
-       EOL,            /* EOS                    */
-       EOL,            /* EOL                    */
-       EOL,            /* MEOL                   */
-       EOL,            /* SEOL                   */
-       BOUND,          /* BOUND                  */
-       BOUND,          /* BOUNDL                 */
-       NBOUND,         /* NBOUND                 */
-       NBOUND,         /* NBOUNDL                */
-       GPOS,           /* GPOS                   */
-       REG_ANY,        /* REG_ANY                */
-       REG_ANY,        /* SANY                   */
-       REG_ANY,        /* CANY                   */
-       ANYOF,          /* ANYOF                  */
-       ALNUM,          /* ALNUM                  */
-       ALNUM,          /* ALNUML                 */
-       NALNUM,         /* NALNUM                 */
-       NALNUM,         /* NALNUML                */
-       SPACE,          /* SPACE                  */
-       SPACE,          /* SPACEL                 */
-       NSPACE,         /* NSPACE                 */
-       NSPACE,         /* NSPACEL                */
-       DIGIT,          /* DIGIT                  */
-       DIGIT,          /* DIGITL                 */
-       NDIGIT,         /* NDIGIT                 */
-       NDIGIT,         /* NDIGITL                */
-       CLUMP,          /* CLUMP                  */
-       BRANCH,         /* BRANCH                 */
-       BACK,           /* BACK                   */
-       EXACT,          /* EXACT                  */
-       EXACT,          /* EXACTF                 */
-       EXACT,          /* EXACTFL                */
-       NOTHING,        /* NOTHING                */
-       NOTHING,        /* TAIL                   */
-       STAR,           /* STAR                   */
-       PLUS,           /* PLUS                   */
-       CURLY,          /* CURLY                  */
-       CURLY,          /* CURLYN                 */
-       CURLY,          /* CURLYM                 */
-       CURLY,          /* CURLYX                 */
-       WHILEM,         /* WHILEM                 */
-       OPEN,           /* OPEN                   */
-       CLOSE,          /* CLOSE                  */
-       REF,            /* REF                    */
-       REF,            /* REFF                   */
-       REF,            /* REFFL                  */
-       BRANCHJ,        /* IFMATCH                */
-       BRANCHJ,        /* UNLESSM                */
-       BRANCHJ,        /* SUSPEND                */
-       BRANCHJ,        /* IFTHEN                 */
-       GROUPP,         /* GROUPP                 */
-       LONGJMP,        /* LONGJMP                */
-       BRANCHJ,        /* BRANCHJ                */
-       EVAL,           /* EVAL                   */
-       MINMOD,         /* MINMOD                 */
-       LOGICAL,        /* LOGICAL                */
-       BRANCHJ,        /* RENUM                  */
-       TRIE,           /* TRIE                   */
-       TRIE,           /* TRIEC                  */
-       TRIE,           /* AHOCORASICK            */
-       TRIE,           /* AHOCORASICKC           */
-       RECURSE,        /* RECURSE                */
-       SRECURSE,       /* SRECURSE               */
-       NREF,           /* NREF                   */
-       NREF,           /* NREFF                  */
-       NREF,           /* NREFFL                 */
-       NGROUPP,        /* NGROUPP                */
-       RECURSEP,       /* RECURSEP               */
-       DEFINEP,        /* DEFINEP                */
-       OPFAIL,         /* OPFAIL                 */
-       NOTHING,        /* OPTIMIZED              */
-       PSEUDO,         /* PSEUDO                 */
+       END,            /* END                    */
+       END,            /* SUCCEED                */
+       BOL,            /* BOL                    */
+       BOL,            /* MBOL                   */
+       BOL,            /* SBOL                   */
+       EOL,            /* EOS                    */
+       EOL,            /* EOL                    */
+       EOL,            /* MEOL                   */
+       EOL,            /* SEOL                   */
+       BOUND,          /* BOUND                  */
+       BOUND,          /* BOUNDL                 */
+       NBOUND,         /* NBOUND                 */
+       NBOUND,         /* NBOUNDL                */
+       GPOS,           /* GPOS                   */
+       REG_ANY,        /* REG_ANY                */
+       REG_ANY,        /* SANY                   */
+       REG_ANY,        /* CANY                   */
+       ANYOF,          /* ANYOF                  */
+       ALNUM,          /* ALNUM                  */
+       ALNUM,          /* ALNUML                 */
+       NALNUM,         /* NALNUM                 */
+       NALNUM,         /* NALNUML                */
+       SPACE,          /* SPACE                  */
+       SPACE,          /* SPACEL                 */
+       NSPACE,         /* NSPACE                 */
+       NSPACE,         /* NSPACEL                */
+       DIGIT,          /* DIGIT                  */
+       DIGIT,          /* DIGITL                 */
+       NDIGIT,         /* NDIGIT                 */
+       NDIGIT,         /* NDIGITL                */
+       CLUMP,          /* CLUMP                  */
+       BRANCH,         /* BRANCH                 */
+       BACK,           /* BACK                   */
+       EXACT,          /* EXACT                  */
+       EXACT,          /* EXACTF                 */
+       EXACT,          /* EXACTFL                */
+       NOTHING,        /* NOTHING                */
+       NOTHING,        /* TAIL                   */
+       STAR,           /* STAR                   */
+       PLUS,           /* PLUS                   */
+       CURLY,          /* CURLY                  */
+       CURLY,          /* CURLYN                 */
+       CURLY,          /* CURLYM                 */
+       CURLY,          /* CURLYX                 */
+       WHILEM,         /* WHILEM                 */
+       OPEN,           /* OPEN                   */
+       CLOSE,          /* CLOSE                  */
+       REF,            /* REF                    */
+       REF,            /* REFF                   */
+       REF,            /* REFFL                  */
+       BRANCHJ,        /* IFMATCH                */
+       BRANCHJ,        /* UNLESSM                */
+       BRANCHJ,        /* SUSPEND                */
+       BRANCHJ,        /* IFTHEN                 */
+       GROUPP,         /* GROUPP                 */
+       LONGJMP,        /* LONGJMP                */
+       BRANCHJ,        /* BRANCHJ                */
+       EVAL,           /* EVAL                   */
+       MINMOD,         /* MINMOD                 */
+       LOGICAL,        /* LOGICAL                */
+       BRANCHJ,        /* RENUM                  */
+       TRIE,           /* TRIE                   */
+       TRIE,           /* TRIEC                  */
+       TRIE,           /* AHOCORASICK            */
+       TRIE,           /* AHOCORASICKC           */
+       GOSUB,          /* GOSUB                  */
+       GOSTART,        /* GOSTART                */
+       NREF,           /* NREF                   */
+       NREF,           /* NREFF                  */
+       NREF,           /* NREFFL                 */
+       NGROUPP,        /* NGROUPP                */
+       INSUBP,         /* INSUBP                 */
+       DEFINEP,        /* DEFINEP                */
+       OPFAIL,         /* OPFAIL                 */
+       NOTHING,        /* OPTIMIZED              */
+       PSEUDO,         /* PSEUDO                 */
        /* ------------ States ------------- */
-       TRIE,           /* TRIE_next              */
-       TRIE,           /* TRIE_next_fail         */
-       EVAL,           /* EVAL_AB                */
-       EVAL,           /* EVAL_AB_fail           */
-       CURLYX,         /* CURLYX_end             */
-       CURLYX,         /* CURLYX_end_fail        */
-       WHILEM,         /* WHILEM_A_pre           */
-       WHILEM,         /* WHILEM_A_pre_fail      */
-       WHILEM,         /* WHILEM_A_min           */
-       WHILEM,         /* WHILEM_A_min_fail      */
-       WHILEM,         /* WHILEM_A_max           */
-       WHILEM,         /* WHILEM_A_max_fail      */
-       WHILEM,         /* WHILEM_B_min           */
-       WHILEM,         /* WHILEM_B_min_fail      */
-       WHILEM,         /* WHILEM_B_max           */
-       WHILEM,         /* WHILEM_B_max_fail      */
-       BRANCH,         /* BRANCH_next            */
-       BRANCH,         /* BRANCH_next_fail       */
-       CURLYM,         /* CURLYM_A               */
-       CURLYM,         /* CURLYM_A_fail          */
-       CURLYM,         /* CURLYM_B               */
-       CURLYM,         /* CURLYM_B_fail          */
-       IFMATCH,        /* IFMATCH_A              */
-       IFMATCH,        /* IFMATCH_A_fail         */
-       CURLY,          /* CURLY_B_min_known      */
-       CURLY,          /* CURLY_B_min_known_fail */
-       CURLY,          /* CURLY_B_min            */
-       CURLY,          /* CURLY_B_min_fail       */
-       CURLY,          /* CURLY_B_max            */
-       CURLY,          /* CURLY_B_max_fail       */
+       TRIE,           /* TRIE_next              */
+       TRIE,           /* TRIE_next_fail         */
+       EVAL,           /* EVAL_AB                */
+       EVAL,           /* EVAL_AB_fail           */
+       CURLYX,         /* CURLYX_end             */
+       CURLYX,         /* CURLYX_end_fail        */
+       WHILEM,         /* WHILEM_A_pre           */
+       WHILEM,         /* WHILEM_A_pre_fail      */
+       WHILEM,         /* WHILEM_A_min           */
+       WHILEM,         /* WHILEM_A_min_fail      */
+       WHILEM,         /* WHILEM_A_max           */
+       WHILEM,         /* WHILEM_A_max_fail      */
+       WHILEM,         /* WHILEM_B_min           */
+       WHILEM,         /* WHILEM_B_min_fail      */
+       WHILEM,         /* WHILEM_B_max           */
+       WHILEM,         /* WHILEM_B_max_fail      */
+       BRANCH,         /* BRANCH_next            */
+       BRANCH,         /* BRANCH_next_fail       */
+       CURLYM,         /* CURLYM_A               */
+       CURLYM,         /* CURLYM_A_fail          */
+       CURLYM,         /* CURLYM_B               */
+       CURLYM,         /* CURLYM_B_fail          */
+       IFMATCH,        /* IFMATCH_A              */
+       IFMATCH,        /* IFMATCH_A_fail         */
+       CURLY,          /* CURLY_B_min_known      */
+       CURLY,          /* CURLY_B_min_known_fail */
+       CURLY,          /* CURLY_B_min            */
+       CURLY,          /* CURLY_B_min_fail       */
+       CURLY,          /* CURLY_B_max            */
+       CURLY,          /* CURLY_B_max_fail       */
 };
 #endif
 
@@ -304,13 +304,13 @@ static const U8 regarglen[] = {
        EXTRA_SIZE(struct regnode_charclass),   /* TRIEC        */
        EXTRA_SIZE(struct regnode_1),           /* AHOCORASICK  */
        EXTRA_SIZE(struct regnode_charclass),   /* AHOCORASICKC */
-       EXTRA_SIZE(struct regnode_2L),          /* RECURSE      */
-       0,                                      /* SRECURSE     */
+       EXTRA_SIZE(struct regnode_2L),          /* GOSUB        */
+       0,                                      /* GOSTART      */
        EXTRA_SIZE(struct regnode_1),           /* NREF         */
        EXTRA_SIZE(struct regnode_1),           /* NREFF        */
        EXTRA_SIZE(struct regnode_1),           /* NREFFL       */
        EXTRA_SIZE(struct regnode_1),           /* NGROUPP      */
-       EXTRA_SIZE(struct regnode_1),           /* RECURSEP     */
+       EXTRA_SIZE(struct regnode_1),           /* INSUBP       */
        EXTRA_SIZE(struct regnode_1),           /* DEFINEP      */
        0,                                      /* OPFAIL       */
        0,                                      /* OPTIMIZED    */
@@ -385,13 +385,13 @@ static const char reg_off_by_arg[] = {
        0,      /* TRIEC        */
        0,      /* AHOCORASICK  */
        0,      /* AHOCORASICKC */
-       0,      /* RECURSE      */
-       0,      /* SRECURSE     */
+       0,      /* GOSUB        */
+       0,      /* GOSTART      */
        0,      /* NREF         */
        0,      /* NREFF        */
        0,      /* NREFFL       */
        0,      /* NGROUPP      */
-       0,      /* RECURSEP     */
+       0,      /* INSUBP       */
        0,      /* DEFINEP      */
        0,      /* OPFAIL       */
        0,      /* OPTIMIZED    */
@@ -467,13 +467,13 @@ const char * reg_name[] = {
        "TRIEC",                        /* 0x3e */
        "AHOCORASICK",                  /* 0x3f */
        "AHOCORASICKC",                 /* 0x40 */
-       "RECURSE",                      /* 0x41 */
-       "SRECURSE",                     /* 0x42 */
+       "GOSUB",                        /* 0x41 */
+       "GOSTART",                      /* 0x42 */
        "NREF",                         /* 0x43 */
        "NREFF",                        /* 0x44 */
        "NREFFL",                       /* 0x45 */
        "NGROUPP",                      /* 0x46 */
-       "RECURSEP",                     /* 0x47 */
+       "INSUBP",                       /* 0x47 */
        "DEFINEP",                      /* 0x48 */
        "OPFAIL",                       /* 0x49 */
        "OPTIMIZED",                    /* 0x4a */