say() has the same syntax than print(), so forbid to override it
[p5sagit/p5-mst-13.2.git] / regcomp.c
index 063549e..1523fc1 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -117,8 +117,14 @@ typedef struct RExC_state_t {
     I32                extralen;
     I32                seen_zerolen;
     I32                seen_evals;
+    regnode    **open_parens;          /* pointers to open parens */
+    regnode    **close_parens;         /* pointers to close parens */
+    regnode    *opend;                 /* END node in program */
     I32                utf8;
-    HV         *charnames; /* cache of named sequences */
+    HV         *charnames;             /* cache of named sequences */
+    HV         *paren_names;           /* Paren names */
+    regnode    **recurse;              /* Recurse regops */
+    I32                recurse_count;          /* Number of recurse regops */
 #if ADD_TO_REGEXEC
     char       *starttry;              /* -Dr: where regtry was called. */
 #define RExC_starttry  (pRExC_state->starttry)
@@ -151,6 +157,12 @@ typedef struct RExC_state_t {
 #define RExC_seen_evals        (pRExC_state->seen_evals)
 #define RExC_utf8      (pRExC_state->utf8)
 #define RExC_charnames  (pRExC_state->charnames)
+#define RExC_open_parens       (pRExC_state->open_parens)
+#define RExC_close_parens      (pRExC_state->close_parens)
+#define RExC_opend     (pRExC_state->opend)
+#define RExC_paren_names       (pRExC_state->paren_names)
+#define RExC_recurse   (pRExC_state->recurse)
+#define RExC_recurse_count     (pRExC_state->recurse_count)
 
 #define        ISMULT1(c)      ((c) == '*' || (c) == '+' || (c) == '?')
 #define        ISMULT2(s)      ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
@@ -178,6 +190,14 @@ typedef struct RExC_state_t {
 #endif
 
 
+
+#define PBYTE(u8str,paren) ((U8*)(u8str))[(paren) >> 3]
+#define PBITVAL(paren) (1 << ((paren) & 7))
+#define PAREN_TEST(u8str,paren) ( PBYTE(u8str,paren) & PBITVAL(paren))
+#define PAREN_SET(u8str,paren) PBYTE(u8str,paren) |= PBITVAL(paren)
+#define PAREN_UNSET(u8str,paren) PBYTE(u8str,paren) &= (~PBITVAL(paren))
+
+
 /* About scan_data_t.
 
   During optimisation we recurse through the regexp program performing
@@ -525,7 +545,7 @@ static const scan_data_t zero_scan_data =
 #endif
 
 #define DEBUG_STUDYDATA(data,depth)                                  \
-DEBUG_OPTIMISE_r(if(data){                                           \
+DEBUG_OPTIMISE_MORE_r(if(data){                                           \
     PerlIO_printf(Perl_debug_log,                                    \
         "%*s"/* Len:%"IVdf"/%"IVdf" */" Pos:%"IVdf"/%"IVdf           \
         " Flags: %"IVdf" Whilem_c: %"IVdf" Lcp: %"IVdf" ",           \
@@ -664,6 +684,8 @@ STATIC void
 S_cl_and(struct regnode_charclass_class *cl,
        const struct regnode_charclass_class *and_with)
 {
+
+    assert(and_with->type == ANYOF);
     if (!(and_with->flags & ANYOF_CLASS)
        && !(cl->flags & ANYOF_CLASS)
        && (and_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
@@ -816,7 +838,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie,U32 depth)
         PerlIO_printf( Perl_debug_log, "%.*s", colwidth, "--------");
     PerlIO_printf( Perl_debug_log, "\n");
 
-    for( state = 1 ; state < trie->laststate ; state++ ) {
+    for( state = 1 ; state < trie->statecount ; state++ ) {
        const U32 base = trie->states[ state ].trans.base;
 
         PerlIO_printf( Perl_debug_log, "%*s#%4"UVXf"|", (int)depth * 2 + 2,"", (UV)state);
@@ -899,10 +921,13 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc
                            (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
                            PERL_PV_ESCAPE_FIRSTCHAR 
                     ) ,
-                TRIE_LIST_ITEM(state,charid).forid,
-                (UV)TRIE_LIST_ITEM(state,charid).newstate
-            );
-        }
+                    TRIE_LIST_ITEM(state,charid).forid,
+                    (UV)TRIE_LIST_ITEM(state,charid).newstate
+                );
+                if (!(charid % 10)) 
+                    PerlIO_printf(Perl_debug_log, "\n%*s| ",
+                        (int)((depth * 2) + 14), "");
+            }
         }
         PerlIO_printf( Perl_debug_log, "\n");
     }
@@ -1094,10 +1119,11 @@ is the recommended Unicode-aware way of saying
     *(d++) = uv;
 */
 
-#define TRIE_STORE_REVCHAR                                                    \
+#define TRIE_STORE_REVCHAR                                                 \
     STMT_START {                                                           \
-       SV *tmp = Perl_newSVpvf_nocontext( "%c", (int)uvc );               \
+       SV *tmp = newSVpvs("");                                            \
        if (UTF) SvUTF8_on(tmp);                                           \
+       Perl_sv_catpvf( aTHX_ tmp, "%c", (int)uvc );                       \
        av_push( TRIE_REVCHARMAP(trie), tmp );                             \
     } STMT_END
 
@@ -1129,9 +1155,8 @@ is the recommended Unicode-aware way of saying
 
 #define TRIE_LIST_PUSH(state,fid,ns) STMT_START {               \
     if ( TRIE_LIST_CUR( state ) >=TRIE_LIST_LEN( state ) ) {    \
-       TRIE_LIST_LEN( state ) *= 2;                            \
-       Renew( trie->states[ state ].trans.list,                \
-              TRIE_LIST_LEN( state ), reg_trie_trans_le );     \
+       U32 ging = TRIE_LIST_LEN( state ) *= 2;                 \
+       Renew( trie->states[ state ].trans.list, ging, reg_trie_trans_le ); \
     }                                                           \
     TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).forid = fid;     \
     TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).newstate = ns;   \
@@ -1167,7 +1192,7 @@ is the recommended Unicode-aware way of saying
     if ( noper_next < tail ) {                                  \
         if (!trie->jump)                                        \
             Newxz( trie->jump, word_count + 1, U16);            \
-        trie->jump[curword] = (U16)(tail - noper_next);         \
+        trie->jump[curword] = (U16)(noper_next - convert);      \
         if (!jumper)                                            \
             jumper = noper_next;                                \
         if (!nextbranch)                                        \
@@ -1218,6 +1243,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
     U32 next_alloc = 0;
     regnode *jumper = NULL;
     regnode *nextbranch = NULL;
+    regnode *convert = NULL;
     /* we just use folder as a flag in utf8 */
     const U8 * const folder = ( flags == EXACTF
                        ? PL_fold
@@ -1266,6 +1292,16 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                   REG_NODE_NUM(last), REG_NODE_NUM(tail),
                   (int)depth);
     });
+   
+   /* Find the node we are going to overwrite */
+    if ( first == startbranch && OP( last ) != BRANCH ) {
+        /* whole branch chain */
+        convert = first;
+    } else {
+        /* branch sub-chain */
+        convert = NEXTOPER( first );
+    }
+        
     /*  -- First loop and Setup --
 
        We first traverse the branches and scan each word to determine if it
@@ -1390,6 +1426,10 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
 
         STRLEN transcount = 1;
 
+        DEBUG_TRIE_COMPILE_MORE_r( PerlIO_printf( Perl_debug_log, 
+            "%*sCompiling trie using list compiler\n",
+            (int)depth * 2 + 2, ""));
+
         Newxz( trie->states, TRIE_CHARCOUNT(trie) + 2, reg_trie_state );
         TRIE_LIST_NEW(1);
         next_alloc = 2;
@@ -1452,13 +1492,14 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
 
         } /* end second pass */
 
-        trie->laststate = next_alloc;
+        /* next alloc is the NEXT state to be allocated */
+        trie->statecount = next_alloc; 
         Renew( trie->states, next_alloc, reg_trie_state );
 
         /* and now dump it out before we compress it */
         DEBUG_TRIE_COMPILE_MORE_r(
             dump_trie_interim_list(trie,next_alloc,depth+1)
-                    );
+        );
 
         Newxz( trie->trans, transcount ,reg_trie_trans );
         {
@@ -1567,7 +1608,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
            use TRIE_NODENUM() to convert.
 
         */
-
+        DEBUG_TRIE_COMPILE_MORE_r( PerlIO_printf( Perl_debug_log, 
+            "%*sCompiling trie using table compiler\n",
+            (int)depth * 2 + 2, ""));
 
         Newxz( trie->trans, ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount + 1,
               reg_trie_trans );
@@ -1691,7 +1734,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
         const U32 laststate = TRIE_NODENUM( next_alloc );
        U32 state, charid;
         U32 pos = 0, zp=0;
-        trie->laststate = laststate;
+        trie->statecount = laststate;
 
         for ( state = 1 ; state < laststate ; state++ ) {
             U8 flag = 0;
@@ -1728,7 +1771,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
             }
         }
         trie->lasttrans = pos + 1;
-        Renew( trie->states, laststate + 1, reg_trie_state);
+        Renew( trie->states, laststate, reg_trie_state);
         DEBUG_TRIE_COMPILE_MORE_r(
                 PerlIO_printf( Perl_debug_log,
                    "%*sAlloc: %d Orig: %"IVdf" elements, Final:%"IVdf". Savings of %%%5.2f\n",
@@ -1741,6 +1784,12 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
 
         } /* end table compress */
     }
+    DEBUG_TRIE_COMPILE_MORE_r(
+            PerlIO_printf(Perl_debug_log, "%*sStatecount:%"UVxf" Lasttrans:%"UVxf"\n",
+                (int)depth * 2 + 2, "",
+                (UV)trie->statecount,
+                (UV)trie->lasttrans)
+    );
     /* resize the trans array to remove unused space */
     Renew( trie->trans, trie->lasttrans, reg_trie_trans);
 
@@ -1750,12 +1799,11 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
     );
 
     {   /* Modify the program and insert the new TRIE node*/ 
-        regnode *convert;
         U8 nodetype =(U8)(flags & 0xFF);
         char *str=NULL;
         
 #ifdef DEBUGGING
-        
+        regnode *optimize = NULL;
         U32 mjd_offset = 0;
         U32 mjd_nodelen = 0;
 #endif
@@ -1768,23 +1816,22 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
            the whole branch sequence, including the first.
          */
         /* Find the node we are going to overwrite */
-        if ( first == startbranch && OP( last ) != BRANCH ) {
-            /* whole branch chain */
-            convert = first;
-            DEBUG_r({
-                const  regnode *nop = NEXTOPER( convert );
-                mjd_offset= Node_Offset((nop));
-                mjd_nodelen= Node_Length((nop));
-            });
-        } else {
+        if ( first != startbranch || OP( last ) == BRANCH ) {
             /* branch sub-chain */
-            convert = NEXTOPER( first );
             NEXT_OFF( first ) = (U16)(last - first);
             DEBUG_r({
                 mjd_offset= Node_Offset((convert));
                 mjd_nodelen= Node_Length((convert));
             });
+            /* whole branch chain */
+        } else {
+            DEBUG_r({
+                const  regnode *nop = NEXTOPER( convert );
+                mjd_offset= Node_Offset((nop));
+                mjd_nodelen= Node_Length((nop));
+            });
         }
+        
         DEBUG_OPTIMISE_r(
             PerlIO_printf(Perl_debug_log, "%*sMJD offset:%"UVuf" MJD length:%"UVuf"\n",
                 (int)depth * 2 + 2, "",
@@ -1796,12 +1843,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
         trie->startstate= 1;
         if ( trie->bitmap && !trie->widecharmap && !trie->jump  ) {
             U32 state;
-            DEBUG_OPTIMISE_r(
-                PerlIO_printf(Perl_debug_log, "%*sLaststate:%"UVuf"\n",
-                    (int)depth * 2 + 2, "",
-                    (UV)trie->laststate)
-           );
-            for ( state = 1 ; state < trie->laststate-1 ; state++ ) {
+            for ( state = 1 ; state < trie->statecount-1 ; state++ ) {
                 U32 ofs = 0;
                 I32 idx = -1;
                 U32 count = 0;
@@ -1889,9 +1931,12 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                     convert = n;
                } else {
                     NEXT_OFF(convert) = (U16)(tail - convert);
+                    DEBUG_r(optimize= n);
                 }
             }
         }
+        if (!jumper) 
+            jumper = last; 
         if ( trie->maxlen ) {
            NEXT_OFF( convert ) = (U16)(tail - convert);
            ARG_SET( convert, data_slot );
@@ -1899,9 +1944,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
               jump[0], which is otherwise unused by the jump logic. 
               We use this when dumping a trie and during optimisation. */
            if (trie->jump) 
-               trie->jump[0] = (U16)(tail - nextbranch);
-            if (!jumper) 
-                jumper = last; 
+               trie->jump[0] = (U16)(nextbranch - convert);
+            
             /* XXXX */
             if ( !trie->states[trie->startstate].wordnum && trie->bitmap && 
                  ( (char *)jumper - (char *)convert) >= (int)sizeof(struct regnode_charclass) )
@@ -1915,15 +1959,18 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
 
             /* store the type in the flags */
             convert->flags = nodetype;
-            /* XXX We really should free up the resource in trie now, as we wont use them */
+            DEBUG_r({
+            optimize = convert 
+                      + NODE_STEP_REGNODE 
+                      + regarglen[ OP( convert ) ];
+            });
+            /* XXX We really should free up the resource in trie now, 
+                   as we won't use them - (which resources?) dmq */
         }
         /* needed for dumping*/
-        DEBUG_r({
-            regnode *optimize = convert 
-                              + NODE_STEP_REGNODE 
-                              + regarglen[ OP( convert ) ];
+        DEBUG_r(if (optimize) {
             regnode *opt = convert;
-            while (++opt<optimize) {
+            while ( ++opt < optimize) {
                 Set_Node_Offset_Length(opt,0,0);
             }
             /* 
@@ -1973,7 +2020,7 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
     reg_trie_data *trie=(reg_trie_data *)RExC_rx->data->data[ARG(source)];
     U32 *q;
     const U32 ucharcount = trie->uniquecharcount;
-    const U32 numstates = trie->laststate;
+    const U32 numstates = trie->statecount;
     const U32 ubound = trie->lasttrans + ucharcount;
     U32 q_read = 0;
     U32 q_write = 0;
@@ -1993,7 +2040,7 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
     RExC_rx->data->data[ data_slot ] = (void*)aho;
     aho->trie=trie;
     aho->states=(reg_trie_state *)savepvn((const char*)trie->states,
-        (trie->laststate+1)*sizeof(reg_trie_state));
+        numstates * sizeof(reg_trie_state));
     Newxz( q, numstates, U32);
     Newxz( aho->fail, numstates, U32 );
     aho->refcount = 1;
@@ -2042,7 +2089,9 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
      */
     fail[ 0 ] = fail[ 1 ] = 0;
     DEBUG_TRIE_COMPILE_r({
-        PerlIO_printf(Perl_debug_log, "%*sStclass Failtable: 0", (int)(depth * 2), "");
+        PerlIO_printf(Perl_debug_log, "%*sStclass Failtable (%"UVuf" states): 0", 
+            (int)(depth * 2), "", numstates
+        );
         for( q_read=1; q_read<numstates; q_read++ ) {
             PerlIO_printf(Perl_debug_log, ", %"UVuf, (UV)fail[q_read]);
         }
@@ -2069,7 +2118,7 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
        SV * const mysv=sv_newmortal(); \
        regnode *Next = regnext(scan); \
        regprop(RExC_rx, mysv, scan); \
-       PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s [%d]\n", \
+       PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s (%d)\n", \
        (int)depth*2, "", REG_NODE_NUM(scan), SvPV_nolen_const(mysv),\
        Next ? (REG_NODE_NUM(Next)) : 0 ); \
    });
@@ -2230,15 +2279,27 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, I32 *min, U32 flags
 /* Stops at toplevel WHILEM as well as at "last". At end *scanp is set
    to the position after last scanned or to NULL. */
 
-
+#define INIT_AND_WITHP \
+    assert(!and_withp); \
+    Newx(and_withp,1,struct regnode_charclass_class); \
+    SAVEFREEPV(and_withp)
 
 STATIC I32
-S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, 
+S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         I32 *minlenp, I32 *deltap,
-                       regnode *last, scan_data_t *data, U32 flags, U32 depth)
+                       regnode *last,
+                       scan_data_t *data,
+                       I32 stopparen,
+                       U8* recursed,
+                       struct regnode_charclass_class *and_withp,
+                       U32 flags, U32 depth)
                        /* scanp: Start here (read-write). */
                        /* deltap: Write maxlen-minlen here. */
                        /* last: Stop before this one. */
+                       /* data: string data about the pattern */
+                       /* stopparen: treat close N as END */
+                       /* recursed: which subroutines have we recursed into */
+                       /* and_withp: Valid if flags & SCF_DO_STCLASS_OR */
 {
     dVAR;
     I32 min = 0, pars = 0, code;
@@ -2248,17 +2309,15 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
     int is_inf_internal = 0;           /* The studied chunk is infinite */
     I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0;
     scan_data_t data_fake;
-    struct regnode_charclass_class and_with; /* Valid if flags & SCF_DO_STCLASS_OR */
     SV *re_trie_maxbuff = NULL;
     regnode *first_non_open = scan;
-
-
     GET_RE_DEBUG_FLAGS_DECL;
 #ifdef DEBUGGING
-    StructCopy(&zero_scan_data, &data_fake, scan_data_t);    
+    StructCopy(&zero_scan_data, &data_fake, scan_data_t);
 #endif
+
     if ( depth == 0 ) {
-        while (first_non_open && OP(first_non_open) == OPEN) 
+        while (first_non_open && OP(first_non_open) == OPEN)
             first_non_open=regnext(first_non_open);
     }
 
@@ -2321,7 +2380,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 
                    num++;
                    data_fake.flags = 0;
-                   if (data) {         
+                   if (data) {
                        data_fake.whilem_c = data->whilem_c;
                        data_fake.last_closep = data->last_closep;
                    }
@@ -2341,7 +2400,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 
                    /* we suppose the run is continuous, last=next...*/
                    minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
-                                         next, &data_fake, f,depth+1);
+                                         next, &data_fake,
+                                         stopparen, recursed, NULL, f,depth+1);
                    if (min1 > minnext)
                        min1 = minnext;
                    if (max1 < minnext + deltanext)
@@ -2374,7 +2434,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                if (flags & SCF_DO_STCLASS_OR) {
                    cl_or(pRExC_state, data->start_class, &accum);
                    if (min1) {
-                       cl_and(data->start_class, &and_with);
+                       cl_and(data->start_class, and_withp);
                        flags &= ~SCF_DO_STCLASS;
                    }
                }
@@ -2386,7 +2446,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    else {
                        /* Switch to OR mode: cache the old value of
                         * data->start_class */
-                       StructCopy(data->start_class, &and_with,
+                       INIT_AND_WITHP;
+                       StructCopy(data->start_class, and_withp,
                                   struct regnode_charclass_class);
                        flags &= ~SCF_DO_STCLASS_AND;
                        StructCopy(&accum, data->start_class,
@@ -2652,7 +2713,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                else
                    data->start_class->flags |= ANYOF_UNICODE_ALL;
                data->start_class->flags &= ~ANYOF_EOS;
-               cl_and(data->start_class, &and_with);
+               cl_and(data->start_class, and_withp);
            }
            flags &= ~SCF_DO_STCLASS;
        }
@@ -2700,7 +2761,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                        ANYOF_BITMAP_SET(data->start_class, uc);
                    data->start_class->flags &= ~ANYOF_EOS;
                }
-               cl_and(data->start_class, &and_with);
+               cl_and(data->start_class, and_withp);
            }
            flags &= ~SCF_DO_STCLASS;
        }
@@ -2747,8 +2808,15 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                }
                goto optimize_curly_tail;
            case CURLY:
-               mincount = ARG1(scan);
-               maxcount = ARG2(scan);
+               if (stopparen>0 && (OP(scan)==CURLYN || OP(scan)==CURLYM)
+                   && (scan->flags == stopparen))
+               {
+                   mincount = 1;
+                   maxcount = 1;
+               } else {
+                   mincount = ARG1(scan);
+                   maxcount = ARG2(scan);
+               }
                next = regnext(scan);
                if (OP(scan) == CURLYX) {
                    I32 lp = (data ? *(data->last_closep) : 0);
@@ -2784,7 +2852,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    f &= ~SCF_WHILEM_VISITED_POS;
 
                /* This will finish on WHILEM, setting scan, or on NULL: */
-               minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext, last, data,
+               minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext, 
+                                     last, data, stopparen, recursed, NULL,
                                      (mincount == 0
                                        ? (f & ~SCF_DO_SUBSTR) : f),depth+1);
 
@@ -2797,7 +2866,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    else if (flags & SCF_DO_STCLASS_AND) {
                        /* Switch to OR mode: cache the old value of
                         * data->start_class */
-                       StructCopy(data->start_class, &and_with,
+                       INIT_AND_WITHP;
+                       StructCopy(data->start_class, and_withp,
                                   struct regnode_charclass_class);
                        flags &= ~SCF_DO_STCLASS_AND;
                        StructCopy(&this_class, data->start_class,
@@ -2808,7 +2878,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                } else {                /* Non-zero len */
                    if (flags & SCF_DO_STCLASS_OR) {
                        cl_or(pRExC_state, data->start_class, &this_class);
-                       cl_and(data->start_class, &and_with);
+                       cl_and(data->start_class, and_withp);
                    }
                    else if (flags & SCF_DO_STCLASS_AND)
                        cl_and(data->start_class, &this_class);
@@ -2858,10 +2928,15 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    nxt = regnext(nxt);
                    if (OP(nxt) != CLOSE)
                        goto nogo;
+                   if (RExC_open_parens) {
+                       RExC_open_parens[ARG(nxt1)-1]=oscan; /*open->CURLYM*/
+                       RExC_close_parens[ARG(nxt1)-1]=nxt+2; /*close->while*/
+                   }
                    /* Now we know that nxt2 is the only contents: */
                    oscan->flags = (U8)ARG(nxt);
                    OP(oscan) = CURLYN;
                    OP(nxt1) = NOTHING; /* was OPEN. */
+
 #ifdef DEBUGGING
                    OP(nxt1 + 1) = OPTIMIZED; /* was count. */
                    NEXT_OFF(nxt1+ 1) = 0; /* just for consistancy. */
@@ -2898,8 +2973,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                        if (OP(nxt) != CLOSE)
                            FAIL("Panic opt close");
                        oscan->flags = (U8)ARG(nxt);
+                       if (RExC_open_parens) {
+                           RExC_open_parens[ARG(nxt1)-1]=oscan; /*open->CURLYM*/
+                           RExC_close_parens[ARG(nxt1)-1]=nxt2+1; /*close->NOTHING*/
+                       }
                        OP(nxt1) = OPTIMIZED;   /* was OPEN. */
                        OP(nxt) = OPTIMIZED;    /* was CLOSE. */
+
 #ifdef DEBUGGING
                        OP(nxt1 + 1) = OPTIMIZED; /* was count. */
                        OP(nxt + 1) = OPTIMIZED; /* was count. */
@@ -2923,7 +3003,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 #endif
                        /* Optimize again: */
                        study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
-                                   NULL, 0,depth+1);
+                                   NULL, stopparen, recursed, NULL, 0,depth+1);
                    }
                    else
                        oscan->flags = 0;
@@ -3255,7 +3335,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    break;
                }
                if (flags & SCF_DO_STCLASS_OR)
-                   cl_and(data->start_class, &and_with);
+                   cl_and(data->start_class, and_withp);
                flags &= ~SCF_DO_STCLASS;
            }
        }
@@ -3297,7 +3377,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     f |= SCF_WHILEM_VISITED_POS;
                 next = regnext(scan);
                 nscan = NEXTOPER(NEXTOPER(scan));
-                minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext, last, &data_fake, f,depth+1);
+                minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext, 
+                    last, &data_fake, stopparen, recursed, NULL, f, depth+1);
                 if (scan->flags) {
                     if (deltanext) {
                         vFAIL("Variable length lookbehind not implemented");
@@ -3369,8 +3450,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     f |= SCF_WHILEM_VISITED_POS;
                 next = regnext(scan);
                 nscan = NEXTOPER(NEXTOPER(scan));
-                
-                *minnextp = study_chunk(pRExC_state, &nscan, minnextp, &deltanext, last, &data_fake, f,depth+1);
+
+                *minnextp = study_chunk(pRExC_state, &nscan, minnextp, &deltanext, 
+                    last, &data_fake, stopparen, recursed, NULL, f,depth+1);
                 if (scan->flags) {
                     if (deltanext) {
                         vFAIL("Variable length lookbehind not implemented");
@@ -3380,17 +3462,16 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     }
                     scan->flags = (U8)*minnextp;
                 }
-                
+
                 *minnextp += min;
-                
-                    
+
                 if (f & SCF_DO_STCLASS_AND) {
                     const int was = (data->start_class->flags & ANYOF_EOS);
 
                     cl_and(data->start_class, &intrnl);
                     if (was)
                         data->start_class->flags |= ANYOF_EOS;
-                }                
+                }
                 if (data) {
                     if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
                         pars++;
@@ -3424,9 +3505,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 #endif
        }
        else if (OP(scan) == OPEN) {
-           pars++;
+           if (stopparen != (I32)ARG(scan))
+               pars++;
        }
        else if (OP(scan) == CLOSE) {
+           if (stopparen == (I32)ARG(scan)) {
+               break;
+           }
            if ((I32)ARG(scan) == is_par) {
                next = regnext(scan);
 
@@ -3436,11 +3521,73 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
            if (data)
                *(data->last_closep) = ARG(scan);
        }
+       else if (OP(scan) == GOSUB || OP(scan) == GOSTART) {
+           /* set the pointer */
+           I32 paren;
+           regnode *start;
+           regnode *end;
+           if (OP(scan) == GOSUB) {
+               paren = ARG(scan);
+               RExC_recurse[ARG2L(scan)] = scan;
+                start = RExC_open_parens[paren-1];
+                end   = RExC_close_parens[paren-1];
+            } else {
+                paren = 0;
+                start = RExC_rx->program + 1;
+                end   = RExC_opend;
+            }
+            assert(start);
+            assert(end);
+           if (!recursed) {
+               Newxz(recursed, (((RExC_npar)>>3) +1), U8);
+               SAVEFREEPV(recursed);
+           }
+           if (!PAREN_TEST(recursed,paren+1)) {
+               I32 deltanext = 0;
+               PAREN_SET(recursed,paren+1);
+
+               DEBUG_PEEP("goto",start,depth);
+               min += study_chunk(
+                       pRExC_state,
+                       &start,
+                       minlenp,
+                       &deltanext,
+                       end+1,
+                       data,
+                       paren,
+                       recursed,
+                       and_withp,
+                       flags,depth+1);
+               delta+=deltanext;
+               if (deltanext == I32_MAX) {
+                   is_inf = is_inf_internal = 1;
+                   delta=deltanext;
+               }
+               DEBUG_PEEP("rtrn",end,depth);
+               PAREN_UNSET(recursed,paren+1);
+           } else {
+               if (flags & SCF_DO_SUBSTR) {
+                   scan_commit(pRExC_state,data,minlenp);
+                   data->longest = &(data->longest_float);
+               }
+               is_inf = is_inf_internal = 1;
+               if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
+                   cl_anything(pRExC_state, data->start_class);
+               flags &= ~SCF_DO_STCLASS;
+           }
+       }
        else if (OP(scan) == EVAL) {
                if (data)
                    data->flags |= SF_HAS_EVAL;
        }
-       else if (OP(scan) == LOGICAL && scan->flags == 2) { /* Embedded follows */
+       else if ( OP(scan)==OPFAIL ) {
+           if (flags & SCF_DO_SUBSTR) {
+               scan_commit(pRExC_state,data,minlenp);
+               flags &= ~SCF_DO_SUBSTR;
+           }
+       }
+       else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */
+       {
                if (flags & SCF_DO_SUBSTR) {
                    scan_commit(pRExC_state,data,minlenp);
                    data->longest = &(data->longest_float);
@@ -3451,11 +3598,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                flags &= ~SCF_DO_STCLASS;
        }
 #ifdef TRIE_STUDY_OPT
-#ifdef FULL_TRIE_STUDY        
+#ifdef FULL_TRIE_STUDY
         else if (PL_regkind[OP(scan)] == TRIE) {
             /* NOTE - There is similar code to this block above for handling
-               BRANCH nodes on the initial study.  If you change stuff here 
+               BRANCH nodes on the initial study.  If you change stuff here
                check there too. */
+            regnode *trie_node= scan;
             regnode *tail= regnext(scan);
             reg_trie_data *trie = (reg_trie_data*)RExC_rx->data->data[ ARG(scan) ];
             I32 max1 = 0, min1 = I32_MAX;
@@ -3496,14 +3644,15 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
     
                     if (trie->jump[word]) {
                         if (!nextbranch)
-                            nextbranch = tail - trie->jump[0];
-                        scan= tail - trie->jump[word];
+                            nextbranch = trie_node + trie->jump[0];
+                        scan= trie_node + trie->jump[word];
                         /* We go from the jump point to the branch that follows
                            it. Note this means we need the vestigal unused branches
                            even though they arent otherwise used.
                          */
-                        minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
-                                              (regnode *)nextbranch, &data_fake, f,depth+1);
+                        minnext = study_chunk(pRExC_state, &scan, minlenp, 
+                            &deltanext, (regnode *)nextbranch, &data_fake, 
+                            stopparen, recursed, NULL, f,depth+1);
                     }
                     if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
                         nextbranch= regnext((regnode*)nextbranch);
@@ -3538,7 +3687,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             if (flags & SCF_DO_STCLASS_OR) {
                 cl_or(pRExC_state, data->start_class, &accum);
                 if (min1) {
-                    cl_and(data->start_class, &and_with);
+                    cl_and(data->start_class, and_withp);
                     flags &= ~SCF_DO_STCLASS;
                 }
             }
@@ -3550,7 +3699,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 else {
                     /* Switch to OR mode: cache the old value of
                      * data->start_class */
-                    StructCopy(data->start_class, &and_with,
+                   INIT_AND_WITHP;
+                    StructCopy(data->start_class, and_withp,
                                struct regnode_charclass_class);
                     flags &= ~SCF_DO_STCLASS_AND;
                     StructCopy(&accum, data->start_class,
@@ -3602,7 +3752,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
        data->flags &= ~SF_IN_PAR;
     }
     if (flags & SCF_DO_STCLASS_OR)
-       cl_and(data->start_class, &and_with);
+       cl_and(data->start_class, and_withp);
     if (flags & SCF_TRIE_RESTUDY)
         data->flags |=         SCF_TRIE_RESTUDY;
     
@@ -3615,10 +3765,11 @@ STATIC I32
 S_add_data(RExC_state_t *pRExC_state, I32 n, const char *s)
 {
     if (RExC_rx->data) {
+       const U32 count = RExC_rx->data->count;
        Renewc(RExC_rx->data,
-              sizeof(*RExC_rx->data) + sizeof(void*) * (RExC_rx->data->count + n - 1),
+              sizeof(*RExC_rx->data) + sizeof(void*) * (count + n - 1),
               char, struct reg_data);
-       Renew(RExC_rx->data->what, RExC_rx->data->count + n, U8);
+       Renew(RExC_rx->data->what, count + n, U8);
        RExC_rx->data->count += n;
     }
     else {
@@ -3685,14 +3836,17 @@ Perl_reginitcolors(pTHX)
  * Beware that the optimization-preparation code in here knows about some
  * of the structure of the compiled regexp.  [I'll say.]
  */
+
+
+
 #ifndef PERL_IN_XSUB_RE
-#define CORE_ONLY_BLOCK(c) {c}{
 #define RE_ENGINE_PTR &PL_core_reg_engine
 #else
-#define CORE_ONLY_BLOCK(c) {
 extern const struct regexp_engine my_reg_engine;
 #define RE_ENGINE_PTR &my_reg_engine
 #endif
+/* these make a few things look better, to avoid indentation */
+#define BEGIN_BLOCK {
 #define END_BLOCK }
  
 regexp *
@@ -3701,21 +3855,25 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     dVAR;
     GET_RE_DEBUG_FLAGS_DECL;
     DEBUG_r(if (!PL_colorset) reginitcolors());
-    CORE_ONLY_BLOCK(
+#ifndef PERL_IN_XSUB_RE
+    BEGIN_BLOCK
     /* Dispatch a request to compile a regexp to correct 
        regexp engine. */
     HV * const table = GvHV(PL_hintgv);
     if (table) {
         SV **ptr= hv_fetchs(table, "regcomp", FALSE);
-        if (ptr && SvIOK(*ptr)) {
+        if (ptr && SvIOK(*ptr) && SvIV(*ptr)) {
             const regexp_engine *eng=INT2PTR(regexp_engine*,SvIV(*ptr));
             DEBUG_COMPILE_r({
                 PerlIO_printf(Perl_debug_log, "Using engine %"UVxf"\n",
                     SvIV(*ptr));
             });            
-            return CALL_FPTR((eng->regcomp))(aTHX_ exp, xend, pm);
+            return CALLREGCOMP_ENG(eng, exp, xend, pm);
         } 
-    })
+    }
+    END_BLOCK
+#endif
+    BEGIN_BLOCK    
     register regexp *r;
     regnode *scan;
     regnode *first;
@@ -3761,7 +3919,13 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     RExC_emit = &PL_regdummy;
     RExC_whilem_seen = 0;
     RExC_charnames = NULL;
-    
+    RExC_open_parens = NULL;
+    RExC_close_parens = NULL;
+    RExC_opend = NULL;
+    RExC_paren_names = NULL;
+    RExC_recurse = NULL;
+    RExC_recurse_count = 0;
+
 #if 0 /* REGC() is (currently) a NOP at the first pass.
        * Clever compilers notice this and complain. --jhi */
     REGC((U8)REG_MAGIC, (char*)RExC_emit);
@@ -3771,15 +3935,14 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
        RExC_precomp = NULL;
        return(NULL);
     }
-    DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, "Required "));
-    DEBUG_COMPILE_r(PerlIO_printf(Perl_debug_log, "size %"IVdf" nodes ", (IV)RExC_size));
-    DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, "\nStarting second pass (creation)\n"));
     DEBUG_PARSE_r({
+        PerlIO_printf(Perl_debug_log, 
+            "Required size %"IVdf" nodes\n"
+            "Starting second pass (creation)\n", 
+            (IV)RExC_size);
         RExC_lastnum=0; 
         RExC_lastparse=NULL; 
     });
-
-    
     /* Small enough for pointer-storage convention?
        If extralen==0, this means that we will not need long jumps. */
     if (RExC_size >= 0x10000L && RExC_extralen)
@@ -3815,8 +3978,17 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
 
     r->substrs = 0;                    /* Useful during FAIL. */
     r->startp = 0;                     /* Useful during FAIL. */
-    r->endp = 0;                       /* Useful during FAIL. */
+    r->endp = 0;                       
+    r->paren_names = 0;
+    
+    if (RExC_seen & REG_SEEN_RECURSE) {
+        Newxz(RExC_open_parens, RExC_npar,regnode *);
+        SAVEFREEPV(RExC_open_parens);
+        Newxz(RExC_close_parens,RExC_npar,regnode *);
+        SAVEFREEPV(RExC_close_parens);
+    }
 
+    /* Useful during FAIL. */
     Newxz(r->offsets, 2*RExC_size+1, U32); /* MJD 20001228 */
     if (r->offsets) {
        r->offsets[0] = RExC_size;
@@ -3842,14 +4014,18 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     r->data = 0;
     if (reg(pRExC_state, 0, &flags,1) == NULL)
        return(NULL);
+
     /* XXXX To minimize changes to RE engine we always allocate
        3-units-long substrs field. */
     Newx(r->substrs, 1, struct reg_substr_data);
+    if (RExC_recurse_count) {
+        Newxz(RExC_recurse,RExC_recurse_count,regnode *);
+        SAVEFREEPV(RExC_recurse);
+    }
 
 reStudy:
     r->minlen = minlen = sawplus = sawopen = 0;
     Zero(r->substrs, 1, struct reg_substr_data);
-    StructCopy(&zero_scan_data, &data, scan_data_t);
 
 #ifdef TRIE_STUDY_OPT
     if ( restudied ) {
@@ -3860,9 +4036,13 @@ reStudy:
            SvREFCNT_dec(data.longest_float);
            SvREFCNT_dec(data.last_found);
        }
+       StructCopy(&zero_scan_data, &data, scan_data_t);
     } else {
+        StructCopy(&zero_scan_data, &data, scan_data_t);
         copyRExC_state=RExC_state;
     }
+#else
+    StructCopy(&zero_scan_data, &data, scan_data_t);
 #endif    
 
     /* Dig out information for optimizations. */
@@ -3877,7 +4057,7 @@ reStudy:
 
     /* testing for BRANCH here tells us whether there is "must appear"
        data in the pattern. If there is then we can use it for optimisations */
-    if (OP(scan) != BRANCH) {  /* Only one top-level choice. */
+    if (!(RExC_seen & REG_TOP_LEVEL_BRANCHES)) { /*  Only one top-level choice. */
        I32 fake;
        STRLEN longest_float_length, longest_fixed_length;
        struct regnode_charclass_class ch_class; /* pointed to by data */
@@ -3979,13 +4159,13 @@ reStudy:
 
        /* Scan is after the zeroth branch, first is atomic matcher. */
 #ifdef TRIE_STUDY_OPT
-       DEBUG_COMPILE_r(
+       DEBUG_PARSE_r(
            if (!restudied)
                PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n",
                              (IV)(first - scan + 1))
         );
 #else
-       DEBUG_COMPILE_r(
+       DEBUG_PARSE_r(
            PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n",
                (IV)(first - scan + 1))
         );
@@ -4019,7 +4199,8 @@ reStudy:
        data.last_closep = &last_close;
 
        minlen = study_chunk(pRExC_state, &first, &minlen, &fake, scan + RExC_size, /* Up to end */
-                            &data, SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag,0);
+            &data, -1, NULL, NULL,
+            SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag,0);
 
        
         CHECK_RESTUDY_GOTO;
@@ -4195,7 +4376,7 @@ reStudy:
        data.last_closep = &last_close;
 
        minlen = study_chunk(pRExC_state, &scan, &minlen, &fake, scan + RExC_size,
-           &data, SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS,0);
+           &data, -1, NULL, NULL, SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS,0);
 
         CHECK_RESTUDY_GOTO;
 
@@ -4234,13 +4415,20 @@ reStudy:
        r->reganch |= ROPT_EVAL_SEEN;
     if (RExC_seen & REG_SEEN_CANY)
        r->reganch |= ROPT_CANY_SEEN;
+    if (RExC_paren_names)
+        r->paren_names = (HV*)SvREFCNT_inc(RExC_paren_names);
+    else
+        r->paren_names = NULL;
+               
+    if (RExC_recurse_count) {
+        for ( ; RExC_recurse_count ; RExC_recurse_count-- ) {
+            const regnode *scan = RExC_recurse[RExC_recurse_count-1];
+            ARG2L_SET( scan, RExC_open_parens[ARG(scan)-1] - scan );
+        }
+    }
     Newxz(r->startp, RExC_npar, I32);
     Newxz(r->endp, RExC_npar, I32);
     
-    
-    if (RExC_charnames) 
-        SvREFCNT_dec((SV*)(RExC_charnames));
-
     DEBUG_r( RX_DEBUG_on(r) );
     DEBUG_DUMP_r({
         PerlIO_printf(Perl_debug_log,"Final program:\n");
@@ -4266,6 +4454,96 @@ reStudy:
 #undef END_BLOCK
 #undef RE_ENGINE_PTR
 
+#ifndef PERL_IN_XSUB_RE
+SV*
+Perl_reg_named_buff_sv(pTHX_ SV* namesv)
+{
+    I32 parno = 0; /* no match */
+    if (PL_curpm) {
+        const REGEXP * const rx = PM_GETRE(PL_curpm);
+        if (rx && rx->paren_names) {            
+            HE *he_str = hv_fetch_ent( rx->paren_names, namesv, 0, 0 );
+            if (he_str) {
+                IV i;
+                SV* sv_dat=HeVAL(he_str);
+                I32 *nums=(I32*)SvPVX(sv_dat);
+                for ( i=0; i<SvIVX(sv_dat); i++ ) {
+                    if ((I32)(rx->lastparen) >= nums[i] &&
+                        rx->endp[nums[i]] != -1) 
+                    {
+                        parno = nums[i];
+                        break;
+                    }
+                }
+            }
+        }
+    }
+    if ( !parno ) {
+        return 0;
+    } else {
+        GV *gv_paren;
+        SV *sv= sv_newmortal();
+        Perl_sv_setpvf(aTHX_ sv, "%"IVdf,(IV)parno);
+        gv_paren= Perl_gv_fetchsv(aTHX_ sv, GV_ADD, SVt_PVGV);
+        return GvSVn(gv_paren);
+    }
+}
+#endif
+
+/* Scans the name of a named buffer from the pattern.
+ * If flags is REG_RSN_RETURN_NULL returns null.
+ * If flags is REG_RSN_RETURN_NAME returns an SV* containing the name
+ * If flags is REG_RSN_RETURN_DATA returns the data SV* corresponding
+ * to the parsed name as looked up in the RExC_paren_names hash.
+ * If there is an error throws a vFAIL().. type exception.
+ */
+
+#define REG_RSN_RETURN_NULL    0
+#define REG_RSN_RETURN_NAME    1
+#define REG_RSN_RETURN_DATA    2
+
+STATIC SV*
+S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) {
+    char *name_start = RExC_parse;
+    if ( UTF ) {
+       STRLEN numlen;
+        while( isIDFIRST_uni(utf8n_to_uvchr((U8*)RExC_parse,
+            RExC_end - RExC_parse, &numlen, UTF8_ALLOW_DEFAULT)))
+        {
+                RExC_parse += numlen;
+        }
+    } else {
+        while( isIDFIRST(*RExC_parse) )
+           RExC_parse++;
+    }
+    if ( flags ) {
+        SV* sv_name = sv_2mortal(Perl_newSVpvn(aTHX_ name_start,
+            (int)(RExC_parse - name_start)));
+       if (UTF)
+            SvUTF8_on(sv_name);
+        if ( flags == REG_RSN_RETURN_NAME)
+            return sv_name;
+        else if (flags==REG_RSN_RETURN_DATA) {
+            HE *he_str = NULL;
+            SV *sv_dat = NULL;
+            if ( ! sv_name )      /* should not happen*/
+                Perl_croak(aTHX_ "panic: no svname in reg_scan_name");
+            if (RExC_paren_names)
+                he_str = hv_fetch_ent( RExC_paren_names, sv_name, 0, 0 );
+            if ( he_str )
+                sv_dat = HeVAL(he_str);
+            if ( ! sv_dat )
+                vFAIL("Reference to nonexistent named group");
+            return sv_dat;
+        }
+        else {
+            Perl_croak(aTHX_ "panic: bad flag in reg_scan_name");
+        }
+        /* NOT REACHED */
+    }
+    return NULL;
+}
+
 #define DEBUG_PARSE_MSG(funcname)     DEBUG_PARSE_r({           \
     int rem=(int)(RExC_end - RExC_parse);                       \
     int cut;                                                    \
@@ -4290,9 +4568,9 @@ reStudy:
     else                                                        \
        num=REG_NODE_NUM(RExC_emit);                             \
     if (RExC_lastnum!=num)                                      \
-       PerlIO_printf(Perl_debug_log,"|%4d",num);                 \
+       PerlIO_printf(Perl_debug_log,"|%4d",num);                \
     else                                                        \
-       PerlIO_printf(Perl_debug_log,"|%4s","");                  \
+       PerlIO_printf(Perl_debug_log,"|%4s","");                 \
     PerlIO_printf(Perl_debug_log,"|%*s%-4s",                    \
         (int)((depth*2)), "",                                   \
         (funcname)                                              \
@@ -4307,6 +4585,10 @@ reStudy:
     DEBUG_PARSE_MSG((funcname));                            \
     PerlIO_printf(Perl_debug_log,"%4s","\n");               \
 })
+#define DEBUG_PARSE_FMT(funcname,fmt,args)     DEBUG_PARSE_r({           \
+    DEBUG_PARSE_MSG((funcname));                            \
+    PerlIO_printf(Perl_debug_log,fmt "\n",args);               \
+})
 /*
  - reg - regular expression, i.e. main body or parenthesized thing
  *
@@ -4369,19 +4651,122 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            paren = *RExC_parse++;
            ret = NULL;                 /* For look-ahead/behind. */
            switch (paren) {
+
            case '<':           /* (?<...) */
-               RExC_seen |= REG_SEEN_LOOKBEHIND;
                if (*RExC_parse == '!')
                    paren = ',';
-               if (*RExC_parse != '=' && *RExC_parse != '!')
-                   goto unknown;
+               else if (*RExC_parse != '=') 
+               {               /* (?<...>) */
+                   char *name_start;
+                   SV *svname;
+                   paren= '>';
+            case '\'':          /* (?'...') */
+                   name_start= RExC_parse;
+                   svname = reg_scan_name(pRExC_state,
+                       SIZE_ONLY ?  /* reverse test from the others */
+                       REG_RSN_RETURN_NAME : 
+                       REG_RSN_RETURN_NULL);
+                   if (RExC_parse == name_start)
+                       goto unknown;
+                   if (*RExC_parse != paren)
+                       vFAIL2("Sequence (?%c... not terminated",
+                           paren=='>' ? '<' : paren);
+                   if (SIZE_ONLY) {
+                       HE *he_str;
+                       SV *sv_dat = NULL;
+                        if (!svname) /* shouldnt happen */
+                            Perl_croak(aTHX_
+                                "panic: reg_scan_name returned NULL");
+                        if (!RExC_paren_names) {
+                            RExC_paren_names= newHV();
+                            sv_2mortal((SV*)RExC_paren_names);
+                        }
+                        he_str = hv_fetch_ent( RExC_paren_names, svname, 1, 0 );
+                        if ( he_str )
+                            sv_dat = HeVAL(he_str);
+                        if ( ! sv_dat ) {
+                            /* croak baby croak */
+                            Perl_croak(aTHX_
+                                "panic: paren_name hash element allocation failed");
+                        } else if ( SvPOK(sv_dat) ) {
+                            IV count=SvIV(sv_dat);
+                            I32 *pv=(I32*)SvGROW(sv_dat,SvCUR(sv_dat)+sizeof(I32)+1);
+                            SvCUR_set(sv_dat,SvCUR(sv_dat)+sizeof(I32));
+                            pv[count]=RExC_npar;
+                            SvIVX(sv_dat)++;
+                        } else {
+                            (void)SvUPGRADE(sv_dat,SVt_PVNV);
+                            sv_setpvn(sv_dat, (char *)&(RExC_npar), sizeof(I32));
+                            SvIOK_on(sv_dat);
+                            SvIVX(sv_dat)= 1;
+                        }
+
+                        /*sv_dump(sv_dat);*/
+                    }
+                    nextchar(pRExC_state);
+                   paren = 1;
+                   goto capturing_parens;
+               }
+                RExC_seen |= REG_SEEN_LOOKBEHIND;
                RExC_parse++;
            case '=':           /* (?=...) */
            case '!':           /* (?!...) */
+               if (*RExC_parse == ')')
+                   goto do_op_fail;
                RExC_seen_zerolen++;
            case ':':           /* (?:...) */
            case '>':           /* (?>...) */
                break;
+            case 'C':           /* (?CUT) and (?COMMIT) */
+               if (RExC_parse[0] == 'O' &&
+                   RExC_parse[1] == 'M' &&
+                   RExC_parse[2] == 'M' &&
+                   RExC_parse[3] == 'I' &&
+                   RExC_parse[4] == 'T' &&
+                   RExC_parse[5] == ')')
+               {
+                   RExC_parse+=5;
+                   ret = reg_node(pRExC_state, COMMIT);
+                } else if (
+                    RExC_parse[0] == 'U' &&
+                    RExC_parse[1] == 'T' &&
+                    RExC_parse[2] == ')') 
+                {
+                    RExC_parse+=2;
+                    ret = reg_node(pRExC_state, CUT);
+               } else {
+                   vFAIL("Sequence (?C... not terminated");
+               }
+               nextchar(pRExC_state);
+               return ret;
+               break;
+            case 'E':            /* (?ERROR) */
+                if (RExC_parse[0] == 'R' &&
+                    RExC_parse[1] == 'R' &&
+                    RExC_parse[2] == 'O' &&
+                    RExC_parse[3] == 'R' &&
+                    RExC_parse[4] == ')') 
+                {
+                    RExC_parse+=4;
+                    ret = reg_node(pRExC_state, OPERROR);
+                } else {
+                    vFAIL("Sequence (?E... not terminated"); 
+                }
+               nextchar(pRExC_state);
+               return ret;
+                break;                
+            case 'F':
+                if (RExC_parse[0] == 'A' &&
+                    RExC_parse[1] == 'I' &&
+                    RExC_parse[2] == 'L')
+                    RExC_parse+=3;
+                if (*RExC_parse != ')')
+                   vFAIL("Sequence (?FAIL) or (?F) not terminated");
+             do_op_fail:
+               ret = reg_node(pRExC_state, OPFAIL);
+               nextchar(pRExC_state);
+               return ret;
+               break;
            case '$':           /* (?$...) */
            case '@':           /* (?@...) */
                vFAIL2("Sequence (?%c...) not implemented", (int)paren);
@@ -4394,6 +4779,59 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                nextchar(pRExC_state);
                *flagp = TRYAGAIN;
                return NULL;
+           case '0' :           /* (?0) */
+           case 'R' :           /* (?R) */
+               if (*RExC_parse != ')')
+                   FAIL("Sequence (?R) not terminated");
+               ret = reg_node(pRExC_state, GOSTART);
+               nextchar(pRExC_state);
+               return ret;
+               /*notreached*/
+            { /* named and numeric backreferences */
+                I32 num;
+                char * parse_start;
+            case '&':            /* (?&NAME) */
+                parse_start = RExC_parse - 1;
+                {
+                   SV *sv_dat = reg_scan_name(pRExC_state,
+                       SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
+                    num = sv_dat ? *((I32 *)SvPVX(sv_dat)) : 0;
+                }
+                goto gen_recurse_regop;
+                /* NOT REACHED */
+            case '1': case '2': case '3': case '4': /* (?1) */
+           case '5': case '6': case '7': case '8': case '9':
+               RExC_parse--;
+               num = atoi(RExC_parse);
+               parse_start = RExC_parse - 1; /* MJD */
+               while (isDIGIT(*RExC_parse))
+                       RExC_parse++;
+               if (*RExC_parse!=')') 
+                   vFAIL("Expecting close bracket");
+                       
+              gen_recurse_regop:
+                ret = reganode(pRExC_state, GOSUB, num);
+                if (!SIZE_ONLY) {
+                   if (num > (I32)RExC_rx->nparens) {
+                       RExC_parse++;
+                       vFAIL("Reference to nonexistent group");
+                   }
+                   ARG2L_SET( ret, RExC_recurse_count++);
+                    RExC_emit++;
+                   DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
+                       "Recurse #%"UVuf" to %"IVdf"\n", (UV)ARG(ret), (IV)ARG2L(ret)));
+               } else {
+                   RExC_size++;
+               }
+               RExC_seen |= REG_SEEN_RECURSE;
+                Set_Node_Length(ret, 1 + regarglen[OP(ret)]); /* MJD */
+               Set_Node_Offset(ret, parse_start); /* MJD */
+
+                nextchar(pRExC_state);
+                return ret;
+            } /* named and numeric backreferences */
+            /* NOT REACHED */
+
            case 'p':           /* (?p...) */
                if (SIZE_ONLY && ckWARN2(WARN_DEPRECATED, WARN_REGEXP))
                    vWARNdep(RExC_parse, "(?p{}) is deprecated - use (??{})");
@@ -4476,6 +4914,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            }
            case '(':           /* (?(?{...})...) and (?(?=...)...) */
            {
+               int is_define= 0;
                if (RExC_parse[0] == '?') {        /* (?(?...)) */
                    if (RExC_parse[1] == '=' || RExC_parse[1] == '!'
                        || RExC_parse[1] == '<'
@@ -4489,6 +4928,55 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                        goto insert_if;
                    }
                }
+               else if ( RExC_parse[0] == '<'     /* (?(<NAME>)...) */
+                        || RExC_parse[0] == '\'' ) /* (?('NAME')...) */
+               {
+                   char ch = RExC_parse[0] == '<' ? '>' : '\'';
+                   char *name_start= RExC_parse++;
+                   I32 num = 0;
+                   SV *sv_dat=reg_scan_name(pRExC_state,
+                       SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
+                   if (RExC_parse == name_start || *RExC_parse != ch)
+                        vFAIL2("Sequence (?(%c... not terminated",
+                            (ch == '>' ? '<' : ch));
+                    RExC_parse++;
+                   if (!SIZE_ONLY) {
+                        num = add_data( pRExC_state, 1, "S" );
+                        RExC_rx->data->data[num]=(void*)sv_dat;
+                        SvREFCNT_inc(sv_dat);
+                    }
+                    ret = reganode(pRExC_state,NGROUPP,num);
+                    goto insert_if_check_paren;
+               }
+               else if (RExC_parse[0] == 'D' &&
+                        RExC_parse[1] == 'E' &&
+                        RExC_parse[2] == 'F' &&
+                        RExC_parse[3] == 'I' &&
+                        RExC_parse[4] == 'N' &&
+                        RExC_parse[5] == 'E')
+               {
+                   ret = reganode(pRExC_state,DEFINEP,0);
+                   RExC_parse +=6 ;
+                   is_define = 1;
+                   goto insert_if_check_paren;
+               }
+               else if (RExC_parse[0] == 'R') {
+                   RExC_parse++;
+                   parno = 0;
+                   if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
+                       parno = atoi(RExC_parse++);
+                       while (isDIGIT(*RExC_parse))
+                           RExC_parse++;
+                   } else if (RExC_parse[0] == '&') {
+                       SV *sv_dat;
+                       RExC_parse++;
+                       sv_dat = reg_scan_name(pRExC_state,
+                           SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
+                       parno = sv_dat ? *((I32 *)SvPVX(sv_dat)) : 0;
+                   }
+                   ret = reganode(pRExC_state,INSUBP,parno); 
+                   goto insert_if_check_paren;
+               }
                else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
                     /* (?(1)...) */
                    char c;
@@ -4498,6 +4986,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                        RExC_parse++;
                     ret = reganode(pRExC_state, GROUPP, parno);
 
+                 insert_if_check_paren:
                    if ((c = *nextchar(pRExC_state)) != ')')
                        vFAIL("Switch condition not recognized");
                  insert_if:
@@ -4511,6 +5000,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                    if (flags&HASWIDTH)
                        *flagp |= HASWIDTH;
                    if (c == '|') {
+                       if (is_define) 
+                           vFAIL("(?(DEFINE)....) does not allow branches");
                        lastbr = reganode(pRExC_state, IFTHEN, 0); /* Fake one for optimizer. */
                         regbranch(pRExC_state, &flags, 1,depth+1);
                         REGTAIL(pRExC_state, ret, lastbr);
@@ -4604,9 +5095,16 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            }
        }
        else {                  /* (...) */
+         capturing_parens:
            parno = RExC_npar;
            RExC_npar++;
            ret = reganode(pRExC_state, OPEN, parno);
+           if (!SIZE_ONLY && RExC_seen & REG_SEEN_RECURSE) {
+               DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
+                       "Setting open paren #%"IVdf" to %d\n", 
+                       (IV)parno, REG_NODE_NUM(ret)));
+               RExC_open_parens[parno-1]= ret;
+           }
             Set_Node_Length(ret, 1); /* MJD */
             Set_Node_Offset(ret, RExC_parse); /* MJD */
            is_open = 1;
@@ -4624,10 +5122,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
        return(NULL);
     if (*RExC_parse == '|') {
        if (!SIZE_ONLY && RExC_extralen) {
-           reginsert(pRExC_state, BRANCHJ, br);
+           reginsert(pRExC_state, BRANCHJ, br, depth+1);
        }
        else {                  /* MJD */
-           reginsert(pRExC_state, BRANCH, br);
+           reginsert(pRExC_state, BRANCH, br, depth+1);
             Set_Node_Length(br, paren != 0);
             Set_Node_Offset_To_R(br-RExC_emit_start, parse_start-RExC_start);
         }
@@ -4672,6 +5170,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            break;
        case 1:
            ender = reganode(pRExC_state, CLOSE, parno);
+           if (!SIZE_ONLY && RExC_seen & REG_SEEN_RECURSE) {
+               DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
+                       "Setting close paren #%"IVdf" to %d\n", 
+                       (IV)parno, REG_NODE_NUM(ender)));
+               RExC_close_parens[parno-1]= ender;
+           }       
             Set_Node_Offset(ender,RExC_parse+1); /* MJD */
             Set_Node_Length(ender,1); /* MJD */
            break;
@@ -4686,11 +5190,18 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            break;
        case 0:
            ender = reg_node(pRExC_state, END);
+           if (!SIZE_ONLY) {
+                assert(!RExC_opend); /* there can only be one! */
+                RExC_opend = ender;
+            }
            break;
        }
-        REGTAIL_STUDY(pRExC_state, lastbr, ender);
+        REGTAIL(pRExC_state, lastbr, ender);
 
        if (have_branch && !SIZE_ONLY) {
+           if (depth==1)
+               RExC_seen |= REG_TOP_LEVEL_BRANCHES;
+
            /* Hook the tails of the branches to the closing node. */
            for (br = ret; br; br = regnext(br)) {
                const U8 op = PL_regkind[OP(br)];
@@ -4714,7 +5225,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 
            if (paren == '>')
                node = SUSPEND, flag = 0;
-           reginsert(pRExC_state, node,ret);
+           reginsert(pRExC_state, node,ret, depth+1);
            Set_Node_Cur_Length(ret);
            Set_Node_Offset(ret, parse_start + 1);
            ret->flags = flag;
@@ -4875,7 +5386,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
        do_curly:
            if ((flags&SIMPLE)) {
                RExC_naughty += 2 + RExC_naughty / 2;
-               reginsert(pRExC_state, CURLY, ret);
+               reginsert(pRExC_state, CURLY, ret, depth+1);
                 Set_Node_Offset(ret, parse_start+1); /* MJD */
                 Set_Node_Cur_Length(ret);
            }
@@ -4885,11 +5396,11 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                w->flags = 0;
                 REGTAIL(pRExC_state, ret, w);
                if (!SIZE_ONLY && RExC_extralen) {
-                   reginsert(pRExC_state, LONGJMP,ret);
-                   reginsert(pRExC_state, NOTHING,ret);
+                   reginsert(pRExC_state, LONGJMP,ret, depth+1);
+                   reginsert(pRExC_state, NOTHING,ret, depth+1);
                    NEXT_OFF(ret) = 3;  /* Go over LONGJMP. */
                }
-               reginsert(pRExC_state, CURLYX,ret);
+               reginsert(pRExC_state, CURLYX,ret, depth+1);
                                 /* MJD hk */
                 Set_Node_Offset(ret, parse_start+1);
                 Set_Node_Length(ret,
@@ -4946,7 +5457,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     *flagp = (op != '+') ? (WORST|SPSTART|HASWIDTH) : (WORST|HASWIDTH);
 
     if (op == '*' && (flags&SIMPLE)) {
-       reginsert(pRExC_state, STAR, ret);
+       reginsert(pRExC_state, STAR, ret, depth+1);
        ret->flags = 0;
        RExC_naughty += 4;
     }
@@ -4955,7 +5466,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
        goto do_curly;
     }
     else if (op == '+' && (flags&SIMPLE)) {
-       reginsert(pRExC_state, PLUS, ret);
+       reginsert(pRExC_state, PLUS, ret, depth+1);
        ret->flags = 0;
        RExC_naughty += 3;
     }
@@ -4975,12 +5486,27 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
               origparse);
     }
 
-    if (*RExC_parse == '?') {
+    if (RExC_parse < RExC_end && *RExC_parse == '?') {
        nextchar(pRExC_state);
-       reginsert(pRExC_state, MINMOD, ret);
+       reginsert(pRExC_state, MINMOD, ret, depth+1);
         REGTAIL(pRExC_state, ret, ret + NODE_STEP_REGNODE);
     }
-    if (ISMULT2(RExC_parse)) {
+#ifndef REG_ALLOW_MINMOD_SUSPEND
+    else
+#endif
+    if (RExC_parse < RExC_end && *RExC_parse == '+') {
+        regnode *ender;
+        nextchar(pRExC_state);
+        ender = reg_node(pRExC_state, SUCCEED);
+        REGTAIL(pRExC_state, ret, ender);
+        reginsert(pRExC_state, SUSPEND, ret, depth+1);
+        ret->flags = 0;
+        ender = reg_node(pRExC_state, TAIL);
+        REGTAIL(pRExC_state, ret, ender);
+        /*ret= ender;*/
+    }
+
+    if (RExC_parse < RExC_end && ISMULT2(RExC_parse)) {
        RExC_parse++;
        vFAIL("Nested quantifiers");
     }
@@ -5093,6 +5619,7 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep)
         if (!RExC_charnames) {
             /* make sure our cache is allocated */
             RExC_charnames = newHV();
+            sv_2mortal((SV*)RExC_charnames);
         } 
             /* see if we have looked this one up before */
         he_str = hv_fetch_ent( RExC_charnames, sv_name, 0, 0 );
@@ -5500,6 +6027,47 @@ tryagain:
             ++RExC_parse;
             ret= reg_namedseq(pRExC_state, NULL); 
             break;
+       case 'k':    /* Handle \k<NAME> and \k'NAME' */
+        {   
+            char ch= RExC_parse[1];        
+           if (ch != '<' && ch != '\'') {
+               if (SIZE_ONLY)
+                   vWARN( RExC_parse + 1, 
+                       "Possible broken named back reference treated as literal k");
+               parse_start--;
+               goto defchar;
+           } else {
+               char* name_start = (RExC_parse += 2);
+               I32 num = 0;
+                SV *sv_dat = reg_scan_name(pRExC_state,
+                    SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
+                ch= (ch == '<') ? '>' : '\'';
+                    
+                if (RExC_parse == name_start || *RExC_parse != ch)
+                    vFAIL2("Sequence \\k%c... not terminated",
+                        (ch == '>' ? '<' : ch));
+                
+                RExC_sawback = 1;
+                ret = reganode(pRExC_state,
+                          (U8)(FOLD ? (LOC ? NREFFL : NREFF) : NREF),
+                          num);
+                *flagp |= HASWIDTH;
+                
+               
+                if (!SIZE_ONLY) {
+                    num = add_data( pRExC_state, 1, "S" );
+                    ARG_SET(ret,num);
+                    RExC_rx->data->data[num]=(void*)sv_dat;
+                    SvREFCNT_inc(sv_dat);
+                }    
+                /* override incorrect value set in reganode MJD */
+                Set_Node_Offset(ret, parse_start+1);
+                Set_Node_Cur_Length(ret); /* MJD */
+                nextchar(pRExC_state);
+                              
+            }
+            break;
+        }            
        case 'n':
        case 'r':
        case 't':
@@ -6939,6 +7507,20 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
     if (SIZE_ONLY) {
        SIZE_ALIGN(RExC_size);
        RExC_size += 2;
+       /* 
+          We can't do this:
+          
+          assert(2==regarglen[op]+1); 
+       
+          Anything larger than this has to allocate the extra amount.
+          If we changed this to be:
+          
+          RExC_size += (1 + regarglen[op]);
+          
+          then it wouldn't matter. Its not clear what side effect
+          might come from that so its not done so far.
+          -- dmq
+       */
        return(ret);
     }
 
@@ -6979,24 +7561,44 @@ S_reguni(pTHX_ const RExC_state_t *pRExC_state, UV uv, char* s)
 * Means relocating the operand.
 */
 STATIC void
-S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd)
+S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
 {
     dVAR;
     register regnode *src;
     register regnode *dst;
     register regnode *place;
     const int offset = regarglen[(U8)op];
+    const int size = NODE_STEP_REGNODE + offset;
     GET_RE_DEBUG_FLAGS_DECL;
 /* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
-
+    DEBUG_PARSE_FMT("inst"," - %s",reg_name[op]);
     if (SIZE_ONLY) {
-       RExC_size += NODE_STEP_REGNODE + offset;
+       RExC_size += size;
        return;
     }
 
     src = RExC_emit;
-    RExC_emit += NODE_STEP_REGNODE + offset;
+    RExC_emit += size;
     dst = RExC_emit;
+    if (RExC_open_parens) {
+        int paren;
+        DEBUG_PARSE_FMT("inst"," - %d",RExC_npar);
+        for ( paren=0 ; paren < RExC_npar ; paren++ ) {
+            if ( RExC_open_parens[paren] >= opnd ) {
+                DEBUG_PARSE_FMT("open"," - %d",size);
+                RExC_open_parens[paren] += size;
+            } else {
+                DEBUG_PARSE_FMT("open"," - %s","ok");
+            }
+            if ( RExC_close_parens[paren] >= opnd ) {
+                DEBUG_PARSE_FMT("close"," - %d",size);
+                RExC_close_parens[paren] += size;
+            } else {
+                DEBUG_PARSE_FMT("close"," - %s","ok");
+            }
+        }
+    }
+
     while (src > opnd) {
        StructCopy(--src, --dst, regnode);
         if (RExC_offsets) {     /* MJD 20010112 */
@@ -7060,8 +7662,11 @@ S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, const regnode *val,U32 de
             SV * const mysv=sv_newmortal();
             DEBUG_PARSE_MSG((scan==p ? "tail" : ""));
             regprop(RExC_rx, mysv, scan);
-            PerlIO_printf(Perl_debug_log, "~ %s (%d)\n",
-                SvPV_nolen_const(mysv), REG_NODE_NUM(scan));
+            PerlIO_printf(Perl_debug_log, "~ %s (%d) %s %s\n",
+                SvPV_nolen_const(mysv), REG_NODE_NUM(scan),
+                    (temp == NULL ? "->" : ""),
+                    (temp == NULL ? reg_name[OP(val)] : "")
+            );
         });
         if (temp == NULL)
             break;
@@ -7138,10 +7743,10 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p, const regnode *val,
             SV * const mysv=sv_newmortal();
             DEBUG_PARSE_MSG((scan==p ? "tsdy" : ""));
             regprop(RExC_rx, mysv, scan);
-            PerlIO_printf(Perl_debug_log, "~ %s (%s) (%d)\n",
+            PerlIO_printf(Perl_debug_log, "~ %s (%d) -> %s\n",
                 SvPV_nolen_const(mysv),
-                reg_name[exact],
-                REG_NODE_NUM(scan));
+                REG_NODE_NUM(scan),
+                reg_name[exact]);
         });
        if (temp == NULL)
            break;
@@ -7294,7 +7899,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
     if (OP(o) > REGNODE_MAX)           /* regnode.type is unsigned */
        /* It would be nice to FAIL() here, but this may be called from
           regexec.c, and it would be hard to supply pRExC_state. */
-       Perl_croak(aTHX_ "Corrupted regexp opcode");
+       Perl_croak(aTHX_ "Corrupted regexp opcode %d > %d", (int)OP(o), (int)REGNODE_MAX);
     sv_catpv(sv, reg_name[OP(o)]); /* Take off const! */
 
     k = PL_regkind[OP(o)];
@@ -7330,7 +7935,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
             Perl_sv_catpvf(aTHX_ sv,
                 "<S:%"UVuf"/%"IVdf" W:%"UVuf" L:%"UVuf"/%"UVuf" C:%"UVuf"/%"UVuf">",
                 (UV)trie->startstate,
-                (IV)trie->laststate-1,
+                (IV)trie->statecount-1, /* -1 because of the unused 0 element */
                 (UV)trie->wordcount,
                 (UV)trie->minlen,
                 (UV)trie->maxlen,
@@ -7369,8 +7974,10 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
     }
     else if (k == WHILEM && o->flags)                  /* Ordinal/of */
        Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
-    else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP )
+    else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP) 
        Perl_sv_catpvf(aTHX_ sv, "%d", (int)ARG(o));    /* Parenth number */
+    else if (k == GOSUB) 
+       Perl_sv_catpvf(aTHX_ sv, "%d[%+d]", (int)ARG(o),(int)ARG2L(o)); /* Paren and offset */
     else if (k == LOGICAL)
        Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);     /* 2: embedded, otherwise 1 */
     else if (k == ANYOF) {
@@ -7595,6 +8202,8 @@ Perl_pregfree(pTHX_ struct regexp *r)
            SvREFCNT_dec(r->float_utf8);
        Safefree(r->substrs);
     }
+    if (r->paren_names)
+            SvREFCNT_dec(r->paren_names);
     if (r->data) {
        int n = r->data->count;
        PAD* new_comppad = NULL;
@@ -7605,6 +8214,7 @@ Perl_pregfree(pTHX_ struct regexp *r)
           /* If you add a ->what type here, update the comment in regcomp.h */
            switch (r->data->what[n]) {
            case 's':
+           case 'S':
                SvREFCNT_dec((SV*)r->data->data[n]);
                break;
            case 'f':
@@ -7698,6 +8308,7 @@ Perl_pregfree(pTHX_ struct regexp *r)
 
 #define sv_dup_inc(s,t)        SvREFCNT_inc(sv_dup(s,t))
 #define av_dup_inc(s,t)        (AV*)SvREFCNT_inc(sv_dup((SV*)s,t))
+#define hv_dup_inc(s,t)        (HV*)SvREFCNT_inc(sv_dup((SV*)s,t))
 #define SAVEPVN(p,n)   ((p) ? savepvn(p,n) : NULL)
 
 /* 
@@ -7761,6 +8372,7 @@ Perl_regdupe(pTHX_ const regexp *r, CLONE_PARAMS *param)
                /* legal options are one of: sfpont
                   see also regcomp.h and pregfree() */
            case 's':
+           case 'S':
                d->data[i] = sv_dup_inc((SV *)r->data->data[i], param);
                break;
            case 'p':
@@ -7825,6 +8437,8 @@ Perl_regdupe(pTHX_ const regexp *r, CLONE_PARAMS *param)
     ret->sublen         = r->sublen;
 
     ret->engine         = r->engine;
+    
+    ret->paren_names    = hv_dup_inc(r->paren_names, param);
 
     if (RX_MATCH_COPIED(ret))
        ret->subbeg  = SAVEPVN(r->subbeg, r->sublen);
@@ -8046,6 +8660,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
            DUMPUNTIL(NEXTOPER(node), next);
        }
        else if ( PL_regkind[(U8)op]  == TRIE ) {
+           const regnode *this_trie = node;
            const char op = OP(node);
             const I32 n = ARG(node);
            const reg_ac_data * const ac = op>=AHOCORASICK ?
@@ -8066,18 +8681,19 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
                            PL_colors[0], PL_colors[1],
                            (SvUTF8(*elem_ptr) ? PERL_PV_ESCAPE_UNI : 0) |
                            PERL_PV_PRETTY_ELIPSES    |
-                           PERL_PV_PRETTY_LTGT    
+                           PERL_PV_PRETTY_LTGT
                             )
                             : "???"
                 );
                 if (trie->jump) {
                     U16 dist= trie->jump[word_idx+1];
-                    PerlIO_printf(Perl_debug_log, "(%u)\n",(next - dist) - start);
+                   PerlIO_printf(Perl_debug_log, "(%u)\n",
+                        (dist ? this_trie + dist : next) - start);
                     if (dist) {
                         if (!nextbranch)
-                            nextbranch= next - trie->jump[0];
-                        DUMPUNTIL(next - dist, nextbranch);
-                    } 
+                            nextbranch= this_trie + trie->jump[0];    
+                       DUMPUNTIL(this_trie + dist, nextbranch);
+                    }
                     if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
                         nextbranch= regnext((regnode *)nextbranch);
                 } else {