Re: Fix \k<foo> preceded by literal
[p5sagit/p5-mst-13.2.git] / regcomp.c
index 3d1211a..30b0660 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -156,7 +156,6 @@ typedef struct RExC_state_t {
 #define RExC_seen      (pRExC_state->seen)
 #define RExC_size      (pRExC_state->size)
 #define RExC_npar      (pRExC_state->npar)
-#define RExC_cpar      (pRExC_state->cpar)
 #define RExC_nestroot   (pRExC_state->nestroot)
 #define RExC_extralen  (pRExC_state->extralen)
 #define RExC_seen_zerolen      (pRExC_state->seen_zerolen)
@@ -798,9 +797,9 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con
 
 #ifdef DEBUGGING
 /*
-   dump_trie(trie,widecharmap)
-   dump_trie_interim_list(trie,widecharmap,next_alloc)
-   dump_trie_interim_table(trie,widecharmap,next_alloc)
+   dump_trie(trie,widecharmap,revcharmap)
+   dump_trie_interim_list(trie,widecharmap,revcharmap,next_alloc)
+   dump_trie_interim_table(trie,widecharmap,revcharmap,next_alloc)
 
    These routines dump out a trie in a somewhat readable format.
    The _interim_ variants are used for debugging the interim
@@ -818,7 +817,8 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con
 */
  
 STATIC void
-S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, U32 depth)
+S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
+           AV *revcharmap, U32 depth)
 {
     U32 state;
     SV *sv=sv_newmortal();
@@ -831,7 +831,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, U32 depth)
         "Match","Base","Ofs" );
 
     for( state = 0 ; state < trie->uniquecharcount ; state++ ) {
-       SV ** const tmp = av_fetch( trie->revcharmap, state, 0);
+       SV ** const tmp = av_fetch( revcharmap, state, 0);
         if ( tmp ) {
             PerlIO_printf( Perl_debug_log, "%*s", 
                 colwidth,
@@ -900,7 +900,8 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, U32 depth)
 */
 STATIC void
 S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
-                        HV *widecharmap, U32 next_alloc, U32 depth)
+                        HV *widecharmap, AV *revcharmap, U32 next_alloc,
+                        U32 depth)
 {
     U32 state;
     SV *sv=sv_newmortal();
@@ -924,7 +925,7 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
             );
         }
         for( charid = 1 ; charid <= TRIE_LIST_USED( state ) ; charid++ ) {
-           SV ** const tmp = av_fetch( trie->revcharmap, TRIE_LIST_ITEM(state,charid).forid, 0);
+           SV ** const tmp = av_fetch( revcharmap, TRIE_LIST_ITEM(state,charid).forid, 0);
            if ( tmp ) {
                 PerlIO_printf( Perl_debug_log, "%*s:%3X=%4"UVXf" | ",
                     colwidth,
@@ -953,7 +954,8 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
 */
 STATIC void
 S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
-                         HV *widecharmap, U32 next_alloc, U32 depth)
+                         HV *widecharmap, AV *revcharmap, U32 next_alloc,
+                         U32 depth)
 {
     U32 state;
     U16 charid;
@@ -969,7 +971,7 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
     PerlIO_printf( Perl_debug_log, "%*sChar : ",(int)depth * 2 + 2,"" );
 
     for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
-       SV ** const tmp = av_fetch( trie->revcharmap, charid, 0);
+       SV ** const tmp = av_fetch( revcharmap, charid, 0);
         if ( tmp ) {
             PerlIO_printf( Perl_debug_log, "%*s", 
                 colwidth,
@@ -1136,7 +1138,7 @@ is the recommended Unicode-aware way of saying
        SV *tmp = newSVpvs("");                                            \
        if (UTF) SvUTF8_on(tmp);                                           \
        Perl_sv_catpvf( aTHX_ tmp, "%c", (int)uvc );                       \
-       av_push( TRIE_REVCHARMAP(trie), tmp );                             \
+       av_push( revcharmap, tmp );                                        \
     } STMT_END
 
 #define TRIE_READ_CHAR STMT_START {                                           \
@@ -1196,7 +1198,7 @@ is the recommended Unicode-aware way of saying
         else                                                    \
             tmp = newSVpvn( "", 0 );                            \
         if ( UTF ) SvUTF8_on( tmp );                            \
-        av_push( trie->words, tmp );                            \
+        av_push( trie_words, tmp );                             \
     });                                                         \
                                                                 \
     curword++;                                                  \
@@ -1249,6 +1251,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
     /* first pass, loop through and scan words */
     reg_trie_data *trie;
     HV *widecharmap = NULL;
+    AV *revcharmap = newAV();
     regnode *cur;
     const U32 uniflags = UTF8_ALLOW_DEFAULT;
     STRLEN len = 0;
@@ -1267,15 +1270,17 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                          )
                      );
 
-    const U32 data_slot = add_data( pRExC_state, 2, "tu" );
-    SV *re_trie_maxbuff;
-#ifndef DEBUGGING
-    /* these are only used during construction but are useful during
-     * debugging so we store them in the struct when debugging.
+#ifdef DEBUGGING
+    const U32 data_slot = add_data( pRExC_state, 4, "tuuu" );
+    AV *trie_words = NULL;
+    /* along with revcharmap, this only used during construction but both are
+     * useful during debugging so we store them in the struct when debugging.
      */
+#else
+    const U32 data_slot = add_data( pRExC_state, 2, "tu" );
     STRLEN trie_charcount=0;
-    AV *trie_revcharmap;
 #endif
+    SV *re_trie_maxbuff;
     GET_RE_DEBUG_FLAGS_DECL;
 #ifndef DEBUGGING
     PERL_UNUSED_ARG(depth);
@@ -1290,9 +1295,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
     if (!(UTF && folder))
        trie->bitmap = PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 );
     DEBUG_r({
-        trie->words = newAV();
+        trie_words = newAV();
     });
-    TRIE_REVCHARMAP(trie) = newAV();
 
     re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
     if (!SvIOK(re_trie_maxbuff)) {
@@ -1513,8 +1517,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                                              * sizeof(reg_trie_state) );
 
         /* and now dump it out before we compress it */
-        DEBUG_TRIE_COMPILE_MORE_r(
-            dump_trie_interim_list(trie,widecharmap,next_alloc,depth+1)
+        DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_list(trie, widecharmap,
+                                                        revcharmap, next_alloc,
+                                                        depth+1)
         );
 
         trie->trans
@@ -1687,9 +1692,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
         } /* end second pass */
 
         /* and now dump it out before we compress it */
-        DEBUG_TRIE_COMPILE_MORE_r(
-            dump_trie_interim_table(trie,widecharmap,next_alloc,depth+1)
-        );
+        DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_table(trie, widecharmap,
+                                                         revcharmap,
+                                                         next_alloc, depth+1));
 
         {
         /*
@@ -1818,9 +1823,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                                         * sizeof(reg_trie_trans) );
 
     /* and now dump out the compressed format */
-    DEBUG_TRIE_COMPILE_r(
-        dump_trie(trie,widecharmap,depth+1)
-    );
+    DEBUG_TRIE_COMPILE_r(dump_trie(trie, widecharmap, revcharmap, depth+1));
 
     {   /* Modify the program and insert the new TRIE node*/ 
         U8 nodetype =(U8)(flags & 0xFF);
@@ -1882,7 +1885,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                          trie->trans[ base + ofs - trie->uniquecharcount ].check == state )
                     {
                         if ( ++count > 1 ) {
-                            SV **tmp = av_fetch( TRIE_REVCHARMAP(trie), ofs, 0);
+                            SV **tmp = av_fetch( revcharmap, ofs, 0);
                            const U8 *ch = (U8*)SvPV_nolen_const( *tmp );
                             if ( state == 1 ) break;
                             if ( count == 2 ) {
@@ -1893,7 +1896,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                                         (int)depth * 2 + 2, "",
                                         (UV)state));
                                if (idx >= 0) {
-                                   SV ** const tmp = av_fetch( TRIE_REVCHARMAP(trie), idx, 0);
+                                   SV ** const tmp = av_fetch( revcharmap, idx, 0);
                                    const U8 * const ch = (U8*)SvPV_nolen_const( *tmp );
 
                                     TRIE_BITMAP_SET(trie,*ch);
@@ -1913,7 +1916,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                    }
                 }
                 if ( count == 1 ) {
-                    SV **tmp = av_fetch( TRIE_REVCHARMAP(trie), idx, 0);
+                    SV **tmp = av_fetch( revcharmap, idx, 0);
                     char *ch = SvPV_nolen( *tmp );
                     DEBUG_OPTIMISE_r({
                         SV *sv=sv_newmortal();
@@ -1961,7 +1964,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                         Set_Node_Offset_Length(fix, 0, 0);
                     }
                     while (word--) {
-                        SV ** const tmp = av_fetch( trie->words, word, 0 );
+                        SV ** const tmp = av_fetch( trie_words, word, 0 );
                         if (tmp) {
                             if ( STR_LEN(convert) <= SvCUR(*tmp) )
                                 sv_chop(*tmp, SvPV_nolen(*tmp) + STR_LEN(convert));
@@ -2030,8 +2033,11 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
         });
     } /* end node insert */
     RExC_rxi->data->data[ data_slot + 1 ] = (void*)widecharmap;
-#ifndef DEBUGGING
-    SvREFCNT_dec(TRIE_REVCHARMAP(trie));
+#ifdef DEBUGGING
+    RExC_rxi->data->data[ data_slot + TRIE_WORDS_OFFSET ] = (void*)trie_words;
+    RExC_rxi->data->data[ data_slot + 3 ] = (void*)revcharmap;
+#else
+    SvREFCNT_dec(revcharmap);
 #endif
     return trie->jump 
            ? MADE_JUMP_TRIE 
@@ -4024,7 +4030,6 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     RExC_end = xend;
     RExC_naughty = 0;
     RExC_npar = 1;
-    RExC_cpar = 1;
     RExC_nestroot = 0;
     RExC_size = 0L;
     RExC_emit = &PL_regdummy;
@@ -4120,7 +4125,6 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     RExC_end = xend;
     RExC_naughty = 0;
     RExC_npar = 1;
-    RExC_cpar = 1;
     RExC_emit_start = ri->program;
     RExC_emit = ri->program;
 #ifdef DEBUGGING
@@ -5410,7 +5414,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            ender = reg_node(pRExC_state, TAIL);
            break;
        case 1:
-           RExC_cpar++;
            ender = reganode(pRExC_state, CLOSE, parno);
            if (!SIZE_ONLY && RExC_seen & REG_SEEN_RECURSE) {
                DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
@@ -6365,7 +6368,7 @@ tryagain:
                    RExC_parse++;
                num = atoi(RExC_parse);
                 if (isrel) {
-                    num = RExC_cpar - num;
+                    num = RExC_npar - num;
                     if (num < 1)
                         vFAIL("Reference to nonexistent or unclosed group");
                 }
@@ -6379,12 +6382,6 @@ tryagain:
                    if (!SIZE_ONLY) {
                        if (num > (I32)RExC_rx->nparens)
                            vFAIL("Reference to nonexistent group");
-                       /* People make this error all the time apparently.
-                          So we cant fail on it, even though we should 
-                       
-                       else if (num >= RExC_cpar)
-                           vFAIL("Reference to unclosed group will always match");
-                       */
                    }
                    RExC_sawback = 1;
                    ret = reganode(pRExC_state,
@@ -6475,6 +6472,7 @@ tryagain:
                    case 'P':
                     case 'N':
                     case 'R':
+                    case 'k':
                        --p;
                        goto loopdone;
                    case 'n':
@@ -8587,12 +8585,6 @@ Perl_pregfree(pTHX_ struct regexp *r)
                             PerlMemShared_free(trie->jump);
                         if (trie->nextword)
                             PerlMemShared_free(trie->nextword);
-#ifdef DEBUGGING
-                        if (trie->words)
-                            SvREFCNT_dec((SV*)trie->words);
-                        if (trie->revcharmap)
-                            SvREFCNT_dec((SV*)trie->revcharmap);
-#endif
                         /* do this last!!!! */
                         PerlMemShared_free(ri->data->data[n]);
                    }
@@ -9093,11 +9085,14 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
                NULL;
            const reg_trie_data * const trie =
                (reg_trie_data*)ri->data->data[op<AHOCORASICK ? n : ac->trie];
+#ifdef DEBUGGING
+           AV *const trie_words = (AV *) ri->data->data[n + TRIE_WORDS_OFFSET];
+#endif
            const regnode *nextbranch= NULL;
            I32 word_idx;
             sv_setpvn(sv, "", 0);
            for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) {
-               SV ** const elem_ptr = av_fetch(trie->words,word_idx,0);
+               SV ** const elem_ptr = av_fetch(trie_words,word_idx,0);
                
                 PerlIO_printf(Perl_debug_log, "%*s%s ",
                    (int)(2*(indent+3)), "",