X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=f665f0b5d5234263e0943cfd7388b7ab59497af4;hb=2e64971a6530d2645969bc489f564bfd3ce64993;hp=b5c685ccacbc9f12d11661f12f528b03c782f42d;hpb=c3c4140635dd08363a20c93a8c8b6d8e7464b891;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.c b/regcomp.c index b5c685c..f665f0b 100644 --- a/regcomp.c +++ b/regcomp.c @@ -878,6 +878,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, U32 state; SV *sv=sv_newmortal(); int colwidth= widecharmap ? 6 : 4; + U16 word; GET_RE_DEBUG_FLAGS_DECL; PERL_ARGS_ASSERT_DUMP_TRIE; @@ -947,6 +948,13 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, } PerlIO_printf( Perl_debug_log, "\n" ); } + PerlIO_printf(Perl_debug_log, "%*sword_info N:(prev,len)=", (int)depth*2, ""); + for (word=1; word <= trie->wordcount; word++) { + PerlIO_printf(Perl_debug_log, " %d:(%d,%d)", + (int)word, (int)(trie->wordinfo[word].prev), + (int)(trie->wordinfo[word].len)); + } + PerlIO_printf(Perl_debug_log, "\n" ); } /* Dumps a fully constructed but uncompressed trie in list form. @@ -1077,6 +1085,7 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, #endif + /* make_trie(startbranch,first,last,tail,word_count,flags,depth) startbranch: the first branch in the whole branch sequence first : start branch of sequence of branch-exact nodes. @@ -1257,8 +1266,6 @@ is the recommended Unicode-aware way of saying U16 dupe= trie->states[ state ].wordnum; \ regnode * const noper_next = regnext( noper ); \ \ - if (trie->wordlen) \ - trie->wordlen[ curword ] = wordlen; \ DEBUG_r({ \ /* store the word for dumping */ \ SV* tmp; \ @@ -1270,6 +1277,9 @@ is the recommended Unicode-aware way of saying }); \ \ curword++; \ + trie->wordinfo[curword].prev = 0; \ + trie->wordinfo[curword].len = wordlen; \ + trie->wordinfo[curword].accept = state; \ \ if ( noper_next < tail ) { \ if (!trie->jump) \ @@ -1282,16 +1292,11 @@ is the recommended Unicode-aware way of saying } \ \ if ( dupe ) { \ - /* So it's a dupe. This means we need to maintain a */\ - /* linked-list from the first to the next. */\ - /* we only allocate the nextword buffer when there */\ - /* a dupe, so first time we have to do the allocation */\ - if (!trie->nextword) \ - trie->nextword = (U16 *) \ - PerlMemShared_calloc( word_count + 1, sizeof(U16)); \ - while ( trie->nextword[dupe] ) \ - dupe= trie->nextword[dupe]; \ - trie->nextword[dupe]= curword; \ + /* It's a dupe. Pre-insert into the wordinfo[].prev */\ + /* chain, so that when the bits of chain are later */\ + /* linked together, the dups appear in the chain */\ + trie->wordinfo[curword].prev = trie->wordinfo[dupe].prev; \ + trie->wordinfo[dupe].prev = curword; \ } else { \ /* we haven't inserted this word yet. */ \ trie->states[ state ].wordnum = curword; \ @@ -1329,6 +1334,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs regnode *jumper = NULL; regnode *nextbranch = NULL; regnode *convert = NULL; + U32 *prev_states; /* temp array mapping each state to previous one */ /* we just use folder as a flag in utf8 */ const U8 * const folder = ( flags == EXACTF ? PL_fold @@ -1364,6 +1370,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs trie->charmap = (U16 *) PerlMemShared_calloc( 256, sizeof(U16) ); if (!(UTF && folder)) trie->bitmap = (char *) PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 ); + trie->wordinfo = (reg_trie_wordinfo *) PerlMemShared_calloc( + trie->wordcount+1, sizeof(reg_trie_wordinfo)); + DEBUG_r({ trie_words = newAV(); }); @@ -1496,7 +1505,6 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs (int)TRIE_CHARCOUNT(trie), trie->uniquecharcount, (int)trie->minlen, (int)trie->maxlen ) ); - trie->wordlen = (U32 *) PerlMemShared_calloc( word_count, sizeof(U32) ); /* We now know what we are dealing with in terms of unique chars and @@ -1520,6 +1528,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs */ + Newx(prev_states, TRIE_CHARCOUNT(trie) + 2, U32); + prev_states[1] = 0; + if ( (IV)( ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount + 1) > SvIV(re_trie_maxbuff) ) { /* Second Pass -- Array Of Lists Representation @@ -1590,6 +1601,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs } if ( ! newstate ) { newstate = next_alloc++; + prev_states[newstate] = state; TRIE_LIST_PUSH( state, charid, newstate ); transcount++; } @@ -1773,6 +1785,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs if ( !trie->trans[ state + charid ].next ) { trie->trans[ state + charid ].next = next_alloc; trie->trans[ state ].check++; + prev_states[TRIE_NODENUM(next_alloc)] + = TRIE_NODENUM(state); next_alloc += trie->uniquecharcount; } state = trie->trans[ state + charid ].next; @@ -1920,9 +1934,6 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs PerlMemShared_realloc( trie->trans, trie->lasttrans * sizeof(reg_trie_trans) ); - /* and now dump out the compressed format */ - DEBUG_TRIE_COMPILE_r(dump_trie(trie, widecharmap, revcharmap, depth+1)); - { /* Modify the program and insert the new TRIE node*/ U8 nodetype =(U8)(flags & 0xFF); char *str=NULL; @@ -2052,6 +2063,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs break; } } + trie->prefixlen = (state-1); if (str) { regnode *n = convert+NODE_SZ_STR(convert); NEXT_OFF(convert) = NODE_SZ_STR(convert); @@ -2147,6 +2159,42 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs Set_Node_Offset_Length(convert,mjd_offset,mjd_nodelen); }); } /* end node insert */ + + /* Finish populating the prev field of the wordinfo array. Walk back + * from each accept state until we find another accept state, and if + * so, point the first word's .prev field at the second word. If the + * second already has a .prev field set, stop now. This will be the + * case either if we've already processed that word's accept state, + * or that that state had multiple words, and the overspill words + * were already linked up earlier. + */ + { + U16 word; + U32 state; + U16 prev; + + for (word=1; word <= trie->wordcount; word++) { + prev = 0; + if (trie->wordinfo[word].prev) + continue; + state = trie->wordinfo[word].accept; + while (state) { + state = prev_states[state]; + if (!state) + break; + prev = trie->states[state].wordnum; + if (prev) + break; + } + trie->wordinfo[word].prev = prev; + } + Safefree(prev_states); + } + + + /* and now dump out the compressed format */ + DEBUG_TRIE_COMPILE_r(dump_trie(trie, widecharmap, revcharmap, depth+1)); + RExC_rxi->data->data[ data_slot + 1 ] = (void*)widecharmap; #ifdef DEBUGGING RExC_rxi->data->data[ data_slot + TRIE_WORDS_OFFSET ] = (void*)trie_words; @@ -7445,8 +7493,7 @@ tryagain: break; case 'c': p++; - ender = UCHARAT(p++); - ender = toCTRL(ender); + ender = grok_bslash_c(*p++, SIZE_ONLY); break; case '0': case '1': case '2': case '3':case '4': case '5': case '6': case '7': case '8':case '9': @@ -8063,8 +8110,7 @@ parseit: goto recode_encoding; break; case 'c': - value = UCHARAT(RExC_parse++); - value = toCTRL(value); + value = grok_bslash_c(*RExC_parse++, SIZE_ONLY); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -8887,6 +8933,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p, const regnode *val, /* - regcurly - a little FSA that accepts {\d+,?\d*} */ +#ifndef PERL_IN_XSUB_RE I32 Perl_regcurly(register const char *s) { @@ -8906,7 +8953,7 @@ Perl_regcurly(register const char *s) return FALSE; return TRUE; } - +#endif /* - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form @@ -9572,12 +9619,9 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx) PerlMemShared_free(trie->trans); if (trie->bitmap) PerlMemShared_free(trie->bitmap); - if (trie->wordlen) - PerlMemShared_free(trie->wordlen); if (trie->jump) PerlMemShared_free(trie->jump); - if (trie->nextword) - PerlMemShared_free(trie->nextword); + PerlMemShared_free(trie->wordinfo); /* do this last!!!! */ PerlMemShared_free(ri->data->data[n]); } @@ -9884,7 +9928,7 @@ Perl_save_re_context(pTHX) state = (struct re_save_state *)(PL_savestack + PL_savestack_ix); PL_savestack_ix += SAVESTACK_ALLOC_FOR_RE_SAVE_STATE; - SSPUSHINT(SAVEt_RE_STATE); + SSPUSHUV(SAVEt_RE_STATE); Copy(&PL_reg_state, state, 1, struct re_save_state);