From: Dave Mitchell Date: Mon, 20 Mar 2006 00:52:09 +0000 (+0000) Subject: merge the TRIEF/TRIEFL and TRIE branches in regmatch(), X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=5b47454deb66294b95a0cf8c5e3b43cd3450dc8d;p=p5sagit%2Fp5-mst-13.2.git merge the TRIEF/TRIEFL and TRIE branches in regmatch(), expunge a couple of macros, and move some variable declarations inwards p4raw-id: //depot/perl@27544 --- diff --git a/regexec.c b/regexec.c index c79bee0..fcdf30f 100644 --- a/regexec.c +++ b/regexec.c @@ -2312,56 +2312,7 @@ typedef union re_unwind_t { /* Make sure there is a test for this +1 options in re_tests */ #define TRIE_INITAL_ACCEPT_BUFFLEN 4; -#define TRIE_CHECK_STATE_IS_ACCEPTING STMT_START { \ - if ( trie->states[ state ].wordnum ) { \ - if ( !accepted ) { \ - ENTER; \ - SAVETMPS; \ - bufflen = TRIE_INITAL_ACCEPT_BUFFLEN ; \ - sv_accept_buff=newSV(bufflen * sizeof(reg_trie_accepted) - 1 );\ - SvCUR_set( sv_accept_buff, sizeof(reg_trie_accepted) ); \ - SvPOK_on( sv_accept_buff ); \ - sv_2mortal( sv_accept_buff ); \ - accept_buff = (reg_trie_accepted*)SvPV_nolen( sv_accept_buff );\ - } else { \ - if ( accepted >= bufflen ) { \ - bufflen *= 2; \ - accept_buff =(reg_trie_accepted*)SvGROW( sv_accept_buff, \ - bufflen * sizeof(reg_trie_accepted) ); \ - } \ - SvCUR_set( sv_accept_buff,SvCUR( sv_accept_buff ) \ - + sizeof( reg_trie_accepted ) ); \ - } \ - accept_buff[ accepted ].wordnum = trie->states[ state ].wordnum; \ - accept_buff[ accepted ].endpos = uc; \ - ++accepted; \ - } } STMT_END - -#define TRIE_HANDLE_CHAR STMT_START { \ - if ( uvc < 256 ) { \ - charid = trie->charmap[ uvc ]; \ - } else { \ - charid = 0; \ - if( trie->widecharmap ) { \ - SV** svpp = (SV**)NULL; \ - svpp = hv_fetch( trie->widecharmap, (char*)&uvc, \ - sizeof( UV ), 0 ); \ - if ( svpp ) { \ - charid = (U16)SvIV( *svpp ); \ - } \ - } \ - } \ - if ( charid && \ - ( base + charid > trie->uniquecharcount ) && \ - ( base + charid - 1 - trie->uniquecharcount < trie->lasttrans) && \ - trie->trans[ base + charid - 1 - trie->uniquecharcount ].check == state ) \ - { \ - state = trie->trans[ base + charid - 1 - trie->uniquecharcount ].next; \ - } else { \ - state = 0; \ - } \ - uc += len; \ - } STMT_END + /* - regmatch - main matching routine @@ -2394,12 +2345,6 @@ S_regmatch(pTHX_ regnode *prog) int minmod = 0, sw = 0, logical = 0; I32 unwind = 0; - /* used by the trie code */ - SV *sv_accept_buff = NULL; /* accepting states we have traversed */ - reg_trie_accepted *accept_buff = NULL; /* "" */ - reg_trie_data *trie; /* what trie are we using right now */ - U32 accepted = 0; /* how many accepting states we have seen*/ - #if 0 I32 firstcp = PL_savestack_ix; #endif @@ -2570,12 +2515,9 @@ S_regmatch(pTHX_ regnode *prog) traverse the TRIE keeping track of all accepting states we transition through until we get to a failing node. - we use two slightly different pieces of code to handle - the traversal depending on whether its case sensitive or - not. we reuse the accept code however. (this should probably - be turned into a macro.) */ + case TRIE: case TRIEF: case TRIEFL: { @@ -2588,13 +2530,50 @@ S_regmatch(pTHX_ regnode *prog) STRLEN foldlen = 0; U8 *uscan = (U8*)NULL; STRLEN bufflen=0; - accepted = 0; + const enum { trie_plain, trie_utf8, trie_uft8_fold } + trie_type = do_utf8 ? + (OP(scan) == TRIE ? trie_utf8 : trie_uft8_fold) + : trie_plain; + + int gotit = 0; + /* accepting states we have traversed */ + SV *sv_accept_buff = NULL; + reg_trie_accepted *accept_buff = NULL; + reg_trie_data *trie; /* what trie are we using right now */ + U32 accepted = 0; /* how many accepting states we have seen */ trie = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ]; while ( state && uc <= (U8*)PL_regeol ) { - TRIE_CHECK_STATE_IS_ACCEPTING; + if (trie->states[ state ].wordnum) { + if (!accepted ) { + ENTER; + SAVETMPS; + bufflen = TRIE_INITAL_ACCEPT_BUFFLEN; + sv_accept_buff=newSV(bufflen * + sizeof(reg_trie_accepted) - 1); + SvCUR_set(sv_accept_buff, + sizeof(reg_trie_accepted)); + SvPOK_on(sv_accept_buff); + sv_2mortal(sv_accept_buff); + accept_buff = + (reg_trie_accepted*)SvPV_nolen(sv_accept_buff ); + } + else { + if (accepted >= bufflen) { + bufflen *= 2; + accept_buff =(reg_trie_accepted*) + SvGROW(sv_accept_buff, + bufflen * sizeof(reg_trie_accepted)); + } + SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff) + + sizeof(reg_trie_accepted)); + } + accept_buff[accepted].wordnum = trie->states[state].wordnum; + accept_buff[accepted].endpos = uc; + ++accepted; + } base = trie->states[ state ].trans.base; @@ -2606,8 +2585,8 @@ S_regmatch(pTHX_ regnode *prog) ); if ( base ) { - - if ( do_utf8 ) { + switch (trie_type) { + case trie_uft8_fold: if ( foldlen>0 ) { uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); foldlen -= len; @@ -2620,14 +2599,47 @@ S_regmatch(pTHX_ regnode *prog) foldlen -= UNISKIP( uvc ); uscan = foldbuf + UNISKIP( uvc ); } - } else { + break; + case trie_utf8: + uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, + &len, uniflags ); + break; + case trie_plain: uvc = (UV)*uc; len = 1; } - TRIE_HANDLE_CHAR; + if (uvc < 256) { + charid = trie->charmap[ uvc ]; + } + else { + charid = 0; + if (trie->widecharmap) { + SV** svpp = (SV**)NULL; + svpp = hv_fetch(trie->widecharmap, + (char*)&uvc, sizeof(UV), 0); + if (svpp) + charid = (U16)SvIV(*svpp); + } + } + + if (charid && + (base + charid > trie->uniquecharcount ) + && (base + charid - 1 - trie->uniquecharcount + < trie->lasttrans) + && trie->trans[base + charid - 1 - + trie->uniquecharcount].check == state) + { + state = trie->trans[base + charid - 1 - + trie->uniquecharcount ].next; + } + else { + state = 0; + } + uc += len; - } else { + } + else { state = 0; } DEBUG_TRIE_EXECUTE_r( @@ -2636,65 +2648,8 @@ S_regmatch(pTHX_ regnode *prog) charid, uvc, (UV)state, PL_colors[5] ); ); } - if ( !accepted ) { + if (!accepted ) sayNO; - } else { - goto TrieAccept; - } - } - /* unreached codepoint: we jump into the middle of the next case - from previous if blocks */ - case TRIE: - { - U8 *uc = (U8*)locinput; - U32 state = 1; - U16 charid = 0; - U32 base = 0; - UV uvc = 0; - STRLEN len = 0; - STRLEN bufflen = 0; - accepted = 0; - - trie = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ]; - - while ( state && uc <= (U8*)PL_regeol ) { - - TRIE_CHECK_STATE_IS_ACCEPTING; - - base = trie->states[ state ].trans.base; - - DEBUG_TRIE_EXECUTE_r( - PerlIO_printf( Perl_debug_log, - "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ", - REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], - (UV)state, (UV)base, (UV)accepted ); - ); - - if ( base ) { - - if ( do_utf8 ) { - uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); - } else { - uvc = (U32)*uc; - len = 1; - } - - TRIE_HANDLE_CHAR; - - } else { - state = 0; - } - DEBUG_TRIE_EXECUTE_r( - PerlIO_printf( Perl_debug_log, - "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n", - charid, uvc, (UV)state, PL_colors[5] ); - ); - } - if ( !accepted ) { - sayNO; - } - } - /* There was at least one accepting state that we @@ -2708,9 +2663,6 @@ S_regmatch(pTHX_ regnode *prog) eventually terminates once all of the accepting states have been tried. */ - TrieAccept: - { - int gotit = 0; if ( accepted == 1 ) { DEBUG_EXECUTE_r({