X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=4ea9a5a6485d7539031ce3e18f2fb41562d251b8;hb=79fd8837531c3ec705645385c6a99d6e9c263225;hp=6d916f1ff1fa552ccfa7cba5536fbcbada51ef0c;hpb=c74340f9cdee6010339b6bfd0e8b0dc8bc875344;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.c b/regcomp.c index 6d916f1..4ea9a5a 100644 --- a/regcomp.c +++ b/regcomp.c @@ -102,7 +102,8 @@ typedef struct RExC_state_t { U32 flags; /* are we folding, multilining? */ char *precomp; /* uncompiled string. */ - regexp *rx; + regexp *rx; /* perl core regexp structure */ + regexp_internal *rxi; /* internal data for regexp object pprivate field */ char *start; /* Start of input for compile */ char *end; /* End of input for compile */ char *parse; /* Input-scan pointer. */ @@ -142,11 +143,12 @@ typedef struct RExC_state_t { #define RExC_flags (pRExC_state->flags) #define RExC_precomp (pRExC_state->precomp) #define RExC_rx (pRExC_state->rx) +#define RExC_rxi (pRExC_state->rxi) #define RExC_start (pRExC_state->start) #define RExC_end (pRExC_state->end) #define RExC_parse (pRExC_state->parse) #define RExC_whilem_seen (pRExC_state->whilem_seen) -#define RExC_offsets (pRExC_state->rx->offsets) /* I am not like the others */ +#define RExC_offsets (pRExC_state->rxi->offsets) /* I am not like the others */ #define RExC_emit (pRExC_state->emit) #define RExC_emit_start (pRExC_state->emit_start) #define RExC_naughty (pRExC_state->naughty) @@ -154,7 +156,6 @@ typedef struct RExC_state_t { #define RExC_seen (pRExC_state->seen) #define RExC_size (pRExC_state->size) #define RExC_npar (pRExC_state->npar) -#define RExC_cpar (pRExC_state->cpar) #define RExC_nestroot (pRExC_state->nestroot) #define RExC_extralen (pRExC_state->extralen) #define RExC_seen_zerolen (pRExC_state->seen_zerolen) @@ -342,8 +343,8 @@ static const scan_data_t zero_scan_data = #define SCF_SEEN_ACCEPT 0x8000 #define UTF (RExC_utf8 != 0) -#define LOC ((RExC_flags & PMf_LOCALE) != 0) -#define FOLD ((RExC_flags & PMf_FOLD) != 0) +#define LOC ((RExC_flags & RXf_PMf_LOCALE) != 0) +#define FOLD ((RExC_flags & RXf_PMf_FOLD) != 0) #define OOB_UNICODE 12345678 #define OOB_NAMEDCLASS -1 @@ -370,7 +371,7 @@ static const scan_data_t zero_scan_data = * arg. Show regex, up to a maximum length. If it's too long, chop and add * "...". */ -#define FAIL(msg) STMT_START { \ +#define _FAIL(code) STMT_START { \ const char *ellipses = ""; \ IV len = RExC_end - RExC_precomp; \ \ @@ -381,10 +382,17 @@ static const scan_data_t zero_scan_data = len = RegexLengthToShowInErrorMessages - 10; \ ellipses = "..."; \ } \ - Perl_croak(aTHX_ "%s in regex m/%.*s%s/", \ - msg, (int)len, RExC_precomp, ellipses); \ + code; \ } STMT_END +#define FAIL(msg) _FAIL( \ + Perl_croak(aTHX_ "%s in regex m/%.*s%s/", \ + msg, (int)len, RExC_precomp, ellipses)) + +#define FAIL2(msg,arg) _FAIL( \ + Perl_croak(aTHX_ msg " in regex m/%.*s%s/", \ + arg, (int)len, RExC_precomp, ellipses)) + /* * Simple_vFAIL -- like FAIL, but marks the current location in the scan */ @@ -548,17 +556,18 @@ static const scan_data_t zero_scan_data = #define EXPERIMENTAL_INPLACESCAN #endif -#define DEBUG_STUDYDATA(data,depth) \ -DEBUG_OPTIMISE_MORE_r(if(data){ \ +#define DEBUG_STUDYDATA(str,data,depth) \ +DEBUG_OPTIMISE_MORE_r(if(data){ \ PerlIO_printf(Perl_debug_log, \ - "%*s"/* Len:%"IVdf"/%"IVdf" */" Pos:%"IVdf"/%"IVdf \ - " Flags: %"IVdf" Whilem_c: %"IVdf" Lcp: %"IVdf" ", \ + "%*s" str "Pos:%"IVdf"/%"IVdf \ + " Flags: 0x%"UVXf" Whilem_c: %"IVdf" Lcp: %"IVdf" %s", \ (int)(depth)*2, "", \ (IV)((data)->pos_min), \ (IV)((data)->pos_delta), \ - (IV)((data)->flags), \ + (UV)((data)->flags), \ (IV)((data)->whilem_c), \ - (IV)((data)->last_closep ? *((data)->last_closep) : -1) \ + (IV)((data)->last_closep ? *((data)->last_closep) : -1), \ + is_inf ? "INF " : "" \ ); \ if ((data)->last_found) \ PerlIO_printf(Perl_debug_log, \ @@ -588,7 +597,7 @@ static void clear_re(pTHX_ void *r); floating substrings if needed. */ STATIC void -S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data, I32 *minlenp) +S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data, I32 *minlenp, int is_inf) { const STRLEN l = CHR_SVLEN(data->last_found); const STRLEN old_l = CHR_SVLEN(*data->longest); @@ -606,12 +615,12 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data, I32 *min data->minlen_fixed=minlenp; data->lookbehind_fixed=0; } - else { + else { /* *data->longest == data->longest_float */ data->offset_float_min = l ? data->last_start_min : data->pos_min; data->offset_float_max = (l ? data->last_start_max : data->pos_min + data->pos_delta); - if ((U32)data->offset_float_max > (U32)I32_MAX) + if (is_inf || (U32)data->offset_float_max > (U32)I32_MAX) data->offset_float_max = I32_MAX; if (data->flags & SF_BEFORE_EOL) data->flags @@ -633,7 +642,7 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data, I32 *min } data->last_end = -1; data->flags &= ~SF_BEFORE_EOL; - DEBUG_STUDYDATA(data,0); + DEBUG_STUDYDATA("cl_anything: ",data,0); } /* Can match anything (initialization) */ @@ -789,9 +798,9 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con #ifdef DEBUGGING /* - dump_trie(trie) - dump_trie_interim_list(trie,next_alloc) - dump_trie_interim_table(trie,next_alloc) + dump_trie(trie,widecharmap,revcharmap) + dump_trie_interim_list(trie,widecharmap,revcharmap,next_alloc) + dump_trie_interim_table(trie,widecharmap,revcharmap,next_alloc) These routines dump out a trie in a somewhat readable format. The _interim_ variants are used for debugging the interim @@ -804,17 +813,17 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con */ /* - dump_trie(trie) Dumps the final compressed table form of the trie to Perl_debug_log. Used for debugging make_trie(). */ STATIC void -S_dump_trie(pTHX_ const struct _reg_trie_data *trie,U32 depth) +S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, + AV *revcharmap, U32 depth) { U32 state; SV *sv=sv_newmortal(); - int colwidth= trie->widecharmap ? 6 : 4; + int colwidth= widecharmap ? 6 : 4; GET_RE_DEBUG_FLAGS_DECL; @@ -823,7 +832,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie,U32 depth) "Match","Base","Ofs" ); for( state = 0 ; state < trie->uniquecharcount ; state++ ) { - SV ** const tmp = av_fetch( trie->revcharmap, state, 0); + SV ** const tmp = av_fetch( revcharmap, state, 0); if ( tmp ) { PerlIO_printf( Perl_debug_log, "%*s", colwidth, @@ -885,18 +894,19 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie,U32 depth) } } /* - dump_trie_interim_list(trie,next_alloc) Dumps a fully constructed but uncompressed trie in list form. List tries normally only are used for construction when the number of possible chars (trie->uniquecharcount) is very high. Used for debugging make_trie(). */ STATIC void -S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc,U32 depth) +S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, + HV *widecharmap, AV *revcharmap, U32 next_alloc, + U32 depth) { U32 state; SV *sv=sv_newmortal(); - int colwidth= trie->widecharmap ? 6 : 4; + int colwidth= widecharmap ? 6 : 4; GET_RE_DEBUG_FLAGS_DECL; /* print out the table precompression. */ PerlIO_printf( Perl_debug_log, "%*sState :Word | Transition Data\n%*s%s", @@ -916,7 +926,7 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc ); } for( charid = 1 ; charid <= TRIE_LIST_USED( state ) ; charid++ ) { - SV ** const tmp = av_fetch( trie->revcharmap, TRIE_LIST_ITEM(state,charid).forid, 0); + SV ** const tmp = av_fetch( revcharmap, TRIE_LIST_ITEM(state,charid).forid, 0); if ( tmp ) { PerlIO_printf( Perl_debug_log, "%*s:%3X=%4"UVXf" | ", colwidth, @@ -938,19 +948,20 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc } /* - dump_trie_interim_table(trie,next_alloc) Dumps a fully constructed but uncompressed trie in table form. This is the normal DFA style state transition table, with a few twists to facilitate compression later. Used for debugging make_trie(). */ STATIC void -S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc, U32 depth) +S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, + HV *widecharmap, AV *revcharmap, U32 next_alloc, + U32 depth) { U32 state; U16 charid; SV *sv=sv_newmortal(); - int colwidth= trie->widecharmap ? 6 : 4; + int colwidth= widecharmap ? 6 : 4; GET_RE_DEBUG_FLAGS_DECL; /* @@ -961,7 +972,7 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, U32 next_allo PerlIO_printf( Perl_debug_log, "%*sChar : ",(int)depth * 2 + 2,"" ); for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) { - SV ** const tmp = av_fetch( trie->revcharmap, charid, 0); + SV ** const tmp = av_fetch( revcharmap, charid, 0); if ( tmp ) { PerlIO_printf( Perl_debug_log, "%*s", colwidth, @@ -1128,7 +1139,7 @@ is the recommended Unicode-aware way of saying SV *tmp = newSVpvs(""); \ if (UTF) SvUTF8_on(tmp); \ Perl_sv_catpvf( aTHX_ tmp, "%c", (int)uvc ); \ - av_push( TRIE_REVCHARMAP(trie), tmp ); \ + av_push( revcharmap, tmp ); \ } STMT_END #define TRIE_READ_CHAR STMT_START { \ @@ -1188,14 +1199,14 @@ is the recommended Unicode-aware way of saying else \ tmp = newSVpvn( "", 0 ); \ if ( UTF ) SvUTF8_on( tmp ); \ - av_push( trie->words, tmp ); \ + av_push( trie_words, tmp ); \ }); \ \ curword++; \ \ if ( noper_next < tail ) { \ if (!trie->jump) \ - Newxz( trie->jump, word_count + 1, U16); \ + trie->jump = PerlMemShared_calloc( word_count + 1, sizeof(U16) ); \ trie->jump[curword] = (U16)(noper_next - convert); \ if (!jumper) \ jumper = noper_next; \ @@ -1209,7 +1220,8 @@ is the recommended Unicode-aware way of saying /* we only allocate the nextword buffer when there */\ /* a dupe, so first time we have to do the allocation */\ if (!trie->nextword) \ - Newxz( trie->nextword, word_count + 1, U16); \ + trie->nextword = \ + PerlMemShared_calloc( word_count + 1, sizeof(U16)); \ while ( trie->nextword[dupe] ) \ dupe= trie->nextword[dupe]; \ trie->nextword[dupe]= curword; \ @@ -1239,6 +1251,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs dVAR; /* first pass, loop through and scan words */ reg_trie_data *trie; + HV *widecharmap = NULL; + AV *revcharmap = newAV(); regnode *cur; const U32 uniflags = UTF8_ALLOW_DEFAULT; STRLEN len = 0; @@ -1257,32 +1271,33 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs ) ); - const U32 data_slot = add_data( pRExC_state, 1, "t" ); - SV *re_trie_maxbuff; -#ifndef DEBUGGING - /* these are only used during construction but are useful during - * debugging so we store them in the struct when debugging. +#ifdef DEBUGGING + const U32 data_slot = add_data( pRExC_state, 4, "tuuu" ); + AV *trie_words = NULL; + /* along with revcharmap, this only used during construction but both are + * useful during debugging so we store them in the struct when debugging. */ +#else + const U32 data_slot = add_data( pRExC_state, 2, "tu" ); STRLEN trie_charcount=0; - AV *trie_revcharmap; #endif + SV *re_trie_maxbuff; GET_RE_DEBUG_FLAGS_DECL; #ifndef DEBUGGING PERL_UNUSED_ARG(depth); #endif - Newxz( trie, 1, reg_trie_data ); + trie = PerlMemShared_calloc( 1, sizeof(reg_trie_data) ); trie->refcount = 1; trie->startstate = 1; trie->wordcount = word_count; - RExC_rx->data->data[ data_slot ] = (void*)trie; - Newxz( trie->charmap, 256, U16 ); + RExC_rxi->data->data[ data_slot ] = (void*)trie; + trie->charmap = PerlMemShared_calloc( 256, sizeof(U16) ); if (!(UTF && folder)) - Newxz( trie->bitmap, ANYOF_BITMAP_SIZE, char ); + trie->bitmap = PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 ); DEBUG_r({ - trie->words = newAV(); + trie_words = newAV(); }); - TRIE_REVCHARMAP(trie) = newAV(); re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1); if (!SvIOK(re_trie_maxbuff)) { @@ -1360,10 +1375,10 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs } } else { SV** svpp; - if ( !trie->widecharmap ) - trie->widecharmap = newHV(); + if ( !widecharmap ) + widecharmap = newHV(); - svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 1 ); + svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 1 ); if ( !svpp ) Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%"UVXf, uvc ); @@ -1387,11 +1402,11 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs DEBUG_TRIE_COMPILE_r( PerlIO_printf( Perl_debug_log, "%*sTRIE(%s): W:%d C:%d Uq:%d Min:%d Max:%d\n", (int)depth * 2 + 2,"", - ( trie->widecharmap ? "UTF8" : "NATIVE" ), (int)word_count, + ( widecharmap ? "UTF8" : "NATIVE" ), (int)word_count, (int)TRIE_CHARCOUNT(trie), trie->uniquecharcount, (int)trie->minlen, (int)trie->maxlen ) ); - Newxz( trie->wordlen, word_count, U32 ); + trie->wordlen = PerlMemShared_calloc( word_count, sizeof(U32) ); /* We now know what we are dealing with in terms of unique chars and @@ -1433,8 +1448,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs DEBUG_TRIE_COMPILE_MORE_r( PerlIO_printf( Perl_debug_log, "%*sCompiling trie using list compiler\n", (int)depth * 2 + 2, "")); - - Newxz( trie->states, TRIE_CHARCOUNT(trie) + 2, reg_trie_state ); + + trie->states = PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2, + sizeof(reg_trie_state) ); TRIE_LIST_NEW(1); next_alloc = 2; @@ -1458,7 +1474,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs if ( uvc < 256 ) { charid = trie->charmap[ uvc ]; } else { - SV** const svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 0); + SV** const svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 0); if ( !svpp ) { charid = 0; } else { @@ -1498,14 +1514,17 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs /* next alloc is the NEXT state to be allocated */ trie->statecount = next_alloc; - Renew( trie->states, next_alloc, reg_trie_state ); + trie->states = PerlMemShared_realloc( trie->states, next_alloc + * sizeof(reg_trie_state) ); /* and now dump it out before we compress it */ - DEBUG_TRIE_COMPILE_MORE_r( - dump_trie_interim_list(trie,next_alloc,depth+1) + DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_list(trie, widecharmap, + revcharmap, next_alloc, + depth+1) ); - Newxz( trie->trans, transcount ,reg_trie_trans ); + trie->trans + = PerlMemShared_calloc( transcount, sizeof(reg_trie_trans) ); { U32 state; U32 tp = 0; @@ -1536,7 +1555,10 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs } if ( transcount < tp + maxid - minid + 1) { transcount *= 2; - Renew( trie->trans, transcount, reg_trie_trans ); + trie->trans + = PerlMemShared_realloc( trie->trans, + transcount + * sizeof(reg_trie_trans) ); Zero( trie->trans + (transcount / 2), transcount / 2 , reg_trie_trans ); } base = trie->uniquecharcount + tp - minid; @@ -1616,9 +1638,11 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs "%*sCompiling trie using table compiler\n", (int)depth * 2 + 2, "")); - Newxz( trie->trans, ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount + 1, - reg_trie_trans ); - Newxz( trie->states, TRIE_CHARCOUNT(trie) + 2, reg_trie_state ); + trie->trans = PerlMemShared_calloc( ( TRIE_CHARCOUNT(trie) + 1 ) + * trie->uniquecharcount + 1, + sizeof(reg_trie_trans) ); + trie->states = PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2, + sizeof(reg_trie_state) ); next_alloc = trie->uniquecharcount + 1; @@ -1646,7 +1670,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs if ( uvc < 256 ) { charid = trie->charmap[ uvc ]; } else { - SV* const * const svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 0); + SV* const * const svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 0); charid = svpp ? (U16)SvIV(*svpp) : 0; } if ( charid ) { @@ -1669,9 +1693,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs } /* end second pass */ /* and now dump it out before we compress it */ - DEBUG_TRIE_COMPILE_MORE_r( - dump_trie_interim_table(trie,next_alloc,depth+1) - ); + DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_table(trie, widecharmap, + revcharmap, + next_alloc, depth+1)); { /* @@ -1775,7 +1799,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs } } trie->lasttrans = pos + 1; - Renew( trie->states, laststate, reg_trie_state); + trie->states = PerlMemShared_realloc( trie->states, laststate + * sizeof(reg_trie_state) ); DEBUG_TRIE_COMPILE_MORE_r( PerlIO_printf( Perl_debug_log, "%*sAlloc: %d Orig: %"IVdf" elements, Final:%"IVdf". Savings of %%%5.2f\n", @@ -1795,12 +1820,11 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs (UV)trie->lasttrans) ); /* resize the trans array to remove unused space */ - Renew( trie->trans, trie->lasttrans, reg_trie_trans); + trie->trans = PerlMemShared_realloc( trie->trans, trie->lasttrans + * sizeof(reg_trie_trans) ); /* and now dump out the compressed format */ - DEBUG_TRIE_COMPILE_r( - dump_trie(trie,depth+1) - ); + DEBUG_TRIE_COMPILE_r(dump_trie(trie, widecharmap, revcharmap, depth+1)); { /* Modify the program and insert the new TRIE node*/ U8 nodetype =(U8)(flags & 0xFF); @@ -1845,7 +1869,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs /* But first we check to see if there is a common prefix we can split out as an EXACT and put in front of the TRIE node. */ trie->startstate= 1; - if ( trie->bitmap && !trie->widecharmap && !trie->jump ) { + if ( trie->bitmap && !widecharmap && !trie->jump ) { U32 state; for ( state = 1 ; state < trie->statecount-1 ; state++ ) { U32 ofs = 0; @@ -1862,7 +1886,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs trie->trans[ base + ofs - trie->uniquecharcount ].check == state ) { if ( ++count > 1 ) { - SV **tmp = av_fetch( TRIE_REVCHARMAP(trie), ofs, 0); + SV **tmp = av_fetch( revcharmap, ofs, 0); const U8 *ch = (U8*)SvPV_nolen_const( *tmp ); if ( state == 1 ) break; if ( count == 2 ) { @@ -1873,7 +1897,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs (int)depth * 2 + 2, "", (UV)state)); if (idx >= 0) { - SV ** const tmp = av_fetch( TRIE_REVCHARMAP(trie), idx, 0); + SV ** const tmp = av_fetch( revcharmap, idx, 0); const U8 * const ch = (U8*)SvPV_nolen_const( *tmp ); TRIE_BITMAP_SET(trie,*ch); @@ -1893,22 +1917,31 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs } } if ( count == 1 ) { - SV **tmp = av_fetch( TRIE_REVCHARMAP(trie), idx, 0); - const char *ch = SvPV_nolen_const( *tmp ); - DEBUG_OPTIMISE_r( + SV **tmp = av_fetch( revcharmap, idx, 0); + char *ch = SvPV_nolen( *tmp ); + DEBUG_OPTIMISE_r({ + SV *sv=sv_newmortal(); PerlIO_printf( Perl_debug_log, "%*sPrefix State: %"UVuf" Idx:%"UVuf" Char='%s'\n", (int)depth * 2 + 2, "", - (UV)state, (UV)idx, ch) - ); + (UV)state, (UV)idx, + pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 6, + PL_colors[0], PL_colors[1], + (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) | + PERL_PV_ESCAPE_FIRSTCHAR + ) + ); + }); if ( state==1 ) { OP( convert ) = nodetype; str=STRING(convert); STR_LEN(convert)=0; } - *str++=*ch; - STR_LEN(convert)++; - + while (*ch) { + *str++ = *ch++; + STR_LEN(convert)++; + } + } else { #ifdef DEBUGGING if (state>1) @@ -1925,11 +1958,21 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs trie->maxlen -= (state - 1); DEBUG_r({ regnode *fix = convert; + U32 word = trie->wordcount; mjd_nodelen++; Set_Node_Offset_Length(convert, mjd_offset, state - 1); while( ++fix < n ) { Set_Node_Offset_Length(fix, 0, 0); } + while (word--) { + SV ** const tmp = av_fetch( trie_words, word, 0 ); + if (tmp) { + if ( STR_LEN(convert) <= SvCUR(*tmp) ) + sv_chop(*tmp, SvPV_nolen(*tmp) + STR_LEN(convert)); + else + sv_chop(*tmp, SvPV_nolen(*tmp) + SvCUR(*tmp)); + } + } }); if (trie->maxlen) { convert = n; @@ -1956,7 +1999,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs { OP( convert ) = TRIEC; Copy(trie->bitmap, ((struct regnode_charclass *)convert)->bitmap, ANYOF_BITMAP_SIZE, char); - Safefree(trie->bitmap); + PerlMemShared_free(trie->bitmap); trie->bitmap= NULL; } else OP( convert ) = TRIE; @@ -1990,8 +2033,12 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs Set_Node_Offset_Length(convert,mjd_offset,mjd_nodelen); }); } /* end node insert */ -#ifndef DEBUGGING - SvREFCNT_dec(TRIE_REVCHARMAP(trie)); + RExC_rxi->data->data[ data_slot + 1 ] = (void*)widecharmap; +#ifdef DEBUGGING + RExC_rxi->data->data[ data_slot + TRIE_WORDS_OFFSET ] = (void*)trie_words; + RExC_rxi->data->data[ data_slot + 3 ] = (void*)revcharmap; +#else + SvREFCNT_dec(revcharmap); #endif return trie->jump ? MADE_JUMP_TRIE @@ -2021,7 +2068,8 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source, regnode try 'g' and succeed, prodceding to match 'cdgu'. */ /* add a fail transition */ - reg_trie_data *trie=(reg_trie_data *)RExC_rx->data->data[ARG(source)]; + const U32 trie_offset = ARG(source); + reg_trie_data *trie=(reg_trie_data *)RExC_rxi->data->data[trie_offset]; U32 *q; const U32 ucharcount = trie->uniquecharcount; const U32 numstates = trie->statecount; @@ -2040,13 +2088,13 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source, regnode ARG_SET( stclass, data_slot ); - Newxz( aho, 1, reg_ac_data ); - RExC_rx->data->data[ data_slot ] = (void*)aho; - aho->trie=trie; - aho->states=(reg_trie_state *)savepvn((const char*)trie->states, - numstates * sizeof(reg_trie_state)); + aho = PerlMemShared_calloc( 1, sizeof(reg_ac_data) ); + RExC_rxi->data->data[ data_slot ] = (void*)aho; + aho->trie=trie_offset; + aho->states=(reg_trie_state *)PerlMemShared_malloc( numstates * sizeof(reg_trie_state) ); + Copy( trie->states, aho->states, numstates, reg_trie_state ); Newxz( q, numstates, U32); - Newxz( aho->fail, numstates, U32 ); + aho->fail = PerlMemShared_calloc( numstates, sizeof(U32) ); aho->refcount = 1; fail = aho->fail; /* initialize fail[0..1] to be 1 so that we always have @@ -2119,14 +2167,14 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source, regnode #endif #define DEBUG_PEEP(str,scan,depth) \ - DEBUG_OPTIMISE_r({ \ + DEBUG_OPTIMISE_r({if (scan){ \ SV * const mysv=sv_newmortal(); \ regnode *Next = regnext(scan); \ regprop(RExC_rx, mysv, scan); \ PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s (%d)\n", \ (int)depth*2, "", REG_NODE_NUM(scan), SvPV_nolen_const(mysv),\ Next ? (REG_NODE_NUM(Next)) : 0 ); \ - }); + }}); @@ -2289,6 +2337,20 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, I32 *min, U32 flags Newx(and_withp,1,struct regnode_charclass_class); \ SAVEFREEPV(and_withp) +/* this is a chain of data about sub patterns we are processing that + need to be handled seperately/specially in study_chunk. Its so + we can simulate recursion without losing state. */ +struct scan_frame; +typedef struct scan_frame { + regnode *last; /* last node to process in this frame */ + regnode *next; /* next node to process when last is reached */ + struct scan_frame *prev; /*previous frame*/ + I32 stop; /* what stopparen do we use */ +} scan_frame; + + +#define SCAN_COMMIT(s, data, m) scan_commit(s, data, m, is_inf) + STATIC I32 S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *minlenp, I32 *deltap, @@ -2317,7 +2379,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SV *re_trie_maxbuff = NULL; regnode *first_non_open = scan; I32 stopmin = I32_MAX; + scan_frame *frame = NULL; + GET_RE_DEBUG_FLAGS_DECL; + #ifdef DEBUGGING StructCopy(&zero_scan_data, &data_fake, scan_data_t); #endif @@ -2328,9 +2393,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } - while (scan && OP(scan) != END && scan < last) { + fake_study_recurse: + while ( scan && OP(scan) != END && scan < last ){ /* Peephole optimizer: */ - DEBUG_STUDYDATA(data,depth); + DEBUG_STUDYDATA("Peep:", data,depth); DEBUG_PEEP("Peep",scan,depth); JOIN_EXACT(scan,&min,0); @@ -2362,12 +2428,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, /* The principal pseudo-switch. Cannot be a switch, since we look into several different things. */ if (OP(scan) == BRANCH || OP(scan) == BRANCHJ - || OP(scan) == IFTHEN || OP(scan) == SUSPEND) { + || OP(scan) == IFTHEN) { next = regnext(scan); code = OP(scan); /* demq: the op(next)==code check is to see if we have "branch-branch" AFAICT */ - if (OP(next) == code || code == IFTHEN || code == SUSPEND) { + if (OP(next) == code || code == IFTHEN) { /* NOTE - There is similar code to this block below for handling TRIE nodes on a re-study. If you change stuff here check there too. */ @@ -2375,8 +2441,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, struct regnode_charclass_class accum; regnode * const startbranch=scan; - if (flags & SCF_DO_SUBSTR) /* XXXX Add !SUSPEND? */ - scan_commit(pRExC_state, data, minlenp); /* Cannot merge strings after this. */ + if (flags & SCF_DO_SUBSTR) + SCAN_COMMIT(pRExC_state, data, minlenp); /* Cannot merge strings after this. */ if (flags & SCF_DO_STCLASS) cl_init_zero(pRExC_state, &accum); @@ -2392,6 +2458,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } else data_fake.last_closep = &fake; + + data_fake.pos_delta = delta; next = regnext(scan); scan = NEXTOPER(scan); if (code != BRANCH) @@ -2400,7 +2468,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, cl_init(pRExC_state, &this_class); data_fake.start_class = &this_class; f = SCF_DO_STCLASS_AND; - } + } if (flags & SCF_WHILEM_VISITED_POS) f |= SCF_WHILEM_VISITED_POS; @@ -2431,8 +2499,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } if (flags & SCF_DO_STCLASS) cl_or(pRExC_state, &accum, &this_class); - if (code == SUSPEND) - break; } if (code == IFTHEN && num < 2) /* Empty ELSE branch */ min1 = 0; @@ -2671,6 +2737,63 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } else /* single branch is optimized. */ scan = NEXTOPER(scan); continue; + } else if (OP(scan) == SUSPEND || OP(scan) == GOSUB || OP(scan) == GOSTART) { + scan_frame *newframe = NULL; + I32 paren; + regnode *start; + regnode *end; + + if (OP(scan) != SUSPEND) { + /* set the pointer */ + if (OP(scan) == GOSUB) { + paren = ARG(scan); + RExC_recurse[ARG2L(scan)] = scan; + start = RExC_open_parens[paren-1]; + end = RExC_close_parens[paren-1]; + } else { + paren = 0; + start = RExC_rxi->program + 1; + end = RExC_opend; + } + if (!recursed) { + Newxz(recursed, (((RExC_npar)>>3) +1), U8); + SAVEFREEPV(recursed); + } + if (!PAREN_TEST(recursed,paren+1)) { + PAREN_SET(recursed,paren+1); + Newx(newframe,1,scan_frame); + } else { + if (flags & SCF_DO_SUBSTR) { + SCAN_COMMIT(pRExC_state,data,minlenp); + data->longest = &(data->longest_float); + } + is_inf = is_inf_internal = 1; + if (flags & SCF_DO_STCLASS_OR) /* Allow everything */ + cl_anything(pRExC_state, data->start_class); + flags &= ~SCF_DO_STCLASS; + } + } else { + Newx(newframe,1,scan_frame); + paren = stopparen; + start = scan+2; + end = regnext(scan); + } + if (newframe) { + assert(start); + assert(end); + SAVEFREEPV(newframe); + newframe->next = regnext(scan); + newframe->last = last; + newframe->stop = stopparen; + newframe->prev = frame; + + frame = newframe; + scan = start; + stopparen = paren; + last = end; + + continue; + } } else if (OP(scan) == EXACT) { I32 l = STR_LEN(scan); @@ -2743,7 +2866,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, /* Search for fixed substrings supports EXACT only. */ if (flags & SCF_DO_SUBSTR) { assert(data); - scan_commit(pRExC_state, data, minlenp); + SCAN_COMMIT(pRExC_state, data, minlenp); } if (UTF) { const U8 * const s = (U8 *)STRING(scan); @@ -2822,7 +2945,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, is_inf = is_inf_internal = 1; scan = regnext(scan); if (flags & SCF_DO_SUBSTR) { - scan_commit(pRExC_state, data, minlenp); /* Cannot extend fixed substrings */ + SCAN_COMMIT(pRExC_state, data, minlenp); /* Cannot extend fixed substrings */ data->longest = &(data->longest_float); } goto optimize_curly_tail; @@ -2845,7 +2968,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, next_is_eval = (OP(scan) == EVAL); do_curly: if (flags & SCF_DO_SUBSTR) { - if (mincount == 0) scan_commit(pRExC_state,data,minlenp); /* Cannot extend fixed substrings */ + if (mincount == 0) SCAN_COMMIT(pRExC_state,data,minlenp); /* Cannot extend fixed substrings */ pos_before = data->pos_min; } if (data) { @@ -3116,7 +3239,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if (mincount != maxcount) { /* Cannot extend fixed substrings found inside the group. */ - scan_commit(pRExC_state,data,minlenp); + SCAN_COMMIT(pRExC_state,data,minlenp); if (mincount && last_str) { SV * const sv = data->last_found; MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ? @@ -3148,7 +3271,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, continue; default: /* REF and CLUMP only? */ if (flags & SCF_DO_SUBSTR) { - scan_commit(pRExC_state,data,minlenp); /* Cannot expect anything... */ + SCAN_COMMIT(pRExC_state,data,minlenp); /* Cannot expect anything... */ data->longest = &(data->longest_float); } is_inf = is_inf_internal = 1; @@ -3162,7 +3285,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, int value = 0; if (flags & SCF_DO_SUBSTR) { - scan_commit(pRExC_state,data,minlenp); + SCAN_COMMIT(pRExC_state,data,minlenp); data->pos_min++; } min++; @@ -3386,6 +3509,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } else data_fake.last_closep = &fake; + data_fake.pos_delta = delta; if ( flags & SCF_DO_STCLASS && !scan->flags && OP(scan) == IFMATCH ) { /* Lookahead */ cl_init(pRExC_state, &intrnl); @@ -3400,10 +3524,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, last, &data_fake, stopparen, recursed, NULL, f, depth+1); if (scan->flags) { if (deltanext) { - vFAIL("Variable length lookbehind not implemented"); + FAIL("Variable length lookbehind not implemented"); } else if (minnext > (I32)U8_MAX) { - vFAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX); + FAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX); } scan->flags = (U8)minnext; } @@ -3450,13 +3574,14 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if ((flags & SCF_DO_SUBSTR) && data->last_found) { f |= SCF_DO_SUBSTR; if (scan->flags) - scan_commit(pRExC_state, &data_fake,minlenp); + SCAN_COMMIT(pRExC_state, &data_fake,minlenp); data_fake.last_found=newSVsv(data->last_found); } } else data_fake.last_closep = &fake; data_fake.flags = 0; + data_fake.pos_delta = delta; if (is_inf) data_fake.flags |= SF_IS_INF; if ( flags & SCF_DO_STCLASS && !scan->flags @@ -3474,10 +3599,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, last, &data_fake, stopparen, recursed, NULL, f,depth+1); if (scan->flags) { if (deltanext) { - vFAIL("Variable length lookbehind not implemented"); + FAIL("Variable length lookbehind not implemented"); } else if (*minnextp > (I32)U8_MAX) { - vFAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX); + FAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX); } scan->flags = (U8)*minnextp; } @@ -3500,7 +3625,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if ((flags & SCF_DO_SUBSTR) && data_fake.last_found) { if (RExC_rx->minlen<*minnextp) RExC_rx->minlen=*minnextp; - scan_commit(pRExC_state, &data_fake, minnextp); + SCAN_COMMIT(pRExC_state, &data_fake, minnextp); SvREFCNT_dec(data_fake.last_found); if ( data_fake.minlen_fixed != minlenp ) @@ -3540,68 +3665,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if (data) *(data->last_closep) = ARG(scan); } - else if (OP(scan) == GOSUB || OP(scan) == GOSTART) { - /* set the pointer */ - I32 paren; - regnode *start; - regnode *end; - if (OP(scan) == GOSUB) { - paren = ARG(scan); - RExC_recurse[ARG2L(scan)] = scan; - start = RExC_open_parens[paren-1]; - end = RExC_close_parens[paren-1]; - } else { - paren = 0; - start = RExC_rx->program + 1; - end = RExC_opend; - } - assert(start); - assert(end); - if (!recursed) { - Newxz(recursed, (((RExC_npar)>>3) +1), U8); - SAVEFREEPV(recursed); - } - if (!PAREN_TEST(recursed,paren+1)) { - I32 deltanext = 0; - PAREN_SET(recursed,paren+1); - - DEBUG_PEEP("goto",start,depth); - min += study_chunk( - pRExC_state, - &start, - minlenp, - &deltanext, - end+1, - data, - paren, - recursed, - and_withp, - flags,depth+1); - delta+=deltanext; - if (deltanext == I32_MAX) { - is_inf = is_inf_internal = 1; - delta=deltanext; - } - DEBUG_PEEP("rtrn",end,depth); - PAREN_UNSET(recursed,paren+1); - } else { - if (flags & SCF_DO_SUBSTR) { - scan_commit(pRExC_state,data,minlenp); - data->longest = &(data->longest_float); - } - is_inf = is_inf_internal = 1; - if (flags & SCF_DO_STCLASS_OR) /* Allow everything */ - cl_anything(pRExC_state, data->start_class); - flags &= ~SCF_DO_STCLASS; - } - } else if (OP(scan) == EVAL) { if (data) data->flags |= SF_HAS_EVAL; } else if ( PL_regkind[OP(scan)] == ENDLIKE ) { if (flags & SCF_DO_SUBSTR) { - scan_commit(pRExC_state,data,minlenp); + SCAN_COMMIT(pRExC_state,data,minlenp); flags &= ~SCF_DO_SUBSTR; } if (data && OP(scan)==ACCEPT) { @@ -3613,7 +3683,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */ { if (flags & SCF_DO_SUBSTR) { - scan_commit(pRExC_state,data,minlenp); + SCAN_COMMIT(pRExC_state,data,minlenp); data->longest = &(data->longest_float); } is_inf = is_inf_internal = 1; @@ -3621,6 +3691,19 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, cl_anything(pRExC_state, data->start_class); flags &= ~SCF_DO_STCLASS; } + else if (OP(scan) == GPOS) { + if (!(RExC_rx->extflags & RXf_GPOS_FLOAT) && + !(delta || is_inf || (data && data->pos_delta))) + { + if (!(RExC_rx->extflags & RXf_ANCH) && (flags & SCF_DO_SUBSTR)) + RExC_rx->extflags |= RXf_ANCH_GPOS; + if (RExC_rx->gofs < (U32)min) + RExC_rx->gofs = min; + } else { + RExC_rx->extflags |= RXf_GPOS_FLOAT; + RExC_rx->gofs = 0; + } + } #ifdef TRIE_STUDY_OPT #ifdef FULL_TRIE_STUDY else if (PL_regkind[OP(scan)] == TRIE) { @@ -3629,12 +3712,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, check there too. */ regnode *trie_node= scan; regnode *tail= regnext(scan); - reg_trie_data *trie = (reg_trie_data*)RExC_rx->data->data[ ARG(scan) ]; + reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ]; I32 max1 = 0, min1 = I32_MAX; struct regnode_charclass_class accum; if (flags & SCF_DO_SUBSTR) /* XXXX Add !SUSPEND? */ - scan_commit(pRExC_state, data,minlenp); /* Cannot merge strings after this. */ + SCAN_COMMIT(pRExC_state, data,minlenp); /* Cannot merge strings after this. */ if (flags & SCF_DO_STCLASS) cl_init_zero(pRExC_state, &accum); @@ -3657,7 +3740,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } else data_fake.last_closep = &fake; - + data_fake.pos_delta = delta; if (flags & SCF_DO_STCLASS) { cl_init(pRExC_state, &this_class); data_fake.start_class = &this_class; @@ -3744,14 +3827,14 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } #else else if (PL_regkind[OP(scan)] == TRIE) { - reg_trie_data *trie = (reg_trie_data*)RExC_rx->data->data[ ARG(scan) ]; + reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ]; U8*bang=NULL; min += trie->minlen; delta += (trie->maxlen - trie->minlen); flags &= ~SCF_DO_STCLASS; /* xxx */ if (flags & SCF_DO_SUBSTR) { - scan_commit(pRExC_state,data,minlenp); /* Cannot expect anything... */ + SCAN_COMMIT(pRExC_state,data,minlenp); /* Cannot expect anything... */ data->pos_min += trie->minlen; data->pos_delta += (trie->maxlen - trie->minlen); if (trie->maxlen != trie->minlen) @@ -3765,8 +3848,18 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, /* Else: zero-length, ignore. */ scan = regnext(scan); } + if (frame) { + last = frame->last; + scan = frame->next; + stopparen = frame->stop; + frame = frame->prev; + goto fake_study_recurse; + } finish: + assert(!frame); + DEBUG_STUDYDATA("pre-fin:",data,depth); + *scanp = scan; *deltap = is_inf_internal ? I32_MAX : delta; if (flags & SCF_DO_SUBSTR && is_inf) @@ -3786,32 +3879,29 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if (flags & SCF_TRIE_RESTUDY) data->flags |= SCF_TRIE_RESTUDY; - DEBUG_STUDYDATA(data,depth); + DEBUG_STUDYDATA("post-fin:",data,depth); return min < stopmin ? min : stopmin; } -STATIC I32 -S_add_data(RExC_state_t *pRExC_state, I32 n, const char *s) +STATIC U32 +S_add_data(RExC_state_t *pRExC_state, U32 n, const char *s) { - if (RExC_rx->data) { - const U32 count = RExC_rx->data->count; - Renewc(RExC_rx->data, - sizeof(*RExC_rx->data) + sizeof(void*) * (count + n - 1), - char, struct reg_data); - Renew(RExC_rx->data->what, count + n, U8); - RExC_rx->data->count += n; - } - else { - Newxc(RExC_rx->data, sizeof(*RExC_rx->data) + sizeof(void*) * (n - 1), - char, struct reg_data); - Newx(RExC_rx->data->what, n, U8); - RExC_rx->data->count = n; - } - Copy(s, RExC_rx->data->what + RExC_rx->data->count - n, n, U8); - return RExC_rx->data->count - n; + U32 count = RExC_rxi->data ? RExC_rxi->data->count : 0; + + Renewc(RExC_rxi->data, + sizeof(*RExC_rxi->data) + sizeof(void*) * (count + n - 1), + char, struct reg_data); + if(count) + Renew(RExC_rxi->data->what, count + n, U8); + else + Newx(RExC_rxi->data->what, n, U8); + RExC_rxi->data->count = count + n; + Copy(s, RExC_rxi->data->what + count, n, U8); + return count; } +/*XXX: todo make this not included in a non debugging perl */ #ifndef PERL_IN_XSUB_RE void Perl_reginitcolors(pTHX) @@ -3905,6 +3995,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) #endif BEGIN_BLOCK register regexp *r; + register regexp_internal *ri; regnode *scan; regnode *first; I32 flags; @@ -3945,7 +4036,6 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) RExC_end = xend; RExC_naughty = 0; RExC_npar = 1; - RExC_cpar = 1; RExC_nestroot = 0; RExC_size = 0L; RExC_emit = &PL_regdummy; @@ -3992,32 +4082,28 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) /* Allocate space and zero-initialize. Note, the two step process of zeroing when in debug mode, thus anything assigned has to happen after that */ - Newxc(r, sizeof(regexp) + (unsigned)RExC_size * sizeof(regnode), - char, regexp); - if (r == NULL) + Newxz(r, 1, regexp); + Newxc(ri, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode), + char, regexp_internal); + if ( r == NULL || ri == NULL ) FAIL("Regexp out of space"); #ifdef DEBUGGING /* avoid reading uninitialized memory in DEBUGGING code in study_chunk() */ - Zero(r, sizeof(regexp) + (unsigned)RExC_size * sizeof(regnode), char); + Zero(ri, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode), char); +#else + /* bulk initialize base fields with 0. */ + Zero(ri, sizeof(regexp_internal), char); #endif - /* initialization begins here */ + + /* non-zero initialization begins here */ + RXi_SET( r, ri ); r->engine= RE_ENGINE_PTR; r->refcnt = 1; r->prelen = xend - exp; r->precomp = savepvn(RExC_precomp, r->prelen); - r->subbeg = NULL; -#ifdef PERL_OLD_COPY_ON_WRITE - r->saved_copy = NULL; -#endif - r->reganch = pm->op_pmflags & PMf_COMPILETIME; + r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME; + r->intflags = 0; r->nparens = RExC_npar - 1; /* set early to validate backrefs */ - r->lastparen = 0; /* mg.c reads this. */ - - r->substrs = 0; /* Useful during FAIL. */ - r->startp = 0; /* Useful during FAIL. */ - r->endp = 0; - r->swap = NULL; - r->paren_names = 0; if (RExC_seen & REG_SEEN_RECURSE) { Newxz(RExC_open_parens, RExC_npar,regnode *); @@ -4027,16 +4113,17 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) } /* Useful during FAIL. */ - Newxz(r->offsets, 2*RExC_size+1, U32); /* MJD 20001228 */ - if (r->offsets) { - r->offsets[0] = RExC_size; + Newxz(ri->offsets, 2*RExC_size+1, U32); /* MJD 20001228 */ + if (ri->offsets) { + ri->offsets[0] = RExC_size; } DEBUG_OFFSETS_r(PerlIO_printf(Perl_debug_log, "%s %"UVuf" bytes for offset annotations.\n", - r->offsets ? "Got" : "Couldn't get", + ri->offsets ? "Got" : "Couldn't get", (UV)((2*RExC_size+1) * sizeof(U32)))); RExC_rx = r; + RExC_rxi = ri; /* Second pass: emit code. */ RExC_flags = pm->op_pmflags; /* don't let top level (?i) bleed */ @@ -4044,18 +4131,16 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) RExC_end = xend; RExC_naughty = 0; RExC_npar = 1; - RExC_cpar = 1; - RExC_emit_start = r->program; - RExC_emit = r->program; + RExC_emit_start = ri->program; + RExC_emit = ri->program; #ifdef DEBUGGING /* put a sentinal on the end of the program so we can check for overwrites */ - r->program[RExC_size].type = 255; + ri->program[RExC_size].type = 255; #endif /* Store the count of eval-groups for security checks: */ - RExC_emit->next_off = (RExC_seen_evals > (I32)U16_MAX) ? U16_MAX : (U16)RExC_seen_evals; + RExC_rx->seen_evals = RExC_seen_evals; REGC((U8)REG_MAGIC, (char*) RExC_emit++); - r->data = 0; if (reg(pRExC_state, 0, &flags,1) == NULL) return(NULL); @@ -4096,14 +4181,14 @@ reStudy: #endif /* Dig out information for optimizations. */ - r->reganch = pm->op_pmflags & PMf_COMPILETIME; /* Again? */ + r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME; /* Again? */ pm->op_pmflags = RExC_flags; if (UTF) - r->reganch |= ROPT_UTF8; /* Unicode in it? */ - r->regstclass = NULL; + r->extflags |= RXf_UTF8; /* Unicode in it? */ + ri->regstclass = NULL; if (RExC_naughty >= 10) /* Probably an expensive pattern. */ - r->reganch |= ROPT_NAUGHTY; - scan = r->program + 1; /* First BRANCH. */ + r->intflags |= PREGf_NAUGHTY; + scan = ri->program + 1; /* First BRANCH. */ /* testing for BRANCH here tells us whether there is "must appear" data in the pattern. If there is then we can use it for optimisations */ @@ -4146,66 +4231,68 @@ reStudy: if (OP(first) == EXACT) NOOP; /* Empty, get anchored substr later. */ else if ((OP(first) == EXACTF || OP(first) == EXACTFL)) - r->regstclass = first; + ri->regstclass = first; } #ifdef TRIE_STCLASS else if (PL_regkind[OP(first)] == TRIE && - ((reg_trie_data *)r->data->data[ ARG(first) ])->minlen>0) + ((reg_trie_data *)ri->data->data[ ARG(first) ])->minlen>0) { regnode *trie_op; /* this can happen only on restudy */ if ( OP(first) == TRIE ) { - struct regnode_1 *trieop; - Newxz(trieop,1,struct regnode_1); + struct regnode_1 *trieop = + PerlMemShared_calloc(1, sizeof(struct regnode_1)); StructCopy(first,trieop,struct regnode_1); trie_op=(regnode *)trieop; } else { - struct regnode_charclass *trieop; - Newxz(trieop,1,struct regnode_charclass); + struct regnode_charclass *trieop = + PerlMemShared_calloc(1, sizeof(struct regnode_charclass)); StructCopy(first,trieop,struct regnode_charclass); trie_op=(regnode *)trieop; } OP(trie_op)+=2; make_trie_failtable(pRExC_state, (regnode *)first, trie_op, 0); - r->regstclass = trie_op; + ri->regstclass = trie_op; } #endif else if (strchr((const char*)PL_simple,OP(first))) - r->regstclass = first; + ri->regstclass = first; else if (PL_regkind[OP(first)] == BOUND || PL_regkind[OP(first)] == NBOUND) - r->regstclass = first; + ri->regstclass = first; else if (PL_regkind[OP(first)] == BOL) { - r->reganch |= (OP(first) == MBOL - ? ROPT_ANCH_MBOL + r->extflags |= (OP(first) == MBOL + ? RXf_ANCH_MBOL : (OP(first) == SBOL - ? ROPT_ANCH_SBOL - : ROPT_ANCH_BOL)); + ? RXf_ANCH_SBOL + : RXf_ANCH_BOL)); first = NEXTOPER(first); goto again; } else if (OP(first) == GPOS) { - r->reganch |= ROPT_ANCH_GPOS; + r->extflags |= RXf_ANCH_GPOS; first = NEXTOPER(first); goto again; } - else if (!sawopen && (OP(first) == STAR && + else if ((!sawopen || !RExC_sawback) && + (OP(first) == STAR && PL_regkind[OP(NEXTOPER(first))] == REG_ANY) && - !(r->reganch & ROPT_ANCH) ) + !(r->extflags & RXf_ANCH) && !(RExC_seen & REG_SEEN_EVAL)) { /* turn .* into ^.* with an implied $*=1 */ const int type = (OP(NEXTOPER(first)) == REG_ANY) - ? ROPT_ANCH_MBOL - : ROPT_ANCH_SBOL; - r->reganch |= type | ROPT_IMPLICIT; + ? RXf_ANCH_MBOL + : RXf_ANCH_SBOL; + r->extflags |= type; + r->intflags |= PREGf_IMPLICIT; first = NEXTOPER(first); goto again; } if (sawplus && (!sawopen || !RExC_sawback) && !(RExC_seen & REG_SEEN_EVAL)) /* May examine pos and $& */ /* x+ must match at the 1st pos of run of x's */ - r->reganch |= ROPT_SKIP; + r->intflags |= PREGf_SKIP; /* Scan is after the zeroth branch, first is atomic matcher. */ #ifdef TRIE_STUDY_OPT @@ -4239,7 +4326,7 @@ reStudy: data.last_found = newSVpvs(""); data.longest = &(data.longest_fixed); first = scan; - if (!r->regstclass) { + if (!ri->regstclass) { cl_init(pRExC_state, &ch_class); data.start_class = &ch_class; stclass_flag = SCF_DO_STCLASS_AND; @@ -4258,9 +4345,10 @@ reStudy: if ( RExC_npar == 1 && data.longest == &(data.longest_fixed) && data.last_start_min == 0 && data.last_end > 0 && !RExC_seen_zerolen - && (!(RExC_seen & REG_SEEN_GPOS) || (r->reganch & ROPT_ANCH_GPOS))) - r->reganch |= ROPT_CHECK_ALL; - scan_commit(pRExC_state, &data,&minlen); + && !(RExC_seen & REG_SEEN_VERBARG) + && (!(RExC_seen & REG_SEEN_GPOS) || (r->extflags & RXf_ANCH_GPOS))) + r->extflags |= RXf_CHECK_ALL; + scan_commit(pRExC_state, &data,&minlen,0); SvREFCNT_dec(data.last_found); /* Note that code very similar to this but for anchored string @@ -4271,7 +4359,7 @@ reStudy: if (longest_float_length || (data.flags & SF_FL_BEFORE_EOL && (!(data.flags & SF_FL_BEFORE_MEOL) - || (RExC_flags & PMf_MULTILINE)))) + || (RExC_flags & RXf_PMf_MULTILINE)))) { I32 t,ml; @@ -4305,7 +4393,7 @@ reStudy: t = (data.flags & SF_FL_BEFORE_EOL /* Can't have SEOL and MULTI */ && (!(data.flags & SF_FL_BEFORE_MEOL) - || (RExC_flags & PMf_MULTILINE))); + || (RExC_flags & RXf_PMf_MULTILINE))); fbm_compile(data.longest_float, t ? FBMcf_TAIL : 0); } else { @@ -4323,7 +4411,7 @@ reStudy: if (longest_fixed_length || (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */ && (!(data.flags & SF_FIX_BEFORE_MEOL) - || (RExC_flags & PMf_MULTILINE)))) + || (RExC_flags & RXf_PMf_MULTILINE)))) { I32 t,ml; @@ -4349,7 +4437,7 @@ reStudy: t = (data.flags & SF_FIX_BEFORE_EOL /* Can't have SEOL and MULTI */ && (!(data.flags & SF_FIX_BEFORE_MEOL) - || (RExC_flags & PMf_MULTILINE))); + || (RExC_flags & RXf_PMf_MULTILINE))); fbm_compile(data.longest_fixed, t ? FBMcf_TAIL : 0); } else { @@ -4357,23 +4445,23 @@ reStudy: SvREFCNT_dec(data.longest_fixed); longest_fixed_length = 0; } - if (r->regstclass - && (OP(r->regstclass) == REG_ANY || OP(r->regstclass) == SANY)) - r->regstclass = NULL; + if (ri->regstclass + && (OP(ri->regstclass) == REG_ANY || OP(ri->regstclass) == SANY)) + ri->regstclass = NULL; if ((!(r->anchored_substr || r->anchored_utf8) || r->anchored_offset) && stclass_flag && !(data.start_class->flags & ANYOF_EOS) && !cl_is_anything(data.start_class)) { - const I32 n = add_data(pRExC_state, 1, "f"); + const U32 n = add_data(pRExC_state, 1, "f"); - Newx(RExC_rx->data->data[n], 1, + Newx(RExC_rxi->data->data[n], 1, struct regnode_charclass_class); StructCopy(data.start_class, - (struct regnode_charclass_class*)RExC_rx->data->data[n], + (struct regnode_charclass_class*)RExC_rxi->data->data[n], struct regnode_charclass_class); - r->regstclass = (regnode*)RExC_rx->data->data[n]; - r->reganch &= ~ROPT_SKIP; /* Used in find_byclass(). */ + ri->regstclass = (regnode*)RExC_rxi->data->data[n]; + r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */ DEBUG_COMPILE_r({ SV *sv = sv_newmortal(); regprop(r, sv, (regnode*)data.start_class); PerlIO_printf(Perl_debug_log, @@ -4387,8 +4475,8 @@ reStudy: r->check_substr = r->anchored_substr; r->check_utf8 = r->anchored_utf8; r->check_offset_min = r->check_offset_max = r->anchored_offset; - if (r->reganch & ROPT_ANCH_SINGLE) - r->reganch |= ROPT_NOSCAN; + if (r->extflags & RXf_ANCH_SINGLE) + r->extflags |= RXf_NOSCAN; } else { r->check_end_shift = r->float_end_shift; @@ -4399,10 +4487,10 @@ reStudy: } /* XXXX Currently intuiting is not compatible with ANCH_GPOS. This should be changed ASAP! */ - if ((r->check_substr || r->check_utf8) && !(r->reganch & ROPT_ANCH_GPOS)) { - r->reganch |= RE_USE_INTUIT; + if ((r->check_substr || r->check_utf8) && !(r->extflags & RXf_ANCH_GPOS)) { + r->extflags |= RXf_USE_INTUIT; if (SvTAIL(r->check_substr ? r->check_substr : r->check_utf8)) - r->reganch |= RE_INTUIT_TAIL; + r->extflags |= RXf_INTUIT_TAIL; } /* XXX Unneeded? dmq (shouldn't as this is handled elsewhere) if ( (STRLEN)minlen < longest_float_length ) @@ -4419,7 +4507,7 @@ reStudy: DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, "\nMulti Top Level\n")); - scan = r->program + 1; + scan = ri->program + 1; cl_init(pRExC_state, &ch_class); data.start_class = &ch_class; data.last_closep = &last_close; @@ -4435,15 +4523,15 @@ reStudy: if (!(data.start_class->flags & ANYOF_EOS) && !cl_is_anything(data.start_class)) { - const I32 n = add_data(pRExC_state, 1, "f"); + const U32 n = add_data(pRExC_state, 1, "f"); - Newx(RExC_rx->data->data[n], 1, + Newx(RExC_rxi->data->data[n], 1, struct regnode_charclass_class); StructCopy(data.start_class, - (struct regnode_charclass_class*)RExC_rx->data->data[n], + (struct regnode_charclass_class*)RExC_rxi->data->data[n], struct regnode_charclass_class); - r->regstclass = (regnode*)RExC_rx->data->data[n]; - r->reganch &= ~ROPT_SKIP; /* Used in find_byclass(). */ + ri->regstclass = (regnode*)RExC_rxi->data->data[n]; + r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */ DEBUG_COMPILE_r({ SV* sv = sv_newmortal(); regprop(r, sv, (regnode*)data.start_class); PerlIO_printf(Perl_debug_log, @@ -4456,24 +4544,24 @@ reStudy: the "real" pattern. */ DEBUG_OPTIMISE_r({ PerlIO_printf(Perl_debug_log,"minlen: %"IVdf" r->minlen:%"IVdf"\n", - minlen, r->minlen); + (IV)minlen, (IV)r->minlen); }); r->minlenret = minlen; if (r->minlen < minlen) r->minlen = minlen; if (RExC_seen & REG_SEEN_GPOS) - r->reganch |= ROPT_GPOS_SEEN; + r->extflags |= RXf_GPOS_SEEN; if (RExC_seen & REG_SEEN_LOOKBEHIND) - r->reganch |= ROPT_LOOKBEHIND_SEEN; + r->extflags |= RXf_LOOKBEHIND_SEEN; if (RExC_seen & REG_SEEN_EVAL) - r->reganch |= ROPT_EVAL_SEEN; + r->extflags |= RXf_EVAL_SEEN; if (RExC_seen & REG_SEEN_CANY) - r->reganch |= ROPT_CANY_SEEN; + r->extflags |= RXf_CANY_SEEN; if (RExC_seen & REG_SEEN_VERBARG) - r->reganch |= ROPT_VERBARG_SEEN; + r->intflags |= PREGf_VERBARG_SEEN; if (RExC_seen & REG_SEEN_CUTGROUP) - r->reganch |= ROPT_CUTGROUP_SEEN; + r->intflags |= PREGf_CUTGROUP_SEEN; if (RExC_paren_names) r->paren_names = (HV*)SvREFCNT_inc(RExC_paren_names); else @@ -4493,15 +4581,15 @@ reStudy: PerlIO_printf(Perl_debug_log,"Final program:\n"); regdump(r); }); - DEBUG_OFFSETS_r(if (r->offsets) { - const U32 len = r->offsets[0]; + DEBUG_OFFSETS_r(if (ri->offsets) { + const U32 len = ri->offsets[0]; U32 i; GET_RE_DEBUG_FLAGS_DECL; - PerlIO_printf(Perl_debug_log, "Offsets: [%"UVuf"]\n\t", (UV)r->offsets[0]); + PerlIO_printf(Perl_debug_log, "Offsets: [%"UVuf"]\n\t", (UV)ri->offsets[0]); for (i = 1; i <= len; i++) { - if (r->offsets[i*2-1] || r->offsets[i*2]) + if (ri->offsets[i*2-1] || ri->offsets[i*2]) PerlIO_printf(Perl_debug_log, "%"UVuf":%"UVuf"[%"UVuf"] ", - (UV)i, (UV)r->offsets[i*2-1], (UV)r->offsets[i*2]); + (UV)i, (UV)ri->offsets[i*2-1], (UV)ri->offsets[i*2]); } PerlIO_printf(Perl_debug_log, "\n"); }); @@ -4791,7 +4879,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) if (start_arg) { SV *sv = newSVpvn( start_arg, RExC_parse - start_arg); ARG(ret) = add_data( pRExC_state, 1, "S" ); - RExC_rx->data->data[ARG(ret)]=(void*)sv; + RExC_rxi->data->data[ARG(ret)]=(void*)sv; ret->flags = 0; } else { ret->flags = 1; @@ -5002,7 +5090,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) /* FALL THROUGH */ case '{': /* (?{...}) */ { - I32 count = 1, n = 0; + I32 count = 1; + U32 n = 0; char c; char *s = RExC_parse; @@ -5037,9 +5126,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) LEAVE; n = add_data(pRExC_state, 3, "nop"); - RExC_rx->data->data[n] = (void*)rop; - RExC_rx->data->data[n+1] = (void*)sop; - RExC_rx->data->data[n+2] = (void*)pad; + RExC_rxi->data->data[n] = (void*)rop; + RExC_rxi->data->data[n+1] = (void*)sop; + RExC_rxi->data->data[n+2] = (void*)pad; SvREFCNT_dec(sv); } else { /* First pass */ @@ -5091,7 +5180,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) { char ch = RExC_parse[0] == '<' ? '>' : '\''; char *name_start= RExC_parse++; - I32 num = 0; + U32 num = 0; SV *sv_dat=reg_scan_name(pRExC_state, SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA); if (RExC_parse == name_start || *RExC_parse != ch) @@ -5100,7 +5189,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) RExC_parse++; if (!SIZE_ONLY) { num = add_data( pRExC_state, 1, "S" ); - RExC_rx->data->data[num]=(void*)sv_dat; + RExC_rxi->data->data[num]=(void*)sv_dat; SvREFCNT_inc(sv_dat); } ret = reganode(pRExC_state,NGROUPP,num); @@ -5332,7 +5421,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) ender = reg_node(pRExC_state, TAIL); break; case 1: - RExC_cpar++; ender = reganode(pRExC_state, CLOSE, parno); if (!SIZE_ONLY && RExC_seen & REG_SEEN_RECURSE) { DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log, @@ -6009,9 +6097,9 @@ tryagain: case '^': RExC_seen_zerolen++; nextchar(pRExC_state); - if (RExC_flags & PMf_MULTILINE) + if (RExC_flags & RXf_PMf_MULTILINE) ret = reg_node(pRExC_state, MBOL); - else if (RExC_flags & PMf_SINGLELINE) + else if (RExC_flags & RXf_PMf_SINGLELINE) ret = reg_node(pRExC_state, SBOL); else ret = reg_node(pRExC_state, BOL); @@ -6021,9 +6109,9 @@ tryagain: nextchar(pRExC_state); if (*RExC_parse) RExC_seen_zerolen++; - if (RExC_flags & PMf_MULTILINE) + if (RExC_flags & RXf_PMf_MULTILINE) ret = reg_node(pRExC_state, MEOL); - else if (RExC_flags & PMf_SINGLELINE) + else if (RExC_flags & RXf_PMf_SINGLELINE) ret = reg_node(pRExC_state, SEOL); else ret = reg_node(pRExC_state, EOL); @@ -6031,7 +6119,7 @@ tryagain: break; case '.': nextchar(pRExC_state); - if (RExC_flags & PMf_SINGLELINE) + if (RExC_flags & RXf_PMf_SINGLELINE) ret = reg_node(pRExC_state, SANY); else ret = reg_node(pRExC_state, REG_ANY); @@ -6237,7 +6325,7 @@ tryagain: goto defchar; } else { char* name_start = (RExC_parse += 2); - I32 num = 0; + U32 num = 0; SV *sv_dat = reg_scan_name(pRExC_state, SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA); ch= (ch == '<') ? '>' : '\''; @@ -6256,7 +6344,7 @@ tryagain: if (!SIZE_ONLY) { num = add_data( pRExC_state, 1, "S" ); ARG_SET(ret,num); - RExC_rx->data->data[num]=(void*)sv_dat; + RExC_rxi->data->data[num]=(void*)sv_dat; SvREFCNT_inc(sv_dat); } /* override incorrect value set in reganode MJD */ @@ -6277,36 +6365,45 @@ tryagain: case 'c': case '0': goto defchar; - case 'R': + case 'g': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { I32 num; - bool isrel=(*RExC_parse=='R'); - if (isrel) + bool isg = *RExC_parse == 'g'; + bool isrel = 0; + bool hasbrace = 0; + if (isg) { RExC_parse++; + if (*RExC_parse == '{') { + RExC_parse++; + hasbrace = 1; + } + if (*RExC_parse == '-') { + RExC_parse++; + isrel = 1; + } + } num = atoi(RExC_parse); if (isrel) { - num = RExC_cpar - num; + num = RExC_npar - num; if (num < 1) vFAIL("Reference to nonexistent or unclosed group"); } - if (num > 9 && num >= RExC_npar) + if (!isg && num > 9 && num >= RExC_npar) goto defchar; else { char * const parse_start = RExC_parse - 1; /* MJD */ while (isDIGIT(*RExC_parse)) RExC_parse++; - + if (hasbrace) { + if (*RExC_parse != '}') + vFAIL("Unterminated \\g{...} pattern"); + RExC_parse++; + } if (!SIZE_ONLY) { if (num > (I32)RExC_rx->nparens) vFAIL("Reference to nonexistent group"); - /* People make this error all the time apparently. - So we cant fail on it, even though we should - - else if (num >= RExC_cpar) - vFAIL("Reference to unclosed group will always match"); - */ } RExC_sawback = 1; ret = reganode(pRExC_state, @@ -6335,7 +6432,7 @@ tryagain: break; case '#': - if (RExC_flags & PMf_EXTENDED) { + if (RExC_flags & RXf_PMf_EXTENDED) { while (RExC_parse < RExC_end && *RExC_parse != '\n') RExC_parse++; if (RExC_parse < RExC_end) @@ -6366,7 +6463,7 @@ tryagain: { char * const oldp = p; - if (RExC_flags & PMf_EXTENDED) + if (RExC_flags & RXf_PMf_EXTENDED) p = regwhite(p, RExC_end); switch (*p) { case '^': @@ -6383,6 +6480,7 @@ tryagain: case 'C': case 'X': case 'G': + case 'g': case 'Z': case 'z': case 'w': @@ -6397,6 +6495,7 @@ tryagain: case 'P': case 'N': case 'R': + case 'k': --p; goto loopdone; case 'n': @@ -6502,7 +6601,7 @@ tryagain: ender = *p++; break; } - if (RExC_flags & PMf_EXTENDED) + if (RExC_flags & RXf_PMf_EXTENDED) p = regwhite(p, RExC_end); if (UTF && FOLD) { /* Prime the casefolded buffer. */ @@ -7636,7 +7735,7 @@ parseit: av_store(av, 2, (SV*)unicode_alternate); rv = newRV_noinc((SV*)av); n = add_data(pRExC_state, 1, "s"); - RExC_rx->data->data[n] = (void*)rv; + RExC_rxi->data->data[n] = (void*)rv; ARG_SET(ret, n); } return ret; @@ -7658,7 +7757,7 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state) RExC_parse++; continue; } - if (RExC_flags & PMf_EXTENDED) { + if (RExC_flags & RXf_PMf_EXTENDED) { if (isSPACE(*RExC_parse)) { RExC_parse++; continue; @@ -7978,10 +8077,10 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p, const regnode *val, SV * const mysv_val=sv_newmortal(); DEBUG_PARSE_MSG(""); regprop(RExC_rx, mysv_val, val); - PerlIO_printf(Perl_debug_log, "~ attach to %s (%d) offset to %d\n", - SvPV_nolen_const(mysv_val), - REG_NODE_NUM(val), - val - scan + PerlIO_printf(Perl_debug_log, "~ attach to %s (%"IVdf") offset to %"IVdf"\n", + SvPV_nolen_const(mysv_val), + (IV)REG_NODE_NUM(val), + (IV)(val - scan) ); }); if (reg_off_by_arg[OP(scan)]) { @@ -8027,8 +8126,9 @@ Perl_regdump(pTHX_ const regexp *r) dVAR; SV * const sv = sv_newmortal(); SV *dsv= sv_newmortal(); + RXi_GET_DECL(r,ri); - (void)dumpuntil(r, r->program, r->program + 1, NULL, NULL, sv, 0, 0); + (void)dumpuntil(r, ri->program, ri->program + 1, NULL, NULL, sv, 0, 0); /* Header fields of interest. */ if (r->anchored_substr) { @@ -8067,37 +8167,37 @@ Perl_regdump(pTHX_ const regexp *r) (r->check_substr == r->float_substr && r->check_utf8 == r->float_utf8 ? "(checking floating" : "(checking anchored")); - if (r->reganch & ROPT_NOSCAN) + if (r->extflags & RXf_NOSCAN) PerlIO_printf(Perl_debug_log, " noscan"); - if (r->reganch & ROPT_CHECK_ALL) + if (r->extflags & RXf_CHECK_ALL) PerlIO_printf(Perl_debug_log, " isall"); if (r->check_substr || r->check_utf8) PerlIO_printf(Perl_debug_log, ") "); - if (r->regstclass) { - regprop(r, sv, r->regstclass); + if (ri->regstclass) { + regprop(r, sv, ri->regstclass); PerlIO_printf(Perl_debug_log, "stclass %s ", SvPVX_const(sv)); } - if (r->reganch & ROPT_ANCH) { + if (r->extflags & RXf_ANCH) { PerlIO_printf(Perl_debug_log, "anchored"); - if (r->reganch & ROPT_ANCH_BOL) + if (r->extflags & RXf_ANCH_BOL) PerlIO_printf(Perl_debug_log, "(BOL)"); - if (r->reganch & ROPT_ANCH_MBOL) + if (r->extflags & RXf_ANCH_MBOL) PerlIO_printf(Perl_debug_log, "(MBOL)"); - if (r->reganch & ROPT_ANCH_SBOL) + if (r->extflags & RXf_ANCH_SBOL) PerlIO_printf(Perl_debug_log, "(SBOL)"); - if (r->reganch & ROPT_ANCH_GPOS) + if (r->extflags & RXf_ANCH_GPOS) PerlIO_printf(Perl_debug_log, "(GPOS)"); PerlIO_putc(Perl_debug_log, ' '); } - if (r->reganch & ROPT_GPOS_SEEN) - PerlIO_printf(Perl_debug_log, "GPOS "); - if (r->reganch & ROPT_SKIP) + if (r->extflags & RXf_GPOS_SEEN) + PerlIO_printf(Perl_debug_log, "GPOS:%"UVuf" ", (UV)r->gofs); + if (r->intflags & PREGf_SKIP) PerlIO_printf(Perl_debug_log, "plus "); - if (r->reganch & ROPT_IMPLICIT) + if (r->intflags & PREGf_IMPLICIT) PerlIO_printf(Perl_debug_log, "implicit "); - PerlIO_printf(Perl_debug_log, "minlen %ld ", (long) r->minlen); - if (r->reganch & ROPT_EVAL_SEEN) + PerlIO_printf(Perl_debug_log, "minlen %"IVdf" ", (IV)r->minlen); + if (r->extflags & RXf_EVAL_SEEN) PerlIO_printf(Perl_debug_log, "with eval "); PerlIO_printf(Perl_debug_log, "\n"); #else @@ -8115,9 +8215,12 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) #ifdef DEBUGGING dVAR; register int k; + RXi_GET_DECL(prog,progi); GET_RE_DEBUG_FLAGS_DECL; + sv_setpvn(sv, "", 0); + if (OP(o) > REGNODE_MAX) /* regnode.type is unsigned */ /* It would be nice to FAIL() here, but this may be called from regexec.c, and it would be hard to supply pRExC_state. */ @@ -8142,15 +8245,14 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) Perl_sv_catpvf(aTHX_ sv, " %s", s ); } else if (k == TRIE) { /* print the details of the trie in dumpuntil instead, as - * prog->data isn't available here */ + * progi->data isn't available here */ const char op = OP(o); const I32 n = ARG(o); const reg_ac_data * const ac = IS_TRIE_AC(op) ? - (reg_ac_data *)prog->data->data[n] : + (reg_ac_data *)progi->data->data[n] : NULL; - const reg_trie_data * const trie = !IS_TRIE_AC(op) ? - (reg_trie_data*)prog->data->data[n] : - ac->trie; + const reg_trie_data * const trie + = (reg_trie_data*)progi->data->data[!IS_TRIE_AC(op) ? n : ac->trie]; Perl_sv_catpvf(aTHX_ sv, "-%s",reg_name[o->flags]); DEBUG_TRIE_COMPILE_r( @@ -8203,7 +8305,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) else if (k == VERB) { if (!o->flags) Perl_sv_catpvf(aTHX_ sv, ":%"SVf, - (SV*)prog->data->data[ ARG( o ) ]); + (SV*)progi->data->data[ ARG( o ) ]); } else if (k == LOGICAL) Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* 2: embedded, otherwise 1 */ else if (k == ANYOF) { @@ -8382,36 +8484,32 @@ Perl_re_intuit_string(pTHX_ regexp *prog) } /* - pregfree - free a regexp + pregfree() - See regdupe below if you change anything here. + handles refcounting and freeing the perl core regexp structure. When + it is necessary to actually free the structure the first thing it + does is call the 'free' method of the regexp_engine associated to to + the regexp, allowing the handling of the void *pprivate; member + first. (This routine is not overridable by extensions, which is why + the extensions free is called first.) + + See regdupe and regdupe_internal if you change anything here. */ - +#ifndef PERL_IN_XSUB_RE void Perl_pregfree(pTHX_ struct regexp *r) { dVAR; - GET_RE_DEBUG_FLAGS_DECL; if (!r || (--r->refcnt > 0)) return; - DEBUG_COMPILE_r({ - if (!PL_colorset) - reginitcolors(); - { - SV *dsv= sv_newmortal(); - RE_PV_QUOTED_DECL(s, (r->reganch & ROPT_UTF8), - dsv, r->precomp, r->prelen, 60); - PerlIO_printf(Perl_debug_log,"%sFreeing REx:%s %s\n", - PL_colors[4],PL_colors[5],s); - } - }); - + + CALLREGFREE_PVT(r); /* free the private data */ + /* gcov results gave these as non-null 100% of the time, so there's no optimisation in checking them before calling Safefree */ Safefree(r->precomp); - Safefree(r->offsets); /* 20010421 MJD */ RX_MATCH_COPY_FREE(r); #ifdef PERL_OLD_COPY_ON_WRITE if (r->saved_copy) @@ -8430,24 +8528,64 @@ Perl_pregfree(pTHX_ struct regexp *r) } if (r->paren_names) SvREFCNT_dec(r->paren_names); - if (r->data) { - int n = r->data->count; + + Safefree(r->startp); + Safefree(r->endp); + Safefree(r); +} +#endif + +/* regfree_internal() + + Free the private data in a regexp. This is overloadable by + extensions. Perl takes care of the regexp structure in pregfree(), + this covers the *pprivate pointer which technically perldoesnt + know about, however of course we have to handle the + regexp_internal structure when no extension is in use. + + Note this is called before freeing anything in the regexp + structure. + */ + +void +Perl_regfree_internal(pTHX_ struct regexp *r) +{ + dVAR; + RXi_GET_DECL(r,ri); + GET_RE_DEBUG_FLAGS_DECL; + + DEBUG_COMPILE_r({ + if (!PL_colorset) + reginitcolors(); + { + SV *dsv= sv_newmortal(); + RE_PV_QUOTED_DECL(s, (r->extflags & RXf_UTF8), + dsv, r->precomp, r->prelen, 60); + PerlIO_printf(Perl_debug_log,"%sFreeing REx:%s %s\n", + PL_colors[4],PL_colors[5],s); + } + }); + + Safefree(ri->offsets); /* 20010421 MJD */ + if (ri->data) { + int n = ri->data->count; PAD* new_comppad = NULL; PAD* old_comppad; PADOFFSET refcnt; while (--n >= 0) { /* If you add a ->what type here, update the comment in regcomp.h */ - switch (r->data->what[n]) { + switch (ri->data->what[n]) { case 's': case 'S': - SvREFCNT_dec((SV*)r->data->data[n]); + case 'u': + SvREFCNT_dec((SV*)ri->data->data[n]); break; case 'f': - Safefree(r->data->data[n]); + Safefree(ri->data->data[n]); break; case 'p': - new_comppad = (AV*)r->data->data[n]; + new_comppad = (AV*)ri->data->data[n]; break; case 'o': if (new_comppad == NULL) @@ -8457,10 +8595,10 @@ Perl_pregfree(pTHX_ struct regexp *r) (SvTYPE(new_comppad) == SVt_PVAV) ? new_comppad : NULL ); OP_REFCNT_LOCK; - refcnt = OpREFCNT_dec((OP_4tree*)r->data->data[n]); + refcnt = OpREFCNT_dec((OP_4tree*)ri->data->data[n]); OP_REFCNT_UNLOCK; if (!refcnt) - op_free((OP_4tree*)r->data->data[n]); + op_free((OP_4tree*)ri->data->data[n]); PAD_RESTORE_LOCAL(old_comppad); SvREFCNT_dec((SV*)new_comppad); @@ -8472,17 +8610,16 @@ Perl_pregfree(pTHX_ struct regexp *r) { /* Aho Corasick add-on structure for a trie node. Used in stclass optimization only */ U32 refcount; - reg_ac_data *aho=(reg_ac_data*)r->data->data[n]; + reg_ac_data *aho=(reg_ac_data*)ri->data->data[n]; OP_REFCNT_LOCK; refcount = --aho->refcount; OP_REFCNT_UNLOCK; if ( !refcount ) { - Safefree(aho->states); - Safefree(aho->fail); - aho->trie=NULL; /* not necessary to free this as it is - handled by the 't' case */ - Safefree(r->data->data[n]); /* do this last!!!! */ - Safefree(r->regstclass); + PerlMemShared_free(aho->states); + PerlMemShared_free(aho->fail); + /* do this last!!!! */ + PerlMemShared_free(ri->data->data[n]); + PerlMemShared_free(ri->regstclass); } } break; @@ -8490,49 +8627,40 @@ Perl_pregfree(pTHX_ struct regexp *r) { /* trie structure. */ U32 refcount; - reg_trie_data *trie=(reg_trie_data*)r->data->data[n]; + reg_trie_data *trie=(reg_trie_data*)ri->data->data[n]; OP_REFCNT_LOCK; refcount = --trie->refcount; OP_REFCNT_UNLOCK; if ( !refcount ) { - Safefree(trie->charmap); - if (trie->widecharmap) - SvREFCNT_dec((SV*)trie->widecharmap); - Safefree(trie->states); - Safefree(trie->trans); + PerlMemShared_free(trie->charmap); + PerlMemShared_free(trie->states); + PerlMemShared_free(trie->trans); if (trie->bitmap) - Safefree(trie->bitmap); + PerlMemShared_free(trie->bitmap); if (trie->wordlen) - Safefree(trie->wordlen); + PerlMemShared_free(trie->wordlen); if (trie->jump) - Safefree(trie->jump); + PerlMemShared_free(trie->jump); if (trie->nextword) - Safefree(trie->nextword); -#ifdef DEBUGGING - if (trie->words) - SvREFCNT_dec((SV*)trie->words); - if (trie->revcharmap) - SvREFCNT_dec((SV*)trie->revcharmap); -#endif - Safefree(r->data->data[n]); /* do this last!!!! */ + PerlMemShared_free(trie->nextword); + /* do this last!!!! */ + PerlMemShared_free(ri->data->data[n]); } } break; default: - Perl_croak(aTHX_ "panic: regfree data code '%c'", r->data->what[n]); + Perl_croak(aTHX_ "panic: regfree data code '%c'", ri->data->what[n]); } } - Safefree(r->data->what); - Safefree(r->data); + Safefree(ri->data->what); + Safefree(ri->data); } - Safefree(r->startp); - Safefree(r->endp); - if (r->swap) { - Safefree(r->swap->startp); - Safefree(r->swap->endp); - Safefree(r->swap); + if (ri->swap) { + Safefree(ri->swap->startp); + Safefree(ri->swap->endp); + Safefree(ri->swap); } - Safefree(r); + Safefree(ri); } #define sv_dup_inc(s,t) SvREFCNT_inc(sv_dup(s,t)) @@ -8547,15 +8675,21 @@ Perl_pregfree(pTHX_ struct regexp *r) given regexp structure. It is a no-op when not under USE_ITHREADS. (Originally this *was* re_dup() for change history see sv.c) - See pregfree() above if you change anything here. + After all of the core data stored in struct regexp is duplicated + the regexp_engine.dupe method is used to copy any private data + stored in the *pprivate pointer. This allows extensions to handle + any duplication it needs to do. + + See pregfree() and regfree_internal() if you change anything here. */ #if defined(USE_ITHREADS) +#ifndef PERL_IN_XSUB_RE regexp * -Perl_regdupe(pTHX_ const regexp *r, CLONE_PARAMS *param) +Perl_re_dup(pTHX_ const regexp *r, CLONE_PARAMS *param) { dVAR; - REGEXP *ret; - int i, len, npar; + regexp *ret; + int i, npar; struct reg_substr_datum *s; if (!r) @@ -8564,24 +8698,13 @@ Perl_regdupe(pTHX_ const regexp *r, CLONE_PARAMS *param) if ((ret = (REGEXP *)ptr_table_fetch(PL_ptr_table, r))) return ret; - len = r->offsets[0]; + npar = r->nparens+1; - - Newxc(ret, sizeof(regexp) + (len+1)*sizeof(regnode), char, regexp); - Copy(r->program, ret->program, len+1, regnode); - + Newxz(ret, 1, regexp); Newx(ret->startp, npar, I32); Copy(r->startp, ret->startp, npar, I32); Newx(ret->endp, npar, I32); - Copy(r->startp, ret->startp, npar, I32); - if(r->swap) { - Newx(ret->swap, 1, regexp_paren_ofs); - /* no need to copy these */ - Newx(ret->swap->startp, npar, I32); - Newx(ret->swap->endp, npar, I32); - } else { - ret->swap = NULL; - } + Copy(r->endp, ret->endp, npar, I32); Newx(ret->substrs, 1, struct reg_substr_data); for (s = ret->substrs->data, i = 0; i < 3; i++, s++) { @@ -8591,11 +8714,83 @@ Perl_regdupe(pTHX_ const regexp *r, CLONE_PARAMS *param) s->substr = sv_dup_inc(r->substrs->data[i].substr, param); s->utf8_substr = sv_dup_inc(r->substrs->data[i].utf8_substr, param); } + - ret->regstclass = NULL; - if (r->data) { + ret->precomp = SAVEPVN(r->precomp, r->prelen); + ret->refcnt = r->refcnt; + ret->minlen = r->minlen; + ret->minlenret = r->minlenret; + ret->prelen = r->prelen; + ret->nparens = r->nparens; + ret->lastparen = r->lastparen; + ret->lastcloseparen = r->lastcloseparen; + ret->intflags = r->intflags; + ret->extflags = r->extflags; + + ret->sublen = r->sublen; + + ret->engine = r->engine; + + ret->paren_names = hv_dup_inc(r->paren_names, param); + + if (RX_MATCH_COPIED(ret)) + ret->subbeg = SAVEPVN(r->subbeg, r->sublen); + else + ret->subbeg = NULL; +#ifdef PERL_OLD_COPY_ON_WRITE + ret->saved_copy = NULL; +#endif + + ret->pprivate = r->pprivate; + RXi_SET(ret,CALLREGDUPE_PVT(ret,param)); + + ptr_table_store(PL_ptr_table, r, ret); + return ret; +} +#endif /* PERL_IN_XSUB_RE */ + +/* + regdupe_internal() + + This is the internal complement to regdupe() which is used to copy + the structure pointed to by the *pprivate pointer in the regexp. + This is the core version of the extension overridable cloning hook. + The regexp structure being duplicated will be copied by perl prior + to this and will be provided as the regexp *r argument, however + with the /old/ structures pprivate pointer value. Thus this routine + may override any copying normally done by perl. + + It returns a pointer to the new regexp_internal structure. +*/ + +void * +Perl_regdupe_internal(pTHX_ const regexp *r, CLONE_PARAMS *param) +{ + dVAR; + regexp_internal *reti; + int len, npar; + RXi_GET_DECL(r,ri); + + npar = r->nparens+1; + len = ri->offsets[0]; + + Newxc(reti, sizeof(regexp_internal) + (len+1)*sizeof(regnode), char, regexp_internal); + Copy(ri->program, reti->program, len+1, regnode); + + if(ri->swap) { + Newx(reti->swap, 1, regexp_paren_ofs); + /* no need to copy these */ + Newx(reti->swap->startp, npar, I32); + Newx(reti->swap->endp, npar, I32); + } else { + reti->swap = NULL; + } + + + reti->regstclass = NULL; + if (ri->data) { struct reg_data *d; - const int count = r->data->count; + const int count = ri->data->count; int i; Newxc(d, sizeof(struct reg_data) + count*sizeof(void *), @@ -8604,92 +8799,62 @@ Perl_regdupe(pTHX_ const regexp *r, CLONE_PARAMS *param) d->count = count; for (i = 0; i < count; i++) { - d->what[i] = r->data->what[i]; + d->what[i] = ri->data->what[i]; switch (d->what[i]) { - /* legal options are one of: sSfpont + /* legal options are one of: sSfpontTu see also regcomp.h and pregfree() */ case 's': case 'S': - d->data[i] = sv_dup_inc((SV *)r->data->data[i], param); - break; - case 'p': - d->data[i] = av_dup_inc((AV *)r->data->data[i], param); + case 'p': /* actually an AV, but the dup function is identical. */ + case 'u': /* actually an HV, but the dup function is identical. */ + d->data[i] = sv_dup_inc((SV *)ri->data->data[i], param); break; case 'f': /* This is cheating. */ Newx(d->data[i], 1, struct regnode_charclass_class); - StructCopy(r->data->data[i], d->data[i], + StructCopy(ri->data->data[i], d->data[i], struct regnode_charclass_class); - ret->regstclass = (regnode*)d->data[i]; + reti->regstclass = (regnode*)d->data[i]; break; case 'o': - /* Compiled op trees are readonly, and can thus be - shared without duplication. */ - OP_REFCNT_LOCK; - d->data[i] = (void*)OpREFCNT_inc((OP*)r->data->data[i]); - OP_REFCNT_UNLOCK; - break; - case 'n': - d->data[i] = r->data->data[i]; - break; - case 't': - d->data[i] = r->data->data[i]; + /* Compiled op trees are readonly and in shared memory, + and can thus be shared without duplication. */ OP_REFCNT_LOCK; - ((reg_trie_data*)d->data[i])->refcount++; + d->data[i] = (void*)OpREFCNT_inc((OP*)ri->data->data[i]); OP_REFCNT_UNLOCK; break; case 'T': - d->data[i] = r->data->data[i]; - OP_REFCNT_LOCK; - ((reg_ac_data*)d->data[i])->refcount++; - OP_REFCNT_UNLOCK; /* Trie stclasses are readonly and can thus be shared * without duplication. We free the stclass in pregfree * when the corresponding reg_ac_data struct is freed. */ - ret->regstclass= r->regstclass; + reti->regstclass= ri->regstclass; + /* Fall through */ + case 't': + OP_REFCNT_LOCK; + ((reg_trie_data*)ri->data->data[i])->refcount++; + OP_REFCNT_UNLOCK; + /* Fall through */ + case 'n': + d->data[i] = ri->data->data[i]; break; default: - Perl_croak(aTHX_ "panic: re_dup unknown data code '%c'", r->data->what[i]); + Perl_croak(aTHX_ "panic: re_dup unknown data code '%c'", ri->data->what[i]); } } - ret->data = d; + reti->data = d; } else - ret->data = NULL; - - Newx(ret->offsets, 2*len+1, U32); - Copy(r->offsets, ret->offsets, 2*len+1, U32); - - ret->precomp = SAVEPVN(r->precomp, r->prelen); - ret->refcnt = r->refcnt; - ret->minlen = r->minlen; - ret->minlenret = r->minlenret; - ret->prelen = r->prelen; - ret->nparens = r->nparens; - ret->lastparen = r->lastparen; - ret->lastcloseparen = r->lastcloseparen; - ret->reganch = r->reganch; - - ret->sublen = r->sublen; + reti->data = NULL; - ret->engine = r->engine; + Newx(reti->offsets, 2*len+1, U32); + Copy(ri->offsets, reti->offsets, 2*len+1, U32); - ret->paren_names = hv_dup_inc(r->paren_names, param); - - if (RX_MATCH_COPIED(ret)) - ret->subbeg = SAVEPVN(r->subbeg, r->sublen); - else - ret->subbeg = NULL; -#ifdef PERL_OLD_COPY_ON_WRITE - ret->saved_copy = NULL; -#endif - - ptr_table_store(PL_ptr_table, r, ret); - return ret; + return (void*)reti; } -#endif + +#endif /* USE_ITHREADS */ /* reg_stringify() @@ -8702,23 +8867,23 @@ Perl_regdupe(pTHX_ const regexp *r, CLONE_PARAMS *param) resulting string If flags is nonnull and the returned string contains UTF8 then - (flags & 1) will be true. + (*flags & 1) will be true. If haseval is nonnull then it is used to return whether the pattern contains evals. Normally called via macro: - CALLREG_STRINGIFY(mg,0,0); + CALLREG_STRINGIFY(mg,&len,&utf8); And internally with - CALLREG_AS_STR(mg,lp,flags,haseval) + CALLREG_AS_STR(mg,&lp,&flags,&haseval) See sv_2pv_flags() in sv.c for an example of internal usage. */ - +#ifndef PERL_IN_XSUB_RE char * Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval ) { dVAR; @@ -8731,7 +8896,7 @@ Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval ) { int left = 0; int right = 4; bool need_newline = 0; - U16 reganch = (U16)((re->reganch & PMf_COMPILETIME) >> 12); + U16 reganch = (U16)((re->extflags & RXf_PMf_COMPILETIME) >> 12); while((ch = *fptr++)) { if(reganch & 1) { @@ -8759,7 +8924,7 @@ Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval ) { * ourself. If we find a '\n' first (or if we don't find '#' or '\n'), * we don't need to add anything. -jfriedl */ - if (PMf_EXTENDED & re->reganch) { + if (PMf_EXTENDED & re->extflags) { const char *endptr = re->precomp + re->prelen; while (endptr >= re->precomp) { const char c = *(endptr--); @@ -8786,17 +8951,15 @@ Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval ) { mg->mg_ptr[mg->mg_len] = 0; } if (haseval) - *haseval = re->program[0].next_off; + *haseval = re->seen_evals; if (flags) - *flags = ((re->reganch & ROPT_UTF8) ? 1 : 0); + *flags = ((re->extflags & RXf_UTF8) ? 1 : 0); if (lp) *lp = mg->mg_len; return mg->mg_ptr; } - -#ifndef PERL_IN_XSUB_RE /* - regnext - dig the "next" pointer out of a node */ @@ -8806,7 +8969,7 @@ Perl_regnext(pTHX_ register regnode *p) dVAR; register I32 offset; - if (p == &PL_regdummy) + if (!p) return(NULL); offset = (reg_off_by_arg[OP(p)] ? ARG(p) : NEXT_OFF(p)); @@ -8927,7 +9090,7 @@ S_put_byte(pTHX_ SV *sv, int c) #define CLEAR_OPTSTART \ if (optstart) STMT_START { \ - DEBUG_OPTIMISE_r(PerlIO_printf(Perl_debug_log, " (%d nodes)\n", node - optstart)); \ + DEBUG_OPTIMISE_r(PerlIO_printf(Perl_debug_log, " (%"IVdf" nodes)\n", (IV)(node - optstart))); \ optstart=NULL; \ } STMT_END @@ -8942,6 +9105,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, register U8 op = PSEUDO; /* Arbitrary non-END op. */ register const regnode *next; const regnode *optstart= NULL; + RXi_GET_DECL(r,ri); GET_RE_DEBUG_FLAGS_DECL; #ifdef DEBUG_DUMPUNTIL @@ -8957,7 +9121,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, NODE_ALIGN(node); op = OP(node); - if (op == CLOSE) + if (op == CLOSE || op == WHILEM) indent--; next = regnext((regnode *)node); @@ -9007,16 +9171,18 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, const char op = OP(node); const I32 n = ARG(node); const reg_ac_data * const ac = op>=AHOCORASICK ? - (reg_ac_data *)r->data->data[n] : + (reg_ac_data *)ri->data->data[n] : NULL; - const reg_trie_data * const trie = opdata->data[n] : - ac->trie; + const reg_trie_data * const trie = + (reg_trie_data*)ri->data->data[optrie]; +#ifdef DEBUGGING + AV *const trie_words = (AV *) ri->data->data[n + TRIE_WORDS_OFFSET]; +#endif const regnode *nextbranch= NULL; I32 word_idx; sv_setpvn(sv, "", 0); for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) { - SV ** const elem_ptr = av_fetch(trie->words,word_idx,0); + SV ** const elem_ptr = av_fetch(trie_words,word_idx,0); PerlIO_printf(Perl_debug_log, "%*s%s ", (int)(2*(indent+3)), "", @@ -9030,8 +9196,8 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, ); if (trie->jump) { U16 dist= trie->jump[word_idx+1]; - PerlIO_printf(Perl_debug_log, "(%u)\n", - (dist ? this_trie + dist : next) - start); + PerlIO_printf(Perl_debug_log, "(%"UVuf")\n", + (UV)((dist ? this_trie + dist : next) - start)); if (dist) { if (!nextbranch) nextbranch= this_trie + trie->jump[0]; @@ -9076,12 +9242,10 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, } if (op == CURLYX || op == OPEN) indent++; - else if (op == WHILEM) - indent--; } CLEAR_OPTSTART; #ifdef DEBUG_DUMPUNTIL - PerlIO_printf(Perl_debug_log, "--- %d\n",indent); + PerlIO_printf(Perl_debug_log, "--- %d\n", (int)indent); #endif return node; }