X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regexec.c;h=17ee6af0f722bc257686d2a46371ce39ab118466;hb=62703e7218aceb3f5d30f70a2307dd02e5eb8c63;hp=62c48dd8d0ab0336acbbd7240b549eb251c7332f;hpb=bfed75c6338f0ba740aa210f4b0267e39ca4662b;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regexec.c b/regexec.c index 62c48dd..17ee6af 100644 --- a/regexec.c +++ b/regexec.c @@ -98,7 +98,6 @@ #define RF_warned 2 /* warned about big count? */ #define RF_evaled 4 /* Did an EVAL with setting? */ #define RF_utf8 8 /* String contains multibyte chars? */ -#define RF_false 16 /* odd number of nested negatives */ #define UTF ((PL_reg_flags & RF_utf8) != 0) @@ -440,7 +439,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, "UTF-8 target...\n")); PerlIO_printf(Perl_debug_log, "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n", - PL_colors[4],PL_colors[5],PL_colors[0], + PL_colors[4], PL_colors[5], PL_colors[0], prog->precomp, PL_colors[1], (strlen(prog->precomp) > 60 ? "..." : ""), @@ -765,33 +764,33 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, */ strpos = t + 1; DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n", - PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset))); + PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset))); goto do_other_anchored; } /* We don't contradict the found floating substring. */ /* XXXX Why not check for STCLASS? */ s = t + 1; DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n", - PL_colors[0],PL_colors[1], (long)(s - i_strpos))); + PL_colors[0], PL_colors[1], (long)(s - i_strpos))); goto set_useful; } /* Position contradicts check-string */ /* XXXX probably better to look for check-string than for "\n", so one should lower the limit for t? */ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n", - PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos))); + PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos))); other_last = strpos = s = t + 1; goto restart; } t++; } DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n", - PL_colors[0],PL_colors[1])); + PL_colors[0], PL_colors[1])); goto fail_finish; } else { DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n", - PL_colors[0],PL_colors[1])); + PL_colors[0], PL_colors[1])); } s = t; set_useful: @@ -815,7 +814,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, } DEBUG_EXECUTE_r( if (ml_anch) PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n", - (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]); + (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]); ); success_at_start: if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */ @@ -858,8 +857,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, regstclass does not come from lookahead... */ /* If regstclass takes bytelength more than 1: If charlength==1, OK. This leaves EXACTF only, which is dealt with in find_byclass(). */ - U8* str = (U8*)STRING(prog->regstclass); - int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT + const U8* str = (U8*)STRING(prog->regstclass); + const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT ? CHR_DIST(str+STR_LEN(prog->regstclass), str) : 1); char *endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch) @@ -868,14 +867,13 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, ? HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend) : strend); - char *startpos = strbeg; t = s; cache_re(prog); - s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1); + s = find_byclass(prog, prog->regstclass, s, endpos, 1); if (!s) { #ifdef DEBUGGING - char *what = 0; + const char *what = 0; #endif if (endpos == strend) { DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, @@ -925,7 +923,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, goto giveup; DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, "Looking for /%s^%s/m starting at offset %ld...\n", - PL_colors[0],PL_colors[1], (long)(t - i_strpos)) ); + PL_colors[0], PL_colors[1], (long)(t - i_strpos)) ); goto try_at_offset; } if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */ @@ -959,13 +957,13 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */ fail: DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n", - PL_colors[4],PL_colors[5])); + PL_colors[4], PL_colors[5])); return Nullch; } /* We know what class REx starts with. Try to find this position... */ STATIC char * -S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun) +S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, I32 norun) { I32 doevery = (prog->reganch & ROPT_SKIP) == 0; char *m; @@ -1744,7 +1742,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * reginitcolors(); PerlIO_printf(Perl_debug_log, "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n", - PL_colors[4],PL_colors[5],PL_colors[0], + PL_colors[4], PL_colors[5], PL_colors[0], len0, len0, s0, PL_colors[1], len0 > 60 ? "..." : "", @@ -1964,7 +1962,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * len0, len0, s0, len1, len1, s1); }); - if (find_byclass(prog, c, s, strend, startpos, 0)) + if (find_byclass(prog, c, s, strend, 0)) goto got_it; DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n")); } @@ -1990,7 +1988,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * } else { STRLEN len; - char *little = SvPV(float_real, len); + const char * const little = SvPV(float_real, len); if (SvTAIL(float_real)) { if (memEQ(strend - len + 1, little, len - 1)) @@ -2011,7 +2009,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * if (last == NULL) { DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sCan't trim the tail, match fails (should not happen)%s\n", - PL_colors[4],PL_colors[5])); + PL_colors[4], PL_colors[5])); goto phooey; /* Should not happen! */ } dontbother = strend - last + prog->float_min_offset; @@ -2087,7 +2085,7 @@ got_it: phooey: DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n", - PL_colors[4],PL_colors[5])); + PL_colors[4], PL_colors[5])); if (PL_reg_eval_set) restore_pos(aTHX_ 0); return 0; @@ -2144,7 +2142,7 @@ S_regtry(pTHX_ regexp *prog, char *startpos) SAVEDESTRUCTOR_X(restore_pos, 0); } if (!PL_reg_curpm) { - Newz(22,PL_reg_curpm, 1, PMOP); + Newz(22, PL_reg_curpm, 1, PMOP); #ifdef USE_ITHREADS { SV* repointer = newSViv(0); @@ -2190,7 +2188,7 @@ S_regtry(pTHX_ regexp *prog, char *startpos) if(PL_reg_start_tmp) Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*); else - New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*); + New(22, PL_reg_start_tmp, PL_reg_start_tmpl, char*); } /* XXXX What this code is doing here?!!! There should be no need @@ -2265,6 +2263,42 @@ typedef union re_unwind_t { #define sayNO_SILENT goto do_no #define saySAME(x) if (x) goto yes; else goto no +#define POSCACHE_SUCCESS 0 /* caching success rather than failure */ +#define POSCACHE_SEEN 1 /* we know what we're caching */ +#define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */ +#define CACHEsayYES STMT_START { \ + if (cache_offset | cache_bit) { \ + if (!(PL_reg_poscache[0] & (1<uniquecharcount ) >=0 && \ + ( base + charid > trie->uniquecharcount ) && \ + ( base + charid - 1 - trie->uniquecharcount < trie->lasttrans) && \ trie->trans[ base + charid - 1 - trie->uniquecharcount ].check == state ) \ { \ state = trie->trans[ base + charid - 1 - trie->uniquecharcount ].next; \ @@ -2359,10 +2394,10 @@ S_regmatch(pTHX_ regnode *prog) I32 unwind = 0; /* used by the trie code */ - SV *sv_accept_buff; /* accepting states we have traversed */ - reg_trie_accepted *accept_buff; /* "" */ - reg_trie_data *trie; /* what trie are we using right now */ - U32 accepted = 0; /* how many accepting states we have seen*/ + SV *sv_accept_buff = 0; /* accepting states we have traversed */ + reg_trie_accepted *accept_buff = 0; /* "" */ + reg_trie_data *trie; /* what trie are we using right now */ + U32 accepted = 0; /* how many accepting states we have seen*/ #if 0 I32 firstcp = PL_savestack_ix; @@ -2373,7 +2408,7 @@ S_regmatch(pTHX_ regnode *prog) SV *dsv1 = PERL_DEBUG_PAD_ZERO(1); SV *dsv2 = PERL_DEBUG_PAD_ZERO(2); - SV *re_debug_flags; + SV *re_debug_flags = NULL; #endif GET_RE_DEBUG_FLAGS; @@ -2566,9 +2601,9 @@ S_regmatch(pTHX_ regnode *prog) DEBUG_TRIE_EXECUTE_r( PerlIO_printf( Perl_debug_log, - "%*s %sState: %4x, Base: %4x Accepted: %4x ", + "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ", REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], - state, base, accepted ); + (UV)state, (UV)base, (UV)accepted ); ); if ( base ) { @@ -2586,7 +2621,7 @@ S_regmatch(pTHX_ regnode *prog) uscan = foldbuf + UNISKIP( uvc ); } } else { - uvc = (U32)*uc; + uvc = (UV)*uc; len = 1; } @@ -2596,9 +2631,9 @@ S_regmatch(pTHX_ regnode *prog) state = 0; } DEBUG_TRIE_EXECUTE_r( - PerlIO_printf( Perl_debug_log, - "Charid:%3x CV:%4x After State: %4x%s\n", - charid, uvc, state, PL_colors[5] ); + PerlIO_printf( Perl_debug_log, + "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n", + charid, uvc, (UV)state, PL_colors[5] ); ); } if ( !accepted ) { @@ -2631,9 +2666,9 @@ S_regmatch(pTHX_ regnode *prog) DEBUG_TRIE_EXECUTE_r( PerlIO_printf( Perl_debug_log, - "%*s %sState: %4x, Base: %4x Accepted: %4x ", + "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ", REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], - state, base, accepted ); + (UV)state, (UV)base, (UV)accepted ); ); if ( base ) { @@ -2652,8 +2687,8 @@ S_regmatch(pTHX_ regnode *prog) } DEBUG_TRIE_EXECUTE_r( PerlIO_printf( Perl_debug_log, - "Charid:%3x CV:%4x After State: %4x%s\n", - charid, uvc, state, PL_colors[5] ); + "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n", + charid, uvc, (UV)state, PL_colors[5] ); ); } if ( !accepted ) { @@ -2683,12 +2718,12 @@ S_regmatch(pTHX_ regnode *prog) SV **tmp = av_fetch( trie->words, accept_buff[ 0 ].wordnum-1, 0 ); PerlIO_printf( Perl_debug_log, "%*s %sonly one match : #%d <%s>%s\n", - REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4], + REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], accept_buff[ 0 ].wordnum, tmp ? SvPV_nolen( *tmp ) : "not compiled under -Dr", PL_colors[5] ); }); - PL_reginput = accept_buff[ 0 ].endpos; + PL_reginput = (char *)accept_buff[ 0 ].endpos; /* in this case we free tmps/leave before we call regmatch as we wont be using accept_buff again. */ FREETMPS; @@ -2696,21 +2731,21 @@ S_regmatch(pTHX_ regnode *prog) gotit = regmatch( scan + NEXT_OFF( scan ) ); } else { DEBUG_EXECUTE_r( - PerlIO_printf( Perl_debug_log,"%*s %sgot %d possible matches%s\n", - REPORT_CODE_OFF + PL_regindent * 2, "",PL_colors[4], accepted, + PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n", + REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)accepted, PL_colors[5] ); ); while ( !gotit && accepted-- ) { U32 best = 0; U32 cur; for( cur = 1 ; cur <= accepted ; cur++ ) { - DEBUG_TRIE_EXECUTE_r( - PerlIO_printf( Perl_debug_log, - "%*s %sgot %d (%d) as best, looking at %d (%d)%s\n", - REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], - best, accept_buff[ best ].wordnum, cur, - accept_buff[ cur ].wordnum, PL_colors[5] ); - ); + DEBUG_TRIE_EXECUTE_r( + PerlIO_printf( Perl_debug_log, + "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n", + REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], + (IV)best, accept_buff[ best ].wordnum, (IV)cur, + accept_buff[ cur ].wordnum, PL_colors[5] ); + ); if ( accept_buff[ cur ].wordnum < accept_buff[ best ].wordnum ) best = cur; @@ -2718,7 +2753,7 @@ S_regmatch(pTHX_ regnode *prog) DEBUG_EXECUTE_r({ SV **tmp = av_fetch( trie->words, accept_buff[ best ].wordnum - 1, 0 ); PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n", - REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4], + REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], accept_buff[best].wordnum, tmp ? SvPV_nolen( *tmp ) : "not compiled under -Dr",scan, PL_colors[5] ); @@ -2729,7 +2764,7 @@ S_regmatch(pTHX_ regnode *prog) accept_buff[ accepted ] = tmp; best = accepted; } - PL_reginput = accept_buff[ best ].endpos; + PL_reginput = (char *)accept_buff[ best ].endpos; /* as far as I can tell we only need the SAVETMPS/FREETMPS @@ -3449,6 +3484,7 @@ S_regmatch(pTHX_ regnode *prog) CHECKPOINT cp, lastcp; CURCUR* cc = PL_regcc; char *lastloc = cc->lastloc; /* Detection of 0-len. */ + I32 cache_offset = 0, cache_bit = 0; n = cc->cur + 1; /* how many we know we matched */ PL_reginput = locinput; @@ -3501,7 +3537,7 @@ S_regmatch(pTHX_ regnode *prog) PL_reg_leftiter = PL_reg_maxiter; } if (PL_reg_leftiter-- == 0) { - I32 size = (PL_reg_maxiter + 7)/8; + I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8; if (PL_reg_poscache) { if ((I32)PL_reg_poscache_size < size) { Renew(PL_reg_poscache, size, char); @@ -3520,23 +3556,26 @@ S_regmatch(pTHX_ regnode *prog) ); } if (PL_reg_leftiter < 0) { - I32 o = locinput - PL_bostr, b; + cache_offset = locinput - PL_bostr; - o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4); - b = o % 8; - o /= 8; - if (PL_reg_poscache[o] & (1<flags & 0xf) - 1 + POSCACHE_START + + cache_offset * (scan->flags>>4); + cache_bit = cache_offset % 8; + cache_offset /= 8; + if (PL_reg_poscache[cache_offset] & (1<next)) { regcpblow(cp); - sayYES; /* All done. */ + CACHEsayYES; /* All done. */ } REGCP_UNWIND(lastcp); regcppop(); @@ -3566,7 +3605,7 @@ S_regmatch(pTHX_ regnode *prog) "Complex regular subexpression recursion", REG_INFTY - 1); } - sayNO; + CACHEsayNO; } DEBUG_EXECUTE_r( @@ -3582,13 +3621,13 @@ S_regmatch(pTHX_ regnode *prog) REGCP_SET(lastcp); if (regmatch(cc->scan)) { regcpblow(cp); - sayYES; + CACHEsayYES; } REGCP_UNWIND(lastcp); regcppop(); cc->cur = n - 1; cc->lastloc = lastloc; - sayNO; + CACHEsayNO; } /* Prefer scan over next for maximal matching. */ @@ -3600,7 +3639,7 @@ S_regmatch(pTHX_ regnode *prog) REGCP_SET(lastcp); if (regmatch(cc->scan)) { regcpblow(cp); - sayYES; + CACHEsayYES; } REGCP_UNWIND(lastcp); regcppop(); /* Restore some previous $s? */ @@ -3624,13 +3663,13 @@ S_regmatch(pTHX_ regnode *prog) if (PL_regcc) ln = PL_regcc->cur; if (regmatch(cc->next)) - sayYES; + CACHEsayYES; if (PL_regcc) PL_regcc->cur = ln; PL_regcc = cc; cc->cur = n - 1; cc->lastloc = lastloc; - sayNO; + CACHEsayNO; } /* NOT REACHED */ case BRANCHJ: @@ -4167,7 +4206,6 @@ S_regmatch(pTHX_ regnode *prog) } else PL_reginput = locinput; - PL_reg_flags ^= RF_false; goto do_ifmatch; case IFMATCH: n = 1; @@ -4183,8 +4221,6 @@ S_regmatch(pTHX_ regnode *prog) do_ifmatch: inner = NEXTOPER(NEXTOPER(scan)); if (regmatch(inner) != n) { - if (n == 0) - PL_reg_flags ^= RF_false; say_no: if (logical) { logical = 0; @@ -4194,8 +4230,6 @@ S_regmatch(pTHX_ regnode *prog) else sayNO; } - if (n == 0) - PL_reg_flags ^= RF_false; say_yes: if (logical) { logical = 0; @@ -4233,12 +4267,12 @@ yes_loud: DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, "%*s %scould match...%s\n", - REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5]) + REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5]) ); goto yes; yes_final: DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n", - PL_colors[4],PL_colors[5])); + PL_colors[4], PL_colors[5])); yes: #ifdef DEBUGGING PL_regindent--; @@ -4254,7 +4288,7 @@ no: DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, "%*s %sfailed...%s\n", - REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5]) + REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5]) ); goto do_no; no_final: @@ -4547,7 +4581,7 @@ S_regrepeat(pTHX_ regnode *p, I32 max) PL_reginput = scan; DEBUG_r({ - SV *re_debug_flags; + SV *re_debug_flags = NULL; SV *prop = sv_newmortal(); GET_RE_DEBUG_FLAGS; DEBUG_EXECUTE_r({ @@ -4857,6 +4891,7 @@ S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim) static void restore_pos(pTHX_ void *arg) { + (void)arg; /* unused */ if (PL_reg_eval_set) { if (PL_reg_oldsaved) { PL_reg_re->subbeg = PL_reg_oldsaved;