X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regexec.c;h=9ded51187d4f47bfa13ae7b5fd1a731c95397cd3;hb=054149a8968eff13946521ef29d0987fadd9a28b;hp=2470821d68e0d19542aad4a7c0020362ab740d9e;hpb=5d458dd8ef53373c3f90d568f6668084b0ccbc62;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regexec.c b/regexec.c index 2470821..9ded511 100644 --- a/regexec.c +++ b/regexec.c @@ -371,7 +371,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, I32 ml_anch; register char *other_last = NULL; /* other substr checked before this */ char *check_at = NULL; /* check substr found at this pos */ - const I32 multiline = prog->reganch & PMf_MULTILINE; + const I32 multiline = prog->extflags & RXf_PMf_MULTILINE; + RXi_GET_DECL(prog,progi); #ifdef DEBUGGING const char * const i_strpos = strpos; #endif @@ -380,7 +381,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, RX_MATCH_UTF8_set(prog,do_utf8); - if (prog->reganch & ROPT_UTF8) { + if (prog->extflags & RXf_UTF8) { PL_reg_flags |= RF_utf8; } DEBUG_EXECUTE_r( @@ -412,14 +413,14 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, "Non-utf8 string cannot match utf8 check string\n")); goto fail; } - if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */ - ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE) - || ( (prog->reganch & ROPT_ANCH_BOL) + if (prog->extflags & RXf_ANCH) { /* Match at beg-of-str or after \n */ + ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE) + || ( (prog->extflags & RXf_ANCH_BOL) && !multiline ) ); /* Check after \n? */ if (!ml_anch) { - if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */ - | ROPT_IMPLICIT)) /* not a real BOL */ + if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */ + && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */ /* SvCUR is not set on references: SvRV and SvPVX_const overlap */ && sv && !SvROK(sv) && (strpos != strbeg)) { @@ -427,7 +428,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, goto fail; } if (prog->check_offset_min == prog->check_offset_max && - !(prog->reganch & ROPT_CANY_SEEN)) { + !(prog->extflags & RXf_CANY_SEEN)) { /* Substring at constant offset from beg-of-str... */ I32 slen; @@ -513,7 +514,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, if (PL_screamfirst[BmRARE(check)] >= 0 || ( BmRARE(check) == '\n' - && (BmPREVIOUS(check) == SvCUR(check) - 1) + && (BmPREVIOUS(check) == (U8)SvCUR(check) - 1) && SvTAIL(check) )) s = screaminstr(sv, check, srch_start_shift + (s - strbeg), srch_end_shift, pp, 0); @@ -528,7 +529,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, else { U8* start_point; U8* end_point; - if (prog->reganch & ROPT_CANY_SEEN) { + if (prog->extflags & RXf_CANY_SEEN) { start_point= (U8*)(s + srch_start_shift); end_point= (U8*)(strend - srch_end_shift); } else { @@ -814,17 +815,17 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */ && (strpos != strbeg) && strpos[-1] != '\n' /* May be due to an implicit anchor of m{.*foo} */ - && !(prog->reganch & ROPT_IMPLICIT)) + && !(prog->intflags & PREGf_IMPLICIT)) { t = strpos; goto find_anchor; } DEBUG_EXECUTE_r( if (ml_anch) PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n", - (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]); + (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]); ); success_at_start: - if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */ + if (!(prog->intflags & PREGf_NAUGHTY) /* XXXX If strpos moved? */ && (do_utf8 ? ( prog->check_utf8 /* Could be deleted already */ && --BmUSEFUL(prog->check_utf8) < 0 @@ -847,7 +848,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, /* XXXX This is a remnant of the old implementation. It looks wasteful, since now INTUIT can use many other heuristics. */ - prog->reganch &= ~RE_USE_INTUIT; + prog->extflags &= ~RXf_USE_INTUIT; } else s = strpos; @@ -857,7 +858,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */ /* trie stclasses are too expensive to use here, we are better off to leave it to regmatch itself */ - if (prog->regstclass && PL_regkind[OP(prog->regstclass)]!=TRIE) { + if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) { /* minlen == 0 is possible if regstclass is \b or \B, and the fixed substr is ''$. Since minlen is already taken into account, s+1 is before strend; @@ -866,9 +867,9 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, regstclass does not come from lookahead... */ /* If regstclass takes bytelength more than 1: If charlength==1, OK. This leaves EXACTF only, which is dealt with in find_byclass(). */ - const U8* const str = (U8*)STRING(prog->regstclass); - const int cl_l = (PL_regkind[OP(prog->regstclass)] == EXACT - ? CHR_DIST(str+STR_LEN(prog->regstclass), str) + const U8* const str = (U8*)STRING(progi->regstclass); + const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT + ? CHR_DIST(str+STR_LEN(progi->regstclass), str) : 1); char * endpos; if (prog->anchored_substr || prog->anchored_utf8 || ml_anch) @@ -878,11 +879,11 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, else endpos= strend; - DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %d s: %d endpos: %d\n", - (IV)start_shift, check_at - strbeg, s - strbeg, endpos - strbeg)); + DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n", + (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg))); t = s; - s = find_byclass(prog, prog->regstclass, s, endpos, NULL); + s = find_byclass(prog, progi->regstclass, s, endpos, NULL); if (!s) { #ifdef DEBUGGING const char *what = NULL; @@ -894,7 +895,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, } DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, "This position contradicts STCLASS...\n") ); - if ((prog->reganch & ROPT_ANCH) && !ml_anch) + if ((prog->extflags & RXf_ANCH) && !ml_anch) goto fail; /* Contradict one of substrings */ if (prog->anchored_substr || prog->anchored_utf8) { @@ -975,8 +976,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, -#define REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len, uvc, charid, \ -foldlen, foldbuf, uniflags) STMT_START { \ +#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \ +uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \ switch (trie_type) { \ case trie_utf8_fold: \ if ( foldlen>0 ) { \ @@ -1004,8 +1005,8 @@ foldlen, foldbuf, uniflags) STMT_START { \ } \ else { \ charid = 0; \ - if (trie->widecharmap) { \ - SV** const svpp = hv_fetch(trie->widecharmap, \ + if (widecharmap) { \ + SV** const svpp = hv_fetch(widecharmap, \ (char*)&uvc, sizeof(UV), 0); \ if (svpp) \ charid = (U16)SvIV(*svpp); \ @@ -1126,7 +1127,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, regmatch_info *reginfo) { dVAR; - const I32 doevery = (prog->reganch & ROPT_SKIP) == 0; + const I32 doevery = (prog->intflags & PREGf_SKIP) == 0; char *m; STRLEN ln; STRLEN lnc; @@ -1136,7 +1137,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, char *e; register I32 tmp = 1; /* Scratch variable? */ register const bool do_utf8 = PL_reg_match_utf8; - + RXi_GET_DECL(prog,progi); + /* We know what class it must start with. */ switch (OP(c)) { case ANYOF: @@ -1416,8 +1418,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, : trie_plain; /* what trie are we using right now */ reg_ac_data *aho - = (reg_ac_data*)prog->data->data[ ARG( c ) ]; - reg_trie_data *trie=aho->trie; + = (reg_ac_data*)progi->data->data[ ARG( c ) ]; + reg_trie_data *trie + = (reg_trie_data*)progi->data->data[ aho->trie ]; + HV *widecharmap = (HV*) progi->data->data[ aho->trie + 1 ]; const char *last_start = strend - trie->minlen; #ifdef DEBUGGING @@ -1520,8 +1524,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, } points[pointpos++ % maxlen]= uc; - REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len, - uvc, charid, foldlen, foldbuf, uniflags); + REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, + uscan, len, uvc, charid, foldlen, + foldbuf, uniflags); DEBUG_TRIE_EXECUTE_r({ dump_exec_pos( (char *)uc, c, strend, real_start, s, do_utf8 ); @@ -1594,8 +1599,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, s = (char*)leftmost; DEBUG_TRIE_EXECUTE_r({ PerlIO_printf( - Perl_debug_log,"Matches word #%"UVxf" at position %d. Trying full pattern...\n", - (UV)accepted_word, s - real_start + Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n", + (UV)accepted_word, (IV)(s - real_start) ); }); if (!reginfo || regtry(reginfo, &s)) { @@ -1635,7 +1640,9 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * /* strend: pointer to null at end of string */ /* strbeg: real beginning of string */ /* minend: end of match must be >=minend after stringarg. */ -/* data: May be used for some additional optimizations. */ +/* data: May be used for some additional optimizations. + Currently its only used, with a U32 cast, for transmitting + the ganch offset when doing a /g match. This will change */ /* nosave: For optimizations. */ { dVAR; @@ -1650,7 +1657,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * SV* const oreplsv = GvSV(PL_replgv); const bool do_utf8 = (bool)DO_UTF8(sv); I32 multiline; - + RXi_GET_DECL(prog,progi); regmatch_info reginfo; /* create some info to pass to regtry etc */ GET_RE_DEBUG_FLAGS_DECL; @@ -1663,7 +1670,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * return 0; } - multiline = prog->reganch & PMf_MULTILINE; + multiline = prog->extflags & RXf_PMf_MULTILINE; reginfo.prog = prog; RX_MATCH_UTF8_set(prog, do_utf8); @@ -1682,7 +1689,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * /* Check validity of program. */ - if (UCHARAT(prog->program) != REG_MAGIC) { + if (UCHARAT(progi->program) != REG_MAGIC) { Perl_croak(aTHX_ "corrupted regexp program"); } @@ -1690,7 +1697,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_eval_set = 0; PL_reg_maxiter = 0; - if (prog->reganch & ROPT_UTF8) + if (prog->extflags & RXf_UTF8) PL_reg_flags |= RF_utf8; /* Mark beginning of line for ^ and lookbehind. */ @@ -1707,26 +1714,49 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * /* If there is a "must appear" string, look for it. */ s = startpos; - if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to set reginfo->ganch */ + if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */ MAGIC *mg; if (flags & REXEC_IGNOREPOS) /* Means: check only at start */ - reginfo.ganch = startpos; + reginfo.ganch = startpos + prog->gofs; else if (sv && SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv) && (mg = mg_find(sv, PERL_MAGIC_regex_global)) && mg->mg_len >= 0) { reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */ - if (prog->reganch & ROPT_ANCH_GPOS) { + if (prog->extflags & RXf_ANCH_GPOS) { if (s > reginfo.ganch) goto phooey; - s = reginfo.ganch; + s = reginfo.ganch - prog->gofs; } } - else /* pos() not defined */ + else if (data) { + reginfo.ganch = strbeg + PTR2UV(data); + } else /* pos() not defined */ reginfo.ganch = strbeg; } - + if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) { + I32 *t; + if (!progi->swap) { + /* We have to be careful. If the previous successful match + was from this regex we don't want a subsequent paritally + successful match to clobber the old results. + So when we detect this possibility we add a swap buffer + to the re, and switch the buffer each match. If we fail + we switch it back, otherwise we leave it swapped. + */ + Newxz(progi->swap, 1, regexp_paren_ofs); + /* no need to copy these */ + Newxz(progi->swap->startp, prog->nparens + 1, I32); + Newxz(progi->swap->endp, prog->nparens + 1, I32); + } + t = progi->swap->startp; + progi->swap->startp = prog->startp; + prog->startp = t; + t = progi->swap->endp; + progi->swap->endp = prog->endp; + prog->endp = t; + } if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) { re_scream_pos_data d; @@ -1743,11 +1773,11 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * /* Simplest case: anchored match need be tried only once. */ /* [unless only anchor is BOL and multiline is set] */ - if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) { + if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) { if (s == startpos && regtry(®info, &startpos)) goto got_it; - else if (multiline || (prog->reganch & ROPT_IMPLICIT) - || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */ + else if (multiline || (prog->intflags & PREGf_IMPLICIT) + || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */ { char *end; @@ -1764,7 +1794,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * after_try: if (s >= end) goto phooey; - if (prog->reganch & RE_USE_INTUIT) { + if (prog->extflags & RXf_USE_INTUIT) { s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL); if (!s) goto phooey; @@ -1784,18 +1814,19 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * } } goto phooey; - } else if (ROPT_GPOS_CHECK == (prog->reganch & ROPT_GPOS_CHECK)) + } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK)) { /* the warning about reginfo.ganch being used without intialization - is bogus -- we set it above, when prog->reganch & ROPT_GPOS_SEEN + is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN and we only enter this block when the same bit is set. */ - if (regtry(®info, ®info.ganch)) + char *tmp_s = reginfo.ganch - prog->gofs; + if (regtry(®info, &tmp_s)) goto got_it; goto phooey; } /* Messy cases: unanchored match. */ - if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) { + if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) { /* we have /x+whatever/ */ /* it must be a one character string (XXXX Except UTF?) */ char ch; @@ -1926,9 +1957,9 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * }); goto phooey; } - else if ( (c = prog->regstclass) ) { + else if ( (c = progi->regstclass) ) { if (minlen) { - const OPCODE op = OP(prog->regstclass); + const OPCODE op = OP(progi->regstclass); /* don't bother with what can't match */ if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE) strend = HOPc(strend, -(minlen - 1)); @@ -2074,6 +2105,16 @@ phooey: PL_colors[4], PL_colors[5])); if (PL_reg_eval_set) restore_pos(aTHX_ prog); + if (progi->swap) { + /* we failed :-( roll it back */ + I32 *t; + t = progi->swap->startp; + progi->swap->startp = prog->startp; + prog->startp = t; + t = progi->swap->endp; + progi->swap->endp = prog->endp; + prog->endp = t; + } return 0; } @@ -2089,10 +2130,11 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos) register I32 *ep; CHECKPOINT lastcp; regexp *prog = reginfo->prog; + RXi_GET_DECL(prog,progi); GET_RE_DEBUG_FLAGS_DECL; reginfo->cutpoint=NULL; - if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) { + if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) { MAGIC *mg; PL_reg_eval_set = RS_init; @@ -2195,8 +2237,8 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos) * on those tests seems to be returning null fields from matches. * --jhi */ #if 1 - sp = prog->startp; - ep = prog->endp; + sp = PL_regstartp; + ep = PL_regendp; if (prog->nparens) { register I32 i; for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) { @@ -2206,8 +2248,8 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos) } #endif REGCP_SET(lastcp); - if (regmatch(reginfo, prog->program + 1)) { - prog->endp[0] = PL_reginput - PL_bostr; + if (regmatch(reginfo, progi->program + 1)) { + PL_regendp[0] = PL_reginput - PL_bostr; return 1; } if (reginfo->cutpoint) @@ -2437,7 +2479,7 @@ STATIC void S_debug_start_match(pTHX_ const regexp *prog, const bool do_utf8, const char *start, const char *end, const char *blurb) { - const bool utf8_pat= prog->reganch & ROPT_UTF8 ? 1 : 0; + const bool utf8_pat= prog->extflags & RXf_UTF8 ? 1 : 0; if (!PL_colorset) reginitcolors(); { @@ -2533,7 +2575,8 @@ S_dump_exec_pos(pTHX_ const char *locinput, STATIC I32 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) { I32 n; - SV *sv_dat=(SV*)rex->data->data[ ARG( scan ) ]; + RXi_GET_DECL(rex,rexi); + SV *sv_dat=(SV*)rexi->data->data[ ARG( scan ) ]; I32 *nums=(I32*)SvPVX(sv_dat); for ( n=0; n= nums[n] && @@ -2556,7 +2599,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) const U32 uniflags = UTF8_ALLOW_DEFAULT; regexp *rex = reginfo->prog; - + RXi_GET_DECL(rex,rexi); + regmatch_slab *orig_slab; regmatch_state *orig_state; @@ -2592,6 +2636,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) during a successfull match */ U32 lastopen = 0; /* last open we saw */ bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0; + /* these three flags are set by various ops to signal information to * the very next op. They have a useful lifetime of exactly one loop @@ -2612,7 +2657,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) GET_RE_DEBUG_FLAGS_DECL; #endif - DEBUG_STACK_r( { + DEBUG_OPTIMISE_r( { PerlIO_printf(Perl_debug_log,"regmatch start\n"); }); /* on first ever call to regmatch, allocate first slab */ @@ -2646,10 +2691,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PerlIO_printf(Perl_debug_log, "%3"IVdf":%*s%s(%"IVdf")\n", - (IV)(scan - rex->program), depth*2, "", + (IV)(scan - rexi->program), depth*2, "", SvPVX_const(prop), (PL_regkind[OP(scan)] == END || !rnext) ? - 0 : (IV)(rnext - rex->program)); + 0 : (IV)(rnext - rexi->program)); }); next = scan + NEXT_OFF(scan); @@ -2756,7 +2801,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) /* what trie are we using right now */ reg_trie_data * const trie - = (reg_trie_data*)rex->data->data[ ARG( scan ) ]; + = (reg_trie_data*)rexi->data->data[ ARG( scan ) ]; + HV * widecharmap = (HV *)rexi->data->data[ ARG( scan ) + 1 ]; U32 state = trie->startstate; if (trie->bitmap && trie_type != trie_utf8_fold && @@ -2852,8 +2898,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) }); if ( base ) { - REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len, - uvc, charid, foldlen, foldbuf, uniflags); + REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, + uscan, len, uvc, charid, foldlen, + foldbuf, uniflags); if (charid && (base + charid > trie->uniquecharcount ) @@ -2900,16 +2947,21 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) if ( ST.accepted == 1 ) { /* only one choice left - just continue */ DEBUG_EXECUTE_r({ - reg_trie_data * const trie - = (reg_trie_data*)rex->data->data[ ARG(ST.me) ]; - SV ** const tmp = RX_DEBUG(reginfo->prog) - ? av_fetch( trie->words, ST.accept_buff[ 0 ].wordnum-1, 0 ) - : NULL; + AV *const trie_words + = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]; + SV ** const tmp = av_fetch( trie_words, + ST.accept_buff[ 0 ].wordnum-1, 0 ); + SV *sv= tmp ? sv_newmortal() : NULL; + PerlIO_printf( Perl_debug_log, "%*s %sonly one match left: #%d <%s>%s\n", REPORT_CODE_OFF+depth*2, "", PL_colors[4], ST.accept_buff[ 0 ].wordnum, - tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr", + tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0, + PL_colors[0], PL_colors[1], + (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) + ) + : "not compiled under -Dr", PL_colors[5] ); }); PL_reginput = (char *)ST.accept_buff[ 0 ].endpos; @@ -2976,19 +3028,23 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) } DEBUG_EXECUTE_r({ - reg_trie_data * const trie - = (reg_trie_data*)rex->data->data[ ARG(ST.me) ]; - SV ** const tmp = RX_DEBUG(reginfo->prog) - ? av_fetch( trie->words, ST.accept_buff[ best ].wordnum - 1, 0 ) - : NULL; + AV *const trie_words + = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]; + SV ** const tmp = av_fetch( trie_words, + ST.accept_buff[ best ].wordnum - 1, 0 ); regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ? ST.B : ST.me + ST.jump[ST.accept_buff[best].wordnum]; + SV *sv= tmp ? sv_newmortal() : NULL; + PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at node #%d %s\n", REPORT_CODE_OFF+depth*2, "", PL_colors[4], ST.accept_buff[best].wordnum, - tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr", + tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0, + PL_colors[0], PL_colors[1], + (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) + ) : "not compiled under -Dr", REG_NODE_NUM(nextop), PL_colors[5] ); }); @@ -3456,6 +3512,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) { SV *ret; regexp *re; + regexp_internal *rei; regnode *startpoint; case GOSTART: @@ -3471,12 +3528,13 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) nochange_depth = 0; } re = rex; + rei = rexi; (void)ReREFCNT_inc(rex); if (OP(scan)==GOSUB) { startpoint = scan + ARG2L(scan); ST.close_paren = ARG(scan); } else { - startpoint = re->program+1; + startpoint = rei->program+1; ST.close_paren = 0; } goto eval_recurse_doit; @@ -3497,12 +3555,17 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PAD *old_comppad; n = ARG(scan); - PL_op = (OP_4tree*)rex->data->data[n]; + PL_op = (OP_4tree*)rexi->data->data[n]; DEBUG_STATE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) ); - PAD_SAVE_LOCAL(old_comppad, (PAD*)rex->data->data[n + 2]); + PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]); PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr; + if (sv_yes_mark) { + SV *sv_mrk = get_sv("REGMARK", 1); + sv_setsv(sv_mrk, sv_yes_mark); + } + CALLRUNOPS(aTHX); /* Scalar context. */ SPAGAIN; if (SP == before) @@ -3559,11 +3622,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PL_regsize = osize; } } + rei = RXi_GET(re); DEBUG_EXECUTE_r( debug_start_match(re, do_utf8, locinput, PL_regeol, "Matching embedded"); ); - startpoint = re->program + 1; + startpoint = rei->program + 1; ST.close_paren = 0; /* only used for GOSUB */ /* borrowed from regtry */ if (PL_reg_start_tmpl <= re->nparens) { @@ -3591,7 +3655,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PL_reg_maxiter = 0; ST.toggle_reg_flags = PL_reg_flags; - if (re->reganch & ROPT_UTF8) + if (re->extflags & RXf_UTF8) PL_reg_flags |= RF_utf8; else PL_reg_flags &= ~RF_utf8; @@ -3600,6 +3664,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) ST.prev_rex = rex; ST.prev_curlyx = cur_curlyx; rex = re; + rexi = rei; cur_curlyx = NULL; ST.B = next; ST.prev_eval = cur_eval; @@ -3619,6 +3684,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PL_reg_flags ^= ST.toggle_reg_flags; ReREFCNT_dec(rex); rex = ST.prev_rex; + rexi = RXi_GET(rex); regcpblow(ST.cp); cur_eval = ST.prev_eval; cur_curlyx = ST.prev_curlyx; @@ -3632,6 +3698,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PL_reg_flags ^= ST.toggle_reg_flags; ReREFCNT_dec(rex); rex = ST.prev_rex; + rexi = RXi_GET(rex); PL_reginput = locinput; REGCP_UNWIND(ST.lastcp); regcppop(rex); @@ -3838,7 +3905,15 @@ NULL } case CURLYX_end: /* just finished matching all of A*B */ - regcpblow(ST.cp); + if (PL_reg_eval_set){ + SV *pres= GvSV(PL_replgv); + SvREFCNT_inc(pres); + regcpblow(ST.cp); + sv_setsv(GvSV(PL_replgv), pres); + SvREFCNT_dec(pres); + } else { + regcpblow(ST.cp); + } cur_curlyx = ST.prev_curlyx; sayYES; /* NOTREACHED */ @@ -4080,7 +4155,7 @@ NULL case CUTGROUP: PL_reginput = locinput; sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL : - (SV*)rex->data->data[ ARG( scan ) ]; + (SV*)rexi->data->data[ ARG( scan ) ]; PUSH_STATE_GOTO(CUTGROUP_next,next); /* NOTREACHED */ case CUTGROUP_next_fail: @@ -4610,7 +4685,8 @@ NULL PL_reg_flags ^= st->u.eval.toggle_reg_flags; st->u.eval.prev_rex = rex; /* inner */ - rex = cur_eval->u.eval.prev_rex; /* outer */ + rex = cur_eval->u.eval.prev_rex; /* outer */ + rexi = RXi_GET(rex); cur_curlyx = cur_eval->u.eval.prev_curlyx; ReREFCNT_inc(rex); st->u.eval.cp = regcppush(0); /* Save *all* the positions. */ @@ -4640,6 +4716,7 @@ NULL (long)(locinput - PL_reg_starttry), (long)(reginfo->till - PL_reg_starttry), PL_colors[5])); + sayNO_SILENT; /* Cannot match: too short. */ } PL_reginput = locinput; /* put where regtry can find it */ @@ -4730,7 +4807,7 @@ NULL case PRUNE: PL_reginput = locinput; if (!scan->flags) - sv_yes_mark = sv_commit = (SV*)rex->data->data[ ARG( scan ) ]; + sv_yes_mark = sv_commit = (SV*)rexi->data->data[ ARG( scan ) ]; PUSH_STATE_GOTO(COMMIT_next,next); /* NOTREACHED */ case COMMIT_next_fail: @@ -4744,7 +4821,7 @@ NULL case MARKPOINT: ST.prev_mark = mark_state; ST.mark_name = sv_commit = sv_yes_mark - = (SV*)rex->data->data[ ARG( scan ) ]; + = (SV*)rexi->data->data[ ARG( scan ) ]; mark_state = st; ST.mark_loc = PL_reginput = locinput; PUSH_YES_STATE_GOTO(MARKPOINT_next,next); @@ -4776,16 +4853,16 @@ NULL case SKIP: PL_reginput = locinput; if (scan->flags) { - /* (*CUT) : if we fail we cut here*/ + /* (*SKIP) : if we fail we cut here*/ ST.mark_name = NULL; ST.mark_loc = locinput; PUSH_STATE_GOTO(SKIP_next,next); } else { - /* (*CUT:NAME) : if there is a (*MARK:NAME) fail where it was, + /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was, otherwise do nothing. Meaning we need to scan */ regmatch_state *cur = mark_state; - SV *find = (SV*)rex->data->data[ ARG( scan ) ]; + SV *find = (SV*)rexi->data->data[ ARG( scan ) ]; while (cur) { if ( sv_eq( cur->u.mark.mark_name, @@ -4797,7 +4874,7 @@ NULL cur = cur->u.mark.prev_mark; } } - /* Didn't find our (*MARK:NAME) so ignore this (*CUT:NAME) */ + /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */ break; case SKIP_next_fail: if (ST.mark_name) { @@ -4973,7 +5050,7 @@ no_silent: result = 0; final_exit: - if (rex->reganch & ROPT_VERBARG_SEEN) { + if (rex->intflags & PREGf_VERBARG_SEEN) { SV *sv_err = get_sv("REGERROR", 1); SV *sv_mrk = get_sv("REGMARK", 1); if (result) { @@ -5266,7 +5343,8 @@ Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool SV *sw = NULL; SV *si = NULL; SV *alt = NULL; - const struct reg_data * const data = prog ? prog->data : NULL; + RXi_GET_DECL(prog,progi); + const struct reg_data * const data = prog ? progi->data : NULL; if (data && data->count) { const U32 n = ARG(node); @@ -5539,56 +5617,46 @@ restore_pos(pTHX_ void *arg) STATIC void S_to_utf8_substr(pTHX_ register regexp *prog) { - if (prog->float_substr && !prog->float_utf8) { - SV* const sv = newSVsv(prog->float_substr); - prog->float_utf8 = sv; - sv_utf8_upgrade(sv); - if (SvTAIL(prog->float_substr)) - SvTAIL_on(sv); - if (prog->float_substr == prog->check_substr) - prog->check_utf8 = sv; - } - if (prog->anchored_substr && !prog->anchored_utf8) { - SV* const sv = newSVsv(prog->anchored_substr); - prog->anchored_utf8 = sv; - sv_utf8_upgrade(sv); - if (SvTAIL(prog->anchored_substr)) - SvTAIL_on(sv); - if (prog->anchored_substr == prog->check_substr) - prog->check_utf8 = sv; - } + int i = 1; + do { + if (prog->substrs->data[i].substr + && !prog->substrs->data[i].utf8_substr) { + SV* const sv = newSVsv(prog->substrs->data[i].substr); + prog->substrs->data[i].utf8_substr = sv; + sv_utf8_upgrade(sv); + if (SvVALID(prog->substrs->data[i].substr)) + fbm_compile(sv, 0); + if (SvTAIL(prog->substrs->data[i].substr)) + SvTAIL_on(sv); + if (prog->substrs->data[i].substr == prog->check_substr) + prog->check_utf8 = sv; + } + } while (i--); } STATIC void S_to_byte_substr(pTHX_ register regexp *prog) { dVAR; - if (prog->float_utf8 && !prog->float_substr) { - SV* sv = newSVsv(prog->float_utf8); - prog->float_substr = sv; - if (sv_utf8_downgrade(sv, TRUE)) { - if (SvTAIL(prog->float_utf8)) - SvTAIL_on(sv); - } else { - SvREFCNT_dec(sv); - prog->float_substr = sv = &PL_sv_undef; - } - if (prog->float_utf8 == prog->check_utf8) - prog->check_substr = sv; - } - if (prog->anchored_utf8 && !prog->anchored_substr) { - SV* sv = newSVsv(prog->anchored_utf8); - prog->anchored_substr = sv; - if (sv_utf8_downgrade(sv, TRUE)) { - if (SvTAIL(prog->anchored_utf8)) - SvTAIL_on(sv); - } else { - SvREFCNT_dec(sv); - prog->anchored_substr = sv = &PL_sv_undef; + int i = 1; + do { + if (prog->substrs->data[i].utf8_substr + && !prog->substrs->data[i].substr) { + SV* sv = newSVsv(prog->substrs->data[i].utf8_substr); + if (sv_utf8_downgrade(sv, TRUE)) { + if (SvVALID(prog->substrs->data[i].utf8_substr)) + fbm_compile(sv, 0); + if (SvTAIL(prog->substrs->data[i].utf8_substr)) + SvTAIL_on(sv); + } else { + SvREFCNT_dec(sv); + sv = &PL_sv_undef; + } + prog->substrs->data[i].substr = sv; + if (prog->substrs->data[i].utf8_substr == prog->check_utf8) + prog->check_substr = sv; } - if (prog->anchored_utf8 == prog->check_utf8) - prog->check_substr = sv; - } + } while (i--); } /*