X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regexec.c;h=a7a9a675e0273ccc796849e092a1309b9430b5a7;hb=79316e7384d4e499a91e5690f6fcce22fa852ca5;hp=b691162a36c26bef4ffa84cdac509693abf72403;hpb=a8e8ab15da1db8d0342b4425b514ee293f371382;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regexec.c b/regexec.c index b691162..a7a9a67 100644 --- a/regexec.c +++ b/regexec.c @@ -383,20 +383,26 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, char *check_at = Nullch; /* check substr found at this pos */ #ifdef DEBUGGING char *i_strpos = strpos; + SV *dsv = sv_2mortal(newSVpvn("", 0)); #endif - DEBUG_r( if (!PL_colorset) reginitcolors() ); - DEBUG_r(PerlIO_printf(Perl_debug_log, - "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n", - PL_colors[4],PL_colors[5],PL_colors[0], - prog->precomp, - PL_colors[1], - (strlen(prog->precomp) > 60 ? "..." : ""), - PL_colors[0], - (int)(strend - strpos > 60 ? 60 : strend - strpos), - strpos, PL_colors[1], - (strend - strpos > 60 ? "..." : "")) - ); + DEBUG_r({ + char*s = UTF ? sv_uni_display(dsv, sv, 60, 0) : strpos; + int len = UTF ? strlen(s) : strend - strpos; + if (!PL_colorset) + reginitcolors(); + PerlIO_printf(Perl_debug_log, + "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n", + PL_colors[4],PL_colors[5],PL_colors[0], + prog->precomp, + PL_colors[1], + (strlen(prog->precomp) > 60 ? "..." : ""), + PL_colors[0], + (int)(len > 60 ? 60 : len), + s, PL_colors[1], + (len > 60 ? "..." : "") + ); + }); if (prog->reganch & ROPT_UTF8) PL_reg_flags |= RF_utf8; @@ -917,8 +923,15 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta m = STRING(c); ln = STR_LEN(c); if (UTF) { - c1 = to_utf8_lower((U8*)m); - c2 = to_utf8_upper((U8*)m); + STRLEN ulen1, ulen2; + U8 tmpbuf1[UTF8_MAXLEN*2+1]; + U8 tmpbuf2[UTF8_MAXLEN*2+1]; + + to_utf8_lower((U8*)m, tmpbuf1, &ulen1); + to_utf8_upper((U8*)m, tmpbuf2, &ulen2); + + c1 = utf8_to_uvuni(tmpbuf1, 0); + c2 = utf8_to_uvuni(tmpbuf2, 0); } else { c1 = *(U8*)m; @@ -1443,6 +1456,9 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * char *scream_olds; SV* oreplsv = GvSV(PL_replgv); bool do_utf8 = DO_UTF8(sv); +#ifdef DEBUGGING + SV *dsv = sv_2mortal(newSVpvn("", 0)); +#endif PL_regcc = 0; @@ -1525,18 +1541,23 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * goto phooey; /* not present */ } - DEBUG_r( if (!PL_colorset) reginitcolors() ); - DEBUG_r(PerlIO_printf(Perl_debug_log, - "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n", - PL_colors[4],PL_colors[5],PL_colors[0], - prog->precomp, - PL_colors[1], - (strlen(prog->precomp) > 60 ? "..." : ""), - PL_colors[0], - (int)(strend - startpos > 60 ? 60 : strend - startpos), - startpos, PL_colors[1], - (strend - startpos > 60 ? "..." : "")) - ); + DEBUG_r({ + char *s = UTF ? sv_uni_display(dsv, sv, 60, 0) : startpos; + int len = UTF ? strlen(s) : strend - startpos; + if (!PL_colorset) + reginitcolors(); + PerlIO_printf(Perl_debug_log, + "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n", + PL_colors[4],PL_colors[5],PL_colors[0], + prog->precomp, + PL_colors[1], + (strlen(prog->precomp) > 60 ? "..." : ""), + PL_colors[0], + (int)(len > 60 ? 60 : len), + s, PL_colors[1], + (len > 60 ? "..." : "") + ); + }); /* Simplest case: anchored match need be tried only once. */ /* [unless only anchor is BOL and multiline is set] */ @@ -1706,7 +1727,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * DEBUG_r({ SV *prop = sv_newmortal(); regprop(prop, c); - PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s); + PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), UTF ? sv_uni_display(dsv, sv, 60, 0) : s); }); if (find_byclass(prog, c, s, strend, startpos, 0)) goto got_it; @@ -2019,6 +2040,11 @@ S_regmatch(pTHX_ regnode *prog) I32 firstcp = PL_savestack_ix; #endif register bool do_utf8 = PL_reg_match_utf8; +#ifdef DEBUGGING + SV *dsv0 = sv_2mortal(newSVpvn("", 0)); + SV *dsv1 = sv_2mortal(newSVpvn("", 0)); + SV *dsv2 = sv_2mortal(newSVpvn("", 0)); +#endif #ifdef DEBUGGING PL_regindent++; @@ -2029,7 +2055,7 @@ S_regmatch(pTHX_ regnode *prog) scan = prog; while (scan != NULL) { - DEBUG_r( { + DEBUG_r( { SV *prop = sv_newmortal(); int docolor = *PL_colors[0]; int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */ @@ -2057,20 +2083,42 @@ S_regmatch(pTHX_ regnode *prog) if (pref0_len > pref_len) pref0_len = pref_len; regprop(prop, scan); - PerlIO_printf(Perl_debug_log, - "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n", - (IV)(locinput - PL_bostr), - PL_colors[4], pref0_len, - locinput - pref_len, PL_colors[5], - PL_colors[2], pref_len - pref0_len, - locinput - pref_len + pref0_len, PL_colors[3], - (docolor ? "" : "> <"), - PL_colors[0], l, locinput, PL_colors[1], - 15 - l - pref_len + 1, - "", - (IV)(scan - PL_regprogram), PL_regindent*2, "", - SvPVX(prop)); - } ); + { + char *s0 = + UTF ? + pv_uni_display(dsv0, (U8*)(locinput - pref_len), + pref0_len, 60, 0) : + locinput - pref_len; + int len0 = UTF ? strlen(s0) : pref0_len; + char *s1 = UTF ? + pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len), + pref_len - pref0_len, 60, 0) : + locinput - pref_len + pref0_len; + int len1 = UTF ? strlen(s1) : pref_len - pref0_len; + char *s2 = UTF ? + pv_uni_display(dsv2, (U8*)locinput, + PL_regeol - locinput, 60, 0) : + locinput; + int len2 = UTF ? strlen(s2) : l; + PerlIO_printf(Perl_debug_log, + "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n", + (IV)(locinput - PL_bostr), + PL_colors[4], + len0, s0, + PL_colors[5], + PL_colors[2], + len1, s1, + PL_colors[3], + (docolor ? "" : "> <"), + PL_colors[0], + len2, s2, + PL_colors[1], + 15 - l - pref_len + 1, + "", + (IV)(scan - PL_regprogram), PL_regindent*2, "", + SvPVX(prop)); + } + }); next = scan + NEXT_OFF(scan); if (next == scan) @@ -2199,17 +2247,17 @@ S_regmatch(pTHX_ regnode *prog) if (do_utf8) { char *l = locinput; char *e; + STRLEN ulen; + U8 tmpbuf[UTF8_MAXLEN*2+1]; e = s + ln; - c1 = OP(scan) == EXACTF; while (s < e) { - if (l >= PL_regeol) { + if (l >= PL_regeol) sayNO; - } - if ((UTF ? utf8n_to_uvchr((U8*)s, e - s, 0, 0) : *((U8*)s)) != - (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l))) - sayNO; - s += UTF ? UTF8SKIP(s) : 1; - l += UTF8SKIP(l); + toLOWER_utf8((U8*)l, tmpbuf, &ulen); + if (memNE(s, tmpbuf, ulen)) + sayNO; + s += UTF8SKIP(s); + l += ulen; } locinput = l; nextchr = UCHARAT(locinput); @@ -2472,23 +2520,18 @@ S_regmatch(pTHX_ regnode *prog) * have to map both upper and title case to lower case. */ if (OP(scan) == REFF) { + STRLEN ulen1, ulen2; + U8 tmpbuf1[UTF8_MAXLEN*2+1]; + U8 tmpbuf2[UTF8_MAXLEN*2+1]; while (s < e) { if (l >= PL_regeol) sayNO; - if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l)) - sayNO; - s += UTF8SKIP(s); - l += UTF8SKIP(l); - } - } - else { - while (s < e) { - if (l >= PL_regeol) - sayNO; - if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l)) + toLOWER_utf8((U8*)s, tmpbuf1, &ulen1); + toLOWER_utf8((U8*)l, tmpbuf2, &ulen2); + if (ulen1 != ulen2 || memNE(tmpbuf1, tmpbuf2, ulen1)) sayNO; - s += UTF8SKIP(s); - l += UTF8SKIP(l); + s += ulen1; + l += ulen2; } } locinput = l; @@ -2534,11 +2577,18 @@ S_regmatch(pTHX_ regnode *prog) PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]); PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr; - CALLRUNOPS(aTHX); /* Scalar context. */ - SPAGAIN; - ret = POPs; - PUTBACK; - + { + SV **before = SP; + CALLRUNOPS(aTHX); /* Scalar context. */ + SPAGAIN; + if (SP == before) + ret = Nullsv; /* protect against empty (?{}) blocks. */ + else { + ret = POPs; + PUTBACK; + } + } + PL_op = oop; PL_curpad = ocurpad; PL_curcop = ocurcop; @@ -3237,8 +3287,15 @@ S_regmatch(pTHX_ regnode *prog) } else { /* UTF */ if (OP(text_node) == EXACTF) { - c1 = to_utf8_lower(s); - c2 = to_utf8_upper(s); + STRLEN ulen1, ulen2; + U8 tmpbuf1[UTF8_MAXLEN*2+1]; + U8 tmpbuf2[UTF8_MAXLEN*2+1]; + + to_utf8_lower((U8*)s, tmpbuf1, &ulen1); + to_utf8_upper((U8*)s, tmpbuf2, &ulen2); + + c1 = utf8_to_uvuni(tmpbuf1, 0); + c2 = utf8_to_uvuni(tmpbuf2, 0); } else { c2 = c1 = utf8_to_uvchr(s, NULL); @@ -3975,14 +4032,10 @@ S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8) if (swash_fetch(sw, p, do_utf8)) match = TRUE; else if (flags & ANYOF_FOLD) { - U8 tmpbuf[UTF8_MAXLEN+1]; - - if (flags & ANYOF_LOCALE) { - PL_reg_flags |= RF_tainted; - uvchr_to_utf8(tmpbuf, toLOWER_LC_utf8(p)); - } - else - uvchr_to_utf8(tmpbuf, toLOWER_utf8(p)); + STRLEN ulen; + U8 tmpbuf[UTF8_MAXLEN*2+1]; + + toLOWER_utf8(p, tmpbuf, &ulen); if (swash_fetch(sw, tmpbuf, do_utf8)) match = TRUE; }