X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regexec.c;h=35a0a6c2b08b9c22472e759f5effe0aac9856554;hb=dfcfdb64cf0cdaf3745a1082d9b4a94480414c62;hp=8c3ff2e78d84709e20240eda8c0b135c68dab744;hpb=9246c65e7445d45babd69ada378ff7f5f266c771;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regexec.c b/regexec.c index 8c3ff2e..35a0a6c 100644 --- a/regexec.c +++ b/regexec.c @@ -390,7 +390,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, char *check_at = Nullch; /* check substr found at this pos */ #ifdef DEBUGGING char *i_strpos = strpos; - SV *dsv = sv_2mortal(newSVpvn("", 0)); + SV *dsv = PERL_DEBUG_PAD_ZERO(0); #endif DEBUG_r({ @@ -932,8 +932,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta ln = STR_LEN(c); if (UTF) { STRLEN ulen1, ulen2; - U8 tmpbuf1[UTF8_MAXLEN*2+1]; - U8 tmpbuf2[UTF8_MAXLEN*2+1]; + U8 tmpbuf1[UTF8_MAXLEN_UCLC+1]; + U8 tmpbuf2[UTF8_MAXLEN_UCLC+1]; to_utf8_lower((U8*)m, tmpbuf1, &ulen1); to_utf8_upper((U8*)m, tmpbuf2, &ulen2); @@ -959,17 +959,30 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta if (do_utf8) { STRLEN len; + /* The ibcmp_utf8() uses to_uni_fold() which is more + * correct folding for Unicode than using lowercase. + * However, it doesn't work quite fully since the folding + * is a one-to-many mapping and the regex optimizer is + * unaware of this, so it may throw out good matches. + * Fortunately, not getting this right is allowed + * for Unicode Regular Expression Support level 1, + * only one-to-one matching is required. --jhi */ if (c1 == c2) while (s <= e) { if ( utf8_to_uvchr((U8*)s, &len) == c1 - && regtry(prog, s) ) + && (ln == 1 || + ibcmp_utf8(s, do_utf8, strend - s, + m, UTF, ln)) ) goto got_it; s += len; } else while (s <= e) { UV c = utf8_to_uvchr((U8*)s, &len); - if ( (c == c1 || c == c2) && regtry(prog, s) ) + if ( (c == c1 || c == c2) + && (ln == 1 || + ibcmp_utf8(s, do_utf8, strend - s, + m, UTF, ln)) ) goto got_it; s += len; } @@ -1465,7 +1478,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * SV* oreplsv = GvSV(PL_replgv); bool do_utf8 = DO_UTF8(sv); #ifdef DEBUGGING - SV *dsv = sv_2mortal(newSVpvn("", 0)); + SV *dsv = PERL_DEBUG_PAD_ZERO(0); #endif PL_regcc = 0; @@ -1943,6 +1956,12 @@ S_regtry(pTHX_ regexp *prog, char *startpos) New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*); } +#ifdef DEBUGGING + sv_setpvn(PERL_DEBUG_PAD(0), "", 0); + sv_setpvn(PERL_DEBUG_PAD(1), "", 0); + sv_setpvn(PERL_DEBUG_PAD(2), "", 0); +#endif + /* XXXX What this code is doing here?!!! There should be no need to do this again and again, PL_reglastparen should take care of this! --ilya*/ @@ -2050,9 +2069,9 @@ S_regmatch(pTHX_ regnode *prog) #endif register bool do_utf8 = PL_reg_match_utf8; #ifdef DEBUGGING - SV *dsv0 = sv_2mortal(newSVpvn("", 0)); - SV *dsv1 = sv_2mortal(newSVpvn("", 0)); - SV *dsv2 = sv_2mortal(newSVpvn("", 0)); + SV *dsv0 = PERL_DEBUG_PAD_ZERO(0); + SV *dsv1 = PERL_DEBUG_PAD_ZERO(1); + SV *dsv2 = PERL_DEBUG_PAD_ZERO(2); #endif #ifdef DEBUGGING @@ -2266,7 +2285,7 @@ S_regmatch(pTHX_ regnode *prog) char *l = locinput; char *e; STRLEN ulen; - U8 tmpbuf[UTF8_MAXLEN*2+1]; + U8 tmpbuf[UTF8_MAXLEN_UCLC+1]; e = s + ln; while (s < e) { if (l >= PL_regeol) @@ -2539,8 +2558,8 @@ S_regmatch(pTHX_ regnode *prog) */ if (OP(scan) == REFF) { STRLEN ulen1, ulen2; - U8 tmpbuf1[UTF8_MAXLEN*2+1]; - U8 tmpbuf2[UTF8_MAXLEN*2+1]; + U8 tmpbuf1[UTF8_MAXLEN_UCLC+1]; + U8 tmpbuf2[UTF8_MAXLEN_UCLC+1]; while (s < e) { if (l >= PL_regeol) sayNO; @@ -3143,9 +3162,9 @@ S_regmatch(pTHX_ regnode *prog) c1 = *(PL_bostr + ln); } else { c1 = (U8)*STRING(text_node); } - if (OP(next) == EXACTF) + if (OP(text_node) == EXACTF || OP(text_node) == REFF) c2 = PL_fold[c1]; - else if (OP(text_node) == EXACTFL) + else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL) c2 = PL_fold_locale[c1]; else c2 = c1; @@ -3226,9 +3245,9 @@ S_regmatch(pTHX_ regnode *prog) } else { c1 = (U8)*STRING(text_node); } - if (OP(text_node) == EXACTF) + if (OP(text_node) == EXACTF || OP(text_node) == REFF) c2 = PL_fold[c1]; - else if (OP(text_node) == EXACTFL) + else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL) c2 = PL_fold_locale[c1]; else c2 = c1; @@ -3337,16 +3356,16 @@ S_regmatch(pTHX_ regnode *prog) if (!UTF) { c2 = c1 = *s; - if (OP(text_node) == EXACTF) + if (OP(text_node) == EXACTF || OP(text_node) == REFF) c2 = PL_fold[c1]; - else if (OP(text_node) == EXACTFL) + else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL) c2 = PL_fold_locale[c1]; } else { /* UTF */ - if (OP(text_node) == EXACTF) { + if (OP(text_node) == EXACTF || OP(text_node) == REFF) { STRLEN ulen1, ulen2; - U8 tmpbuf1[UTF8_MAXLEN*2+1]; - U8 tmpbuf2[UTF8_MAXLEN*2+1]; + U8 tmpbuf1[UTF8_MAXLEN_UCLC+1]; + U8 tmpbuf2[UTF8_MAXLEN_UCLC+1]; to_utf8_lower((U8*)s, tmpbuf1, &ulen1); to_utf8_upper((U8*)s, tmpbuf2, &ulen2); @@ -4091,7 +4110,7 @@ S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8) match = TRUE; else if (flags & ANYOF_FOLD) { STRLEN ulen; - U8 tmpbuf[UTF8_MAXLEN*2+1]; + U8 tmpbuf[UTF8_MAXLEN_UCLC+1]; toLOWER_utf8(p, tmpbuf, &ulen); if (swash_fetch(sw, tmpbuf, do_utf8))