X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=doop.c;h=823c88d18e4e9373294f977ef260584522c48eb2;hb=fe9745bfc279b016730696fd0a6abca4d493be60;hp=3b0ddc19e6f00d260b6bf9274e6f02aae54cd69d;hpb=0f4a810e6257ab8b7f289b497394cfdff380e306;p=p5sagit%2Fp5-mst-13.2.git diff --git a/doop.c b/doop.c index 3b0ddc1..823c88d 100644 --- a/doop.c +++ b/doop.c @@ -29,6 +29,7 @@ S_do_trans_simple(pTHX_ SV *sv) U8 *send; U8 *dstart; I32 matches = 0; + I32 grows = PL_op->op_private & OPpTRANS_GROWS; STRLEN len; short *tbl; I32 ch; @@ -55,27 +56,36 @@ S_do_trans_simple(pTHX_ SV *sv) } /* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */ - Newz(0, d, len*2+1, U8); + if (grows) + New(0, d, len*2+1, U8); + else + d = s; dstart = d; while (s < send) { STRLEN ulen; - short c; + UV c; - ulen = 1; /* Need to check this, otherwise 128..255 won't match */ - c = utf8_to_uv(s, send - s, &ulen, 0); - if (c < 0x100 && (ch = tbl[(short)c]) >= 0) { + c = utf8n_to_uvchr(s, send - s, &ulen, 0); + if (c < 0x100 && (ch = tbl[c]) >= 0) { matches++; - d = uv_to_utf8(d, ch); + d = uvchr_to_utf8(d, ch); s += ulen; } else { /* No match -> copy */ - while (ulen--) - *d++ = *s++; + Copy(s, d, ulen, U8); + d += ulen; + s += ulen; } } - *d = '\0'; - sv_setpvn(sv, (char*)dstart, d - dstart); + if (grows) { + sv_setpvn(sv, (char*)dstart, d - dstart); + Safefree(dstart); + } + else { + *d = '\0'; + SvCUR_set(sv, d - dstart); + } SvUTF8_on(sv); SvSETMAGIC(sv); return matches; @@ -89,6 +99,7 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */ I32 matches = 0; STRLEN len; short *tbl; + I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT; tbl = (short*)cPVOP->op_pv; if (!tbl) @@ -106,8 +117,11 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */ while (s < send) { UV c; STRLEN ulen; - c = utf8_to_uv(s, send - s, &ulen, 0); - if (c < 0x100 && tbl[c] >= 0) + c = utf8n_to_uvchr(s, send - s, &ulen, 0); + if (c < 0x100) { + if (tbl[c] >= 0) + matches++; + } else if (complement) matches++; s += ulen; } @@ -124,7 +138,10 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */ U8 *dstart; I32 isutf8; I32 matches = 0; - STRLEN len; + I32 grows = PL_op->op_private & OPpTRANS_GROWS; + I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT; + I32 del = PL_op->op_private & OPpTRANS_DELETE; + STRLEN len, rlen; short *tbl; I32 ch; @@ -170,29 +187,57 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */ SvCUR_set(sv, d - dstart); } else { /* isutf8 */ - Newz(0, d, len*2+1, U8); + if (grows) + New(0, d, len*2+1, U8); + else + d = s; dstart = d; + if (complement && !del) + rlen = tbl[0x100]; + +#ifdef MACOS_TRADITIONAL +#define comp CoMP /* "comp" is a keyword in some compilers ... */ +#endif if (PL_op->op_private & OPpTRANS_SQUASH) { - U8* p = send; UV pch = 0xfeedface; while (s < send) { STRLEN len; - UV comp = utf8_to_uv_simple(s, &len); + UV comp = utf8_to_uvchr(s, &len); - if (comp > 0xff) - d = uv_to_utf8(d, comp); /* always unmapped */ + if (comp > 0xff) { + if (!complement) { + Copy(s, d, len, U8); + d += len; + } + else { + matches++; + if (!del) { + ch = (rlen == 0) ? comp : + (comp - 0x100 < rlen) ? + tbl[comp+1] : tbl[0x100+rlen]; + if (ch != pch) { + d = uvchr_to_utf8(d, ch); + pch = ch; + } + s += len; + continue; + } + } + } else if ((ch = tbl[comp]) >= 0) { matches++; if (ch != pch) { - d = uv_to_utf8(d, ch); + d = uvchr_to_utf8(d, ch); pch = ch; } s += len; continue; } - else if (ch == -1) /* -1 is unmapped character */ - d = uv_to_utf8(d, comp); + else if (ch == -1) { /* -1 is unmapped character */ + Copy(s, d, len, U8); + d += len; + } else if (ch == -2) /* -2 is delete character */ matches++; s += len; @@ -202,23 +247,43 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */ else { while (s < send) { STRLEN len; - UV comp = utf8_to_uv_simple(s, &len); - if (comp > 0xff) - d = uv_to_utf8(d, comp); /* always unmapped */ + UV comp = utf8_to_uvchr(s, &len); + if (comp > 0xff) { + if (!complement) { + Copy(s, d, len, U8); + d += len; + } + else { + matches++; + if (!del) { + if (comp - 0x100 < rlen) + d = uvchr_to_utf8(d, tbl[comp+1]); + else + d = uvchr_to_utf8(d, tbl[0x100+rlen]); + } + } + } else if ((ch = tbl[comp]) >= 0) { - d = uv_to_utf8(d, ch); + d = uvchr_to_utf8(d, ch); matches++; } else if (ch == -1) { /* -1 is unmapped character */ - d = uv_to_utf8(d, comp); + Copy(s, d, len, U8); + d += len; } else if (ch == -2) /* -2 is delete character */ matches++; s += len; } } - *d = '\0'; - sv_setpvn(sv, (char*)dstart, d - dstart); + if (grows) { + sv_setpvn(sv, (char*)dstart, d - dstart); + Safefree(dstart); + } + else { + *d = '\0'; + SvCUR_set(sv, d - dstart); + } SvUTF8_on(sv); } SvSETMAGIC(sv); @@ -234,6 +299,7 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */ U8 *start; U8 *dstart, *dend; I32 matches = 0; + I32 grows = PL_op->op_private & OPpTRANS_GROWS; STRLEN len; SV* rv = (SV*)cSVOP->op_sv; @@ -250,9 +316,11 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */ isutf8 = SvUTF8(sv); if (!isutf8) { U8 *t = s, *e = s + len; - while (t < e) - if ((hibit = UTF8_IS_CONTINUED(*t++))) + while (t < e) { + U8 ch = *t++; + if ((hibit = !NATIVE_IS_INVARIANT(ch))) break; + } if (hibit) s = bytes_to_utf8(s, &len); } @@ -263,45 +331,61 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */ if (svp) final = SvUV(*svp); - /* d needs to be bigger than s, in case e.g. upgrading is required */ - New(0, d, len*3+UTF8_MAXLEN, U8); - dend = d + len * 3; - dstart = d; + if (grows) { + /* d needs to be bigger than s, in case e.g. upgrading is required */ + New(0, d, len*3+UTF8_MAXLEN, U8); + dend = d + len * 3; + dstart = d; + } + else { + dstart = d = s; + dend = d + len; + } while (s < send) { if ((uv = swash_fetch(rv, s)) < none) { s += UTF8SKIP(s); matches++; - d = uv_to_utf8(d, uv); + d = uvchr_to_utf8(d, uv); } else if (uv == none) { int i = UTF8SKIP(s); - while(i--) - *d++ = *s++; + Copy(s, d, i, U8); + d += i; + s += i; } else if (uv == extra) { int i = UTF8SKIP(s); s += i; matches++; - d = uv_to_utf8(d, final); + d = uvchr_to_utf8(d, final); } else s += UTF8SKIP(s); - if (d >= dend) { + if (d > dend) { STRLEN clen = d - dstart; STRLEN nlen = dend - dstart + len + UTF8_MAXLEN; + if (!grows) + Perl_croak(aTHX_ "panic: do_trans_complex_utf8"); Renew(dstart, nlen+UTF8_MAXLEN, U8); d = dstart + clen; dend = dstart + nlen; } } - *d = '\0'; - sv_setpvn(sv, (char*)dstart, d - dstart); + if (grows || hibit) { + sv_setpvn(sv, (char*)dstart, d - dstart); + Safefree(dstart); + if (grows && hibit) + Safefree(start); + } + else { + *d = '\0'; + SvCUR_set(sv, d - dstart); + } SvSETMAGIC(sv); SvUTF8_on(sv); - if (hibit) - Safefree(start); + /* Downgrading just 'cos it will is suspect - NI-S */ if (!isutf8 && !(PL_hints & HINT_UTF8)) sv_utf8_downgrade(sv, TRUE); @@ -320,22 +404,25 @@ S_do_trans_count_utf8(pTHX_ SV *sv)/* SPC - OK */ HV* hv = (HV*)SvRV(rv); SV** svp = hv_fetch(hv, "NONE", 4, FALSE); UV none = svp ? SvUV(*svp) : 0x7fffffff; + UV extra = none + 1; UV uv; U8 hibit = 0; s = (U8*)SvPV(sv, len); if (!SvUTF8(sv)) { U8 *t = s, *e = s + len; - while (t < e) - if ((hibit = !UTF8_IS_ASCII(*t++))) + while (t < e) { + U8 ch = *t++; + if ((hibit = !NATIVE_IS_INVARIANT(ch))) break; + } if (hibit) start = s = bytes_to_utf8(s, &len); } send = s + len; while (s < send) { - if ((uv = swash_fetch(rv, s)) < none) + if ((uv = swash_fetch(rv, s)) < none || uv == extra) matches++; s += UTF8SKIP(s); } @@ -354,12 +441,14 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ I32 matches = 0; I32 squash = PL_op->op_private & OPpTRANS_SQUASH; I32 del = PL_op->op_private & OPpTRANS_DELETE; + I32 grows = PL_op->op_private & OPpTRANS_GROWS; SV* rv = (SV*)cSVOP->op_sv; HV* hv = (HV*)SvRV(rv); SV** svp = hv_fetch(hv, "NONE", 4, FALSE); UV none = svp ? SvUV(*svp) : 0x7fffffff; UV extra = none + 1; UV final; + bool havefinal = FALSE; UV uv; STRLEN len; U8 *dstart, *dend; @@ -370,9 +459,11 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ isutf8 = SvUTF8(sv); if (!isutf8) { U8 *t = s, *e = s + len; - while (t < e) - if ((hibit = !UTF8_IS_ASCII(*t++))) + while (t < e) { + U8 ch = *t++; + if ((hibit = !NATIVE_IS_INVARIANT(ch))) break; + } if (hibit) s = bytes_to_utf8(s, &len); } @@ -380,47 +471,72 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ start = s; svp = hv_fetch(hv, "FINAL", 5, FALSE); - if (svp) + if (svp) { final = SvUV(*svp); + havefinal = TRUE; + } - New(0, d, len*3+UTF8_MAXLEN, U8); - dend = d + len * 3; - dstart = d; + if (grows) { + /* d needs to be bigger than s, in case e.g. upgrading is required */ + New(0, d, len*3+UTF8_MAXLEN, U8); + dend = d + len * 3; + dstart = d; + } + else { + dstart = d = s; + dend = d + len; + } if (squash) { UV puv = 0xfeedface; while (s < send) { uv = swash_fetch(rv, s); - - if (d >= dend) { - STRLEN clen = d - dstart, nlen = dend - dstart + len; + + if (d > dend) { + STRLEN clen = d - dstart; + STRLEN nlen = dend - dstart + len + UTF8_MAXLEN; + if (!grows) + Perl_croak(aTHX_ "panic: do_trans_complex_utf8"); Renew(dstart, nlen+UTF8_MAXLEN, U8); d = dstart + clen; dend = dstart + nlen; } if (uv < none) { matches++; + s += UTF8SKIP(s); if (uv != puv) { - d = uv_to_utf8(d, uv); + d = uvchr_to_utf8(d, uv); puv = uv; } - s += UTF8SKIP(s); continue; } else if (uv == none) { /* "none" is unmapped character */ int i = UTF8SKIP(s); - while(i--) - *d++ = *s++; + Copy(s, d, i, U8); + d += i; + s += i; puv = 0xfeedface; continue; } else if (uv == extra && !del) { matches++; - if (uv != puv) { - d = uv_to_utf8(d, final); - puv = final; + if (havefinal) { + s += UTF8SKIP(s); + if (puv != final) { + d = uvchr_to_utf8(d, final); + puv = final; + } + } + else { + STRLEN len; + uv = utf8_to_uvchr(s, &len); + if (uv != puv) { + Copy(s, d, len, U8); + d += len; + puv = uv; + } + s += len; } - s += UTF8SKIP(s); continue; } matches++; /* "none+1" is delete character */ @@ -430,39 +546,49 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ else { while (s < send) { uv = swash_fetch(rv, s); - if (d >= dend) { - STRLEN clen = d - dstart, nlen = dend - dstart + len; + if (d > dend) { + STRLEN clen = d - dstart; + STRLEN nlen = dend - dstart + len + UTF8_MAXLEN; + if (!grows) + Perl_croak(aTHX_ "panic: do_trans_complex_utf8"); Renew(dstart, nlen+UTF8_MAXLEN, U8); d = dstart + clen; dend = dstart + nlen; } if (uv < none) { matches++; - d = uv_to_utf8(d, uv); s += UTF8SKIP(s); + d = uvchr_to_utf8(d, uv); continue; } else if (uv == none) { /* "none" is unmapped character */ int i = UTF8SKIP(s); - while(i--) - *d++ = *s++; + Copy(s, d, i, U8); + d += i; + s += i; continue; } else if (uv == extra && !del) { matches++; - d = uv_to_utf8(d, final); s += UTF8SKIP(s); + d = uvchr_to_utf8(d, final); continue; } matches++; /* "none+1" is delete character */ s += UTF8SKIP(s); } } - *d = '\0'; - sv_setpvn(sv, (char*)dstart, d - dstart); + if (grows || hibit) { + sv_setpvn(sv, (char*)dstart, d - dstart); + Safefree(dstart); + if (grows && hibit) + Safefree(start); + } + else { + *d = '\0'; + SvCUR_set(sv, d - dstart); + } SvUTF8_on(sv); - if (hibit) - Safefree(start); if (!isutf8 && !(PL_hints & HINT_UTF8)) sv_utf8_downgrade(sv, TRUE); SvSETMAGIC(sv); @@ -498,6 +624,7 @@ Perl_do_trans(pTHX_ SV *sv) return do_trans_simple(sv); case OPpTRANS_IDENTICAL: + case OPpTRANS_IDENTICAL|OPpTRANS_COMPLEMENT: if (hasutf) return do_trans_count_utf8(sv); else @@ -832,7 +959,7 @@ Perl_do_chop(pTHX_ register SV *astr, register SV *sv) s = send - 1; while (s > start && UTF8_IS_CONTINUATION(*s)) s--; - if (utf8_to_uv_simple((U8*)s, 0)) { + if (utf8_to_uvchr((U8*)s, 0)) { sv_setpvn(astr, s, send - s); *s = '\0'; SvCUR_set(sv, s - start); @@ -998,14 +1125,14 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) switch (optype) { case OP_BIT_AND: while (lulen && rulen) { - luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); + luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); lc += ulen; lulen -= ulen; - ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); + ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); rc += ulen; rulen -= ulen; duc = luc & ruc; - dc = (char*)uv_to_utf8((U8*)dc, duc); + dc = (char*)uvchr_to_utf8((U8*)dc, duc); } if (sv == left || sv == right) (void)sv_usepvn(sv, dcsave, needlen); @@ -1013,26 +1140,26 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) break; case OP_BIT_XOR: while (lulen && rulen) { - luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); + luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); lc += ulen; lulen -= ulen; - ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); + ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); rc += ulen; rulen -= ulen; duc = luc ^ ruc; - dc = (char*)uv_to_utf8((U8*)dc, duc); + dc = (char*)uvchr_to_utf8((U8*)dc, duc); } goto mop_up_utf; case OP_BIT_OR: while (lulen && rulen) { - luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); + luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); lc += ulen; lulen -= ulen; - ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); + ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); rc += ulen; rulen -= ulen; duc = luc | ruc; - dc = (char*)uv_to_utf8((U8*)dc, duc); + dc = (char*)uvchr_to_utf8((U8*)dc, duc); } mop_up_utf: if (sv == left || sv == right) @@ -1127,7 +1254,7 @@ finish: OP * Perl_do_kv(pTHX) { - djSP; + dSP; HV *hv = (HV*)POPs; HV *keys; register HE *entry; @@ -1141,7 +1268,7 @@ Perl_do_kv(pTHX) dokeys = dovalues = TRUE; if (!hv) { - if (PL_op->op_flags & OPf_MOD) { /* lvalue */ + if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */ dTARGET; /* make sure to clear its target here */ if (SvTYPE(TARG) == SVt_PVLV) LvTARG(TARG) = Nullsv; @@ -1160,7 +1287,7 @@ Perl_do_kv(pTHX) IV i; dTARGET; - if (PL_op->op_flags & OPf_MOD) { /* lvalue */ + if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */ if (SvTYPE(TARG) < SVt_PVLV) { sv_upgrade(TARG, SVt_PVLV); sv_magic(TARG, Nullsv, 'k', Nullch, 0);