X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=doop.c;h=d7baecc792213d049dbf1319ba037bebbfa6c3f1;hb=d0369dd15170efd8d6a93db21c003d0edd4bfdf4;hp=526409c16cdb59734510d86ca26ade7a7457bb5e;hpb=16ec844ddf6ed30b884deee1be32522f7b107b77;p=p5sagit%2Fp5-mst-13.2.git diff --git a/doop.c b/doop.c index 526409c..d7baecc 100644 --- a/doop.c +++ b/doop.c @@ -36,7 +36,7 @@ S_do_trans_simple(pTHX_ SV *sv) tbl = (short*)cPVOP->op_pv; if (!tbl) - Perl_croak(aTHX_ "panic: do_trans_simple"); + Perl_croak(aTHX_ "panic: do_trans_simple line %d",__LINE__); s = (U8*)SvPV(sv, len); send = s + len; @@ -66,10 +66,10 @@ S_do_trans_simple(pTHX_ SV *sv) UV c; /* Need to check this, otherwise 128..255 won't match */ - c = utf8_to_uv(s, send - s, &ulen, 0); + c = utf8n_to_uvchr(s, send - s, &ulen, 0); if (c < 0x100 && (ch = tbl[c]) >= 0) { matches++; - d = uv_to_utf8(d, ch); + d = uvchr_to_utf8(d, ch); s += ulen; } else { /* No match -> copy */ @@ -99,10 +99,11 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */ I32 matches = 0; STRLEN len; short *tbl; + I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT; tbl = (short*)cPVOP->op_pv; if (!tbl) - Perl_croak(aTHX_ "panic: do_trans_count"); + Perl_croak(aTHX_ "panic: do_trans_count line %d",__LINE__); s = (U8*)SvPV(sv, len); send = s + len; @@ -116,8 +117,11 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */ while (s < send) { UV c; STRLEN ulen; - c = utf8_to_uv(s, send - s, &ulen, 0); - if (c < 0x100 && tbl[c] >= 0) + c = utf8n_to_uvchr(s, send - s, &ulen, 0); + if (c < 0x100) { + if (tbl[c] >= 0) + matches++; + } else if (complement) matches++; s += ulen; } @@ -135,13 +139,15 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */ I32 isutf8; I32 matches = 0; I32 grows = PL_op->op_private & OPpTRANS_GROWS; - STRLEN len; + I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT; + I32 del = PL_op->op_private & OPpTRANS_DELETE; + STRLEN len, rlen; short *tbl; I32 ch; tbl = (short*)cPVOP->op_pv; if (!tbl) - Perl_croak(aTHX_ "panic: do_trans_complex"); + Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__); s = (U8*)SvPV(sv, len); isutf8 = SvUTF8(sv); @@ -178,6 +184,7 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */ s++; } } + *d = '\0'; SvCUR_set(sv, d - dstart); } else { /* isutf8 */ @@ -186,22 +193,43 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */ else d = s; dstart = d; + if (complement && !del) + rlen = tbl[0x100]; + +#ifdef MACOS_TRADITIONAL +#define comp CoMP /* "comp" is a keyword in some compilers ... */ +#endif if (PL_op->op_private & OPpTRANS_SQUASH) { - U8* p = send; UV pch = 0xfeedface; while (s < send) { STRLEN len; - UV comp = utf8_to_uv_simple(s, &len); + UV comp = utf8_to_uvchr(s, &len); - if (comp > 0xff) { /* always unmapped */ - Copy(s, d, len, U8); - d += len; + if (comp > 0xff) { + if (!complement) { + Copy(s, d, len, U8); + d += len; + } + else { + matches++; + if (!del) { + ch = (rlen == 0) ? comp : + (comp - 0x100 < rlen) ? + tbl[comp+1] : tbl[0x100+rlen]; + if (ch != pch) { + d = uvchr_to_utf8(d, ch); + pch = ch; + } + s += len; + continue; + } + } } else if ((ch = tbl[comp]) >= 0) { matches++; if (ch != pch) { - d = uv_to_utf8(d, ch); + d = uvchr_to_utf8(d, ch); pch = ch; } s += len; @@ -220,13 +248,24 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */ else { while (s < send) { STRLEN len; - UV comp = utf8_to_uv_simple(s, &len); - if (comp > 0xff) { /* always unmapped */ - Copy(s, d, len, U8); - d += len; + UV comp = utf8_to_uvchr(s, &len); + if (comp > 0xff) { + if (!complement) { + Copy(s, d, len, U8); + d += len; + } + else { + matches++; + if (!del) { + if (comp - 0x100 < rlen) + d = uvchr_to_utf8(d, tbl[comp+1]); + else + d = uvchr_to_utf8(d, tbl[0x100+rlen]); + } + } } else if ((ch = tbl[comp]) >= 0) { - d = uv_to_utf8(d, ch); + d = uvchr_to_utf8(d, ch); matches++; } else if (ch == -1) { /* -1 is unmapped character */ @@ -278,9 +317,11 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */ isutf8 = SvUTF8(sv); if (!isutf8) { U8 *t = s, *e = s + len; - while (t < e) - if ((hibit = UTF8_IS_CONTINUED(*t++))) + while (t < e) { + U8 ch = *t++; + if ((hibit = !NATIVE_IS_INVARIANT(ch))) break; + } if (hibit) s = bytes_to_utf8(s, &len); } @@ -306,7 +347,7 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */ if ((uv = swash_fetch(rv, s)) < none) { s += UTF8SKIP(s); matches++; - d = uv_to_utf8(d, uv); + d = uvuni_to_utf8(d, uv); } else if (uv == none) { int i = UTF8SKIP(s); @@ -318,7 +359,7 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */ int i = UTF8SKIP(s); s += i; matches++; - d = uv_to_utf8(d, final); + d = uvuni_to_utf8(d, final); } else s += UTF8SKIP(s); @@ -327,7 +368,7 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */ STRLEN clen = d - dstart; STRLEN nlen = dend - dstart + len + UTF8_MAXLEN; if (!grows) - Perl_croak(aTHX_ "panic: do_trans_complex_utf8"); + Perl_croak(aTHX_ "panic: do_trans_simple_utf8 line %d",__LINE__); Renew(dstart, nlen+UTF8_MAXLEN, U8); d = dstart + clen; dend = dstart + nlen; @@ -345,6 +386,7 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */ } SvSETMAGIC(sv); SvUTF8_on(sv); + /* Downgrading just 'cos it will is suspect - NI-S */ if (!isutf8 && !(PL_hints & HINT_UTF8)) sv_utf8_downgrade(sv, TRUE); @@ -363,22 +405,25 @@ S_do_trans_count_utf8(pTHX_ SV *sv)/* SPC - OK */ HV* hv = (HV*)SvRV(rv); SV** svp = hv_fetch(hv, "NONE", 4, FALSE); UV none = svp ? SvUV(*svp) : 0x7fffffff; + UV extra = none + 1; UV uv; U8 hibit = 0; s = (U8*)SvPV(sv, len); if (!SvUTF8(sv)) { U8 *t = s, *e = s + len; - while (t < e) - if ((hibit = !UTF8_IS_ASCII(*t++))) + while (t < e) { + U8 ch = *t++; + if ((hibit = !NATIVE_IS_INVARIANT(ch))) break; + } if (hibit) start = s = bytes_to_utf8(s, &len); } send = s + len; while (s < send) { - if ((uv = swash_fetch(rv, s)) < none) + if ((uv = swash_fetch(rv, s)) < none || uv == extra) matches++; s += UTF8SKIP(s); } @@ -404,6 +449,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ UV none = svp ? SvUV(*svp) : 0x7fffffff; UV extra = none + 1; UV final; + bool havefinal = FALSE; UV uv; STRLEN len; U8 *dstart, *dend; @@ -414,9 +460,11 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ isutf8 = SvUTF8(sv); if (!isutf8) { U8 *t = s, *e = s + len; - while (t < e) - if ((hibit = !UTF8_IS_ASCII(*t++))) + while (t < e) { + U8 ch = *t++; + if ((hibit = !NATIVE_IS_INVARIANT(ch))) break; + } if (hibit) s = bytes_to_utf8(s, &len); } @@ -424,8 +472,10 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ start = s; svp = hv_fetch(hv, "FINAL", 5, FALSE); - if (svp) + if (svp) { final = SvUV(*svp); + havefinal = TRUE; + } if (grows) { /* d needs to be bigger than s, in case e.g. upgrading is required */ @@ -442,23 +492,23 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ UV puv = 0xfeedface; while (s < send) { uv = swash_fetch(rv, s); - + if (d > dend) { STRLEN clen = d - dstart; STRLEN nlen = dend - dstart + len + UTF8_MAXLEN; if (!grows) - Perl_croak(aTHX_ "panic: do_trans_complex_utf8"); + Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__); Renew(dstart, nlen+UTF8_MAXLEN, U8); d = dstart + clen; dend = dstart + nlen; } if (uv < none) { matches++; + s += UTF8SKIP(s); if (uv != puv) { - d = uv_to_utf8(d, uv); + d = uvuni_to_utf8(d, uv); puv = uv; } - s += UTF8SKIP(s); continue; } else if (uv == none) { /* "none" is unmapped character */ @@ -471,11 +521,23 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ } else if (uv == extra && !del) { matches++; - if (uv != puv) { - d = uv_to_utf8(d, final); - puv = final; + if (havefinal) { + s += UTF8SKIP(s); + if (puv != final) { + d = uvuni_to_utf8(d, final); + puv = final; + } + } + else { + STRLEN len; + uv = utf8_to_uvuni(s, &len); + if (uv != puv) { + Copy(s, d, len, U8); + d += len; + puv = uv; + } + s += len; } - s += UTF8SKIP(s); continue; } matches++; /* "none+1" is delete character */ @@ -489,15 +551,15 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ STRLEN clen = d - dstart; STRLEN nlen = dend - dstart + len + UTF8_MAXLEN; if (!grows) - Perl_croak(aTHX_ "panic: do_trans_complex_utf8"); + Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__); Renew(dstart, nlen+UTF8_MAXLEN, U8); d = dstart + clen; dend = dstart + nlen; } if (uv < none) { matches++; - d = uv_to_utf8(d, uv); s += UTF8SKIP(s); + d = uvuni_to_utf8(d, uv); continue; } else if (uv == none) { /* "none" is unmapped character */ @@ -509,8 +571,8 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ } else if (uv == extra && !del) { matches++; - d = uv_to_utf8(d, final); s += UTF8SKIP(s); + d = uvuni_to_utf8(d, final); continue; } matches++; /* "none+1" is delete character */ @@ -563,6 +625,7 @@ Perl_do_trans(pTHX_ SV *sv) return do_trans_simple(sv); case OPpTRANS_IDENTICAL: + case OPpTRANS_IDENTICAL|OPpTRANS_COMPLEMENT: if (hasutf) return do_trans_count_utf8(sv); else @@ -897,7 +960,7 @@ Perl_do_chop(pTHX_ register SV *astr, register SV *sv) s = send - 1; while (s > start && UTF8_IS_CONTINUATION(*s)) s--; - if (utf8_to_uv_simple((U8*)s, 0)) { + if (utf8_to_uvchr((U8*)s, 0)) { sv_setpvn(astr, s, send - s); *s = '\0'; SvCUR_set(sv, s - start); @@ -1063,14 +1126,14 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) switch (optype) { case OP_BIT_AND: while (lulen && rulen) { - luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); + luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); lc += ulen; lulen -= ulen; - ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); + ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); rc += ulen; rulen -= ulen; duc = luc & ruc; - dc = (char*)uv_to_utf8((U8*)dc, duc); + dc = (char*)uvchr_to_utf8((U8*)dc, duc); } if (sv == left || sv == right) (void)sv_usepvn(sv, dcsave, needlen); @@ -1078,26 +1141,26 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) break; case OP_BIT_XOR: while (lulen && rulen) { - luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); + luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); lc += ulen; lulen -= ulen; - ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); + ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); rc += ulen; rulen -= ulen; duc = luc ^ ruc; - dc = (char*)uv_to_utf8((U8*)dc, duc); + dc = (char*)uvchr_to_utf8((U8*)dc, duc); } goto mop_up_utf; case OP_BIT_OR: while (lulen && rulen) { - luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); + luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); lc += ulen; lulen -= ulen; - ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); + ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); rc += ulen; rulen -= ulen; duc = luc | ruc; - dc = (char*)uv_to_utf8((U8*)dc, duc); + dc = (char*)uvchr_to_utf8((U8*)dc, duc); } mop_up_utf: if (sv == left || sv == right) @@ -1192,7 +1255,7 @@ finish: OP * Perl_do_kv(pTHX) { - djSP; + dSP; HV *hv = (HV*)POPs; HV *keys; register HE *entry; @@ -1206,7 +1269,7 @@ Perl_do_kv(pTHX) dokeys = dovalues = TRUE; if (!hv) { - if (PL_op->op_flags & OPf_MOD) { /* lvalue */ + if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */ dTARGET; /* make sure to clear its target here */ if (SvTYPE(TARG) == SVt_PVLV) LvTARG(TARG) = Nullsv; @@ -1225,7 +1288,7 @@ Perl_do_kv(pTHX) IV i; dTARGET; - if (PL_op->op_flags & OPf_MOD) { /* lvalue */ + if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */ if (SvTYPE(TARG) < SVt_PVLV) { sv_upgrade(TARG, SVt_PVLV); sv_magic(TARG, Nullsv, 'k', Nullch, 0);