X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=doop.c;h=755cbfd16a6be2e30abd389169bd328a04481b63;hb=a403baf6db062a9762514a55376b87d7258108a5;hp=bd66b42739ef6aafb1b8f0105874a49be9f14a72;hpb=05d340b81db0665672bf1917f71d6cb9459a3b2b;p=p5sagit%2Fp5-mst-13.2.git diff --git a/doop.c b/doop.c index bd66b42..755cbfd 100644 --- a/doop.c +++ b/doop.c @@ -36,7 +36,7 @@ S_do_trans_simple(pTHX_ SV *sv) tbl = (short*)cPVOP->op_pv; if (!tbl) - Perl_croak(aTHX_ "panic: do_trans_simple"); + Perl_croak(aTHX_ "panic: do_trans_simple line %d",__LINE__); s = (U8*)SvPV(sv, len); send = s + len; @@ -103,7 +103,7 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */ tbl = (short*)cPVOP->op_pv; if (!tbl) - Perl_croak(aTHX_ "panic: do_trans_count"); + Perl_croak(aTHX_ "panic: do_trans_count line %d",__LINE__); s = (U8*)SvPV(sv, len); send = s + len; @@ -147,7 +147,7 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */ tbl = (short*)cPVOP->op_pv; if (!tbl) - Perl_croak(aTHX_ "panic: do_trans_complex"); + Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__); s = (U8*)SvPV(sv, len); isutf8 = SvUTF8(sv); @@ -184,6 +184,7 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */ s++; } } + *d = '\0'; SvCUR_set(sv, d - dstart); } else { /* isutf8 */ @@ -316,9 +317,11 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */ isutf8 = SvUTF8(sv); if (!isutf8) { U8 *t = s, *e = s + len; - while (t < e) - if ((hibit = UTF8_IS_CONTINUED(*t++))) + while (t < e) { + U8 ch = *t++; + if ((hibit = !NATIVE_IS_INVARIANT(ch))) break; + } if (hibit) s = bytes_to_utf8(s, &len); } @@ -341,10 +344,10 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */ } while (s < send) { - if ((uv = swash_fetch(rv, s)) < none) { + if ((uv = swash_fetch(rv, s, TRUE)) < none) { s += UTF8SKIP(s); matches++; - d = uvchr_to_utf8(d, uv); + d = uvuni_to_utf8(d, uv); } else if (uv == none) { int i = UTF8SKIP(s); @@ -356,7 +359,7 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */ int i = UTF8SKIP(s); s += i; matches++; - d = uvchr_to_utf8(d, final); + d = uvuni_to_utf8(d, final); } else s += UTF8SKIP(s); @@ -365,7 +368,7 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */ STRLEN clen = d - dstart; STRLEN nlen = dend - dstart + len + UTF8_MAXLEN; if (!grows) - Perl_croak(aTHX_ "panic: do_trans_complex_utf8"); + Perl_croak(aTHX_ "panic: do_trans_simple_utf8 line %d",__LINE__); Renew(dstart, nlen+UTF8_MAXLEN, U8); d = dstart + clen; dend = dstart + nlen; @@ -402,22 +405,25 @@ S_do_trans_count_utf8(pTHX_ SV *sv)/* SPC - OK */ HV* hv = (HV*)SvRV(rv); SV** svp = hv_fetch(hv, "NONE", 4, FALSE); UV none = svp ? SvUV(*svp) : 0x7fffffff; + UV extra = none + 1; UV uv; U8 hibit = 0; s = (U8*)SvPV(sv, len); if (!SvUTF8(sv)) { U8 *t = s, *e = s + len; - while (t < e) - if ((hibit = !UTF8_IS_INVARIANT(*t++))) + while (t < e) { + U8 ch = *t++; + if ((hibit = !NATIVE_IS_INVARIANT(ch))) break; + } if (hibit) start = s = bytes_to_utf8(s, &len); } send = s + len; while (s < send) { - if ((uv = swash_fetch(rv, s)) < none) + if ((uv = swash_fetch(rv, s, TRUE)) < none || uv == extra) matches++; s += UTF8SKIP(s); } @@ -443,6 +449,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ UV none = svp ? SvUV(*svp) : 0x7fffffff; UV extra = none + 1; UV final; + bool havefinal = FALSE; UV uv; STRLEN len; U8 *dstart, *dend; @@ -453,9 +460,11 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ isutf8 = SvUTF8(sv); if (!isutf8) { U8 *t = s, *e = s + len; - while (t < e) - if ((hibit = !UTF8_IS_INVARIANT(*t++))) + while (t < e) { + U8 ch = *t++; + if ((hibit = !NATIVE_IS_INVARIANT(ch))) break; + } if (hibit) s = bytes_to_utf8(s, &len); } @@ -463,8 +472,10 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ start = s; svp = hv_fetch(hv, "FINAL", 5, FALSE); - if (svp) + if (svp) { final = SvUV(*svp); + havefinal = TRUE; + } if (grows) { /* d needs to be bigger than s, in case e.g. upgrading is required */ @@ -480,13 +491,13 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ if (squash) { UV puv = 0xfeedface; while (s < send) { - uv = swash_fetch(rv, s); + uv = swash_fetch(rv, s, TRUE); if (d > dend) { STRLEN clen = d - dstart; STRLEN nlen = dend - dstart + len + UTF8_MAXLEN; if (!grows) - Perl_croak(aTHX_ "panic: do_trans_complex_utf8"); + Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__); Renew(dstart, nlen+UTF8_MAXLEN, U8); d = dstart + clen; dend = dstart + nlen; @@ -495,7 +506,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ matches++; s += UTF8SKIP(s); if (uv != puv) { - d = uvchr_to_utf8(d, uv); + d = uvuni_to_utf8(d, uv); puv = uv; } continue; @@ -510,10 +521,22 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ } else if (uv == extra && !del) { matches++; - s += UTF8SKIP(s); - if (uv != puv) { - d = uvchr_to_utf8(d, final); - puv = final; + if (havefinal) { + s += UTF8SKIP(s); + if (puv != final) { + d = uvuni_to_utf8(d, final); + puv = final; + } + } + else { + STRLEN len; + uv = utf8_to_uvuni(s, &len); + if (uv != puv) { + Copy(s, d, len, U8); + d += len; + puv = uv; + } + s += len; } continue; } @@ -523,12 +546,12 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ } else { while (s < send) { - uv = swash_fetch(rv, s); + uv = swash_fetch(rv, s, TRUE); if (d > dend) { STRLEN clen = d - dstart; STRLEN nlen = dend - dstart + len + UTF8_MAXLEN; if (!grows) - Perl_croak(aTHX_ "panic: do_trans_complex_utf8"); + Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__); Renew(dstart, nlen+UTF8_MAXLEN, U8); d = dstart + clen; dend = dstart + nlen; @@ -536,7 +559,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ if (uv < none) { matches++; s += UTF8SKIP(s); - d = uvchr_to_utf8(d, uv); + d = uvuni_to_utf8(d, uv); continue; } else if (uv == none) { /* "none" is unmapped character */ @@ -549,7 +572,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ else if (uv == extra && !del) { matches++; s += UTF8SKIP(s); - d = uvchr_to_utf8(d, final); + d = uvuni_to_utf8(d, final); continue; } matches++; /* "none+1" is delete character */