From: Simon Cozens Date: Fri, 8 Dec 2000 13:33:31 +0000 (+0000) Subject: Re: ebcdic <-> ascii tables interjected in uv <-> utf8 considered harmful X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=ed646e6e695bd0d512934a33fa4b7fabef9ff020;p=p5sagit%2Fp5-mst-13.2.git Re: ebcdic <-> ascii tables interjected in uv <-> utf8 considered harmful Message-ID: <20001208133331.A11535@deep-dark-truthful-mirror.perlhacker.org> (The pp_hot part needed a rewrite.) p4raw-id: //depot/perl@8039 --- diff --git a/doop.c b/doop.c index 9dbee67..7acad60 100644 --- a/doop.c +++ b/doop.c @@ -79,10 +79,7 @@ S_do_trans_simple(pTHX_ SV *sv) c = utf8_to_uv(s, send - s, &ulen, 0); if (c < 0x100 && (ch = tbl[(short)c]) >= 0) { matches++; - if (ch < 0x80) - *d++ = ch; - else - d = uv_to_utf8(d,ch); + d = uv_to_utf8(d,ch); s += ulen; } else { /* No match -> copy */ @@ -192,12 +189,9 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */ matches--; } - if (ch >= 0) { - if (hasutf) - d = uv_to_utf8(d, ch); - else - *d++ = ch; - } + if (ch >= 0) + d = uv_to_utf8(d, ch); + matches++; s += hasutf && *s & 0x80 ? UNISKIP(*s) : 1; diff --git a/pp_hot.c b/pp_hot.c index 830d56e..4020f20 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -142,19 +142,19 @@ PP(pp_concat) dPOPTOPssrl; STRLEN len; U8 *s; - bool left_utf; - bool right_utf; + bool left_utf8; + bool right_utf8; if (TARG == right && SvGMAGICAL(right)) mg_get(right); if (SvGMAGICAL(left)) mg_get(left); - left_utf = DO_UTF8(left); - right_utf = DO_UTF8(right); + left_utf8 = DO_UTF8(left); + right_utf8 = DO_UTF8(right); - if (left_utf != right_utf) { - if (TARG == right && !right_utf) { + if (left_utf8 != right_utf8) { + if (TARG == right && !right_utf8) { sv_utf8_upgrade(TARG); /* Now straight binary copy */ SvUTF8_on(TARG); } @@ -163,7 +163,7 @@ PP(pp_concat) U8 *l, *c, *olds = NULL; STRLEN targlen; s = (U8*)SvPV(right,len); - right_utf |= DO_UTF8(right); + right_utf8 |= DO_UTF8(right); if (TARG == right) { /* Take a copy since we're about to overwrite TARG */ olds = s = (U8*)savepvn((char*)s, len); @@ -175,28 +175,28 @@ PP(pp_concat) sv_setpv(left, ""); /* Suppress warning. */ } l = (U8*)SvPV(left, targlen); - left_utf |= DO_UTF8(left); + left_utf8 |= DO_UTF8(left); if (TARG != left) sv_setpvn(TARG, (char*)l, targlen); - if (!left_utf) + if (!left_utf8) sv_utf8_upgrade(TARG); /* Extend TARG to length of right (s) */ targlen = SvCUR(TARG) + len; - if (!right_utf) { + if (!right_utf8) { /* plus one for each hi-byte char if we have to upgrade */ for (c = s; c < s + len; c++) { - if (*c & 0x80) + if (UTF8_IS_CONTINUED(*c)) targlen++; } } SvGROW(TARG, targlen+1); /* And now copy, maybe upgrading right to UTF8 on the fly */ - for (c = (U8*)SvEND(TARG); len--; s++) { - if (*s & 0x80 && !right_utf) - c = uv_to_utf8(c, *s); - else - *c++ = *s; - } + if (right_utf8) + Copy(s, SvEND(TARG), len, U8); + else { + for (c = (U8*)SvEND(TARG); len--; s++) + c = uv_to_utf8(c, *s); + } SvCUR_set(TARG, targlen); *SvEND(TARG) = '\0'; SvUTF8_on(TARG); @@ -235,7 +235,7 @@ PP(pp_concat) } else sv_setpvn(TARG, (char *)s, len); /* suppress warning */ - if (left_utf) + if (left_utf8) SvUTF8_on(TARG); SETTARG; RETURN; diff --git a/utf8.c b/utf8.c index 7a652b4..e9c4386 100644 --- a/utf8.c +++ b/utf8.c @@ -506,14 +506,9 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN *len) d = s = save; while (s < send) { - if (*s < 0x80) { - *d++ = *s++; - } - else { - STRLEN ulen; - *d++ = (U8)utf8_to_uv_simple(s, &ulen); - s += ulen; - } + STRLEN ulen; + *d++ = (U8)utf8_to_uv_simple(s, &ulen); + s += ulen; } *d = '\0'; *len = d - save;