Re: ebcdic <-> ascii tables interjected in uv <-> utf8 considered harmful
Simon Cozens [Fri, 8 Dec 2000 13:33:31 +0000 (13:33 +0000)]
Message-ID: <20001208133331.A11535@deep-dark-truthful-mirror.perlhacker.org>

(The pp_hot part needed a rewrite.)

p4raw-id: //depot/perl@8039

doop.c
pp_hot.c
utf8.c

diff --git a/doop.c b/doop.c
index 9dbee67..7acad60 100644 (file)
--- a/doop.c
+++ b/doop.c
@@ -79,10 +79,7 @@ S_do_trans_simple(pTHX_ SV *sv)
        c = utf8_to_uv(s, send - s, &ulen, 0);
         if (c < 0x100 && (ch = tbl[(short)c]) >= 0) {
             matches++;
-            if (ch < 0x80)
-                *d++ = ch;
-            else
-                d = uv_to_utf8(d,ch);
+            d = uv_to_utf8(d,ch);
             s += ulen;
         }
        else { /* No match -> copy */
@@ -192,12 +189,9 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
                matches--;
            }
 
-           if (ch >= 0) {
-               if (hasutf)
-                 d = uv_to_utf8(d, ch);
-               else 
-                 *d++ = ch;
-           }
+           if (ch >= 0)
+               d = uv_to_utf8(d, ch);
+           
            matches++;
 
            s += hasutf && *s & 0x80 ? UNISKIP(*s) : 1;
index 830d56e..4020f20 100644 (file)
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -142,19 +142,19 @@ PP(pp_concat)
     dPOPTOPssrl;
     STRLEN len;
     U8 *s;
-    bool left_utf;
-    bool right_utf;
+    bool left_utf8;
+    bool right_utf8;
 
     if (TARG == right && SvGMAGICAL(right))
         mg_get(right);
     if (SvGMAGICAL(left))
         mg_get(left);
 
-    left_utf  = DO_UTF8(left);
-    right_utf = DO_UTF8(right);
+    left_utf8  = DO_UTF8(left);
+    right_utf8 = DO_UTF8(right);
  
-    if (left_utf != right_utf) {
-        if (TARG == right && !right_utf) {
+    if (left_utf8 != right_utf8) {
+        if (TARG == right && !right_utf8) {
             sv_utf8_upgrade(TARG); /* Now straight binary copy */
             SvUTF8_on(TARG);
         }
@@ -163,7 +163,7 @@ PP(pp_concat)
             U8 *l, *c, *olds = NULL;
             STRLEN targlen;
            s = (U8*)SvPV(right,len);
-           right_utf |= DO_UTF8(right);
+           right_utf8 |= DO_UTF8(right);
             if (TARG == right) {
                /* Take a copy since we're about to overwrite TARG */
                olds = s = (U8*)savepvn((char*)s, len);
@@ -175,28 +175,28 @@ PP(pp_concat)
                    sv_setpv(left, ""); /* Suppress warning. */
            }
             l = (U8*)SvPV(left, targlen);
-           left_utf |= DO_UTF8(left);
+           left_utf8 |= DO_UTF8(left);
             if (TARG != left)
                 sv_setpvn(TARG, (char*)l, targlen);
-            if (!left_utf)
+            if (!left_utf8)
                 sv_utf8_upgrade(TARG);
             /* Extend TARG to length of right (s) */
             targlen = SvCUR(TARG) + len;
-            if (!right_utf) {
+            if (!right_utf8) {
                 /* plus one for each hi-byte char if we have to upgrade */
                 for (c = s; c < s + len; c++)  {
-                    if (*c & 0x80)
+                    if (UTF8_IS_CONTINUED(*c))
                         targlen++;
                 }
             }
             SvGROW(TARG, targlen+1);
             /* And now copy, maybe upgrading right to UTF8 on the fly */
-            for (c = (U8*)SvEND(TARG); len--; s++) {
-                 if (*s & 0x80 && !right_utf)
-                     c = uv_to_utf8(c, *s);
-                 else
-                     *c++ = *s;
-            }
+           if (right_utf8)
+               Copy(s, SvEND(TARG), len, U8);
+           else {
+               for (c = (U8*)SvEND(TARG); len--; s++)
+                   c = uv_to_utf8(c, *s);
+           }
             SvCUR_set(TARG, targlen);
             *SvEND(TARG) = '\0';
             SvUTF8_on(TARG);
@@ -235,7 +235,7 @@ PP(pp_concat)
     }
     else
        sv_setpvn(TARG, (char *)s, len);        /* suppress warning */
-    if (left_utf)
+    if (left_utf8)
        SvUTF8_on(TARG);
     SETTARG;
     RETURN;
diff --git a/utf8.c b/utf8.c
index 7a652b4..e9c4386 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -506,14 +506,9 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN *len)
 
     d = s = save;
     while (s < send) {
-        if (*s < 0x80) {
-           *d++ = *s++;
-       }
-        else {
-            STRLEN ulen;
-            *d++ = (U8)utf8_to_uv_simple(s, &ulen);
-            s += ulen;
-        }
+        STRLEN ulen;
+        *d++ = (U8)utf8_to_uv_simple(s, &ulen);
+        s += ulen;
     }
     *d = '\0';
     *len = d - save;