UTF8 concat
Simon Cozens [Fri, 30 Jun 2000 06:13:25 +0000 (06:13 +0000)]
Message-ID: <slrn8loek5.9ai.simon@justanother.perlhacker.org>

p4raw-id: //depot/cfgperl@6292

pp_hot.c

index ede5342..e0789db 100644 (file)
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -145,37 +145,68 @@ PP(pp_concat)
   {
     dPOPTOPssrl;
     STRLEN len;
-    char *s;
+    U8 *s;
     bool left_utf = DO_UTF8(left);
     bool right_utf = DO_UTF8(right);
 
+    if (left_utf != right_utf) {
+        if (TARG == right && !right_utf) {
+            sv_utf8_upgrade(TARG); /* Now straight binary copy */
+            SvUTF8_on(TARG);
+        }
+        else {
+            /* Set TARG to PV(left), then add right */
+            U8 *l, *c;
+            STRLEN targlen;
+            if (TARG == right)
+                /* Need a safe copy elsewhere since we're just about to
+                   write onto TARG */
+                s = strdup(SvPV(right,len));
+            else
+                s = SvPV(right,len);
+            l = SvPV(left, targlen);
+            if (TARG != left)
+                sv_setpvn(TARG,l,targlen);
+            if (!left_utf)
+                sv_utf8_upgrade(TARG);
+            /* Extend TARG to length of right (s) */
+            targlen = SvCUR(TARG) + len;
+            if (!right_utf) {
+                /* plus one for each hi-byte char if we have to upgrade */
+                for (c = s; *c; c++)  {
+                    if (*c & 0x80)
+                        targlen++;
+                }
+            }
+            SvGROW(TARG, targlen+1);
+            /* And now copy, maybe upgrading right to UTF8 on the fly */
+            for (c = SvEND(TARG); *s; s++) {
+                 if (*s & 0x80 && !right_utf)
+                     c = uv_to_utf8(c, *s);
+                 else
+                     *c++ = *s;
+            }
+            SvCUR_set(TARG, targlen);
+            *SvEND(TARG) = '\0';
+            SvUTF8_on(TARG);
+            SETs(TARG);
+            RETURN;
+        }
+    }
+
     if (TARG != left) {
-       if (right_utf && !left_utf)
-           sv_utf8_upgrade(left);
        s = SvPV(left,len);
-       SvUTF8_off(TARG);
        if (TARG == right) {
-           if (left_utf && !right_utf)
-               sv_utf8_upgrade(right);
            sv_insert(TARG, 0, 0, s, len);
-           if (left_utf || right_utf)
-               SvUTF8_on(TARG);
            SETs(TARG);
            RETURN;
        }
        sv_setpvn(TARG,s,len);
     }
-    else if (SvGMAGICAL(TARG)) {
+    else if (SvGMAGICAL(TARG))
        mg_get(TARG);
-       if (right_utf && !left_utf)
-           sv_utf8_upgrade(left);
-    }
-    else if (!SvOK(TARG) && SvTYPE(TARG) <= SVt_PVMG) {
+    else if (!SvOK(TARG) && SvTYPE(TARG) <= SVt_PVMG)
        sv_setpv(TARG, "");     /* Suppress warning. */
-       s = SvPV_force(TARG, len);
-    }
-    if (left_utf && !right_utf)
-       sv_utf8_upgrade(right);
     s = SvPV(right,len);
     if (SvOK(TARG)) {
 #if defined(PERL_Y2KWARN)
@@ -194,7 +225,7 @@ PP(pp_concat)
     }
     else
        sv_setpvn(TARG,s,len);  /* suppress warning */
-    if (left_utf || right_utf)
+    if (left_utf)
        SvUTF8_on(TARG);
     SETTARG;
     RETURN;