make stringwise bitops work properly on utf8 strings
Gurusamy Sarathy [Mon, 28 Feb 2000 09:39:47 +0000 (09:39 +0000)]
p4raw-id: //depot/perl@5317

doop.c
t/op/bop.t

diff --git a/doop.c b/doop.c
index 0dbfe20..4f536f0 100644 (file)
--- a/doop.c
+++ b/doop.c
@@ -1060,6 +1060,13 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
     I32 lensave;
     char *lsave;
     char *rsave;
+    bool left_utf = DO_UTF8(left);
+    bool right_utf = DO_UTF8(right);
+
+    if (left_utf && !right_utf)
+       sv_utf8_upgrade(right);
+    if (!left_utf && right_utf)
+       sv_utf8_upgrade(left);
 
     if (sv != left || (optype != OP_BIT_AND && !SvOK(sv) && !SvGMAGICAL(sv)))
        sv_setpvn(sv, "", 0);   /* avoid undef warning on |= and ^= */
@@ -1084,6 +1091,66 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
     }
     SvCUR_set(sv, len);
     (void)SvPOK_only(sv);
+    if (left_utf || right_utf) {
+       UV duc, luc, ruc;
+       STRLEN lulen = leftlen;
+       STRLEN rulen = rightlen;
+       STRLEN dulen = 0;
+       I32 ulen;
+
+       switch (optype) {
+       case OP_BIT_AND:
+           while (lulen && rulen) {
+               luc = utf8_to_uv((U8*)lc, &ulen);
+               lc += ulen;
+               lulen -= ulen;
+               ruc = utf8_to_uv((U8*)rc, &ulen);
+               rc += ulen;
+               rulen -= ulen;
+               duc = luc & ruc;
+               dc = (char*)uv_to_utf8((U8*)dc, duc);
+           }
+           dulen = dc - SvPVX(sv);
+           SvCUR_set(sv, dulen);
+           break;
+       case OP_BIT_XOR:
+           while (lulen && rulen) {
+               luc = utf8_to_uv((U8*)lc, &ulen);
+               lc += ulen;
+               lulen -= ulen;
+               ruc = utf8_to_uv((U8*)rc, &ulen);
+               rc += ulen;
+               rulen -= ulen;
+               duc = luc ^ ruc;
+               dc = (char*)uv_to_utf8((U8*)dc, duc);
+           }
+           goto mop_up_utf;
+       case OP_BIT_OR:
+           while (lulen && rulen) {
+               luc = utf8_to_uv((U8*)lc, &ulen);
+               lc += ulen;
+               lulen -= ulen;
+               ruc = utf8_to_uv((U8*)rc, &ulen);
+               rc += ulen;
+               rulen -= ulen;
+               duc = luc | ruc;
+               dc = (char*)uv_to_utf8((U8*)dc, duc);
+           }
+         mop_up_utf:
+           dulen = dc - SvPVX(sv);
+           SvCUR_set(sv, dulen);
+           if (rulen)
+               sv_catpvn(sv, rc, rulen);
+           else if (lulen)
+               sv_catpvn(sv, lc, lulen);
+           else
+               *SvEND(sv) = '\0';
+           break;
+       }
+       SvUTF8_on(sv);
+       goto finish;
+    }
+    else
 #ifdef LIBERAL
     if (len >= sizeof(long)*4 &&
        !((long)dc % sizeof(long)) &&
@@ -1154,6 +1221,7 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
            break;
        }
     }
+finish:
     SvTAINT(sv);
 }
 
diff --git a/t/op/bop.t b/t/op/bop.t
index 0c5ef48..7bcabdf 100755 (executable)
--- a/t/op/bop.t
+++ b/t/op/bop.t
@@ -9,7 +9,7 @@ BEGIN {
     unshift @INC, '../lib';
 }
 
-print "1..18\n";
+print "1..30\n";
 
 # numerics
 print ((0xdead & 0xbeef) == 0x9ead ? "ok 1\n" : "not ok 1\n");
@@ -62,3 +62,22 @@ print (($foo | $bar) eq ($Aoz x 75 . $zap) ? "ok 17\n" : "not ok 17\n");
 # ^ does not truncate
 print (($foo ^ $bar) eq ($Axz x 75 . $zap) ? "ok 18\n" : "not ok 18\n");
 
+#
+print "ok \xFF\xFF\n" & "ok 19\n";
+print "ok 20\n" | "ok \0\0\n";
+print "o\000 \0001\000" ^ "\000k\0002\000\n";
+
+#
+print "ok \x{FF}\x{FF}\n" & "ok 22\n";
+print "ok 23\n" | "ok \x{0}\x{0}\n";
+print "o\x{0} \x{0}4\x{0}" ^ "\x{0}k\x{0}2\x{0}\n";
+
+#
+print "ok 25\n" if sprintf("%vd", v4095 & v801) eq 801;
+print "ok 26\n" if sprintf("%vd", v4095 | v801) eq 4095;
+print "ok 27\n" if sprintf("%vd", v4095 ^ v801) eq 3294;
+
+#
+print "ok 28\n" if sprintf("%vd", v4095.801.4095 & v801.4095) eq '801.801';
+print "ok 29\n" if sprintf("%vd", v4095.801.4095 | v801.4095) eq '4095.4095.4095';
+print "ok 30\n" if sprintf("%vd", v801.4095 ^ v4095.801.4095) eq '3294.3294.4095';