From: Gurusamy Sarathy Date: Mon, 28 Feb 2000 09:39:47 +0000 (+0000) Subject: make stringwise bitops work properly on utf8 strings X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=0c57e439868bda58b2cbd4708d7a8b4dcf15b989;p=p5sagit%2Fp5-mst-13.2.git make stringwise bitops work properly on utf8 strings p4raw-id: //depot/perl@5317 --- diff --git a/doop.c b/doop.c index 0dbfe20..4f536f0 100644 --- a/doop.c +++ b/doop.c @@ -1060,6 +1060,13 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) I32 lensave; char *lsave; char *rsave; + bool left_utf = DO_UTF8(left); + bool right_utf = DO_UTF8(right); + + if (left_utf && !right_utf) + sv_utf8_upgrade(right); + if (!left_utf && right_utf) + sv_utf8_upgrade(left); if (sv != left || (optype != OP_BIT_AND && !SvOK(sv) && !SvGMAGICAL(sv))) sv_setpvn(sv, "", 0); /* avoid undef warning on |= and ^= */ @@ -1084,6 +1091,66 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) } SvCUR_set(sv, len); (void)SvPOK_only(sv); + if (left_utf || right_utf) { + UV duc, luc, ruc; + STRLEN lulen = leftlen; + STRLEN rulen = rightlen; + STRLEN dulen = 0; + I32 ulen; + + switch (optype) { + case OP_BIT_AND: + while (lulen && rulen) { + luc = utf8_to_uv((U8*)lc, &ulen); + lc += ulen; + lulen -= ulen; + ruc = utf8_to_uv((U8*)rc, &ulen); + rc += ulen; + rulen -= ulen; + duc = luc & ruc; + dc = (char*)uv_to_utf8((U8*)dc, duc); + } + dulen = dc - SvPVX(sv); + SvCUR_set(sv, dulen); + break; + case OP_BIT_XOR: + while (lulen && rulen) { + luc = utf8_to_uv((U8*)lc, &ulen); + lc += ulen; + lulen -= ulen; + ruc = utf8_to_uv((U8*)rc, &ulen); + rc += ulen; + rulen -= ulen; + duc = luc ^ ruc; + dc = (char*)uv_to_utf8((U8*)dc, duc); + } + goto mop_up_utf; + case OP_BIT_OR: + while (lulen && rulen) { + luc = utf8_to_uv((U8*)lc, &ulen); + lc += ulen; + lulen -= ulen; + ruc = utf8_to_uv((U8*)rc, &ulen); + rc += ulen; + rulen -= ulen; + duc = luc | ruc; + dc = (char*)uv_to_utf8((U8*)dc, duc); + } + mop_up_utf: + dulen = dc - SvPVX(sv); + SvCUR_set(sv, dulen); + if (rulen) + sv_catpvn(sv, rc, rulen); + else if (lulen) + sv_catpvn(sv, lc, lulen); + else + *SvEND(sv) = '\0'; + break; + } + SvUTF8_on(sv); + goto finish; + } + else #ifdef LIBERAL if (len >= sizeof(long)*4 && !((long)dc % sizeof(long)) && @@ -1154,6 +1221,7 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) break; } } +finish: SvTAINT(sv); } diff --git a/t/op/bop.t b/t/op/bop.t index 0c5ef48..7bcabdf 100755 --- a/t/op/bop.t +++ b/t/op/bop.t @@ -9,7 +9,7 @@ BEGIN { unshift @INC, '../lib'; } -print "1..18\n"; +print "1..30\n"; # numerics print ((0xdead & 0xbeef) == 0x9ead ? "ok 1\n" : "not ok 1\n"); @@ -62,3 +62,22 @@ print (($foo | $bar) eq ($Aoz x 75 . $zap) ? "ok 17\n" : "not ok 17\n"); # ^ does not truncate print (($foo ^ $bar) eq ($Axz x 75 . $zap) ? "ok 18\n" : "not ok 18\n"); +# +print "ok \xFF\xFF\n" & "ok 19\n"; +print "ok 20\n" | "ok \0\0\n"; +print "o\000 \0001\000" ^ "\000k\0002\000\n"; + +# +print "ok \x{FF}\x{FF}\n" & "ok 22\n"; +print "ok 23\n" | "ok \x{0}\x{0}\n"; +print "o\x{0} \x{0}4\x{0}" ^ "\x{0}k\x{0}2\x{0}\n"; + +# +print "ok 25\n" if sprintf("%vd", v4095 & v801) eq 801; +print "ok 26\n" if sprintf("%vd", v4095 | v801) eq 4095; +print "ok 27\n" if sprintf("%vd", v4095 ^ v801) eq 3294; + +# +print "ok 28\n" if sprintf("%vd", v4095.801.4095 & v801.4095) eq '801.801'; +print "ok 29\n" if sprintf("%vd", v4095.801.4095 | v801.4095) eq '4095.4095.4095'; +print "ok 30\n" if sprintf("%vd", v801.4095 ^ v4095.801.4095) eq '3294.3294.4095';