From: Jarkko Hietaniemi Date: Wed, 21 Feb 2001 00:24:22 +0000 (+0000) Subject: Make pack("C", 0x100) to create Unicode, unless under the X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=11882669c40759b5e727c31126bf37a49cf3288e;p=p5sagit%2Fp5-mst-13.2.git Make pack("C", 0x100) to create Unicode, unless under the evil influence of 'use bytes'. Similarly, unpack("C", ...) will understand Unicode, unless you under know what. p4raw-id: //depot/perl@8865 --- diff --git a/pp.c b/pp.c index a0361da..b3d769a 100644 --- a/pp.c +++ b/pp.c @@ -4064,6 +4064,7 @@ PP(pp_unpack) U16 aushort; unsigned int auint; U32 aulong; + UV auv; #ifdef HAS_QUAD Uquad_t auquad; #endif @@ -4331,20 +4332,44 @@ PP(pp_unpack) if (len > strend - s) len = strend - s; if (checksum) { - uchar_checksum: - while (len-- > 0) { - auint = *s++ & 255; - culong += auint; + if (DO_UTF8(right)) { + while (len > 0) { + STRLEN l; + auv = utf8_to_uv((U8*)s, len, &l, UTF8_ALLOW_ANYUV); + culong += auv; + s += l; + len -= l; + } + } + else { + uchar_checksum: + while (len-- > 0) { + auint = *s++ & 0xFF; + culong += auint; + } } } else { EXTEND(SP, len); EXTEND_MORTAL(len); - while (len-- > 0) { - auint = *s++ & 255; - sv = NEWSV(37, 0); - sv_setiv(sv, (IV)auint); - PUSHs(sv_2mortal(sv)); + if (DO_UTF8(right)) { + while (len > 0) { + STRLEN l; + auv = utf8_to_uv((U8*)s, len, &l, UTF8_ALLOW_ANYUV); + sv = NEWSV(37, 0); + sv_setuv(sv, auv); + PUSHs(sv_2mortal(sv)); + s += l; + len -= l; + } + } + else { + while (len-- > 0) { + auint = *s++ & 0xFF; + sv = NEWSV(37, 0); + sv_setuv(sv, auint); + PUSHs(sv_2mortal(sv)); + } } } break; @@ -5145,6 +5170,7 @@ PP(pp_pack) unsigned int auint; I32 along; U32 aulong; + UV auv; #ifdef HAS_QUAD Quad_t aquad; Uquad_t auquad; @@ -5156,6 +5182,7 @@ PP(pp_pack) #ifdef PERL_NATINT_PACK int natint; /* native integer */ #endif + bool has_utf8; items = SP - MARK; MARK++; @@ -5392,7 +5419,6 @@ PP(pp_pack) items = saveitems; } break; - case 'C': case 'c': while (len-- > 0) { fromstr = NEXTFROM; @@ -5401,12 +5427,41 @@ PP(pp_pack) sv_catpvn(cat, &achar, sizeof(char)); } break; + case 'C': + has_utf8 = SvUTF8(cat); + while (len-- > 0) { + fromstr = NEXTFROM; + auv = SvUV(fromstr); + if (!has_utf8 && auv > 0xFF && !IN_BYTE) { + has_utf8 = TRUE; + if (SvCUR(cat)) + sv_utf8_upgrade(cat); + else + SvUTF8_on(cat); /* There will be UTF8. */ + } + if (has_utf8) { + SvGROW(cat, SvCUR(cat) + UNISKIP(auv) + 1); + SvCUR_set(cat, (char*)uv_to_utf8((U8*)SvEND(cat),auv) + - SvPVX(cat)); + } + else { + achar = auv; + sv_catpvn(cat, &achar, sizeof(char)); + } + } + *SvEND(cat) = '\0'; + break; case 'U': + has_utf8 = SvUTF8(cat); while (len-- > 0) { fromstr = NEXTFROM; - auint = SvUV(fromstr); - SvGROW(cat, SvCUR(cat) + UTF8_MAXLEN + 1); - SvCUR_set(cat, (char*)uv_to_utf8((U8*)SvEND(cat),auint) + auv = SvUV(fromstr); + if (!has_utf8 && auv > 0x80) { + has_utf8 = TRUE; + sv_utf8_upgrade(cat); + } + SvGROW(cat, SvCUR(cat) + UNISKIP(auv) + 1); + SvCUR_set(cat, (char*)uv_to_utf8((U8*)SvEND(cat),auv) - SvPVX(cat)); } *SvEND(cat) = '\0'; diff --git a/t/op/pack.t b/t/op/pack.t index 67bd547..06e47f2 100755 --- a/t/op/pack.t +++ b/t/op/pack.t @@ -6,7 +6,7 @@ BEGIN { require Config; import Config; } -print "1..159\n"; +print "1..163\n"; $format = "c2 x5 C C x s d i l a6"; # Need the expression in here to force ary[5] to be numeric. This avoids @@ -22,7 +22,13 @@ $out2=join(':',@ary2); # Using long double NVs may introduce greater accuracy than wanted. $out1 =~ s/:9\.87654321097999\d*:/:9.87654321098:/; $out2 =~ s/:9\.87654321097999\d*:/:9.87654321098:/; -print ($out1 eq $out2? "ok 2\n" : "not ok 2\n"); +if ($out1 eq $out2) { + print "ok 2\n"; +} else { + print "# out1: $out1\n"; + print "# out2: $out2\n"; + print "not ok 2\n"; +} print ($foo =~ /def/ ? "ok 3\n" : "not ok 3\n"); @@ -407,6 +413,8 @@ $z = pack < 255 + +print "not " unless ord(pack("C", 0x100)) == 0x100; +print "ok $test\n"; $test++; + +# 163: pack C > 255 + use bytes == wraparound + +{ + use bytes; + + print "not " unless ord(pack("C", 0x100 + 0xab)) == 0xab; + print "ok $test\n"; $test++; +} + diff --git a/t/op/qu.t b/t/op/qu.t index 2800204..c24b507 100644 --- a/t/op/qu.t +++ b/t/op/qu.t @@ -1,5 +1,11 @@ print "1..6\n"; + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + my $foo = "foo"; print "not " unless qu(abc$foo) eq "abcfoo"; @@ -16,9 +22,15 @@ print "ok 3\n"; print "not " unless qu(\x{41}\x{100}\x61\x{200}) eq "A\x{100}a\x{200}"; print "ok 4\n"; +{ + +use bytes; + print "not " unless join(" ", unpack("C*", qu(\x80))) eq "194 128"; print "ok 5\n"; print "not " unless join(" ", unpack("C*", qu(\x{100}))) eq "196 128"; print "ok 6\n"; +} +