From: Simon Cozens Date: Sat, 17 Jun 2000 11:56:44 +0000 (+0000) Subject: Re: [PATCH] pack('U',$foo) doesn't UTF8 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=fc865a0069737312ca5ef9762fe8a9be7aa37747;p=p5sagit%2Fp5-mst-13.2.git Re: [PATCH] pack('U',$foo) doesn't UTF8 Message-ID: pack U0, pack C0 p4raw-id: //depot/cfgperl@6260 --- diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index 6b4e971..00fc860 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -3202,6 +3202,15 @@ equal $foo). =item * +If the pattern begins with a C<U>, the resulting string will be treated +as Unicode-encoded. You can force UTF8 encoding on in a string with an +initial C<U0>, and the bytes that follow will be interpreted as Unicode +characters. If you don't want this to happen, you can begin your pattern +with C<C0> (or anything else) to force Perl not to UTF8 encode your +string, and then follow this with a C<U*> somewhere in your pattern. + +=item * + You must yourself do any alignment or padding by inserting for example enough C<'x'>es while packing. There is no way to pack() and unpack() could know where the bytes are going to or coming from. Therefore diff --git a/pp.c b/pp.c index 428b2e4..efea0c1 100644 --- a/pp.c +++ b/pp.c @@ -4375,6 +4375,7 @@ PP(pp_pack) register I32 items; STRLEN fromlen; register char *pat = SvPVx(*++MARK, fromlen); + char *patcopy; register char *patend = pat + fromlen; register I32 len; I32 datumtype; @@ -4405,6 +4406,7 @@ PP(pp_pack) items = SP - MARK; MARK++; sv_setpvn(cat, "", 0); + patcopy = pat; while (pat < patend) { SV *lengthcode = Nullsv; #define NEXTFROM ( lengthcode ? lengthcode : items-- > 0 ? 
*MARK++ : &PL_sv_no) @@ -4412,8 +4414,12 @@ PP(pp_pack) #ifdef PERL_NATINT_PACK natint = 0; #endif - if (isSPACE(datumtype)) + if (isSPACE(datumtype)) { + patcopy++; continue; + } + if (datumtype == 'U' && pat==patcopy+1) + SvUTF8_on(cat); if (datumtype == '#') { while (pat < patend && *pat != '\n') pat++; diff --git a/t/op/pack.t b/t/op/pack.t index dda1cc7..5c215c6 100755 --- a/t/op/pack.t +++ b/t/op/pack.t @@ -6,7 +6,7 @@ BEGIN { require Config; import Config; } -print "1..156\n"; +print "1..159\n"; $format = "c2 x5 C C x s d i l a6"; # Need the expression in here to force ary[5] to be numeric. This avoids @@ -406,3 +406,13 @@ $z = pack <