From: Jarkko Hietaniemi Date: Tue, 30 Jan 2001 18:18:51 +0000 (+0000) Subject: UTF-8 nit from Inaba Hiroto. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=ef9edfd01e3c829c5c5b98c37662df9d1108dc9e;p=p5sagit%2Fp5-mst-13.2.git UTF-8 nit from Inaba Hiroto. p4raw-id: //depot/perl@8615 --- diff --git a/pod/perlapi.pod b/pod/perlapi.pod index 60cb725..40d40fe 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -186,10 +186,10 @@ Found in file av.c Converts a string C<s> of length C<len> from UTF8 into byte encoding. Unlike C<utf8_to_bytes> but like C<bytes_to_utf8>, returns a pointer to -the newly-created string, and updates C<len> to contain the new length. -Returns the original string if no conversion occurs, C<len> and -C<is_utf8> are unchanged. Do nothing if C<is_utf8> points to 0. Sets -C<is_utf8> to 0 if C<s> is converted or malformed . +the newly-created string, and updates C<len> to contain the new +length. Returns the original string if no conversion occurs, C<len> +is unchanged. Do nothing if C<is_utf8> points to 0. Sets C<is_utf8> to +0 if C<s> is converted or contains all 7bit characters. NOTE: this function is experimental and may change or be removed without notice. diff --git a/t/op/each.t b/t/op/each.t index f1012c6..397176a 100755 --- a/t/op/each.t +++ b/t/op/each.t @@ -6,7 +6,7 @@ BEGIN { push @INC, '../lib'; } -print "1..25\n"; +print "1..26\n"; $h{'abc'} = 'ABC'; $h{'def'} = 'DEF'; @@ -163,9 +163,15 @@ print "ok 23\n"; print "#$u{$_}\n" for keys %u; # Used to core dump before change #8056. print "ok 24\n"; -%u = (qu"\xe3\x81\x82" => "downglade"); +$d = qu"\xe3\x81\x82"; +%u = ($d => "downgrade"); for (keys %u) { use bytes; print "not " if length ne 3 or $_ ne "\xe3\x81\x82"; print "ok 25\n"; } +{ + use bytes; + print "not " if length($d) ne 6 or $d ne qu"\xe3\x81\x82"; + print "ok 26\n"; +} diff --git a/utf8.c b/utf8.c index 046df74..4555ecb 100644 --- a/utf8.c +++ b/utf8.c @@ -587,10 +587,10 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN *len) Converts a string C<s> of length C<len> from UTF8 into byte encoding. 
Unlike C<utf8_to_bytes> but like C<bytes_to_utf8>, returns a pointer to -the newly-created string, and updates C<len> to contain the new length. -Returns the original string if no conversion occurs, C<len> and -C<is_utf8> are unchanged. Do nothing if C<is_utf8> points to 0. Sets -C<is_utf8> to 0 if C<s> is converted or malformed . +the newly-created string, and updates C<len> to contain the new +length. Returns the original string if no conversion occurs, C<len> +is unchanged. Do nothing if C<is_utf8> points to 0. Sets C<is_utf8> to +0 if C<s> is converted or contains all 7bit characters. =cut */ @@ -605,16 +605,12 @@ Perl_bytes_from_utf8(pTHX_ U8* s, STRLEN *len, bool *is_utf8) if (!*is_utf8) return start; - /* ensure valid UTF8 and chars < 256 before updating string */ + /* ensure valid UTF8 and chars < 256 before converting string */ for (send = s + *len; s < send;) { U8 c = *s++; if (!UTF8_IS_ASCII(c)) { if (UTF8_IS_CONTINUATION(c) || s >= send || - !UTF8_IS_CONTINUATION(*s)) { - *is_utf8 = 0; - return start; - } - if ((c & 0xfc) != 0xc0) + !UTF8_IS_CONTINUATION(*s) || (c & 0xfc) != 0xc0) return start; s++, count++; } @@ -626,7 +622,7 @@ Perl_bytes_from_utf8(pTHX_ U8* s, STRLEN *len, bool *is_utf8) return start; Newz(801, d, (*len) - count + 1, U8); - d = s = start; + s = start; start = d; while (s < send) { U8 c = *s++; if (UTF8_IS_ASCII(c))