From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Tue, 30 Jan 2001 18:18:51 +0000 (+0000)
Subject: UTF-8 nit from Inaba Hiroto.
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=ef9edfd01e3c829c5c5b98c37662df9d1108dc9e;p=p5sagit%2Fp5-mst-13.2.git

UTF-8 nit from Inaba Hiroto.

p4raw-id: //depot/perl@8615
---

diff --git a/pod/perlapi.pod b/pod/perlapi.pod
index 60cb725..40d40fe 100644
--- a/pod/perlapi.pod
+++ b/pod/perlapi.pod
@@ -186,10 +186,10 @@ Found in file av.c
 
 Converts a string C<s> of length C<len> from UTF8 into byte encoding.
 Unlike <utf8_to_bytes> but like C<bytes_to_utf8>, returns a pointer to
-the newly-created string, and updates C<len> to contain the new length.
-Returns the original string if no conversion occurs, C<len> and
-C<is_utf8> are unchanged. Do nothing if C<is_utf8> points to 0. Sets
-C<is_utf8> to 0 if C<s> is converted or malformed .
+the newly-created string, and updates C<len> to contain the new
+length.  Returns the original string if no conversion occurs, in which
+case C<len> is unchanged.  Does nothing if C<is_utf8> points to 0.  Sets
+C<is_utf8> to 0 if C<s> is converted or contains only 7-bit characters.
 
 NOTE: this function is experimental and may change or be
 removed without notice.
diff --git a/t/op/each.t b/t/op/each.t
index f1012c6..397176a 100755
--- a/t/op/each.t
+++ b/t/op/each.t
@@ -6,7 +6,7 @@ BEGIN {
     push @INC, '../lib';
 }    
 
-print "1..25\n";
+print "1..26\n";
 
 $h{'abc'} = 'ABC';
 $h{'def'} = 'DEF';
@@ -163,9 +163,15 @@ print "ok 23\n";
 print "#$u{$_}\n" for keys %u; # Used to core dump before change #8056.
 print "ok 24\n";
 
-%u = (qu"\xe3\x81\x82" => "downglade");
+$d = qu"\xe3\x81\x82";
+%u = ($d => "downgrade");
 for (keys %u) {
     use bytes;
     print "not " if length ne 3 or $_ ne "\xe3\x81\x82";
     print "ok 25\n";
 }
+{
+    use bytes;
+    print "not " if length($d) ne 6 or $d ne qu"\xe3\x81\x82";
+    print "ok 26\n";
+}
diff --git a/utf8.c b/utf8.c
index 046df74..4555ecb 100644
--- a/utf8.c
+++ b/utf8.c
@@ -587,10 +587,10 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN *len)
 
 Converts a string C<s> of length C<len> from UTF8 into byte encoding.
 Unlike <utf8_to_bytes> but like C<bytes_to_utf8>, returns a pointer to
-the newly-created string, and updates C<len> to contain the new length.
-Returns the original string if no conversion occurs, C<len> and
-C<is_utf8> are unchanged. Do nothing if C<is_utf8> points to 0. Sets
-C<is_utf8> to 0 if C<s> is converted or malformed .
+the newly-created string, and updates C<len> to contain the new
+length.  Returns the original string if no conversion occurs, in which
+case C<len> is unchanged.  Does nothing if C<is_utf8> points to 0.  Sets
+C<is_utf8> to 0 if C<s> is converted or contains only 7-bit characters.
 
 =cut */
 
@@ -605,16 +605,12 @@ Perl_bytes_from_utf8(pTHX_ U8* s, STRLEN *len, bool *is_utf8)
     if (!*is_utf8)
 	return start;
 
-    /* ensure valid UTF8 and chars < 256 before updating string */
+    /* ensure valid UTF8 and chars < 256 before converting string */
     for (send = s + *len; s < send;) {
 	U8 c = *s++;
         if (!UTF8_IS_ASCII(c)) {
 	    if (UTF8_IS_CONTINUATION(c) || s >= send ||
-		!UTF8_IS_CONTINUATION(*s)) {
-		*is_utf8 = 0;		
-		return start;
-	    }
-	    if ((c & 0xfc) != 0xc0)
+		!UTF8_IS_CONTINUATION(*s) || (c & 0xfc) != 0xc0)
 		return start;
 	    s++, count++;
         }
@@ -626,7 +622,7 @@ Perl_bytes_from_utf8(pTHX_ U8* s, STRLEN *len, bool *is_utf8)
 	return start;
 
     Newz(801, d, (*len) - count + 1, U8);
-    d = s = start;
+    s = start; start = d;
     while (s < send) {
 	U8 c = *s++;
 	if (UTF8_IS_ASCII(c))