UTF-8 documentation.
Jarkko Hietaniemi [Sun, 11 Feb 2001 17:26:22 +0000 (17:26 +0000)]
p4raw-id: //depot/perl@8770

utf8.h

diff --git a/utf8.h b/utf8.h
index 00350f2..8b0c8c3 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -64,6 +64,22 @@ END_EXTERN_C
 
 /* The constant 0x1000000000 (2**36), built with UINT64_C.
  * NOTE(review): presumably an upper bound used when handling UTF-8
  * encoded values on builds with 64-bit ("quad") integers -- confirm
  * against the users of this macro. */
 #define UTF8_QUAD_MAX  UINT64_C(0x1000000000)
 
+/*
+ The following table, taken from Unicode 3.1, lists the legal UTF-8 byte
+ sequences for each range of code points.
+
+ Code Points           1st Byte  2nd Byte  3rd Byte  4th Byte
+
+   U+0000..U+007F      00..7F   
+   U+0080..U+07FF      C2..DF    80..BF   
+   U+0800..U+0FFF      E0        A0..BF    80..BF  
+   U+1000..U+FFFF      E1..EF    80..BF    80..BF  
+  U+10000..U+3FFFF     F0        90..BF    80..BF    80..BF
+  U+40000..U+FFFFF     F1..F3    80..BF    80..BF    80..BF
+ U+100000..U+10FFFF    F4        80..8F    80..BF    80..BF
+
+ */
+
 #define UTF8_IS_ASCII(c)               (((U8)c) <  0x80)
 #define UTF8_IS_START(c)               (((U8)c) >= 0xc0 && (((U8)c) <= 0xfd))
 #define UTF8_IS_CONTINUATION(c)                (((U8)c) >= 0x80 && (((U8)c) <= 0xbf))