From: Jarkko Hietaniemi
Date: Sun, 11 Feb 2001 17:26:22 +0000 (+0000)
Subject: UTF-8 documentation.
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=877d9f0d218f342333990f21c1962a28c01d42b0;p=p5sagit%2Fp5-mst-13.2.git

UTF-8 documentation.

p4raw-id: //depot/perl@8770
---

diff --git a/utf8.h b/utf8.h
index 00350f2..8b0c8c3 100644
--- a/utf8.h
+++ b/utf8.h
@@ -64,6 +64,22 @@ END_EXTERN_C
 
 #define UTF8_QUAD_MAX UINT64_C(0x1000000000)
 
+/*
+
+ The following table is from Unicode 3.1.
+
+ Code Points            1st Byte  2nd Byte  3rd Byte  4th Byte
+
+   U+0000..U+007F       00..7F
+   U+0080..U+07FF       C2..DF    80..BF
+   U+0800..U+0FFF       E0        A0..BF    80..BF
+   U+1000..U+FFFF       E1..EF    80..BF    80..BF
+  U+10000..U+3FFFF      F0        90..BF    80..BF    80..BF
+  U+40000..U+FFFFF      F1..F3    80..BF    80..BF    80..BF
+ U+100000..U+10FFFF     F4        80..8F    80..BF    80..BF
+
+ */
+
+
 #define UTF8_IS_ASCII(c) (((U8)c) < 0x80)
 #define UTF8_IS_START(c) (((U8)c) >= 0xc0 && (((U8)c) <= 0xfd))
 #define UTF8_IS_CONTINUATION(c) (((U8)c) >= 0x80 && (((U8)c) <= 0xbf))