Make the UTF-8 decoding stricter and more verbose when malformation happens.
[p5sagit/p5-mst-13.2.git] / t / pragma / utf8.t
index 7224a74..e61baad 100755 (executable)
@@ -10,7 +10,7 @@ BEGIN {
     }
 }
 
-print "1..103\n";
+print "1..181\n";
 
 my $test = 1;
 
@@ -559,3 +559,170 @@ sub nok_bytes {
     print "ok $test\n";
     $test++;
 }
+
+# This table is based on Markus Kuhn's UTF-8 Decode Stress Tester,
+# http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt,
+# version dated 2000-09-02. 
+
+my @MK = split(/\n/, <<__EOMK__);
+1      Correct UTF-8
+1.1.1 y "κόσμε"  -               11      ce:ba:e1:bd:b9:cf:83:ce:bc:ce:b5        5
+2      Boundary conditions 
+2.1    First possible sequence of certain length
+2.1.1 y "\0"                    0               1       00      1
+2.1.2 y "\80"                   80              2       c2:80   1
+2.1.3 y "ࠀ"          800             3       e0:a0:80        1
+2.1.4 y "𐀀"         10000           4       f0:90:80:80     1
+2.1.5 y ""        200000          5       f8:88:80:80:80  1
+2.1.6 y ""       4000000         6       fc:84:80:80:80:80       1
+2.2    Last possible sequence of certain length
+2.2.1 y "\7f"                    7f              1       7f      1
+2.2.2 y "߿"                   7ff             2       df:bf   1
+# The ffff is legal unless under use utf8

Software error:

Malformed UTF-8 character (fatal) at /var/www/git.shadowcat.co.uk/docroot/gitweb/gitweb.cgi line 1024, <$fd> line 38.

For help, please send mail to the webmaster (chrisj@shadowcatsystems.co.uk), giving this error message and the time and date of the error.