Split off the UTF-8 decoder tests, and make them also check ...

[p5sagit/p5-mst-13.2.git] / t / pragma / utf8.t
diff --git a/t/pragma/utf8.t b/t/pragma/utf8.t

index 3d8693e..827be5a 100755 (executable)
--- a/t/pragma/utf8.t
+++ b/t/pragma/utf8.t
@@ -10,7 +10,7 @@ BEGIN {
     }
 }
 
-print "1..181\n";
+print "1..103\n";
 
 my $test = 1;
 
@@ -564,173 +564,3 @@ sub nok_bytes {
     print "ok $test\n";
     $test++;
 }
-
-# This table is based on Markus Kuhn's UTF-8 Decode Stress Tester,
-# http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt,
-# version dated 2000-09-02. 
-
-# Note the \0 instead of a raw zero byte in 2.1.1: for example
-# GNU patch v2.1 has "issues" with raw zero bytes.
-
-my @MK = split(/\n/, <<__EOMK__);
-1      Correct UTF-8
-1.1.1 y "κόσμε"  -               11      ce:ba:e1:bd:b9:cf:83:ce:bc:ce:b5        5
-2      Boundary conditions 
-2.1    First possible sequence of certain length
-2.1.1 y "\0"                   0               1       00      1
-2.1.2 y "\80"                   80              2       c2:80   1
-2.1.3 y "ࠀ"          800             3       e0:a0:80        1
-2.1.4 y "𐀀"         10000           4       f0:90:80:80     1
-2.1.5 y "�����"        200000          5       f8:88:80:80:80  1
-2.1.6 y "������"       4000000         6       fc:84:80:80:80:80       1
-2.2    Last possible sequence of certain length
-2.2.1 y "\7f"                    7f              1       7f      1
-2.2.2 y "߿"                   7ff             2       df:bf   1
-# The ffff is illegal unless UTF8_ALLOW_FFFF