c631c0a7a9ba1fa3d476d44c853c7934bb5305f5
[p5sagit/p5-mst-13.2.git] / t / op / utf8decode.t
1 #!./perl
2
3 BEGIN {  # Compile-time setup: this block runs as soon as it is parsed, before the rest of the script is compiled.
4     chdir 't' if -d 't';  # Move into the 't' directory, but only when one exists under the current directory.
5     @INC = '../lib';  # Replace the entire module search path with the single relative entry '../lib'.
6 }
7
8 print "1..78\n";  # Emits the literal line "1..78" on the selected output handle; presumably the plan announcing 78 tests -- confirm against the harness.
9
10 my $test = 1;  # File-scoped lexical starting at 1; presumably the running test number -- its uses are outside this view.
11
12 # This table is based on Markus Kuhn's UTF-8 Decode Stress Tester,
13 # http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt,
14 # version dated 2000-09-02. 
15
16 # Note the \0 instead of a raw zero byte in 2.1.1: for example
17 # GNU patch v2.1 has "issues" with raw zero bytes.
18
19 my @MK = split(/\n/, <<__EOMK__);
20 1       Correct UTF-8
21 1.1.1 y "κόσμε"   -               11      ce:ba:e1:bd:b9:cf:83:ce:bc:ce:b5        5
22 2       Boundary conditions 
23 2.1     First possible sequence of certain length
24 2.1.1 y "\0"                    0               1       00      1
25 2.1.2 y "\80"                    80              2       c2:80   1
26 2.1.3 y "ࠀ"           800             3       e0:a0:80        1
27 2.1.4 y "𐀀"          10000           4       f0:90:80:80     1
28 2.1.5 y "" 200000          5       f8:88:80:80:80  1
29 2.1.6 y ""        4000000         6       fc:84:80:80:80:80       1
30 2.2     Last possible sequence of certain length
31 2.2.1 y "\7f"                     7f              1       7f      1
32 2.2.2 y "߿"                    7ff             2       df:bf   1
33 # The ffff is illegal unless UTF8_ALLOW_FFFF

Software error:

Malformed UTF-8 character (fatal) at /var/www/git.shadowcat.co.uk/docroot/gitweb/gitweb.cgi line 1024, <$fd> line 34.

For help, please send mail to the webmaster (chrisj@shadowcatsystems.co.uk), giving this error message and the time and date of the error.