Polymorphic regexps.
[p5sagit/p5-mst-13.2.git] / t / op / utf8decode.t
1 #!./perl
2
3 BEGIN {                      # compile-time setup, executed before the rest of the script is compiled
4     chdir 't' if -d 't';     # enter the 't' directory when one exists under the current directory
5     @INC = '../lib';         # replace the whole module search path with the single entry '../lib'
6 }
7
8 no utf8; # this test contains raw 8-bit data on purpose; don't switch to \x{}
9
10 print "1..78\n";            # emit the test plan line announcing tests 1 through 78
11
12 my $test = 1;               # presumably the number of the next test to report -- its use is outside this view; confirm
13
14 # This table is based on Markus Kuhn's UTF-8 Decode Stress Tester,
15 # http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt,
16 # version dated 2000-09-02. 
17
18 # Note the \0 instead of a raw zero byte in 2.1.1: for example
19 # GNU patch v2.1 has "issues" with raw zero bytes.
20
21 my @MK = split(/\n/, <<__EOMK__);
22 1       Correct UTF-8
23 1.1.1 y "κόσμε"   -               11      ce:ba:e1:bd:b9:cf:83:ce:bc:ce:b5        5
24 2       Boundary conditions 
25 2.1     First possible sequence of certain length
26 2.1.1 y "\0"                    0               1       00      1
27 2.1.2 y "\80"                    80              2       c2:80   1
28 2.1.3 y "ࠀ"           800             3       e0:a0:80        1
29 2.1.4 y "𐀀"          10000           4       f0:90:80:80     1
30 2.1.5 y "" 200000          5       f8:88:80:80:80  1
31 2.1.6 y ""        4000000         6       fc:84:80:80:80:80       1
32 2.2     Last possible sequence of certain length
33 2.2.1 y "\7f"                     7f              1       7f      1
34 2.2.2 y "߿"                    7ff             2       df:bf   1
35 # The ffff is illegal unless UTF8_ALLOW_FFFF

Software error:

Malformed UTF-8 character (fatal) at /var/www/git.shadowcat.co.uk/docroot/gitweb/gitweb.cgi line 1024, <$fd> line 36.

For help, please send mail to the webmaster (chrisj@shadowcatsystems.co.uk), giving this error message and the time and date of the error.