@INC = '../lib';
}
-no utf8; # this test contains raw 8-bit data on purpose; don't switch to \x{}
+no utf8;
print "1..78\n";
# http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt,
# version dated 2000-09-02.
-# Note the \0 instead of a raw zero byte in 2.1.1: for example
-# GNU patch v2.1 has "issues" with raw zero bytes.
+# We use the \x notation instead of raw binary bytes for the byte ranges
+# \x00-\x1f and \x7f-\xff, because many patch programs mishandle binary data.
my @MK = split(/\n/, <<__EOMK__);
1 Correct UTF-8
-1.1.1 y "κόσμε" - 11 ce:ba:e1:bd:b9:cf:83:ce:bc:ce:b5 5
+1.1.1 y "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5" - 11 ce:ba:e1:bd:b9:cf:83:ce:bc:ce:b5 5
2 Boundary conditions
2.1 First possible sequence of certain length
-2.1.1 y "\0" 0 1 00 1
-2.1.2 y "\80" 80 2 c2:80 1
-2.1.3 y "ࠀ" 800 3 e0:a0:80 1
-2.1.4 y "𐀀" 10000 4 f0:90:80:80 1
-2.1.5 y "" 200000 5 f8:88:80:80:80 1
-2.1.6 y "" 4000000 6 fc:84:80:80:80:80 1
+2.1.1 y "\x00" 0 1 00 1
+2.1.2 y "\xc2\x80" 80 2 c2:80 1
+2.1.3 y "\xe0\xa0\x80" 800 3 e0:a0:80 1
+2.1.4 y "\xf0\x90\x80\x80" 10000 4 f0:90:80:80 1
+2.1.5 y "\xf8\x88\x80\x80\x80" 200000 5 f8:88:80:80:80 1
+2.1.6 y "\xfc\x84\x80\x80\x80\x80" 4000000 6 fc:84:80:80:80:80 1
2.2 Last possible sequence of certain length
-2.2.1 y "\7f" 7f 1 7f 1
-2.2.2 y "߿" 7ff 2 df:bf 1
+2.2.1 y "\x7f" 7f 1 7f 1
+2.2.2 y "\xdf\xbf" 7ff 2 df:bf 1
# The ffff is illegal unless UTF8_ALLOW_FFFF
Software error:
Malformed UTF-8 character (fatal) at /var/www/git.shadowcat.co.uk/docroot/gitweb/gitweb.cgi line 1024, <$fd> line 49.
For help, please send mail to the webmaster (chrisj@shadowcatsystems.co.uk), giving this error message
and the time and date of the error.