Split off the UTF-8 decoder tests, and make them also check
Jarkko Hietaniemi [Tue, 5 Dec 2000 20:13:34 +0000 (20:13 +0000)]
the error message.

p4raw-id: //depot/perl@7996

MANIFEST
t/op/utf8decode.t [new file with mode: 0644]
t/pragma/utf8.t
utf8.c

index 4f30dd1..03be963 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -1577,6 +1577,7 @@ t/op/tr.t         See if tr works
 t/op/undef.t           See if undef works
 t/op/universal.t       See if UNIVERSAL class works
 t/op/unshift.t         See if unshift works
+t/op/utf8decode.t      See if UTF-8 decoding works
 t/op/vec.t             See if vectors work
 t/op/ver.t             See if v-strings and the %v format flag work
 t/op/wantarray.t       See if wantarray works
diff --git a/t/op/utf8decode.t b/t/op/utf8decode.t
new file mode 100644 (file)
index 0000000..c631c0a
--- /dev/null
@@ -0,0 +1,181 @@
+#!./perl
+
+BEGIN {
+    chdir 't' if -d 't';
+    @INC = '../lib';
+}
+
+print "1..78\n";
+
+my $test = 1;
+
+# This table is based on Markus Kuhn's UTF-8 Decode Stress Tester,
+# http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt,
+# version dated 2000-09-02. 
+
+# Note the \0 instead of a raw zero byte in 2.1.1: some tools (for
+# example, GNU patch v2.1) have "issues" with raw zero bytes.
+
+my @MK = split(/\n/, <<__EOMK__);
+1      Correct UTF-8
+1.1.1 y "κόσμε"  -               11      ce:ba:e1:bd:b9:cf:83:ce:bc:ce:b5        5
+2      Boundary conditions 
+2.1    First possible sequence of certain length
+2.1.1 y "\0"                   0               1       00      1
+2.1.2 y "\80"                   80              2       c2:80   1
+2.1.3 y "ࠀ"          800             3       e0:a0:80        1
+2.1.4 y "𐀀"         10000           4       f0:90:80:80     1
+2.1.5 y ""        200000          5       f8:88:80:80:80  1
+2.1.6 y ""       4000000         6       fc:84:80:80:80:80       1
+2.2    Last possible sequence of certain length
+2.2.1 y "\7f"                    7f              1       7f      1
+2.2.2 y "߿"                   7ff             2       df:bf   1
+# The ffff is illegal unless UTF8_ALLOW_FFFF

Software error:

Malformed UTF-8 character (fatal) at /var/www/git.shadowcat.co.uk/docroot/gitweb/gitweb.cgi line 1024, <$fd> line 57.

For help, please send mail to the webmaster (chrisj@shadowcatsystems.co.uk), giving this error message and the time and date of the error.