Recode the naughty binary bytes ([\x00-\x08\x0b-\x1f\x7f-\xff])

diff --git a/t/op/utf8decode.t b/t/op/utf8decode.t

index cd9d56a..4d05a6b 100644 (file)
--- a/t/op/utf8decode.t
+++ b/t/op/utf8decode.t
@@ -5,7 +5,7 @@ BEGIN {
     @INC = '../lib';
 }
 
-no utf8; # this test contains raw 8-bit data on purpose; don't switch to \x{}
+no utf8;
 
 print "1..78\n";
 
@@ -15,109 +15,109 @@ my $test = 1;
 # http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt,
 # version dated 2000-09-02. 
 
-# Note the \0 instead of a raw zero byte in 2.1.1: for example
-# GNU patch v2.1 has "issues" with raw zero bytes.
+# We use the \x notation instead of raw binary bytes for \x00-\x08, \x0b-\x1f
+# and \x7f-\xff because e.g. many patch programs have issues with binary data.
 
 my @MK = split(/\n/, <<__EOMK__);
 1      Correct UTF-8
-1.1.1 y "κόσμε"  -               11      ce:ba:e1:bd:b9:cf:83:ce:bc:ce:b5        5
+1.1.1 y "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5" -               11      ce:ba:e1:bd:b9:cf:83:ce:bc:ce:b5        5
 2      Boundary conditions 
 2.1    First possible sequence of certain length
-2.1.1 y "\0"                   0               1       00      1
-2.1.2 y "\80"                   80              2       c2:80   1
-2.1.3 y "ࠀ"          800             3       e0:a0:80        1
-2.1.4 y "𐀀"         10000           4       f0:90:80:80     1
-2.1.5 y "�����"        200000          5       f8:88:80:80:80  1
-2.1.6 y "������"       4000000         6       fc:84:80:80:80:80       1
+2.1.1 y "\x00"                 0               1       00      1
+2.1.2 y "\xc2\x80"                     80              2       c2:80   1
+2.1.3 y "\xe0\xa0\x80"         800             3       e0:a0:80        1
+2.1.4 y "\xf0\x90\x80\x80"             10000           4       f0:90:80:80     1
+2.1.5 y "\xf8\x88\x80\x80\x80" 200000          5       f8:88:80:80:80  1
+2.1.6 y "\xfc\x84\x80\x80\x80\x80"     4000000         6       fc:84:80:80:80:80       1
 2.2    Last possible sequence of certain length
-2.2.1 y "\7f"                    7f              1       7f      1
-2.2.2 y "߿"                   7ff             2       df:bf   1
+2.2.1 y "\x7f"                 7f              1       7f      1
+2.2.2 y "\xdf\xbf"                     7ff             2       df:bf   1
 # The ffff is illegal unless UTF8_ALLOW_FFFF