Integrate mainline
[p5sagit/p5-mst-13.2.git] / t / io / utf8.t
CommitLineData
#!./perl

# Core-test bootstrap: run from the t/ directory against the uninstalled
# library, and skip the whole file when the 'perlio' layer is not available.
BEGIN {
    chdir 't' if -d 't';
    @INC = '../lib';
    unless (PerlIO::Layer->find('perlio')) {
        print "1..0 # Skip: not perlio\n";
        exit 0;
    }
}

no utf8; # needed for use utf8 not griping about the raw octets

$| = 1;            # unbuffered STDOUT so test lines appear as they are printed
print "1..31\n";   # test plan: this file emits tests 1 through 31

# Tests 1-13: write chr(0x100), chr(0xA3) and a newline through a :utf8
# handle, then read the same file back as characters, as raw bytes, as a
# whole line, and finally with an offset read().
#
# The pound sign is spelled "\xA3" rather than as a literal so that the test
# does not depend on the encoding this source file happens to be saved in.
# NOTE(review): the EBCDIC expectations below are carried over unchanged;
# confirm that "\xA3" is the intended character on EBCDIC platforms.
open(F, "+>:utf8", 'a') or die $!;
print F chr(0x100) . "\xA3";
print '#' . tell(F) . "\n";
print "not " unless tell(F) == 4;
print "ok 1\n";
print F "\n";
print '#' . tell(F) . "\n";
print "not " unless tell(F) >= 5;
print "ok 2\n";

# Character-level read back.
seek(F, 0, 0);
print "not " unless getc(F) eq chr(0x100);
print "ok 3\n";
print "not " unless getc(F) eq "\xA3";
print "ok 4\n";
print "not " unless getc(F) eq "\n";
print "ok 5\n";

# Byte-level read back: each entry is
# [test number, expected octet on ASCII platforms, expected octet on EBCDIC].
seek(F, 0, 0);
binmode(F, ":bytes");
my $chr;
for my $case ([6, 0xc4, 0x8c], [7, 0x80, 0x41], [8, 0xc2, 0x80], [9, 0xa3, 0x44]) {
    my ($test, $ascii, $ebcdic) = @$case;
    $chr = chr(ord('A') == 193 ? $ebcdic : $ascii);
    print "not " unless getc(F) eq $chr;
    print "ok $test\n";
}
print "not " unless getc(F) eq "\n";
print "ok 10\n";

# Line-level read back as characters again.
seek(F, 0, 0);
binmode(F, ":utf8");
print "not " unless scalar(<F>) eq "\x{100}\xA3\n";
print "ok 11\n";

# read() with an offset of 1 must keep the first character already in $buf
# and append the two characters it reads.
seek(F, 0, 0);
$buf = chr(0x200);
$count = read(F, $buf, 2, 1);
print "not " unless $count == 2;
print "ok 12\n";
print "not " unless $buf eq "\x{200}\x{100}\xA3";
print "ok 13\n";
close(F);

# Tests 14-22: mix a character above 0xFF ($a) with one below 0x100 ($b) on
# :utf8 and :bytes handles and check the resulting octets, file offsets and
# the "Wide character" warning.  $a, $b, $x and $chr are deliberately left as
# file-wide variables because later parts of the script reuse them.
{
    my $is_ebcdic = ord('A') == 193;

    $a = chr(300); # This *is* UTF-encoded
    $b = chr(130); # This is not.

    open F, ">:utf8", 'a' or die $!;
    print F $a, "\n";
    close F;

    open F, "<:utf8", 'a' or die $!;
    $x = <F>;
    chomp($x);
    print "not " unless $x eq chr(300);
    print "ok 14\n";

    open F, "<", "a" or die $!; # Not UTF
    binmode(F, ":bytes");
    $x = <F>;
    chomp($x);
    $chr = $is_ebcdic ? chr(141) . chr(83) : chr(196) . chr(172);
    print "not " unless $x eq $chr;
    print "ok 15\n";
    close F;

    open F, ">:utf8", 'a' or die $!;
    binmode(F); # we write a "\n" and then tell() - avoid CRLF issues.
    print F $a;

    # $y tracks the number of bytes expected in the file so far.
    my $y;
    {
        my $pos = tell(F);
        { use bytes; $y = length($a); }
        print "not " unless $pos == $y;
        print "ok 16\n";
    }

    { # Check byte length of $b
        use bytes;
        my $len = length($b);
        print "not " unless $len == 1;
        print "ok 17\n";
    }

    print F $b, "\n"; # Must not upgrade $b

    { # Check byte length of $b again, after it went through the :utf8 handle
        use bytes;
        my $len = length($b);
        print "not ($len) " unless $len == 1;
        print "ok 18\n";
    }

    {
        my $pos = tell(F);
        # chr(130) plus the newline: 3 more bytes on ASCII platforms, 2 on EBCDIC.
        $y += $is_ebcdic ? 2 : 3;
        print "not ($pos,$y) " unless $pos == $y;
        print "ok 19\n";
    }

    close F;

    open F, "<", "a" or die $!; # Not UTF
    binmode(F, ":bytes");
    $x = <F>;
    chomp($x);
    $chr = $is_ebcdic ? v141.83.130 : v196.172.194.130;
    printf "not (%vd) ", $x unless $x eq $chr;
    print "ok 20\n";

    open F, "<:utf8", "a" or die $!;
    $x = <F>;
    chomp($x);
    close F;
    printf "not (%vd) ", $x unless $x eq chr(300) . chr(130);
    print "ok 21\n";

    open F, ">", "a" or die $!;
    if (${^OPEN} =~ /:utf8/) {
        binmode(F, ":bytes:");
    }

    # Now let's make it suffer.
    # Printing a wide character to a handle without :utf8 must warn; the
    # handler captures the message so it can be matched below.
    my $w;
    {
        use warnings 'utf8';
        local $SIG{__WARN__} = sub { $w = $_[0] };
        print F $a;
        print "not " if ($@ || $w !~ /Wide character in print/i);
    }
    print "ok 22\n";
}

# Hm. Time to get more evil.
# Test 23: switch the handle from :utf8 to :bytes in mid-stream, so chr(130)
# is written as a single raw octet right after the UTF-8 encoded $a.
open F, ">:utf8", "a" or die $!;
print F $a;
binmode(F, ":bytes");
print F chr(130) . "\n";
close F;

open F, "<", "a" or die $!;
binmode(F, ":bytes");
$x = <F>;
chomp $x;
$chr = ord('A') == 193 ? v141.83.130 : v196.172.130; # EBCDIC : ASCII
print "not " unless $x eq $chr;
print "ok 23\n";

# Right.
# Test 24: same octets as test 23, but this time the raw octet is appended
# through a second handle opened without any :utf8 layer.  The expected value
# in $chr is the one computed for test 23.
open F, ">:utf8", "a" or die $!;
print F $a;
close F;
open F, ">>", "a" or die $!;
print F chr(130) . "\n";
close F;

open F, "<", "a" or die $!;
$x = <F>;
chomp $x;
print "not " unless $x eq $chr;
print "ok 24\n";

# Now we have a deformed file.
# Test 25: formatting the malformed line with %vd is expected to warn.  The
# handler only counts warnings, so exactly one result line is printed whether
# there are zero, one or several of them.

if (ord('A') == 193) {
    print "ok 25 # Skip: EBCDIC\n"; # EBCDIC doesn't complain
}
else {
    open F, "<:utf8", "a" or die $!;
    $x = <F>;
    chomp $x;
    my $warned = 0;
    {
        local $SIG{__WARN__} = sub { $warned++ };
        eval { sprintf "%vd\n", $x };
    }
    print "not " unless $warned;
    print "ok 25\n";
}

close F;
unlink('a');

# Test 26: write seven characters of increasing code point, then read them
# back one at a time and check that read() counts characters, not bytes.
open F, ">:utf8", "a" or die $!;
@a = map { chr(1 << ($_ << 2)) } 0..5; # 0x1, 0x10, .., 0x100000
unshift @a, chr(0); # ... and a null byte in front just for fun
print F @a;
close F;

my $c;

# read() should work on characters, not bytes
require bytes; # bytes::length() is called by name below
open F, "<:utf8", "a" or die $!;
$a = 0; # running byte offset expected from tell(F)
for (@a) {
    # The parentheses around the assignment matter: without them $c would
    # receive the result of the comparison instead of read()'s return value.
    unless ((($c = read(F, $b, 1)) == 1) &&
            length($b) == 1 &&
            ord($b) == ord($_) &&
            tell(F) == ($a += bytes::length($b))) {
        print '# ord($_) == ', ord($_), "\n";
        print '# ord($b) == ', ord($b), "\n";
        print '# length($b) == ', length($b), "\n";
        print '# bytes::length($b) == ', bytes::length($b), "\n";
        print '# tell(F) == ', tell(F), "\n";
        print '# $a == ', $a, "\n";
        print '# $c == ', $c, "\n";
        print "not ";
        last;
    }
}
close F;
print "ok 26\n";

{
    # Check that warnings are on for I/O, and that they can be muffled.
    #
    # The warn handler stores the message in $@, which is cleared before each
    # case; a case passes or fails on whether $@ ended up set.  The statements
    # are kept inline and in this order because the 'no warnings' / 'use
    # warnings' pragmas below are lexically scoped.

    local $SIG{__WARN__} = sub { $@ = shift };

    # Wide character on a :bytes handle: must warn.
    undef $@;
    open F, ">", "a" or die $!;
    binmode(F, ":bytes");
    print F chr(0x100);
    close(F);

    print $@ =~ /Wide character in print/ ? "ok 27\n" : "not ok 27\n";

    # :utf8 given at open time: must stay silent.
    undef $@;
    open F, ">:utf8", "a" or die $!;
    print F chr(0x100);
    close(F);

    print defined $@ ? "not ok 28\n" : "ok 28\n";

    # :utf8 added afterwards with binmode: must stay silent.
    undef $@;
    open F, ">", "a" or die $!;
    binmode(F, ":utf8");
    print F chr(0x100);
    close(F);

    print defined $@ ? "not ok 29\n" : "ok 29\n";

    no warnings 'utf8';

    # Category switched off: must stay silent even without :utf8.
    undef $@;
    open F, ">", "a" or die $!;
    print F chr(0x100);
    close(F);

    print defined $@ ? "not ok 30\n" : "ok 30\n";

    use warnings 'utf8';

    # Category switched back on: must warn again.
    undef $@;
    open F, ">", "a" or die $!;
    binmode(F, ":bytes");
    print F chr(0x100);
    close(F);

    print $@ =~ /Wide character in print/ ? "ok 31\n" : "not ok 31\n";
}

# sysread() and syswrite() tested in lib/open.t since Fcntl is used

# Remove the scratch files; the loop repeats unlink until it reports that
# nothing was removed.
END {
    for my $scratch (qw(a b)) {
        1 while unlink $scratch;
    }
}
