# Re: Smoke 15526 /pro/3gl/CPAN/perl-current
# [p5sagit/p5-mst-13.2.git] / t / io / utf8.t
# (extracted from a blame view; original columns: Commit, Line, Data)
#!./perl

# Core-test bootstrap: switch into t/ when that directory exists, point @INC
# at ../lib, and skip the whole file (plan "1..0") when no 'perlio' layer
# can be found.
BEGIN {
    chdir 't' if -d 't';
    @INC = '../lib';
    # Explicit method call instead of indirect-object syntax.
    unless (PerlIO::Layer->find('perlio')) {
        print "1..0 # Skip: not perlio\n";
        exit 0;
    }
}

no utf8; # needed for use utf8 not griping about the raw octets

$| = 1;
print "1..31\n";

# U+00A3 POUND SIGN, built from its code point rather than written as a raw
# octet in the source, so the byte counts expected below do not depend on
# how this file itself happens to be encoded on disk.
my $pound = pack("U", 0xA3);

# Tests 1-2: two characters written through :utf8 occupy four bytes, and
# tell() reports that byte offset.
open(F, "+>:utf8", 'a') or die $!;
print F chr(0x100).$pound;
print '#'.tell(F)."\n";
print "not " unless tell(F) == 4;
print "ok 1\n";
print F "\n";
print '#'.tell(F)."\n";
print "not " unless tell(F) >= 5;
print "ok 2\n";

# Tests 3-5: with :utf8 in effect getc() hands back whole characters.
seek(F,0,0);
print "not " unless getc(F) eq chr(0x100);
print "ok 3\n";
print "not " unless getc(F) eq $pound;
print "ok 4\n";
print "not " unless getc(F) eq "\n";
print "ok 5\n";

# Tests 6-10: after switching to :bytes getc() returns the individual
# octets of the encoded data; the expected values differ on EBCDIC.
seek(F,0,0);
binmode(F,":bytes");
my $chr = chr(0xc4);
if (ord('A') == 193) { $chr = chr(0x8c); } # EBCDIC
print "not " unless getc(F) eq $chr;
print "ok 6\n";
$chr = chr(0x80);
if (ord('A') == 193) { $chr = chr(0x41); } # EBCDIC
print "not " unless getc(F) eq $chr;
print "ok 7\n";
$chr = chr(0xc2);
if (ord('A') == 193) { $chr = chr(0x80); } # EBCDIC
print "not " unless getc(F) eq $chr;
print "ok 8\n";
$chr = chr(0xa3);
if (ord('A') == 193) { $chr = chr(0x44); } # EBCDIC
print "not " unless getc(F) eq $chr;
print "ok 9\n";
print "not " unless getc(F) eq "\n";
print "ok 10\n";

# Test 11: back in :utf8 mode readline returns the characters written.
seek(F,0,0);
binmode(F,":utf8");
print "not " unless scalar(<F>) eq "\x{100}$pound\n";
print "ok 11\n";

# Tests 12-13: read() with LENGTH 2 and OFFSET 1 counts characters on both
# sides: two characters are read and placed after the first character
# already in $buf.
seek(F,0,0);
$buf = chr(0x200);
$count = read(F,$buf,2,1);
print "not " unless $count == 2;
print "ok 12\n";
print "not " unless $buf eq "\x{200}\x{100}$pound";
print "ok 13\n";
close(F);

{
    $a = chr(300); # This *is* UTF-encoded
    $b = chr(130); # This is not.

    # Test 14: a wide character survives a :utf8 write / :utf8 read.
    open F, ">:utf8", 'a' or die $!;
    print F $a,"\n";
    close F;

    open F, "<:utf8", 'a' or die $!;
    $x = <F>;
    chomp($x);
    print "not " unless $x eq chr(300);
    print "ok 14\n";

    # Test 15: read back as bytes, the same file holds the two-octet encoding.
    open F, "a" or die $!; # Not UTF
    binmode(F, ":bytes");
    $x = <F>;
    chomp($x);
    $chr = chr(196).chr(172);
    if (ord('A') == 193) { $chr = chr(141).chr(83); } # EBCDIC
    print "not " unless $x eq $chr;
    print "ok 15\n";
    close F;

    # Test 16: tell() after writing $a equals the byte length of $a.
    open F, ">:utf8", 'a' or die $!;
    binmode(F); # we write a "\n" and then tell() - avoid CRLF issues.
    # A bare binmode() may also clear the :utf8 flag, so switch it back on
    # explicitly; tests 19-21 expect $b to be written UTF-8 encoded.
    binmode(F, ":utf8");
    print F $a;
    my $y;
    {
        my $x = tell(F);
        { use bytes; $y = length($a); }
        print "not " unless $x == $y;
        print "ok 16\n";
    }

    { # Check byte length of $b
        use bytes; my $y = length($b);
        print "not " unless $y == 1;
        print "ok 17\n";
    }

    print F $b,"\n"; # Must not upgrade $b itself

    { # Check byte length of $b again: printing must have left it alone
        use bytes; my $y = length($b);
        print "not ($y) " unless $y == 1;
        print "ok 18\n";
    }

    # Test 19: the file grew by the encoded size of $b plus the newline.
    {
        my $x = tell(F);
        { use bytes; if (ord('A')==193){$y += 2;}else{$y += 3;} } # EBCDIC ASCII
        print "not ($x,$y) " unless $x == $y;
        print "ok 19\n";
    }

    close F;

    # Test 20: the raw octets on disk.
    open F, "a" or die $!; # Not UTF
    binmode(F, ":bytes");
    $x = <F>;
    chomp($x);
    $chr = v196.172.194.130;
    if (ord('A') == 193) { $chr = v141.83.130; } # EBCDIC
    printf "not (%vd) ", $x unless $x eq $chr;
    print "ok 20\n";

    # Test 21: the same data decoded as characters.
    open F, "<:utf8", "a" or die $!;
    $x = <F>;
    chomp($x);
    close F;
    printf "not (%vd) ", $x unless $x eq chr(300).chr(130);
    print "ok 21\n";

    # Test 22: a wide character printed to a handle opened without :utf8
    # must warn.  That cannot be checked when :utf8 is among the default
    # layers, so report an explicit skip to keep the 1..31 plan intact.
    if (${^OPEN} =~ /:utf8/) {
        print "ok 22 # Skip: :utf8 is a default layer\n";
    } else {
        # Now let's make it suffer.
        open F, ">", "a" or die $!;
        my $w;
        {
            use warnings 'utf8';
            local $SIG{__WARN__} = sub { $w = $_[0] };
            print F $a;
        }
        print "not " if ($@ || $w !~ /Wide character in print/i);
        print "ok 22\n";
    }
}

# Tests 23-24 build a file whose first character is UTF-8 encoded and whose
# second is a bare octet, once by switching layers on an open handle and
# once by appending through a second handle.

# Test 23: write $a through :utf8, then chr(130) through :bytes.
open(F, ">:utf8", "a") || die $!;
print F $a;
binmode(F, ":bytes");
print F chr(130), "\n";
close(F);

open(F, "<", "a") || die $!;
binmode(F, ":bytes");
chomp($x = <F>);
# Expected octets; EBCDIC platforms encode chr(300) differently.
$chr = join '', map { chr } (ord('A') == 193 ? (141, 83, 130) : (196, 172, 130));
print(($x eq $chr ? "" : "not "), "ok 23\n");

# Test 24: same content, but chr(130) is appended via a plain ">>" handle.
open(F, ">:utf8", "a") || die $!;
print F $a;
close(F);
open(F, ">>", "a") || die $!;
print F chr(130), "\n";
close(F);

open(F, "<", "a") || die $!;
chomp($x = <F>);
print(($x eq $chr ? "" : "not "), "ok 24\n");

# Now we have a deformed file.

# Test 25: formatting the malformed data with %vd must produce a warning.
# The handler only counts warnings; the verdict is printed exactly once
# afterwards, so the test line appears whether zero, one or several
# warnings fire.
if (ord('A') == 193) {
    print "ok 25 # Skip: EBCDIC\n"; # EBCDIC doesn't complain
} else {
    open F, "<:utf8", "a" or die $!;
    $x = <F>; chomp $x;
    my $warned = 0;
    local $SIG{__WARN__} = sub { $warned++ };
    eval { sprintf "%vd\n", $x };
    print "not " unless $warned;
    print "ok 25\n";
}

close F;
unlink('a');

# Test 26 fixture: chr(0) followed by six characters of growing code point.
open F, ">:utf8", "a" or die $!;
@a = map { chr(1 << ($_ << 2)) } 0..5; # 0x1, 0x10, .., 0x100000
unshift @a, chr(0); # ... and a null byte in front just for fun
print F @a;
close F;

my $c;

# bytes::length() is called below without the pragma being enabled in this
# scope, so make sure bytes.pm is loaded.
require bytes;

# read() should work on characters, not bytes
open F, "<:utf8", "a" or die $!;
$a = 0; # running byte offset expected from tell()
for (@a) {
    # $c must hold the count returned by read(), hence the inner parentheses:
    # without them it would receive the result of the "== 1" comparison.
    unless ((($c = read(F, $b, 1)) == 1) &&
            length($b) == 1 &&
            ord($b) == ord($_) &&
            tell(F) == ($a += bytes::length($b))) {
        print '# ord($_) == ', ord($_), "\n";
        print '# ord($b) == ', ord($b), "\n";
        print '# length($b) == ', length($b), "\n";
        print '# bytes::length($b) == ', bytes::length($b), "\n";
        print '# tell(F) == ', tell(F), "\n";
        print '# $a == ', $a, "\n";
        print '# $c == ', $c, "\n";
        print "not ";
        last;
    }
}
close F;
print "ok 26\n";

{
    # Wide-character warnings on output must be on by default and must be
    # suppressible.  The handler stores the warning text in $@, which is
    # cleared before every attempt.

    local $SIG{__WARN__} = sub { ($@) = @_ };

    # 27: a :bytes handle has to complain about chr(0x100).
    $@ = undef;
    open F, ">a";
    binmode(F, ":bytes");
    print F chr(0x100);
    close(F);

    if ($@ =~ /Wide character in print/) { print "ok 27\n" }
    else                                 { print "not ok 27\n" }

    # 28: a handle opened with :utf8 stays silent.
    $@ = undef;
    open F, ">:utf8", "a";
    print F chr(0x100);
    close(F);

    if (defined $@) { print "not ok 28\n" }
    else            { print "ok 28\n" }

    # 29: so does a handle switched to :utf8 after opening.
    $@ = undef;
    open F, ">a";
    binmode(F, ":utf8");
    print F chr(0x100);
    close(F);

    if (defined $@) { print "not ok 29\n" }
    else            { print "ok 29\n" }

    # 30: with the 'utf8' warning category disabled nothing is reported.
    {
        no warnings 'utf8';

        $@ = undef;
        open F, ">a";
        print F chr(0x100);
        close(F);

        if (defined $@) { print "not ok 30\n" }
        else            { print "ok 30\n" }
    }

    use warnings 'utf8';

    # 31: with the category enabled again the warning is back.
    $@ = undef;
    open F, ">a";
    binmode(F, ":bytes");
    print F chr(0x100);
    close(F);

    if ($@ =~ /Wide character in print/) { print "ok 31\n" }
    else                                 { print "not ok 31\n" }
}

# sysread() and syswrite() tested in lib/open.t since Fcntl is used

# Remove the scratch files; each unlink is repeated for as long as it keeps
# reporting that something was deleted.
END {
    for my $scratch ("a", "b") {
        1 while unlink $scratch;
    }
}
