Commit | Line | Data |
7d59b7e4 |
1 | #!./perl |
2 | |
3 | BEGIN { |
4 | chdir 't' if -d 't'; |
5 | @INC = '../lib'; |
0c4f7ff0 |
6 | unless (find PerlIO::Layer 'perlio') { |
7d59b7e4 |
7 | print "1..0 # Skip: not perlio\n"; |
8 | exit 0; |
9 | } |
10 | } |
11 | |
169da838 |
12 | no utf8; # needed for use utf8 not griping about the raw octets |
3ba0e062 |
13 | |
768fd157 |
14 | BEGIN { require "./test.pl"; } |
bb4e15c8 |
15 | |
0fb301d7 |
16 | plan(tests => 55); |
bb4e15c8 |
17 | |
7d59b7e4 |
18 | $| = 1; |
7d59b7e4 |
19 | |
62a28c97 |
20 | my $a_file = tempfile(); |
21 | |
22 | open(F,"+>:utf8",$a_file); |
7d59b7e4 |
23 | print F chr(0x100).'£'; |
0fb301d7 |
24 | cmp_ok( tell(F), '==', 4, tell(F) ); |
7d59b7e4 |
25 | print F "\n"; |
0fb301d7 |
26 | cmp_ok( tell(F), '>=', 5, tell(F) ); |
7d59b7e4 |
27 | seek(F,0,0); |
0fb301d7 |
28 | is( getc(F), chr(0x100) ); |
29 | is( getc(F), "£" ); |
30 | is( getc(F), "\n" ); |
7d59b7e4 |
31 | seek(F,0,0); |
32 | binmode(F,":bytes"); |
d2f5bb60 |
33 | my $chr = chr(0xc4); |
62a28c97 |
34 | if (ord($a_file) == 193) { $chr = chr(0x8c); } # EBCDIC |
0fb301d7 |
35 | is( getc(F), $chr ); |
d2f5bb60 |
36 | $chr = chr(0x80); |
62a28c97 |
37 | if (ord($a_file) == 193) { $chr = chr(0x41); } # EBCDIC |
0fb301d7 |
38 | is( getc(F), $chr ); |
d2f5bb60 |
39 | $chr = chr(0xc2); |
62a28c97 |
40 | if (ord($a_file) == 193) { $chr = chr(0x80); } # EBCDIC |
0fb301d7 |
41 | is( getc(F), $chr ); |
d2f5bb60 |
42 | $chr = chr(0xa3); |
62a28c97 |
43 | if (ord($a_file) == 193) { $chr = chr(0x44); } # EBCDIC |
0fb301d7 |
44 | is( getc(F), $chr ); |
45 | is( getc(F), "\n" ); |
7d59b7e4 |
46 | seek(F,0,0); |
47 | binmode(F,":utf8"); |
0fb301d7 |
48 | is( scalar(<F>), "\x{100}£\n" ); |
eb5c063a |
49 | seek(F,0,0); |
50 | $buf = chr(0x200); |
51 | $count = read(F,$buf,2,1); |
0fb301d7 |
52 | cmp_ok( $count, '==', 2 ); |
53 | is( $buf, "\x{200}\x{100}£" ); |
7d59b7e4 |
54 | close(F); |
55 | |
360eb788 |
56 | { |
7b89fb7c |
57 | $a = chr(300); # This *is* UTF-encoded |
58 | $b = chr(130); # This is not. |
6874a2de |
59 | |
62a28c97 |
60 | open F, ">:utf8", $a_file or die $!; |
7b89fb7c |
61 | print F $a,"\n"; |
62 | close F; |
6874a2de |
63 | |
62a28c97 |
64 | open F, "<:utf8", $a_file or die $!; |
7b89fb7c |
65 | $x = <F>; |
66 | chomp($x); |
0fb301d7 |
67 | is( $x, chr(300) ); |
6874a2de |
68 | |
62a28c97 |
69 | open F, $a_file or die $!; # Not UTF |
7b89fb7c |
70 | binmode(F, ":bytes"); |
71 | $x = <F>; |
72 | chomp($x); |
73 | $chr = chr(196).chr(172); |
62a28c97 |
74 | if (ord($a_file) == 193) { $chr = chr(141).chr(83); } # EBCDIC |
0fb301d7 |
75 | is( $x, $chr ); |
7b89fb7c |
76 | close F; |
6874a2de |
77 | |
62a28c97 |
78 | open F, ">:utf8", $a_file or die $!; |
7b89fb7c |
79 | binmode(F); # we write a "\n" and then tell() - avoid CRLF issues. |
6874a2de |
80 | binmode(F,":utf8"); # turn UTF-8-ness back on |
7b89fb7c |
81 | print F $a; |
82 | my $y; |
83 | { my $x = tell(F); |
84 | { use bytes; $y = length($a);} |
0fb301d7 |
85 | cmp_ok( $x, '==', $y ); |
7b89fb7c |
86 | } |
6874a2de |
87 | |
7b89fb7c |
88 | { # Check byte length of $b |
89 | use bytes; my $y = length($b); |
0fb301d7 |
90 | cmp_ok( $y, '==', 1 ); |
7b89fb7c |
91 | } |
6874a2de |
92 | |
7b89fb7c |
93 | print F $b,"\n"; # Don't upgrades $b |
6874a2de |
94 | |
7b89fb7c |
95 | { # Check byte length of $b |
96 | use bytes; my $y = length($b); |
0fb301d7 |
97 | cmp_ok( $y, '==', 1 ); |
7b89fb7c |
98 | } |
6874a2de |
99 | |
7b89fb7c |
100 | { |
101 | my $x = tell(F); |
102 | { use bytes; if (ord('A')==193){$y += 2;}else{$y += 3;}} # EBCDIC ASCII |
0fb301d7 |
103 | cmp_ok( $x, '==', $y ); |
7b89fb7c |
104 | } |
6874a2de |
105 | |
7b89fb7c |
106 | close F; |
6874a2de |
107 | |
62a28c97 |
108 | open F, $a_file or die $!; # Not UTF |
7b89fb7c |
109 | binmode(F, ":bytes"); |
110 | $x = <F>; |
111 | chomp($x); |
112 | $chr = v196.172.194.130; |
113 | if (ord('A') == 193) { $chr = v141.83.130; } # EBCDIC |
0fb301d7 |
114 | is( $x, $chr, sprintf('(%vd)', $x) ); |
6874a2de |
115 | |
62a28c97 |
116 | open F, "<:utf8", $a_file or die $!; |
7b89fb7c |
117 | $x = <F>; |
118 | chomp($x); |
119 | close F; |
0fb301d7 |
120 | is( $x, chr(300).chr(130), sprintf('(%vd)', $x) ); |
6874a2de |
121 | |
62a28c97 |
122 | open F, ">", $a_file or die $!; |
11fa0b78 |
123 | binmode(F, ":bytes:"); |
7b89fb7c |
124 | |
125 | # Now let's make it suffer. |
3eb9224a |
126 | my $w; |
127 | { |
128 | use warnings 'utf8'; |
129 | local $SIG{__WARN__} = sub { $w = $_[0] }; |
130 | print F $a; |
0fb301d7 |
131 | ok( (!$@)); |
132 | like($w, qr/Wide character in print/i ); |
3eb9224a |
133 | } |
54d2e5f1 |
134 | } |
360eb788 |
135 | |
136 | # Hm. Time to get more evil. |
62a28c97 |
137 | open F, ">:utf8", $a_file or die $!; |
360eb788 |
138 | print F $a; |
139 | binmode(F, ":bytes"); |
140 | print F chr(130)."\n"; |
141 | close F; |
6874a2de |
142 | |
62a28c97 |
143 | open F, "<", $a_file or die $!; |
3eb9224a |
144 | binmode(F, ":bytes"); |
360eb788 |
145 | $x = <F>; chomp $x; |
d2f5bb60 |
146 | $chr = v196.172.130; |
147 | if (ord('A') == 193) { $chr = v141.83.130; } # EBCDIC |
0fb301d7 |
148 | is( $x, $chr ); |
360eb788 |
149 | |
150 | # Right. |
62a28c97 |
151 | open F, ">:utf8", $a_file or die $!; |
360eb788 |
152 | print F $a; |
153 | close F; |
62a28c97 |
154 | open F, ">>", $a_file or die $!; |
ceb1aeda |
155 | binmode(F, ":bytes"); |
360eb788 |
156 | print F chr(130)."\n"; |
157 | close F; |
158 | |
62a28c97 |
159 | open F, "<", $a_file or die $!; |
ceb1aeda |
160 | binmode(F, ":bytes"); |
360eb788 |
161 | $x = <F>; chomp $x; |
ceb1aeda |
162 | SKIP: { |
163 | skip("Defaulting to UTF-8 output means that we can't generate a mangled file") |
164 | if $UTF8_OUTPUT; |
165 | is( $x, $chr ); |
166 | } |
360eb788 |
167 | |
168 | # Now we have a deformed file. |
d0965105 |
169 | |
75ccb5d3 |
170 | SKIP: { |
171 | if (ord('A') == 193) { |
ceb1aeda |
172 | skip("EBCDIC doesn't complain", 2); |
75ccb5d3 |
173 | } else { |
0fb301d7 |
174 | my @warnings; |
62a28c97 |
175 | open F, "<:utf8", $a_file or die $!; |
75ccb5d3 |
176 | $x = <F>; chomp $x; |
0fb301d7 |
177 | local $SIG{__WARN__} = sub { push @warnings, $_[0]; }; |
75ccb5d3 |
178 | eval { sprintf "%vd\n", $x }; |
0fb301d7 |
179 | is (scalar @warnings, 1); |
180 | like ($warnings[0], qr/Malformed UTF-8 character \(unexpected continuation byte 0x82, with no preceding start byte/); |
75ccb5d3 |
181 | } |
360eb788 |
182 | } |
183 | |
4f0c37ba |
184 | close F; |
62a28c97 |
185 | unlink($a_file); |
7d59b7e4 |
186 | |
62a28c97 |
187 | open F, ">:utf8", $a_file; |
d0965105 |
188 | @a = map { chr(1 << ($_ << 2)) } 0..5; # 0x1, 0x10, .., 0x100000 |
c36dfd09 |
189 | unshift @a, chr(0); # ... and a null byte in front just for fun |
d0965105 |
190 | print F @a; |
191 | close F; |
c36dfd09 |
192 | |
c88828dc |
193 | my $c; |
194 | |
195 | # read() should work on characters, not bytes |
62a28c97 |
196 | open F, "<:utf8", $a_file; |
d0965105 |
197 | $a = 0; |
0fb301d7 |
198 | my $failed; |
d0965105 |
199 | for (@a) { |
c88828dc |
200 | unless (($c = read(F, $b, 1) == 1) && |
201 | length($b) == 1 && |
202 | ord($b) == ord($_) && |
203 | tell(F) == ($a += bytes::length($b))) { |
34fce102 |
204 | print '# ord($_) == ', ord($_), "\n"; |
205 | print '# ord($b) == ', ord($b), "\n"; |
206 | print '# length($b) == ', length($b), "\n"; |
207 | print '# bytes::length($b) == ', bytes::length($b), "\n"; |
208 | print '# tell(F) == ', tell(F), "\n"; |
209 | print '# $a == ', $a, "\n"; |
210 | print '# $c == ', $c, "\n"; |
0fb301d7 |
211 | $failed++; |
d0965105 |
212 | last; |
213 | } |
214 | } |
c36dfd09 |
215 | close F; |
0fb301d7 |
216 | is($failed, undef); |
d0965105 |
217 | |
62961d2e |
218 | { |
219 | # Check that warnings are on on I/O, and that they can be muffled. |
220 | |
221 | local $SIG{__WARN__} = sub { $@ = shift }; |
222 | |
223 | undef $@; |
62a28c97 |
224 | open F, ">$a_file"; |
3eb9224a |
225 | binmode(F, ":bytes"); |
62961d2e |
226 | print F chr(0x100); |
227 | close(F); |
228 | |
bb4e15c8 |
229 | like( $@, 'Wide character in print' ); |
62961d2e |
230 | |
231 | undef $@; |
62a28c97 |
232 | open F, ">:utf8", $a_file; |
62961d2e |
233 | print F chr(0x100); |
234 | close(F); |
235 | |
768fd157 |
236 | isnt( defined $@, !0 ); |
62961d2e |
237 | |
238 | undef $@; |
62a28c97 |
239 | open F, ">$a_file"; |
62961d2e |
240 | binmode(F, ":utf8"); |
241 | print F chr(0x100); |
242 | close(F); |
243 | |
768fd157 |
244 | isnt( defined $@, !0 ); |
62961d2e |
245 | |
246 | no warnings 'utf8'; |
247 | |
248 | undef $@; |
62a28c97 |
249 | open F, ">$a_file"; |
62961d2e |
250 | print F chr(0x100); |
251 | close(F); |
252 | |
768fd157 |
253 | isnt( defined $@, !0 ); |
62961d2e |
254 | |
255 | use warnings 'utf8'; |
256 | |
257 | undef $@; |
62a28c97 |
258 | open F, ">$a_file"; |
3eb9224a |
259 | binmode(F, ":bytes"); |
62961d2e |
260 | print F chr(0x100); |
261 | close(F); |
262 | |
bb4e15c8 |
263 | like( $@, 'Wide character in print' ); |
62961d2e |
264 | } |
265 | |
efd8b2ba |
266 | { |
62a28c97 |
267 | open F, ">:bytes",$a_file; print F "\xde"; close F; |
efd8b2ba |
268 | |
62a28c97 |
269 | open F, "<:bytes", $a_file; |
efd8b2ba |
270 | my $b = chr 0x100; |
271 | $b .= <F>; |
0fb301d7 |
272 | is( $b, chr(0x100).chr(0xde), "21395 '.= <>' utf8 vs. bytes" ); |
efd8b2ba |
273 | close F; |
274 | } |
275 | |
276 | { |
62a28c97 |
277 | open F, ">:utf8",$a_file; print F chr 0x100; close F; |
efd8b2ba |
278 | |
62a28c97 |
279 | open F, "<:utf8", $a_file; |
efd8b2ba |
280 | my $b = "\xde"; |
281 | $b .= <F>; |
0fb301d7 |
282 | is( $b, chr(0xde).chr(0x100), "21395 '.= <>' bytes vs. utf8" ); |
efd8b2ba |
283 | close F; |
284 | } |
285 | |
b5d30a84 |
286 | { |
287 | my @a = ( [ 0x007F, "bytes" ], |
288 | [ 0x0080, "bytes" ], |
289 | [ 0x0080, "utf8" ], |
290 | [ 0x0100, "utf8" ] ); |
291 | my $t = 34; |
292 | for my $u (@a) { |
293 | for my $v (@a) { |
294 | # print "# @$u - @$v\n"; |
62a28c97 |
295 | open F, ">$a_file"; |
b5d30a84 |
296 | binmode(F, ":" . $u->[1]); |
297 | print F chr($u->[0]); |
298 | close F; |
299 | |
62a28c97 |
300 | open F, "<$a_file"; |
b5d30a84 |
301 | binmode(F, ":" . $u->[1]); |
302 | |
303 | my $s = chr($v->[0]); |
304 | utf8::upgrade($s) if $v->[1] eq "utf8"; |
305 | |
306 | $s .= <F>; |
0fb301d7 |
307 | is( $s, chr($v->[0]) . chr($u->[0]), 'rcatline utf8' ); |
b5d30a84 |
308 | close F; |
309 | $t++; |
310 | } |
311 | } |
11c2f0cf |
312 | # last test here 49 |
b5d30a84 |
313 | } |
314 | |
4de5f5ca |
315 | { |
316 | # [perl #23428] Somethings rotten in unicode semantics |
62a28c97 |
317 | open F, ">$a_file"; |
4de5f5ca |
318 | binmode F, ":utf8"; |
319 | syswrite(F, $a = chr(0x100)); |
d3b4e16f |
320 | close F; |
4de5f5ca |
321 | is( ord($a), 0x100, '23428 syswrite should not downgrade scalar' ); |
322 | like( $a, qr/^\w+/, '23428 syswrite should not downgrade scalar' ); |
323 | } |
324 | |
554ad1fc |
325 | # sysread() and syswrite() tested in lib/open.t since Fcntl is used |
c88828dc |
326 | |
d3b4e16f |
327 | { |
532ffc5d |
328 | # <FH> on a :utf8 stream should complain immediately with -w |
d3b4e16f |
329 | # if it finds bad UTF-8 (:encoding(utf8) works this way) |
532ffc5d |
330 | use warnings 'utf8'; |
2d79bf7f |
331 | undef $@; |
d3b4e16f |
332 | local $SIG{__WARN__} = sub { $@ = shift }; |
62a28c97 |
333 | open F, ">$a_file"; |
d3b4e16f |
334 | binmode F; |
cc8040a1 |
335 | my ($chrE4, $chrF6) = (chr(0xE4), chr(0xF6)); |
336 | if (ord('A') == 193) # EBCDIC |
337 | { ($chrE4, $chrF6) = (chr(0x43), chr(0xEC)); } |
338 | print F "foo", $chrE4, "\n"; |
339 | print F "foo", $chrF6, "\n"; |
d3b4e16f |
340 | close F; |
62a28c97 |
341 | open F, "<:utf8", $a_file; |
2d79bf7f |
342 | undef $@; |
d3b4e16f |
343 | my $line = <F>; |
cc8040a1 |
344 | my ($chrE4, $chrF6) = ("E4", "F6"); |
345 | if (ord('A') == 193) { ($chrE4, $chrF6) = ("43", "EC"); } # EBCDIC |
346 | like( $@, qr/utf8 "\\x$chrE4" does not map to Unicode .+ <F> line 1/, |
2d79bf7f |
347 | "<:utf8 readline must warn about bad utf8"); |
348 | undef $@; |
349 | $line .= <F>; |
cc8040a1 |
350 | like( $@, qr/utf8 "\\x$chrF6" does not map to Unicode .+ <F> line 2/, |
2d79bf7f |
351 | "<:utf8 rcatline must warn about bad utf8"); |
d3b4e16f |
352 | close F; |
353 | } |