SYN SYN
[p5sagit/p5-mst-13.2.git] / t / pragma / utf8.t
CommitLineData
f96ec2a2 1#!./perl
2
3BEGIN {
4 chdir 't' if -d 't';
22d4bb9c 5 @INC = '../lib';
f96ec2a2 6 $ENV{PERL5LIB} = '../lib';
146174a9 7 if ( ord("\t") != 9 ) { # skip on ebcdic platforms
8 print "1..0 # Skip utf8 tests on ebcdic platform.\n";
9 exit;
10 }
f96ec2a2 11}
12
22d4bb9c 13print "1..99\n";
f96ec2a2 14
15my $test = 1;
16
17sub ok {
18 my ($got,$expect) = @_;
19 print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
20 print "ok $test\n";
21}
22
22d4bb9c 23sub nok {
24 my ($got,$expect) = @_;
25 print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
26 print "ok $test\n";
27}
28
ee8c7f54 29sub ok_bytes {
30 use bytes;
31 my ($got,$expect) = @_;
32 print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
33 print "ok $test\n";
34}
35
22d4bb9c 36sub nok_bytes {
37 use bytes;
38 my ($got,$expect) = @_;
39 print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
40 print "ok $test\n";
41}
ee8c7f54 42
f96ec2a2 43{
44 use utf8;
45 $_ = ">\x{263A}<";
46 s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg;
47 ok $_, '>&#9786;<';
ee8c7f54 48 $test++; # 1
f96ec2a2 49
50 $_ = ">\x{263A}<";
51 my $rx = "\x{80}-\x{10ffff}";
52 s/([$rx])/"&#".ord($1).";"/eg;
53 ok $_, '>&#9786;<';
ee8c7f54 54 $test++; # 2
f96ec2a2 55
56 $_ = ">\x{263A}<";
57 my $rx = "\\x{80}-\\x{10ffff}";
58 s/([$rx])/"&#".ord($1).";"/eg;
59 ok $_, '>&#9786;<';
ee8c7f54 60 $test++; # 3
b8c5462f 61
62 $_ = "alpha,numeric";
63 m/([[:alpha:]]+)/;
64 ok $1, 'alpha';
ee8c7f54 65 $test++; # 4
b8c5462f 66
67 $_ = "alphaNUMERICstring";
68 m/([[:^lower:]]+)/;
69 ok $1, 'NUMERIC';
ee8c7f54 70 $test++; # 5
b8c5462f 71
72 $_ = "alphaNUMERICstring";
73 m/(\p{Ll}+)/;
74 ok $1, 'alpha';
ee8c7f54 75 $test++; # 6
b8c5462f 76
77 $_ = "alphaNUMERICstring";
78 m/(\p{Lu}+)/;
79 ok $1, 'NUMERIC';
ee8c7f54 80 $test++; # 7
b8c5462f 81
82 $_ = "alpha,numeric";
83 m/([\p{IsAlpha}]+)/;
84 ok $1, 'alpha';
ee8c7f54 85 $test++; # 8
b8c5462f 86
87 $_ = "alphaNUMERICstring";
88 m/([^\p{IsLower}]+)/;
89 ok $1, 'NUMERIC';
ee8c7f54 90 $test++; # 9
b8c5462f 91
0f4b6630 92 $_ = "alpha123numeric456";
93 m/([\p{IsDigit}]+)/;
94 ok $1, '123';
ee8c7f54 95 $test++; # 10
b8c5462f 96
0f4b6630 97 $_ = "alpha123numeric456";
98 m/([^\p{IsDigit}]+)/;
99 ok $1, 'alpha';
ee8c7f54 100 $test++; # 11
b8c5462f 101
0f4b6630 102 $_ = ",123alpha,456numeric";
103 m/([\p{IsAlnum}]+)/;
104 ok $1, '123alpha';
ee8c7f54 105 $test++; # 12
106}
107{
108 use utf8;
109
110 $_ = "\x{263A}>\x{263A}\x{263A}";
111
112 ok length, 4;
113 $test++; # 13
114
115 ok length((m/>(.)/)[0]), 1;
116 $test++; # 14
117
118 ok length($&), 2;
119 $test++; # 15
120
121 ok length($'), 1;
122 $test++; # 16
123
124 ok length($`), 1;
125 $test++; # 17
126
127 ok length($1), 1;
128 $test++; # 18
129
130 ok length($tmp=$&), 2;
131 $test++; # 19
132
133 ok length($tmp=$'), 1;
134 $test++; # 20
135
136 ok length($tmp=$`), 1;
137 $test++; # 21
138
139 ok length($tmp=$1), 1;
140 $test++; # 22
141
142 {
143 use bytes;
144
145 my $tmp = $&;
146 ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
147 $test++; # 23
148
149 $tmp = $';
150 ok $tmp, pack("C*", 0342, 0230, 0272);
151 $test++; # 24
152
153 $tmp = $`;
154 ok $tmp, pack("C*", 0342, 0230, 0272);
155 $test++; # 25
156
157 $tmp = $1;
158 ok $tmp, pack("C*", 0342, 0230, 0272);
159 $test++; # 26
160 }
161
162 ok_bytes $&, pack("C*", ord(">"), 0342, 0230, 0272);
163 $test++; # 27
164
165 ok_bytes $', pack("C*", 0342, 0230, 0272);
166 $test++; # 28
167
168 ok_bytes $`, pack("C*", 0342, 0230, 0272);
169 $test++; # 29
170
171 ok_bytes $1, pack("C*", 0342, 0230, 0272);
172 $test++; # 30
173
174 {
175 use bytes;
176 no utf8;
177
178 ok length, 10;
179 $test++; # 31
180
181 ok length((m/>(.)/)[0]), 1;
182 $test++; # 32
183
184 ok length($&), 2;
185 $test++; # 33
186
187 ok length($'), 5;
188 $test++; # 34
189
190 ok length($`), 3;
191 $test++; # 35
192
193 ok length($1), 1;
194 $test++; # 36
195
196 ok $&, pack("C*", ord(">"), 0342);
197 $test++; # 37
198
199 ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
200 $test++; # 38
201
202 ok $`, pack("C*", 0342, 0230, 0272);
203 $test++; # 39
204
205 ok $1, pack("C*", 0342);
206 $test++; # 40
207
208 }
209
210
211 {
212 no utf8;
213 $_="\342\230\272>\342\230\272\342\230\272";
214 }
215
216 ok length, 10;
217 $test++; # 41
218
219 ok length((m/>(.)/)[0]), 1;
220 $test++; # 42
221
222 ok length($&), 2;
223 $test++; # 43
224
225 ok length($'), 1;
226 $test++; # 44
227
228 ok length($`), 1;
229 $test++; # 45
230
231 ok length($1), 1;
232 $test++; # 46
233
234 ok length($tmp=$&), 2;
235 $test++; # 47
236
237 ok length($tmp=$'), 1;
238 $test++; # 48
239
240 ok length($tmp=$`), 1;
241 $test++; # 49
242
243 ok length($tmp=$1), 1;
244 $test++; # 50
245
246 {
247 use bytes;
248
249 my $tmp = $&;
250 ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
251 $test++; # 51
252
253 $tmp = $';
254 ok $tmp, pack("C*", 0342, 0230, 0272);
255 $test++; # 52
256
257 $tmp = $`;
258 ok $tmp, pack("C*", 0342, 0230, 0272);
259 $test++; # 53
260
261 $tmp = $1;
262 ok $tmp, pack("C*", 0342, 0230, 0272);
263 $test++; # 54
264 }
265 {
266 use bytes;
267 no utf8;
268
269 ok length, 10;
270 $test++; # 55
271
272 ok length((m/>(.)/)[0]), 1;
273 $test++; # 56
274
275 ok length($&), 2;
276 $test++; # 57
277
278 ok length($'), 5;
279 $test++; # 58
280
281 ok length($`), 3;
282 $test++; # 59
283
284 ok length($1), 1;
285 $test++; # 60
286
287 ok $&, pack("C*", ord(">"), 0342);
288 $test++; # 61
289
290 ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
291 $test++; # 62
292
293 ok $`, pack("C*", 0342, 0230, 0272);
294 $test++; # 63
295
296 ok $1, pack("C*", 0342);
297 $test++; # 64
298
299 }
300
301 ok "\x{ab}" =~ /^\x{ab}$/, 1;
302 $test++; # 65
0f4b6630 303}
4b19af01 304
305{
306 use utf8;
307 ok_bytes chr(0xe2), pack("C*", 0xc3, 0xa2);
308 $test++; # 66
309}
22d4bb9c 310
311{
312 use utf8;
313 my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
314 ok "@a", "1234 123 2345";
315 $test++; # 67
316}
317
318{
319 use utf8;
320 my $x = chr(123);
321 my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345)));
322 ok "@a", "1234 2345";
323 $test++; # 68
324}
325
326{
327 # bug id 20001009.001
328
329 my($a,$b);
330 { use bytes; $a = "\xc3\xa4"; }
331 { use utf8; $b = "\xe4"; }
332 { use bytes; ok_bytes $a, $b; $test++; } # 69
333 { use utf8; nok $a, $b; $test++; } # 70
334}
335
336{
337 # bug id 20001008.001
338
339 my @x = ("stra\337e 138","stra\337e 138");
340 for (@x) {
341 s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
342 my($latin) = /^(.+)(?:\s+\d)/;
343 print $latin eq "stra\337e" ? "ok $test\n" :
344 "#latin[$latin]\nnot ok $test\n";
345 $test++;
346 $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
347 use utf8;
348 $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
349 }
350}
351
352{
353 # bug id 20000819.004
354
355 $_ = $dx = "\x{10f2}";
356 s/($dx)/$dx$1/;
357 {
358 use bytes;
359 print "not " unless $_ eq "$dx$dx";
360 print "ok $test\n";
361 $test++;
362 }
363
364 $_ = $dx = "\x{10f2}";
365 s/($dx)/$1$dx/;
366 {
367 use bytes;
368 print "not " unless $_ eq "$dx$dx";
369 print "ok $test\n";
370 $test++;
371 }
372
373 $dx = "\x{10f2}";
374 $_ = "\x{10f2}\x{10f2}";
375 s/($dx)($dx)/$1$2/;
376 {
377 use bytes;
378 print "not " unless $_ eq "$dx$dx";
379 print "ok $test\n";
380 $test++;
381 }
382}
383
384{
385 # bug id 20000323.056
386
387 use utf8;
388
389 print "not " unless "\x{41}" eq +v65;
390 print "ok $test\n";
391 $test++;
392
393 print "not " unless "\x41" eq +v65;
394 print "ok $test\n";
395 $test++;
396
397 print "not " unless "\x{c8}" eq +v200;
398 print "ok $test\n";
399 $test++;
400
401 print "not " unless "\xc8" eq +v200;
402 print "ok $test\n";
403 $test++;
404
405 print "not " unless "\x{221b}" eq v8731;
406 print "ok $test\n";
407 $test++;
408}
409
410{
411 # bug id 20000427.003
412
413 use utf8;
414 use warnings;
415 use strict;
416
417 my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}";
418
419 my @charlist = split //, $sushi;
420 my $r = '';
421 foreach my $ch (@charlist) {
422 $r = $r . " " . sprintf "U+%04X", ord($ch);
423 }
424
425 print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B";
426 print "ok $test\n";
427 $test++;
428}
429
430{
431 # bug id 20000901.092
432 # test that undef left and right of utf8 results in a valid string
433
434 my $a;
435 $a .= "\x{1ff}";
436 print "not " unless $a eq "\x{1ff}";
437 print "ok $test\n";
438 $test++;
439}
440
441{
442 # bug id 20000426.003
443
444 use utf8;
445
446 my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20";
447
448 my ($a, $b, $c) = split(/\x40/, $s);
449 print "not "
450 unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a;
451 print "ok $test\n";
452 $test++;
453
454 my ($a, $b) = split(/\x{100}/, $s);
455 print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20";
456 print "ok $test\n";
457 $test++;
458
459 my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s);
460 print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20";
461 print "ok $test\n";
462 $test++;
463
464 my ($a, $b) = split(/\x40\x{80}/, $s);
465 print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20";
466 print "ok $test\n";
467 $test++;
468
469 my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s);
470 print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20";
471 print "ok $test\n";
472 $test++;
473}
474
475{
476 # bug id 20000730.004
477
478 use utf8;
479
480 my $smiley = "\x{263a}";
481
482 for my $s ("\x{263a}", # 1
483 $smiley, # 2
484
485 "" . $smiley, # 3
486 "" . "\x{263a}", # 4
487
488 $smiley . "", # 5
489 "\x{263a}" . "", # 6
490 ) {
491 my $length_chars = length($s);
492 my $length_bytes;
493 { use bytes; $length_bytes = length($s) }
494 my @regex_chars = $s =~ m/(.)/g;
495 my $regex_chars = @regex_chars;
496 my @split_chars = split //, $s;
497 my $split_chars = @split_chars;
498 print "not "
499 unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
500 "1/1/1/3";
501 print "ok $test\n";
502 $test++;
503 }
504
505 for my $s ("\x{263a}" . "\x{263a}", # 7
506 $smiley . $smiley, # 8
507
508 "\x{263a}\x{263a}", # 9
509 "$smiley$smiley", # 10
510
511 "\x{263a}" x 2, # 11
512 $smiley x 2, # 12
513 ) {
514 my $length_chars = length($s);
515 my $length_bytes;
516 { use bytes; $length_bytes = length($s) }
517 my @regex_chars = $s =~ m/(.)/g;
518 my $regex_chars = @regex_chars;
519 my @split_chars = split //, $s;
520 my $split_chars = @split_chars;
521 print "not "
522 unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
523 "2/2/2/6";
524 print "ok $test\n";
525 $test++;
526 }
527}