Re: [PATCH] Warn on use of reference as array elem
[p5sagit/p5-mst-13.2.git] / t / pragma / utf8.t
CommitLineData
f96ec2a2 1#!./perl
2
3BEGIN {
4 chdir 't' if -d 't';
20822f61 5 @INC = '../lib';
f96ec2a2 6 $ENV{PERL5LIB} = '../lib';
f70c35af 7 if ( ord("\t") != 9 ) { # skip on ebcdic platforms
8 print "1..0 # Skip utf8 tests on ebcdic platform.\n";
9 exit;
10 }
f96ec2a2 11}
12
11c06dba 13print "1..105\n";
f96ec2a2 14
15my $test = 1;
16
17sub ok {
18 my ($got,$expect) = @_;
19 print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
20 print "ok $test\n";
21}
22
7bbb0251 23sub nok {
24 my ($got,$expect) = @_;
25 print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
26 print "ok $test\n";
27}
28
be341bce 29sub ok_bytes {
30 use bytes;
31 my ($got,$expect) = @_;
32 print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
33 print "ok $test\n";
34}
35
7bbb0251 36sub nok_bytes {
37 use bytes;
38 my ($got,$expect) = @_;
39 print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
40 print "ok $test\n";
41}
be341bce 42
f96ec2a2 43{
44 use utf8;
ffc61ed2 45
f96ec2a2 46 $_ = ">\x{263A}<";
47 s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg;
48 ok $_, '>&#9786;<';
c5cc3500 49 $test++; # 1
f96ec2a2 50
51 $_ = ">\x{263A}<";
52 my $rx = "\x{80}-\x{10ffff}";
53 s/([$rx])/"&#".ord($1).";"/eg;
54 ok $_, '>&#9786;<';
c5cc3500 55 $test++; # 2
f96ec2a2 56
57 $_ = ">\x{263A}<";
58 my $rx = "\\x{80}-\\x{10ffff}";
59 s/([$rx])/"&#".ord($1).";"/eg;
60 ok $_, '>&#9786;<';
c5cc3500 61 $test++; # 3
b8c5462f 62
63 $_ = "alpha,numeric";
64 m/([[:alpha:]]+)/;
65 ok $1, 'alpha';
c5cc3500 66 $test++; # 4
b8c5462f 67
68 $_ = "alphaNUMERICstring";
69 m/([[:^lower:]]+)/;
70 ok $1, 'NUMERIC';
c5cc3500 71 $test++; # 5
b8c5462f 72
73 $_ = "alphaNUMERICstring";
74 m/(\p{Ll}+)/;
75 ok $1, 'alpha';
c5cc3500 76 $test++; # 6
b8c5462f 77
78 $_ = "alphaNUMERICstring";
79 m/(\p{Lu}+)/;
80 ok $1, 'NUMERIC';
c5cc3500 81 $test++; # 7
b8c5462f 82
83 $_ = "alpha,numeric";
84 m/([\p{IsAlpha}]+)/;
85 ok $1, 'alpha';
c5cc3500 86 $test++; # 8
b8c5462f 87
88 $_ = "alphaNUMERICstring";
89 m/([^\p{IsLower}]+)/;
90 ok $1, 'NUMERIC';
c5cc3500 91 $test++; # 9
b8c5462f 92
0f4b6630 93 $_ = "alpha123numeric456";
94 m/([\p{IsDigit}]+)/;
95 ok $1, '123';
c5cc3500 96 $test++; # 10
b8c5462f 97
0f4b6630 98 $_ = "alpha123numeric456";
99 m/([^\p{IsDigit}]+)/;
100 ok $1, 'alpha';
c5cc3500 101 $test++; # 11
b8c5462f 102
0f4b6630 103 $_ = ",123alpha,456numeric";
104 m/([\p{IsAlnum}]+)/;
105 ok $1, '123alpha';
c5cc3500 106 $test++; # 12
0f4b6630 107}
3b5dab68 108
a197cbdd 109{
ffc61ed2 110 # no use utf8 needed
111 $_ = "\x{263A}\x{263A}x\x{263A}y\x{263A}";
112
113 ok length($_), 6; # 13
114 $test++;
a197cbdd 115
ffc61ed2 116 ($a) = m/x(.)/;
a197cbdd 117
ffc61ed2 118 ok length($a), 1; # 14
119 $test++;
a197cbdd 120
ffc61ed2 121 ok length($`), 2; # 15
122 $test++;
123 ok length($&), 2; # 16
124 $test++;
125 ok length($'), 2; # 17
126 $test++;
a197cbdd 127
ffc61ed2 128 ok length($1), 1; # 18
129 $test++;
a197cbdd 130
ffc61ed2 131 ok length($b=$`), 2; # 19
132 $test++;
a197cbdd 133
ffc61ed2 134 ok length($b=$&), 2; # 20
135 $test++;
a197cbdd 136
ffc61ed2 137 ok length($b=$'), 2; # 21
138 $test++;
a197cbdd 139
ffc61ed2 140 ok length($b=$1), 1; # 22
141 $test++;
c5cc3500 142
ffc61ed2 143 ok $a, "\x{263A}"; # 23
144 $test++;
a197cbdd 145
ffc61ed2 146 ok $`, "\x{263A}\x{263A}"; # 24
147 $test++;
a197cbdd 148
ffc61ed2 149 ok $&, "x\x{263A}"; # 25
150 $test++;
a197cbdd 151
ffc61ed2 152 ok $', "y\x{263A}"; # 26
153 $test++;
a197cbdd 154
ffc61ed2 155 ok $1, "\x{263A}"; # 27
156 $test++;
be341bce 157
ffc61ed2 158 ok_bytes $a, "\342\230\272"; # 28
159 $test++;
be341bce 160
ffc61ed2 161 ok_bytes $1, "\342\230\272"; # 29
162 $test++;
be341bce 163
ffc61ed2 164 ok_bytes $&, "x\342\230\272"; # 30
165 $test++;
be341bce 166
a197cbdd 167 {
ffc61ed2 168 use utf8; # required
169 $_ = chr(0x263A) . chr(0x263A) . 'x' . chr(0x263A) . 'y' . chr(0x263A);
170 }
a197cbdd 171
ffc61ed2 172 ok length($_), 6; # 31
173 $test++;
a197cbdd 174
ffc61ed2 175 ($a) = m/x(.)/;
a197cbdd 176
ffc61ed2 177 ok length($a), 1; # 32
178 $test++;
a197cbdd 179
ffc61ed2 180 ok length($`), 2; # 33
181 $test++;
a197cbdd 182
ffc61ed2 183 ok length($&), 2; # 34
184 $test++;
a197cbdd 185
ffc61ed2 186 ok length($'), 2; # 35
187 $test++;
a197cbdd 188
ffc61ed2 189 ok length($1), 1; # 36
190 $test++;
a197cbdd 191
ffc61ed2 192 ok length($b=$`), 2; # 37
193 $test++;
a197cbdd 194
ffc61ed2 195 ok length($b=$&), 2; # 38
196 $test++;
a197cbdd 197
ffc61ed2 198 ok length($b=$'), 2; # 39
199 $test++;
a197cbdd 200
ffc61ed2 201 ok length($b=$1), 1; # 40
202 $test++;
a197cbdd 203
ffc61ed2 204 ok $a, "\x{263A}"; # 41
205 $test++;
a197cbdd 206
ffc61ed2 207 ok $`, "\x{263A}\x{263A}"; # 42
208 $test++;
a197cbdd 209
ffc61ed2 210 ok $&, "x\x{263A}"; # 43
211 $test++;
a197cbdd 212
ffc61ed2 213 ok $', "y\x{263A}"; # 44
214 $test++;
a197cbdd 215
ffc61ed2 216 ok $1, "\x{263A}"; # 45
217 $test++;
a197cbdd 218
ffc61ed2 219 ok_bytes $a, "\342\230\272"; # 46
220 $test++;
a197cbdd 221
ffc61ed2 222 ok_bytes $1, "\342\230\272"; # 47
223 $test++;
a197cbdd 224
ffc61ed2 225 ok_bytes $&, "x\342\230\272"; # 48
226 $test++;
a197cbdd 227
ffc61ed2 228 $_ = "\342\230\272\342\230\272x\342\230\272y\342\230\272";
a197cbdd 229
ffc61ed2 230 ok length($_), 14; # 49
231 $test++;
a197cbdd 232
ffc61ed2 233 ($a) = m/x(.)/;
a197cbdd 234
ffc61ed2 235 ok length($a), 1; # 50
236 $test++;
a197cbdd 237
ffc61ed2 238 ok length($`), 6; # 51
239 $test++;
a197cbdd 240
ffc61ed2 241 ok length($&), 2; # 52
242 $test++;
3b5dab68 243
ffc61ed2 244 ok length($'), 6; # 53
245 $test++;
a197cbdd 246
ffc61ed2 247 ok length($1), 1; # 54
248 $test++;
a197cbdd 249
ffc61ed2 250 ok length($b=$`), 6; # 55
251 $test++;
a197cbdd 252
ffc61ed2 253 ok length($b=$&), 2; # 56
254 $test++;
a197cbdd 255
ffc61ed2 256 ok length($b=$'), 6; # 57
257 $test++;
a197cbdd 258
ffc61ed2 259 ok length($b=$1), 1; # 58
260 $test++;
a197cbdd 261
ffc61ed2 262 ok $a, "\342"; # 59
263 $test++;
a197cbdd 264
ffc61ed2 265 ok $`, "\342\230\272\342\230\272"; # 60
266 $test++;
a197cbdd 267
ffc61ed2 268 ok $&, "x\342"; # 61
269 $test++;
a197cbdd 270
ffc61ed2 271 ok $', "\230\272y\342\230\272"; # 62
272 $test++;
a197cbdd 273
ffc61ed2 274 ok $1, "\342"; # 63
275 $test++;
276}
de35ba6f 277
ffc61ed2 278{
279 use utf8;
de35ba6f 280 ok "\x{ab}" =~ /^\x{ab}$/, 1;
ffc61ed2 281 $test++; # 64
a197cbdd 282}
aaa68c4a 283
284{
285 use utf8;
286 ok_bytes chr(0xe2), pack("C*", 0xc3, 0xa2);
ffc61ed2 287 $test++; # 65
aaa68c4a 288}
28cb3359 289
290{
291 use utf8;
292 my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
293 ok "@a", "1234 123 2345";
ffc61ed2 294 $test++; # 66
28cb3359 295}
296
297{
298 use utf8;
299 my $x = chr(123);
300 my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345)));
301 ok "@a", "1234 2345";
ffc61ed2 302 $test++; # 67
28cb3359 303}
31067593 304
7bbb0251 305{
da450f52 306 # bug id 20001009.001
307
89491803 308 my ($a, $b);
309
310 { use bytes; $a = "\xc3\xa4" }
311 { use utf8; $b = "\xe4" } # \xXX must not produce UTF-8
312
313 print "not " if $a eq $b;
ffc61ed2 314 print "ok $test\n"; $test++; # 68
89491803 315
316 { use utf8; print "not " if $a eq $b; }
ffc61ed2 317 print "ok $test\n"; $test++; # 69
7bbb0251 318}
31067593 319
320{
da450f52 321 # bug id 20001008.001
322
31067593 323 my @x = ("stra\337e 138","stra\337e 138");
324 for (@x) {
325 s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
326 my($latin) = /^(.+)(?:\s+\d)/;
ffc61ed2 327 print $latin eq "stra\337e" ? "ok $test\n" : # 70, 71
31067593 328 "#latin[$latin]\nnot ok $test\n";
329 $test++;
330 $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
331 use utf8;
332 $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
333 }
334}
b7018214 335
336{
da450f52 337 # bug id 20000427.003
338
339 use utf8;
340 use warnings;
341 use strict;
342
343 my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}";
344
345 my @charlist = split //, $sushi;
346 my $r = '';
347 foreach my $ch (@charlist) {
348 $r = $r . " " . sprintf "U+%04X", ord($ch);
349 }
350
351 print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B";
ffc61ed2 352 print "ok $test\n"; # 72
da450f52 353 $test++;
354}
355
356{
93f04dac 357 # bug id 20000426.003
358
359 use utf8;
360
361 my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20";
362
363 my ($a, $b, $c) = split(/\x40/, $s);
364 print "not "
365 unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a;
366 print "ok $test\n";
ffc61ed2 367 $test++; # 73
93f04dac 368
369 my ($a, $b) = split(/\x{100}/, $s);
370 print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20";
371 print "ok $test\n";
ffc61ed2 372 $test++; # 74
93f04dac 373
374 my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s);
375 print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20";
376 print "ok $test\n";
ffc61ed2 377 $test++; # 75
93f04dac 378
379 my ($a, $b) = split(/\x40\x{80}/, $s);
380 print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20";
381 print "ok $test\n";
ffc61ed2 382 $test++; # 76
93f04dac 383
384 my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s);
385 print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20";
386 print "ok $test\n";
ffc61ed2 387 $test++; # 77
93f04dac 388}
60ff4832 389
390{
391 # bug id 20000730.004
392
393 use utf8;
394
395 my $smiley = "\x{263a}";
396
ffc61ed2 397 for my $s ("\x{263a}", # 78
398 $smiley, # 79
60ff4832 399
ffc61ed2 400 "" . $smiley, # 80
401 "" . "\x{263a}", # 81
60ff4832 402
ffc61ed2 403 $smiley . "", # 82
404 "\x{263a}" . "", # 83
60ff4832 405 ) {
406 my $length_chars = length($s);
407 my $length_bytes;
408 { use bytes; $length_bytes = length($s) }
409 my @regex_chars = $s =~ m/(.)/g;
410 my $regex_chars = @regex_chars;
411 my @split_chars = split //, $s;
412 my $split_chars = @split_chars;
413 print "not "
414 unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
415 "1/1/1/3";
416 print "ok $test\n";
417 $test++;
418 }
419
ffc61ed2 420 for my $s ("\x{263a}" . "\x{263a}", # 84
421 $smiley . $smiley, # 85
60ff4832 422
ffc61ed2 423 "\x{263a}\x{263a}", # 86
424 "$smiley$smiley", # 87
60ff4832 425
ffc61ed2 426 "\x{263a}" x 2, # 88
427 $smiley x 2, # 89
60ff4832 428 ) {
429 my $length_chars = length($s);
430 my $length_bytes;
431 { use bytes; $length_bytes = length($s) }
432 my @regex_chars = $s =~ m/(.)/g;
433 my $regex_chars = @regex_chars;
434 my @split_chars = split //, $s;
435 my $split_chars = @split_chars;
436 print "not "
437 unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
438 "2/2/2/6";
439 print "ok $test\n";
440 $test++;
441 }
442}
ffc61ed2 443
444{
445 use utf8;
446
447 print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
448 print "ok $test\n";
449 $test++; # 90
450
451 print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
452 print "ok $test\n";
453 $test++; # 91
454
455 print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
456 print "ok $test\n";
457 $test++; # 92
458
459 print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
460 print "ok $test\n";
461 $test++; # 93
462
463 print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
464 print "ok $test\n";
465 $test++; # 94
466
467 print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
468 print "ok $test\n";
469 $test++; # 95
470
471 print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
472 print "ok $test\n";
473 $test++; # 96
474
475 print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
476 print "ok $test\n";
477 $test++; # 97
478}
479
480{
481 # the first half of 20001028.003
482
483 my $X = chr(1448);
484 my ($Y) = $X =~ /(.*)/;
485 print "not " unless length $Y == 1;
486 print "ok $test\n";
487 $test++; # 98
488}
489
490{
491 # 20001108.001
492
493 use utf8;
494 my $X = "Szab\x{f3},Bal\x{e1}zs";
495 my $Y = $X;
496 $Y =~ s/(B)/$1/ for 0..3;
497 print "not " unless $Y eq $X;
498 print "ok $test\n";
499 $test++; # 99
500}
501
502{
503 # 20001114.001
504
505 use utf8;
506 use charnames ':full';
507 my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}";
508 print "not " unless ord($text) == 0xc4;
509 print "ok $test\n";
510 $test++; # 100
511}
512
513{
514 # 20001205.014
515
516 use utf8;
517
518 my $a = "ABC\x{263A}";
519
520 my @b = split( //, $a );
521
522 print "not " unless @b == 4;
523 print "ok $test\n";
524 $test++; # 101
525
526 print "not " unless length($b[3]) == 1;
527 print "ok $test\n";
528 $test++; # 102
529
530 $a =~ s/^A/Z/;
531 print "not " unless length($a) == 4;
532 print "ok $test\n";
533 $test++; # 103
534}
535
536{
537 # the second half of 20001028.003
538
539 use utf8;
540 $X =~ s/^/chr(1488)/e;
541 print "not " unless length $X == 1;
542 print "ok $test\n";
543 $test++; # 104
544}
545
11c06dba 546{
547 # 20000517.001
548
549 my $x = "\x{100}A";
550
551 $x =~ s/A/B/;
552
553 print "not " unless $x eq "\x{100}B" && length($x) == 2;
554 print "ok $test\n";
555 $test++; # 105
556}