Once again syncing after too long an absence
[p5sagit/p5-mst-13.2.git] / t / pragma / utf8.t
CommitLineData
f96ec2a2 1#!./perl
2
3BEGIN {
4 chdir 't' if -d 't';
22d4bb9c 5 @INC = '../lib';
f96ec2a2 6 $ENV{PERL5LIB} = '../lib';
146174a9 7 if ( ord("\t") != 9 ) { # skip on ebcdic platforms
8 print "1..0 # Skip utf8 tests on ebcdic platform.\n";
9 exit;
10 }
f96ec2a2 11}
12
0e06870b 13print "1..105\n";
f96ec2a2 14
15my $test = 1;
16
17sub ok {
18 my ($got,$expect) = @_;
19 print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
20 print "ok $test\n";
21}
22
22d4bb9c 23sub nok {
24 my ($got,$expect) = @_;
25 print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
26 print "ok $test\n";
27}
28
ee8c7f54 29sub ok_bytes {
30 use bytes;
31 my ($got,$expect) = @_;
32 print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
33 print "ok $test\n";
34}
35
22d4bb9c 36sub nok_bytes {
37 use bytes;
38 my ($got,$expect) = @_;
39 print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
40 print "ok $test\n";
41}
ee8c7f54 42
f96ec2a2 43{
44 use utf8;
0e06870b 45
f96ec2a2 46 $_ = ">\x{263A}<";
47 s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg;
48 ok $_, '>&#9786;<';
ee8c7f54 49 $test++; # 1
f96ec2a2 50
51 $_ = ">\x{263A}<";
52 my $rx = "\x{80}-\x{10ffff}";
53 s/([$rx])/"&#".ord($1).";"/eg;
54 ok $_, '>&#9786;<';
ee8c7f54 55 $test++; # 2
f96ec2a2 56
57 $_ = ">\x{263A}<";
58 my $rx = "\\x{80}-\\x{10ffff}";
59 s/([$rx])/"&#".ord($1).";"/eg;
60 ok $_, '>&#9786;<';
ee8c7f54 61 $test++; # 3
b8c5462f 62
63 $_ = "alpha,numeric";
64 m/([[:alpha:]]+)/;
65 ok $1, 'alpha';
ee8c7f54 66 $test++; # 4
b8c5462f 67
68 $_ = "alphaNUMERICstring";
69 m/([[:^lower:]]+)/;
70 ok $1, 'NUMERIC';
ee8c7f54 71 $test++; # 5
b8c5462f 72
73 $_ = "alphaNUMERICstring";
74 m/(\p{Ll}+)/;
75 ok $1, 'alpha';
ee8c7f54 76 $test++; # 6
b8c5462f 77
78 $_ = "alphaNUMERICstring";
79 m/(\p{Lu}+)/;
80 ok $1, 'NUMERIC';
ee8c7f54 81 $test++; # 7
b8c5462f 82
83 $_ = "alpha,numeric";
84 m/([\p{IsAlpha}]+)/;
85 ok $1, 'alpha';
ee8c7f54 86 $test++; # 8
b8c5462f 87
88 $_ = "alphaNUMERICstring";
89 m/([^\p{IsLower}]+)/;
90 ok $1, 'NUMERIC';
ee8c7f54 91 $test++; # 9
b8c5462f 92
0f4b6630 93 $_ = "alpha123numeric456";
94 m/([\p{IsDigit}]+)/;
95 ok $1, '123';
ee8c7f54 96 $test++; # 10
b8c5462f 97
0f4b6630 98 $_ = "alpha123numeric456";
99 m/([^\p{IsDigit}]+)/;
100 ok $1, 'alpha';
ee8c7f54 101 $test++; # 11
b8c5462f 102
0f4b6630 103 $_ = ",123alpha,456numeric";
104 m/([\p{IsAlnum}]+)/;
105 ok $1, '123alpha';
ee8c7f54 106 $test++; # 12
107}
ee8c7f54 108
0e06870b 109{
110 # no use utf8 needed
111 $_ = "\x{263A}\x{263A}x\x{263A}y\x{263A}";
112
113 ok length($_), 6; # 13
114 $test++;
ee8c7f54 115
0e06870b 116 ($a) = m/x(.)/;
ee8c7f54 117
0e06870b 118 ok length($a), 1; # 14
119 $test++;
ee8c7f54 120
0e06870b 121 ok length($`), 2; # 15
122 $test++;
123 ok length($&), 2; # 16
124 $test++;
125 ok length($'), 2; # 17
126 $test++;
ee8c7f54 127
0e06870b 128 ok length($1), 1; # 18
129 $test++;
ee8c7f54 130
0e06870b 131 ok length($b=$`), 2; # 19
132 $test++;
ee8c7f54 133
0e06870b 134 ok length($b=$&), 2; # 20
135 $test++;
ee8c7f54 136
0e06870b 137 ok length($b=$'), 2; # 21
138 $test++;
ee8c7f54 139
0e06870b 140 ok length($b=$1), 1; # 22
141 $test++;
ee8c7f54 142
0e06870b 143 ok $a, "\x{263A}"; # 23
144 $test++;
ee8c7f54 145
0e06870b 146 ok $`, "\x{263A}\x{263A}"; # 24
147 $test++;
ee8c7f54 148
0e06870b 149 ok $&, "x\x{263A}"; # 25
150 $test++;
ee8c7f54 151
0e06870b 152 ok $', "y\x{263A}"; # 26
153 $test++;
ee8c7f54 154
0e06870b 155 ok $1, "\x{263A}"; # 27
156 $test++;
ee8c7f54 157
0e06870b 158 ok_bytes $a, "\342\230\272"; # 28
159 $test++;
ee8c7f54 160
0e06870b 161 ok_bytes $1, "\342\230\272"; # 29
162 $test++;
ee8c7f54 163
0e06870b 164 ok_bytes $&, "x\342\230\272"; # 30
165 $test++;
ee8c7f54 166
167 {
0e06870b 168 use utf8; # required
169 $_ = chr(0x263A) . chr(0x263A) . 'x' . chr(0x263A) . 'y' . chr(0x263A);
170 }
ee8c7f54 171
0e06870b 172 ok length($_), 6; # 31
173 $test++;
ee8c7f54 174
0e06870b 175 ($a) = m/x(.)/;
ee8c7f54 176
0e06870b 177 ok length($a), 1; # 32
178 $test++;
ee8c7f54 179
0e06870b 180 ok length($`), 2; # 33
181 $test++;
ee8c7f54 182
0e06870b 183 ok length($&), 2; # 34
184 $test++;
ee8c7f54 185
0e06870b 186 ok length($'), 2; # 35
187 $test++;
ee8c7f54 188
0e06870b 189 ok length($1), 1; # 36
190 $test++;
ee8c7f54 191
0e06870b 192 ok length($b=$`), 2; # 37
193 $test++;
ee8c7f54 194
0e06870b 195 ok length($b=$&), 2; # 38
196 $test++;
ee8c7f54 197
0e06870b 198 ok length($b=$'), 2; # 39
199 $test++;
ee8c7f54 200
0e06870b 201 ok length($b=$1), 1; # 40
202 $test++;
ee8c7f54 203
0e06870b 204 ok $a, "\x{263A}"; # 41
205 $test++;
ee8c7f54 206
0e06870b 207 ok $`, "\x{263A}\x{263A}"; # 42
208 $test++;
ee8c7f54 209
0e06870b 210 ok $&, "x\x{263A}"; # 43
211 $test++;
ee8c7f54 212
0e06870b 213 ok $', "y\x{263A}"; # 44
214 $test++;
ee8c7f54 215
0e06870b 216 ok $1, "\x{263A}"; # 45
217 $test++;
ee8c7f54 218
0e06870b 219 ok_bytes $a, "\342\230\272"; # 46
220 $test++;
ee8c7f54 221
0e06870b 222 ok_bytes $1, "\342\230\272"; # 47
223 $test++;
ee8c7f54 224
0e06870b 225 ok_bytes $&, "x\342\230\272"; # 48
226 $test++;
ee8c7f54 227
0e06870b 228 $_ = "\342\230\272\342\230\272x\342\230\272y\342\230\272";
ee8c7f54 229
0e06870b 230 ok length($_), 14; # 49
231 $test++;
ee8c7f54 232
0e06870b 233 ($a) = m/x(.)/;
ee8c7f54 234
0e06870b 235 ok length($a), 1; # 50
236 $test++;
ee8c7f54 237
0e06870b 238 ok length($`), 6; # 51
239 $test++;
ee8c7f54 240
0e06870b 241 ok length($&), 2; # 52
242 $test++;
ee8c7f54 243
0e06870b 244 ok length($'), 6; # 53
245 $test++;
ee8c7f54 246
0e06870b 247 ok length($1), 1; # 54
248 $test++;
ee8c7f54 249
0e06870b 250 ok length($b=$`), 6; # 55
251 $test++;
ee8c7f54 252
0e06870b 253 ok length($b=$&), 2; # 56
254 $test++;
ee8c7f54 255
0e06870b 256 ok length($b=$'), 6; # 57
257 $test++;
ee8c7f54 258
0e06870b 259 ok length($b=$1), 1; # 58
260 $test++;
ee8c7f54 261
0e06870b 262 ok $a, "\342"; # 59
263 $test++;
ee8c7f54 264
0e06870b 265 ok $`, "\342\230\272\342\230\272"; # 60
266 $test++;
ee8c7f54 267
0e06870b 268 ok $&, "x\342"; # 61
269 $test++;
ee8c7f54 270
0e06870b 271 ok $', "\230\272y\342\230\272"; # 62
272 $test++;
ee8c7f54 273
0e06870b 274 ok $1, "\342"; # 63
275 $test++;
276}
ee8c7f54 277
0e06870b 278{
279 use utf8;
ee8c7f54 280 ok "\x{ab}" =~ /^\x{ab}$/, 1;
0e06870b 281 $test++; # 64
0f4b6630 282}
4b19af01 283
284{
285 use utf8;
286 ok_bytes chr(0xe2), pack("C*", 0xc3, 0xa2);
0e06870b 287 $test++; # 65
4b19af01 288}
22d4bb9c 289
290{
291 use utf8;
292 my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
293 ok "@a", "1234 123 2345";
0e06870b 294 $test++; # 66
22d4bb9c 295}
296
297{
298 use utf8;
299 my $x = chr(123);
300 my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345)));
301 ok "@a", "1234 2345";
0e06870b 302 $test++; # 67
22d4bb9c 303}
304
305{
306 # bug id 20001009.001
307
0e06870b 308 my ($a, $b);
309
310 { use bytes; $a = "\xc3\xa4" }
311 { use utf8; $b = "\xe4" } # \xXX must not produce UTF-8
312
313 print "not " if $a eq $b;
314 print "ok $test\n"; $test++; # 68
315
316 { use utf8; print "not " if $a eq $b; }
317 print "ok $test\n"; $test++; # 69
22d4bb9c 318}
319
320{
321 # bug id 20001008.001
322
323 my @x = ("stra\337e 138","stra\337e 138");
324 for (@x) {
325 s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
326 my($latin) = /^(.+)(?:\s+\d)/;
0e06870b 327 print $latin eq "stra\337e" ? "ok $test\n" : # 70, 71
22d4bb9c 328 "#latin[$latin]\nnot ok $test\n";
329 $test++;
330 $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
331 use utf8;
332 $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
333 }
334}
335
336{
22d4bb9c 337 # bug id 20000427.003
338
339 use utf8;
340 use warnings;
341 use strict;
342
343 my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}";
344
345 my @charlist = split //, $sushi;
346 my $r = '';
347 foreach my $ch (@charlist) {
348 $r = $r . " " . sprintf "U+%04X", ord($ch);
349 }
350
351 print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B";
0e06870b 352 print "ok $test\n"; # 72
22d4bb9c 353 $test++;
354}
355
356{
357 # bug id 20000426.003
358
359 use utf8;
360
361 my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20";
362
363 my ($a, $b, $c) = split(/\x40/, $s);
364 print "not "
365 unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a;
366 print "ok $test\n";
0e06870b 367 $test++; # 73
22d4bb9c 368
369 my ($a, $b) = split(/\x{100}/, $s);
370 print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20";
371 print "ok $test\n";
0e06870b 372 $test++; # 74
22d4bb9c 373
374 my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s);
375 print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20";
376 print "ok $test\n";
0e06870b 377 $test++; # 75
22d4bb9c 378
379 my ($a, $b) = split(/\x40\x{80}/, $s);
380 print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20";
381 print "ok $test\n";
0e06870b 382 $test++; # 76
22d4bb9c 383
384 my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s);
385 print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20";
386 print "ok $test\n";
0e06870b 387 $test++; # 77
22d4bb9c 388}
389
390{
391 # bug id 20000730.004
392
393 use utf8;
394
395 my $smiley = "\x{263a}";
396
0e06870b 397 for my $s ("\x{263a}", # 78
398 $smiley, # 79
22d4bb9c 399
0e06870b 400 "" . $smiley, # 80
401 "" . "\x{263a}", # 81
22d4bb9c 402
0e06870b 403 $smiley . "", # 82
404 "\x{263a}" . "", # 83
22d4bb9c 405 ) {
406 my $length_chars = length($s);
407 my $length_bytes;
408 { use bytes; $length_bytes = length($s) }
409 my @regex_chars = $s =~ m/(.)/g;
410 my $regex_chars = @regex_chars;
411 my @split_chars = split //, $s;
412 my $split_chars = @split_chars;
413 print "not "
414 unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
415 "1/1/1/3";
416 print "ok $test\n";
417 $test++;
418 }
419
0e06870b 420 for my $s ("\x{263a}" . "\x{263a}", # 84
421 $smiley . $smiley, # 85
22d4bb9c 422
0e06870b 423 "\x{263a}\x{263a}", # 86
424 "$smiley$smiley", # 87
22d4bb9c 425
0e06870b 426 "\x{263a}" x 2, # 88
427 $smiley x 2, # 89
22d4bb9c 428 ) {
429 my $length_chars = length($s);
430 my $length_bytes;
431 { use bytes; $length_bytes = length($s) }
432 my @regex_chars = $s =~ m/(.)/g;
433 my $regex_chars = @regex_chars;
434 my @split_chars = split //, $s;
435 my $split_chars = @split_chars;
436 print "not "
437 unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
438 "2/2/2/6";
439 print "ok $test\n";
440 $test++;
441 }
442}
0e06870b 443
444{
445 use utf8;
446
447 print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
448 print "ok $test\n";
449 $test++; # 90
450
451 print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
452 print "ok $test\n";
453 $test++; # 91
454
455 print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
456 print "ok $test\n";
457 $test++; # 92
458
459 print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
460 print "ok $test\n";
461 $test++; # 93
462
463 print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
464 print "ok $test\n";
465 $test++; # 94
466
467 print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
468 print "ok $test\n";
469 $test++; # 95
470
471 print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
472 print "ok $test\n";
473 $test++; # 96
474
475 print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
476 print "ok $test\n";
477 $test++; # 97
478}
479
480{
481 # the first half of 20001028.003
482
483 my $X = chr(1448);
484 my ($Y) = $X =~ /(.*)/;
485 print "not " unless length $Y == 1;
486 print "ok $test\n";
487 $test++; # 98
488}
489
490{
491 # 20001108.001
492
493 use utf8;
494 my $X = "Szab\x{f3},Bal\x{e1}zs";
495 my $Y = $X;
496 $Y =~ s/(B)/$1/ for 0..3;
497 print "not " unless $Y eq $X;
498 print "ok $test\n";
499 $test++; # 99
500}
501
502{
503 # 20001114.001
504
505 use utf8;
506 use charnames ':full';
507 my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}";
508 print "not " unless ord($text) == 0xc4;
509 print "ok $test\n";
510 $test++; # 100
511}
512
513{
514 # 20001205.014
515
516 use utf8;
517
518 my $a = "ABC\x{263A}";
519
520 my @b = split( //, $a );
521
522 print "not " unless @b == 4;
523 print "ok $test\n";
524 $test++; # 101
525
526 print "not " unless length($b[3]) == 1;
527 print "ok $test\n";
528 $test++; # 102
529
530 $a =~ s/^A/Z/;
531 print "not " unless length($a) == 4;
532 print "ok $test\n";
533 $test++; # 103
534}
535
536{
537 # the second half of 20001028.003
538
539 use utf8;
540 $X =~ s/^/chr(1488)/e;
541 print "not " unless length $X == 1;
542 print "ok $test\n";
543 $test++; # 104
544}
545
546{
547 # 20000517.001
548
549 my $x = "\x{100}A";
550
551 $x =~ s/A/B/;
552
553 print "not " unless $x eq "\x{100}B" && length($x) == 2;
554 print "ok $test\n";
555 $test++; # 105
556}