Remove an unnecessary 'use utf8' (unnecessary because \x{...}
[p5sagit/p5-mst-13.2.git] / t / pragma / utf8.t
CommitLineData
f96ec2a2 1#!./perl
2
3BEGIN {
4 chdir 't' if -d 't';
20822f61 5 @INC = '../lib';
f96ec2a2 6 $ENV{PERL5LIB} = '../lib';
f70c35af 7 if ( ord("\t") != 9 ) { # skip on ebcdic platforms
8 print "1..0 # Skip utf8 tests on ebcdic platform.\n";
9 exit;
10 }
f96ec2a2 11}
12
a9917092 13print "1..103\n";
f96ec2a2 14
15my $test = 1;
16
17sub ok {
18 my ($got,$expect) = @_;
19 print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
20 print "ok $test\n";
21}
22
7bbb0251 23sub nok {
24 my ($got,$expect) = @_;
25 print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
26 print "ok $test\n";
27}
28
be341bce 29sub ok_bytes {
30 use bytes;
31 my ($got,$expect) = @_;
32 print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
33 print "ok $test\n";
34}
35
7bbb0251 36sub nok_bytes {
37 use bytes;
38 my ($got,$expect) = @_;
39 print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
40 print "ok $test\n";
41}
be341bce 42
f96ec2a2 43{
44 use utf8;
45 $_ = ">\x{263A}<";
46 s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg;
47 ok $_, '>&#9786;<';
c5cc3500 48 $test++; # 1
f96ec2a2 49
50 $_ = ">\x{263A}<";
51 my $rx = "\x{80}-\x{10ffff}";
52 s/([$rx])/"&#".ord($1).";"/eg;
53 ok $_, '>&#9786;<';
c5cc3500 54 $test++; # 2
f96ec2a2 55
56 $_ = ">\x{263A}<";
57 my $rx = "\\x{80}-\\x{10ffff}";
58 s/([$rx])/"&#".ord($1).";"/eg;
59 ok $_, '>&#9786;<';
c5cc3500 60 $test++; # 3
b8c5462f 61
62 $_ = "alpha,numeric";
63 m/([[:alpha:]]+)/;
64 ok $1, 'alpha';
c5cc3500 65 $test++; # 4
b8c5462f 66
67 $_ = "alphaNUMERICstring";
68 m/([[:^lower:]]+)/;
69 ok $1, 'NUMERIC';
c5cc3500 70 $test++; # 5
b8c5462f 71
72 $_ = "alphaNUMERICstring";
73 m/(\p{Ll}+)/;
74 ok $1, 'alpha';
c5cc3500 75 $test++; # 6
b8c5462f 76
77 $_ = "alphaNUMERICstring";
78 m/(\p{Lu}+)/;
79 ok $1, 'NUMERIC';
c5cc3500 80 $test++; # 7
b8c5462f 81
82 $_ = "alpha,numeric";
83 m/([\p{IsAlpha}]+)/;
84 ok $1, 'alpha';
c5cc3500 85 $test++; # 8
b8c5462f 86
87 $_ = "alphaNUMERICstring";
88 m/([^\p{IsLower}]+)/;
89 ok $1, 'NUMERIC';
c5cc3500 90 $test++; # 9
b8c5462f 91
0f4b6630 92 $_ = "alpha123numeric456";
93 m/([\p{IsDigit}]+)/;
94 ok $1, '123';
c5cc3500 95 $test++; # 10
b8c5462f 96
0f4b6630 97 $_ = "alpha123numeric456";
98 m/([^\p{IsDigit}]+)/;
99 ok $1, 'alpha';
c5cc3500 100 $test++; # 11
b8c5462f 101
0f4b6630 102 $_ = ",123alpha,456numeric";
103 m/([\p{IsAlnum}]+)/;
104 ok $1, '123alpha';
c5cc3500 105 $test++; # 12
0f4b6630 106}
3b5dab68 107
a197cbdd 108{
109 use utf8;
110
111 $_ = "\x{263A}>\x{263A}\x{263A}";
112
113 ok length, 4;
c5cc3500 114 $test++; # 13
a197cbdd 115
116 ok length((m/>(.)/)[0]), 1;
c5cc3500 117 $test++; # 14
a197cbdd 118
119 ok length($&), 2;
c5cc3500 120 $test++; # 15
a197cbdd 121
122 ok length($'), 1;
c5cc3500 123 $test++; # 16
a197cbdd 124
125 ok length($`), 1;
c5cc3500 126 $test++; # 17
a197cbdd 127
128 ok length($1), 1;
c5cc3500 129 $test++; # 18
a197cbdd 130
131 ok length($tmp=$&), 2;
c5cc3500 132 $test++; # 19
a197cbdd 133
134 ok length($tmp=$'), 1;
c5cc3500 135 $test++; # 20
a197cbdd 136
137 ok length($tmp=$`), 1;
c5cc3500 138 $test++; # 21
a197cbdd 139
140 ok length($tmp=$1), 1;
c5cc3500 141 $test++; # 22
a197cbdd 142
c5cc3500 143 {
be341bce 144 use bytes;
c5cc3500 145
be341bce 146 my $tmp = $&;
c5cc3500 147 ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
148 $test++; # 23
a197cbdd 149
c5cc3500 150 $tmp = $';
151 ok $tmp, pack("C*", 0342, 0230, 0272);
152 $test++; # 24
a197cbdd 153
c5cc3500 154 $tmp = $`;
155 ok $tmp, pack("C*", 0342, 0230, 0272);
156 $test++; # 25
a197cbdd 157
c5cc3500 158 $tmp = $1;
159 ok $tmp, pack("C*", 0342, 0230, 0272);
160 $test++; # 26
161 }
a197cbdd 162
be341bce 163 ok_bytes $&, pack("C*", ord(">"), 0342, 0230, 0272);
164 $test++; # 27
165
166 ok_bytes $', pack("C*", 0342, 0230, 0272);
167 $test++; # 28
168
169 ok_bytes $`, pack("C*", 0342, 0230, 0272);
170 $test++; # 29
171
172 ok_bytes $1, pack("C*", 0342, 0230, 0272);
173 $test++; # 30
174
a197cbdd 175 {
176 use bytes;
177 no utf8;
178
179 ok length, 10;
be341bce 180 $test++; # 31
a197cbdd 181
182 ok length((m/>(.)/)[0]), 1;
be341bce 183 $test++; # 32
a197cbdd 184
185 ok length($&), 2;
be341bce 186 $test++; # 33
a197cbdd 187
188 ok length($'), 5;
be341bce 189 $test++; # 34
a197cbdd 190
191 ok length($`), 3;
be341bce 192 $test++; # 35
a197cbdd 193
194 ok length($1), 1;
be341bce 195 $test++; # 36
a197cbdd 196
197 ok $&, pack("C*", ord(">"), 0342);
be341bce 198 $test++; # 37
a197cbdd 199
200 ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
be341bce 201 $test++; # 38
a197cbdd 202
203 ok $`, pack("C*", 0342, 0230, 0272);
be341bce 204 $test++; # 39
a197cbdd 205
206 ok $1, pack("C*", 0342);
be341bce 207 $test++; # 40
a197cbdd 208 }
209
a197cbdd 210 {
211 no utf8;
212 $_="\342\230\272>\342\230\272\342\230\272";
213 }
214
215 ok length, 10;
be341bce 216 $test++; # 41
a197cbdd 217
218 ok length((m/>(.)/)[0]), 1;
be341bce 219 $test++; # 42
a197cbdd 220
221 ok length($&), 2;
be341bce 222 $test++; # 43
a197cbdd 223
224 ok length($'), 1;
be341bce 225 $test++; # 44
a197cbdd 226
227 ok length($`), 1;
be341bce 228 $test++; # 45
a197cbdd 229
230 ok length($1), 1;
be341bce 231 $test++; # 46
a197cbdd 232
233 ok length($tmp=$&), 2;
be341bce 234 $test++; # 47
a197cbdd 235
236 ok length($tmp=$'), 1;
be341bce 237 $test++; # 48
a197cbdd 238
239 ok length($tmp=$`), 1;
be341bce 240 $test++; # 49
a197cbdd 241
242 ok length($tmp=$1), 1;
be341bce 243 $test++; # 50
a197cbdd 244
c5cc3500 245 {
246 use bytes;
a197cbdd 247
c5cc3500 248 my $tmp = $&;
249 ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
be341bce 250 $test++; # 51
a197cbdd 251
c5cc3500 252 $tmp = $';
253 ok $tmp, pack("C*", 0342, 0230, 0272);
be341bce 254 $test++; # 52
a197cbdd 255
c5cc3500 256 $tmp = $`;
257 ok $tmp, pack("C*", 0342, 0230, 0272);
be341bce 258 $test++; # 53
a197cbdd 259
c5cc3500 260 $tmp = $1;
261 ok $tmp, pack("C*", 0342, 0230, 0272);
be341bce 262 $test++; # 54
c5cc3500 263 }
3b5dab68 264
a197cbdd 265 {
266 use bytes;
267 no utf8;
268
269 ok length, 10;
be341bce 270 $test++; # 55
a197cbdd 271
272 ok length((m/>(.)/)[0]), 1;
be341bce 273 $test++; # 56
a197cbdd 274
275 ok length($&), 2;
be341bce 276 $test++; # 57
a197cbdd 277
278 ok length($'), 5;
be341bce 279 $test++; # 58
a197cbdd 280
281 ok length($`), 3;
be341bce 282 $test++; # 59
a197cbdd 283
284 ok length($1), 1;
be341bce 285 $test++; # 60
a197cbdd 286
287 ok $&, pack("C*", ord(">"), 0342);
be341bce 288 $test++; # 61
a197cbdd 289
290 ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
be341bce 291 $test++; # 62
a197cbdd 292
293 ok $`, pack("C*", 0342, 0230, 0272);
be341bce 294 $test++; # 63
a197cbdd 295
296 ok $1, pack("C*", 0342);
be341bce 297 $test++; # 64
a197cbdd 298 }
de35ba6f 299
300 ok "\x{ab}" =~ /^\x{ab}$/, 1;
be341bce 301 $test++; # 65
a197cbdd 302}
aaa68c4a 303
304{
305 use utf8;
306 ok_bytes chr(0xe2), pack("C*", 0xc3, 0xa2);
307 $test++; # 66
308}
28cb3359 309
310{
311 use utf8;
312 my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
313 ok "@a", "1234 123 2345";
314 $test++; # 67
315}
316
317{
318 use utf8;
319 my $x = chr(123);
320 my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345)));
321 ok "@a", "1234 2345";
322 $test++; # 68
323}
31067593 324
7bbb0251 325{
da450f52 326 # bug id 20001009.001
327
89491803 328 my ($a, $b);
329
330 { use bytes; $a = "\xc3\xa4" }
331 { use utf8; $b = "\xe4" } # \xXX must not produce UTF-8
332
333 print "not " if $a eq $b;
334 print "ok $test\n"; $test++;
335
336 { use utf8; print "not " if $a eq $b; }
337 print "ok $test\n"; $test++;
7bbb0251 338}
31067593 339
340{
da450f52 341 # bug id 20001008.001
342
31067593 343 my @x = ("stra\337e 138","stra\337e 138");
344 for (@x) {
345 s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
346 my($latin) = /^(.+)(?:\s+\d)/;
347 print $latin eq "stra\337e" ? "ok $test\n" :
348 "#latin[$latin]\nnot ok $test\n";
349 $test++;
350 $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
351 use utf8;
352 $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
353 }
354}
b7018214 355
356{
da450f52 357 # bug id 20000819.004
358
b7018214 359 $_ = $dx = "\x{10f2}";
360 s/($dx)/$dx$1/;
361 {
362 use bytes;
363 print "not " unless $_ eq "$dx$dx";
364 print "ok $test\n";
365 $test++;
366 }
367
368 $_ = $dx = "\x{10f2}";
369 s/($dx)/$1$dx/;
370 {
371 use bytes;
372 print "not " unless $_ eq "$dx$dx";
373 print "ok $test\n";
374 $test++;
375 }
376
377 $dx = "\x{10f2}";
378 $_ = "\x{10f2}\x{10f2}";
379 s/($dx)($dx)/$1$2/;
380 {
381 use bytes;
382 print "not " unless $_ eq "$dx$dx";
383 print "ok $test\n";
384 $test++;
385 }
386}
8b72f7e2 387
388{
da450f52 389 # bug id 20000323.056
390
8b72f7e2 391 print "not " unless "\x{41}" eq +v65;
392 print "ok $test\n";
393 $test++;
394
395 print "not " unless "\x41" eq +v65;
396 print "ok $test\n";
397 $test++;
398
399 print "not " unless "\x{c8}" eq +v200;
400 print "ok $test\n";
401 $test++;
402
403 print "not " unless "\xc8" eq +v200;
404 print "ok $test\n";
405 $test++;
406
407 print "not " unless "\x{221b}" eq v8731;
408 print "ok $test\n";
409 $test++;
410}
da450f52 411
412{
413 # bug id 20000427.003
414
415 use utf8;
416 use warnings;
417 use strict;
418
419 my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}";
420
421 my @charlist = split //, $sushi;
422 my $r = '';
423 foreach my $ch (@charlist) {
424 $r = $r . " " . sprintf "U+%04X", ord($ch);
425 }
426
427 print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B";
428 print "ok $test\n";
429 $test++;
430}
431
432{
433 # bug id 20000901.092
434 # test that undef left and right of utf8 results in a valid string
435
436 my $a;
437 $a .= "\x{1ff}";
438 print "not " unless $a eq "\x{1ff}";
439 print "ok $test\n";
440 $test++;
441}
93f04dac 442
443{
444 # bug id 20000426.003
445
446 use utf8;
447
448 my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20";
449
450 my ($a, $b, $c) = split(/\x40/, $s);
451 print "not "
452 unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a;
453 print "ok $test\n";
454 $test++;
455
456 my ($a, $b) = split(/\x{100}/, $s);
457 print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20";
458 print "ok $test\n";
459 $test++;
460
461 my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s);
462 print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20";
463 print "ok $test\n";
464 $test++;
465
466 my ($a, $b) = split(/\x40\x{80}/, $s);
467 print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20";
468 print "ok $test\n";
469 $test++;
470
471 my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s);
472 print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20";
473 print "ok $test\n";
474 $test++;
475}
60ff4832 476
477{
478 # bug id 20000730.004
479
480 use utf8;
481
482 my $smiley = "\x{263a}";
483
484 for my $s ("\x{263a}", # 1
485 $smiley, # 2
486
487 "" . $smiley, # 3
488 "" . "\x{263a}", # 4
489
490 $smiley . "", # 5
491 "\x{263a}" . "", # 6
492 ) {
493 my $length_chars = length($s);
494 my $length_bytes;
495 { use bytes; $length_bytes = length($s) }
496 my @regex_chars = $s =~ m/(.)/g;
497 my $regex_chars = @regex_chars;
498 my @split_chars = split //, $s;
499 my $split_chars = @split_chars;
500 print "not "
501 unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
502 "1/1/1/3";
503 print "ok $test\n";
504 $test++;
505 }
506
507 for my $s ("\x{263a}" . "\x{263a}", # 7
508 $smiley . $smiley, # 8
509
510 "\x{263a}\x{263a}", # 9
511 "$smiley$smiley", # 10
512
513 "\x{263a}" x 2, # 11
514 $smiley x 2, # 12
515 ) {
516 my $length_chars = length($s);
517 my $length_bytes;
518 { use bytes; $length_bytes = length($s) }
519 my @regex_chars = $s =~ m/(.)/g;
520 my $regex_chars = @regex_chars;
521 my @split_chars = split //, $s;
522 my $split_chars = @split_chars;
523 print "not "
524 unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
525 "2/2/2/6";
526 print "ok $test\n";
527 $test++;
528 }
529}
689440ec 530
531{
532 # ID 20001020.006
533
534 "x" =~ /(.)/; # unset $2
535
536 # Without the fix this will croak:
537 # Modification of a read-only value attempted at ...
538 "$2\x{1234}";
539
540 print "ok $test\n";
541 $test++;
542
543 # For symmetry with the above.
544 "\x{1234}$2";
545
546 print "ok $test\n";
547 $test++;
5280a8e5 548
549 *pi = \undef;
550 # This bug existed earlier than the $2 bug, but is fixed with the same
551 # patch. Without the fix this will also croak:
552 # Modification of a read-only value attempted at ...
553 "$pi\x{1234}";
554
555 print "ok $test\n";
556 $test++;
557
558 # For symmetry with the above.
559 "\x{1234}$pi";
560
561 print "ok $test\n";
562 $test++;
689440ec 563}