6 $ENV{PERL5LIB} = '../lib';
7 if ( ord("\t") != 9 ) { # skip on ebcdic platforms
8 print "1..0 # Skip utf8 tests on ebcdic platform.\n";
18 my ($got,$expect) = @_;
19 print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
24 my ($got,$expect) = @_;
25 print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
31 my ($got,$expect) = @_;
32 print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
38 my ($got,$expect) = @_;
39 print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
47 s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg;
52 my $rx = "\x{80}-\x{10ffff}";
53 s/([$rx])/"&#".ord($1).";"/eg;
58 my $rx = "\\x{80}-\\x{10ffff}";
59 s/([$rx])/"&#".ord($1).";"/eg;
68 $_ = "alphaNUMERICstring";
73 $_ = "alphaNUMERICstring";
78 $_ = "alphaNUMERICstring";
88 $_ = "alphaNUMERICstring";
93 $_ = "alpha123numeric456";
98 $_ = "alpha123numeric456";
103 $_ = ",123alpha,456numeric";
111 $_ = "\x{263A}\x{263A}x\x{263A}y\x{263A}";
113 ok length($_), 6; # 13
118 ok length($a), 1; # 14
121 ok length($`), 2; # 15
123 ok length($&), 2; # 16
125 ok length($'), 2; # 17
128 ok length($1), 1; # 18
131 ok length($b=$`), 2; # 19
134 ok length($b=$&), 2; # 20
137 ok length($b=$'), 2; # 21
140 ok length($b=$1), 1; # 22
143 ok $a, "\x{263A}"; # 23
146 ok $`, "\x{263A}\x{263A}"; # 24
149 ok $&, "x\x{263A}"; # 25
152 ok $', "y\x{263A}"; # 26
155 ok $1, "\x{263A}"; # 27
158 ok_bytes $a, "\342\230\272"; # 28
161 ok_bytes $1, "\342\230\272"; # 29
164 ok_bytes $&, "x\342\230\272"; # 30
169 $_ = chr(0x263A) . chr(0x263A) . 'x' . chr(0x263A) . 'y' . chr(0x263A);
172 ok length($_), 6; # 31
177 ok length($a), 1; # 32
180 ok length($`), 2; # 33
183 ok length($&), 2; # 34
186 ok length($'), 2; # 35
189 ok length($1), 1; # 36
192 ok length($b=$`), 2; # 37
195 ok length($b=$&), 2; # 38
198 ok length($b=$'), 2; # 39
201 ok length($b=$1), 1; # 40
204 ok $a, "\x{263A}"; # 41
207 ok $`, "\x{263A}\x{263A}"; # 42
210 ok $&, "x\x{263A}"; # 43
213 ok $', "y\x{263A}"; # 44
216 ok $1, "\x{263A}"; # 45
219 ok_bytes $a, "\342\230\272"; # 46
222 ok_bytes $1, "\342\230\272"; # 47
225 ok_bytes $&, "x\342\230\272"; # 48
228 $_ = "\342\230\272\342\230\272x\342\230\272y\342\230\272";
230 ok length($_), 14; # 49
235 ok length($a), 1; # 50
238 ok length($`), 6; # 51
241 ok length($&), 2; # 52
244 ok length($'), 6; # 53
247 ok length($1), 1; # 54
250 ok length($b=$`), 6; # 55
253 ok length($b=$&), 2; # 56
256 ok length($b=$'), 6; # 57
259 ok length($b=$1), 1; # 58
265 ok $`, "\342\230\272\342\230\272"; # 60
271 ok $', "\230\272y\342\230\272"; # 62
280 ok "\x{ab}" =~ /^\x{ab}$/, 1;
286 ok_bytes chr(0x1e2), pack("C*", 0xc7, 0xa2);
292 my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
293 ok "@a", "1234 123 2345";
300 my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345)));
301 ok "@a", "1234 2345";
306 # bug id 20001009.001
310 { use bytes; $a = "\xc3\xa4" }
311 { use utf8; $b = "\xe4" } # \xXX must not produce UTF-8
313 print "not " if $a eq $b;
314 print "ok $test\n"; $test++; # 68
316 { use utf8; print "not " if $a eq $b; }
317 print "ok $test\n"; $test++; # 69
321 # bug id 20001008.001
323 my @x = ("stra\337e 138","stra\337e 138");
325 s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
326 my($latin) = /^(.+)(?:\s+\d)/;
327 print $latin eq "stra\337e" ? "ok $test\n" : # 70, 71
328 "#latin[$latin]\nnot ok $test\n";
330 $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
332 $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
337 # bug id 20000427.003
343 my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}";
345 my @charlist = split //, $sushi;
347 foreach my $ch (@charlist) {
348 $r = $r . " " . sprintf "U+%04X", ord($ch);
351 print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B";
352 print "ok $test\n"; # 72
357 # bug id 20000426.003
361 my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20";
363 my ($a, $b, $c) = split(/\x40/, $s);
365 unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a;
369 my ($a, $b) = split(/\x{100}/, $s);
370 print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20";
374 my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s);
375 print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20";
379 my ($a, $b) = split(/\x40\x{80}/, $s);
380 print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20";
384 my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s);
385 print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20";
391 # bug id 20000730.004
395 my $smiley = "\x{263a}";
397 for my $s ("\x{263a}", # 78
401 "" . "\x{263a}", # 81
404 "\x{263a}" . "", # 83
406 my $length_chars = length($s);
408 { use bytes; $length_bytes = length($s) }
409 my @regex_chars = $s =~ m/(.)/g;
410 my $regex_chars = @regex_chars;
411 my @split_chars = split //, $s;
412 my $split_chars = @split_chars;
414 unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
420 for my $s ("\x{263a}" . "\x{263a}", # 84
421 $smiley . $smiley, # 85
423 "\x{263a}\x{263a}", # 86
424 "$smiley$smiley", # 87
429 my $length_chars = length($s);
431 { use bytes; $length_bytes = length($s) }
432 my @regex_chars = $s =~ m/(.)/g;
433 my $regex_chars = @regex_chars;
434 my @split_chars = split //, $s;
435 my $split_chars = @split_chars;
437 unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
447 print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
451 print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
455 print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
459 print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
463 print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
467 print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
471 print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
475 print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
481 # the first half of 20001028.003
484 my ($Y) = $X =~ /(.*)/;
485 print "not " unless $Y eq v1448 && length($Y) == 1;
494 my $X = "Szab\x{f3},Bal\x{e1}zs";
496 $Y =~ s/(B)/$1/ for 0..3;
497 print "not " unless $Y eq $X && $X eq "Szab\x{f3},Bal\x{e1}zs";
506 use charnames ':full';
507 my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}";
508 print "not " unless $text eq "\xc4" && ord($text) == 0xc4;
518 my $a = "ABC\x{263A}";
520 my @b = split( //, $a );
522 print "not " unless @b == 4;
526 print "not " unless length($b[3]) == 1 && $b[3] eq "\x{263A}";
531 print "not " unless length($a) == 4 && $a eq "ZBC\x{263A}";
537 # the second half of 20001028.003
540 $X =~ s/^/chr(1488)/e;
541 print "not " unless length $X == 1 && ord($X) == 1488;
553 print "not " unless $x eq "\x{100}B" && length($x) == 2;
561 my @a = split(/\xFE/, "\xFF\xFE\xFD");
563 print "not " unless @a == 2 && $a[0] eq "\xFF" && $a[1] eq "\xFD";
572 local $SIG{__WARN__} = sub { print "#($_[0])\n"; $w++ };
573 my $x = eval q/"\\/ . "\x{100}" . q/"/;;
575 print "not " unless $w == 0 && $x eq "\x{100}";