6 $ENV{PERL5LIB} = '../lib';
7 if ( ord("\t") != 9 ) { # skip on ebcdic platforms
8 print "1..0 # Skip utf8 tests on ebcdic platform.\n";
18 my ($got,$expect) = @_;
19 print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
24 my ($got,$expect) = @_;
25 print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
31 my ($got,$expect) = @_;
32 print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
38 my ($got,$expect) = @_;
39 print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
46 s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg;
51 my $rx = "\x{80}-\x{10ffff}";
52 s/([$rx])/"&#".ord($1).";"/eg;
57 my $rx = "\\x{80}-\\x{10ffff}";
58 s/([$rx])/"&#".ord($1).";"/eg;
67 $_ = "alphaNUMERICstring";
72 $_ = "alphaNUMERICstring";
77 $_ = "alphaNUMERICstring";
87 $_ = "alphaNUMERICstring";
92 $_ = "alpha123numeric456";
97 $_ = "alpha123numeric456";
102 $_ = ",123alpha,456numeric";
110 $_ = "\x{263A}>\x{263A}\x{263A}";
115 ok length((m/>(.)/)[0]), 1;
130 ok length($tmp=$&), 2;
133 ok length($tmp=$'), 1;
136 ok length($tmp=$`), 1;
139 ok length($tmp=$1), 1;
146 ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
150 ok $tmp, pack("C*", 0342, 0230, 0272);
154 ok $tmp, pack("C*", 0342, 0230, 0272);
158 ok $tmp, pack("C*", 0342, 0230, 0272);
162 ok_bytes $&, pack("C*", ord(">"), 0342, 0230, 0272);
165 ok_bytes $', pack("C*", 0342, 0230, 0272);
168 ok_bytes $`, pack("C*", 0342, 0230, 0272);
171 ok_bytes $1, pack("C*", 0342, 0230, 0272);
181 ok length((m/>(.)/)[0]), 1;
196 ok $&, pack("C*", ord(">"), 0342);
199 ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
202 ok $`, pack("C*", 0342, 0230, 0272);
205 ok $1, pack("C*", 0342);
213 $_="\342\230\272>\342\230\272\342\230\272";
219 ok length((m/>(.)/)[0]), 1;
234 ok length($tmp=$&), 2;
237 ok length($tmp=$'), 1;
240 ok length($tmp=$`), 1;
243 ok length($tmp=$1), 1;
250 ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
254 ok $tmp, pack("C*", 0342, 0230, 0272);
258 ok $tmp, pack("C*", 0342, 0230, 0272);
262 ok $tmp, pack("C*", 0342, 0230, 0272);
272 ok length((m/>(.)/)[0]), 1;
287 ok $&, pack("C*", ord(">"), 0342);
290 ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
293 ok $`, pack("C*", 0342, 0230, 0272);
296 ok $1, pack("C*", 0342);
301 ok "\x{ab}" =~ /^\x{ab}$/, 1;
307 ok_bytes chr(0xe2), pack("C*", 0xc3, 0xa2);
313 my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
314 ok "@a", "1234 123 2345";
321 my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345)));
322 ok "@a", "1234 2345";
327 # bug id 20001009.001
331 { use bytes; $a = "\xc3\xa4" }
332 { use utf8; $b = "\xe4" } # \xXX must not produce UTF-8
334 print "not " if $a eq $b;
335 print "ok $test\n"; $test++;
337 { use utf8; print "not " if $a eq $b; }
338 print "ok $test\n"; $test++;
342 # bug id 20001008.001
344 my @x = ("stra\337e 138","stra\337e 138");
346 s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
347 my($latin) = /^(.+)(?:\s+\d)/;
348 print $latin eq "stra\337e" ? "ok $test\n" :
349 "#latin[$latin]\nnot ok $test\n";
351 $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
353 $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
358 # bug id 20000819.004
360 $_ = $dx = "\x{10f2}";
364 print "not " unless $_ eq "$dx$dx";
369 $_ = $dx = "\x{10f2}";
373 print "not " unless $_ eq "$dx$dx";
379 $_ = "\x{10f2}\x{10f2}";
383 print "not " unless $_ eq "$dx$dx";
390 # bug id 20000323.056
394 print "not " unless "\x{41}" eq +v65;
398 print "not " unless "\x41" eq +v65;
402 print "not " unless "\x{c8}" eq +v200;
406 print "not " unless "\xc8" eq +v200;
410 print "not " unless "\x{221b}" eq v8731;
416 # bug id 20000427.003
422 my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}";
424 my @charlist = split //, $sushi;
426 foreach my $ch (@charlist) {
427 $r = $r . " " . sprintf "U+%04X", ord($ch);
430 print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B";
436 # bug id 20000901.092
437 # test that undef left and right of utf8 results in a valid string
441 print "not " unless $a eq "\x{1ff}";
447 # bug id 20000426.003
451 my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20";
453 my ($a, $b, $c) = split(/\x40/, $s);
455 unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a;
459 my ($a, $b) = split(/\x{100}/, $s);
460 print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20";
464 my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s);
465 print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20";
469 my ($a, $b) = split(/\x40\x{80}/, $s);
470 print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20";
474 my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s);
475 print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20";
481 # bug id 20000730.004
485 my $smiley = "\x{263a}";
487 for my $s ("\x{263a}", # 1
496 my $length_chars = length($s);
498 { use bytes; $length_bytes = length($s) }
499 my @regex_chars = $s =~ m/(.)/g;
500 my $regex_chars = @regex_chars;
501 my @split_chars = split //, $s;
502 my $split_chars = @split_chars;
504 unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
510 for my $s ("\x{263a}" . "\x{263a}", # 7
511 $smiley . $smiley, # 8
513 "\x{263a}\x{263a}", # 9
514 "$smiley$smiley", # 10
519 my $length_chars = length($s);
521 { use bytes; $length_bytes = length($s) }
522 my @regex_chars = $s =~ m/(.)/g;
523 my $regex_chars = @regex_chars;
524 my @split_chars = split //, $s;
525 my $split_chars = @split_chars;
527 unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
537 "x" =~ /(.)/; # unset $2
539 # Without the fix this will croak:
540 # Modification of a read-only value attempted at ...
546 # For symmetry with the above.
553 # This bug existed earlier than the $2 bug, but is fixed with the same
554 # patch. Without the fix this will also croak:
555 # Modification of a read-only value attempted at ...
561 # For symmetry with the above.