6 $ENV{PERL5LIB} = '../lib';
7 if ( ord("\t") != 9 ) { # skip on ebcdic platforms
8 print "1..0 # Skip utf8 tests on ebcdic platform.\n";
18 my ($got,$expect) = @_;
19 print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
24 my ($got,$expect) = @_;
25 print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
31 my ($got,$expect) = @_;
32 print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
38 my ($got,$expect) = @_;
39 print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
46 s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg;
51 my $rx = "\x{80}-\x{10ffff}";
52 s/([$rx])/"&#".ord($1).";"/eg;
57 my $rx = "\\x{80}-\\x{10ffff}";
58 s/([$rx])/"&#".ord($1).";"/eg;
67 $_ = "alphaNUMERICstring";
72 $_ = "alphaNUMERICstring";
77 $_ = "alphaNUMERICstring";
87 $_ = "alphaNUMERICstring";
92 $_ = "alpha123numeric456";
97 $_ = "alpha123numeric456";
102 $_ = ",123alpha,456numeric";
110 $_ = "\x{263A}>\x{263A}\x{263A}";
115 ok length((m/>(.)/)[0]), 1;
130 ok length($tmp=$&), 2;
133 ok length($tmp=$'), 1;
136 ok length($tmp=$`), 1;
139 ok length($tmp=$1), 1;
146 ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
150 ok $tmp, pack("C*", 0342, 0230, 0272);
154 ok $tmp, pack("C*", 0342, 0230, 0272);
158 ok $tmp, pack("C*", 0342, 0230, 0272);
162 ok_bytes $&, pack("C*", ord(">"), 0342, 0230, 0272);
165 ok_bytes $', pack("C*", 0342, 0230, 0272);
168 ok_bytes $`, pack("C*", 0342, 0230, 0272);
171 ok_bytes $1, pack("C*", 0342, 0230, 0272);
181 ok length((m/>(.)/)[0]), 1;
196 ok $&, pack("C*", ord(">"), 0342);
199 ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
202 ok $`, pack("C*", 0342, 0230, 0272);
205 ok $1, pack("C*", 0342);
213 $_="\342\230\272>\342\230\272\342\230\272";
219 ok length((m/>(.)/)[0]), 1;
234 ok length($tmp=$&), 2;
237 ok length($tmp=$'), 1;
240 ok length($tmp=$`), 1;
243 ok length($tmp=$1), 1;
250 ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
254 ok $tmp, pack("C*", 0342, 0230, 0272);
258 ok $tmp, pack("C*", 0342, 0230, 0272);
262 ok $tmp, pack("C*", 0342, 0230, 0272);
272 ok length((m/>(.)/)[0]), 1;
287 ok $&, pack("C*", ord(">"), 0342);
290 ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
293 ok $`, pack("C*", 0342, 0230, 0272);
296 ok $1, pack("C*", 0342);
301 ok "\x{ab}" =~ /^\x{ab}$/, 1;
307 ok_bytes chr(0xe2), pack("C*", 0xc3, 0xa2);
313 my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
314 ok "@a", "1234 123 2345";
321 my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345)));
322 ok "@a", "1234 2345";
327 # bug id 20001009.001
330 { use bytes; $a = "\xc3\xa4"; }
331 { use utf8; $b = "\xe4"; }
332 { use bytes; ok_bytes $a, $b; $test++; } # 69
333 { use utf8; nok $a, $b; $test++; } # 70
337 # bug id 20001008.001
339 my @x = ("stra\337e 138","stra\337e 138");
341 s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
342 my($latin) = /^(.+)(?:\s+\d)/;
343 print $latin eq "stra\337e" ? "ok $test\n" :
344 "#latin[$latin]\nnot ok $test\n";
346 $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
348 $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
353 # bug id 20000819.004
355 $_ = $dx = "\x{10f2}";
359 print "not " unless $_ eq "$dx$dx";
364 $_ = $dx = "\x{10f2}";
368 print "not " unless $_ eq "$dx$dx";
374 $_ = "\x{10f2}\x{10f2}";
378 print "not " unless $_ eq "$dx$dx";
385 # bug id 20000323.056
389 print "not " unless "\x{41}" eq +v65;
393 print "not " unless "\x41" eq +v65;
397 print "not " unless "\x{c8}" eq +v200;
401 print "not " unless "\xc8" eq +v200;
405 print "not " unless "\x{221b}" eq v8731;
411 # bug id 20000427.003
417 my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}";
419 my @charlist = split //, $sushi;
421 foreach my $ch (@charlist) {
422 $r = $r . " " . sprintf "U+%04X", ord($ch);
425 print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B";
431 # bug id 20000901.092
432 # test that undef left and right of utf8 results in a valid string
436 print "not " unless $a eq "\x{1ff}";
442 # bug id 20000426.003
446 my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20";
448 my ($a, $b, $c) = split(/\x40/, $s);
450 unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a;
454 my ($a, $b) = split(/\x{100}/, $s);
455 print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20";
459 my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s);
460 print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20";
464 my ($a, $b) = split(/\x40\x{80}/, $s);
465 print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20";
469 my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s);
470 print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20";