6 $ENV{PERL5LIB} = '../lib';
7 if ( ord("\t") != 9 ) { # skip on ebcdic platforms
8 print "1..0 # Skip utf8 tests on ebcdic platform.\n";
18 my ($got,$expect) = @_;
19 print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
24 my ($got,$expect) = @_;
25 print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
31 my ($got,$expect) = @_;
32 print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
38 my ($got,$expect) = @_;
39 print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
46 s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg;
51 my $rx = "\x{80}-\x{10ffff}";
52 s/([$rx])/"&#".ord($1).";"/eg;
57 my $rx = "\\x{80}-\\x{10ffff}";
58 s/([$rx])/"&#".ord($1).";"/eg;
67 $_ = "alphaNUMERICstring";
72 $_ = "alphaNUMERICstring";
77 $_ = "alphaNUMERICstring";
87 $_ = "alphaNUMERICstring";
92 $_ = "alpha123numeric456";
97 $_ = "alpha123numeric456";
102 $_ = ",123alpha,456numeric";
111 $_ = "\x{263A}>\x{263A}\x{263A}";
116 ok length((m/>(.)/)[0]), 1;
131 ok length($tmp=$&), 2;
134 ok length($tmp=$'), 1;
137 ok length($tmp=$`), 1;
140 ok length($tmp=$1), 1;
147 ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
151 ok $tmp, pack("C*", 0342, 0230, 0272);
155 ok $tmp, pack("C*", 0342, 0230, 0272);
159 ok $tmp, pack("C*", 0342, 0230, 0272);
163 ok_bytes $&, pack("C*", ord(">"), 0342, 0230, 0272);
166 ok_bytes $', pack("C*", 0342, 0230, 0272);
169 ok_bytes $`, pack("C*", 0342, 0230, 0272);
172 ok_bytes $1, pack("C*", 0342, 0230, 0272);
182 ok length((m/>(.)/)[0]), 1;
197 ok $&, pack("C*", ord(">"), 0342);
200 ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
203 ok $`, pack("C*", 0342, 0230, 0272);
206 ok $1, pack("C*", 0342);
212 $_="\342\230\272>\342\230\272\342\230\272";
218 ok length((m/>(.)/)[0]), 1;
233 ok length($tmp=$&), 2;
236 ok length($tmp=$'), 1;
239 ok length($tmp=$`), 1;
242 ok length($tmp=$1), 1;
249 ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
253 ok $tmp, pack("C*", 0342, 0230, 0272);
257 ok $tmp, pack("C*", 0342, 0230, 0272);
261 ok $tmp, pack("C*", 0342, 0230, 0272);
272 ok length((m/>(.)/)[0]), 1;
287 ok $&, pack("C*", ord(">"), 0342);
290 ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
293 ok $`, pack("C*", 0342, 0230, 0272);
296 ok $1, pack("C*", 0342);
300 ok "\x{ab}" =~ /^\x{ab}$/, 1;
306 ok_bytes chr(0xe2), pack("C*", 0xc3, 0xa2);
312 my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
313 ok "@a", "1234 123 2345";
320 my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345)));
321 ok "@a", "1234 2345";
326 # bug id 20001009.001
330 { use bytes; $a = "\xc3\xa4" }
331 { use utf8; $b = "\xe4" } # \xXX must not produce UTF-8
333 print "not " if $a eq $b;
334 print "ok $test\n"; $test++;
336 { use utf8; print "not " if $a eq $b; }
337 print "ok $test\n"; $test++;
341 # bug id 20001008.001
343 my @x = ("stra\337e 138","stra\337e 138");
345 s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
346 my($latin) = /^(.+)(?:\s+\d)/;
347 print $latin eq "stra\337e" ? "ok $test\n" :
348 "#latin[$latin]\nnot ok $test\n";
350 $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
352 $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
357 # bug id 20000427.003
363 my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}";
365 my @charlist = split //, $sushi;
367 foreach my $ch (@charlist) {
368 $r = $r . " " . sprintf "U+%04X", ord($ch);
371 print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B";
377 # bug id 20000426.003
381 my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20";
383 my ($a, $b, $c) = split(/\x40/, $s);
385 unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a;
389 my ($a, $b) = split(/\x{100}/, $s);
390 print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20";
394 my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s);
395 print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20";
399 my ($a, $b) = split(/\x40\x{80}/, $s);
400 print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20";
404 my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s);
405 print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20";
411 # bug id 20000730.004
415 my $smiley = "\x{263a}";
417 for my $s ("\x{263a}", # 1
426 my $length_chars = length($s);
428 { use bytes; $length_bytes = length($s) }
429 my @regex_chars = $s =~ m/(.)/g;
430 my $regex_chars = @regex_chars;
431 my @split_chars = split //, $s;
432 my $split_chars = @split_chars;
434 unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
440 for my $s ("\x{263a}" . "\x{263a}", # 7
441 $smiley . $smiley, # 8
443 "\x{263a}\x{263a}", # 9
444 "$smiley$smiley", # 10
449 my $length_chars = length($s);
451 { use bytes; $length_bytes = length($s) }
452 my @regex_chars = $s =~ m/(.)/g;
453 my $regex_chars = @regex_chars;
454 my @split_chars = split //, $s;
455 my $split_chars = @split_chars;
457 unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq