BEGIN {
chdir 't' if -d 't';
- unshift @INC, '../lib';
+ @INC = '../lib';
$ENV{PERL5LIB} = '../lib';
if ( ord("\t") != 9 ) { # skip on ebcdic platforms
print "1..0 # Skip utf8 tests on ebcdic platform.\n";
}
}
-print "1..66\n";
+print "1..99\n";
my $test = 1;
print "ok $test\n";
}
+sub nok {
+ my ($got,$expect) = @_;
+ print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
+ print "ok $test\n";
+}
+
sub ok_bytes {
use bytes;
my ($got,$expect) = @_;
print "ok $test\n";
}
+sub nok_bytes {
+ use bytes;
+ my ($got,$expect) = @_;
+ print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
+ print "ok $test\n";
+}
{
use utf8;
ok_bytes chr(0xe2), pack("C*", 0xc3, 0xa2);
$test++; # 66
}
+
+{
+ use utf8;
+ my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
+ ok "@a", "1234 123 2345";
+ $test++; # 67
+}
+
+{
+ use utf8;
+ my $x = chr(123);
+ my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345)));
+ ok "@a", "1234 2345";
+ $test++; # 68
+}
+
+{
+ # bug id 20001009.001
+
+ my($a,$b);
+ { use bytes; $a = "\xc3\xa4"; }
+ { use utf8; $b = "\xe4"; }
+ { use bytes; ok_bytes $a, $b; $test++; } # 69
+ { use utf8; nok $a, $b; $test++; } # 70
+}
+
+{
+ # bug id 20001008.001
+
+ my @x = ("stra\337e 138","stra\337e 138");
+ for (@x) {
+ s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
+ my($latin) = /^(.+)(?:\s+\d)/;
+ print $latin eq "stra\337e" ? "ok $test\n" :
+ "#latin[$latin]\nnot ok $test\n";
+ $test++;
+ $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
+ use utf8;
+ $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
+ }
+}
+
+{
+ # bug id 20000819.004
+
+ $_ = $dx = "\x{10f2}";
+ s/($dx)/$dx$1/;
+ {
+ use bytes;
+ print "not " unless $_ eq "$dx$dx";
+ print "ok $test\n";
+ $test++;
+ }
+
+ $_ = $dx = "\x{10f2}";
+ s/($dx)/$1$dx/;
+ {
+ use bytes;
+ print "not " unless $_ eq "$dx$dx";
+ print "ok $test\n";
+ $test++;
+ }
+
+ $dx = "\x{10f2}";
+ $_ = "\x{10f2}\x{10f2}";
+ s/($dx)($dx)/$1$2/;
+ {
+ use bytes;
+ print "not " unless $_ eq "$dx$dx";
+ print "ok $test\n";
+ $test++;
+ }
+}
+
+{
+ # bug id 20000323.056
+
+ use utf8;
+
+ print "not " unless "\x{41}" eq +v65;
+ print "ok $test\n";
+ $test++;
+
+ print "not " unless "\x41" eq +v65;
+ print "ok $test\n";
+ $test++;
+
+ print "not " unless "\x{c8}" eq +v200;
+ print "ok $test\n";
+ $test++;
+
+ print "not " unless "\xc8" eq +v200;
+ print "ok $test\n";
+ $test++;
+
+ print "not " unless "\x{221b}" eq v8731;
+ print "ok $test\n";
+ $test++;
+}
+
+{
+ # bug id 20000427.003
+
+ use utf8;
+ use warnings;
+ use strict;
+
+ my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}";
+
+ my @charlist = split //, $sushi;
+ my $r = '';
+ foreach my $ch (@charlist) {
+ $r = $r . " " . sprintf "U+%04X", ord($ch);
+ }
+
+ print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B";
+ print "ok $test\n";
+ $test++;
+}
+
+{
+ # bug id 20000901.092
+ # test that undef left and right of utf8 results in a valid string
+
+ my $a;
+ $a .= "\x{1ff}";
+ print "not " unless $a eq "\x{1ff}";
+ print "ok $test\n";
+ $test++;
+}
+
+{
+ # bug id 20000426.003
+
+ use utf8;
+
+ my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20";
+
+ my ($a, $b, $c) = split(/\x40/, $s);
+ print "not "
+ unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a;
+ print "ok $test\n";
+ $test++;
+
+ my ($a, $b) = split(/\x{100}/, $s);
+ print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20";
+ print "ok $test\n";
+ $test++;
+
+ my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s);
+ print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20";
+ print "ok $test\n";
+ $test++;
+
+ my ($a, $b) = split(/\x40\x{80}/, $s);
+ print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20";
+ print "ok $test\n";
+ $test++;
+
+ my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s);
+ print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20";
+ print "ok $test\n";
+ $test++;
+}
+
+{
+ # bug id 20000730.004
+
+ use utf8;
+
+ my $smiley = "\x{263a}";
+
+ for my $s ("\x{263a}", # 1
+ $smiley, # 2
+
+ "" . $smiley, # 3
+ "" . "\x{263a}", # 4
+
+ $smiley . "", # 5
+ "\x{263a}" . "", # 6
+ ) {
+ my $length_chars = length($s);
+ my $length_bytes;
+ { use bytes; $length_bytes = length($s) }
+ my @regex_chars = $s =~ m/(.)/g;
+ my $regex_chars = @regex_chars;
+ my @split_chars = split //, $s;
+ my $split_chars = @split_chars;
+ print "not "
+ unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
+ "1/1/1/3";
+ print "ok $test\n";
+ $test++;
+ }
+
+ for my $s ("\x{263a}" . "\x{263a}", # 7
+ $smiley . $smiley, # 8
+
+ "\x{263a}\x{263a}", # 9
+ "$smiley$smiley", # 10
+
+ "\x{263a}" x 2, # 11
+ $smiley x 2, # 12
+ ) {
+ my $length_chars = length($s);
+ my $length_bytes;
+ { use bytes; $length_bytes = length($s) }
+ my @regex_chars = $s =~ m/(.)/g;
+ my $regex_chars = @regex_chars;
+ my @split_chars = split //, $s;
+ my $split_chars = @split_chars;
+ print "not "
+ unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
+ "2/2/2/6";
+ print "ok $test\n";
+ $test++;
+ }
+}