Hopefully no tests were lost in the shuffle.
(The beginning of pragma/utf8 was lost intentionally,
the tests were rather bogus and incomplete.)
p4raw-id: //depot/perl@9063
}
$| = 1;
-print "1..15\n";
+print "1..16\n";
use charnames ':full';
print "not "
unless to_bytes("\N{DESERET SMALL LETTER ENG}") eq $encoded_deseng;
print "ok 15\n";
+}
+{
+ # 20001114.001
+
+ if (ord("Ä") == 0xc4) { # Try to do this only on Latin-1.
+ use charnames ':full';
+ my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}";
+ print "not " unless $text eq "\xc4" && ord($text) == 0xc4;
+ print "ok 16\n";
+ } else {
+ print "ok 16 # Skip: not Latin-1\n";
+ }
}
require Config; import Config;
}
-print "1..159\n";
+print "1..160\n";
$format = "c2 x5 C C x s d i l a6";
# Need the expression in here to force ary[5] to be numeric. This avoids
sprintf "%vd", pack("C0U*",1,20,300,4000);
print "ok $test\n"; $test++;
+# 160
+print "not " unless join(" ", unpack("C*", chr(0x1e2))) eq "199 162";
+print "ok $test\n"; $test++;
# the format supported by op/regexp.t. If you want to add a test
# that does fit that format, add it to op/re_tests, not here.
-print "1..245\n";
+print "1..580\n";
BEGIN {
chdir 't' if -d 't';
print "ok $_ # Skip: not EBCDIC\n";
}
}
+
+print "not " unless "\x{ab}" =~ /\x{ab}/;
+print "ok 246\n";
+
+print "not " unless "\x{abcd}" =~ /\x{abcd}/;
+print "ok 247\n";
+
+{
+ # bug id 20001008.001
+
+ use utf8; # BUG - should not be needed, but is, otherwise core dump
+
+ my $test = 248;
+ my @x = ("stra\337e 138","stra\337e 138");
+ for (@x) {
+ s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
+ my($latin) = /^(.+)(?:\s+\d)/;
+ print $latin eq "stra\337e" ? "ok $test\n" : # 248,249
+ "#latin[$latin]\nnot ok $test\n";
+ $test++;
+ $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
+ use utf8;
+ $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
+ }
+}
+
+{
+ print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
+ print "ok 250\n";
+
+ print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
+ print "ok 251\n";
+
+ print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
+ print "ok 252\n";
+
+ print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
+ print "ok 253\n";
+
+ print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
+ print "ok 254\n";
+
+ print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
+ print "ok 255\n";
+
+ print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
+ print "ok 256\n";
+
+ print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
+ print "ok 257\n";
+}
+
+{
+ # the first half of 20001028.003
+
+ my $X = chr(1448);
+ my ($Y) = $X =~ /(.*)/;
+ print "not " unless $Y eq v1448 && length($Y) == 1;
+ print "ok 258\n";
+}
+
+{
+ # 20001108.001
+
+ my $X = "Szab\x{f3},Bal\x{e1}zs";
+ my $Y = $X;
+ $Y =~ s/(B)/$1/ for 0..3;
+ print "not " unless $Y eq $X && $X eq "Szab\x{f3},Bal\x{e1}zs";
+ print "ok 259\n";
+}
+
+{
+ # the second half of 20001028.003
+
+ $X =~ s/^/chr(1488)/e;
+ print "not " unless length $X == 1 && ord($X) == 1488;
+ print "ok 260\n";
+}
+
+{
+ # 20000517.001
+
+ my $x = "\x{100}A";
+
+ $x =~ s/A/B/;
+
+ print "not " unless $x eq "\x{100}B" && length($x) == 2;
+ print "ok 261\n";
+}
+
+{
+ # bug id 20001230.002
+
+ print "not " unless "École" =~ /^\C\C(.)/ && $1 eq 'c';
+ print "ok 262\n";
+
+ print "not " unless "École" =~ /^\C\C(c)/;
+ print "ok 263\n";
+}
+
+{
+ my $test = 264; # till 575
+
+ use charnames ':full';
+
+ # This is far from complete testing, there are dozens of character
+ # classes in Unicode. The mixing of literals and \N{...} is
+ # intentional so that in non-Latin-1 places we test the native
+ # characters, not the Unicode code points.
+
+ my %s = (
+ "a" => 'Ll',
+ "\N{CYRILLIC SMALL LETTER A}" => 'Ll',
+ "A" => 'Lu',
+ "\N{GREEK CAPITAL LETTER ALPHA}" => 'Lu',
+ "\N{HIRAGANA LETTER SMALL A}" => 'Lo',
+ "\N{COMBINING GRAVE ACCENT}" => 'Mn',
+ "0" => 'Nd',
+ "\N{ARABIC-INDIC DIGIT ZERO}" => 'Nd',
+ "_" => 'N',
+ "!" => 'P',
+ " " => 'Zs',
+ "\0" => 'Cc',
+ );
+
+ for my $char (keys %s) {
+ my $class = $s{$char};
+ my $code = sprintf("%04x", ord($char));
+ printf "# 0x$code\n";
+ print "# IsAlpha\n";
+ if ($class =~ /^[LM]/) {
+ print "not " unless $char =~ /\p{IsAlpha}/;
+ print "ok $test\n"; $test++;
+ print "not " if $char =~ /\P{IsAlpha}/;
+ print "ok $test\n"; $test++;
+ } else {
+ print "not " if $char =~ /\p{IsAlpha}/;
+ print "ok $test\n"; $test++;
+ print "not " unless $char =~ /\P{IsAlpha}/;
+ print "ok $test\n"; $test++;
+ }
+ print "# IsAlnum\n";
+ if ($class =~ /^[LMN]/ && $char ne "_") {
+ print "not " unless $char =~ /\p{IsAlnum}/;
+ print "ok $test\n"; $test++;
+ print "not " if $char =~ /\P{IsAlnum}/;
+ print "ok $test\n"; $test++;
+ } else {
+ print "not " if $char =~ /\p{IsAlnum}/;
+ print "ok $test\n"; $test++;
+ print "not " unless $char =~ /\P{IsAlnum}/;
+ print "ok $test\n"; $test++;
+ }
+ print "# IsASCII\n";
+ if ($code <= 127) {
+ print "not " unless $char =~ /\p{IsASCII}/;
+ print "ok $test\n"; $test++;
+ print "not " if $char =~ /\P{IsASCII}/;
+ print "ok $test\n"; $test++;
+ } else {
+ print "not " if $char =~ /\p{IsASCII}/;
+ print "ok $test\n"; $test++;
+ print "not " unless $char =~ /\P{IsASCII}/;
+ print "ok $test\n"; $test++;
+ }
+ print "# IsCntrl\n";
+ if ($class =~ /^C/) {
+ print "not " unless $char =~ /\p{IsCntrl}/;
+ print "ok $test\n"; $test++;
+ print "not " if $char =~ /\P{IsCntrl}/;
+ print "ok $test\n"; $test++;
+ } else {
+ print "not " if $char =~ /\p{IsCntrl}/;
+ print "ok $test\n"; $test++;
+ print "not " unless $char =~ /\P{IsCntrl}/;
+ print "ok $test\n"; $test++;
+ }
+ print "# IsBlank\n";
+ if ($class =~ /^Z[lp]/ || $char eq " ") {
+ print "not " unless $char =~ /\p{IsBlank}/;
+ print "ok $test\n"; $test++;
+ print "not " if $char =~ /\P{IsBlank}/;
+ print "ok $test\n"; $test++;
+ } else {
+ print "not " if $char =~ /\p{IsBlank}/;
+ print "ok $test\n"; $test++;
+ print "not " unless $char =~ /\P{IsBlank}/;
+ print "ok $test\n"; $test++;
+ }
+ print "# IsDigit\n";
+ if ($class =~ /^Nd$/) {
+ print "not " unless $char =~ /\p{IsDigit}/;
+ print "ok $test\n"; $test++;
+ print "not " if $char =~ /\P{IsDigit}/;
+ print "ok $test\n"; $test++;
+ } else {
+ print "not " if $char =~ /\p{IsDigit}/;
+ print "ok $test\n"; $test++;
+ print "not " unless $char =~ /\P{IsDigit}/;
+ print "ok $test\n"; $test++;
+ }
+ print "# IsGraph\n";
+ if ($class =~ /^([LMNPS])|Co/) {
+ print "not " unless $char =~ /\p{IsGraph}/;
+ print "ok $test\n"; $test++;
+ print "not " if $char =~ /\P{IsGraph}/;
+ print "ok $test\n"; $test++;
+ } else {
+ print "not " if $char =~ /\p{IsGraph}/;
+ print "ok $test\n"; $test++;
+ print "not " unless $char =~ /\P{IsGraph}/;
+ print "ok $test\n"; $test++;
+ }
+ print "# IsLower\n";
+ if ($class =~ /^Ll$/) {
+ print "not " unless $char =~ /\p{IsLower}/;
+ print "ok $test\n"; $test++;
+ print "not " if $char =~ /\P{IsLower}/;
+ print "ok $test\n"; $test++;
+ } else {
+ print "not " if $char =~ /\p{IsLower}/;
+ print "ok $test\n"; $test++;
+ print "not " unless $char =~ /\P{IsLower}/;
+ print "ok $test\n"; $test++;
+ }
+ print "# IsPrint\n";
+ if ($class =~ /^([LMNPS])|Co|Zs/) {
+ print "not " unless $char =~ /\p{IsPrint}/;
+ print "ok $test\n"; $test++;
+ print "not " if $char =~ /\P{IsPrint}/;
+ print "ok $test\n"; $test++;
+ } else {
+ print "not " if $char =~ /\p{IsPrint}/;
+ print "ok $test\n"; $test++;
+ print "not " unless $char =~ /\P{IsPrint}/;
+ print "ok $test\n"; $test++;
+ }
+ print "# IsPunct\n";
+ if ($class =~ /^P/ || $char eq "_") {
+ print "not " unless $char =~ /\p{IsPunct}/;
+ print "ok $test\n"; $test++;
+ print "not " if $char =~ /\P{IsPunct}/;
+ print "ok $test\n"; $test++;
+ } else {
+ print "not " if $char =~ /\p{IsPunct}/;
+ print "ok $test\n"; $test++;
+ print "not " unless $char =~ /\P{IsPunct}/;
+ print "ok $test\n"; $test++;
+ }
+ print "# IsSpace\n";
+ if ($class =~ /^Z/ || ($code =~ /^(0009|000A|000B|000C|000D)$/)) {
+ print "not " unless $char =~ /\p{IsSpace}/;
+ print "ok $test\n"; $test++;
+ print "not " if $char =~ /\P{IsSpace}/;
+ print "ok $test\n"; $test++;
+ } else {
+ print "not " if $char =~ /\p{IsSpace}/;
+ print "ok $test\n"; $test++;
+ print "not " unless $char =~ /\P{IsSpace}/;
+ print "ok $test\n"; $test++;
+ }
+ print "# IsUpper\n";
+ if ($class =~ /^L[ut]/) {
+ print "not " unless $char =~ /\p{IsUpper}/;
+ print "ok $test\n"; $test++;
+ print "not " if $char =~ /\P{IsUpper}/;
+ print "ok $test\n"; $test++;
+ } else {
+ print "not " if $char =~ /\p{IsUpper}/;
+ print "ok $test\n"; $test++;
+ print "not " unless $char =~ /\P{IsUpper}/;
+ print "ok $test\n"; $test++;
+ }
+ print "# IsWord\n";
+ if ($class =~ /^[LMN]/ || $char eq "_") {
+ print "not " unless $char =~ /\p{IsWord}/;
+ print "ok $test\n"; $test++;
+ print "not " if $char =~ /\P{IsWord}/;
+ print "ok $test\n"; $test++;
+ } else {
+ print "not " if $char =~ /\p{IsWord}/;
+ print "ok $test\n"; $test++;
+ print "not " unless $char =~ /\P{IsWord}/;
+ print "ok $test\n"; $test++;
+ }
+ }
+}
+
+{
+ $_ = "abc\x{100}\x{200}\x{300}\x{380}\x{400}defg";
+
+ if (/(.\x{300})./) {
+ print "ok 576\n";
+
+ print "not " unless $` eq "abc\x{100}" && length($`) == 4;
+ print "ok 577\n";
+
+ print "not " unless $& eq "\x{200}\x{300}\x{380}" && length($&) == 3;
+ print "ok 578\n";
+
+ print "not " unless $' eq "\x{400}defg" && length($') == 5;
+ print "ok 579\n";
+
+ print "not " unless $1 eq "\x{200}\x{300}" && length($1) == 2;
+ print "ok 580\n";
+ }
+}
#!./perl
-print "1..32\n";
+print "1..44\n";
$FS = ':';
if (join("$_",@ary) eq 'aa:b:cc') {print "ok 2\n";} else {print "not ok 2\n";}
$_ = "abc\n";
-@xyz = (@ary = split(//));
+my @xyz = (@ary = split(//));
if (join(".",@ary) eq "a.b.c.\n") {print "ok 3\n";} else {print "not ok 3\n";}
$_ = "a:b:c::::";
$ary[2] eq "\xFD\xFD" &&
$ary[2] eq "\x{FD}\xFD" &&
$ary[2] eq "\x{FD}\x{FD}";
-
print "ok 32\n";
+
+
+{
+ my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
+ print "not " unless "@a" eq "1234 123 2345";
+ print "ok 33\n";
+}
+
+{
+ my $x = chr(123);
+ my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345)));
+ print "not " unless "@a" eq "1234 2345";
+ print "ok 34\n";
+}
+
+{
+ # bug id 20000427.003
+
+ use warnings;
+ use strict;
+
+ my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}";
+
+ my @charlist = split //, $sushi;
+ my $r = '';
+ foreach my $ch (@charlist) {
+ $r = $r . " " . sprintf "U+%04X", ord($ch);
+ }
+
+ print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B";
+ print "ok 35\n";
+}
+
+{
+ # bug id 20000426.003
+
+ my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20";
+
+ my ($a, $b, $c) = split(/\x40/, $s);
+ print "not "
+ unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a;
+ print "ok 36\n";
+
+ my ($a, $b) = split(/\x{100}/, $s);
+ print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20";
+ print "ok 37\n";
+
+ my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s);
+ print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20";
+ print "ok 38\n";
+
+ my ($a, $b) = split(/\x40\x{80}/, $s);
+ print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20";
+ print "ok 39\n";
+
+ my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s);
+ print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20";
+ print "ok 40\n";
+}
+
+{
+ # 20001205.014
+
+ my $a = "ABC\x{263A}";
+
+ my @b = split( //, $a );
+
+ print "not " unless @b == 4;
+ print "ok 41\n";
+
+ print "not " unless length($b[3]) == 1 && $b[3] eq "\x{263A}";
+ print "ok 42\n";
+
+ $a =~ s/^A/Z/;
+ print "not " unless length($a) == 4 && $a eq "ZBC\x{263A}";
+ print "ok 43\n";
+}
+
+{
+ my @a = split(/\xFE/, "\xFF\xFE\xFD");
+
+ print "not " unless @a == 2 && $a[0] eq "\xFF" && $a[1] eq "\xFD";
+ print "ok 44\n";
+}
+
}
}
-print "1..109\n";
-
-my $test = 1;
-
-sub ok {
- my ($got,$expect) = @_;
- print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
- print "ok $test\n";
-}
-
-sub nok {
- my ($got,$expect) = @_;
- print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
- print "ok $test\n";
-}
-
-sub ok_bytes {
- use bytes;
- my ($got,$expect) = @_;
- print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
- print "ok $test\n";
-}
-
-sub nok_bytes {
- use bytes;
- my ($got,$expect) = @_;
- print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
- print "ok $test\n";
-}
-
-{
- use utf8;
-
- $_ = ">\x{263A}<";
- s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg;
- ok $_, '>☺<';
- $test++; # 1
-
- $_ = ">\x{263A}<";
- my $rx = "\x{80}-\x{10ffff}";
- s/([$rx])/"&#".ord($1).";"/eg;
- ok $_, '>☺<';
- $test++; # 2
-
- $_ = ">\x{263A}<";
- my $rx = "\\x{80}-\\x{10ffff}";
- s/([$rx])/"&#".ord($1).";"/eg;
- ok $_, '>☺<';
- $test++; # 3
-
- $_ = "alpha,numeric";
- m/([[:alpha:]]+)/;
- ok $1, 'alpha';
- $test++; # 4
-
- $_ = "alphaNUMERICstring";
- m/([[:^lower:]]+)/;
- ok $1, 'NUMERIC';
- $test++; # 5
-
- $_ = "alphaNUMERICstring";
- m/(\p{Ll}+)/;
- ok $1, 'alpha';
- $test++; # 6
-
- $_ = "alphaNUMERICstring";
- m/(\p{Lu}+)/;
- ok $1, 'NUMERIC';
- $test++; # 7
-
- $_ = "alpha,numeric";
- m/([\p{IsAlpha}]+)/;
- ok $1, 'alpha';
- $test++; # 8
-
- $_ = "alphaNUMERICstring";
- m/([^\p{IsLower}]+)/;
- ok $1, 'NUMERIC';
- $test++; # 9
-
- $_ = "alpha123numeric456";
- m/([\p{IsDigit}]+)/;
- ok $1, '123';
- $test++; # 10
-
- $_ = "alpha123numeric456";
- m/([^\p{IsDigit}]+)/;
- ok $1, 'alpha';
- $test++; # 11
-
- $_ = ",123alpha,456numeric";
- m/([\p{IsAlnum}]+)/;
- ok $1, '123alpha';
- $test++; # 12
-}
-
-{
- # no use utf8 needed
- $_ = "\x{263A}\x{263A}x\x{263A}y\x{263A}";
-
- ok length($_), 6; # 13
- $test++;
-
- ($a) = m/x(.)/;
-
- ok length($a), 1; # 14
- $test++;
-
- ok length($`), 2; # 15
- $test++;
- ok length($&), 2; # 16
- $test++;
- ok length($'), 2; # 17
- $test++;
-
- ok length($1), 1; # 18
- $test++;
-
- ok length($b=$`), 2; # 19
- $test++;
-
- ok length($b=$&), 2; # 20
- $test++;
-
- ok length($b=$'), 2; # 21
- $test++;
-
- ok length($b=$1), 1; # 22
- $test++;
-
- ok $a, "\x{263A}"; # 23
- $test++;
-
- ok $`, "\x{263A}\x{263A}"; # 24
- $test++;
-
- ok $&, "x\x{263A}"; # 25
- $test++;
-
- ok $', "y\x{263A}"; # 26
- $test++;
-
- ok $1, "\x{263A}"; # 27
- $test++;
-
- ok_bytes $a, "\342\230\272"; # 28
- $test++;
-
- ok_bytes $1, "\342\230\272"; # 29
- $test++;
-
- ok_bytes $&, "x\342\230\272"; # 30
- $test++;
-
- {
- use utf8; # required
- $_ = chr(0x263A) . chr(0x263A) . 'x' . chr(0x263A) . 'y' . chr(0x263A);
- }
-
- ok length($_), 6; # 31
- $test++;
-
- ($a) = m/x(.)/;
-
- ok length($a), 1; # 32
- $test++;
-
- ok length($`), 2; # 33
- $test++;
-
- ok length($&), 2; # 34
- $test++;
-
- ok length($'), 2; # 35
- $test++;
-
- ok length($1), 1; # 36
- $test++;
-
- ok length($b=$`), 2; # 37
- $test++;
-
- ok length($b=$&), 2; # 38
- $test++;
-
- ok length($b=$'), 2; # 39
- $test++;
-
- ok length($b=$1), 1; # 40
- $test++;
-
- ok $a, "\x{263A}"; # 41
- $test++;
-
- ok $`, "\x{263A}\x{263A}"; # 42
- $test++;
-
- ok $&, "x\x{263A}"; # 43
- $test++;
-
- ok $', "y\x{263A}"; # 44
- $test++;
-
- ok $1, "\x{263A}"; # 45
- $test++;
-
- ok_bytes $a, "\342\230\272"; # 46
- $test++;
-
- ok_bytes $1, "\342\230\272"; # 47
- $test++;
-
- ok_bytes $&, "x\342\230\272"; # 48
- $test++;
-
- $_ = "\342\230\272\342\230\272x\342\230\272y\342\230\272";
-
- ok length($_), 14; # 49
- $test++;
-
- ($a) = m/x(.)/;
-
- ok length($a), 1; # 50
- $test++;
-
- ok length($`), 6; # 51
- $test++;
-
- ok length($&), 2; # 52
- $test++;
-
- ok length($'), 6; # 53
- $test++;
-
- ok length($1), 1; # 54
- $test++;
-
- ok length($b=$`), 6; # 55
- $test++;
-
- ok length($b=$&), 2; # 56
- $test++;
-
- ok length($b=$'), 6; # 57
- $test++;
-
- ok length($b=$1), 1; # 58
- $test++;
-
- ok $a, "\342"; # 59
- $test++;
-
- ok $`, "\342\230\272\342\230\272"; # 60
- $test++;
-
- ok $&, "x\342"; # 61
- $test++;
-
- ok $', "\230\272y\342\230\272"; # 62
- $test++;
-
- ok $1, "\342"; # 63
- $test++;
-}
-
-{
- use utf8;
- ok "\x{ab}" =~ /^\x{ab}$/, 1;
- $test++; # 64
-}
-
-{
- use utf8;
- ok_bytes chr(0x1e2), pack("C*", 0xc7, 0xa2);
- $test++; # 65
-}
-
-{
- use utf8;
- my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
- ok "@a", "1234 123 2345";
- $test++; # 66
-}
-
-{
- use utf8;
- my $x = chr(123);
- my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345)));
- ok "@a", "1234 2345";
- $test++; # 67
-}
+# NOTE!
+#
+# Think carefully before adding tests here. In general this should be
+# used only for about three categories of tests:
+#
+# (1) tests that absolutely require 'use utf8', and since that in general
+# shouldn't be needed as the utf8 is being obsoleted, this should
+# have rather few tests. If you want to test Unicode and regexes,
+# you probably want to go to op/regexp or op/pat; if you want to test
+# split, go to op/split; pack, op/pack; appending or joining,
+# op/append or op/join, and so forth
+#
+# (2) tests that have to do with Unicode tokenizing (though it's likely
+# that all the other Unicode tests sprinkled around the t/**/*.t are
+# going to catch that)
+#
+# (3) complicated tests that simultaneously stress so many Unicode features
+# that deciding into which other test script the tests should go to
+# is hard -- maybe consider breaking up the complicated test
+#
+#
+
+use Test;
+plan tests => 15;
{
# bug id 20001009.001
my ($a, $b);
{ use bytes; $a = "\xc3\xa4" }
- { use utf8; $b = "\xe4" } # \xXX must not produce UTF-8
+ { use utf8; $b = "\xe4" }
- print "not " if $a eq $b;
- print "ok $test\n"; $test++; # 68
-
- { use utf8; print "not " if $a eq $b; }
- print "ok $test\n"; $test++; # 69
-}
+ my $test = 68;
-{
- # bug id 20001008.001
-
- my @x = ("stra\337e 138","stra\337e 138");
- for (@x) {
- s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
- my($latin) = /^(.+)(?:\s+\d)/;
- print $latin eq "stra\337e" ? "ok $test\n" : # 70, 71
- "#latin[$latin]\nnot ok $test\n";
- $test++;
- $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
- use utf8;
- $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
- }
-}
-
-{
- # bug id 20000427.003
-
- use utf8;
- use warnings;
- use strict;
-
- my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}";
-
- my @charlist = split //, $sushi;
- my $r = '';
- foreach my $ch (@charlist) {
- $r = $r . " " . sprintf "U+%04X", ord($ch);
- }
+ ok($a ne $b);
- print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B";
- print "ok $test\n"; # 72
- $test++;
+ { use utf8; ok($a ne $b) }
}
-{
- # bug id 20000426.003
-
- use utf8;
-
- my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20";
-
- my ($a, $b, $c) = split(/\x40/, $s);
- print "not "
- unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a;
- print "ok $test\n";
- $test++; # 73
-
- my ($a, $b) = split(/\x{100}/, $s);
- print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20";
- print "ok $test\n";
- $test++; # 74
-
- my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s);
- print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20";
- print "ok $test\n";
- $test++; # 75
-
- my ($a, $b) = split(/\x40\x{80}/, $s);
- print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20";
- print "ok $test\n";
- $test++; # 76
-
- my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s);
- print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20";
- print "ok $test\n";
- $test++; # 77
-}
{
# bug id 20000730.004
- use utf8;
-
my $smiley = "\x{263a}";
- for my $s ("\x{263a}", # 78
- $smiley, # 79
+ for my $s ("\x{263a}",
+ $smiley,
- "" . $smiley, # 80
- "" . "\x{263a}", # 81
+ "" . $smiley,
+ "" . "\x{263a}",
- $smiley . "", # 82
- "\x{263a}" . "", # 83
+ $smiley . "",
+ "\x{263a}" . "",
) {
my $length_chars = length($s);
my $length_bytes;
my $regex_chars = @regex_chars;
my @split_chars = split //, $s;
my $split_chars = @split_chars;
- print "not "
- unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
- "1/1/1/3";
- print "ok $test\n";
- $test++;
+ ok("$length_chars/$regex_chars/$split_chars/$length_bytes" eq
+ "1/1/1/3");
}
- for my $s ("\x{263a}" . "\x{263a}", # 84
- $smiley . $smiley, # 85
+ for my $s ("\x{263a}" . "\x{263a}",
+ $smiley . $smiley,
- "\x{263a}\x{263a}", # 86
- "$smiley$smiley", # 87
+ "\x{263a}\x{263a}",
+ "$smiley$smiley",
- "\x{263a}" x 2, # 88
- $smiley x 2, # 89
+ "\x{263a}" x 2,
+ $smiley x 2,
) {
my $length_chars = length($s);
my $length_bytes;
my $regex_chars = @regex_chars;
my @split_chars = split //, $s;
my $split_chars = @split_chars;
- print "not "
- unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
- "2/2/2/6";
- print "ok $test\n";
- $test++;
+ ok("$length_chars/$regex_chars/$split_chars/$length_bytes" eq
+ "2/2/2/6");
}
}
-{
- use utf8;
-
- print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
- print "ok $test\n";
- $test++; # 90
-
- print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
- print "ok $test\n";
- $test++; # 91
-
- print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
- print "ok $test\n";
- $test++; # 92
-
- print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
- print "ok $test\n";
- $test++; # 93
-
- print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
- print "ok $test\n";
- $test++; # 94
-
- print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
- print "ok $test\n";
- $test++; # 95
-
- print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
- print "ok $test\n";
- $test++; # 96
-
- print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
- print "ok $test\n";
- $test++; # 97
-}
{
- # the first half of 20001028.003
-
- my $X = chr(1448);
- my ($Y) = $X =~ /(.*)/;
- print "not " unless $Y eq v1448 && length($Y) == 1;
- print "ok $test\n";
- $test++; # 98
-}
-
-{
- # 20001108.001
-
- use utf8;
- my $X = "Szab\x{f3},Bal\x{e1}zs";
- my $Y = $X;
- $Y =~ s/(B)/$1/ for 0..3;
- print "not " unless $Y eq $X && $X eq "Szab\x{f3},Bal\x{e1}zs";
- print "ok $test\n";
- $test++; # 99
-}
-
-{
- # 20001114.001
-
- use utf8;
- use charnames ':full';
- my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}";
- print "not " unless $text eq "\xc4" && ord($text) == 0xc4;
- print "ok $test\n";
- $test++; # 100
-}
-
-{
- # 20001205.014
-
- use utf8;
-
- my $a = "ABC\x{263A}";
-
- my @b = split( //, $a );
-
- print "not " unless @b == 4;
- print "ok $test\n";
- $test++; # 101
-
- print "not " unless length($b[3]) == 1 && $b[3] eq "\x{263A}";
- print "ok $test\n";
- $test++; # 102
-
- $a =~ s/^A/Z/;
- print "not " unless length($a) == 4 && $a eq "ZBC\x{263A}";
- print "ok $test\n";
- $test++; # 103
-}
-
-{
- # the second half of 20001028.003
-
- use utf8;
- $X =~ s/^/chr(1488)/e;
- print "not " unless length $X == 1 && ord($X) == 1488;
- print "ok $test\n";
- $test++; # 104
-}
-
-{
- # 20000517.001
-
- my $x = "\x{100}A";
-
- $x =~ s/A/B/;
-
- print "not " unless $x eq "\x{100}B" && length($x) == 2;
- print "ok $test\n";
- $test++; # 105
-}
-
-{
- use utf8;
-
- my @a = split(/\xFE/, "\xFF\xFE\xFD");
-
- print "not " unless @a == 2 && $a[0] eq "\xFF" && $a[1] eq "\xFD";
- print "ok $test\n";
- $test++; # 106
-}
-
-{
- use utf8;
-
my $w = 0;
local $SIG{__WARN__} = sub { print "#($_[0])\n"; $w++ };
my $x = eval q/"\\/ . "\x{100}" . q/"/;;
- print "not " unless $w == 0 && $x eq "\x{100}";
- print "ok $test\n";
- $test++; # 107
+ ok($w == 0 && $x eq "\x{100}");
}
-{
- # bug id 20001230.002
-
- use utf8;
-
- print "not " unless "École" =~ /^\C\C(.)/ && $1 eq 'c';
- print "ok $test\n";
- $test++; # 108
-
- print "not " unless "École" =~ /^\C\C(c)/;
- print "ok $test\n";
- $test++; # 109
-}