# Upgrade to Unicode::Normalize 0.21 and Unicode::Collate 0.24,
# [p5sagit/p5-mst-13.2.git] / ext / Unicode / Normalize / t / func.t
# Commit | Line | Data

# Skip the whole file with an empty plan ("1..0") when pack('U', ...) cannot
# turn the code point of "A" back into the one-character string "A".
BEGIN {
    my $packs_code_points = "A" eq pack('U', 0x41)
                         || "A" eq pack('U', ord("A"));
    if (!$packs_code_points) {
        print "1..0 # Unicode::Normalize " .
              "cannot stringify a Unicode code point\n";
        exit 0;
    }
}

# When $ENV{PERL_CORE} is true: change into t/ if that directory exists and
# replace @INC with a single relative lib path (::lib on MacOS, ../lib
# everywhere else).
BEGIN {
    if ($ENV{PERL_CORE}) {
        -d 't' and chdir('t');
        @INC = ($^O eq 'MacOS') ? ('::lib') : ('../lib');
    }
}

#########################

use Test;
use strict;
use warnings;

# The plan is announced from a BEGIN block, i.e. at compile time and before
# the `use Unicode::Normalize` line below is processed.
BEGIN { plan(tests => 13) }
use Unicode::Normalize qw(:all);

# Test 1: getting this far means everything above compiled and loaded.
ok(1);

# Local shorthands for Unicode::Normalize's pack_U / unpack_U; every argument
# is forwarded unchanged and the callee's result is returned as-is.
sub _pack_U   { return Unicode::Normalize::pack_U(@_)   }
sub _unpack_U { return Unicode::Normalize::unpack_U(@_) }

#########################

# Test 2: getCombinClass() returns the expected combining class for a few
# sample code points.
{
    my $classes_ok =
           getCombinClass(0x0000) == 0
        && getCombinClass(0x0300) == 230
        && getCombinClass(0x0711) == 36
        # U+1D167 (Unicode 3.1) is only checked on perl 5.7.3 and later.
        && ($] < 5.007003 || getCombinClass(0x1D167) == 1);

    print(($classes_ok ? "ok" : "not ok"), " 2\n");
}

# Test 3: getCanon() returns the expected canonical decomposition, or undef
# for the code points listed without one.
{
    # Each row: a code point, followed by the code points of its expected
    # decomposition.  A row holding only the code point expects undef.
    my @canon_cases = (
        [ 0 ],
        [ 41 ],
        [ 0x00C0, 0x0041, 0x0300 ],
        [ 0x00EF, 0x0069, 0x0308 ],
        [ 0x304C, 0x304B, 0x3099 ],
        [ 0x1EA4, 0x0041, 0x0302, 0x0301 ],
        [ 0x1F82, 0x03B1, 0x0313, 0x0300, 0x0345 ],
        [ 0x1FAF, 0x03A9, 0x0314, 0x0342, 0x0345 ],
        [ 0xAC00, 0x1100, 0x1161 ],
        [ 0xAE00, 0x1100, 0x1173, 0x11AF ],
        [ 0x212C ],
        [ 0x3243 ],
        [ 0xFA2D, 0x9DB4 ],
    );

    # Walk the rows in order and stop at the first mismatch.
    my $all_match = 1;
    for my $row (@canon_cases) {
        my ($code_point, @decomposition) = @$row;
        my $got = getCanon($code_point);
        $all_match = @decomposition
            ? $got eq _pack_U(@decomposition)
            : !defined($got);
        last unless $all_match;
    }

    print(($all_match ? "ok" : "not ok"), " 3\n");
}

# Test 4: getCompat() returns the expected compatibility decomposition, or
# undef for the code points listed without one.
{
    # Each row: a code point, followed by the code points of its expected
    # decomposition.  A row holding only the code point expects undef.
    my @compat_cases = (
        [ 0 ],
        [ 41 ],
        [ 0x00C0, 0x0041, 0x0300 ],
        [ 0x00EF, 0x0069, 0x0308 ],
        [ 0x304C, 0x304B, 0x3099 ],
        [ 0x1EA4, 0x0041, 0x0302, 0x0301 ],
        [ 0x1F82, 0x03B1, 0x0313, 0x0300, 0x0345 ],
        [ 0x1FAF, 0x03A9, 0x0314, 0x0342, 0x0345 ],
        [ 0x212C, 0x0042 ],
        [ 0x3243, 0x0028, 0x81F3, 0x0029 ],
        [ 0xAC00, 0x1100, 0x1161 ],
        [ 0xAE00, 0x1100, 0x1173, 0x11AF ],
        [ 0xFA2D, 0x9DB4 ],
    );

    # Walk the rows in order and stop at the first mismatch.
    my $all_match = 1;
    for my $row (@compat_cases) {
        my ($code_point, @decomposition) = @$row;
        my $got = getCompat($code_point);
        $all_match = @decomposition
            ? $got eq _pack_U(@decomposition)
            : !defined($got);
        last unless $all_match;
    }

    print(($all_match ? "ok" : "not ok"), " 4\n");
}

# Test 5: getComposite() returns the expected composite for a pair of code
# points, or undef for the pairs listed with no composite.
{
    # Each row: first code point, second code point, expected composite
    # (undef when the pair is expected not to compose).
    my @pair_cases = (
        [ 0,      0,      undef  ],
        [ 0,      41,     undef  ],
        [ 41,     0,      undef  ],
        [ 41,     41,     undef  ],
        [ 12,     0x0300, undef  ],
        [ 0x0055, 0xFF00, undef  ],
        [ 0x0041, 0x0300, 0x00C0 ],
        [ 0x0055, 0x0300, 0x00D9 ],
        [ 0x0112, 0x0300, 0x1E14 ],
        [ 0x1100, 0x1161, 0xAC00 ],
        [ 0x1100, 0x1173, 0xADF8 ],
        [ 0x1100, 0x11AF, undef  ],
        [ 0x1173, 0x11AF, undef  ],
        [ 0xAC00, 0x11A7, undef  ],
        [ 0xAC00, 0x11A8, 0xAC01 ],
        [ 0xADF8, 0x11AF, 0xAE00 ],
    );

    # Walk the rows in order and stop at the first mismatch.
    my $all_match = 1;
    for my $row (@pair_cases) {
        my ($first, $second, $want) = @$row;
        my $got = getComposite($first, $second);
        $all_match = defined($want)
            ? $want == $got
            : !defined($got);
        last unless $all_match;
    }

    print(($all_match ? "ok" : "not ok"), " 5\n");
}

# Test 6: isExclusion() is false for 0 and 41 and true for three sample
# code points.
{
    # Each row: code point, then whether isExclusion() is expected to be true.
    my @exclusion_cases = (
        [ 0,      0 ],
        [ 41,     0 ],
        [ 0x0958, 1 ],  # 2392:  DEVANAGARI LETTER QA
        [ 0x0F43, 1 ],  # 3907:  TIBETAN LETTER GHA
        [ 0xFB4E, 1 ],  # 64334: HEBREW LETTER PE WITH RAFE
    );

    # Walk the rows in order and stop at the first mismatch.
    my $all_match = 1;
    for my $row (@exclusion_cases) {
        my ($code_point, $expect_excluded) = @$row;
        $all_match = $expect_excluded
            ? isExclusion($code_point)
            : !isExclusion($code_point);
        last unless $all_match;
    }

    print(($all_match ? "ok" : "not ok"), " 6\n");
}

# Test 7: isSingleton() is false for 0 and true for U+212B.
{
    my $singleton_ok = isSingleton(0)
        ? 0
        : isSingleton(0x212B);  # ANGSTROM SIGN

    print(($singleton_ok ? "ok" : "not ok"), " 7\n");
}

# Test 8: reorder() leaves the empty string alone and rearranges the marks
# after a base character into the order this test expects.
{
    # The same four marks are used in both cases: as given, and in the
    # order reorder() is expected to produce.
    my @marks_given    = (0x0300, 0x0315, 0x0313, 0x031b);
    my @marks_expected = (0x031b, 0x0300, 0x0313, 0x0315);

    # Trailing code points that are expected to come back unchanged.
    my @short_tail = (0x0061);
    my @long_tail  = (0x0061, 0x309A, 0x3099);

    my $reorder_ok =
           reorder("") eq ""
        && reorder(_pack_U(0x0041, @marks_given, @short_tail))
             eq _pack_U(0x0041, @marks_expected, @short_tail)
        && reorder(_pack_U(0x00C1, @marks_given, @long_tail))
             eq _pack_U(0x00C1, @marks_expected, @long_tail);

    print(($reorder_ok ? "ok" : "not ok"), " 8\n");
}

# Map a three-state result onto a label: an undefined (or missing) first
# argument gives "MAYBE", a true one "YES", any other defined value "NO".
sub answer {
    my ($verdict) = @_;
    return "MAYBE" unless defined $verdict;
    return $verdict ? "YES" : "NO";
}

# Test 9: every check function answers "YES" for the empty string and for a
# plain ASCII string, both through the checkNF* functions and through
# check() with a form name.
{
    my @checkers = (\&checkNFD, \&checkNFC, \&checkNFKD, \&checkNFKC);

    # Each row: the input string, then the form names passed to check().
    # The empty string is checked with the "NF"-prefixed names, the ASCII
    # string with the short ones.
    my @rounds = (
        [ "",             qw(NFD NFC NFKD NFKC) ],
        # U+0000 to U+007F are prenormalized in all the normalization forms.
        [ "AZaz\t12!#`",  qw(D C KD KC) ],
    );

    # Same evaluation order as a single && chain; stop at the first answer
    # that is not "YES".
    my $all_yes = 1;
    ROUND: for my $round (@rounds) {
        my ($string, @forms) = @$round;
        for my $checker (@checkers) {
            $all_yes = answer($checker->($string)) eq "YES"
                or last ROUND;
        }
        for my $form (@forms) {
            $all_yes = answer(check($form, $string)) eq "YES"
                or last ROUND;
        }
    }

    print(($all_yes ? "ok" : "not ok"), " 9\n");
}

# Test 10: the check functions give the expected YES / NO / MAYBE answer
# for a set of sample strings.
{
    my $decomposed    = NFD(_pack_U(0xC1, 0x1100, 0x1173, 0x11AF));
    my $check_via_nfc = sub { check("NFC", $_[0]) };

    # Each row: checker, input string, expected answer() label.
    my @cases = (
        [ \&checkNFD,     $decomposed,                                  "YES"   ],
        [ \&checkNFD,     _pack_U(0x20, 0xC1, 0x1100, 0x1173, 0x11AF),  "NO"    ],
        [ \&checkNFC,     _pack_U(0x20, 0xC1, 0x1173, 0x11AF),          "MAYBE" ],
        [ \&checkNFC,     _pack_U(0x20, 0xC1, 0xAE00, 0x1100),          "YES"   ],
        [ \&checkNFC,     _pack_U(0x20, 0xC1, 0xAE00, 0x1100, 0x300),   "MAYBE" ],
        [ \&checkNFC,     _pack_U(0x20, 0xC1, 0xFF71, 0x2025),          "YES"   ],
        [ $check_via_nfc, _pack_U(0x20, 0xC1, 0x212B, 0x300),           "NO"    ],
        [ \&checkNFKD,    _pack_U(0x20, 0xC1, 0xFF71, 0x2025),          "NO"    ],
        [ \&checkNFKC,    _pack_U(0x20, 0xC1, 0xAE00, 0x2025),          "NO"    ],
    );

    # Walk the rows in order and stop at the first mismatch.
    my $all_match = 1;
    for my $case (@cases) {
        my ($checker, $string, $want) = @$case;
        $all_match = answer($checker->($string)) eq $want;
        last unless $all_match;
    }

    print(($all_match ? "ok" : "not ok"), " 10\n");
}

# Tests 11-13: the capture variables $1 and $2 are handed to NFC() and
# normalize() directly (not through copies), so the match stays at file
# scope and the variables are passed as-is.
# s/^NF// in normalize() must not prevent using $1, $&, etc.
"012ABC" =~ /(\d+)(\w+)/;

print(("012" eq NFC($1) && "ABC" eq NFC($2)
        ? "ok" : "not ok"), " 11\n");

print(("012" eq normalize('C', $1) && "ABC" eq normalize('C', $2)
        ? "ok" : "not ok"), " 12\n");

print(("012" eq normalize('NFC', $1) && "ABC" eq normalize('NFC', $2)
        ? "ok" : "not ok"), " 13\n");