# Skip guard: if pack('U', 0x41) does not produce "A", print the plan line
# "1..0 # ..." (no tests to run) with the reason.
2 unless ("A" eq pack('U', 0x41)) {
3 print "1..0 # Unicode::Collate " .
4 "cannot stringify a Unicode code point\n";
# Library search path: '::lib' when $^O is 'MacOS', '../lib' otherwise.
9 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
# Declares 58 planned tests; keep this number in sync when adding or
# removing ok() calls.
14 BEGIN { plan tests => 58 };
20 #########################
24 # a standard collator (3.1.1)
# Baseline object for the comparisons in this file: normalization is
# disabled and, among the options visible here, no hangul_terminator is set.
# The table lines give each circled Hangul syllable U+326E..U+327B two
# collation elements (presumably supplied through the 'entry' option, as in
# the later constructors -- confirm).
25 my $Collator = Unicode::Collate->new(
28 normalization => undef,
31 326E ; [.1831.0020.0006.326E][.188D.0020.0006.326E] # c.h.s. GA
32 326F ; [.1833.0020.0006.326F][.188D.0020.0006.326F] # c.h.s. NA
33 3270 ; [.1834.0020.0006.3270][.188D.0020.0006.3270] # c.h.s. DA
34 3271 ; [.1836.0020.0006.3271][.188D.0020.0006.3271] # c.h.s. RA
35 3272 ; [.1837.0020.0006.3272][.188D.0020.0006.3272] # c.h.s. MA
36 3273 ; [.1838.0020.0006.3273][.188D.0020.0006.3273] # c.h.s. BA
37 3274 ; [.183A.0020.0006.3274][.188D.0020.0006.3274] # c.h.s. SA
38 3275 ; [.183C.0020.0006.3275][.188D.0020.0006.3275] # c.h.s. A
39 3276 ; [.183D.0020.0006.3276][.188D.0020.0006.3276] # c.h.s. JA
40 3277 ; [.183F.0020.0006.3277][.188D.0020.0006.3277] # c.h.s. CA
41 3278 ; [.1840.0020.0006.3278][.188D.0020.0006.3278] # c.h.s. KA
42 3279 ; [.1841.0020.0006.3279][.188D.0020.0006.3279] # c.h.s. TA
43 327A ; [.1842.0020.0006.327A][.188D.0020.0006.327A] # c.h.s. PA
44 327B ; [.1843.0020.0006.327B][.188D.0020.0006.327B] # c.h.s. HA
# Collator under test: same visible settings and the same circled-syllable
# table lines as $Collator, plus hangul_terminator => 16.
# NOTE(review): per the Unicode::Collate documentation, hangul_terminator is
# a primary weight appended after each Hangul syllable -- confirm.
48 my $hangul = Unicode::Collate->new(
51 normalization => undef,
52 hangul_terminator => 16,
55 326E ; [.1831.0020.0006.326E][.188D.0020.0006.326E] # c.h.s. GA
56 326F ; [.1833.0020.0006.326F][.188D.0020.0006.326F] # c.h.s. NA
57 3270 ; [.1834.0020.0006.3270][.188D.0020.0006.3270] # c.h.s. DA
58 3271 ; [.1836.0020.0006.3271][.188D.0020.0006.3271] # c.h.s. RA
59 3272 ; [.1837.0020.0006.3272][.188D.0020.0006.3272] # c.h.s. MA
60 3273 ; [.1838.0020.0006.3273][.188D.0020.0006.3273] # c.h.s. BA
61 3274 ; [.183A.0020.0006.3274][.188D.0020.0006.3274] # c.h.s. SA
62 3275 ; [.183C.0020.0006.3275][.188D.0020.0006.3275] # c.h.s. A
63 3276 ; [.183D.0020.0006.3276][.188D.0020.0006.3276] # c.h.s. JA
64 3277 ; [.183F.0020.0006.3277][.188D.0020.0006.3277] # c.h.s. CA
65 3278 ; [.1840.0020.0006.3278][.188D.0020.0006.3278] # c.h.s. KA
66 3279 ; [.1841.0020.0006.3279][.188D.0020.0006.3279] # c.h.s. TA
67 327A ; [.1842.0020.0006.327A][.188D.0020.0006.327A] # c.h.s. PA
68 327B ; [.1843.0020.0006.327B][.188D.0020.0006.327B] # c.h.s. HA
72 ok(ref $hangul, "Unicode::Collate");
74 #########################
# Notation used in the comments below (as the test strings show):
#   L = leading consonant jamo (e.g. U+1100), V = vowel jamo (U+1161),
#   T = trailing consonant jamo (U+11A8), X = a non-Hangul character,
#   Syl(...) = a precomposed Hangul syllable (e.g. U+AC00).
# Each case runs on $Collator (baseline) and on $hangul
# (hangul_terminator => 16). The two expectations differ only in the cases
# where hiragana or hanja follows the syllable.
76 # LVV vs LVX: /GAA/ vs /GA/.latinA
77 ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
78 ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
80 # LVV vs LVX: /GAA/ vs /GA/.hiraganaA
81 ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
82 ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
84 # LVV vs LVX: /GAA/ vs /GA/.hanja
85 ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
86 ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
88 # LVL vs LVT: /GA/./G/ vs /GAG/
89 ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
90 ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
92 # LVT vs LVX: /GAG/ vs /GA/.latinA
93 ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
94 ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
96 # LVT vs LVX: /GAG/ vs /GA/.hiraganaA
97 ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
98 ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
100 # LVT vs LVX: /GAG/ vs /GA/.hanja
101 ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
102 ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
104 # LV vs Syl(LV): /GA/ vs /[GA]/
105 ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
106 ok($hangul ->eq("\x{1100}\x{1161}", "\x{AC00}"));
108 # LVT vs Syl(LV)T: /GAG/ vs /[GA]G/
109 ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
110 ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
112 # LVT vs Syl(LVT): /GAG/ vs /[GAG]/
113 ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
114 ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
116 # LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
117 ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
118 ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
120 # LVT vs Syl(LVT), repeated: /GAG/ vs /[GAG]/ (same strings as the LVT vs Syl(LVT) case)
121 ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
122 ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
124 #########################
# Collator with hangul_terminator => 16 whose circled-syllable entries carry
# a third collation element [.10.0.0.0]. Its primary weight 0x10 equals the
# terminator value 16.
# NOTE(review): presumably this makes a circled syllable collate like a
# terminated jamo sequence -- the eq expectations on $hangcirc suggest so;
# confirm.
126 my $hangcirc = Unicode::Collate->new(
129 normalization => undef,
130 hangul_terminator => 16,
132 entry => <<'ENTRIES',
133 326E ; [.1831.0020.0006.326E][.188D.0020.0006.326E][.10.0.0.0] # c.h.s. GA
134 326F ; [.1833.0020.0006.326F][.188D.0020.0006.326F][.10.0.0.0] # c.h.s. NA
135 3270 ; [.1834.0020.0006.3270][.188D.0020.0006.3270][.10.0.0.0] # c.h.s. DA
136 3271 ; [.1836.0020.0006.3271][.188D.0020.0006.3271][.10.0.0.0] # c.h.s. RA
137 3272 ; [.1837.0020.0006.3272][.188D.0020.0006.3272][.10.0.0.0] # c.h.s. MA
138 3273 ; [.1838.0020.0006.3273][.188D.0020.0006.3273][.10.0.0.0] # c.h.s. BA
139 3274 ; [.183A.0020.0006.3274][.188D.0020.0006.3274][.10.0.0.0] # c.h.s. SA
140 3275 ; [.183C.0020.0006.3275][.188D.0020.0006.3275][.10.0.0.0] # c.h.s. A
141 3276 ; [.183D.0020.0006.3276][.188D.0020.0006.3276][.10.0.0.0] # c.h.s. JA
142 3277 ; [.183F.0020.0006.3277][.188D.0020.0006.3277][.10.0.0.0] # c.h.s. CA
143 3278 ; [.1840.0020.0006.3278][.188D.0020.0006.3278][.10.0.0.0] # c.h.s. KA
144 3279 ; [.1841.0020.0006.3279][.188D.0020.0006.3279][.10.0.0.0] # c.h.s. TA
145 327A ; [.1842.0020.0006.327A][.188D.0020.0006.327A][.10.0.0.0] # c.h.s. PA
146 327B ; [.1843.0020.0006.327B][.188D.0020.0006.327B][.10.0.0.0] # c.h.s. HA
# Jamo sequence L+V (U+1100 U+1161) against the circled syllable U+326E,
# alone and followed by another character. Expectations per collator:
#   $Collator : always eq
#   $hangul   : gt when nothing follows, lt when a character follows
#   $hangcirc : always eq
# NOTE(review): the lt results presumably arise because the terminator
# weight (16) is lower than the primary weight of the following character
# -- confirm.
150 # LV vs Circled Syl(LV): /GA/ vs /(GA)/
151 ok($Collator->eq("\x{1100}\x{1161}", "\x{326E}"));
152 ok($hangul ->gt("\x{1100}\x{1161}", "\x{326E}"));
153 ok($hangcirc->eq("\x{1100}\x{1161}", "\x{326E}"));
155 # LV vs Circled Syl(LV): followed by latin A
156 ok($Collator->eq("\x{1100}\x{1161}A", "\x{326E}A"));
157 ok($hangul ->lt("\x{1100}\x{1161}A", "\x{326E}A"));
158 ok($hangcirc->eq("\x{1100}\x{1161}A", "\x{326E}A"));
160 # LV vs Circled Syl(LV): followed by hiragana A
161 ok($Collator->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
162 ok($hangul ->lt("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
163 ok($hangcirc->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
165 # LV vs Circled Syl(LV): followed by hanja
166 ok($Collator->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
167 ok($hangul ->lt("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
168 ok($hangcirc->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
170 #########################
172 # checks contraction in LVT:
173 # the weights of these contractions may be nonsensical.
# The entries define two jamo contractions, L+V (U+1100 U+1161) and
# V+T (U+1161 U+11A8), on a collator with hangul_terminator => 16.
175 my $hangcont = Unicode::Collate->new(
178 normalization => undef,
179 hangul_terminator => 16,
181 entry => <<'ENTRIES',
182 1100 1161 ; [.1831.0020.0002.1100][.188D.0020.0002.1161] # KIYEOK+A
183 1161 11A8 ; [.188D.0020.0002.1161][.18CF.0020.0002.11A8] # A+KIYEOK
# Each case compares a jamo sequence with a precomposed syllable (plus an
# optional trailing jamo). Both the baseline $Collator and $hangcont, which
# has the contraction entries, are expected to report them equal.
# In the comments, <..> marks the jamo pair covered by a contraction entry
# and [..] marks a precomposed syllable.
187 # cont<LV> vs Syl(LV): /<GA>/ vs /[GA]/
188 ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
189 ok($hangcont->eq("\x{1100}\x{1161}", "\x{AC00}"));
191 # cont<LV>.T vs Syl(LV).T: /<GA>G/ vs /[GA]G/
192 ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
193 ok($hangcont->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
195 # cont<LV>.T vs Syl(LVT): /<GA>G/ vs /[GAG]/
196 ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
197 ok($hangcont->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
199 # L.cont<VT> vs Syl(LV).T: /D<AG>/ vs /[DA]G/
200 ok($Collator->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E4}\x{11A8}"));
201 ok($hangcont->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E4}\x{11A8}"));
203 # L.cont<VT> vs Syl(LVT): /D<AG>/ vs /[DAG]/
204 ok($Collator->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E5}"));
205 ok($hangcont->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E5}"));
# Toggle hangul_terminator on the existing $Collator object via change().
# With the value 16, the expectations are the ones used for $hangul in the
# earlier comparisons (gt/lt instead of lt/eq).
209 $Collator->change(hangul_terminator => 16);
# LVT vs LV.hanja
211 ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
# LV vs circled syllable: alone, then followed by latin A, hiragana A, hanja
212 ok($Collator->gt("\x{1100}\x{1161}", "\x{326E}"));
213 ok($Collator->lt("\x{1100}\x{1161}A", "\x{326E}A"));
214 ok($Collator->lt("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
215 ok($Collator->lt("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
# With the value 0, the same comparisons are expected to give the baseline
# results again (lt for the first, eq for the circled-syllable cases).
217 $Collator->change(hangul_terminator => 0);
219 ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
220 ok($Collator->eq("\x{1100}\x{1161}", "\x{326E}"));
221 ok($Collator->eq("\x{1100}\x{1161}A", "\x{326E}A"));
222 ok($Collator->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
223 ok($Collator->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));