# Tests for Unicode::Collate's handling of Hangul: conjoining jamo sequences
# (L = leading consonant, V = vowel, T = trailing consonant), precomposed
# syllables, circled Hangul syllables, and the hangul_terminator option.
#
# Notation used in the comments below:
#   /GA/    jamo sequence            [GA]  precomposed syllable
#   (GA)    circled syllable         <GA>  contraction defined in the table
#   X       a non-Hangul character following the syllable

BEGIN {
    # Emit a "skip all" plan and stop when pack('U', ...) does not produce
    # the expected character on this perl.
    unless ("A" eq pack('U', 0x41)) {
        print "1..0 # Unicode::Collate " .
            "cannot stringify a Unicode code point\n";
        exit 0;
    }
    # Inside the perl core test suite: run from t/ and load the library
    # from the build tree instead of an installed copy.
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 58 };

use strict;
use warnings;
use Unicode::Collate;

#########################

ok(1);

# Collation entries for the circled Hangul syllables U+326E..U+327B, each
# mapped to two collation elements. Shared by $Collator and $hangul, which
# differ only in hangul_terminator.
my $circled_syllables = <<'ENTRIES';
326E ; [.1831.0020.0006.326E][.188D.0020.0006.326E] # c.h.s. GA
326F ; [.1833.0020.0006.326F][.188D.0020.0006.326F] # c.h.s. NA
3270 ; [.1834.0020.0006.3270][.188D.0020.0006.3270] # c.h.s. DA
3271 ; [.1836.0020.0006.3271][.188D.0020.0006.3271] # c.h.s. RA
3272 ; [.1837.0020.0006.3272][.188D.0020.0006.3272] # c.h.s. MA
3273 ; [.1838.0020.0006.3273][.188D.0020.0006.3273] # c.h.s. BA
3274 ; [.183A.0020.0006.3274][.188D.0020.0006.3274] # c.h.s. SA
3275 ; [.183C.0020.0006.3275][.188D.0020.0006.3275] # c.h.s. A
3276 ; [.183D.0020.0006.3276][.188D.0020.0006.3276] # c.h.s. JA
3277 ; [.183F.0020.0006.3277][.188D.0020.0006.3277] # c.h.s. CA
3278 ; [.1840.0020.0006.3278][.188D.0020.0006.3278] # c.h.s. KA
3279 ; [.1841.0020.0006.3279][.188D.0020.0006.3279] # c.h.s. TA
327A ; [.1842.0020.0006.327A][.188D.0020.0006.327A] # c.h.s. PA
327B ; [.1843.0020.0006.327B][.188D.0020.0006.327B] # c.h.s. HA
ENTRIES

# a standard collator (3.1.1): no hangul_terminator
my $Collator = Unicode::Collate->new(
    level         => 1,
    table         => 'keys.txt',
    normalization => undef,
    entry         => $circled_syllables,
);

# Same table and entries, but with hangul_terminator switched on.
my $hangul = Unicode::Collate->new(
    level             => 1,
    table             => 'keys.txt',
    normalization     => undef,
    hangul_terminator => 16,
    entry             => $circled_syllables,
);

ok(ref $hangul, "Unicode::Collate");

#########################

# LVV vs LVX: /GAA/ vs /GA/.latinA
ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));

# LVV vs LVX: /GAA/ vs /GA/.hiraganaA
ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));

# LVV vs LVX: /GAA/ vs /GA/.hanja
ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));

# LVL vs LVT: /GA/./G/ vs /GAG/
ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));

# LVT vs LVX: /GAG/ vs /GA/.latinA
ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));

# LVT vs LVX: /GAG/ vs /GA/.hiraganaA
ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));

# LVT vs LVX: /GAG/ vs /GA/.hanja
ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));

# LV vs Syl(LV): /GA/ vs /[GA]/
ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
ok($hangul  ->eq("\x{1100}\x{1161}", "\x{AC00}"));

# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));

# LVT vs Syl(LVT): /GAG/ vs /[GAG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));

# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));

# LVT vs Syl(LVT) once more: /GAG/ vs /[GAG]/
# (same operands as the LVT vs Syl(LVT) pair above; kept so that the
# planned test count stays at 58)
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));

#########################

# Like $hangul, but every circled-syllable entry carries a third collation
# element [.10.0.0.0] whose primary weight 0x10 equals the hangul_terminator
# value (16) passed to the constructor.
my $hangcirc = Unicode::Collate->new(
    level             => 1,
    table             => 'keys.txt',
    normalization     => undef,
    hangul_terminator => 16,

    entry => <<'ENTRIES',
326E ; [.1831.0020.0006.326E][.188D.0020.0006.326E][.10.0.0.0] # c.h.s. GA
326F ; [.1833.0020.0006.326F][.188D.0020.0006.326F][.10.0.0.0] # c.h.s. NA
3270 ; [.1834.0020.0006.3270][.188D.0020.0006.3270][.10.0.0.0] # c.h.s. DA
3271 ; [.1836.0020.0006.3271][.188D.0020.0006.3271][.10.0.0.0] # c.h.s. RA
3272 ; [.1837.0020.0006.3272][.188D.0020.0006.3272][.10.0.0.0] # c.h.s. MA
3273 ; [.1838.0020.0006.3273][.188D.0020.0006.3273][.10.0.0.0] # c.h.s. BA
3274 ; [.183A.0020.0006.3274][.188D.0020.0006.3274][.10.0.0.0] # c.h.s. SA
3275 ; [.183C.0020.0006.3275][.188D.0020.0006.3275][.10.0.0.0] # c.h.s. A
3276 ; [.183D.0020.0006.3276][.188D.0020.0006.3276][.10.0.0.0] # c.h.s. JA
3277 ; [.183F.0020.0006.3277][.188D.0020.0006.3277][.10.0.0.0] # c.h.s. CA
3278 ; [.1840.0020.0006.3278][.188D.0020.0006.3278][.10.0.0.0] # c.h.s. KA
3279 ; [.1841.0020.0006.3279][.188D.0020.0006.3279][.10.0.0.0] # c.h.s. TA
327A ; [.1842.0020.0006.327A][.188D.0020.0006.327A][.10.0.0.0] # c.h.s. PA
327B ; [.1843.0020.0006.327B][.188D.0020.0006.327B][.10.0.0.0] # c.h.s. HA
ENTRIES
);

# LV vs Circled Syl(LV): /GA/ vs /(GA)/
ok($Collator->eq("\x{1100}\x{1161}", "\x{326E}"));
ok($hangul  ->gt("\x{1100}\x{1161}", "\x{326E}"));
ok($hangcirc->eq("\x{1100}\x{1161}", "\x{326E}"));

# LV vs Circled Syl(LV): followed by latin A
ok($Collator->eq("\x{1100}\x{1161}A", "\x{326E}A"));
ok($hangul  ->lt("\x{1100}\x{1161}A", "\x{326E}A"));
ok($hangcirc->eq("\x{1100}\x{1161}A", "\x{326E}A"));

# LV vs Circled Syl(LV): followed by hiragana A
ok($Collator->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
ok($hangul  ->lt("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
ok($hangcirc->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));

# LV vs Circled Syl(LV): followed by hanja
ok($Collator->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
ok($hangul  ->lt("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
ok($hangcirc->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));

#########################

# Checks contractions inside an LVT sequence.
# The weights given to these contractions may be nonsense; only the
# equivalence with the precomposed syllables is being tested.

my $hangcont = Unicode::Collate->new(
    level             => 1,
    table             => 'keys.txt',
    normalization     => undef,
    hangul_terminator => 16,

    entry => <<'ENTRIES',
1100 1161 ; [.1831.0020.0002.1100][.188D.0020.0002.1161] # KIYEOK+A
1161 11A8 ; [.188D.0020.0002.1161][.18CF.0020.0002.11A8] # A+KIYEOK
ENTRIES
);

# cont<LV> vs Syl(LV): /<GA>/ vs /[GA]/
ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
ok($hangcont->eq("\x{1100}\x{1161}", "\x{AC00}"));

# cont<LV>.T vs Syl(LV).T: /<GA>G/ vs /[GA]G/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
ok($hangcont->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));

# cont<LV>.T vs Syl(LVT): /<GA>G/ vs /[GAG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
ok($hangcont->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));

# L.cont<VT> vs Syl(LV).T: /D<AG>/ vs /[DA]G/
ok($Collator->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E4}\x{11A8}"));
ok($hangcont->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E4}\x{11A8}"));

# L.cont<VT> vs Syl(LVT): /D<AG>/ vs /[DAG]/
ok($Collator->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E5}"));
ok($hangcont->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E5}"));

#####

# Turning hangul_terminator on via change() must make $Collator give the
# same results that $hangul gave above ...
$Collator->change(hangul_terminator => 16);

ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
ok($Collator->gt("\x{1100}\x{1161}", "\x{326E}"));
ok($Collator->lt("\x{1100}\x{1161}A", "\x{326E}A"));
ok($Collator->lt("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
ok($Collator->lt("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));

# ... and turning it off again (0) must restore the original results.
$Collator->change(hangul_terminator => 0);

ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
ok($Collator->eq("\x{1100}\x{1161}", "\x{326E}"));
ok($Collator->eq("\x{1100}\x{1161}A", "\x{326E}A"));
ok($Collator->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
ok($Collator->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));

1;
__END__