Commit | Line | Data |
1d2654e1 |
1 | BEGIN { |
2 | unless ("A" eq pack('U', 0x41)) { |
3 | print "1..0 # Unicode::Collate " . |
4 | "cannot stringify a Unicode code point\n"; |
5 | exit 0; |
6 | } |
1d2654e1 |
7 | if ($ENV{PERL_CORE}) { |
3756e7ca |
8 | chdir('t') if -d 't'; |
9 | @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); |
1d2654e1 |
10 | } |
11 | } |
12 | |
13 | use Test; |
91ae00cb |
14 | BEGIN { plan tests => 72 }; |
1d2654e1 |
15 | |
16 | use strict; |
17 | use warnings; |
18 | use Unicode::Collate; |
19 | |
1d2654e1 |
20 | ######################### |
21 | |
91ae00cb |
22 | ok(1); |
1d2654e1 |
23 | |
24 | # a standard collator (3.1.1) |
25 | my $Collator = Unicode::Collate->new( |
26 | table => 'keys.txt', |
27 | normalization => undef, |
28 | ); |
29 | |
30 | |
31 | # a collator for hangul sorting, |
32 | # cf. http://std.dkuug.dk/JTC1/SC22/WG20/docs/documents.html |
e7f779c8 |
33 | # http://std.dkuug.dk/JTC1/SC22/WG20/docs/n1051-hangulsort.pdf |
1d2654e1 |
34 | my $hangul = Unicode::Collate->new( |
35 | level => 3, |
36 | table => undef, |
37 | normalization => undef, |
91ae00cb |
38 | |
1d2654e1 |
39 | entry => <<'ENTRIES', |
40 | 0061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A |
41 | 0041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A |
42 | #1161 ; [.1800.0020.0002] # <comment> initial jungseong A |
43 | #1163 ; [.1801.0020.0002] # <comment> initial jungseong YA |
44 | 1100 ; [.1831.0020.0002] # choseong KIYEOK |
45 | 1100 1161 ; [.1831.0020.0002][.1800.0020.0002] # G-A |
46 | 1100 1163 ; [.1831.0020.0002][.1801.0020.0002] # G-YA |
47 | 1101 ; [.1831.0020.0002][.1831.0020.0002] # choseong SSANGKIYEOK |
48 | 1101 1161 ; [.1831.0020.0002][.1831.0020.0002][.1800.0020.0002] # GG-A |
49 | 1101 1163 ; [.1831.0020.0002][.1831.0020.0002][.1801.0020.0002] # GG-YA |
50 | 1102 ; [.1833.0020.0002] # choseong NIEUN |
51 | 1102 1161 ; [.1833.0020.0002][.1800.0020.0002] # N-A |
52 | 1102 1163 ; [.1833.0020.0002][.1801.0020.0002] # N-YA |
53 | 3042 ; [.1921.0020.000E] # HIRAGANA LETTER A |
54 | 11A8 ; [.FE10.0020.0002] # jongseong KIYEOK |
55 | 11A9 ; [.FE10.0020.0002][.FE10.0020.0002] # jongseong SSANGKIYEOK |
56 | 1161 ; [.FE20.0020.0002] # jungseong A <non-initial> |
57 | 1163 ; [.FE21.0020.0002] # jungseong YA <non-initial> |
58 | ENTRIES |
59 | ); |
60 | |
61 | ok(ref $hangul, "Unicode::Collate"); |
62 | |
91ae00cb |
63 | my $trailwt = Unicode::Collate->new( |
64 | level => 3, |
65 | table => undef, |
66 | normalization => undef, |
67 | hangul_terminator => 16, |
68 | |
69 | entry => <<'ENTRIES', # Term < Jongseong < Jungseong < Choseong |
70 | 0061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A |
71 | 0041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A |
72 | 11A8 ; [.1801.0020.0002] # HANGUL JONGSEONG KIYEOK |
73 | 11A9 ; [.1801.0020.0002][.1801.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK |
74 | 1161 ; [.1831.0020.0002] # HANGUL JUNGSEONG A |
75 | 1163 ; [.1832.0020.0002] # HANGUL JUNGSEONG YA |
76 | 1100 ; [.1861.0020.0002] # HANGUL CHOSEONG KIYEOK |
77 | 1101 ; [.1861.0020.0002][.1861.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK |
78 | 1102 ; [.1862.0020.0002] # HANGUL CHOSEONG NIEUN |
79 | 3042 ; [.1921.0020.000E] # HIRAGANA LETTER A |
80 | ENTRIES |
81 | ); |
82 | |
1d2654e1 |
83 | ######################### |
84 | |
85 | # L(simp)L(simp) vs L(comp): /GGA/ |
86 | ok($Collator->lt("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); |
87 | ok($hangul ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); |
91ae00cb |
88 | ok($trailwt ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); |
1d2654e1 |
89 | |
90 | # L(simp) vs L(simp)L(simp): /GA/ vs /GGA/ |
91 | ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); |
92 | ok($hangul ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); |
91ae00cb |
93 | ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); |
1d2654e1 |
94 | |
95 | # T(simp)T(simp) vs T(comp): /AGG/ |
96 | ok($Collator->lt("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); |
97 | ok($hangul ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); |
91ae00cb |
98 | ok($trailwt ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); |
1d2654e1 |
99 | |
100 | # T(simp) vs T(simp)T(simp): /AG/ vs /AGG/ |
101 | ok($Collator->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); |
102 | ok($hangul ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); |
91ae00cb |
103 | ok($trailwt ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); |
1d2654e1 |
104 | |
105 | # LV vs LLV: /GA/ vs /GNA/ |
106 | ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); |
107 | ok($hangul ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); |
91ae00cb |
108 | ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); |
1d2654e1 |
109 | |
110 | # LVX vs LVV: /GAA/ vs /GA/.latinA |
111 | ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); |
112 | ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); |
91ae00cb |
113 | ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); |
1d2654e1 |
114 | |
115 | # LVX vs LVV: /GAA/ vs /GA/.hiraganaA |
116 | ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); |
117 | ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); |
91ae00cb |
118 | ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); |
1d2654e1 |
119 | |
120 | # LVX vs LVV: /GAA/ vs /GA/.hanja |
121 | ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); |
122 | ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); |
91ae00cb |
123 | ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); |
1d2654e1 |
124 | |
125 | # LVL vs LVT: /GA/./G/ vs /GAG/ |
126 | ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); |
127 | ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); |
91ae00cb |
128 | ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); |
1d2654e1 |
129 | |
130 | # LVT vs LVX: /GAG/ vs /GA/.latinA |
131 | ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); |
132 | ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); |
91ae00cb |
133 | ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); |
1d2654e1 |
134 | |
135 | # LVT vs LVX: /GAG/ vs /GA/.hiraganaA |
136 | ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); |
137 | ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); |
91ae00cb |
138 | ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); |
1d2654e1 |
139 | |
140 | # LVT vs LVX: /GAG/ vs /GA/.hanja |
141 | ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); |
142 | ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); |
91ae00cb |
143 | ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); |
1d2654e1 |
144 | |
145 | # LVT vs LVV: /GAG/ vs /GAA/ |
146 | ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); |
147 | ok($hangul ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); |
91ae00cb |
148 | ok($trailwt ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); |
1d2654e1 |
149 | |
150 | # LVL vs LVV: /GA/./G/ vs /GAA/ |
151 | ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); |
152 | ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); |
91ae00cb |
153 | ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); |
1d2654e1 |
154 | |
155 | # LV vs Syl(LV): /GA/ vs /[GA]/ |
156 | ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}")); |
157 | ok($hangul ->eq("\x{1100}\x{1161}", "\x{AC00}")); |
91ae00cb |
158 | ok($trailwt ->eq("\x{1100}\x{1161}", "\x{AC00}")); |
1d2654e1 |
159 | |
160 | # LVT vs Syl(LV)T: /GAG/ vs /[GA]G/ |
161 | ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); |
162 | ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); |
91ae00cb |
163 | ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); |
1d2654e1 |
164 | |
165 | # LVT vs Syl(LVT): /GAG/ vs /[GAG]/ |
166 | ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); |
167 | ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); |
91ae00cb |
168 | ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); |
1d2654e1 |
169 | |
170 | # LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/ |
171 | ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); |
172 | ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); |
91ae00cb |
173 | ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); |
1d2654e1 |
174 | |
175 | # LVTT vs Syl(LVT).T: /GAGG/ vs /[GAG]G/ |
176 | ok($Collator->gt("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); |
177 | ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); |
91ae00cb |
178 | ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); |
1d2654e1 |
179 | |
180 | # LLVT vs L.Syl(LVT): /GGAG/ vs /G[GAG]/ |
181 | ok($Collator->gt("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); |
182 | ok($hangul ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); |
91ae00cb |
183 | ok($trailwt ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); |
1d2654e1 |
184 | |
185 | ######################### |
186 | |
187 | # checks contraction in LVT: |
188 | # weights of these contractions may be non-sense. |
189 | |
190 | my $hangcont = Unicode::Collate->new( |
191 | level => 3, |
192 | table => undef, |
193 | normalization => undef, |
194 | entry => <<'ENTRIES', |
195 | 1100 ; [.1831.0020.0002] # HANGUL CHOSEONG KIYEOK |
196 | 1101 ; [.1832.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK |
197 | 1161 ; [.188D.0020.0002] # HANGUL JUNGSEONG A |
198 | 1162 ; [.188E.0020.0002] # HANGUL JUNGSEONG AE |
199 | 1163 ; [.188F.0020.0002] # HANGUL JUNGSEONG YA |
200 | 11A8 ; [.18CF.0020.0002] # HANGUL JONGSEONG KIYEOK |
201 | 11A9 ; [.18D0.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK |
202 | 1161 11A9 ; [.0000.0000.0000] # A-GG <contraction> |
203 | 1100 1163 11A8 ; [.1000.0020.0002] # G-YA-G <contraction> eq. U+AC39 |
204 | ENTRIES |
205 | ); |
206 | |
207 | # contracted into VT |
208 | ok($Collator->lt("\x{1101}", "\x{1101}\x{1161}\x{11A9}")); |
209 | ok($hangcont->eq("\x{1101}", "\x{1101}\x{1161}\x{11A9}")); |
210 | |
211 | # not contracted into LVT but into VT |
212 | ok($Collator->lt("\x{1100}", "\x{1100}\x{1161}\x{11A9}")); |
213 | ok($hangcont->eq("\x{1100}", "\x{1100}\x{1161}\x{11A9}")); |
214 | |
215 | # contracted into LVT |
216 | ok($Collator->gt("\x{1100}\x{1163}\x{11A8}", "\x{1100}")); |
217 | ok($hangcont->lt("\x{1100}\x{1163}\x{11A8}", "\x{1100}")); |
218 | |
219 | # LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/ |
220 | ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); |
221 | ok($hangcont->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); |
222 | |
223 | # LVT vs Syl(LVT): /GYAG/ vs /[GYAG]/ |
224 | ok($Collator->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}")); |
225 | ok($hangcont->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}")); |
226 | |
227 | 1; |
228 | __END__ |