/Compress/ modules are at version 2.021. Remove vestigial MAPs and comments.
[p5sagit/p5-mst-13.2.git] / ext / Unicode-Collate / t / hangul.t
Commit Line Data
# Tests for Unicode::Collate's handling of Hangul: sequences of conjoining
# jamo (choseong L, jungseong V, jongseong T) and precomposed syllables are
# compared under four collators:
#   $Collator - built from the table file 'keys.txt'
#   $hangul   - built from inline entries only
#   $trailwt  - inline entries plus the hangul_terminator option
#   $hangcont - inline entries that include jamo contractions

BEGIN {
    # Skip the whole file (TAP "1..0") when this perl cannot turn a
    # code point into the expected character via pack('U', ...).
    unless ("A" eq pack('U', 0x41)) {
        print "1..0 # Unicode::Collate " .
            "cannot stringify a Unicode code point\n";
        exit 0;
    }
    # When run as part of the perl core test suite, run from t/ and
    # restrict @INC to the core lib directory.
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
# 1 + 1 sanity checks, 20 groups of 3 comparisons, 5 groups of 2 comparisons.
BEGIN { plan tests => 72 };

use strict;
use warnings;
use Unicode::Collate;

#########################

ok(1);

# a standard collator (3.1.1)
my $Collator = Unicode::Collate->new(
    table => 'keys.txt',
    normalization => undef,
);


# a collator for hangul sorting,
# cf. http://std.dkuug.dk/JTC1/SC22/WG20/docs/documents.html
#     http://std.dkuug.dk/JTC1/SC22/WG20/docs/n1051-hangulsort.pdf
my $hangul = Unicode::Collate->new(
    level => 3,
    table => undef,
    normalization => undef,

    entry => <<'ENTRIES',
0061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A
0041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
#1161 ; [.1800.0020.0002] # <comment> initial jungseong A
#1163 ; [.1801.0020.0002] # <comment> initial jungseong YA
1100 ; [.1831.0020.0002] # choseong KIYEOK
1100 1161 ; [.1831.0020.0002][.1800.0020.0002] # G-A
1100 1163 ; [.1831.0020.0002][.1801.0020.0002] # G-YA
1101 ; [.1831.0020.0002][.1831.0020.0002] # choseong SSANGKIYEOK
1101 1161 ; [.1831.0020.0002][.1831.0020.0002][.1800.0020.0002] # GG-A
1101 1163 ; [.1831.0020.0002][.1831.0020.0002][.1801.0020.0002] # GG-YA
1102 ; [.1833.0020.0002] # choseong NIEUN
1102 1161 ; [.1833.0020.0002][.1800.0020.0002] # N-A
1102 1163 ; [.1833.0020.0002][.1801.0020.0002] # N-YA
3042 ; [.1921.0020.000E] # HIRAGANA LETTER A
11A8 ; [.FE10.0020.0002] # jongseong KIYEOK
11A9 ; [.FE10.0020.0002][.FE10.0020.0002] # jongseong SSANGKIYEOK
1161 ; [.FE20.0020.0002] # jungseong A <non-initial>
1163 ; [.FE21.0020.0002] # jungseong YA <non-initial>
ENTRIES
);

# The constructor must have returned a Unicode::Collate object.
ok(ref $hangul, "Unicode::Collate");

# Same idea as $hangul, but with single-jamo entries only and the
# hangul_terminator option set.
my $trailwt = Unicode::Collate->new(
    level => 3,
    table => undef,
    normalization => undef,
    hangul_terminator => 16,

    entry => <<'ENTRIES', # Term < Jongseong < Jungseong < Choseong
0061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A
0041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
11A8 ; [.1801.0020.0002] # HANGUL JONGSEONG KIYEOK
11A9 ; [.1801.0020.0002][.1801.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
1161 ; [.1831.0020.0002] # HANGUL JUNGSEONG A
1163 ; [.1832.0020.0002] # HANGUL JUNGSEONG YA
1100 ; [.1861.0020.0002] # HANGUL CHOSEONG KIYEOK
1101 ; [.1861.0020.0002][.1861.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
1102 ; [.1862.0020.0002] # HANGUL CHOSEONG NIEUN
3042 ; [.1921.0020.000E] # HIRAGANA LETTER A
ENTRIES
);

#########################

# L(simp)L(simp) vs L(comp): /GGA/
ok($Collator->lt("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
ok($hangul  ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
ok($trailwt ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));

# L(simp) vs L(simp)L(simp): /GA/ vs /GGA/
ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
ok($hangul  ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));

# T(simp)T(simp) vs T(comp): /AGG/
ok($Collator->lt("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
ok($hangul  ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
ok($trailwt ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));

# T(simp) vs T(simp)T(simp): /AG/ vs /AGG/
ok($Collator->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
ok($hangul  ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
ok($trailwt ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));

# LV vs LLV: /GA/ vs /GNA/
ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
ok($hangul  ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));

# LVV vs LVX: /GAA/ vs /GA/.latinA
ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));

# LVV vs LVX: /GAA/ vs /GA/.hiraganaA
ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));

# LVV vs LVX: /GAA/ vs /GA/.hanja
ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));

# LVL vs LVT: /GA/./G/ vs /GAG/
ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));

# LVT vs LVX: /GAG/ vs /GA/.latinA
ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));

# LVT vs LVX: /GAG/ vs /GA/.hiraganaA
ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));

# LVT vs LVX: /GAG/ vs /GA/.hanja
ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));

# LVT vs LVV: /GAG/ vs /GAA/
ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
ok($hangul  ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));

# LVL vs LVV: /GA/./G/ vs /GAA/
ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));

# LV vs Syl(LV): /GA/ vs /[GA]/
ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
ok($hangul  ->eq("\x{1100}\x{1161}", "\x{AC00}"));
ok($trailwt ->eq("\x{1100}\x{1161}", "\x{AC00}"));

# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));

# LVT vs Syl(LVT): /GAG/ vs /[GAG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));

# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));

# LVTT vs Syl(LVT).T: /GAGG/ vs /[GAG]G/
ok($Collator->gt("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));

# LLVT vs L.Syl(LVT): /GGAG/ vs /G[GAG]/
ok($Collator->gt("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
ok($hangul  ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
ok($trailwt ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));

#########################

# checks contraction in LVT:
# weights of these contractions may be non-sense.

my $hangcont = Unicode::Collate->new(
    level => 3,
    table => undef,
    normalization => undef,
    entry => <<'ENTRIES',
1100 ; [.1831.0020.0002] # HANGUL CHOSEONG KIYEOK
1101 ; [.1832.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
1161 ; [.188D.0020.0002] # HANGUL JUNGSEONG A
1162 ; [.188E.0020.0002] # HANGUL JUNGSEONG AE
1163 ; [.188F.0020.0002] # HANGUL JUNGSEONG YA
11A8 ; [.18CF.0020.0002] # HANGUL JONGSEONG KIYEOK
11A9 ; [.18D0.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
1161 11A9 ; [.0000.0000.0000] # A-GG <contraction>
1100 1163 11A8 ; [.1000.0020.0002] # G-YA-G <contraction> eq. U+AC39
ENTRIES
);

# contracted into VT
ok($Collator->lt("\x{1101}", "\x{1101}\x{1161}\x{11A9}"));
ok($hangcont->eq("\x{1101}", "\x{1101}\x{1161}\x{11A9}"));

# not contracted into LVT but into VT
ok($Collator->lt("\x{1100}", "\x{1100}\x{1161}\x{11A9}"));
ok($hangcont->eq("\x{1100}", "\x{1100}\x{1161}\x{11A9}"));

# contracted into LVT
ok($Collator->gt("\x{1100}\x{1163}\x{11A8}", "\x{1100}"));
ok($hangcont->lt("\x{1100}\x{1163}\x{11A8}", "\x{1100}"));

# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
ok($hangcont->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));

# LVT vs Syl(LVT): /GYAG/ vs /[GYAG]/
ok($Collator->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}"));
ok($hangcont->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}"));

1;
__END__