Commit | Line | Data |
3756e7ca |
1 | BEGIN { |
2 | unless ("A" eq pack('U', 0x41)) { |
3 | print "1..0 # Unicode::Collate " . |
4 | "cannot stringify a Unicode code point\n"; |
5 | exit 0; |
6 | } |
7 | if ($ENV{PERL_CORE}) { |
8 | chdir('t') if -d 't'; |
9 | @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); |
10 | } |
11 | } |
12 | |
13 | use Test; |
14 | BEGIN { plan tests => 76 }; |
15 | |
16 | use strict; |
17 | use warnings; |
18 | use Unicode::Collate; |
19 | |
20 | ok(1); |
21 | |
22 | ##### 2..6 |
23 | |
24 | my $all_undef_8 = Unicode::Collate->new( |
25 | table => undef, |
26 | normalization => undef, |
27 | overrideCJK => undef, |
28 | overrideHangul => undef, |
29 | UCA_Version => 8, |
30 | ); |
31 | |
32 | # All in the Unicode code point order. |
33 | # No hangul decomposition. |
34 | |
35 | ok($all_undef_8->lt("\x{3402}", "\x{4E00}")); |
36 | ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}")); |
37 | ok($all_undef_8->lt("\x{4E00}", "\x{AC00}")); |
38 | ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}")); |
39 | ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}")); |
40 | |
41 | |
42 | ##### 7..11 |
43 | |
44 | my $all_undef_9 = Unicode::Collate->new( |
45 | table => undef, |
46 | normalization => undef, |
47 | overrideCJK => undef, |
48 | overrideHangul => undef, |
49 | UCA_Version => 9, |
50 | ); |
51 | |
52 | # CJK Ideo. < CJK ext A/B < Others. |
53 | # No hangul decomposition. |
54 | |
55 | ok($all_undef_9->lt("\x{4E00}", "\x{3402}")); |
56 | ok($all_undef_9->lt("\x{3402}", "\x{20000}")); |
57 | ok($all_undef_9->lt("\x{20000}", "\x{AC00}")); |
58 | ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}")); |
59 | ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}")); # U+ABFF: not assigned |
60 | |
61 | ##### 12..16 |
62 | |
63 | my $ignoreHangul = Unicode::Collate->new( |
64 | table => undef, |
65 | normalization => undef, |
66 | overrideHangul => sub {()}, |
67 | entry => <<'ENTRIES', |
68 | AE00 ; [.0100.0020.0002.AE00] # Hangul GEUL |
69 | ENTRIES |
70 | ); |
71 | |
72 | # All Hangul Syllables except U+AE00 are ignored. |
73 | |
74 | ok($ignoreHangul->eq("\x{AC00}", "")); |
75 | ok($ignoreHangul->lt("\x{AC00}", "\0")); |
76 | ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}")); |
77 | ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored. |
78 | ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned. |
79 | |
80 | |
81 | my $ignoreCJK = Unicode::Collate->new( |
82 | table => undef, |
83 | normalization => undef, |
84 | overrideCJK => sub {()}, |
85 | entry => <<'ENTRIES', |
86 | 5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter" |
87 | ENTRIES |
88 | ); |
89 | |
90 | # All CJK Unified Ideographs except U+5B57 are ignored. |
91 | |
92 | ##### 17..21 |
93 | ok($ignoreCJK->eq("\x{4E00}", "")); |
94 | ok($ignoreCJK->lt("\x{4E00}", "\0")); |
95 | ok($ignoreCJK->eq("Pe\x{4E00}rl", "Perl")); # U+4E00 is a CJK. |
96 | ok($ignoreCJK->gt("\x{4DFF}", "\x{4E00}")); # U+4DFF is not CJK. |
97 | ok($ignoreCJK->lt("Pe\x{5B57}rl", "Perl")); # 'r' is unassigned. |
98 | |
99 | ##### 22..29 |
100 | ok($ignoreCJK->eq("\x{3400}", "")); |
101 | ok($ignoreCJK->eq("\x{4DB5}", "")); |
102 | ok($ignoreCJK->eq("\x{9FA5}", "")); |
103 | ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0 |
104 | ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0 |
105 | ok($ignoreCJK->gt("\x{9FBC}", "Perl")); |
106 | ok($ignoreCJK->eq("\x{20000}", "")); |
107 | ok($ignoreCJK->eq("\x{2A6D6}", "")); |
108 | |
109 | ##### 30..37 |
110 | $ignoreCJK->change(UCA_Version => 9); |
111 | ok($ignoreCJK->eq("\x{3400}", "")); |
112 | ok($ignoreCJK->eq("\x{4DB5}", "")); |
113 | ok($ignoreCJK->eq("\x{9FA5}", "")); |
114 | ok($ignoreCJK->gt("\x{9FA6}", "Perl")); |
115 | ok($ignoreCJK->gt("\x{9FBB}", "Perl")); |
116 | ok($ignoreCJK->gt("\x{9FBC}", "Perl")); |
117 | ok($ignoreCJK->eq("\x{20000}", "")); |
118 | ok($ignoreCJK->eq("\x{2A6D6}", "")); |
119 | |
120 | ##### 38..45 |
121 | $ignoreCJK->change(UCA_Version => 8); |
122 | ok($ignoreCJK->eq("\x{3400}", "")); |
123 | ok($ignoreCJK->eq("\x{4DB5}", "")); |
124 | ok($ignoreCJK->eq("\x{9FA5}", "")); |
125 | ok($ignoreCJK->gt("\x{9FA6}", "Perl")); |
126 | ok($ignoreCJK->gt("\x{9FBB}", "Perl")); |
127 | ok($ignoreCJK->gt("\x{9FBC}", "Perl")); |
128 | ok($ignoreCJK->eq("\x{20000}", "")); |
129 | ok($ignoreCJK->eq("\x{2A6D6}", "")); |
130 | |
131 | ##### 46..53 |
132 | $ignoreCJK->change(UCA_Version => 14); |
133 | ok($ignoreCJK->eq("\x{3400}", "")); |
134 | ok($ignoreCJK->eq("\x{4DB5}", "")); |
135 | ok($ignoreCJK->eq("\x{9FA5}", "")); |
136 | ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0 |
137 | ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0 |
138 | ok($ignoreCJK->gt("\x{9FBC}", "Perl")); |
139 | ok($ignoreCJK->eq("\x{20000}", "")); |
140 | ok($ignoreCJK->eq("\x{2A6D6}", "")); |
141 | |
142 | ##### 54..76 |
143 | my $overCJK = Unicode::Collate->new( |
144 | table => undef, |
145 | normalization => undef, |
146 | entry => <<'ENTRIES', |
147 | 0061 ; [.0101.0020.0002.0061] # latin a |
148 | 0041 ; [.0101.0020.0008.0041] # LATIN A |
149 | 4E00 ; [.B1FC.0030.0004.4E00] # Ideograph; B1FC = FFFF - 4E03. |
150 | ENTRIES |
151 | overrideCJK => sub { |
152 | my $u = 0xFFFF - $_[0]; # reversed |
153 | [$u, 0x20, 0x2, $u]; |
154 | }, |
155 | ); |
156 | |
157 | ok($overCJK->lt("a", "A")); # diff. at level 3. |
158 | ok($overCJK->lt( "\x{4E03}", "\x{4E00}")); # diff. at level 2. |
159 | ok($overCJK->lt("A\x{4E03}", "A\x{4E00}")); |
160 | ok($overCJK->lt("A\x{4E03}", "a\x{4E00}")); |
161 | ok($overCJK->lt("a\x{4E03}", "A\x{4E00}")); |
162 | |
163 | ok($overCJK->gt("a\x{3400}", "A\x{4DB5}")); |
164 | ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}")); |
165 | ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}")); |
166 | ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}")); |
167 | ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}")); |
168 | ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}")); |
169 | |
170 | $overCJK->change(UCA_Version => 9); |
171 | |
172 | ok($overCJK->gt("a\x{3400}", "A\x{4DB5}")); |
173 | ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}")); |
174 | ok($overCJK->lt("a\x{9FA5}", "A\x{9FA6}")); |
175 | ok($overCJK->lt("a\x{9FA6}", "A\x{9FBB}")); |
176 | ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}")); |
177 | ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}")); |
178 | |
179 | $overCJK->change(UCA_Version => 14); |
180 | |
181 | ok($overCJK->gt("a\x{3400}", "A\x{4DB5}")); |
182 | ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}")); |
183 | ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}")); |
184 | ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}")); |
185 | ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}")); |
186 | ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}")); |
187 | |