# Tests for Unicode::UCD.  The expected values below are pinned to the
# Unicode 4.0.0 data (see the UnicodeVersion check further down).

BEGIN {
    # Skip the whole file on EBCDIC platforms, where ord("A") is 193.
    if (ord("A") == 193) {
        print "1..0 # Skip: EBCDIC\n";
        exit 0;
    }
    # Run from t/ when it exists and load modules only from ../lib.
    chdir 't' if -d 't';
    @INC = '../lib';
    @INC = "::lib" if $^O eq 'MacOS'; # module parses @INC itself
}

use strict;
use Unicode::UCD;
use Test::More;

BEGIN { plan tests => 179 };

use Unicode::UCD 'charinfo';

my $charinfo;

# Every charinfo() group below checks the same 17 fields; only the first
# test of each group carries a description.

$charinfo = charinfo(0x41);

is($charinfo->{code},           '0041', 'LATIN CAPITAL LETTER A');
is($charinfo->{name},           'LATIN CAPITAL LETTER A');
is($charinfo->{category},       'Lu');
is($charinfo->{combining},      '0');
is($charinfo->{bidi},           'L');
is($charinfo->{decomposition},  '');
is($charinfo->{decimal},        '');
is($charinfo->{digit},          '');
is($charinfo->{numeric},        '');
is($charinfo->{mirrored},       'N');
is($charinfo->{unicode10},      '');
is($charinfo->{comment},        '');
is($charinfo->{upper},          '');
is($charinfo->{lower},          '0061');
is($charinfo->{title},          '');
is($charinfo->{block},          'Basic Latin');
is($charinfo->{script},         'Latin');

$charinfo = charinfo(0x100);

is($charinfo->{code},           '0100', 'LATIN CAPITAL LETTER A WITH MACRON');
is($charinfo->{name},           'LATIN CAPITAL LETTER A WITH MACRON');
is($charinfo->{category},       'Lu');
is($charinfo->{combining},      '0');
is($charinfo->{bidi},           'L');
is($charinfo->{decomposition},  '0041 0304');
is($charinfo->{decimal},        '');
is($charinfo->{digit},          '');
is($charinfo->{numeric},        '');
is($charinfo->{mirrored},       'N');
is($charinfo->{unicode10},      'LATIN CAPITAL LETTER A MACRON');
is($charinfo->{comment},        '');
is($charinfo->{upper},          '');
is($charinfo->{lower},          '0101');
is($charinfo->{title},          '');
is($charinfo->{block},          'Latin Extended-A');
is($charinfo->{script},         'Latin');

# 0x0590 is in the Hebrew block but unused.

$charinfo = charinfo(0x590);

is($charinfo->{code},          undef, '0x0590 - unused Hebrew');
is($charinfo->{name},          undef);
is($charinfo->{category},      undef);
is($charinfo->{combining},     undef);
is($charinfo->{bidi},          undef);
is($charinfo->{decomposition}, undef);
is($charinfo->{decimal},       undef);
is($charinfo->{digit},         undef);
is($charinfo->{numeric},       undef);
is($charinfo->{mirrored},      undef);
is($charinfo->{unicode10},     undef);
is($charinfo->{comment},       undef);
is($charinfo->{upper},         undef);
is($charinfo->{lower},         undef);
is($charinfo->{title},         undef);
is($charinfo->{block},         undef);
is($charinfo->{script},        undef);

# 0x05d0 is in the Hebrew block and used.

$charinfo = charinfo(0x5d0);

is($charinfo->{code},           '05D0', '05D0 - used Hebrew');
is($charinfo->{name},           'HEBREW LETTER ALEF');
is($charinfo->{category},       'Lo');
is($charinfo->{combining},      '0');
is($charinfo->{bidi},           'R');
is($charinfo->{decomposition},  '');
is($charinfo->{decimal},        '');
is($charinfo->{digit},          '');
is($charinfo->{numeric},        '');
is($charinfo->{mirrored},       'N');
is($charinfo->{unicode10},      '');
is($charinfo->{comment},        '');
is($charinfo->{upper},          '');
is($charinfo->{lower},          '');
is($charinfo->{title},          '');
is($charinfo->{block},          'Hebrew');
is($charinfo->{script},         'Hebrew');

# An open syllable in Hangul.

$charinfo = charinfo(0xAC00);

is($charinfo->{code},           'AC00', 'HANGUL SYLLABLE-AC00');
is($charinfo->{name},           'HANGUL SYLLABLE-AC00');
is($charinfo->{category},       'Lo');
is($charinfo->{combining},      '0');
is($charinfo->{bidi},           'L');
is($charinfo->{decomposition},  undef);
is($charinfo->{decimal},        '');
is($charinfo->{digit},          '');
is($charinfo->{numeric},        '');
is($charinfo->{mirrored},       'N');
is($charinfo->{unicode10},      '');
is($charinfo->{comment},        '');
is($charinfo->{upper},          '');
is($charinfo->{lower},          '');
is($charinfo->{title},          '');
is($charinfo->{block},          'Hangul Syllables');
is($charinfo->{script},         'Hangul');

# A closed syllable in Hangul.

$charinfo = charinfo(0xAE00);

is($charinfo->{code},           'AE00', 'HANGUL SYLLABLE-AE00');
is($charinfo->{name},           'HANGUL SYLLABLE-AE00');
is($charinfo->{category},       'Lo');
is($charinfo->{combining},      '0');
is($charinfo->{bidi},           'L');
is($charinfo->{decomposition},  undef);
is($charinfo->{decimal},        '');
is($charinfo->{digit},          '');
is($charinfo->{numeric},        '');
is($charinfo->{mirrored},       'N');
is($charinfo->{unicode10},      '');
is($charinfo->{comment},        '');
is($charinfo->{upper},          '');
is($charinfo->{lower},          '');
is($charinfo->{title},          '');
is($charinfo->{block},          'Hangul Syllables');
is($charinfo->{script},         'Hangul');

# A code point above 0xFFFF; its code is reported with five hex digits.

$charinfo = charinfo(0x1D400);

is($charinfo->{code},           '1D400', 'MATHEMATICAL BOLD CAPITAL A');
is($charinfo->{name},           'MATHEMATICAL BOLD CAPITAL A');
is($charinfo->{category},       'Lu');
is($charinfo->{combining},      '0');
is($charinfo->{bidi},           'L');
is($charinfo->{decomposition},  '<font> 0041');
is($charinfo->{decimal},        '');
is($charinfo->{digit},          '');
is($charinfo->{numeric},        '');
is($charinfo->{mirrored},       'N');
is($charinfo->{unicode10},      '');
is($charinfo->{comment},        '');
is($charinfo->{upper},          '');
is($charinfo->{lower},          '');
is($charinfo->{title},          '');
is($charinfo->{block},          'Mathematical Alphanumeric Symbols');
is($charinfo->{script},         undef);

use Unicode::UCD qw(charblock charscript);

# 0x0590 is in the Hebrew block but unused.

is(charblock(0x590),  'Hebrew', '0x0590 - Hebrew unused charblock');
is(charscript(0x590), undef,    '0x0590 - Hebrew unused charscript');

# A character with a non-empty numeric value and a Unicode 1.0 name.

$charinfo = charinfo(0xbe);

is($charinfo->{code},           '00BE', 'VULGAR FRACTION THREE QUARTERS');
is($charinfo->{name},           'VULGAR FRACTION THREE QUARTERS');
is($charinfo->{category},       'No');
is($charinfo->{combining},      '0');
is($charinfo->{bidi},           'ON');
is($charinfo->{decomposition},  '<fraction> 0033 2044 0034');
is($charinfo->{decimal},        '');
is($charinfo->{digit},          '');
is($charinfo->{numeric},        '3/4');
is($charinfo->{mirrored},       'N');
is($charinfo->{unicode10},      'FRACTION THREE QUARTERS');
is($charinfo->{comment},        '');
is($charinfo->{upper},          '');
is($charinfo->{lower},          '');
is($charinfo->{title},          '');
is($charinfo->{block},          'Latin-1 Supplement');
is($charinfo->{script},         undef);

use Unicode::UCD qw(charblocks charscripts);

# charblocks() and charscripts() return a hash ref keyed by name; each value
# is a list of ranges whose first element is the range start.

my $charblocks = charblocks();

ok(exists $charblocks->{Thai}, 'Thai charblock exists');
is($charblocks->{Thai}->[0]->[0], hex('0e00'));
ok(!exists $charblocks->{PigLatin}, 'PigLatin charblock does not exist');

my $charscripts = charscripts();

ok(exists $charscripts->{Armenian}, 'Armenian charscript exists');
is($charscripts->{Armenian}->[0]->[0], hex('0531'));
ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist');

# charscript() accepts the code point as a string in several notations.

my $charscript;

$charscript = charscript("12ab");
is($charscript, 'Ethiopic', 'Ethiopic charscript');

$charscript = charscript("0x12ab");
is($charscript, 'Ethiopic');

$charscript = charscript("U+12ab");
is($charscript, 'Ethiopic');

# Given a script name instead, charscript() returns that script's ranges.

my $ranges;

$ranges = charscript('Ogham');
is($ranges->[0]->[0], hex('1681'), 'Ogham charscript');
is($ranges->[0]->[1], hex('169a'));

use Unicode::UCD qw(charinrange);

# Probe just outside and just inside both ends of the Cherokee ranges.

$ranges = charscript('Cherokee');
ok(!charinrange($ranges, "139f"), 'Cherokee charscript');
ok( charinrange($ranges, "13a0"));
ok( charinrange($ranges, "13f4"));
ok(!charinrange($ranges, "13f5"));

is(Unicode::UCD::UnicodeVersion(), '4.0.0', 'UnicodeVersion');

use Unicode::UCD qw(compexcl);

ok(!compexcl(0x0100), 'compexcl');
ok( compexcl(0x0958));

use Unicode::UCD qw(casefold);

my $casefold;

$casefold = casefold(0x41);

ok($casefold->{code}    eq '0041' &&
   $casefold->{status}  eq 'C'    &&
   $casefold->{mapping} eq '0061', 'casefold 0x41');

$casefold = casefold(0xdf);

ok($casefold->{code}    eq '00DF' &&
   $casefold->{status}  eq 'F'    &&
   $casefold->{mapping} eq '0073 0073', 'casefold 0xDF');

ok(!casefold(0x20));

use Unicode::UCD qw(casespec);

my $casespec;

ok(!casespec(0x41));

$casespec = casespec(0xdf);

# The condition must really be undef: "eq undef" would also accept an empty
# string and would warn when warnings are enabled.
ok($casespec->{code}  eq '00DF'      &&
   $casespec->{lower} eq '00DF'      &&
   $casespec->{title} eq '0053 0073' &&
   $casespec->{upper} eq '0053 0053' &&
   !defined $casespec->{condition}, 'casespec 0xDF');

$casespec = casespec(0x307);

# For 0x307 the entry is looked up one level down, under the 'az' key.
ok($casespec->{az}->{code}      eq '0307' &&
   $casespec->{az}->{lower}     eq ''     &&
   $casespec->{az}->{title}     eq '0307' &&
   $casespec->{az}->{upper}     eq '0307' &&
   $casespec->{az}->{condition} eq 'az After_I',
   'casespec 0x307');

# perl #7305 UnicodeCD::compexcl is weird
#
# Both calls hand compexcl a $_ that is aliased to a read-only value.  The
# ok(1) that follows each call is reached only if the call did not die.  The
# void-context grep is deliberate: it is the construct being regression-tested.

for (1) {$a=compexcl $_}
ok(1, 'compexcl read-only $_: perl #7305');
grep {compexcl $_} %{{1=>2}};
ok(1, 'compexcl read-only hash: perl #7305');

# _getcode(): plain digits are taken as decimal; a leading 0, 0x/0X or U+/u+
# marks the rest as hex; anything else yields undef.

is(Unicode::UCD::_getcode('123'),     123,     "_getcode(123)");
is(Unicode::UCD::_getcode('0123'),    0x123,   "_getcode(0123)");
is(Unicode::UCD::_getcode('0x123'),   0x123,   "_getcode(0x123)");
is(Unicode::UCD::_getcode('0X123'),   0x123,   "_getcode(0X123)");
is(Unicode::UCD::_getcode('U+123'),   0x123,   "_getcode(U+123)");
is(Unicode::UCD::_getcode('u+123'),   0x123,   "_getcode(u+123)");
is(Unicode::UCD::_getcode('U+1234'),  0x1234,  "_getcode(U+1234)");
is(Unicode::UCD::_getcode('U+12345'), 0x12345, "_getcode(U+12345)");
is(Unicode::UCD::_getcode('123x'),    undef,   "_getcode(123x)");
is(Unicode::UCD::_getcode('x123'),    undef,   "_getcode(x123)");
is(Unicode::UCD::_getcode('0x123x'),  undef,   "_getcode(0x123x)");
is(Unicode::UCD::_getcode('U+123x'),  undef,   "_getcode(U+123x)");

{
    # Empty the array returned by charscript(), then ask again: the second
    # answer must still have the original number of ranges.
    my $r1 = charscript('Latin');
    my $n1 = @$r1;
    is($n1, 26, "26 ranges in Latin script (Unicode 4.0.0)");
    shift @$r1 while @$r1;
    my $r2 = charscript('Latin');
    # Compare element counts explicitly rather than relying on is()'s prototype.
    is(scalar(@$r2), $n1, "modifying results should not mess up internal caches");
}

{
    # A value far beyond the Unicode range must simply yield undef.
    is(charinfo(0xdeadbeef), undef, "[perl #23273] warnings in Unicode::UCD");
}