BEGIN {
    # Skip the whole script on EBCDIC platforms, where ord('A') is 193.
    # This must happen at compile time and be followed by exit(): otherwise
    # the "1..0" skip line would be printed after the real plan below and
    # the checks would still run.
    if (ord('A') == 193) {
        print "1..0 # Skip: EBCDIC\n";
        exit 0;
    }
}

@INC = "::lib" if $^O eq 'MacOS'; # module parses @INC itself

# Test::More supplies plan() as well as the is()/ok() assertions used by
# the rest of this script; loading it more than once is harmless.
use Test::More;

BEGIN { plan tests => 176 };
use Unicode::UCD 'charinfo';

# U+0041.  Each row below is [ field, expected value, optional test name ];
# only the first check carries a test name.
$charinfo = charinfo(0x41);

for my $check (
    [ code          => '0041', 'LATIN CAPITAL LETTER A' ],
    [ name          => 'LATIN CAPITAL LETTER A' ],
    [ category      => 'Lu' ],
    [ combining     => '0' ],
    [ bidi          => 'L' ],
    [ decomposition => '' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => '' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '0061' ],
    [ title         => '' ],
    [ block         => 'Basic Latin' ],
    [ script        => 'Latin' ],
) {
    my ($field, $expected, $label) = @$check;
    is($charinfo->{$field}, $expected, $label);
}
# U+0100: a letter with a canonical decomposition and a Unicode 1.0 name.
# Rows are [ field, expected value, optional test name ].
$charinfo = charinfo(0x100);

for my $check (
    [ code          => '0100', 'LATIN CAPITAL LETTER A WITH MACRON' ],
    [ name          => 'LATIN CAPITAL LETTER A WITH MACRON' ],
    [ category      => 'Lu' ],
    [ combining     => '0' ],
    [ bidi          => 'L' ],
    [ decomposition => '0041 0304' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => 'LATIN CAPITAL LETTER A MACRON' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '0101' ],
    [ title         => '' ],
    [ block         => 'Latin Extended-A' ],
    [ script        => 'Latin' ],
) {
    my ($field, $expected, $label) = @$check;
    is($charinfo->{$field}, $expected, $label);
}
# 0x0590 is in the Hebrew block but unused, so every charinfo field is
# expected to be undefined.
$charinfo = charinfo(0x590);

{
    # Only the first check is given a test name.
    my $label = '0x0590 - unused Hebrew';

    for my $field (qw(
        code name category combining bidi decomposition
        decimal digit numeric mirrored unicode10 comment
        upper lower title block script
    )) {
        is($charinfo->{$field}, undef, $label);
        undef $label;
    }
}
# 0x05d0 is in the Hebrew block and used.
# Rows are [ field, expected value, optional test name ].
$charinfo = charinfo(0x5d0);

for my $check (
    [ code          => '05D0', '05D0 - used Hebrew' ],
    [ name          => 'HEBREW LETTER ALEF' ],
    [ category      => 'Lo' ],
    [ combining     => '0' ],
    [ bidi          => 'R' ],
    [ decomposition => '' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => '' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '' ],
    [ title         => '' ],
    [ block         => 'Hebrew' ],
    [ script        => 'Hebrew' ],
) {
    my ($field, $expected, $label) = @$check;
    is($charinfo->{$field}, $expected, $label);
}
# An open syllable in Hangul.  The decomposition field is expected to be
# undefined (not the empty string) for this code point.
# Rows are [ field, expected value, optional test name ].
$charinfo = charinfo(0xAC00);

for my $check (
    [ code          => 'AC00', 'HANGUL SYLLABLE-AC00' ],
    [ name          => 'HANGUL SYLLABLE-AC00' ],
    [ category      => 'Lo' ],
    [ combining     => '0' ],
    [ bidi          => 'L' ],
    [ decomposition => undef ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => '' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '' ],
    [ title         => '' ],
    [ block         => 'Hangul Syllables' ],
    [ script        => 'Hangul' ],
) {
    my ($field, $expected, $label) = @$check;
    is($charinfo->{$field}, $expected, $label);
}
# A closed syllable in Hangul.  As with the open syllable, decomposition is
# expected to be undefined rather than empty.
# Rows are [ field, expected value, optional test name ].
$charinfo = charinfo(0xAE00);

for my $check (
    [ code          => 'AE00', 'HANGUL SYLLABLE-AE00' ],
    [ name          => 'HANGUL SYLLABLE-AE00' ],
    [ category      => 'Lo' ],
    [ combining     => '0' ],
    [ bidi          => 'L' ],
    [ decomposition => undef ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => '' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '' ],
    [ title         => '' ],
    [ block         => 'Hangul Syllables' ],
    [ script        => 'Hangul' ],
) {
    my ($field, $expected, $label) = @$check;
    is($charinfo->{$field}, $expected, $label);
}
# U+1D400: a code point above the BMP.  It has a <font> compatibility
# decomposition, and its script is expected to be undefined.
# Rows are [ field, expected value, optional test name ].
$charinfo = charinfo(0x1D400);

for my $check (
    [ code          => '1D400', 'MATHEMATICAL BOLD CAPITAL A' ],
    [ name          => 'MATHEMATICAL BOLD CAPITAL A' ],
    [ category      => 'Lu' ],
    [ combining     => '0' ],
    [ bidi          => 'L' ],
    [ decomposition => '<font> 0041' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => '' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '' ],
    [ title         => '' ],
    [ block         => 'Mathematical Alphanumeric Symbols' ],
    [ script        => undef ],
) {
    my ($field, $expected, $label) = @$check;
    is($charinfo->{$field}, $expected, $label);
}
use Unicode::UCD qw(charblock charscript);

# 0x0590 is in the Hebrew block but unused: charblock() still reports the
# block, while charscript() is expected to return undef.
{
    my $block  = charblock(0x590);
    my $script = charscript(0x590);

    is($block,  'Hebrew', '0x0590 - Hebrew unused charblock');
    is($script, undef,    '0x0590 - Hebrew unused charscript');
}
# U+00BE: a character with a non-empty numeric value ('3/4') but empty
# decimal and digit fields.
# Rows are [ field, expected value, optional test name ].
$charinfo = charinfo(0xbe);

for my $check (
    [ code          => '00BE', 'VULGAR FRACTION THREE QUARTERS' ],
    [ name          => 'VULGAR FRACTION THREE QUARTERS' ],
    [ category      => 'No' ],
    [ combining     => '0' ],
    [ bidi          => 'ON' ],
    [ decomposition => '<fraction> 0033 2044 0034' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '3/4' ],
    [ mirrored      => 'N' ],
    [ unicode10     => 'FRACTION THREE QUARTERS' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '' ],
    [ title         => '' ],
    [ block         => 'Latin-1 Supplement' ],
    [ script        => undef ],
) {
    my ($field, $expected, $label) = @$check;
    is($charinfo->{$field}, $expected, $label);
}
use Unicode::UCD qw(charblocks charscripts);

# Both calls return a hash reference keyed by name.  The checks read
# ->{Name}[0][0] as the first code point of the first range.
my $charblocks = charblocks();

ok(exists $charblocks->{Thai}, 'Thai charblock exists');
is($charblocks->{Thai}[0][0], 0x0e00);
ok(!exists $charblocks->{PigLatin}, 'PigLatin charblock does not exist');

my $charscripts = charscripts();

ok(exists $charscripts->{Armenian}, 'Armenian charscript exists');
is($charscripts->{Armenian}[0][0], 0x0531);
ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist');
# The same code point given as bare hex, "0x"-prefixed and "U+"-prefixed
# strings must resolve to the same script.  Only the first check is named.
for my $form (
    [ "12ab", 'Ethiopic charscript' ],
    [ "0x12ab" ],
    [ "U+12ab" ],
) {
    my ($code, $label) = @$form;
    $charscript = charscript($code);
    is($charscript, 'Ethiopic', $label);
}

# Given a script name instead of a code point, charscript() returns the
# script's ranges; check both ends of the first Ogham range.
$ranges = charscript('Ogham');
is($ranges->[0][0], 0x1681, 'Ogham charscript');
is($ranges->[0][1], 0x169a);
use Unicode::UCD qw(charinrange);

# Probe just outside and just inside both ends of the Cherokee ranges.
# Rows are [ code point, expected membership, optional test name ].
$ranges = charscript('Cherokee');

for my $probe (
    [ "139f", 0, 'Cherokee charscript' ],
    [ "13a0", 1 ],
    [ "13f4", 1 ],
    [ "13f5", 0 ],
) {
    my ($code, $inside, $label) = @$probe;
    my $found = charinrange($ranges, $code);
    ok($inside ? $found : !$found, $label);
}

# The expected values throughout this script are tied to this version of
# the Unicode database.
is(Unicode::UCD::UnicodeVersion(), '4.0.0', 'UnicodeVersion');
use Unicode::UCD qw(compexcl);

# compexcl() is expected to be false for U+0100 and true for U+0958.
{
    my $excluded_0100 = compexcl(0x0100);
    my $excluded_0958 = compexcl(0x0958);

    ok(!$excluded_0100, 'compexcl');
    ok($excluded_0958);
}
use Unicode::UCD qw(casefold);

# Each row is [ code point, expected code, status, mapping, test name ].
# All three fields of a result are checked by a single ok() per code point.
for my $case (
    [ 0x41, '0041', 'C', '0061',      'casefold 0x41' ],
    [ 0xdf, '00DF', 'F', '0073 0073', 'casefold 0xDF' ],
) {
    my ($codepoint, $code, $status, $mapping, $label) = @$case;

    $casefold = casefold($codepoint);

    ok(   $casefold->{code}    eq $code
       && $casefold->{status}  eq $status
       && $casefold->{mapping} eq $mapping,
       $label);
}
use Unicode::UCD qw(casespec);

# U+00DF: the special-casing entry is expected to carry no condition.
# The condition is tested with defined() because "eq undef" compares
# against the empty string (and warns), so it cannot tell undef from ''.
$casespec = casespec(0xdf);

ok($casespec->{code} eq '00DF' &&
   $casespec->{lower} eq '00DF' &&
   $casespec->{title} eq '0053 0073' &&
   $casespec->{upper} eq '0053 0053' &&
   !defined $casespec->{condition}, 'casespec 0xDF');

# U+0307: here the result is looked up under the 'az' key, whose entry
# carries the condition 'az After_I'.
$casespec = casespec(0x307);

ok($casespec->{az}->{code} eq '0307' &&
   $casespec->{az}->{lower} eq '' &&
   $casespec->{az}->{title} eq '0307' &&
   $casespec->{az}->{upper} eq '0307' &&
   $casespec->{az}->{condition} eq 'az After_I',
   'casespec 0x307');
# Regression check for perl #7305 ("UnicodeCD::compexcl is weird").
# compexcl() is called with $_ aliased first to a literal constant and then
# to the contents of an anonymous hash (the "read-only" cases named in the
# test descriptions).  Reaching each ok(1) means the call did not die.
$a = compexcl($_) for 1;
ok(1, 'compexcl read-only $_: perl #7305');

# grep is used here only for its aliasing of $_; its result is discarded.
grep { compexcl($_) } %{{1=>2}};
ok(1, 'compexcl read-only hash: perl #7305');
# _getcode() turns a textual code point into a number: a plain decimal
# string is taken as decimal, while a leading zero, "0x"/"0X" or "U+"/"u+"
# selects hexadecimal.  Anything else is expected to yield undef.
# Each row is [ input, expected result ]; the test name is built from the
# input so that name and input can never disagree.
for my $case (
    [ '123',     123     ],
    [ '0123',    0x123   ],
    [ '0x123',   0x123   ],
    [ '0X123',   0x123   ],
    [ 'U+123',   0x123   ],
    [ 'u+123',   0x123   ],
    [ 'U+1234',  0x1234  ],
    [ 'U+12345', 0x12345 ],
    [ '123x',    undef   ],
    [ 'x123',    undef   ],
    [ '0x123x',  undef   ],
    [ 'U+123x',  undef   ],
) {
    my ($input, $expected) = @$case;

    # Scalar assignment so that a bare "return" shows up as undef.
    my $code = Unicode::UCD::_getcode($input);

    is($code, $expected, "_getcode($input)");
}