7 print "1..0 # Skip: EBCDIC\n";
14 BEGIN { plan tests => 162 };
16 use Unicode::UCD 'charinfo';
# U+0041: an assigned ASCII letter.
$charinfo = charinfo(0x41);

# Check every charinfo() field, in order, against its expected string.
ok($charinfo->{ $_->[0] }, $_->[1]) for (
    [ code          => '0041' ],
    [ name          => 'LATIN CAPITAL LETTER A' ],
    [ category      => 'Lu' ],
    [ combining     => '0' ],
    [ bidi          => 'L' ],
    [ decomposition => '' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => '' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '0061' ],
    [ title         => '' ],
    [ block         => 'Basic Latin' ],
    [ script        => 'Latin' ],
);
# U+0100: a precomposed Latin letter with a canonical decomposition
# and a Unicode 1.0 name.
$charinfo = charinfo(0x100);

# Check every charinfo() field, in order, against its expected string.
ok($charinfo->{ $_->[0] }, $_->[1]) for (
    [ code          => '0100' ],
    [ name          => 'LATIN CAPITAL LETTER A WITH MACRON' ],
    [ category      => 'Lu' ],
    [ combining     => '0' ],
    [ bidi          => 'L' ],
    [ decomposition => '0041 0304' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => 'LATIN CAPITAL LETTER A MACRON' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '0101' ],
    [ title         => '' ],
    [ block         => 'Latin Extended-A' ],
    [ script        => 'Latin' ],
);
# U+0590 lies inside the Hebrew block but is not an assigned character,
# so every charinfo() field is expected to be undefined.
$charinfo = charinfo(0x590);

ok($charinfo->{$_}, undef) for qw(
    code name category combining bidi decomposition
    decimal digit numeric mirrored unicode10 comment
    upper lower title block script
);
# U+05D0 lies inside the Hebrew block and is an assigned character.
$charinfo = charinfo(0x5d0);

# Check every charinfo() field, in order, against its expected string.
ok($charinfo->{ $_->[0] }, $_->[1]) for (
    [ code          => '05D0' ],
    [ name          => 'HEBREW LETTER ALEF' ],
    [ category      => 'Lo' ],
    [ combining     => '0' ],
    [ bidi          => 'R' ],
    [ decomposition => '' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => '' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '' ],
    [ title         => '' ],
    [ block         => 'Hebrew' ],
    [ script        => 'Hebrew' ],
);
# U+AC00: an open syllable in Hangul.  Its decomposition is expected
# to be undefined rather than an empty string.
$charinfo = charinfo(0xAC00);

# Check every charinfo() field, in order, against its expected value.
ok($charinfo->{ $_->[0] }, $_->[1]) for (
    [ code          => 'AC00' ],
    [ name          => 'HANGUL SYLLABLE-AC00' ],
    [ category      => 'Lo' ],
    [ combining     => '0' ],
    [ bidi          => 'L' ],
    [ decomposition => undef ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => '' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '' ],
    [ title         => '' ],
    [ block         => 'Hangul Syllables' ],
    [ script        => 'Hangul' ],
);
# U+AE00: a closed syllable in Hangul.  Its decomposition is expected
# to be undefined rather than an empty string.
$charinfo = charinfo(0xAE00);

# Check every charinfo() field, in order, against its expected value.
ok($charinfo->{ $_->[0] }, $_->[1]) for (
    [ code          => 'AE00' ],
    [ name          => 'HANGUL SYLLABLE-AE00' ],
    [ category      => 'Lo' ],
    [ combining     => '0' ],
    [ bidi          => 'L' ],
    [ decomposition => undef ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => '' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '' ],
    [ title         => '' ],
    [ block         => 'Hangul Syllables' ],
    [ script        => 'Hangul' ],
);
# U+1D400: a code point above the BMP with a <font> compatibility
# decomposition; its script is expected to be undefined.
$charinfo = charinfo(0x1D400);

# Check every charinfo() field, in order, against its expected value.
ok($charinfo->{ $_->[0] }, $_->[1]) for (
    [ code          => '1D400' ],
    [ name          => 'MATHEMATICAL BOLD CAPITAL A' ],
    [ category      => 'Lu' ],
    [ combining     => '0' ],
    [ bidi          => 'L' ],
    [ decomposition => '<font> 0041' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => '' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '' ],
    [ title         => '' ],
    [ block         => 'Mathematical Alphanumeric Symbols' ],
    [ script        => undef ],
);
168 use Unicode::UCD qw(charblock charscript);
# U+0590 is unused, yet it still falls inside the Hebrew block: the block
# lookup must succeed while the script lookup yields undef.
{
    my $unused_code = 0x590;
    ok(charblock($unused_code),  'Hebrew');
    ok(charscript($unused_code), undef);
}
# U+00BE: a fraction character, exercising the numeric field and a
# <fraction> compatibility decomposition; its script is expected to be
# undefined.
$charinfo = charinfo(0xbe);

# Check every charinfo() field, in order, against its expected value.
ok($charinfo->{ $_->[0] }, $_->[1]) for (
    [ code          => '00BE' ],
    [ name          => 'VULGAR FRACTION THREE QUARTERS' ],
    [ category      => 'No' ],
    [ combining     => '0' ],
    [ bidi          => 'ON' ],
    [ decomposition => '<fraction> 0033 2044 0034' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '3/4' ],
    [ mirrored      => 'N' ],
    [ unicode10     => 'FRACTION THREE QUARTERS' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '' ],
    [ title         => '' ],
    [ block         => 'Latin-1 Supplement' ],
    [ script        => undef ],
);
195 use Unicode::UCD qw(charblocks charscripts);
# charblocks() returns a hash reference keyed by block name; each value
# is a list of ranges whose first element is the range start.
my $charblocks = charblocks();

ok(exists $charblocks->{Thai});
ok($charblocks->{Thai}[0][0], 0x0e00);
ok(not exists $charblocks->{PigLatin});    # made-up name must be absent

# charscripts() has the same shape, keyed by script name.
my $charscripts = charscripts();

ok(exists $charscripts->{Armenian});
ok($charscripts->{Armenian}[0][0], 0x0531);
ok(not exists $charscripts->{PigLatin});   # made-up name must be absent
# The same code point given as bare hex, 0x-prefixed hex, and U+ notation
# must resolve to the same script.
for my $spelling ("12ab", "0x12ab", "U+12ab") {
    $charscript = charscript($spelling);
    ok($charscript, 'Ethiopic');
}

# Given a script name instead, charscript() returns that script's ranges;
# check the start and end of the first Ogham range.
$ranges = charscript('Ogham');
ok($ranges->[0][0], 0x1681);
ok($ranges->[0][1], 0x169a);
226 use Unicode::UCD qw(charinrange);
# Probe both edges of the Cherokee script range: the code points just
# outside must be rejected, the first and last inside must be accepted.
$ranges = charscript('Cherokee');

ok($_->[1] ? charinrange($ranges, $_->[0]) : !charinrange($ranges, $_->[0])) for (
    [ "139f" => 0 ],    # just below the range
    [ "13a0" => 1 ],    # first code point inside
    [ "13f4" => 1 ],    # last code point inside
    [ "13f5" => 0 ],    # just above the range
);
# The Unicode database version these expectations were written against.
ok(Unicode::UCD::UnicodeVersion(), '3.1.1');
236 use Unicode::UCD qw(compexcl);
# compexcl() must be false for U+0100 and true for U+0958.
ok(not compexcl(0x100));
ok(compexcl(0x958));
241 use Unicode::UCD qw(casefold);
# U+0041: common ('C') case folding to a single code point.
$casefold = casefold(0x41);
{
    # Accumulate with &&= so later fields are only inspected while
    # every earlier one has matched.
    my $matches = $casefold->{code} eq '0041';
    $matches &&= $casefold->{status}  eq 'C';
    $matches &&= $casefold->{mapping} eq '0061';
    ok($matches);
}

# U+00DF: full ('F') case folding that expands to two code points.
$casefold = casefold(0xdf);
{
    my $matches = $casefold->{code} eq '00DF';
    $matches &&= $casefold->{status}  eq 'F';
    $matches &&= $casefold->{mapping} eq '0073 0073';
    ok($matches);
}
259 use Unicode::UCD qw(casespec);
# U+00DF: an unconditional special-casing entry.  Its condition is
# expected to be absent, so test it with defined(); comparing with
# "eq undef" would stringify undef, emit warnings, and also accept an
# empty string.
$casespec = casespec(0xdf);

ok($casespec->{code}  eq '00DF'      &&
   $casespec->{lower} eq '00DF'      &&
   $casespec->{title} eq '0053 0073' &&
   $casespec->{upper} eq '0053 0053' &&
   !defined $casespec->{condition});

# U+0307: a conditional entry, looked up under its 'az' key.
$casespec = casespec(0x307);

ok($casespec->{az}->{code}      eq '0307' &&
   $casespec->{az}->{lower}     eq ''     &&
   $casespec->{az}->{title}     eq '0307' &&
   $casespec->{az}->{upper}     eq '0307' &&
   $casespec->{az}->{condition} eq 'az AFTER_i NOT_MORE_ABOVE');