Add compexcl(), casefold(), and casespec() interfaces;
[p5sagit/p5-mst-13.2.git] / lib / Unicode / UCD.t
CommitLineData
10a6ecd2 1use Unicode::UCD;
561c79ed 2
3use Test;
4use strict;
5
b08cd201 6BEGIN { plan tests => 111 };
561c79ed 7
8use Unicode::UCD 'charinfo';
9
# Holds the result of each charinfo() call; reassigned before every group
# of checks in this file.
my $charinfo;

# U+0041: compare every charinfo() field against its expected string.
$charinfo = charinfo(0x41);

for my $check (
    [ code          => '0041' ],
    [ name          => 'LATIN CAPITAL LETTER A' ],
    [ category      => 'Lu' ],
    [ combining     => '0' ],
    [ bidi          => 'L' ],
    [ decomposition => '' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => '' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '0061' ],
    [ title         => '' ],
    [ block         => 'Basic Latin' ],
    [ script        => 'Latin' ],
) {
    my ($field, $expected) = @$check;
    ok($charinfo->{$field}, $expected);
}
31
# U+0100: a letter with a canonical decomposition and a Unicode 1.0 name.
$charinfo = charinfo(0x100);

for my $check (
    [ code          => '0100' ],
    [ name          => 'LATIN CAPITAL LETTER A WITH MACRON' ],
    [ category      => 'Lu' ],
    [ combining     => '0' ],
    [ bidi          => 'L' ],
    [ decomposition => '0041 0304' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => 'LATIN CAPITAL LETTER A MACRON' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '0101' ],
    [ title         => '' ],
    [ block         => 'Latin Extended-A' ],
    [ script        => 'Latin' ],
) {
    my ($field, $expected) = @$check;
    ok($charinfo->{$field}, $expected);
}
a196fbfd 51
# 0x0590 is in the Hebrew block but unused, so every charinfo() field is
# expected to come back undefined.

$charinfo = charinfo(0x590);

for my $field (qw(
    code name category combining bidi decomposition
    decimal digit numeric mirrored unicode10 comment
    upper lower title block script
)) {
    ok($charinfo->{$field}, undef);
}
a196fbfd 73
# 0x05d0 is in the Hebrew block and used: a caseless, right-to-left letter.

$charinfo = charinfo(0x5d0);

for my $check (
    [ code          => '05D0' ],
    [ name          => 'HEBREW LETTER ALEF' ],
    [ category      => 'Lo' ],
    [ combining     => '0' ],
    [ bidi          => 'R' ],
    [ decomposition => '' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => '' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '' ],
    [ title         => '' ],
    [ block         => 'Hebrew' ],
    [ script        => 'Hebrew' ],
) {
    my ($field, $expected) = @$check;
    ok($charinfo->{$field}, $expected);
}
561c79ed 95
use Unicode::UCD qw(charblock charscript);

# 0x0590 is in the Hebrew block but unused: a block name is expected for
# it, but no script.
my $unused_hebrew = 0x590;

ok(charblock($unused_hebrew),  'Hebrew');
ok(charscript($unused_hebrew), undef);
561c79ed 102
# U+00BE: a numeric symbol with a compatibility decomposition, a
# fractional numeric value, and no script expected.
$charinfo = charinfo(0xbe);

for my $check (
    [ code          => '00BE' ],
    [ name          => 'VULGAR FRACTION THREE QUARTERS' ],
    [ category      => 'No' ],
    [ combining     => '0' ],
    [ bidi          => 'ON' ],
    [ decomposition => '<fraction> 0033 2044 0034' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '3/4' ],
    [ mirrored      => 'N' ],
    [ unicode10     => 'FRACTION THREE QUARTERS' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '' ],
    [ title         => '' ],
    [ block         => 'Latin-1 Supplement' ],
    [ script        => undef ],
) {
    my ($field, $expected) = @$check;
    ok($charinfo->{$field}, $expected);
}
10a6ecd2 122
use Unicode::UCD qw(charblocks charscripts);

# The block table must know Thai, whose first range is expected to start
# at U+0E00, and must not contain a made-up name.
my $charblocks = charblocks();

ok(exists $charblocks->{Thai});
ok($charblocks->{Thai}[0][0], 0x0e00);
ok(!exists $charblocks->{PigLatin});

# Same checks against the script table, using Armenian (first range is
# expected to start at U+0531).
my $charscripts = charscripts();

ok(exists $charscripts->{Armenian});
ok($charscripts->{Armenian}[0][0], 0x0531);
ok(!exists $charscripts->{PigLatin});
10a6ecd2 136
my $charscript;

# The same code point written three ways (bare hex, 0x-prefixed,
# U+-prefixed) must resolve to the same script each time.
for my $spelling ("12ab", "0x12ab", "U+12ab") {
    $charscript = charscript($spelling);
    ok($charscript, 'Ethiopic');
}

# Given a script name, the result is indexed as a list of [start, end]
# pairs; the first Ogham pair is expected to be U+1681 .. U+169A.
my $ranges = charscript('Ogham');

ok($ranges->[0][0], 0x1681);
ok($ranges->[0][1], 0x169a);
153
use Unicode::UCD qw(charinrange);

# Probe just outside and just inside both ends of the Cherokee ranges:
# 13a0 and 13f4 are expected inside, 139f and 13f5 outside.
$ranges = charscript('Cherokee');

for my $probe (
    [ "139f", 0 ],
    [ "13a0", 1 ],
    [ "13f4", 1 ],
    [ "13f5", 0 ],
) {
    my ($code, $inside) = @$probe;
    if ($inside) {
        ok( charinrange($ranges, $code));
    }
    else {
        ok(!charinrange($ranges, $code));
    }
}

# The expected values throughout this file are tied to this data version.
ok(Unicode::UCD::UnicodeVersion(), 3.1);
b08cd201 163
use Unicode::UCD qw(compexcl);

# compexcl() is expected to be false for U+0100 and true for U+0958.
ok(!compexcl(0x100));
ok(compexcl(0x958));
168
use Unicode::UCD qw(casefold);

my $casefold;

# Each case: code point, then the expected code, status and mapping
# fields.  All three fields must match for the single ok() to pass.
for my $case (
    [ 0x41, '0041', 'C', '0061' ],
    [ 0xdf, '00DF', 'F', '0073 0073' ],
) {
    my ($codepoint, $code, $status, $mapping) = @$case;

    $casefold = casefold($codepoint);

    ok($casefold->{code}    eq $code   &&
       $casefold->{status}  eq $status &&
       $casefold->{mapping} eq $mapping);
}

# U+0020 is expected to have no case folding at all.
ok(!casefold(0x20));
186
use Unicode::UCD qw(casespec);

my $casespec;

# U+0041 is expected to have no special-casing entry.
ok(!casespec(0x41));

# U+00DF: multi-character title/upper mappings and no condition.
$casespec = casespec(0xdf);

ok($casespec->{code} eq '00DF' &&
   $casespec->{lower} eq '00DF' &&
   $casespec->{title} eq '0053 0073' &&
   $casespec->{upper} eq '0053 0053' &&
   # Check for undef explicitly: "eq undef" stringifies undef to ''
   # (warning about an uninitialized value) and would also accept an
   # empty-string condition.
   !defined $casespec->{condition});

# U+0307: empty title/upper mappings and an attached condition.
$casespec = casespec(0x307);

ok($casespec->{code} eq '0307' &&
   $casespec->{lower} eq '0307' &&
   $casespec->{title} eq '' &&
   $casespec->{upper} eq '' &&
   $casespec->{condition} eq 'lt AFTER_i');