What started as a small nit (the charnames test, nit found
[p5sagit/p5-mst-13.2.git] / lib / Unicode / UCD.t
Commit Line Data
8b731da2 1BEGIN {
2 if (ord("A") == 193) {
3 print "1..0 # Skip: EBCDIC\n";
4 exit 0;
5 }
a778afa6 6 chdir 't' if -d 't';
7 @INC = '../lib';
8b731da2 8}
9
a778afa6 10use strict;
11use Unicode::UCD;
f5c9f3db 12use Test::More;
8b731da2 13
74f8133e 14BEGIN { plan tests => 162 };
561c79ed 15
55d7b906 16use Unicode::UCD 'charinfo';
561c79ed 17
b08cd201 18my $charinfo;
19
20$charinfo = charinfo(0x41);
21
f5c9f3db 22is($charinfo->{code}, '0041', 'LATIN CAPITAL LETTER A');
23is($charinfo->{name}, 'LATIN CAPITAL LETTER A');
24is($charinfo->{category}, 'Lu');
25is($charinfo->{combining}, '0');
26is($charinfo->{bidi}, 'L');
27is($charinfo->{decomposition}, '');
28is($charinfo->{decimal}, '');
29is($charinfo->{digit}, '');
30is($charinfo->{numeric}, '');
31is($charinfo->{mirrored}, 'N');
32is($charinfo->{unicode10}, '');
33is($charinfo->{comment}, '');
34is($charinfo->{upper}, '');
35is($charinfo->{lower}, '0061');
36is($charinfo->{title}, '');
37is($charinfo->{block}, 'Basic Latin');
38is($charinfo->{script}, 'Latin');
b08cd201 39
40$charinfo = charinfo(0x100);
41
f5c9f3db 42is($charinfo->{code}, '0100', 'LATIN CAPITAL LETTER A WITH MACRON');
43is($charinfo->{name}, 'LATIN CAPITAL LETTER A WITH MACRON');
44is($charinfo->{category}, 'Lu');
45is($charinfo->{combining}, '0');
46is($charinfo->{bidi}, 'L');
47is($charinfo->{decomposition}, '0041 0304');
48is($charinfo->{decimal}, '');
49is($charinfo->{digit}, '');
50is($charinfo->{numeric}, '');
51is($charinfo->{mirrored}, 'N');
52is($charinfo->{unicode10}, 'LATIN CAPITAL LETTER A MACRON');
53is($charinfo->{comment}, '');
54is($charinfo->{upper}, '');
55is($charinfo->{lower}, '0101');
56is($charinfo->{title}, '');
57is($charinfo->{block}, 'Latin Extended-A');
58is($charinfo->{script}, 'Latin');
a196fbfd 59
60# 0x0590 is in the Hebrew block but unused.
561c79ed 61
b08cd201 62$charinfo = charinfo(0x590);
63
f5c9f3db 64is($charinfo->{code}, undef, '0x0590 - unused Hebrew');
65is($charinfo->{name}, undef);
66is($charinfo->{category}, undef);
67is($charinfo->{combining}, undef);
68is($charinfo->{bidi}, undef);
69is($charinfo->{decomposition}, undef);
70is($charinfo->{decimal}, undef);
71is($charinfo->{digit}, undef);
72is($charinfo->{numeric}, undef);
73is($charinfo->{mirrored}, undef);
74is($charinfo->{unicode10}, undef);
75is($charinfo->{comment}, undef);
76is($charinfo->{upper}, undef);
77is($charinfo->{lower}, undef);
78is($charinfo->{title}, undef);
79is($charinfo->{block}, undef);
80is($charinfo->{script}, undef);
a196fbfd 81
82# 0x05d0 is in the Hebrew block and used.
561c79ed 83
b08cd201 84$charinfo = charinfo(0x5d0);
85
f5c9f3db 86is($charinfo->{code}, '05D0', '05D0 - used Hebrew');
87is($charinfo->{name}, 'HEBREW LETTER ALEF');
88is($charinfo->{category}, 'Lo');
89is($charinfo->{combining}, '0');
90is($charinfo->{bidi}, 'R');
91is($charinfo->{decomposition}, '');
92is($charinfo->{decimal}, '');
93is($charinfo->{digit}, '');
94is($charinfo->{numeric}, '');
95is($charinfo->{mirrored}, 'N');
96is($charinfo->{unicode10}, '');
97is($charinfo->{comment}, '');
98is($charinfo->{upper}, '');
99is($charinfo->{lower}, '');
100is($charinfo->{title}, '');
101is($charinfo->{block}, 'Hebrew');
102is($charinfo->{script}, 'Hebrew');
561c79ed 103
74f8133e 104# An open syllable in Hangul.
a6fa416b 105
106$charinfo = charinfo(0xAC00);
107
f5c9f3db 108is($charinfo->{code}, 'AC00', 'HANGUL SYLLABLE-AC00');
109is($charinfo->{name}, 'HANGUL SYLLABLE-AC00');
110is($charinfo->{category}, 'Lo');
111is($charinfo->{combining}, '0');
112is($charinfo->{bidi}, 'L');
113is($charinfo->{decomposition}, undef);
114is($charinfo->{decimal}, '');
115is($charinfo->{digit}, '');
116is($charinfo->{numeric}, '');
117is($charinfo->{mirrored}, 'N');
118is($charinfo->{unicode10}, '');
119is($charinfo->{comment}, '');
120is($charinfo->{upper}, '');
121is($charinfo->{lower}, '');
122is($charinfo->{title}, '');
123is($charinfo->{block}, 'Hangul Syllables');
124is($charinfo->{script}, 'Hangul');
a6fa416b 125
74f8133e 126# A closed syllable in Hangul.
a6fa416b 127
128$charinfo = charinfo(0xAE00);
129
f5c9f3db 130is($charinfo->{code}, 'AE00', 'HANGUL SYLLABLE-AE00');
131is($charinfo->{name}, 'HANGUL SYLLABLE-AE00');
132is($charinfo->{category}, 'Lo');
133is($charinfo->{combining}, '0');
134is($charinfo->{bidi}, 'L');
135is($charinfo->{decomposition}, undef);
136is($charinfo->{decimal}, '');
137is($charinfo->{digit}, '');
138is($charinfo->{numeric}, '');
139is($charinfo->{mirrored}, 'N');
140is($charinfo->{unicode10}, '');
141is($charinfo->{comment}, '');
142is($charinfo->{upper}, '');
143is($charinfo->{lower}, '');
144is($charinfo->{title}, '');
145is($charinfo->{block}, 'Hangul Syllables');
146is($charinfo->{script}, 'Hangul');
a6fa416b 147
148$charinfo = charinfo(0x1D400);
149
f5c9f3db 150is($charinfo->{code}, '1D400', 'MATHEMATICAL BOLD CAPITAL A');
151is($charinfo->{name}, 'MATHEMATICAL BOLD CAPITAL A');
152is($charinfo->{category}, 'Lu');
153is($charinfo->{combining}, '0');
154is($charinfo->{bidi}, 'L');
155is($charinfo->{decomposition}, '<font> 0041');
156is($charinfo->{decimal}, '');
157is($charinfo->{digit}, '');
158is($charinfo->{numeric}, '');
159is($charinfo->{mirrored}, 'N');
160is($charinfo->{unicode10}, '');
161is($charinfo->{comment}, '');
162is($charinfo->{upper}, '');
163is($charinfo->{lower}, '');
164is($charinfo->{title}, '');
165is($charinfo->{block}, 'Mathematical Alphanumeric Symbols');
166is($charinfo->{script}, undef);
a6fa416b 167
55d7b906 168use Unicode::UCD qw(charblock charscript);
a196fbfd 169
170# 0x0590 is in the Hebrew block but unused.
561c79ed 171
f5c9f3db 172is(charblock(0x590), 'Hebrew', '0x0590 - Hebrew unused charblock');
173is(charscript(0x590), undef, '0x0590 - Hebrew unused charscript');
561c79ed 174
b08cd201 175$charinfo = charinfo(0xbe);
176
f5c9f3db 177is($charinfo->{code}, '00BE', 'VULGAR FRACTION THREE QUARTERS');
178is($charinfo->{name}, 'VULGAR FRACTION THREE QUARTERS');
179is($charinfo->{category}, 'No');
180is($charinfo->{combining}, '0');
181is($charinfo->{bidi}, 'ON');
182is($charinfo->{decomposition}, '<fraction> 0033 2044 0034');
183is($charinfo->{decimal}, '');
184is($charinfo->{digit}, '');
185is($charinfo->{numeric}, '3/4');
186is($charinfo->{mirrored}, 'N');
187is($charinfo->{unicode10}, 'FRACTION THREE QUARTERS');
188is($charinfo->{comment}, '');
189is($charinfo->{upper}, '');
190is($charinfo->{lower}, '');
191is($charinfo->{title}, '');
192is($charinfo->{block}, 'Latin-1 Supplement');
193is($charinfo->{script}, undef);
10a6ecd2 194
55d7b906 195use Unicode::UCD qw(charblocks charscripts);
10a6ecd2 196
b08cd201 197my $charblocks = charblocks();
10a6ecd2 198
f5c9f3db 199ok(exists $charblocks->{Thai}, 'Thai charblock exists');
200is($charblocks->{Thai}->[0]->[0], hex('0e00'));
201ok(!exists $charblocks->{PigLatin}, 'PigLatin charblock does not exist');
10a6ecd2 202
b08cd201 203my $charscripts = charscripts();
10a6ecd2 204
f5c9f3db 205ok(exists $charscripts->{Armenian}, 'Armenian charscript exists');
206is($charscripts->{Armenian}->[0]->[0], hex('0531'));
207ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist');
10a6ecd2 208
209my $charscript;
210
211$charscript = charscript("12ab");
f5c9f3db 212is($charscript, 'Ethiopic', 'Ethiopic charscript');
10a6ecd2 213
214$charscript = charscript("0x12ab");
f5c9f3db 215is($charscript, 'Ethiopic');
10a6ecd2 216
217$charscript = charscript("U+12ab");
f5c9f3db 218is($charscript, 'Ethiopic');
10a6ecd2 219
220my $ranges;
221
222$ranges = charscript('Ogham');
f5c9f3db 223is($ranges->[0]->[0], hex('1681'), 'Ogham charscript');
224is($ranges->[0]->[1], hex('169a'));
10a6ecd2 225
55d7b906 226use Unicode::UCD qw(charinrange);
10a6ecd2 227
228$ranges = charscript('Cherokee');
f5c9f3db 229ok(!charinrange($ranges, "139f"), 'Cherokee charscript');
10a6ecd2 230ok( charinrange($ranges, "13a0"));
231ok( charinrange($ranges, "13f4"));
232ok(!charinrange($ranges, "13f5"));
233
822ebcc8 234is(Unicode::UCD::UnicodeVersion, '3.2.0', 'UnicodeVersion');
b08cd201 235
55d7b906 236use Unicode::UCD qw(compexcl);
b08cd201 237
f5c9f3db 238ok(!compexcl(0x0100), 'compexcl');
b08cd201 239ok( compexcl(0x0958));
240
55d7b906 241use Unicode::UCD qw(casefold);
b08cd201 242
243my $casefold;
244
245$casefold = casefold(0x41);
246
247ok($casefold->{code} eq '0041' &&
248 $casefold->{status} eq 'C' &&
f5c9f3db 249 $casefold->{mapping} eq '0061', 'casefold 0x41');
b08cd201 250
251$casefold = casefold(0xdf);
252
253ok($casefold->{code} eq '00DF' &&
254 $casefold->{status} eq 'F' &&
f5c9f3db 255 $casefold->{mapping} eq '0073 0073', 'casefold 0xDF');
b08cd201 256
257ok(!casefold(0x20));
258
55d7b906 259use Unicode::UCD qw(casespec);
b08cd201 260
261my $casespec;
262
263ok(!casespec(0x41));
264
265$casespec = casespec(0xdf);
266
267ok($casespec->{code} eq '00DF' &&
268 $casespec->{lower} eq '00DF' &&
269 $casespec->{title} eq '0053 0073' &&
270 $casespec->{upper} eq '0053 0053' &&
f5c9f3db 271 $casespec->{condition} eq undef, 'casespec 0xDF');
b08cd201 272
273$casespec = casespec(0x307);
274
f499c386 275ok($casespec->{az}->{code} eq '0307' &&
276 $casespec->{az}->{lower} eq '' &&
277 $casespec->{az}->{title} eq '0307' &&
278 $casespec->{az}->{upper} eq '0307' &&
822ebcc8 279 $casespec->{az}->{condition} eq 'az After_Soft_Dotted',
f5c9f3db 280 'casespec 0x307');