Integrate macperl patch #16868.
[p5sagit/p5-mst-13.2.git] / lib / Unicode / UCD.t
CommitLineData
BEGIN {
    # Skip the whole file on EBCDIC platforms, recognised by "A"
    # having ordinal 193.
    if (ord("A") == 193) {
        print "1..0 # Skip: EBCDIC\n";
        exit 0;
    }

    # Run from the t/ directory when there is one.
    -d 't' and chdir 't';

    # Exactly one library directory; MacOS needs its own path spelling
    # because the module parses @INC itself.
    @INC = $^O eq 'MacOS' ? "::lib" : '../lib';
}
10
a778afa6 11use strict;
12use Unicode::UCD;
f5c9f3db 13use Test::More;
8b731da2 14
74f8133e 15BEGIN { plan tests => 162 };
561c79ed 16
use Unicode::UCD 'charinfo';

# Reused by the later charinfo() groups in this file.
my $charinfo;

$charinfo = charinfo(0x41);

# Expected charinfo() fields for U+0041, in the order they are checked.
# One is() per field keeps the number of tests at 17, and every assertion
# gets a description so a failure names the field involved.
{
    my $label  = 'LATIN CAPITAL LETTER A';
    my @expect = (
        code          => '0041',
        name          => 'LATIN CAPITAL LETTER A',
        category      => 'Lu',
        combining     => '0',
        bidi          => 'L',
        decomposition => '',
        decimal       => '',
        digit         => '',
        numeric       => '',
        mirrored      => 'N',
        unicode10     => '',
        comment       => '',
        upper         => '',
        lower         => '0061',
        title         => '',
        block         => 'Basic Latin',
        script        => 'Latin',
    );
    while (my ($field, $want) = splice(@expect, 0, 2)) {
        is($charinfo->{$field}, $want, "$label: $field");
    }
}
b08cd201 40
$charinfo = charinfo(0x100);

# Expected charinfo() fields for U+0100, in the order they are checked.
# One is() per field keeps the number of tests at 17, and every assertion
# gets a description so a failure names the field involved.
{
    my $label  = 'LATIN CAPITAL LETTER A WITH MACRON';
    my @expect = (
        code          => '0100',
        name          => 'LATIN CAPITAL LETTER A WITH MACRON',
        category      => 'Lu',
        combining     => '0',
        bidi          => 'L',
        decomposition => '0041 0304',
        decimal       => '',
        digit         => '',
        numeric       => '',
        mirrored      => 'N',
        unicode10     => 'LATIN CAPITAL LETTER A MACRON',
        comment       => '',
        upper         => '',
        lower         => '0101',
        title         => '',
        block         => 'Latin Extended-A',
        script        => 'Latin',
    );
    while (my ($field, $want) = splice(@expect, 0, 2)) {
        is($charinfo->{$field}, $want, "$label: $field");
    }
}
a196fbfd 60
# 0x0590 is in the Hebrew block but unused.

$charinfo = charinfo(0x590);

# Every field is expected to be undef for this code point.  One is() per
# field keeps the number of tests at 17, and every assertion gets a
# description so a failure names the field involved.
for my $field (qw(code name category combining bidi decomposition
                  decimal digit numeric mirrored unicode10 comment
                  upper lower title block script)) {
    is($charinfo->{$field}, undef, "0x0590 - unused Hebrew: $field");
}
a196fbfd 82
# 0x05d0 is in the Hebrew block and used.

$charinfo = charinfo(0x5d0);

# Expected charinfo() fields for U+05D0, in the order they are checked.
# One is() per field keeps the number of tests at 17, and every assertion
# gets a description so a failure names the field involved.
{
    my $label  = '05D0 - used Hebrew';
    my @expect = (
        code          => '05D0',
        name          => 'HEBREW LETTER ALEF',
        category      => 'Lo',
        combining     => '0',
        bidi          => 'R',
        decomposition => '',
        decimal       => '',
        digit         => '',
        numeric       => '',
        mirrored      => 'N',
        unicode10     => '',
        comment       => '',
        upper         => '',
        lower         => '',
        title         => '',
        block         => 'Hebrew',
        script        => 'Hebrew',
    );
    while (my ($field, $want) = splice(@expect, 0, 2)) {
        is($charinfo->{$field}, $want, "$label: $field");
    }
}
561c79ed 104
# An open syllable in Hangul.

$charinfo = charinfo(0xAC00);

# Expected charinfo() fields for U+AC00, in the order they are checked.
# One is() per field keeps the number of tests at 17, and every assertion
# gets a description so a failure names the field involved.
{
    my $label  = 'HANGUL SYLLABLE-AC00';
    my @expect = (
        code          => 'AC00',
        name          => 'HANGUL SYLLABLE-AC00',
        category      => 'Lo',
        combining     => '0',
        bidi          => 'L',
        decomposition => undef,
        decimal       => '',
        digit         => '',
        numeric       => '',
        mirrored      => 'N',
        unicode10     => '',
        comment       => '',
        upper         => '',
        lower         => '',
        title         => '',
        block         => 'Hangul Syllables',
        script        => 'Hangul',
    );
    while (my ($field, $want) = splice(@expect, 0, 2)) {
        is($charinfo->{$field}, $want, "$label: $field");
    }
}
a6fa416b 126
# A closed syllable in Hangul.

$charinfo = charinfo(0xAE00);

# Expected charinfo() fields for U+AE00, in the order they are checked.
# One is() per field keeps the number of tests at 17, and every assertion
# gets a description so a failure names the field involved.
{
    my $label  = 'HANGUL SYLLABLE-AE00';
    my @expect = (
        code          => 'AE00',
        name          => 'HANGUL SYLLABLE-AE00',
        category      => 'Lo',
        combining     => '0',
        bidi          => 'L',
        decomposition => undef,
        decimal       => '',
        digit         => '',
        numeric       => '',
        mirrored      => 'N',
        unicode10     => '',
        comment       => '',
        upper         => '',
        lower         => '',
        title         => '',
        block         => 'Hangul Syllables',
        script        => 'Hangul',
    );
    while (my ($field, $want) = splice(@expect, 0, 2)) {
        is($charinfo->{$field}, $want, "$label: $field");
    }
}
a6fa416b 148
$charinfo = charinfo(0x1D400);

# Expected charinfo() fields for U+1D400, in the order they are checked.
# One is() per field keeps the number of tests at 17, and every assertion
# gets a description so a failure names the field involved.
{
    my $label  = 'MATHEMATICAL BOLD CAPITAL A';
    my @expect = (
        code          => '1D400',
        name          => 'MATHEMATICAL BOLD CAPITAL A',
        category      => 'Lu',
        combining     => '0',
        bidi          => 'L',
        decomposition => '<font> 0041',
        decimal       => '',
        digit         => '',
        numeric       => '',
        mirrored      => 'N',
        unicode10     => '',
        comment       => '',
        upper         => '',
        lower         => '',
        title         => '',
        block         => 'Mathematical Alphanumeric Symbols',
        script        => undef,
    );
    while (my ($field, $want) = splice(@expect, 0, 2)) {
        is($charinfo->{$field}, $want, "$label: $field");
    }
}
a6fa416b 168
use Unicode::UCD qw(charblock charscript);

# 0x0590 is in the Hebrew block but unused: the block lookup is expected
# to answer 'Hebrew' while the script lookup is expected to give undef.
{
    my $unused_hebrew = 0x590;
    is(charblock($unused_hebrew),  'Hebrew', '0x0590 - Hebrew unused charblock');
    is(charscript($unused_hebrew), undef,    '0x0590 - Hebrew unused charscript');
}
561c79ed 175
$charinfo = charinfo(0xbe);

# Expected charinfo() fields for U+00BE, in the order they are checked.
# One is() per field keeps the number of tests at 17, and every assertion
# gets a description so a failure names the field involved.
{
    my $label  = 'VULGAR FRACTION THREE QUARTERS';
    my @expect = (
        code          => '00BE',
        name          => 'VULGAR FRACTION THREE QUARTERS',
        category      => 'No',
        combining     => '0',
        bidi          => 'ON',
        decomposition => '<fraction> 0033 2044 0034',
        decimal       => '',
        digit         => '',
        numeric       => '3/4',
        mirrored      => 'N',
        unicode10     => 'FRACTION THREE QUARTERS',
        comment       => '',
        upper         => '',
        lower         => '',
        title         => '',
        block         => 'Latin-1 Supplement',
        script        => undef,
    );
    while (my ($field, $want) = splice(@expect, 0, 2)) {
        is($charinfo->{$field}, $want, "$label: $field");
    }
}
10a6ecd2 195
use Unicode::UCD qw(charblocks charscripts);

# charblocks() gives a hash reference keyed by block name; each value is
# indexed here as a list of ranges whose first element is compared to the
# expected starting code point.
my $charblocks = charblocks();

ok(exists $charblocks->{Thai}, 'Thai charblock exists');
is($charblocks->{Thai}->[0]->[0], hex('0e00'),
   'Thai charblock: first range starts at 0e00');
ok(!exists $charblocks->{PigLatin}, 'PigLatin charblock does not exist');

# Same three checks for the hash reference from charscripts().
my $charscripts = charscripts();

ok(exists $charscripts->{Armenian}, 'Armenian charscript exists');
is($charscripts->{Armenian}->[0]->[0], hex('0531'),
   'Armenian charscript: first range starts at 0531');
ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist');
10a6ecd2 209
my $charscript;

# The same code point written three ways (bare hex digits, 0x prefix,
# U+ prefix) must resolve to the same script.  The spelling is part of
# each description so a failure shows which form was rejected.
for my $spelling ("12ab", "0x12ab", "U+12ab") {
    $charscript = charscript($spelling);
    is($charscript, 'Ethiopic', "Ethiopic charscript ($spelling)");
}
10a6ecd2 220
# Reused by the charinrange() checks that follow.
my $ranges;

# Called with a script name, charscript() is expected to give a reference
# to a list of ranges; the first two elements of the first range are
# compared against the expected code points.
$ranges = charscript('Ogham');
is($ranges->[0]->[0], hex('1681'), 'Ogham charscript: first range, element 0');
is($ranges->[0]->[1], hex('169a'), 'Ogham charscript: first range, element 1');
10a6ecd2 226
use Unicode::UCD qw(charinrange);

$ranges = charscript('Cherokee');

# Probe both edges of the expected range: the code point just before it,
# its first and last members, and the code point just after it.  Each
# probe gets its own description so a failure names the code point.
for my $probe (["139f", 0], ["13a0", 1], ["13f4", 1], ["13f5", 0]) {
    my ($hex, $expect_inside) = @$probe;
    my $found = charinrange($ranges, $hex);
    ok($expect_inside ? $found : !$found,
       "Cherokee charscript: $hex is "
           . ($expect_inside ? 'inside' : 'outside'));
}
234
# The module must report exactly this Unicode version string.
is(Unicode::UCD::UnicodeVersion(), '3.2.0', 'UnicodeVersion');
b08cd201 236
use Unicode::UCD qw(compexcl);

# One code point for which compexcl() must be false and one for which it
# must be true; both assertions are described so a failure is identifiable.
ok(!compexcl(0x0100), 'compexcl(0x0100) is false');
ok( compexcl(0x0958), 'compexcl(0x0958) is true');
241
use Unicode::UCD qw(casefold);

my $casefold;

$casefold = casefold(0x41);

# The three fields are compared as one list: this is still a single test,
# but on failure is_deeply() reports which element differed, where an
# ok() around a chain of && would only say "not ok".
is_deeply([ map { $casefold->{$_} } qw(code status mapping) ],
          [ '0041', 'C', '0061' ],
          'casefold 0x41');

$casefold = casefold(0xdf);

is_deeply([ map { $casefold->{$_} } qw(code status mapping) ],
          [ '00DF', 'F', '0073 0073' ],
          'casefold 0xDF');

# casefold() must give a false value for this code point.
ok(!casefold(0x20), 'casefold 0x20 returns false');
259
use Unicode::UCD qw(casespec);

my $casespec;

# casespec() must give a false value for this code point.
ok(!casespec(0x41), 'casespec 0x41 returns false');

$casespec = casespec(0xdf);

# The condition field is expected to be absent.  The earlier spelling,
# "eq undef", only worked because undef stringifies to '' (and warns when
# warnings are enabled); the explicit test below accepts exactly the same
# values, undef or the empty string, without an undef operand.
ok($casespec->{code}  eq '00DF' &&
   $casespec->{lower} eq '00DF' &&
   $casespec->{title} eq '0053 0073' &&
   $casespec->{upper} eq '0053 0053' &&
   (!defined $casespec->{condition} || $casespec->{condition} eq ''),
   'casespec 0xDF');
b08cd201 273
$casespec = casespec(0x307);

# Only the entry stored under the 'az' key is examined.  Collect the
# fields that differ from their expected strings; the single test passes
# when there are none.
{
    my %want = (
        code      => '0307',
        lower     => '',
        title     => '0307',
        upper     => '0307',
        condition => 'az After_Soft_Dotted',
    );
    my @wrong = grep { $casespec->{az}->{$_} ne $want{$_} } sort keys %want;
    ok(!@wrong, 'casespec 0x307');
}