UCD 5.0.0
[p5sagit/p5-mst-13.2.git] / lib / Unicode / UCD.t
CommitLineData
25a47338 1#!perl -w
BEGIN {
    # EBCDIC platforms (where "A" is code 193) are not supported by this
    # test: announce a skip-all plan and stop.
    if (ord("A") == 193) {
        print "1..0 # Skip: EBCDIC\n";
        exit 0;
    }

    # Run from inside t/ when that directory exists.
    -d 't' and chdir 't';

    # Replace @INC with the single library directory.  On MacOS the path
    # uses the native "::lib" spelling because the module parses @INC itself.
    @INC = $^O eq 'MacOS' ? "::lib" : '../lib';

    require Config;
    Config->import;

    # Without Storable among the built extensions, announce a skip-all plan
    # and stop.
    unless ($Config{'extensions'} =~ /\bStorable\b/) {
        print "1..0 # Skip: Storable was not built; Unicode::UCD uses Storable\n";
        exit 0;
    }
}
16
a778afa6 17use strict;
18use Unicode::UCD;
f5c9f3db 19use Test::More;
8b731da2 20
a2bd7410 21BEGIN { plan tests => 188 };
561c79ed 22
use Unicode::UCD 'charinfo';

# Scratch variable holding the most recent charinfo() result; the later
# charinfo groups in this file assign to it again.
my $charinfo;

# U+0041 LATIN CAPITAL LETTER A.
$charinfo = charinfo(0x41);

{
    # Only the first check of the group is named; the rest run unnamed.
    my $label  = 'LATIN CAPITAL LETTER A';

    # Field => expected value, in the order the checks are emitted.
    my @expect = (
        code          => '0041',
        name          => 'LATIN CAPITAL LETTER A',
        category      => 'Lu',
        combining     => '0',
        bidi          => 'L',
        decomposition => '',
        decimal       => '',
        digit         => '',
        numeric       => '',
        mirrored      => 'N',
        unicode10     => '',
        comment       => '',
        upper         => '',
        lower         => '0061',
        title         => '',
        block         => 'Basic Latin',
        script        => 'Latin',
    );

    while (my ($field, $want) = splice(@expect, 0, 2)) {
        is($charinfo->{$field}, $want, $label);
        undef $label;
    }
}
b08cd201 46
# U+0100 LATIN CAPITAL LETTER A WITH MACRON: has a decomposition and a
# Unicode 1.0 name.
$charinfo = charinfo(0x100);

{
    # Each entry is [ field, expected value ]; checks run in list order and
    # only the first one is named.
    my @checks = (
        [ code          => '0100' ],
        [ name          => 'LATIN CAPITAL LETTER A WITH MACRON' ],
        [ category      => 'Lu' ],
        [ combining     => '0' ],
        [ bidi          => 'L' ],
        [ decomposition => '0041 0304' ],
        [ decimal       => '' ],
        [ digit         => '' ],
        [ numeric       => '' ],
        [ mirrored      => 'N' ],
        [ unicode10     => 'LATIN CAPITAL LETTER A MACRON' ],
        [ comment       => '' ],
        [ upper         => '' ],
        [ lower         => '0101' ],
        [ title         => '' ],
        [ block         => 'Latin Extended-A' ],
        [ script        => 'Latin' ],
    );

    my $label = 'LATIN CAPITAL LETTER A WITH MACRON';
    for my $check (@checks) {
        my ($field, $want) = @$check;
        is($charinfo->{$field}, $want, $label);
        undef $label;
    }
}
a196fbfd 66
# 0x0590 is in the Hebrew block but unused, so every charinfo field is
# expected to be undef.

$charinfo = charinfo(0x590);

{
    my $label = '0x0590 - unused Hebrew';    # only the first check is named

    my @fields = qw(
        code name category combining bidi decomposition
        decimal digit numeric mirrored unicode10 comment
        upper lower title block script
    );

    for my $field (@fields) {
        is($charinfo->{$field}, undef, $label);
        undef $label;
    }
}
a196fbfd 88
# 0x05d0 is in the Hebrew block and used.

$charinfo = charinfo(0x5d0);

{
    # Field => expected value, consumed two at a time in order.  Only the
    # first check is named.
    my $label  = '05D0 - used Hebrew';
    my @expect = (
        code          => '05D0',
        name          => 'HEBREW LETTER ALEF',
        category      => 'Lo',
        combining     => '0',
        bidi          => 'R',
        decomposition => '',
        decimal       => '',
        digit         => '',
        numeric       => '',
        mirrored      => 'N',
        unicode10     => '',
        comment       => '',
        upper         => '',
        lower         => '',
        title         => '',
        block         => 'Hebrew',
        script        => 'Hebrew',
    );

    while (my ($field, $want) = splice(@expect, 0, 2)) {
        is($charinfo->{$field}, $want, $label);
        undef $label;
    }
}
561c79ed 110
# An open syllable in Hangul.  The decomposition field is expected to be
# undef here, unlike the empty string expected for the other characters.

$charinfo = charinfo(0xAC00);

{
    my @checks = (
        [ code          => 'AC00' ],
        [ name          => 'HANGUL SYLLABLE-AC00' ],
        [ category      => 'Lo' ],
        [ combining     => '0' ],
        [ bidi          => 'L' ],
        [ decomposition => undef ],
        [ decimal       => '' ],
        [ digit         => '' ],
        [ numeric       => '' ],
        [ mirrored      => 'N' ],
        [ unicode10     => '' ],
        [ comment       => '' ],
        [ upper         => '' ],
        [ lower         => '' ],
        [ title         => '' ],
        [ block         => 'Hangul Syllables' ],
        [ script        => 'Hangul' ],
    );

    # Only the first check is named.
    my $label = 'HANGUL SYLLABLE-AC00';
    for my $check (@checks) {
        my ($field, $want) = @$check;
        is($charinfo->{$field}, $want, $label);
        undef $label;
    }
}
a6fa416b 132
# A closed syllable in Hangul.  As for the open syllable, the decomposition
# field is expected to be undef.

$charinfo = charinfo(0xAE00);

{
    # Field => expected value, consumed two at a time in order.  Only the
    # first check is named.
    my $label  = 'HANGUL SYLLABLE-AE00';
    my @expect = (
        code          => 'AE00',
        name          => 'HANGUL SYLLABLE-AE00',
        category      => 'Lo',
        combining     => '0',
        bidi          => 'L',
        decomposition => undef,
        decimal       => '',
        digit         => '',
        numeric       => '',
        mirrored      => 'N',
        unicode10     => '',
        comment       => '',
        upper         => '',
        lower         => '',
        title         => '',
        block         => 'Hangul Syllables',
        script        => 'Hangul',
    );

    while (my ($field, $want) = splice(@expect, 0, 2)) {
        is($charinfo->{$field}, $want, $label);
        undef $label;
    }
}
a6fa416b 154
# U+1D400 MATHEMATICAL BOLD CAPITAL A: a code point above U+FFFF, so the
# code field has five hex digits.
$charinfo = charinfo(0x1D400);

{
    my @checks = (
        [ code          => '1D400' ],
        [ name          => 'MATHEMATICAL BOLD CAPITAL A' ],
        [ category      => 'Lu' ],
        [ combining     => '0' ],
        [ bidi          => 'L' ],
        [ decomposition => '<font> 0041' ],
        [ decimal       => '' ],
        [ digit         => '' ],
        [ numeric       => '' ],
        [ mirrored      => 'N' ],
        [ unicode10     => '' ],
        [ comment       => '' ],
        [ upper         => '' ],
        [ lower         => '' ],
        [ title         => '' ],
        [ block         => 'Mathematical Alphanumeric Symbols' ],
        [ script        => 'Common' ],
    );

    # Only the first check is named.
    my $label = 'MATHEMATICAL BOLD CAPITAL A';
    for my $check (@checks) {
        my ($field, $want) = @$check;
        is($charinfo->{$field}, $want, $label);
        undef $label;
    }
}
a6fa416b 174
use Unicode::UCD qw(charblock charscript);

# 0x0590 is in the Hebrew block but unused: charblock() is expected to
# report the block, while charscript() is expected to return undef.
{
    my $unused_cp = 0x590;

    is(charblock($unused_cp),  'Hebrew', '0x0590 - Hebrew unused charblock');
    is(charscript($unused_cp), undef,    '0x0590 - Hebrew unused charscript');
}
561c79ed 181
# U+00BE VULGAR FRACTION THREE QUARTERS: the one character in this file
# with a non-empty numeric field.
$charinfo = charinfo(0xbe);

{
    # Field => expected value, consumed two at a time in order.  Only the
    # first check is named.
    my $label  = 'VULGAR FRACTION THREE QUARTERS';
    my @expect = (
        code          => '00BE',
        name          => 'VULGAR FRACTION THREE QUARTERS',
        category      => 'No',
        combining     => '0',
        bidi          => 'ON',
        decomposition => '<fraction> 0033 2044 0034',
        decimal       => '',
        digit         => '',
        numeric       => '3/4',
        mirrored      => 'N',
        unicode10     => 'FRACTION THREE QUARTERS',
        comment       => '',
        upper         => '',
        lower         => '',
        title         => '',
        block         => 'Latin-1 Supplement',
        script        => 'Common',
    );

    while (my ($field, $want) = splice(@expect, 0, 2)) {
        is($charinfo->{$field}, $want, $label);
        undef $label;
    }
}
10a6ecd2 201
use Unicode::UCD qw(charblocks charscripts);

# charblocks() returns a hash ref keyed by block name; each value is a list
# of ranges, and the first element of a range is checked below.
my $charblocks = charblocks();

ok( exists $charblocks->{Thai},     'Thai charblock exists');
is( $charblocks->{Thai}[0][0],      0x0e00);
ok(!exists $charblocks->{PigLatin}, 'PigLatin charblock does not exist');

# charscripts() is probed the same way, keyed by script name.
my $charscripts = charscripts();

ok( exists $charscripts->{Armenian}, 'Armenian charscript exists');
is( $charscripts->{Armenian}[0][0],  0x0531);
ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist');
10a6ecd2 215
# Result of the most recent scalar-context charscript() call.
my $charscript;

# The same code point given as bare hex, "0x"-prefixed and "U+"-prefixed
# strings must resolve to the same script each time.
{
    my $label = 'Ethiopic charscript';    # only the first check is named

    for my $spelling ("12ab", "0x12ab", "U+12ab") {
        $charscript = charscript($spelling);
        is($charscript, 'Ethiopic', $label);
        undef $label;
    }
}
10a6ecd2 226
# Range list returned by charscript() when it is given a script name.
my $ranges;

# Ogham: inspect the start and end of the second range in the list.
$ranges = charscript('Ogham');
{
    my $second = $ranges->[1];
    is($second->[0], 0x1681, 'Ogham charscript');
    is($second->[1], 0x169a);
}

use Unicode::UCD qw(charinrange);

# Cherokee: probe the code points just outside and just inside each end of
# the expected range.
$ranges = charscript('Cherokee');
{
    my $label = 'Cherokee charscript';    # only the first check is named

    # [ code point, whether it is expected to be inside the ranges ]
    for my $probe ([ "139f", 0 ], [ "13a0", 1 ], [ "13f4", 1 ], [ "13f5", 0 ]) {
        my ($cp, $inside) = @$probe;
        my $hit = charinrange($ranges, $cp);
        ok($inside ? $hit : !$hit, $label);
        undef $label;
    }
}
240
# The Unicode version reported by the module must match the version this
# test's expectations were written against.
is(Unicode::UCD::UnicodeVersion(), '5.0.0', 'UnicodeVersion');

use Unicode::UCD qw(compexcl);

# compexcl() must be false for U+0100 and true for U+0958.
{
    my $for_0100 = compexcl(0x0100);
    my $for_0958 = compexcl(0x0958);

    ok(!$for_0100, 'compexcl');
    ok( $for_0958);
}
247
use Unicode::UCD qw(casefold);

# Most recent casefold() result.
my $casefold;

# Each record is compared field by field with is_deeply(), so a failure
# reports which field differed and what was actually returned, instead of a
# bare "not ok".  The "|| {}" guard turns a missing record into an ordinary
# test failure rather than a fatal dereference of undef.  The number of
# tests is unchanged: one per record.

$casefold = casefold(0x41);

is_deeply([ @{ $casefold || {} }{qw(code status mapping)} ],
          [ '0041', 'C', '0061' ],
          'casefold 0x41');

$casefold = casefold(0xdf);

is_deeply([ @{ $casefold || {} }{qw(code status mapping)} ],
          [ '00DF', 'F', '0073 0073' ],
          'casefold 0xDF');

# U+0020 is expected to have no case folding, i.e. a false return value.
ok(!casefold(0x20), 'casefold 0x20');
265
use Unicode::UCD qw(casespec);

# Most recent casespec() result.
my $casespec;

# U+0041 is expected to have no special casing, i.e. a false return value.
ok(!casespec(0x41), 'casespec 0x41');

# The records below are compared field by field with is_deeply(), so a
# failure reports which field differed and what was actually returned.  An
# undef in the expected list means the field must be undefined (not merely
# empty).  The "|| {}" guards turn a missing record into an ordinary test
# failure rather than a fatal dereference of undef.

$casespec = casespec(0xdf);

is_deeply([ @{ $casespec || {} }{qw(code lower title upper condition)} ],
          [ '00DF', '00DF', '0053 0073', '0053 0053', undef ],
          'casespec 0xDF');

# For U+0307 the record of interest is nested under the 'az' key.
$casespec = casespec(0x307);

is_deeply([ @{ ($casespec && $casespec->{az}) || {} }
             {qw(code lower title upper condition)} ],
          [ '0307', undef, '0307', '0307', 'az After_I' ],
          'casespec 0x307');
6c8d78fb 288
# perl #7305 UnicodeCD::compexcl is weird
#
# Going by the test names, compexcl() is called here with $_ aliased first
# to a literal constant and then to the elements of a temporary hash.  The
# ok(1) after each call is only reached if that call did not die.

for (1) { my $unused = compexcl($_); }
ok(1, 'compexcl read-only $_: perl #7305');

grep { compexcl($_) } %{ { 1 => 2 } };
ok(1, 'compexcl read-only hash: perl #7305');
295
# Unicode::UCD::_getcode() turns a textual code point into a number:
# a plain decimal string is taken as decimal, while a leading zero, a
# "0x"/"0X" prefix or a "U+"/"u+" prefix is taken as hexadecimal.  Input
# with stray non-digit characters must give undef.
#
# The test name is generated from the input so each check is labelled with
# exactly what was passed in (the hand-written labels for '0x123x' and
# 'U+123x' used to read "_getcode(x123)").
{
    my @cases = (
        # input        expected
        [ '123',       123     ],
        [ '0123',      0x123   ],
        [ '0x123',     0x123   ],
        [ '0X123',     0x123   ],
        [ 'U+123',     0x123   ],
        [ 'u+123',     0x123   ],
        [ 'U+1234',    0x1234  ],
        [ 'U+12345',   0x12345 ],
        [ '123x',      undef   ],
        [ 'x123',      undef   ],
        [ '0x123x',    undef   ],
        [ 'U+123x',    undef   ],
    );

    for my $case (@cases) {
        my ($input, $want) = @$case;
        is(Unicode::UCD::_getcode($input), $want, "_getcode($input)");
    }
}
741297c1 308
# The range list handed back by charscript() belongs to the caller:
# emptying it must not change what a later call returns.
{
    my $first = charscript('Latin');
    my $count = scalar @$first;
    is($count, 35, "number of ranges in Latin script (Unicode 5.0.0)");

    @$first = ();    # wipe the caller's copy in place

    my $second = charscript('Latin');
    is(scalar(@$second), $count, "modifying results should not mess up internal caches");
}

# A value far beyond the Unicode range must simply give undef.
{
    my $info = charinfo(0xdeadbeef);
    is($info, undef, "[perl #23273] warnings in Unicode::UCD");
}
a2bd7410 321
use Unicode::UCD qw(namedseq);

my $ainu_p_name = "KATAKANA LETTER AINU P";
my $ainu_p_text = "\x{31F7}\x{309A}";

# Scalar context: a known name gives the sequence as a string; an unknown
# name, no argument, or more than one argument gives undef.
is(scalar(namedseq($ainu_p_name)),              $ainu_p_text, "namedseq");
is(scalar(namedseq("KATAKANA LETTER AINU Q")),  undef);
is(scalar(namedseq()),                          undef);
is(scalar(namedseq(qw(foo bar))),               undef);

# List context with a known name: the individual code points.
my @ns = namedseq($ainu_p_name);
is(scalar(@ns), 2);
is($ns[0], 0x31F7);
is($ns[1], 0x309A);

# List context with no argument: name => sequence pairs, loaded into a hash.
my %ns = namedseq();
is($ns{$ainu_p_name}, $ainu_p_text);

# List context with an unknown name: an empty list.
@ns = namedseq(42);
is(scalar(@ns), 0);
336