X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FUnicode%2FUCD.t;h=c4046bcdcab380f19aa9b55f7ab37302158da4e0;hb=98641f606c65e71cca89f9a694e2796b5a21cbd8;hp=51e200b56c3a47628c9f6e989420c846a025d295;hpb=10a6ecd25e80ad20ebf67b311125411d51e78bc0;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index 51e200b..c4046bc 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -1,162 +1,320 @@ -use Unicode::UCD; +#!perl -w +BEGIN { + if (ord("A") == 193) { + print "1..0 # Skip: EBCDIC\n"; + exit 0; + } + chdir 't' if -d 't'; + @INC = '../lib'; + @INC = "::lib" if $^O eq 'MacOS'; # module parses @INC itself + require Config; import Config; + if ($Config{'extensions'} !~ /\bStorable\b/) { + print "1..0 # Skip: Storable was not built; Unicode::UCD uses Storable\n"; + exit 0; + } +} -use Test; use strict; +use Unicode::UCD; +use Test::More; -BEGIN { plan tests => 103 }; +BEGIN { plan tests => 179 }; use Unicode::UCD 'charinfo'; -my %charinfo; - -%charinfo = charinfo(0x41); - -ok($charinfo{code}, '0041'); -ok($charinfo{name}, 'LATIN CAPITAL LETTER A'); -ok($charinfo{category}, 'Lu'); -ok($charinfo{combining}, '0'); -ok($charinfo{bidi}, 'L'); -ok($charinfo{decomposition}, ''); -ok($charinfo{decimal}, ''); -ok($charinfo{digit}, ''); -ok($charinfo{numeric}, ''); -ok($charinfo{mirrored}, 'N'); -ok($charinfo{unicode10}, ''); -ok($charinfo{comment}, ''); -ok($charinfo{upper}, ''); -ok($charinfo{lower}, '0061'); -ok($charinfo{title}, ''); -ok($charinfo{block}, 'Basic Latin'); -ok($charinfo{script}, 'LATIN'); - -%charinfo = charinfo(0x100); - -ok($charinfo{code}, '0100'); -ok($charinfo{name}, 'LATIN CAPITAL LETTER A WITH MACRON'); -ok($charinfo{category}, 'Lu'); -ok($charinfo{combining}, '0'); -ok($charinfo{bidi}, 'L'); -ok($charinfo{decomposition}, '0041 0304'); -ok($charinfo{decimal}, ''); -ok($charinfo{digit}, ''); -ok($charinfo{numeric}, ''); -ok($charinfo{mirrored}, 'N'); -ok($charinfo{unicode10}, 'LATIN CAPITAL LETTER A MACRON'); -ok($charinfo{comment}, ''); -ok($charinfo{upper}, ''); -ok($charinfo{lower}, '0101'); -ok($charinfo{title}, ''); -ok($charinfo{block}, 'Latin Extended-A'); -ok($charinfo{script}, 'LATIN'); +my $charinfo; + +$charinfo = charinfo(0x41); + +is($charinfo->{code}, '0041', 'LATIN CAPITAL LETTER A'); +is($charinfo->{name}, 'LATIN CAPITAL LETTER A'); +is($charinfo->{category}, 'Lu'); +is($charinfo->{combining}, '0'); +is($charinfo->{bidi}, 'L'); +is($charinfo->{decomposition}, ''); +is($charinfo->{decimal}, ''); +is($charinfo->{digit}, ''); +is($charinfo->{numeric}, ''); +is($charinfo->{mirrored}, 'N'); +is($charinfo->{unicode10}, ''); +is($charinfo->{comment}, ''); +is($charinfo->{upper}, ''); +is($charinfo->{lower}, '0061'); +is($charinfo->{title}, ''); +is($charinfo->{block}, 'Basic Latin'); +is($charinfo->{script}, 'Latin'); + +$charinfo = charinfo(0x100); + +is($charinfo->{code}, '0100', 'LATIN CAPITAL LETTER A WITH MACRON'); +is($charinfo->{name}, 'LATIN CAPITAL LETTER A WITH MACRON'); +is($charinfo->{category}, 'Lu'); +is($charinfo->{combining}, '0'); +is($charinfo->{bidi}, 'L'); +is($charinfo->{decomposition}, '0041 0304'); +is($charinfo->{decimal}, ''); +is($charinfo->{digit}, ''); +is($charinfo->{numeric}, ''); +is($charinfo->{mirrored}, 'N'); +is($charinfo->{unicode10}, 'LATIN CAPITAL LETTER A MACRON'); +is($charinfo->{comment}, ''); +is($charinfo->{upper}, ''); +is($charinfo->{lower}, '0101'); +is($charinfo->{title}, ''); +is($charinfo->{block}, 'Latin Extended-A'); +is($charinfo->{script}, 'Latin'); # 0x0590 is in the Hebrew block but unused. -%charinfo = charinfo(0x590); - -ok($charinfo{code}, undef); -ok($charinfo{name}, undef); -ok($charinfo{category}, undef); -ok($charinfo{combining}, undef); -ok($charinfo{bidi}, undef); -ok($charinfo{decomposition}, undef); -ok($charinfo{decimal}, undef); -ok($charinfo{digit}, undef); -ok($charinfo{numeric}, undef); -ok($charinfo{mirrored}, undef); -ok($charinfo{unicode10}, undef); -ok($charinfo{comment}, undef); -ok($charinfo{upper}, undef); -ok($charinfo{lower}, undef); -ok($charinfo{title}, undef); -ok($charinfo{block}, undef); -ok($charinfo{script}, undef); +$charinfo = charinfo(0x590); + +is($charinfo->{code}, undef, '0x0590 - unused Hebrew'); +is($charinfo->{name}, undef); +is($charinfo->{category}, undef); +is($charinfo->{combining}, undef); +is($charinfo->{bidi}, undef); +is($charinfo->{decomposition}, undef); +is($charinfo->{decimal}, undef); +is($charinfo->{digit}, undef); +is($charinfo->{numeric}, undef); +is($charinfo->{mirrored}, undef); +is($charinfo->{unicode10}, undef); +is($charinfo->{comment}, undef); +is($charinfo->{upper}, undef); +is($charinfo->{lower}, undef); +is($charinfo->{title}, undef); +is($charinfo->{block}, undef); +is($charinfo->{script}, undef); # 0x05d0 is in the Hebrew block and used. -%charinfo = charinfo(0x5d0); - -ok($charinfo{code}, '05D0'); -ok($charinfo{name}, 'HEBREW LETTER ALEF'); -ok($charinfo{category}, 'Lo'); -ok($charinfo{combining}, '0'); -ok($charinfo{bidi}, 'R'); -ok($charinfo{decomposition}, ''); -ok($charinfo{decimal}, ''); -ok($charinfo{digit}, ''); -ok($charinfo{numeric}, ''); -ok($charinfo{mirrored}, 'N'); -ok($charinfo{unicode10}, ''); -ok($charinfo{comment}, ''); -ok($charinfo{upper}, ''); -ok($charinfo{lower}, ''); -ok($charinfo{title}, ''); -ok($charinfo{block}, 'Hebrew'); -ok($charinfo{script}, 'Hebrew'); +$charinfo = charinfo(0x5d0); + +is($charinfo->{code}, '05D0', '05D0 - used Hebrew'); +is($charinfo->{name}, 'HEBREW LETTER ALEF'); +is($charinfo->{category}, 'Lo'); +is($charinfo->{combining}, '0'); +is($charinfo->{bidi}, 'R'); +is($charinfo->{decomposition}, ''); +is($charinfo->{decimal}, ''); +is($charinfo->{digit}, ''); +is($charinfo->{numeric}, ''); +is($charinfo->{mirrored}, 'N'); +is($charinfo->{unicode10}, ''); +is($charinfo->{comment}, ''); +is($charinfo->{upper}, ''); +is($charinfo->{lower}, ''); +is($charinfo->{title}, ''); +is($charinfo->{block}, 'Hebrew'); +is($charinfo->{script}, 'Hebrew'); + +# An open syllable in Hangul. + +$charinfo = charinfo(0xAC00); + +is($charinfo->{code}, 'AC00', 'HANGUL SYLLABLE-AC00'); +is($charinfo->{name}, 'HANGUL SYLLABLE-AC00'); +is($charinfo->{category}, 'Lo'); +is($charinfo->{combining}, '0'); +is($charinfo->{bidi}, 'L'); +is($charinfo->{decomposition}, undef); +is($charinfo->{decimal}, ''); +is($charinfo->{digit}, ''); +is($charinfo->{numeric}, ''); +is($charinfo->{mirrored}, 'N'); +is($charinfo->{unicode10}, ''); +is($charinfo->{comment}, ''); +is($charinfo->{upper}, ''); +is($charinfo->{lower}, ''); +is($charinfo->{title}, ''); +is($charinfo->{block}, 'Hangul Syllables'); +is($charinfo->{script}, 'Hangul'); + +# A closed syllable in Hangul. + +$charinfo = charinfo(0xAE00); + +is($charinfo->{code}, 'AE00', 'HANGUL SYLLABLE-AE00'); +is($charinfo->{name}, 'HANGUL SYLLABLE-AE00'); +is($charinfo->{category}, 'Lo'); +is($charinfo->{combining}, '0'); +is($charinfo->{bidi}, 'L'); +is($charinfo->{decomposition}, undef); +is($charinfo->{decimal}, ''); +is($charinfo->{digit}, ''); +is($charinfo->{numeric}, ''); +is($charinfo->{mirrored}, 'N'); +is($charinfo->{unicode10}, ''); +is($charinfo->{comment}, ''); +is($charinfo->{upper}, ''); +is($charinfo->{lower}, ''); +is($charinfo->{title}, ''); +is($charinfo->{block}, 'Hangul Syllables'); +is($charinfo->{script}, 'Hangul'); + +$charinfo = charinfo(0x1D400); + +is($charinfo->{code}, '1D400', 'MATHEMATICAL BOLD CAPITAL A'); +is($charinfo->{name}, 'MATHEMATICAL BOLD CAPITAL A'); +is($charinfo->{category}, 'Lu'); +is($charinfo->{combining}, '0'); +is($charinfo->{bidi}, 'L'); +is($charinfo->{decomposition}, ' 0041'); +is($charinfo->{decimal}, ''); +is($charinfo->{digit}, ''); +is($charinfo->{numeric}, ''); +is($charinfo->{mirrored}, 'N'); +is($charinfo->{unicode10}, ''); +is($charinfo->{comment}, ''); +is($charinfo->{upper}, ''); +is($charinfo->{lower}, ''); +is($charinfo->{title}, ''); +is($charinfo->{block}, 'Mathematical Alphanumeric Symbols'); +is($charinfo->{script}, 'Common'); use Unicode::UCD qw(charblock charscript); # 0x0590 is in the Hebrew block but unused. -ok(charblock(0x590), 'Hebrew'); -ok(charscript(0x590), undef); - -%charinfo = charinfo(0xbe); - -ok($charinfo{code}, '00BE'); -ok($charinfo{name}, 'VULGAR FRACTION THREE QUARTERS'); -ok($charinfo{category}, 'No'); -ok($charinfo{combining}, '0'); -ok($charinfo{bidi}, 'ON'); -ok($charinfo{decomposition}, ' 0033 2044 0034'); -ok($charinfo{decimal}, ''); -ok($charinfo{digit}, ''); -ok($charinfo{numeric}, '3/4'); -ok($charinfo{mirrored}, 'N'); -ok($charinfo{unicode10}, 'FRACTION THREE QUARTERS'); -ok($charinfo{comment}, ''); -ok($charinfo{upper}, ''); -ok($charinfo{lower}, ''); -ok($charinfo{title}, ''); -ok($charinfo{block}, 'Latin-1 Supplement'); -ok($charinfo{script}, undef); +is(charblock(0x590), 'Hebrew', '0x0590 - Hebrew unused charblock'); +is(charscript(0x590), undef, '0x0590 - Hebrew unused charscript'); + +$charinfo = charinfo(0xbe); + +is($charinfo->{code}, '00BE', 'VULGAR FRACTION THREE QUARTERS'); +is($charinfo->{name}, 'VULGAR FRACTION THREE QUARTERS'); +is($charinfo->{category}, 'No'); +is($charinfo->{combining}, '0'); +is($charinfo->{bidi}, 'ON'); +is($charinfo->{decomposition}, ' 0033 2044 0034'); +is($charinfo->{decimal}, ''); +is($charinfo->{digit}, ''); +is($charinfo->{numeric}, '3/4'); +is($charinfo->{mirrored}, 'N'); +is($charinfo->{unicode10}, 'FRACTION THREE QUARTERS'); +is($charinfo->{comment}, ''); +is($charinfo->{upper}, ''); +is($charinfo->{lower}, ''); +is($charinfo->{title}, ''); +is($charinfo->{block}, 'Latin-1 Supplement'); +is($charinfo->{script}, 'Common'); use Unicode::UCD qw(charblocks charscripts); -my %charblocks = charblocks(); +my $charblocks = charblocks(); -ok(exists $charblocks{Thai}); -ok($charblocks{Thai}->[0]->[0], hex('0e00')); -ok(!exists $charblocks{PigLatin}); +ok(exists $charblocks->{Thai}, 'Thai charblock exists'); +is($charblocks->{Thai}->[0]->[0], hex('0e00')); +ok(!exists $charblocks->{PigLatin}, 'PigLatin charblock does not exist'); -my %charscripts = charscripts(); +my $charscripts = charscripts(); -ok(exists $charscripts{Armenian}); -ok($charscripts{Armenian}->[0]->[0], hex('0531')); -ok(!exists $charscripts{PigLatin}); +ok(exists $charscripts->{Armenian}, 'Armenian charscript exists'); +is($charscripts->{Armenian}->[0]->[0], hex('0531')); +ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist'); my $charscript; $charscript = charscript("12ab"); -ok($charscript, 'Ethiopic'); +is($charscript, 'Ethiopic', 'Ethiopic charscript'); $charscript = charscript("0x12ab"); -ok($charscript, 'Ethiopic'); +is($charscript, 'Ethiopic'); $charscript = charscript("U+12ab"); -ok($charscript, 'Ethiopic'); +is($charscript, 'Ethiopic'); my $ranges; $ranges = charscript('Ogham'); -ok($ranges->[0]->[0], hex('1681')); -ok($ranges->[0]->[1], hex('169a')); +is($ranges->[1]->[0], hex('1681'), 'Ogham charscript'); +is($ranges->[1]->[1], hex('169a')); use Unicode::UCD qw(charinrange); $ranges = charscript('Cherokee'); -ok(!charinrange($ranges, "139f")); +ok(!charinrange($ranges, "139f"), 'Cherokee charscript'); ok( charinrange($ranges, "13a0")); ok( charinrange($ranges, "13f4")); ok(!charinrange($ranges, "13f5")); -ok(Unicode::UCD::UnicodeVersion, 3.1); +is(Unicode::UCD::UnicodeVersion, '4.0.1', 'UnicodeVersion'); + +use Unicode::UCD qw(compexcl); + +ok(!compexcl(0x0100), 'compexcl'); +ok( compexcl(0x0958)); + +use Unicode::UCD qw(casefold); + +my $casefold; + +$casefold = casefold(0x41); + +ok($casefold->{code} eq '0041' && + $casefold->{status} eq 'C' && + $casefold->{mapping} eq '0061', 'casefold 0x41'); + +$casefold = casefold(0xdf); + +ok($casefold->{code} eq '00DF' && + $casefold->{status} eq 'F' && + $casefold->{mapping} eq '0073 0073', 'casefold 0xDF'); + +ok(!casefold(0x20)); + +use Unicode::UCD qw(casespec); + +my $casespec; + +ok(!casespec(0x41)); + +$casespec = casespec(0xdf); + +ok($casespec->{code} eq '00DF' && + $casespec->{lower} eq '00DF' && + $casespec->{title} eq '0053 0073' && + $casespec->{upper} eq '0053 0053' && + !defined $casespec->{condition}, 'casespec 0xDF'); + +$casespec = casespec(0x307); + +ok($casespec->{az}->{code} eq '0307' && + !defined $casespec->{az}->{lower} && + $casespec->{az}->{title} eq '0307' && + $casespec->{az}->{upper} eq '0307' && + $casespec->{az}->{condition} eq 'az After_I', + 'casespec 0x307'); + +# perl #7305 UnicodeCD::compexcl is weird + +for (1) {my $a=compexcl $_} +ok(1, 'compexcl read-only $_: perl #7305'); +grep {compexcl $_} %{{1=>2}}; +ok(1, 'compexcl read-only hash: perl #7305'); + +is(Unicode::UCD::_getcode('123'), 123, "_getcode(123)"); +is(Unicode::UCD::_getcode('0123'), 0x123, "_getcode(0123)"); +is(Unicode::UCD::_getcode('0x123'), 0x123, "_getcode(0x123)"); +is(Unicode::UCD::_getcode('0X123'), 0x123, "_getcode(0X123)"); +is(Unicode::UCD::_getcode('U+123'), 0x123, "_getcode(U+123)"); +is(Unicode::UCD::_getcode('u+123'), 0x123, "_getcode(u+123)"); +is(Unicode::UCD::_getcode('U+1234'), 0x1234, "_getcode(U+1234)"); +is(Unicode::UCD::_getcode('U+12345'), 0x12345, "_getcode(U+12345)"); +is(Unicode::UCD::_getcode('123x'), undef, "_getcode(123x)"); +is(Unicode::UCD::_getcode('x123'), undef, "_getcode(x123)"); +is(Unicode::UCD::_getcode('0x123x'), undef, "_getcode(x123)"); +is(Unicode::UCD::_getcode('U+123x'), undef, "_getcode(x123)"); + +{ + my $r1 = charscript('Latin'); + my $n1 = @$r1; + is($n1, 26, "26 ranges in Latin script (Unicode 4.0.0)"); + shift @$r1 while @$r1; + my $r2 = charscript('Latin'); + is(@$r2, $n1, "modifying results should not mess up internal caches"); +} + +{ + is(charinfo(0xdeadbeef), undef, "[perl #23273] warnings in Unicode::UCD"); +}