/Compress/ modules are at version 2.021. Remove vestigial MAPs and comments.
[p5sagit/p5-mst-13.2.git] / ext / Unicode-Collate / t / override.t
CommitLineData
# Compile-time guard: skip the whole file (plan "1..0") when this perl
# cannot turn a code point into a character with pack('U'), and point
# @INC at the core lib directory when run as part of the perl core tests.
BEGIN {
    unless ("A" eq pack('U', 0x41)) {
        print "1..0 # Unicode::Collate " .
            "cannot stringify a Unicode code point\n";
        exit 0;
    }
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        # MacOS (classic) uses ':' path syntax, hence '::lib'.
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
# The "##### a..b" section markers in this file number the tests up to 76.
BEGIN { plan tests => 76 };

use strict;
use warnings;
use Unicode::Collate;

# Test 1: reaching this point means Unicode::Collate was loaded.
ok(1);

##### 2..6

# Collator with no table, no normalization and no CJK/Hangul overrides,
# using UCA_Version 8.
my $all_undef_8 = Unicode::Collate->new(
    table => undef,
    normalization => undef,
    overrideCJK => undef,
    overrideHangul => undef,
    UCA_Version => 8,
);

# All in the Unicode code point order.
# No hangul decomposition.

ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}"));
ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}"));


##### 7..11

# Same configuration as the UCA_Version 8 collator, but with UCA_Version 9.
my $all_undef_9 = Unicode::Collate->new(
    table => undef,
    normalization => undef,
    overrideCJK => undef,
    overrideHangul => undef,
    UCA_Version => 9,
);

# CJK Ideo. < CJK ext A/B < Others.
# No hangul decomposition.

ok($all_undef_9->lt("\x{4E00}", "\x{3402}"));
ok($all_undef_9->lt("\x{3402}", "\x{20000}"));
ok($all_undef_9->lt("\x{20000}", "\x{AC00}"));
ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}"));
ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}")); # U+ABFF: not assigned

##### 12..16

# overrideHangul returns an empty list for every Hangul syllable; only
# U+AE00 gets a weight, through the explicit entry below.
my $ignoreHangul = Unicode::Collate->new(
    table => undef,
    normalization => undef,
    overrideHangul => sub {()},
    entry => <<'ENTRIES',
AE00 ; [.0100.0020.0002.AE00] # Hangul GEUL
ENTRIES
);

# All Hangul Syllables except U+AE00 are ignored.

ok($ignoreHangul->eq("\x{AC00}", ""));
ok($ignoreHangul->lt("\x{AC00}", "\0"));
ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}"));
ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored.
ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned.


# overrideCJK returns an empty list for every CJK unified ideograph; only
# U+5B57 gets a weight, through the explicit entry below.
my $ignoreCJK = Unicode::Collate->new(
    table => undef,
    normalization => undef,
    overrideCJK => sub {()},
    entry => <<'ENTRIES',
5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter"
ENTRIES
);

# All CJK Unified Ideographs except U+5B57 are ignored.

##### 17..21
ok($ignoreCJK->eq("\x{4E00}", ""));
ok($ignoreCJK->lt("\x{4E00}", "\0"));
ok($ignoreCJK->eq("Pe\x{4E00}rl", "Perl")); # U+4E00 is a CJK.
ok($ignoreCJK->gt("\x{4DFF}", "\x{4E00}")); # U+4DFF is not CJK.
ok($ignoreCJK->lt("Pe\x{5B57}rl", "Perl")); # 'r' is unassigned.

##### 22..29
# Range boundaries with the collator's initial UCA_Version (none was passed
# to the constructor): eq "" means the code point was ignored as CJK.
ok($ignoreCJK->eq("\x{3400}", ""));
ok($ignoreCJK->eq("\x{4DB5}", ""));
ok($ignoreCJK->eq("\x{9FA5}", ""));
ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0
ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0
ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
ok($ignoreCJK->eq("\x{20000}", ""));
ok($ignoreCJK->eq("\x{2A6D6}", ""));

##### 30..37
# Under UCA_Version 9, U+9FA6..U+9FBB are expected not to be ignored.
$ignoreCJK->change(UCA_Version => 9);
ok($ignoreCJK->eq("\x{3400}", ""));
ok($ignoreCJK->eq("\x{4DB5}", ""));
ok($ignoreCJK->eq("\x{9FA5}", ""));
ok($ignoreCJK->gt("\x{9FA6}", "Perl"));
ok($ignoreCJK->gt("\x{9FBB}", "Perl"));
ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
ok($ignoreCJK->eq("\x{20000}", ""));
ok($ignoreCJK->eq("\x{2A6D6}", ""));

##### 38..45
# Under UCA_Version 8, the same expectations as under UCA_Version 9.
$ignoreCJK->change(UCA_Version => 8);
ok($ignoreCJK->eq("\x{3400}", ""));
ok($ignoreCJK->eq("\x{4DB5}", ""));
ok($ignoreCJK->eq("\x{9FA5}", ""));
ok($ignoreCJK->gt("\x{9FA6}", "Perl"));
ok($ignoreCJK->gt("\x{9FBB}", "Perl"));
ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
ok($ignoreCJK->eq("\x{20000}", ""));
ok($ignoreCJK->eq("\x{2A6D6}", ""));

##### 46..53
# Under UCA_Version 14, U+9FA6..U+9FBB are expected to be ignored as CJK
# again; U+9FBC still is not.
$ignoreCJK->change(UCA_Version => 14);
ok($ignoreCJK->eq("\x{3400}", ""));
ok($ignoreCJK->eq("\x{4DB5}", ""));
ok($ignoreCJK->eq("\x{9FA5}", ""));
ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0
ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0
ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
ok($ignoreCJK->eq("\x{20000}", ""));
ok($ignoreCJK->eq("\x{2A6D6}", ""));

##### 54..76
# overrideCJK derives a weight from 0xFFFF minus its first argument
# (presumably the code point -- see the Unicode::Collate documentation),
# so overridden ideographs sort in reverse order.  U+4E00 has an explicit
# entry sharing the primary weight that the override computes for U+4E03.
my $overCJK = Unicode::Collate->new(
    table => undef,
    normalization => undef,
    entry => <<'ENTRIES',
0061 ; [.0101.0020.0002.0061] # latin a
0041 ; [.0101.0020.0008.0041] # LATIN A
4E00 ; [.B1FC.0030.0004.4E00] # Ideograph; B1FC = FFFF - 4E03.
ENTRIES
    overrideCJK => sub {
        my $u = 0xFFFF - $_[0]; # reversed
        [$u, 0x20, 0x2, $u];
    },
);

ok($overCJK->lt("a", "A")); # diff. at level 3.
ok($overCJK->lt( "\x{4E03}", "\x{4E00}")); # diff. at level 2.
ok($overCJK->lt("A\x{4E03}", "A\x{4E00}"));
ok($overCJK->lt("A\x{4E03}", "a\x{4E00}"));
ok($overCJK->lt("a\x{4E03}", "A\x{4E00}"));

# gt: both ideographs are overridden (reversed order);
# lt: presumably the right-hand code point is not treated as CJK.
ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}"));
ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}"));
ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}"));
ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}"));

# Under UCA_Version 9 the reversed (gt) ordering is expected only up to
# U+9FA5; from U+9FA6 on the comparisons are expected to be lt.
$overCJK->change(UCA_Version => 9);

ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
ok($overCJK->lt("a\x{9FA5}", "A\x{9FA6}"));
ok($overCJK->lt("a\x{9FA6}", "A\x{9FBB}"));
ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}"));
ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}"));

# Under UCA_Version 14 the reversed (gt) ordering is expected up to U+9FBB.
$overCJK->change(UCA_Version => 14);

ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}"));
ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}"));
ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}"));
ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}"));