Upgrade to Unicode::Normalize 0.21 and Unicode::Collate 0.24,
[p5sagit/p5-mst-13.2.git] / lib / Unicode / Collate / t / test.t
Commit Line Data
45394607 1
# Compile-time guard: unless pack('U', ...) turns 0x41 (or ord("A")) into
# the string "A", print a "1..0" skip plan and exit with status 0.
4a2e806c 2BEGIN {
9f1f04a1 3 unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) {
4 print "1..0 # Unicode::Collate " .
5 "cannot stringify a Unicode code point\n";
4a2e806c 6 exit 0;
7 }
8}
9
# When $ENV{PERL_CORE} is set: chdir into 't' if that directory exists and
# replace @INC with '../lib' ('::lib' when $^O is 'MacOS').
0116f5dc 10BEGIN {
11 if ($ENV{PERL_CORE}) {
12 chdir('t') if -d 't';
63c6dcc1 13 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
0116f5dc 14 }
15}
16
# Test plan: 194 tests are declared here; every ok() below counts toward it.
45394607 17use Test;
4d36a948 18BEGIN { plan tests => 194 };
45394607 19use Unicode::Collate;
45394607 20
# True when ord("A") is not 0x41; consulted later to skip the
# Unicode::Normalize-dependent tests.
4d36a948 21our $IsEBCDIC = ord("A") != 0x41;
22
45394607 23#########################
24
# Smoke test, then check that UCA_Version reports "9" whether it is called
# as a plain function, as a class method, or on an instance.
0116f5dc 25ok(1); # If we made it this far, we're ok.
26
27my $UCA_Version = "9";
3164dd77 28
29ok(Unicode::Collate::UCA_Version, $UCA_Version);
30ok(Unicode::Collate->UCA_Version, $UCA_Version);
31
# Shared collator used by many later sections: table 'keys.txt',
# normalization explicitly undef.
5398038e 32my $Collator = Unicode::Collate->new(
45394607 33 table => 'keys.txt',
34 normalization => undef,
35);
36
5398038e 37ok(ref $Collator, "Unicode::Collate");
45394607 38
3164dd77 39ok($Collator->UCA_Version, $UCA_Version);
40ok($Collator->UCA_Version(), $UCA_Version);
41
# sort() on a mixed-case word list is expected to give the order in the
# second list (joined with ':' so the two can be compared as strings).
45394607 42ok(
5398038e 43 join(':', $Collator->sort(
45394607 44 qw/ lib strict Carp ExtUtils CGI Time warnings Math overload Pod CPAN /
45 ) ),
46 join(':',
47 qw/ Carp CGI CPAN ExtUtils lib Math overload Pod strict Time warnings /
48 ),
49);
50
# Empty-string handling: "" equals "", and "" is expected to compare as -1
# against "perl".
0116f5dc 51ok($Collator->cmp("", ""), 0);
52ok($Collator->eq("", ""));
53ok($Collator->cmp("", "perl"), -1);
54
55##############
56
# Thin local wrappers that forward all arguments to
# Unicode::Collate::pack_U / Unicode::Collate::unpack_U.
9f1f04a1 57sub _pack_U { Unicode::Collate::pack_U(@_) }
58sub _unpack_U { Unicode::Collate::unpack_U(@_) }
59
# Strings built from code points 0xC1, 0xE1 and 0x0301 via _pack_U; the
# tests below compare "A" + $acute against $A_acute at several levels.
60my $A_acute = _pack_U(0xC1);
61my $a_acute = _pack_U(0xE1);
62my $acute = _pack_U(0x0301);
45394607 63
caffd4cf 64ok($Collator->cmp("A$acute", $A_acute), 0); # @version 3.1.1 (prev: -1)
0116f5dc 65ok($Collator->cmp($a_acute, $A_acute), -1);
4d36a948 66ok($Collator->eq("A\cA$acute", $A_acute)); # UCA v9. \cA is invariant.
0116f5dc 67
# change() returns the previous settings; %old_level is passed back to
# change() below (and again in later sections) to restore them.
68my %old_level = $Collator->change(level => 1);
69ok($Collator->eq("A$acute", $A_acute));
70ok($Collator->eq("A", $A_acute));
71
# change() is chained here, so it evidently returns the collator itself
# when used this way.
72ok($Collator->change(level => 2)->eq($a_acute, $A_acute));
73ok($Collator->lt("A", $A_acute));
74
75ok($Collator->change(%old_level)->lt("A", $A_acute));
76ok($Collator->lt("A", $A_acute));
77ok($Collator->lt("A", $a_acute));
78ok($Collator->lt($a_acute, $A_acute));
45394607 79
809c7673 80##############
81
# Optional section: the four $NFD tests run only when Unicode::Normalize
# loads without error ($@ empty) and $IsEBCDIC is false. Otherwise four
# ok(1) placeholders are emitted so the planned test count still matches.
# $NFD is built without a table and without an explicit normalization
# argument, using only the entries in the heredoc.
82eval { require Unicode::Normalize };
45394607 83
4d36a948 84if (!$@ && !$IsEBCDIC) {
45394607 85 my $NFD = Unicode::Collate->new(
caffd4cf 86 table => undef,
905aa9f0 87 entry => <<'ENTRIES',
caffd4cf 880430 ; [.0CB5.0020.0002.0430] # CYRILLIC SMALL LETTER A
890410 ; [.0CB5.0020.0008.0410] # CYRILLIC CAPITAL LETTER A
9004D3 ; [.0CBD.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS
910430 0308 ; [.0CBD.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS
9204D2 ; [.0CBD.0020.0008.04D2] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
930410 0308 ; [.0CBD.0020.0008.04D2] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
940430 3099 ; [.0CBE.0020.0002.04D3] # A WITH KATAKANA VOICED
950430 3099 0308 ; [.0CBF.0020.0002.04D3] # A WITH KATAKANA VOICED, DIAERESIS
905aa9f0 96ENTRIES
45394607 97 );
905aa9f0 98 ok($NFD->eq("\x{4D3}\x{325}", "\x{430}\x{308}\x{325}"));
99 ok($NFD->lt("\x{430}\x{308}A", "\x{430}\x{308}B"));
100 ok($NFD->lt("\x{430}\x{3099}B", "\x{430}\x{308}\x{3099}A"));
101 ok($NFD->eq("\x{0430}\x{3099}\x{309A}\x{0308}",
102 "\x{0430}\x{309A}\x{3099}\x{0308}") );
45394607 103}
809c7673 104else {
d16e9e3d 105 ok(1);
905aa9f0 106 ok(1);
107 ok(1);
108 ok(1);
45394607 109}
110
809c7673 111##############
112
# $trad: keys.txt plus two extra entries that give the sequences "ch" and
# "Ch" their own primary weight 0A3F (between the weights quoted for c and
# d in the comments after the constructor). ignoreName receives a regex
# matching HANGUL|HIRAGANA|KATAKANA|BOPOMOFO. Note the first heredoc entry
# uses '%' instead of '#' before its trailing remark.
113my $trad = Unicode::Collate->new(
45394607 114 table => 'keys.txt',
115 normalization => undef,
809c7673 116 ignoreName => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/,
117 level => 4,
118 entry => << 'ENTRIES',
caffd4cf 119 0063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish
120 0043 0068 ; [.0A3F.0020.0008.0043] # "Ch" in traditional Spanish
45394607 121ENTRIES
122);
caffd4cf 123# 0063 ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C
124# 0064 ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D
4d36a948 125# Deutsch sz is included in 'keys.txt';
45394607 126
# With $trad, "acha" is expected after "acka"; with the plain $Collator it
# is expected right after "aca".
127ok(
809c7673 128 join(':', $trad->sort( qw/ acha aca ada acia acka / ) ),
129 join(':', qw/ aca acia acka acha ada / ),
45394607 130);
131
132ok(
809c7673 133 join(':', $Collator->sort( qw/ acha aca ada acia acka / ) ),
134 join(':', qw/ aca acha acia acka ada / ),
45394607 135);
# Control characters placed between "c" and "h" are expected not to break
# the "ch" match.
caffd4cf 136ok($trad->eq("ocho", "oc\cAho")); # UCA v9
4d36a948 137ok($trad->eq("ocho", "oc\0\cA\0\cBho")); # UCA v9
45394607 138
# $hiragana and $katakana are reused by later sections of this file.
45394607 139my $hiragana = "\x{3042}\x{3044}";
140my $katakana = "\x{30A2}\x{30A4}";
141
809c7673 142# HIRAGANA and KATAKANA are ignorable via ignoreName
143ok($trad->eq($hiragana, ""));
144ok($trad->eq("", $katakana));
145ok($trad->eq($hiragana, $katakana));
146ok($trad->eq($katakana, $hiragana));
147
148##############
149
# Switch the shared $Collator to level 2 and confirm the setting is stored
# in $Collator->{level}. At this level the tests expect case differences
# and the hiragana/katakana difference to compare as equal.
0116f5dc 150$Collator->change(level => 2);
809c7673 151
0116f5dc 152ok($Collator->{level}, 2);
5398038e 153
154ok( $Collator->cmp("ABC","abc"), 0);
155ok( $Collator->eq("ABC","abc") );
156ok( $Collator->le("ABC","abc") );
157ok( $Collator->cmp($hiragana, $katakana), 0);
158ok( $Collator->eq($hiragana, $katakana) );
159ok( $Collator->ge($hiragana, $katakana) );
45394607 160
# Precomposed Hangul syllables are expected to equal their jamo sequences,
# but not when another character ("b") sits between the jamo.
5398038e 161# hangul
162ok( $Collator->eq("a\x{AC00}b", "a\x{1100}\x{1161}b") );
163ok( $Collator->eq("a\x{AE00}b", "a\x{1100}\x{1173}\x{11AF}b") );
164ok( $Collator->gt("a\x{AE00}b", "a\x{1100}\x{1173}b\x{11AF}") );
165ok( $Collator->lt("a\x{AC00}b", "a\x{AE00}b") );
166ok( $Collator->gt("a\x{D7A3}b", "a\x{C544}b") );
167ok( $Collator->lt("a\x{C544}b", "a\x{30A2}b") ); # hangul < hiragana
45394607 168
# Restore the saved level (checked to be 4 below) and turn on
# katakana_before_hiragana: lower case is still expected before upper
# case, while $hiragana is now expected after $katakana.
0116f5dc 169$Collator->change(%old_level, katakana_before_hiragana => 1);
45394607 170
0116f5dc 171ok($Collator->{level}, 4);
45394607 172
5398038e 173ok( $Collator->cmp("abc", "ABC"), -1);
174ok( $Collator->ne("abc", "ABC") );
175ok( $Collator->lt("abc", "ABC") );
176ok( $Collator->le("abc", "ABC") );
177ok( $Collator->cmp($hiragana, $katakana), 1);
178ok( $Collator->ne($hiragana, $katakana) );
179ok( $Collator->gt($hiragana, $katakana) );
180ok( $Collator->ge($hiragana, $katakana) );
45394607 181
# Additionally turn on upper_before_lower: "abc" is now expected after
# "ABC"; the kana expectation is unchanged.
0116f5dc 182$Collator->change(upper_before_lower => 1);
45394607 183
5398038e 184ok( $Collator->cmp("abc", "ABC"), 1);
185ok( $Collator->ge("abc", "ABC"), 1);
186ok( $Collator->gt("abc", "ABC"), 1);
187ok( $Collator->cmp($hiragana, $katakana), 1);
188ok( $Collator->ge($hiragana, $katakana), 1);
189ok( $Collator->gt($hiragana, $katakana), 1);
45394607 190
# Turn katakana_before_hiragana back off; upper_before_lower stays on.
0116f5dc 191$Collator->change(katakana_before_hiragana => 0);
45394607 192
5398038e 193ok( $Collator->cmp("abc", "ABC"), 1);
194ok( $Collator->cmp($hiragana, $katakana), -1);
45394607 195
# Turn upper_before_lower back off as well.
0116f5dc 196$Collator->change(upper_before_lower => 0);
45394607 197
5398038e 198ok( $Collator->cmp("abc", "ABC"), -1);
199ok( $Collator->le("abc", "ABC") );
200ok( $Collator->cmp($hiragana, $katakana), -1);
201ok( $Collator->lt($hiragana, $katakana) );
45394607 202
809c7673 203##############
204
# $ignoreAE: ignoreChar is a regex matching exactly one of a, A, e, E.
# The tests expect strings that differ only in those letters to be equal.
205my $ignoreAE = Unicode::Collate->new(
206 table => 'keys.txt',
207 normalization => undef,
208 ignoreChar => qr/^[aAeE]$/,
209);
210
211ok($ignoreAE->eq("element","lament"));
212ok($ignoreAE->eq("Perl","ePrl"));
213
214##############
215
# $onlyABC: no table; only a/A, b/B, c/C are defined through the heredoc.
# Each pair shares a primary weight (0101, 0102, 0103) and differs in the
# third field (0002 for lower case, 0008 for upper case).
216my $onlyABC = Unicode::Collate->new(
217 table => undef,
327745dc 218 normalization => undef,
809c7673 219 entry => << 'ENTRIES',
2200061 ; [.0101.0020.0002.0061] # LATIN SMALL LETTER A
2210041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A
2220062 ; [.0102.0020.0002.0062] # LATIN SMALL LETTER B
2230042 ; [.0102.0020.0008.0042] # LATIN CAPITAL LETTER B
2240063 ; [.0103.0020.0002.0063] # LATIN SMALL LETTER C
2250043 ; [.0103.0020.0008.0043] # LATIN CAPITAL LETTER C
226ENTRIES
227);
228
229ok(
230 join(':', $onlyABC->sort( qw/ ABA BAC cc A Ab cAc aB / ) ),
231 join(':', qw/ A aB Ab ABA BAC cAc cc / ),
232);
233
234##############
235
# $undefAE: same regex as $ignoreAE but passed as undefChar. Each $undefAE
# test is paired with the same comparison on the plain $Collator, which is
# expected to give the opposite order.
236my $undefAE = Unicode::Collate->new(
45394607 237 table => 'keys.txt',
238 normalization => undef,
809c7673 239 undefChar => qr/^[aAeE]$/,
45394607 240);
241
809c7673 242ok($undefAE ->gt("edge","fog"));
243ok($Collator->lt("edge","fog"));
244ok($undefAE ->gt("lake","like"));
245ok($Collator->lt("lake","like"));
246
247##############
45394607 248
809c7673 249# Table is undefined, then no entry is defined.
250
# $undef_table compares at level 1 only, with no table and no extra entries.
251my $undef_table = Unicode::Collate->new(
252 table => undef,
253 normalization => undef,
254 level => 1,
255);
256
257# in the Unicode code point order
258ok($undef_table->lt('', 'A'));
259ok($undef_table->lt('ABC', 'B'));
260
261# Hangul should be decomposed (even w/o Unicode::Normalize).
262
263ok($undef_table->lt("Perl", "\x{AC00}"));
264ok($undef_table->eq("\x{AC00}", "\x{1100}\x{1161}"));
265ok($undef_table->eq("\x{AE00}", "\x{1100}\x{1173}\x{11AF}"));
266ok($undef_table->lt("\x{AE00}", "\x{3042}"));
267 # U+AC00: Hangul GA
268 # U+AE00: Hangul GEUL
269 # U+3042: Hiragana A
270
271# Weight for CJK Ideographs is defined, though.
272
# The ideographs are expected before "ABC" and before U+3042, and in code
# point order relative to each other.
273ok($undef_table->lt("", "\x{4E00}"));
274ok($undef_table->lt("\x{4E8C}","ABC"));
275ok($undef_table->lt("\x{4E00}","\x{3042}"));
276ok($undef_table->lt("\x{4E00}","\x{4E8C}"));
277 # U+4E00: Ideograph "ONE"
278 # U+4E8C: Ideograph "TWO"
279
280
281##############
282
# $few_entries: no table; the heredoc defines P, E, R, L, two Hangul jamo
# and one ideograph with ascending primary weights 0101..0107. The entry
# argument is given before table/normalization in the argument list.
283my $few_entries = Unicode::Collate->new(
284 entry => <<'ENTRIES',
2850050 ; [.0101.0020.0002.0050] # P
2860045 ; [.0102.0020.0002.0045] # E
2870052 ; [.0103.0020.0002.0052] # R
288004C ; [.0104.0020.0002.004C] # L
2891100 ; [.0105.0020.0002.1100] # Hangul Jamo initial G
2901175 ; [.0106.0020.0002.1175] # Hangul Jamo middle I
2915B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter"
292ENTRIES
293 table => undef,
294 normalization => undef,
295);
296
297# defined before undefined
298
# Sort the 26 capital letters plus a space, one character at a time; the
# four defined letters are expected first, in weight order.
299my $sortABC = join '',
300 $few_entries->sort(split //, "ABCDEFGHIJKLMNOPQRSTUVWXYZ ");
301
302ok($sortABC eq "PERL ABCDFGHIJKMNOQSTUVWXYZ");
303
304ok($few_entries->lt('E', 'D'));
305ok($few_entries->lt("\x{5B57}", "\x{4E00}"));
306ok($few_entries->lt("\x{AE30}", "\x{AC00}"));
307
308# Hangul must be decomposed.
309
310ok($few_entries->eq("\x{AC00}", "\x{1100}\x{1161}"));
311
312##############
313
# $all_undef_8: no table, overrideCJK and overrideHangul both undef, and
# UCA_Version pinned to 8. Also reused by the rearrange section later on.
0116f5dc 314my $all_undef_8 = Unicode::Collate->new(
809c7673 315 table => undef,
316 normalization => undef,
317 overrideCJK => undef,
318 overrideHangul => undef,
0116f5dc 319 UCA_Version => 8,
809c7673 320);
321
322# All in the Unicode code point order.
323# No hangul decomposition.
324
0116f5dc 325ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
326ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
327ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
328ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}"));
329ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}"));
330
331##############
332
# $all_undef_9: same arguments as $all_undef_8 except UCA_Version => 9.
# Unlike the version-8 tests, U+4E00 is expected before U+3402 here.
333my $all_undef_9 = Unicode::Collate->new(
334 table => undef,
335 normalization => undef,
336 overrideCJK => undef,
337 overrideHangul => undef,
338 UCA_Version => 9,
339);
340
341# CJK Ideo. < CJK ext A/B < Others.
342# No hangul decomposition.
343
344ok($all_undef_9->lt("\x{4E00}", "\x{3402}"));
345ok($all_undef_9->lt("\x{3402}", "\x{20000}"));
346ok($all_undef_9->lt("\x{20000}", "\x{AC00}"));
347ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}"));
caffd4cf 348ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}")); # U+ABFF: not assigned
809c7673 349
350##############
351
# $ignoreCJK: overrideCJK is a sub returning the empty list; the only
# explicit entry is U+5B57.
352my $ignoreCJK = Unicode::Collate->new(
353 table => undef,
354 normalization => undef,
355 overrideCJK => sub {()},
356 entry => <<'ENTRIES',
3575B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter"
358ENTRIES
359);
360
361# All CJK Unified Ideographs except U+5B57 are ignored.
362
363ok($ignoreCJK->eq("\x{4E00}", ""));
364ok($ignoreCJK->lt("\x{4E00}", "\0"));
365ok($ignoreCJK->eq("Pe\x{4E00}rl", "Perl")); # U+4E00 is a CJK.
366ok($ignoreCJK->gt("\x{4DFF}", "\x{4E00}")); # U+4DFF is not CJK.
367ok($ignoreCJK->lt("Pe\x{5B57}rl", "Perl")); # 'r' is unassigned.
368
369##############
370
# $ignoreHangul: mirror of $ignoreCJK for Hangul. overrideHangul is a sub
# returning the empty list; the only explicit entry is U+AE00.
371my $ignoreHangul = Unicode::Collate->new(
372 table => undef,
373 normalization => undef,
374 overrideHangul => sub {()},
375 entry => <<'ENTRIES',
376AE00 ; [.0100.0020.0002.AE00] # Hangul GEUL
377ENTRIES
378);
379
380# All Hangul Syllables except U+AE00 are ignored.
381
382ok($ignoreHangul->eq("\x{AC00}", ""));
383ok($ignoreHangul->lt("\x{AC00}", "\0"));
384ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}"));
385ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored.
386ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned.
387
388##############
389
# Cycle the shared $Collator through four 'alternate' settings and check
# the expected ordering of "death"/"deluge" variants containing a space,
# a hyphen or U+2010. %origAlter keeps what the first change() returned
# and is passed back at the end to restore the original setting.
0116f5dc 390my %origAlter = $Collator->change(alternate => 'Blanked');
809c7673 391
0116f5dc 392ok($Collator->lt("death", "de luge"));
393ok($Collator->lt("de luge", "de-luge"));
394ok($Collator->lt("de-luge", "deluge"));
395ok($Collator->lt("deluge", "de\x{2010}luge"));
396ok($Collator->lt("deluge", "de Luge"));
809c7673 397
0116f5dc 398$Collator->change(alternate => 'Non-ignorable');
809c7673 399
0116f5dc 400ok($Collator->lt("de luge", "de Luge"));
401ok($Collator->lt("de Luge", "de-luge"));
402ok($Collator->lt("de-Luge", "de\x{2010}luge"));
403ok($Collator->lt("de-luge", "death"));
404ok($Collator->lt("death", "deluge"));
809c7673 405
0116f5dc 406$Collator->change(alternate => 'Shifted');
809c7673 407
0116f5dc 408ok($Collator->lt("death", "de luge"));
409ok($Collator->lt("de luge", "de-luge"));
410ok($Collator->lt("de-luge", "deluge"));
411ok($Collator->lt("deluge", "de Luge"));
412ok($Collator->lt("de Luge", "deLuge"));
809c7673 413
0116f5dc 414$Collator->change(alternate => 'Shift-Trimmed');
809c7673 415
0116f5dc 416ok($Collator->lt("death", "deluge"));
417ok($Collator->lt("deluge", "de luge"));
418ok($Collator->lt("de luge", "de-luge"));
419ok($Collator->lt("de-luge", "deLuge"));
420ok($Collator->lt("deLuge", "de Luge"));
809c7673 421
0116f5dc 422$Collator->change(%origAlter);
809c7673 423
# After restoring, the stored value is expected to be the lower-case
# string 'shifted'.
0116f5dc 424ok($Collator->{alternate}, 'shifted');
809c7673 425
426##############
427
# $overCJK: overrideCJK is a sub that takes its first argument, computes
# 0xFFFF minus it, and returns an array ref [$u, 0x20, 0x2, $u], so larger
# inputs get smaller first and fourth fields. U+4E00 has an explicit entry
# whose first field is B1FC (0xFFFF - 0x4E03) and whose second field is
# 0030 rather than the 0x20 the sub returns.
428my $overCJK = Unicode::Collate->new(
429 table => undef,
430 normalization => undef,
431 entry => <<'ENTRIES',
4320061 ; [.0101.0020.0002.0061] # latin a
4330041 ; [.0101.0020.0008.0041] # LATIN A
4344E00 ; [.B1FC.0030.0004.4E00] # Ideograph; B1FC = FFFF - 4E03.
435ENTRIES
436 overrideCJK => sub {
437 my $u = 0xFFFF - $_[0]; # reversed
438 [$u, 0x20, 0x2, $u];
439 },
440);
441
442ok($overCJK->lt("a", "A")); # diff. at level 3.
443ok($overCJK->lt( "\x{4E03}", "\x{4E00}")); # diff. at level 2.
444ok($overCJK->lt("A\x{4E03}", "A\x{4E00}"));
445ok($overCJK->lt("A\x{4E03}", "a\x{4E00}"));
446ok($overCJK->lt("a\x{4E03}", "A\x{4E00}"));
447
448##############
449
0116f5dc 450# rearrange : 0x0E40..0x0E44, 0x0EC0..0x0EC4 (default)
451
# Disable rearrangement on the shared $Collator; %old_rearrange keeps what
# change() returned so the setting can be restored below.
452my %old_rearrange = $Collator->change(rearrange => undef);
453
454ok($Collator->gt("\x{0E41}A", "\x{0E40}B"));
455ok($Collator->gt("A\x{0E41}A", "A\x{0E40}B"));
456
4d36a948 457$Collator->change(rearrange => [ 0x61 ]);
458 # U+0061, 'a': This is a Unicode value, never a native value.
809c7673 459
0116f5dc 460ok($Collator->gt("ab", "AB")); # as 'ba' > 'AB'
461
462$Collator->change(%old_rearrange);
463
# With the original setting restored, the two U+0E41/U+0E40 comparisons
# that were 'gt' above are expected to be 'lt'.
464ok($Collator->lt("ab", "AB"));
809c7673 465ok($Collator->lt("\x{0E40}", "\x{0E41}"));
466ok($Collator->lt("\x{0E40}A", "\x{0E41}B"));
467ok($Collator->lt("\x{0E41}A", "\x{0E40}B"));
468ok($Collator->lt("A\x{0E41}A", "A\x{0E40}B"));
469
# The same four comparisons on $all_undef_8, which was constructed without
# a rearrange argument.
0116f5dc 470ok($all_undef_8->lt("\x{0E40}", "\x{0E41}"));
471ok($all_undef_8->lt("\x{0E40}A", "\x{0E41}B"));
472ok($all_undef_8->lt("\x{0E41}A", "\x{0E40}B"));
473ok($all_undef_8->lt("A\x{0E41}A", "A\x{0E40}B"));
809c7673 474
475##############
476
# $no_rearrange: rearrange is an empty array ref. The last two comparisons
# are expected to be 'gt', unlike the same inputs on a collator that uses
# the default rearrange setting.
477my $no_rearrange = Unicode::Collate->new(
478 table => undef,
479 normalization => undef,
480 rearrange => [],
481);
482
483ok($no_rearrange->lt("A", "B"));
484ok($no_rearrange->lt("\x{0E40}", "\x{0E41}"));
485ok($no_rearrange->lt("\x{0E40}A", "\x{0E41}B"));
486ok($no_rearrange->gt("\x{0E41}A", "\x{0E40}B"));
487ok($no_rearrange->gt("A\x{0E41}A", "A\x{0E40}B"));
488
489##############
490
# $undef_rearrange: rearrange => undef at construction time. The
# expectations are identical to those for rearrange => [].
809c7673 491my $undef_rearrange = Unicode::Collate->new(
492 table => undef,
493 normalization => undef,
494 rearrange => undef,
495);
496
497ok($undef_rearrange->lt("A", "B"));
498ok($undef_rearrange->lt("\x{0E40}", "\x{0E41}"));
499ok($undef_rearrange->lt("\x{0E40}A", "\x{0E41}B"));
500ok($undef_rearrange->gt("\x{0E41}A", "\x{0E40}B"));
501ok($undef_rearrange->gt("A\x{0E41}A", "A\x{0E40}B"));
502
503##############
504
# $dropArticles: the preprocess sub removes every "a", "an" or "the"
# (case-insensitive, starting at a word boundary) together with the
# whitespace that follows it, and returns the modified copy. The last two
# tests run the same inputs through the plain $Collator for contrast.
505my $dropArticles = Unicode::Collate->new(
506 table => "keys.txt",
507 normalization => undef,
508 preprocess => sub {
509 my $string = shift;
510 $string =~ s/\b(?:an?|the)\s+//ig;
511 $string;
512 },
513);
514
515ok($dropArticles->eq("camel", "a camel"));
516ok($dropArticles->eq("Perl", "The Perl"));
517ok($dropArticles->lt("the pen", "a pencil"));
518ok($Collator->lt("Perl", "The Perl"));
519ok($Collator->gt("the pen", "a pencil"));
520
521##############
522
# $backLevel1: no table; backwards is given as an array ref containing 1.
523my $backLevel1 = Unicode::Collate->new(
524 table => undef,
525 normalization => undef,
526 backwards => [ 1 ],
527);
528
529# all strings are reversed at level 1.
530
531ok($backLevel1->gt("AB", "BA"));
532ok($backLevel1->gt("\x{3042}\x{3044}", "\x{3044}\x{3042}"));
533
534##############
535
# $backLevel2: keys.txt with backwards given as the plain scalar 2 (not an
# array ref), plus undefName matching HANGUL|HIRAGANA|KATAKANA|BOPOMOFO.
# Each $backLevel2 test is paired with the same comparison on the plain
# $Collator, which is expected to give the opposite order.
536my $backLevel2 = Unicode::Collate->new(
537 table => "keys.txt",
538 normalization => undef,
539 undefName => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/,
540 backwards => 2,
541);
542
543ok($backLevel2->gt("Ca\x{300}ca\x{302}", "ca\x{302}ca\x{300}"));
544ok($backLevel2->gt("ca\x{300}ca\x{302}", "Ca\x{302}ca\x{300}"));
545ok($Collator ->lt("Ca\x{300}ca\x{302}", "ca\x{302}ca\x{300}"));
546ok($Collator ->lt("ca\x{300}ca\x{302}", "Ca\x{302}ca\x{300}"));
547
3164dd77 548# HIRAGANA and KATAKANA are made undefined via undefName.
549# So they are after CJK Unified Ideographs.
809c7673 550
551ok($backLevel2->lt("\x{4E00}", $hiragana));
552ok($backLevel2->lt("\x{4E03}", $katakana));
553ok($Collator ->gt("\x{4E00}", $hiragana));
554ok($Collator ->gt("\x{4E03}", $katakana));
555
556##############
caffd4cf 557
4d36a948 558# ignorable after variable
caffd4cf 559
# The same four comparisons are run under three 'alternate' settings of
# the shared $Collator. Under the first two, combining marks after "?" are
# expected to make no difference; under 'Non-ignorable' they are expected
# to matter. After each group the level is dropped to 3 for one "\cA"
# versus "?" check.
4d36a948 560# Shifted;
caffd4cf 561ok($Collator->eq("?\x{300}!\x{301}\x{315}", "?!"));
4d36a948 562ok($Collator->eq("?\x{300}A\x{301}", "?$A_acute"));
caffd4cf 563ok($Collator->eq("?\x{300}", "?"));
4d36a948 564ok($Collator->eq("?\x{344}", "?")); # U+0344 has two CEs.
caffd4cf 565
4d36a948 566$Collator->change(level => 3);
567ok($Collator->eq("\cA", "?"));
568
569$Collator->change(alternate => 'blanked', level => 4);
570ok($Collator->eq("?\x{300}!\x{301}\x{315}", "?!"));
571ok($Collator->eq("?\x{300}A\x{301}", "?$A_acute"));
572ok($Collator->eq("?\x{300}", "?"));
573ok($Collator->eq("?\x{344}", "?")); # U+0344 has two CEs.
574
575$Collator->change(level => 3);
576ok($Collator->eq("\cA", "?"));
caffd4cf 577
4d36a948 578$Collator->change(alternate => 'Non-ignorable', level => 4);
579
580ok($Collator->lt("?\x{300}", "?!"));
581ok($Collator->gt("?\x{300}A$acute", "?$A_acute"));
caffd4cf 582ok($Collator->gt("?\x{300}", "?"));
4d36a948 583ok($Collator->gt("?\x{344}", "?"));
caffd4cf 584
4d36a948 585$Collator->change(level => 3);
586ok($Collator->lt("\cA", "?"));
587
# Put the shared collator back to 'Shifted' at level 4.
588$Collator->change(alternate => 'Shifted', level => 4);
589
590##############
591
592# According to Conformance Test,
593# an L3-ignorable is treated as completely ignorable.
594
# $L3ignorable: 'Non-ignorable' alternate, no table. The heredoc defines
# four entries whose first three weight fields are all zero (U+0000,
# U+0001, U+0591, U+1D165), a variable "!" entry (marked with '*'), and
# Bengali vowel signs where the sequence 09C7 09BE carries the same
# weights as 09CB.
595my $L3ignorable = Unicode::Collate->new(
596 alternate => 'Non-ignorable',
597 table => undef,
598 normalization => undef,
599 entry => <<'ENTRIES',
6000000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
6010001 ; [.0000.0000.0000.0000] # [0001] START OF HEADING (in 6429)
6020591 ; [.0000.0000.0000.0591] # HEBREW ACCENT ETNAHTA
6031D165 ; [.0000.0000.0000.1D165] # MUSICAL SYMBOL COMBINING STEM
6040021 ; [*024B.0020.0002.0021] # EXCLAMATION MARK
60509BE ; [.114E.0020.0002.09BE] # BENGALI VOWEL SIGN AA
60609C7 ; [.1157.0020.0002.09C7] # BENGALI VOWEL SIGN E
60709CB ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
60809C7 09BE ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
609ENTRIES
610);
611
612ok($L3ignorable->lt("\cA", "!"));
613ok($L3ignorable->lt("\x{591}", "!"));
614ok($L3ignorable->eq("\cA", "\x{591}"));
# One of the zero-weight characters placed between 09C7 and 09BE is
# expected not to prevent the two-character entry from matching.
615ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\cA\x{09BE}A"));
616ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\x{0591}\x{09BE}A"));
617ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\x{1D165}\x{09BE}A"));
618ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09CB}A"));