Integrate mainline.
[p5sagit/p5-mst-13.2.git] / lib / Unicode / Collate / t / test.t
Commit Line Data
45394607 1
# Skip the whole file on EBCDIC platforms (detected by ord("A") == 193):
# print a "1..0" skip plan and exit successfully before anything is loaded.
4a2e806c 2BEGIN {
3 if (ord("A") == 193) {
4 print "1..0 # Unicode::Collate not ported to EBCDIC\n";
5 exit 0;
6 }
7}
8
# When PERL_CORE is set in the environment, change into t/ (if that directory
# exists) and restrict @INC to ../lib.
ed423f7a 9BEGIN {
10 if ($ENV{PERL_CORE}) {
11 chdir('t') if -d 't';
12 @INC = qw(../lib);
13 }
14}
15
# Declare the plan at compile time; every ok() call in this file counts
# toward the 183 planned tests.
45394607 16use Test;
ed423f7a 17BEGIN { plan tests => 183};
45394607 18use Unicode::Collate;
45394607 19
20#########################
21
# Basic checks: module loaded, UCA version reporting, construction, sorting
# and comparison of empty strings.
ed423f7a 22ok(1); # If we made it this far, we're ok.
23
24my $UCA_Version = "9";
3164dd77 25
# UCA_Version is called as a plain function and as a class method here, and
# as an instance method (with and without parentheses) further down.
26ok(Unicode::Collate::UCA_Version, $UCA_Version);
27ok(Unicode::Collate->UCA_Version, $UCA_Version);
28
# $Collator is the shared collator object: later parts of this file
# reconfigure it with change() and restore it afterwards.
5398038e 29my $Collator = Unicode::Collate->new(
45394607 30 table => 'keys.txt',
31 normalization => undef,
32);
33
5398038e 34ok(ref $Collator, "Unicode::Collate");
45394607 35
3164dd77 36ok($Collator->UCA_Version, $UCA_Version);
37ok($Collator->UCA_Version(), $UCA_Version);
38
# sort() is expected to return the words in the order listed in the second
# argument; both lists are joined with ':' so a single comparison covers them.
45394607 39ok(
5398038e 40 join(':', $Collator->sort(
45394607 41 qw/ lib strict Carp ExtUtils CGI Time warnings Math overload Pod CPAN /
42 ) ),
43 join(':',
44 qw/ Carp CGI CPAN ExtUtils lib Math overload Pod strict Time warnings /
45 ),
46);
47
# The empty string compares equal to itself and less than a non-empty string.
ed423f7a 48ok($Collator->cmp("", ""), 0);
49ok($Collator->eq("", ""));
50ok($Collator->cmp("", "perl"), -1);
51
52##############
53
# Accent handling at different collation levels, using precomposed
# U+00C1 / U+00E1 and the combining mark U+0301.
45394607 54my $A_acute = pack('U', 0x00C1);
ed423f7a 55my $a_acute = pack('U', 0x00E1);
45394607 56my $acute = pack('U', 0x0301);
57
# With normalization disabled, "A" + U+0301 is expected to sort before the
# precomposed U+00C1, and U+00E1 before U+00C1.
5398038e 58ok($Collator->cmp("A$acute", $A_acute), -1);
ed423f7a 59ok($Collator->cmp($a_acute, $A_acute), -1);
60
# change() in list context yields key/value pairs that are kept in %old_level
# and passed back to change() below to restore the previous level.
61my %old_level = $Collator->change(level => 1);
62ok($Collator->eq("A$acute", $A_acute));
63ok($Collator->eq("A", $A_acute));
64
# The value returned by change() is used directly as a collator (chained call).
65ok($Collator->change(level => 2)->eq($a_acute, $A_acute));
66ok($Collator->lt("A", $A_acute));
67
# Back at the saved level the accent and case differences are significant again.
68ok($Collator->change(%old_level)->lt("A", $A_acute));
69ok($Collator->lt("A", $A_acute));
70ok($Collator->lt("A", $a_acute));
71ok($Collator->lt($a_acute, $A_acute));
45394607 72
809c7673 73##############
74
# Normalization-dependent tests. Unicode::Normalize is optional: if it cannot
# be loaded, the else branch emits ten unconditional passes so that the test
# count stays the same as in the if branch (ten ok() calls).
75eval { require Unicode::Normalize };
45394607 76
809c7673 77if (!$@) {
# No normalization argument is given here, so the module's default applies
# (presumably NFD, going by the variable name -- confirm against the module).
45394607 78 my $NFD = Unicode::Collate->new(
79 table => 'keys.txt',
905aa9f0 80 entry => <<'ENTRIES',
810430 ; [.0B01.0020.0002.0430] # CYRILLIC SMALL LETTER A
820410 ; [.0B01.0020.0008.0410] # CYRILLIC CAPITAL LETTER A
8304D3 ; [.0B09.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS
840430 0308 ; [.0B09.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS
8504D3 ; [.0B09.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS
860430 0308 ; [.0B09.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS
8704D2 ; [.0B09.0020.0008.04D2] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
880410 0308 ; [.0B09.0020.0008.04D2] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
890430 3099 ; [.0B10.0020.0002.04D3] # A WITH KATAKANA VOICED
900430 3099 0308 ; [.0B11.0020.0002.04D3] # A WITH KATAKANA VOICED, DIAERESIS
91ENTRIES
45394607 92 );
# With normalization active, decomposed and precomposed forms compare equal,
# and the multi-character entries above are matched even when other
# combining marks are interleaved.
905aa9f0 93 ok($NFD->eq("A$acute", $A_acute));
94 ok($NFD->eq("\x{4D3}\x{325}", "\x{430}\x{308}\x{325}"));
95 ok($NFD->lt("\x{430}\x{308}A", "\x{430}\x{308}B"));
96 ok($NFD->lt("\x{430}\x{3099}B", "\x{430}\x{308}\x{3099}A"));
97 ok($NFD->eq("\x{0430}\x{3099}\x{309A}\x{0308}",
98 "\x{0430}\x{309A}\x{3099}\x{0308}") );
ed423f7a 99
# Switching normalization off makes $NFD agree with $Collator, which was
# constructed with normalization => undef.
100 my %old_norm = $NFD->change(normalization => undef);
101 ok($NFD->lt("A$acute", $A_acute));
102 ok($NFD->cmp("A$acute", $A_acute), $Collator->cmp("A$acute", $A_acute));
103
# Restore the saved normalization, then toggle it off again via a chained
# change(); the last check lowers the level to 1 on the same object.
104 $NFD->change(%old_norm);
105 ok($NFD->eq("A$acute", $A_acute));
106 ok($NFD->change(normalization => undef)->lt("A$acute", $A_acute));
107 ok($NFD->change(level => 1)->eq("A$acute", $A_acute));
108
45394607 109}
809c7673 110else {
# Unicode::Normalize is unavailable: pass the ten tests unconditionally.
d16e9e3d 111 ok(1);
905aa9f0 112 ok(1);
113 ok(1);
114 ok(1);
115 ok(1);
ed423f7a 116 ok(1);
117 ok(1);
118 ok(1);
119 ok(1);
120 ok(1);
45394607 121}
122
809c7673 123##############
124
# $trad adds tailoring entries on top of keys.txt ("ch"/"Ch" as single
# collation elements, a two-element expansion for U+00DF) and ignores every
# character whose name matches the ignoreName pattern.
# Note that the first entry line ends its data with '%' rather than '#'
# before the trailing remark; the text is left exactly as written.
125my $trad = Unicode::Collate->new(
45394607 126 table => 'keys.txt',
127 normalization => undef,
809c7673 128 ignoreName => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/,
129 level => 4,
130 entry => << 'ENTRIES',
131 0063 0068 ; [.0893.0020.0002.0063] % "ch" in traditional Spanish
132 0043 0068 ; [.0893.0020.0008.0043] # "Ch" in traditional Spanish
133 00DF ; [.09F3.0154.0004.00DF] [.09F3.0020.0004.00DF] # eszet in Germany
45394607 134ENTRIES
135);
136
# With the "ch" entry, "acha" is expected after "acka" ...
137ok(
809c7673 138 join(':', $trad->sort( qw/ acha aca ada acia acka / ) ),
139 join(':', qw/ aca acia acka acha ada / ),
45394607 140);
141
# ... whereas the untailored $Collator puts "acha" right after "aca".
142ok(
809c7673 143 join(':', $Collator->sort( qw/ acha aca ada acia acka / ) ),
144 join(':', qw/ aca acha acia acka ada / ),
45394607 145);
146
# Two-character sample strings, reused by later sections of this file.
45394607 147my $hiragana = "\x{3042}\x{3044}";
148my $katakana = "\x{30A2}\x{30A4}";
149
809c7673 150# HIRAGANA and KATAKANA are ignorable via ignoreName
151ok($trad->eq($hiragana, ""));
152ok($trad->eq("", $katakana));
153ok($trad->eq($hiragana, $katakana));
154ok($trad->eq($katakana, $hiragana));
155
156##############
157
# Effects of level, katakana_before_hiragana and upper_before_lower on the
# shared $Collator. The settings are changed step by step, so the order of
# the statements in this section matters.
ed423f7a 158$Collator->change(level => 2);
809c7673 159
ed423f7a 160ok($Collator->{level}, 2);
5398038e 161
# At level 2, case and the hiragana/katakana distinction are not significant.
162ok( $Collator->cmp("ABC","abc"), 0);
163ok( $Collator->eq("ABC","abc") );
164ok( $Collator->le("ABC","abc") );
165ok( $Collator->cmp($hiragana, $katakana), 0);
166ok( $Collator->eq($hiragana, $katakana) );
167ok( $Collator->ge($hiragana, $katakana) );
45394607 168
5398038e 169# hangul
170ok( $Collator->eq("a\x{AC00}b", "a\x{1100}\x{1161}b") );
171ok( $Collator->eq("a\x{AE00}b", "a\x{1100}\x{1173}\x{11AF}b") );
172ok( $Collator->gt("a\x{AE00}b", "a\x{1100}\x{1173}b\x{11AF}") );
173ok( $Collator->lt("a\x{AC00}b", "a\x{AE00}b") );
174ok( $Collator->gt("a\x{D7A3}b", "a\x{C544}b") );
175ok( $Collator->lt("a\x{C544}b", "a\x{30A2}b") ); # hangul < katakana (U+30A2)
45394607 176
# Restore the level saved in %old_level and enable katakana_before_hiragana
# in the same call; the next check shows the restored level is 4.
ed423f7a 177$Collator->change(%old_level, katakana_before_hiragana => 1);
45394607 178
ed423f7a 179ok($Collator->{level}, 4);
45394607 180
# Lowercase still sorts before uppercase; katakana now sorts before hiragana.
5398038e 181ok( $Collator->cmp("abc", "ABC"), -1);
182ok( $Collator->ne("abc", "ABC") );
183ok( $Collator->lt("abc", "ABC") );
184ok( $Collator->le("abc", "ABC") );
185ok( $Collator->cmp($hiragana, $katakana), 1);
186ok( $Collator->ne($hiragana, $katakana) );
187ok( $Collator->gt($hiragana, $katakana) );
188ok( $Collator->ge($hiragana, $katakana) );
45394607 189
# Additionally put uppercase before lowercase.
ed423f7a 190$Collator->change(upper_before_lower => 1);
45394607 191
5398038e 192ok( $Collator->cmp("abc", "ABC"), 1);
193ok( $Collator->ge("abc", "ABC"), 1);
194ok( $Collator->gt("abc", "ABC"), 1);
195ok( $Collator->cmp($hiragana, $katakana), 1);
196ok( $Collator->ge($hiragana, $katakana), 1);
197ok( $Collator->gt($hiragana, $katakana), 1);
45394607 198
# Turn katakana_before_hiragana off again; upper_before_lower is still on.
ed423f7a 199$Collator->change(katakana_before_hiragana => 0);
45394607 200
5398038e 201ok( $Collator->cmp("abc", "ABC"), 1);
202ok( $Collator->cmp($hiragana, $katakana), -1);
45394607 203
# Turn upper_before_lower off as well, leaving both options disabled.
ed423f7a 204$Collator->change(upper_before_lower => 0);
45394607 205
5398038e 206ok( $Collator->cmp("abc", "ABC"), -1);
207ok( $Collator->le("abc", "ABC") );
208ok( $Collator->cmp($hiragana, $katakana), -1);
209ok( $Collator->lt($hiragana, $katakana) );
45394607 210
809c7673 211##############
212
# ignoreChar: characters matching the pattern (a, A, e, E) are ignored, so
# the string pairs below compare equal once those letters are disregarded.
213my $ignoreAE = Unicode::Collate->new(
214 table => 'keys.txt',
215 normalization => undef,
216 ignoreChar => qr/^[aAeE]$/,
217);
218
219ok($ignoreAE->eq("element","lament"));
220ok($ignoreAE->eq("Perl","ePrl"));
221
222##############
223
# A collator with no table file: only the six entries supplied through
# `entry` (a/A, b/B, c/C) are defined.
224my $onlyABC = Unicode::Collate->new(
225 table => undef,
327745dc 226 normalization => undef,
809c7673 227 entry => << 'ENTRIES',
2280061 ; [.0101.0020.0002.0061] # LATIN SMALL LETTER A
2290041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A
2300062 ; [.0102.0020.0002.0062] # LATIN SMALL LETTER B
2310042 ; [.0102.0020.0008.0042] # LATIN CAPITAL LETTER B
2320063 ; [.0103.0020.0002.0063] # LATIN SMALL LETTER C
2330043 ; [.0103.0020.0008.0043] # LATIN CAPITAL LETTER C
234ENTRIES
235);
236
# Expected order of mixed-case words under those six entries.
237ok(
238 join(':', $onlyABC->sort( qw/ ABA BAC cc A Ab cAc aB / ) ),
239 join(':', qw/ A aB Ab ABA BAC cAc cc / ),
240);
241
242##############
243
# undefChar: characters matching the pattern (a, A, e, E) are treated as if
# they had no entry in the table. Each $undefAE check is paired with the same
# comparison on the unmodified $Collator, which gives the opposite result.
244my $undefAE = Unicode::Collate->new(
45394607 245 table => 'keys.txt',
246 normalization => undef,
809c7673 247 undefChar => qr/^[aAeE]$/,
45394607 248);
249
809c7673 250ok($undefAE ->gt("edge","fog"));
251ok($Collator->lt("edge","fog"));
252ok($undefAE ->gt("lake","like"));
253ok($Collator->lt("lake","like"));
254
255##############
45394607 256
# index(): find a substring under collation rules and replace it.
# At level 2 "PERL" is found inside "This is a Perl book."; after the level
# saved in %old_level is restored, it is no longer found and the string
# stays unchanged.
ed423f7a 257$Collator->change(level => 2);
d16e9e3d 258
d16e9e3d 259my $str;
5398038e 260
261my $orig = "This is a Perl book.";
d16e9e3d 262my $sub = "PERL";
263my $rep = "camel";
264my $ret = "This is a camel book.";
265
266$str = $orig;
# The condition is a list assignment, so it is true only when index()
# returns a non-empty list (position and length of the match).
809c7673 267if (my($pos,$len) = $Collator->index($str, $sub)) {
5398038e 268 substr($str, $pos, $len, $rep);
d16e9e3d 269}
270
271ok($str, $ret);
272
ed423f7a 273$Collator->change(%old_level);
d16e9e3d 274
275$str = $orig;
809c7673 276if (my($pos,$len) = $Collator->index($str, $sub)) {
5398038e 277 substr($str, $pos, $len, $rep);
d16e9e3d 278}
279
# No replacement is expected this time.
280ok($str, $orig);
281
809c7673 282##############
283
# index() at level 1: the matched span reported by (position, length) is
# expected to include the combining marks that follow the matched letters.
284my $match;
285
ed423f7a 286$Collator->change(level => 1);
809c7673 287
288$str = "Pe\x{300}rl";
289$sub = "pe";
290$match = undef;
291if (my($pos, $len) = $Collator->index($str, $sub)) {
292 $match = substr($str, $pos, $len);
293}
294ok($match, "Pe\x{300}");
295
# Same idea with marks after both letters and several marks after the "e".
296$str = "P\x{300}e\x{300}\x{301}\x{303}rl";
297$sub = "pE";
298$match = undef;
299if (my($pos, $len) = $Collator->index($str, $sub)) {
300 $match = substr($str, $pos, $len);
301}
302ok($match, "P\x{300}e\x{300}\x{301}\x{303}");
303
# Restore the level saved in %old_level.
ed423f7a 304$Collator->change(%old_level);
809c7673 305
306##############
307
# index() on $trad. %old_level is overwritten here with $trad's previous
# level setting, so from this point on it no longer holds $Collator's.
ed423f7a 308%old_level = $trad->change(level => 1);
5398038e 309
# At level 1, "m" + U+00FC + "ss" is expected to match "mu" + U+00DF.
# Presumably this relies on the two-element entry for 00DF given to $trad
# at construction -- confirm against the module.
310$str = "Ich mu\x{00DF} studieren.";
311$sub = "m\x{00FC}ss";
809c7673 312$match = undef;
313if (my($pos, $len) = $trad->index($str, $sub)) {
5398038e 314 $match = substr($str, $pos, $len);
315}
316ok($match, "mu\x{00DF}");
317
# After restoring the previous level, the same search must find nothing.
ed423f7a 318$trad->change(%old_level);
5398038e 319
320$str = "Ich mu\x{00DF} studieren.";
321$sub = "m\x{00FC}ss";
322$match = undef;
809c7673 323
324if (my($pos, $len) = $trad->index($str, $sub)) {
5398038e 325 $match = substr($str, $pos, $len);
326}
327ok($match, undef);
328
# Edge cases on $Collator: an empty substring is found in an empty string ...
329$match = undef;
809c7673 330if (my($pos,$len) = $Collator->index("", "")) {
5398038e 331 $match = substr("", $pos, $len);
332}
333ok($match, "");
334
# ... but a non-empty substring is not.
335$match = undef;
809c7673 336if (my($pos,$len) = $Collator->index("", "abc")) {
5398038e 337 $match = substr("", $pos, $len);
338}
339ok($match, undef);
340
809c7673 341##############
342
# A level-1 collator built without any table or entries; the checks below
# cover plain code point order, Hangul syllables versus their jamo
# sequences, and CJK ideographs.
343# Table is undefined, then no entry is defined.
344
345my $undef_table = Unicode::Collate->new(
346 table => undef,
347 normalization => undef,
348 level => 1,
349);
350
351# in the Unicode code point order
352ok($undef_table->lt('', 'A'));
353ok($undef_table->lt('ABC', 'B'));
354
355# Hangul should be decomposed (even w/o Unicode::Normalize).
356
357ok($undef_table->lt("Perl", "\x{AC00}"));
358ok($undef_table->eq("\x{AC00}", "\x{1100}\x{1161}"));
359ok($undef_table->eq("\x{AE00}", "\x{1100}\x{1173}\x{11AF}"));
360ok($undef_table->lt("\x{AE00}", "\x{3042}"));
361 # U+AC00: Hangul GA
362 # U+AE00: Hangul GEUL
363 # U+3042: Hiragana A
364
365# Weight for CJK Ideographs is defined, though.
366
367ok($undef_table->lt("", "\x{4E00}"));
368ok($undef_table->lt("\x{4E8C}","ABC"));
369ok($undef_table->lt("\x{4E00}","\x{3042}"));
370ok($undef_table->lt("\x{4E00}","\x{4E8C}"));
371 # U+4E00: Ideograph "ONE"
372 # U+4E8C: Ideograph "TWO"
373
374
375##############
376
# A collator with no table file and seven explicit entries: the letters
# P, E, R, L, two Hangul jamo, and one CJK ideograph.
377my $few_entries = Unicode::Collate->new(
378 entry => <<'ENTRIES',
3790050 ; [.0101.0020.0002.0050] # P
3800045 ; [.0102.0020.0002.0045] # E
3810052 ; [.0103.0020.0002.0052] # R
382004C ; [.0104.0020.0002.004C] # L
3831100 ; [.0105.0020.0002.1100] # Hangul Jamo initial G
3841175 ; [.0106.0020.0002.1175] # Hangul Jamo middle I
3855B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter"
386ENTRIES
387 table => undef,
388 normalization => undef,
389);
390
391# defined before undefined
392
# Sort the 26 capital letters plus a space, one character at a time.
393my $sortABC = join '',
394 $few_entries->sort(split //, "ABCDEFGHIJKLMNOPQRSTUVWXYZ ");
395
# Expected: the four defined letters in weight order (P, E, R, L), followed
# by the space and the remaining letters in code point order.
396ok($sortABC eq "PERL ABCDFGHIJKMNOQSTUVWXYZ");
397
# Characters with an entry sort before those without one. U+AE30 is
# presumably the syllable built from the two defined jamo (1100 + 1175),
# which would explain why it sorts before U+AC00 -- confirm.
398ok($few_entries->lt('E', 'D'));
399ok($few_entries->lt("\x{5B57}", "\x{4E00}"));
400ok($few_entries->lt("\x{AE30}", "\x{AC00}"));
401
402# Hangul must be decomposed.
403
404ok($few_entries->eq("\x{AC00}", "\x{1100}\x{1161}"));
405
406##############
407
# No table, and both overrideCJK and overrideHangul set to undef, with
# UCA_Version 8. This object is reused by the rearrange checks later on.
ed423f7a 408my $all_undef_8 = Unicode::Collate->new(
809c7673 409 table => undef,
410 normalization => undef,
411 overrideCJK => undef,
412 overrideHangul => undef,
ed423f7a 413 UCA_Version => 8,
809c7673 414);
415
416# All in the Unicode code point order.
417# No hangul decomposition.
418
ed423f7a 419ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
420ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
421ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
422ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}"));
423ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}"));
424
425##############
426
# Same construction as the UCA_Version 8 object, but with UCA_Version 9.
# Here U+4E00 is expected before U+3402, which in turn is expected before
# U+20000 -- the opposite of the version 8 result for the first pair.
427my $all_undef_9 = Unicode::Collate->new(
428 table => undef,
429 normalization => undef,
430 overrideCJK => undef,
431 overrideHangul => undef,
432 UCA_Version => 9,
433);
434
435# CJK Ideo. < CJK ext A/B < Others.
436# No hangul decomposition.
437
438ok($all_undef_9->lt("\x{4E00}", "\x{3402}"));
439ok($all_undef_9->lt("\x{3402}", "\x{20000}"));
440ok($all_undef_9->lt("\x{20000}", "\x{AC00}"));
441ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}"));
442ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}"));
809c7673 443
444##############
445
# overrideCJK is a callback that returns an empty list, which makes CJK
# ideographs without an explicit entry ignorable; U+5B57 keeps the weight
# given in `entry`.
446my $ignoreCJK = Unicode::Collate->new(
447 table => undef,
448 normalization => undef,
449 overrideCJK => sub {()},
450 entry => <<'ENTRIES',
4515B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter"
452ENTRIES
453);
454
455# All CJK Unified Ideographs except U+5B57 are ignored.
456
457ok($ignoreCJK->eq("\x{4E00}", ""));
458ok($ignoreCJK->lt("\x{4E00}", "\0"));
459ok($ignoreCJK->eq("Pe\x{4E00}rl", "Perl")); # U+4E00 is a CJK.
460ok($ignoreCJK->gt("\x{4DFF}", "\x{4E00}")); # U+4DFF is not CJK.
461ok($ignoreCJK->lt("Pe\x{5B57}rl", "Perl")); # 'r' is unassigned.
462
463##############
464
# overrideHangul is a callback that returns an empty list, which makes
# Hangul syllables without an explicit entry ignorable; U+AE00 keeps the
# weight given in `entry`.
465my $ignoreHangul = Unicode::Collate->new(
466 table => undef,
467 normalization => undef,
468 overrideHangul => sub {()},
469 entry => <<'ENTRIES',
470AE00 ; [.0100.0020.0002.AE00] # Hangul GEUL
471ENTRIES
472);
473
474# All Hangul Syllables except U+AE00 are ignored.
475
476ok($ignoreHangul->eq("\x{AC00}", ""));
477ok($ignoreHangul->lt("\x{AC00}", "\0"));
478ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}"));
479ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored.
480ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned.
481
482##############
483
# The four `alternate` settings on the shared $Collator. Each group checks
# the relative order of "death", "deluge" and variants of "de luge" written
# with a space, a hyphen, U+2010 or a capital L. The original setting is
# saved in %origAlter and restored at the end.
ed423f7a 484my %origAlter = $Collator->change(alternate => 'Blanked');
809c7673 485
ed423f7a 486ok($Collator->lt("death", "de luge"));
487ok($Collator->lt("de luge", "de-luge"));
488ok($Collator->lt("de-luge", "deluge"));
489ok($Collator->lt("deluge", "de\x{2010}luge"));
490ok($Collator->lt("deluge", "de Luge"));
809c7673 491
ed423f7a 492$Collator->change(alternate => 'Non-ignorable');
809c7673 493
ed423f7a 494ok($Collator->lt("de luge", "de Luge"));
495ok($Collator->lt("de Luge", "de-luge"));
496ok($Collator->lt("de-Luge", "de\x{2010}luge"));
497ok($Collator->lt("de-luge", "death"));
498ok($Collator->lt("death", "deluge"));
809c7673 499
ed423f7a 500$Collator->change(alternate => 'Shifted');
809c7673 501
ed423f7a 502ok($Collator->lt("death", "de luge"));
503ok($Collator->lt("de luge", "de-luge"));
504ok($Collator->lt("de-luge", "deluge"));
505ok($Collator->lt("deluge", "de Luge"));
506ok($Collator->lt("de Luge", "deLuge"));
809c7673 507
ed423f7a 508$Collator->change(alternate => 'Shift-Trimmed');
809c7673 509
ed423f7a 510ok($Collator->lt("death", "deluge"));
511ok($Collator->lt("deluge", "de luge"));
512ok($Collator->lt("de luge", "de-luge"));
513ok($Collator->lt("de-luge", "deLuge"));
514ok($Collator->lt("deLuge", "de Luge"));
809c7673 515
# Restore the saved setting; the object is expected to hold it as the
# lowercase string 'shifted'.
ed423f7a 516$Collator->change(%origAlter);
809c7673 517
ed423f7a 518ok($Collator->{alternate}, 'shifted');
809c7673 519
520##############
521
# overrideCJK as a weight-producing callback: it receives a code point in
# $_[0] and returns an array ref whose first and last elements are
# 0xFFFF minus that code point, so ideographs sort in reverse code point
# order. U+4E00 has an explicit entry whose primary weight (B1FC) equals the
# value the callback computes for U+4E03, but with a different second field.
522my $overCJK = Unicode::Collate->new(
523 table => undef,
524 normalization => undef,
525 entry => <<'ENTRIES',
5260061 ; [.0101.0020.0002.0061] # latin a
5270041 ; [.0101.0020.0008.0041] # LATIN A
5284E00 ; [.B1FC.0030.0004.4E00] # Ideograph; B1FC = FFFF - 4E03.
529ENTRIES
530 overrideCJK => sub {
531 my $u = 0xFFFF - $_[0]; # reversed
532 [$u, 0x20, 0x2, $u];
533 },
534);
535
536ok($overCJK->lt("a", "A")); # diff. at level 3.
537ok($overCJK->lt( "\x{4E03}", "\x{4E00}")); # diff. at level 2.
538ok($overCJK->lt("A\x{4E03}", "A\x{4E00}"));
539ok($overCJK->lt("A\x{4E03}", "a\x{4E00}"));
540ok($overCJK->lt("a\x{4E03}", "A\x{4E00}"));
541
542##############
543
# `rearrange` on the shared $Collator: first disabled (undef), then set to
# a custom list, then restored to the saved value from %old_rearrange.
ed423f7a 544# rearrange : 0x0E40..0x0E44, 0x0EC0..0x0EC4 (default)
545
546my %old_rearrange = $Collator->change(rearrange => undef);
547
# With rearrangement disabled, U+0E41 followed by "A" sorts after
# U+0E40 followed by "B".
548ok($Collator->gt("\x{0E41}A", "\x{0E40}B"));
549ok($Collator->gt("A\x{0E41}A", "A\x{0E40}B"));
550
551$Collator->change(rearrange => [ 0x61 ]); # 'a'
809c7673 552
ed423f7a 553ok($Collator->gt("ab", "AB")); # as 'ba' > 'AB'
554
555$Collator->change(%old_rearrange);
556
# With the saved setting restored, the same two comparisons from above
# now give lt instead of gt.
557ok($Collator->lt("ab", "AB"));
809c7673 558ok($Collator->lt("\x{0E40}", "\x{0E41}"));
559ok($Collator->lt("\x{0E40}A", "\x{0E41}B"));
560ok($Collator->lt("\x{0E41}A", "\x{0E40}B"));
561ok($Collator->lt("A\x{0E41}A", "A\x{0E40}B"));
562
# $all_undef_8 was constructed without a rearrange argument and gives the
# same results as the restored $Collator.
ed423f7a 563ok($all_undef_8->lt("\x{0E40}", "\x{0E41}"));
564ok($all_undef_8->lt("\x{0E40}A", "\x{0E41}B"));
565ok($all_undef_8->lt("\x{0E41}A", "\x{0E40}B"));
566ok($all_undef_8->lt("A\x{0E41}A", "A\x{0E40}B"));
809c7673 567
568##############
569
# rearrange => [] (an empty list) at construction: no rearrangement takes
# place, so U+0E41 followed by "A" sorts after U+0E40 followed by "B".
570my $no_rearrange = Unicode::Collate->new(
571 table => undef,
572 normalization => undef,
573 rearrange => [],
574);
575
576ok($no_rearrange->lt("A", "B"));
577ok($no_rearrange->lt("\x{0E40}", "\x{0E41}"));
578ok($no_rearrange->lt("\x{0E40}A", "\x{0E41}B"));
579ok($no_rearrange->gt("\x{0E41}A", "\x{0E40}B"));
580ok($no_rearrange->gt("A\x{0E41}A", "A\x{0E40}B"));
581
582##############
583
# rearrange => undef at construction: the expected results are identical to
# those for rearrange => [].
809c7673 584my $undef_rearrange = Unicode::Collate->new(
585 table => undef,
586 normalization => undef,
587 rearrange => undef,
588);
589
590ok($undef_rearrange->lt("A", "B"));
591ok($undef_rearrange->lt("\x{0E40}", "\x{0E41}"));
592ok($undef_rearrange->lt("\x{0E40}A", "\x{0E41}B"));
593ok($undef_rearrange->gt("\x{0E41}A", "\x{0E40}B"));
594ok($undef_rearrange->gt("A\x{0E41}A", "A\x{0E40}B"));
595
596##############
597
# preprocess: a callback applied to each string before collation. This one
# removes the articles "a", "an" and "the" (case-insensitively, together
# with the whitespace that follows) and returns the modified copy.
598my $dropArticles = Unicode::Collate->new(
599 table => "keys.txt",
600 normalization => undef,
601 preprocess => sub {
602 my $string = shift;
603 $string =~ s/\b(?:an?|the)\s+//ig;
604 $string;
605 },
606);
607
# With articles dropped the pairs compare equal or in noun order; the
# plain $Collator still takes the articles into account.
608ok($dropArticles->eq("camel", "a camel"));
609ok($dropArticles->eq("Perl", "The Perl"));
610ok($dropArticles->lt("the pen", "a pencil"));
611ok($Collator->lt("Perl", "The Perl"));
612ok($Collator->gt("the pen", "a pencil"));
613
614##############
615
# backwards => [ 1 ]: level-1 weights are compared from the end of the
# string, so "AB" sorts after "BA". The option is given as an array ref here.
616my $backLevel1 = Unicode::Collate->new(
617 table => undef,
618 normalization => undef,
619 backwards => [ 1 ],
620);
621
622# all strings are reversed at level 1.
623
624ok($backLevel1->gt("AB", "BA"));
625ok($backLevel1->gt("\x{3042}\x{3044}", "\x{3044}\x{3042}"));
626
627##############
628
# backwards => 2 (given as a plain scalar) combined with undefName, which
# treats characters whose names match the pattern as having no table entry.
629my $backLevel2 = Unicode::Collate->new(
630 table => "keys.txt",
631 normalization => undef,
632 undefName => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/,
633 backwards => 2,
634);
635
# Strings that differ in the order of their accents (U+0300 / U+0302):
# $backLevel2 and the forward-comparing $Collator give opposite results.
636ok($backLevel2->gt("Ca\x{300}ca\x{302}", "ca\x{302}ca\x{300}"));
637ok($backLevel2->gt("ca\x{300}ca\x{302}", "Ca\x{302}ca\x{300}"));
638ok($Collator ->lt("Ca\x{300}ca\x{302}", "ca\x{302}ca\x{300}"));
639ok($Collator ->lt("ca\x{300}ca\x{302}", "Ca\x{302}ca\x{300}"));
640
641
3164dd77 642# HIRAGANA and KATAKANA are made undefined via undefName.
643# So they are after CJK Unified Ideographs.
809c7673 644
645ok($backLevel2->lt("\x{4E00}", $hiragana));
646ok($backLevel2->lt("\x{4E03}", $katakana));
647ok($Collator ->gt("\x{4E00}", $hiragana));
648ok($Collator ->gt("\x{4E03}", $katakana));
649
650##############