# Unicode::Collate v0.09
# Source: [p5sagit/p5-mst-13.2.git] / lib / Unicode / Collate / t / test.t
# (extracted from a blame view; original columns: Commit / Line / Data)
# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl test.pl'

#########################

use strict;
use warnings;

use Test;
BEGIN { plan tests => 50 };
use Unicode::Collate;
ok(1); # If we made it this far, we're ok.

#########################

# Main collator used by most of the tests below.  Normalization is
# switched off, so canonically equivalent strings are NOT unified first.
my $Collator = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
);

ok(ref $Collator, "Unicode::Collate");

# sort() is expected to order these names case-insensitively at the
# primary level (e.g. "lib" between "ExtUtils" and "Math").
ok(
  join(':', $Collator->sort(
    qw/ lib strict Carp ExtUtils CGI Time warnings Math overload Pod CPAN /
  ) ),
  join(':',
    qw/ Carp CGI CPAN ExtUtils lib Math overload Pod strict Time warnings /
  ),
);

my $A_acute = pack('U', 0x00C1);  # precomposed A with acute
my $acute   = pack('U', 0x0301);  # combining acute accent

# Without normalization the decomposed and precomposed forms differ.
ok($Collator->cmp("A$acute", $A_acute), -1);

# Empty strings compare equal to each other and sort before anything else.
ok($Collator->cmp("", ""), 0);
ok(! $Collator->ne("", "") );
ok(  $Collator->eq("", "") );

ok($Collator->cmp("", "perl"), -1);

# The default (normalizing) collator is only exercised when
# Unicode::Normalize can be loaded; otherwise a placeholder test keeps
# the plan count at 50.  A block eval with a trailing true value avoids
# a string eval and does not depend on the state of $@ afterwards.
if (eval { require Unicode::Normalize; 1 }) {
  my $NFD = Unicode::Collate->new(
    table => 'keys.txt',
  );
  # With normalization the two spellings of A-acute compare equal.
  ok($NFD->cmp("A$acute", $A_acute), 0);
}
else {
  ok(1);
}

# Tailored collator: same table as the main one plus extra entries given
# inline.  The entries make "ch"/"Ch" a single collation element
# (traditional Spanish) and give U+00DF (eszet) a two-element expansion.
my $tr = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
  ignoreName => qr/^(?:HANGUL|HIRAGANA|KATAKANA|BOPOMOFO)$/,
  entry => <<'ENTRIES',
0063 0068 ; [.0893.0020.0002.0063] # "ch" in traditional Spanish
0043 0068 ; [.0893.0020.0008.0043] # "Ch" in traditional Spanish
00DF ; [.09F3.0154.0004.00DF] [.09F3.0020.0004.00DF] # eszet in Germany
ENTRIES
);

# With the tailoring, "acha" sorts after every other "ac..." word.
ok(
  join(':', $tr->sort(
    qw/ acha aca ada acia acka /
  ) ),
  join(':',
    qw/ aca acia acka acha ada /
  ),
);

# The untailored collator keeps plain letter-by-letter order.
ok(
  join(':', $Collator->sort(
    qw/ acha aca ada acia acka /
  ) ),
  join(':',
    qw/ aca acha acia acka ada /
  ),
);

# Remember the collator's current level so later sections can restore it.
my $old_level = $Collator->{level};
my $hiragana = "\x{3042}\x{3044}";
my $katakana = "\x{30A2}\x{30A4}";

# At level 2, case and hiragana/katakana differences are expected to be
# ignored, so the pairs below compare equal.
$Collator->{level} = 2;

ok( $Collator->cmp("ABC","abc"), 0);
ok( $Collator->eq("ABC","abc") );
ok( $Collator->le("ABC","abc") );
ok( $Collator->cmp($hiragana, $katakana), 0);
ok( $Collator->eq($hiragana, $katakana) );
ok( $Collator->ge($hiragana, $katakana) );

# hangul: precomposed syllables versus their conjoining jamo sequences
ok( $Collator->eq("a\x{AC00}b", "a\x{1100}\x{1161}b") );
ok( $Collator->eq("a\x{AE00}b", "a\x{1100}\x{1173}\x{11AF}b") );
ok( $Collator->gt("a\x{AE00}b", "a\x{1100}\x{1173}b\x{11AF}") );
ok( $Collator->lt("a\x{AC00}b", "a\x{AE00}b") );
ok( $Collator->gt("a\x{D7A3}b", "a\x{C544}b") );
ok( $Collator->lt("a\x{C544}b", "a\x{30A2}b") ); # hangul < kana (U+30A2 is katakana)

$Collator->{level} = $old_level;

# katakana_before_hiragana on, upper_before_lower still off:
# lowercase sorts first, katakana sorts first.
$Collator->{katakana_before_hiragana} = 1;

ok( $Collator->cmp("abc", "ABC"), -1);
ok( $Collator->ne("abc", "ABC") );
ok( $Collator->lt("abc", "ABC") );
ok( $Collator->le("abc", "ABC") );
ok( $Collator->cmp($hiragana, $katakana), 1);
ok( $Collator->ne($hiragana, $katakana) );
ok( $Collator->gt($hiragana, $katakana) );
ok( $Collator->ge($hiragana, $katakana) );

# Both flags on: uppercase sorts first, katakana sorts first.
$Collator->{upper_before_lower} = 1;

ok( $Collator->cmp("abc", "ABC"), 1);
ok( $Collator->ge("abc", "ABC"), 1);
ok( $Collator->gt("abc", "ABC"), 1);
ok( $Collator->cmp($hiragana, $katakana), 1);
ok( $Collator->ge($hiragana, $katakana), 1);
ok( $Collator->gt($hiragana, $katakana), 1);

# Only upper_before_lower on: uppercase first, hiragana first.
$Collator->{katakana_before_hiragana} = 0;

ok( $Collator->cmp("abc", "ABC"), 1);
ok( $Collator->cmp($hiragana, $katakana), -1);

# Both flags off again: lowercase first, hiragana first.
$Collator->{upper_before_lower} = 0;

ok( $Collator->cmp("abc", "ABC"), -1);
ok( $Collator->le("abc", "ABC") );
ok( $Collator->cmp($hiragana, $katakana), -1);
ok( $Collator->lt($hiragana, $katakana) );

# Collator that ignores the characters "a" and "e" completely.
my $ign = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
  ignoreChar => qr/^[ae]$/,
);

# Both words reduce to "lmnt" once "a" and "e" are dropped.
ok( $ign->cmp("element","lament"), 0);

# index() tests.  Each case calls index() in list context and unpacks
# (position, length); the "if" relies on the list being empty when
# nothing matches.

# Level 2: case is ignored, so "PERL" is found inside "Perl".
$Collator->{level} = 2;

my $str;

my $orig = "This is a Perl book.";
my $sub = "PERL";
my $rep = "camel";
my $ret = "This is a camel book.";

$str = $orig;
if(my($pos,$len) = $Collator->index($str, $sub)){
  substr($str, $pos, $len, $rep);
}

ok($str, $ret);

# Back at the original level the case difference matters: no match,
# so the string must be left untouched.
$Collator->{level} = $old_level;

$str = $orig;
if(my($pos,$len) = $Collator->index($str, $sub)){
  substr($str, $pos, $len, $rep);
}

ok($str, $orig);

# Save the tailored collator's own level instead of reusing the value
# that was captured from $Collator.
my $tr_level = $tr->{level};
$tr->{level} = 1;

# At level 1 the tailored collator is expected to match "m\x{FC}ss"
# against "mu\x{DF}" (eszet has a two-element entry in the tailoring).
$str = "Ich mu\x{00DF} studieren.";
$sub = "m\x{00FC}ss";
my $match = undef;
if(my($pos, $len) = $tr->index($str, $sub)){
  $match = substr($str, $pos, $len);
}
ok($match, "mu\x{00DF}");

$tr->{level} = $tr_level;

# At the restored level the same search must fail.
$str = "Ich mu\x{00DF} studieren.";
$sub = "m\x{00FC}ss";
$match = undef;
if(my($pos, $len) = $tr->index($str, $sub)){
  $match = substr($str, $pos, $len);
}
ok($match, undef);

# Empty substring in an empty string: matches, yielding "".
$match = undef;
if(my($pos,$len) = $Collator->index("", "")){
  $match = substr("", $pos, $len);
}
ok($match, "");

# Non-empty substring in an empty string: no match.
$match = undef;
if(my($pos,$len) = $Collator->index("", "abc")){
  $match = substr("", $pos, $len);
}
ok($match, undef);
