# Compile-time environment guard; runs before any module below is loaded.
BEGIN {
    # If pack('U', 0x41) does not yield the plain string "A", print a
    # "1..0" line carrying the reason and stop with exit status 0.
    if ("A" ne pack('U', 0x41)) {
        print "1..0 # Unicode::Collate " .
              "cannot stringify a Unicode code point\n";
        exit 0;
    }

    # When the PERL_CORE environment variable is set, move into t/ (if that
    # directory exists) and replace @INC with the single relative lib path,
    # spelled '::lib' when $^O reports MacOS and '../lib' otherwise.
    if ($ENV{PERL_CORE}) {
        -d 't' and chdir('t');
        my $lib_dir = $^O eq 'MacOS' ? '::lib' : '../lib';
        @INC = ($lib_dir);
    }
}
use Test;
use strict;
use warnings;

# Load the module under test at compile time, then decide whether the
# tests can run at all.
BEGIN {
    use Unicode::Collate;

    # Proceed when either the XSUB bootstrap sub exists or the running perl
    # is 5.8.0 or later; bail out only when both conditions fail.
    if (!(exists &Unicode::Collate::bootstrap) && $] < 5.008) {
        print "1..0 # skipped: XSUB, or Perl 5.8.0 or later" .
              " needed for this test\n";
        print $@;
        exit;
    }
}

# Announce the number of tests before the first ok() is executed.
BEGIN {
    plan(tests => 40);
}

# Test 1: the file got this far.
ok(1);

#########################
# The strings below deliberately contain surrogates, noncharacters and
# out-of-range code points, so the 'utf8' warning category is turned off
# from here to the end of the file.
no warnings 'utf8';

# NULL is tailorable, but illegal code points are not:
# illegal code points should always be ignored
# (cf. UCA, 7.1.1 Illegal code points).

# Collation entries handed to the collator.  They give weights to U+0000,
# U+0001 and U+0002, to several code points marked "(invalid)", and to
# "A" alone and in two contractions.
my $illegal_entries = <<'ENTRIES';
0000 ; [.0020.0000.0000.0000] # [0000] NULL
0001 ; [.0021.0000.0000.0001] # [0001] START OF HEADING
FFFE ; [.0022.0000.0000.FFFE] # <noncharacter-FFFE> (invalid)
FFFF ; [.0023.0000.0000.FFFF] # <noncharacter-FFFF> (invalid)
D800 ; [.0024.0000.0000.D800] # <surrogate-D800> (invalid)
DFFF ; [.0025.0000.0000.DFFF] # <surrogate-DFFF> (invalid)
FDD0 ; [.0026.0000.0000.FDD0] # <noncharacter-FDD0> (invalid)
FDEF ; [.0027.0000.0000.FDEF] # <noncharacter-FDEF> (invalid)
0002 ; [.0030.0000.0000.0002] # [0002] START OF TEXT
10FFFF; [.0040.0000.0000.10FFFF] # <noncharacter-10FFFF> (invalid)
110000; [.0041.0000.0000.110000] # <out-of-range 110000> (invalid)
0041 ; [.1000.0020.0008.0041] # latin A
0041 0000 ; [.1100.0020.0008.0041] # latin A + NULL
0041 FFFF ; [.1200.0020.0008.0041] # latin A + FFFF (invalid)
ENTRIES

# Level-1 collator that uses only the entries above: no table file is
# named and normalization is disabled.
my $illeg = Unicode::Collate->new(
    entry         => $illegal_entries,
    level         => 1,
    table         => undef,
    normalization => undef,
);
# 2..12
# The empty string against each single code point: the tailored controls
# must sort after "", the illegal code points must compare equal to it.
for my $case (
    [ 'lt', "\x00" ],
    [ 'lt', "\x01" ],
    [ 'eq', "\x{FFFE}" ],
    [ 'eq', "\x{FFFF}" ],
    [ 'eq', "\x{D800}" ],
    [ 'eq', "\x{DFFF}" ],
    [ 'eq', "\x{FDD0}" ],
    [ 'eq', "\x{FDEF}" ],
    [ 'lt', "\x02" ],
    [ 'eq', "\x{10FFFF}" ],
    [ 'eq', "\x{110000}" ],
) {
    my ($compare, $char) = @$case;
    ok($illeg->$compare("", $char));
}

# 13..22
# The tailored controls keep their relative order, and NULL differs from
# every illegal code point.
ok($illeg->lt("\x00", "\x01"));
ok($illeg->lt("\x01", "\x02"));
for my $illegal_char (
    "\x{D800}", "\x{DFFF}", "\x{FDD0}", "\x{FDEF}",
    "\x{FFFE}", "\x{FFFF}", "\x{10FFFF}", "\x{110000}",
) {
    ok($illeg->ne("\0", $illegal_char));
}

# 23..26
# "A" followed by NULL or by U+FFFF, compared with "A", "AA" and each other.
for my $case (
    [ 'eq', "A",   "A\x{FFFF}" ],
    [ 'gt', "A\0", "A\x{FFFF}" ],
    [ 'lt', "A",   "A\0" ],
    [ 'lt', "AA",  "A\0" ],
) {
    my ($compare, $left, $right) = @$case;
    ok($illeg->$compare($left, $right));
}

##################
# 27..40
# match() with illegal code points inside the text being searched.
my ($match, $str, $sub, $ret);

# Level-1 collator backed by the 'keys.txt' table, normalization disabled.
my $Collator = Unicode::Collate->new(
    table         => 'keys.txt',
    level         => 1,
    normalization => undef,
);

# The substring searched for in every case.
$sub = "pe";

# Each entry is the portion of the text that match() is expected to return.
# The text actually searched is that portion followed by "rl".
my @expected_matches = (
    "Pe\x{300}\x{301}",
    "Pe\x{300}\0\0\x{301}",
    "Pe\x{DA00}\x{301}\x{DFFF}",
    "Pe\x{FFFF}\x{301}",
    "Pe\x{110000}\x{301}",
    "Pe\x{300}\x{d801}\x{301}",
    "Pe\x{300}\x{ffff}\x{301}",
    "Pe\x{300}\x{110000}\x{301}",
    "Pe\x{D9ab}\x{DFFF}",
    "Pe\x{FFFF}",
    "Pe\x{110000}",
    "Pe\x{300}\x{D800}\x{DFFF}",
    "Pe\x{300}\x{FFFF}",
    "Pe\x{300}\x{110000}",
);

for my $expected (@expected_matches) {
    $ret = $expected;
    $str = $ret . "rl";

    # In list context the first element returned by match() is the matched
    # portion; it must equal the expected prefix exactly.
    ($match) = $Collator->match($str, $sub);
    ok($match, $ret);
}