# Skip the whole test file (TAP "1..0") when pack('U', ...) does not yield
# the expected character string on this perl.
3 unless ("A" eq pack('U', 0x41)) {
4 print "1..0 # Unicode::Collate " .
5 "cannot stringify a Unicode code point\n";
# Library search path: Mac OS classic uses '::lib', everything else '../lib'.
10 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
# Skip everything unless the XS bootstrap sub exists or perl is >= 5.008.
21 unless (exists &Unicode::Collate::bootstrap or 5.008 <= $]) {
22 print "1..0 # skipped: XSUB, or Perl 5.8.0 or later".
23 " needed for this test\n";
# Declare the number of tests this file is expected to run.
29 BEGIN { plan tests => 40 };
33 #########################
37 # NULL is tailorable but illegal code points are not.
38 # illegal code points should always be ignored
39 # (cf. UCA, 7.1.1 Illegal code points).
# Collator used by the comparison tests that follow.  The lines after the
# constructor call look like collation table entries ("code point(s) ;
# [.primary.secondary.tertiary.quaternary]"); the option key that passes them
# in is outside this excerpt -- TODO confirm.  The entries give weights to
# NULL, U+0001 and U+0002, to the code points commented as "(invalid)", and to
# "A", "A + NULL" and "A + FFFF".
41 my $illeg = Unicode::Collate->new(
43 0000 ; [.0020.0000.0000.0000] # [0000] NULL
44 0001 ; [.0021.0000.0000.0001] # [0001] START OF HEADING
45 FFFE ; [.0022.0000.0000.FFFE] # <noncharacter-FFFE> (invalid)
46 FFFF ; [.0023.0000.0000.FFFF] # <noncharacter-FFFF> (invalid)
47 D800 ; [.0024.0000.0000.D800] # <surrogate-D800> (invalid)
48 DFFF ; [.0025.0000.0000.DFFF] # <surrogate-DFFF> (invalid)
49 FDD0 ; [.0026.0000.0000.FDD0] # <noncharacter-FDD0> (invalid)
50 FDEF ; [.0027.0000.0000.FDEF] # <noncharacter-FDEF> (invalid)
51 0002 ; [.0030.0000.0000.0002] # [0002] START OF TEXT
52 10FFFF; [.0040.0000.0000.10FFFF] # <noncharacter-10FFFF> (invalid)
53 110000; [.0041.0000.0000.110000] # <out-of-range 110000> (invalid)
54 0041 ; [.1000.0020.0008.0041] # latin A
55 0041 0000 ; [.1100.0020.0008.0041] # latin A + NULL
56 0041 FFFF ; [.1200.0020.0008.0041] # latin A + FFFF (invalid)
60 normalization => undef,
# Against the empty string: NULL, U+0001 and U+0002 must sort after it,
# while every code point marked "(invalid)" in the table must compare equal
# to it, i.e. be ignored despite having a table entry.
64 ok($illeg->lt("", "\x00"));
65 ok($illeg->lt("", "\x01"));
66 ok($illeg->eq("", "\x{FFFE}"));
67 ok($illeg->eq("", "\x{FFFF}"));
68 ok($illeg->eq("", "\x{D800}"));
69 ok($illeg->eq("", "\x{DFFF}"));
70 ok($illeg->eq("", "\x{FDD0}"));
71 ok($illeg->eq("", "\x{FDEF}"));
72 ok($illeg->lt("", "\x02"));
73 ok($illeg->eq("", "\x{10FFFF}"));
74 ok($illeg->eq("", "\x{110000}"));
# The tailored controls keep their relative order, and NULL must not be
# equal to any of the ignored code points.
77 ok($illeg->lt("\x00", "\x01"));
78 ok($illeg->lt("\x01", "\x02"));
79 ok($illeg->ne("\0", "\x{D800}"));
80 ok($illeg->ne("\0", "\x{DFFF}"));
81 ok($illeg->ne("\0", "\x{FDD0}"));
82 ok($illeg->ne("\0", "\x{FDEF}"));
83 ok($illeg->ne("\0", "\x{FFFE}"));
84 ok($illeg->ne("\0", "\x{FFFF}"));
85 ok($illeg->ne("\0", "\x{10FFFF}"));
86 ok($illeg->ne("\0", "\x{110000}"));
# Sequences starting with "A": the "A + FFFF" table entry must have no
# effect ("A\x{FFFF}" equals plain "A"), whereas "A + NULL" is expected to
# sort after "A", after "A\x{FFFF}" and after "AA".
89 ok($illeg->eq("A", "A\x{FFFF}"));
90 ok($illeg->gt("A\0", "A\x{FFFF}"));
91 ok($illeg->lt("A", "A\0"));
92 ok($illeg->lt("AA", "A\0"));
# Working variables for the match() cases below.
96 my($match, $str, $sub, $ret);
# Second collator, also with normalization disabled; its other options are
# outside this excerpt.
98 my $Collator = Unicode::Collate->new(
101 normalization => undef,
# Each case sets $str (the string searched) and $ret, then calls
# match($str, $sub) in list context and keeps the first returned value in
# $match.  $sub is assigned outside the visible lines, and the check of
# $match against $ret is presumably in the omitted lines -- TODO confirm.

# "Pe" followed by U+0300 and U+0301.
107 $str = "Pe\x{300}\x{301}rl";
108 $ret = "Pe\x{300}\x{301}";
109 ($match) = $Collator->match($str, $sub);
# NULs between U+0300 and U+0301.
112 $str = "Pe\x{300}\0\0\x{301}rl";
113 $ret = "Pe\x{300}\0\0\x{301}";
114 ($match) = $Collator->match($str, $sub);
# Surrogate, noncharacter or out-of-range code point before U+0301.
117 $str = "Pe\x{DA00}\x{301}\x{DFFF}rl";
118 $ret = "Pe\x{DA00}\x{301}\x{DFFF}";
119 ($match) = $Collator->match($str, $sub);
122 $str = "Pe\x{FFFF}\x{301}rl";
123 $ret = "Pe\x{FFFF}\x{301}";
124 ($match) = $Collator->match($str, $sub);
127 $str = "Pe\x{110000}\x{301}rl";
128 $ret = "Pe\x{110000}\x{301}";
129 ($match) = $Collator->match($str, $sub);
# Same kinds of code point placed between U+0300 and U+0301.
132 $str = "Pe\x{300}\x{d801}\x{301}rl";
133 $ret = "Pe\x{300}\x{d801}\x{301}";
134 ($match) = $Collator->match($str, $sub);
137 $str = "Pe\x{300}\x{ffff}\x{301}rl";
138 $ret = "Pe\x{300}\x{ffff}\x{301}";
139 ($match) = $Collator->match($str, $sub);
142 $str = "Pe\x{300}\x{110000}\x{301}rl";
143 $ret = "Pe\x{300}\x{110000}\x{301}";
144 ($match) = $Collator->match($str, $sub);
# Same kinds of code point directly after "Pe".
147 $str = "Pe\x{D9ab}\x{DFFF}rl";
148 $ret = "Pe\x{D9ab}\x{DFFF}";
149 ($match) = $Collator->match($str, $sub);
# NOTE(review): no $ret assignment is visible for this case in the excerpt;
# presumably it is on an omitted line -- confirm.
152 $str = "Pe\x{FFFF}rl";
154 ($match) = $Collator->match($str, $sub);
157 $str = "Pe\x{110000}rl";
158 $ret = "Pe\x{110000}";
159 ($match) = $Collator->match($str, $sub);
# Same kinds of code point after "Pe" + U+0300.
162 $str = "Pe\x{300}\x{D800}\x{DFFF}rl";
163 $ret = "Pe\x{300}\x{D800}\x{DFFF}";
164 ($match) = $Collator->match($str, $sub);
167 $str = "Pe\x{300}\x{FFFF}rl";
168 $ret = "Pe\x{300}\x{FFFF}";
169 ($match) = $Collator->match($str, $sub);
172 $str = "Pe\x{300}\x{110000}rl";
173 $ret = "Pe\x{300}\x{110000}";
174 ($match) = $Collator->match($str, $sub);