Commit | Line | Data |
0effba8c |
1 | BEGIN { |
aa5485d1 |
2 | require Config; import Config; |
3 | if ($Config{'extensions'} !~ /\bEncode\b/) { |
4 | print "1..0 # Skip: Encode was not built\n"; |
5 | exit 0; |
6 | } |
b9890021 |
7 | unless (find PerlIO::Layer 'perlio') { |
d1256cb1 |
8 | print "1..0 # Skip: PerlIO was not built\n"; |
9 | exit 0; |
b9890021 |
10 | } |
0effba8c |
11 | if (ord("A") == 193) { |
d1256cb1 |
12 | print "1..0 # encoding pragma does not support EBCDIC platforms\n"; |
13 | exit(0); |
0effba8c |
14 | } |
15 | } |
16 | |
a999c27c |
17 | print "1..31\n"; |
799ef3cb |
18 | |
0a378802 |
19 | use encoding "latin1"; # ignored (overwritten by the next line) |
f14ed3c6 |
20 | use encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...) |
0a378802 |
21 | |
0a378802 |
22 | # "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is |
f14ed3c6 |
23 | # \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS), |
0a378802 |
24 | # instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S) |
25 | |
9f4817db |
26 | $a = "\xDF"; |
27 | $b = "\x{100}"; |
28 | |
29 | print "not " unless ord($a) == 0x3af; |
0a378802 |
30 | print "ok 1\n"; |
31 | |
9f4817db |
32 | print "not " unless ord($b) == 0x100; |
0a378802 |
33 | print "ok 2\n"; |
34 | |
9f4817db |
35 | my $c; |
36 | |
37 | $c = $a . $b; |
38 | |
39 | print "not " unless ord($c) == 0x3af; |
0a378802 |
40 | print "ok 3\n"; |
41 | |
9f4817db |
42 | print "not " unless length($c) == 2; |
43 | print "ok 4\n"; |
44 | |
45 | print "not " unless ord(substr($c, 1, 1)) == 0x100; |
46 | print "ok 5\n"; |
0a378802 |
47 | |
121910a4 |
48 | print "not " unless ord(chr(0xdf)) == 0x3af; # spooky |
49 | print "ok 6\n"; |
50 | |
51 | print "not " unless ord(pack("C", 0xdf)) == 0x3af; |
52 | print "ok 7\n"; |
53 | |
54 | # we didn't break pack/unpack, I hope |
55 | |
56 | print "not " unless unpack("C", pack("C", 0xdf)) == 0xdf; |
57 | print "ok 8\n"; |
58 | |
59 | # the first octet of UTF-8 encoded 0x3af |
1651fc44 |
60 | print "not " unless unpack("U0 C", chr(0xdf)) == 0xce; |
121910a4 |
61 | print "ok 9\n"; |
bfa383d6 |
62 | |
3de8ed06 |
63 | print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf; |
64 | print "ok 10\n"; |
65 | |
66 | print "not " unless unpack("U", chr(0xdf)) == 0x3af; |
67 | print "ok 11\n"; |
68 | |
bfa383d6 |
69 | # charnames must still work |
70 | use charnames ':full'; |
71 | print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf; |
3de8ed06 |
72 | print "ok 12\n"; |
73 | |
74 | # combine |
75 | |
76 | $c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf); |
77 | |
78 | print "not " unless ord($c) == 0x3af; |
79 | print "ok 13\n"; |
80 | |
81 | print "not " unless ord(substr($c, 1, 1)) == 0xdf; |
82 | print "ok 14\n"; |
83 | |
84 | print "not " unless ord(substr($c, 2, 1)) == 0x3af; |
85 | print "ok 15\n"; |
bfa383d6 |
86 | |
a72c7584 |
87 | # regex literals |
88 | |
89 | print "not " unless "\xDF" =~ /\x{3AF}/; |
90 | print "ok 16\n"; |
91 | |
92 | print "not " unless "\x{3AF}" =~ /\xDF/; |
93 | print "ok 17\n"; |
94 | |
95 | print "not " unless "\xDF" =~ /\xDF/; |
96 | print "ok 18\n"; |
97 | |
98 | print "not " unless "\x{3AF}" =~ /\x{3AF}/; |
99 | print "ok 19\n"; |
100 | |
799ef3cb |
101 | # eq, cmp |
102 | |
553e1bcc |
103 | my ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = ( |
104 | pack("C*", 0xDF ), # byte |
105 | pack("C*", 0xDF, 0x20), # ($bytes2 cmp $U) > 0 |
106 | pack("U*", 0x3AF), # $U eq $byte |
107 | pack("U*", 0xDF ), # $Ub would eq $bytev w/o use encoding |
108 | pack("U*", 0x3B1), # ($g1 cmp $byte) > 0; === chr(0xe1) |
109 | pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0; |
110 | pack("U*", 0x3AB), # ($l cmp $byte) < 0; === chr(0xdb) |
111 | ); |
112 | |
113 | # all the tests in this section that compare a byte encoded string |
114 | # ato UTF-8 encoded are run in all possible vairants |
115 | # all of the eq, ne, cmp operations tested, |
116 | # $v z $u tested as well as $u z $v |
117 | |
118 | sub alleq($$){ |
119 | my ($a,$b) = (shift, shift); |
120 | $a eq $b && $b eq $a && |
121 | !( $a ne $b ) && !( $b ne $a ) && |
122 | ( $a cmp $b ) == 0 && ( $b cmp $a ) == 0; |
123 | } |
124 | |
125 | sub anyeq($$){ |
126 | my ($a,$b) = (shift, shift); |
127 | $a eq $b || $b eq $a || |
128 | !( $a ne $b ) || !( $b ne $a ) || |
129 | ( $a cmp $b ) == 0 || ( $b cmp $a ) == 0; |
130 | } |
131 | |
132 | sub allgt($$){ |
133 | my ($a,$b) = (shift, shift); |
134 | ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1; |
135 | } |
136 | #match the correct UTF-8 string |
137 | print "not " unless alleq($byte, $U); |
138 | print "ok 20\n"; |
139 | |
140 | #do not match a wrong UTF-8 string |
141 | print "not " if anyeq($byte, $Ub); |
142 | print "ok 21\n"; |
143 | |
144 | #string ordering |
145 | print "not " unless allgt ( $g1, $byte ) && |
146 | allgt ( $g2, $byte ) && |
147 | allgt ( $byte, $l ) && |
148 | allgt ( $bytes, $U ); |
149 | print "ok 22\n"; |
150 | |
151 | # upgrade, downgrade |
152 | |
153 | my ($u,$v,$v2); |
154 | $u = $v = $v2 = pack("C*", 0xDF); |
155 | utf8::upgrade($v); #explicit upgrade |
156 | $v2 = substr( $v2."\x{410}", 0, -1); #implicit upgrade |
157 | |
158 | # implicit upgrade === explicit upgrade |
159 | print "not " if do{{use bytes; $v ne $v2}} || $v ne $v2; |
160 | print "ok 23\n"; |
161 | |
162 | # utf8::upgrade is transparent and does not break equality |
163 | print "not " unless alleq( $u, $v ); |
164 | print "ok 24\n"; |
165 | |
166 | $u = $v = pack("C*", 0xDF); |
167 | utf8::upgrade($v); |
168 | #test for a roundtrip, we should get back from where we left |
169 | eval {utf8::downgrade( $v )}; |
170 | print "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v; |
171 | print "ok 25\n"; |
172 | |
173 | # some more eq, cmp |
174 | |
3ef515df |
175 | $byte=pack("C*", 0xDF); |
799ef3cb |
176 | |
177 | print "not " unless pack("U*", 0x3AF) eq $byte; |
553e1bcc |
178 | print "ok 26\n"; |
799ef3cb |
179 | |
180 | print "not " if chr(0xDF) cmp $byte; |
553e1bcc |
181 | print "ok 27\n"; |
799ef3cb |
182 | |
183 | print "not " unless ((pack("U*", 0x3B0) cmp $byte) == 1) && |
184 | ((pack("U*", 0x3AE) cmp $byte) == -1) && |
185 | ((pack("U*", 0x3AF, 0x20) cmp $byte) == 1) && |
d1256cb1 |
186 | ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1); |
553e1bcc |
187 | print "ok 28\n"; |
799ef3cb |
188 | |
3ef515df |
189 | |
190 | { |
191 | # Used to core dump in 5.7.3 |
192 | no warnings; # so test goes noiselessly |
193 | print ord(undef) == 0 ? "ok 29\n" : "not ok 29\n"; |
194 | } |
a999c27c |
195 | |
196 | { |
d1256cb1 |
197 | my %h1; |
198 | my %h2; |
199 | $h1{"\xdf"} = 41; |
200 | $h2{"\x{3af}"} = 42; |
201 | print $h1{"\x{3af}"} == 41 ? "ok 30\n" : "not ok 30\n"; |
202 | print $h2{"\xdf"} == 42 ? "ok 31\n" : "not ok 31\n"; |
a999c27c |
203 | } |