Commit | Line | Data |
f96ec2a2 |
1 | #!./perl |
2 | |
3 | BEGIN { |
4 | chdir 't' if -d 't'; |
20822f61 |
5 | @INC = '../lib'; |
f96ec2a2 |
6 | $ENV{PERL5LIB} = '../lib'; |
f70c35af |
7 | if ( ord("\t") != 9 ) { # skip on ebcdic platforms |
8 | print "1..0 # Skip utf8 tests on ebcdic platform.\n"; |
9 | exit; |
10 | } |
f96ec2a2 |
11 | } |
12 | |
00387047 |
13 | print "1..90\n"; |
f96ec2a2 |
14 | |
15 | my $test = 1; |
16 | |
17 | sub ok { |
18 | my ($got,$expect) = @_; |
19 | print "# expected [$expect], got [$got]\nnot " if $got ne $expect; |
20 | print "ok $test\n"; |
21 | } |
22 | |
7bbb0251 |
23 | sub nok { |
24 | my ($got,$expect) = @_; |
25 | print "# expected not [$expect], got [$got]\nnot " if $got eq $expect; |
26 | print "ok $test\n"; |
27 | } |
28 | |
be341bce |
29 | sub ok_bytes { |
30 | use bytes; |
31 | my ($got,$expect) = @_; |
32 | print "# expected [$expect], got [$got]\nnot " if $got ne $expect; |
33 | print "ok $test\n"; |
34 | } |
35 | |
7bbb0251 |
36 | sub nok_bytes { |
37 | use bytes; |
38 | my ($got,$expect) = @_; |
39 | print "# expected not [$expect], got [$got]\nnot " if $got eq $expect; |
40 | print "ok $test\n"; |
41 | } |
be341bce |
42 | |
f96ec2a2 |
43 | { |
44 | use utf8; |
45 | $_ = ">\x{263A}<"; |
46 | s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg; |
47 | ok $_, '>☺<'; |
c5cc3500 |
48 | $test++; # 1 |
f96ec2a2 |
49 | |
50 | $_ = ">\x{263A}<"; |
51 | my $rx = "\x{80}-\x{10ffff}"; |
52 | s/([$rx])/"&#".ord($1).";"/eg; |
53 | ok $_, '>☺<'; |
c5cc3500 |
54 | $test++; # 2 |
f96ec2a2 |
55 | |
56 | $_ = ">\x{263A}<"; |
57 | my $rx = "\\x{80}-\\x{10ffff}"; |
58 | s/([$rx])/"&#".ord($1).";"/eg; |
59 | ok $_, '>☺<'; |
c5cc3500 |
60 | $test++; # 3 |
b8c5462f |
61 | |
62 | $_ = "alpha,numeric"; |
63 | m/([[:alpha:]]+)/; |
64 | ok $1, 'alpha'; |
c5cc3500 |
65 | $test++; # 4 |
b8c5462f |
66 | |
67 | $_ = "alphaNUMERICstring"; |
68 | m/([[:^lower:]]+)/; |
69 | ok $1, 'NUMERIC'; |
c5cc3500 |
70 | $test++; # 5 |
b8c5462f |
71 | |
72 | $_ = "alphaNUMERICstring"; |
73 | m/(\p{Ll}+)/; |
74 | ok $1, 'alpha'; |
c5cc3500 |
75 | $test++; # 6 |
b8c5462f |
76 | |
77 | $_ = "alphaNUMERICstring"; |
78 | m/(\p{Lu}+)/; |
79 | ok $1, 'NUMERIC'; |
c5cc3500 |
80 | $test++; # 7 |
b8c5462f |
81 | |
82 | $_ = "alpha,numeric"; |
83 | m/([\p{IsAlpha}]+)/; |
84 | ok $1, 'alpha'; |
c5cc3500 |
85 | $test++; # 8 |
b8c5462f |
86 | |
87 | $_ = "alphaNUMERICstring"; |
88 | m/([^\p{IsLower}]+)/; |
89 | ok $1, 'NUMERIC'; |
c5cc3500 |
90 | $test++; # 9 |
b8c5462f |
91 | |
0f4b6630 |
92 | $_ = "alpha123numeric456"; |
93 | m/([\p{IsDigit}]+)/; |
94 | ok $1, '123'; |
c5cc3500 |
95 | $test++; # 10 |
b8c5462f |
96 | |
0f4b6630 |
97 | $_ = "alpha123numeric456"; |
98 | m/([^\p{IsDigit}]+)/; |
99 | ok $1, 'alpha'; |
c5cc3500 |
100 | $test++; # 11 |
b8c5462f |
101 | |
0f4b6630 |
102 | $_ = ",123alpha,456numeric"; |
103 | m/([\p{IsAlnum}]+)/; |
104 | ok $1, '123alpha'; |
c5cc3500 |
105 | $test++; # 12 |
0f4b6630 |
106 | } |
3b5dab68 |
107 | |
a197cbdd |
108 | { |
109 | use utf8; |
110 | |
111 | $_ = "\x{263A}>\x{263A}\x{263A}"; |
112 | |
113 | ok length, 4; |
c5cc3500 |
114 | $test++; # 13 |
a197cbdd |
115 | |
116 | ok length((m/>(.)/)[0]), 1; |
c5cc3500 |
117 | $test++; # 14 |
a197cbdd |
118 | |
119 | ok length($&), 2; |
c5cc3500 |
120 | $test++; # 15 |
a197cbdd |
121 | |
122 | ok length($'), 1; |
c5cc3500 |
123 | $test++; # 16 |
a197cbdd |
124 | |
125 | ok length($`), 1; |
c5cc3500 |
126 | $test++; # 17 |
a197cbdd |
127 | |
128 | ok length($1), 1; |
c5cc3500 |
129 | $test++; # 18 |
a197cbdd |
130 | |
131 | ok length($tmp=$&), 2; |
c5cc3500 |
132 | $test++; # 19 |
a197cbdd |
133 | |
134 | ok length($tmp=$'), 1; |
c5cc3500 |
135 | $test++; # 20 |
a197cbdd |
136 | |
137 | ok length($tmp=$`), 1; |
c5cc3500 |
138 | $test++; # 21 |
a197cbdd |
139 | |
140 | ok length($tmp=$1), 1; |
c5cc3500 |
141 | $test++; # 22 |
a197cbdd |
142 | |
c5cc3500 |
143 | { |
be341bce |
144 | use bytes; |
c5cc3500 |
145 | |
be341bce |
146 | my $tmp = $&; |
c5cc3500 |
147 | ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272); |
148 | $test++; # 23 |
a197cbdd |
149 | |
c5cc3500 |
150 | $tmp = $'; |
151 | ok $tmp, pack("C*", 0342, 0230, 0272); |
152 | $test++; # 24 |
a197cbdd |
153 | |
c5cc3500 |
154 | $tmp = $`; |
155 | ok $tmp, pack("C*", 0342, 0230, 0272); |
156 | $test++; # 25 |
a197cbdd |
157 | |
c5cc3500 |
158 | $tmp = $1; |
159 | ok $tmp, pack("C*", 0342, 0230, 0272); |
160 | $test++; # 26 |
161 | } |
a197cbdd |
162 | |
be341bce |
163 | ok_bytes $&, pack("C*", ord(">"), 0342, 0230, 0272); |
164 | $test++; # 27 |
165 | |
166 | ok_bytes $', pack("C*", 0342, 0230, 0272); |
167 | $test++; # 28 |
168 | |
169 | ok_bytes $`, pack("C*", 0342, 0230, 0272); |
170 | $test++; # 29 |
171 | |
172 | ok_bytes $1, pack("C*", 0342, 0230, 0272); |
173 | $test++; # 30 |
174 | |
a197cbdd |
175 | { |
176 | use bytes; |
177 | no utf8; |
178 | |
179 | ok length, 10; |
be341bce |
180 | $test++; # 31 |
a197cbdd |
181 | |
182 | ok length((m/>(.)/)[0]), 1; |
be341bce |
183 | $test++; # 32 |
a197cbdd |
184 | |
185 | ok length($&), 2; |
be341bce |
186 | $test++; # 33 |
a197cbdd |
187 | |
188 | ok length($'), 5; |
be341bce |
189 | $test++; # 34 |
a197cbdd |
190 | |
191 | ok length($`), 3; |
be341bce |
192 | $test++; # 35 |
a197cbdd |
193 | |
194 | ok length($1), 1; |
be341bce |
195 | $test++; # 36 |
a197cbdd |
196 | |
197 | ok $&, pack("C*", ord(">"), 0342); |
be341bce |
198 | $test++; # 37 |
a197cbdd |
199 | |
200 | ok $', pack("C*", 0230, 0272, 0342, 0230, 0272); |
be341bce |
201 | $test++; # 38 |
a197cbdd |
202 | |
203 | ok $`, pack("C*", 0342, 0230, 0272); |
be341bce |
204 | $test++; # 39 |
a197cbdd |
205 | |
206 | ok $1, pack("C*", 0342); |
be341bce |
207 | $test++; # 40 |
a197cbdd |
208 | } |
209 | |
a197cbdd |
210 | { |
211 | no utf8; |
212 | $_="\342\230\272>\342\230\272\342\230\272"; |
213 | } |
214 | |
215 | ok length, 10; |
be341bce |
216 | $test++; # 41 |
a197cbdd |
217 | |
218 | ok length((m/>(.)/)[0]), 1; |
be341bce |
219 | $test++; # 42 |
a197cbdd |
220 | |
221 | ok length($&), 2; |
be341bce |
222 | $test++; # 43 |
a197cbdd |
223 | |
224 | ok length($'), 1; |
be341bce |
225 | $test++; # 44 |
a197cbdd |
226 | |
227 | ok length($`), 1; |
be341bce |
228 | $test++; # 45 |
a197cbdd |
229 | |
230 | ok length($1), 1; |
be341bce |
231 | $test++; # 46 |
a197cbdd |
232 | |
233 | ok length($tmp=$&), 2; |
be341bce |
234 | $test++; # 47 |
a197cbdd |
235 | |
236 | ok length($tmp=$'), 1; |
be341bce |
237 | $test++; # 48 |
a197cbdd |
238 | |
239 | ok length($tmp=$`), 1; |
be341bce |
240 | $test++; # 49 |
a197cbdd |
241 | |
242 | ok length($tmp=$1), 1; |
be341bce |
243 | $test++; # 50 |
a197cbdd |
244 | |
c5cc3500 |
245 | { |
246 | use bytes; |
a197cbdd |
247 | |
c5cc3500 |
248 | my $tmp = $&; |
249 | ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272); |
be341bce |
250 | $test++; # 51 |
a197cbdd |
251 | |
c5cc3500 |
252 | $tmp = $'; |
253 | ok $tmp, pack("C*", 0342, 0230, 0272); |
be341bce |
254 | $test++; # 52 |
a197cbdd |
255 | |
c5cc3500 |
256 | $tmp = $`; |
257 | ok $tmp, pack("C*", 0342, 0230, 0272); |
be341bce |
258 | $test++; # 53 |
a197cbdd |
259 | |
c5cc3500 |
260 | $tmp = $1; |
261 | ok $tmp, pack("C*", 0342, 0230, 0272); |
be341bce |
262 | $test++; # 54 |
c5cc3500 |
263 | } |
3b5dab68 |
264 | |
a197cbdd |
265 | { |
266 | use bytes; |
267 | no utf8; |
268 | |
269 | ok length, 10; |
be341bce |
270 | $test++; # 55 |
a197cbdd |
271 | |
272 | ok length((m/>(.)/)[0]), 1; |
be341bce |
273 | $test++; # 56 |
a197cbdd |
274 | |
275 | ok length($&), 2; |
be341bce |
276 | $test++; # 57 |
a197cbdd |
277 | |
278 | ok length($'), 5; |
be341bce |
279 | $test++; # 58 |
a197cbdd |
280 | |
281 | ok length($`), 3; |
be341bce |
282 | $test++; # 59 |
a197cbdd |
283 | |
284 | ok length($1), 1; |
be341bce |
285 | $test++; # 60 |
a197cbdd |
286 | |
287 | ok $&, pack("C*", ord(">"), 0342); |
be341bce |
288 | $test++; # 61 |
a197cbdd |
289 | |
290 | ok $', pack("C*", 0230, 0272, 0342, 0230, 0272); |
be341bce |
291 | $test++; # 62 |
a197cbdd |
292 | |
293 | ok $`, pack("C*", 0342, 0230, 0272); |
be341bce |
294 | $test++; # 63 |
a197cbdd |
295 | |
296 | ok $1, pack("C*", 0342); |
be341bce |
297 | $test++; # 64 |
a197cbdd |
298 | } |
de35ba6f |
299 | |
300 | ok "\x{ab}" =~ /^\x{ab}$/, 1; |
be341bce |
301 | $test++; # 65 |
a197cbdd |
302 | } |
aaa68c4a |
303 | |
304 | { |
305 | use utf8; |
306 | ok_bytes chr(0xe2), pack("C*", 0xc3, 0xa2); |
307 | $test++; # 66 |
308 | } |
28cb3359 |
309 | |
310 | { |
311 | use utf8; |
312 | my @a = map ord, split(//, join("", map chr, (1234, 123, 2345))); |
313 | ok "@a", "1234 123 2345"; |
314 | $test++; # 67 |
315 | } |
316 | |
317 | { |
318 | use utf8; |
319 | my $x = chr(123); |
320 | my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345))); |
321 | ok "@a", "1234 2345"; |
322 | $test++; # 68 |
323 | } |
31067593 |
324 | |
7bbb0251 |
325 | { |
da450f52 |
326 | # bug id 20001009.001 |
327 | |
89491803 |
328 | my ($a, $b); |
329 | |
330 | { use bytes; $a = "\xc3\xa4" } |
331 | { use utf8; $b = "\xe4" } # \xXX must not produce UTF-8 |
332 | |
333 | print "not " if $a eq $b; |
334 | print "ok $test\n"; $test++; |
335 | |
336 | { use utf8; print "not " if $a eq $b; } |
337 | print "ok $test\n"; $test++; |
7bbb0251 |
338 | } |
31067593 |
339 | |
340 | { |
da450f52 |
341 | # bug id 20001008.001 |
342 | |
31067593 |
343 | my @x = ("stra\337e 138","stra\337e 138"); |
344 | for (@x) { |
345 | s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; |
346 | my($latin) = /^(.+)(?:\s+\d)/; |
347 | print $latin eq "stra\337e" ? "ok $test\n" : |
348 | "#latin[$latin]\nnot ok $test\n"; |
349 | $test++; |
350 | $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a |
351 | use utf8; |
352 | $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a |
353 | } |
354 | } |
b7018214 |
355 | |
356 | { |
da450f52 |
357 | # bug id 20000427.003 |
358 | |
359 | use utf8; |
360 | use warnings; |
361 | use strict; |
362 | |
363 | my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}"; |
364 | |
365 | my @charlist = split //, $sushi; |
366 | my $r = ''; |
367 | foreach my $ch (@charlist) { |
368 | $r = $r . " " . sprintf "U+%04X", ord($ch); |
369 | } |
370 | |
371 | print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B"; |
372 | print "ok $test\n"; |
373 | $test++; |
374 | } |
375 | |
376 | { |
93f04dac |
377 | # bug id 20000426.003 |
378 | |
379 | use utf8; |
380 | |
381 | my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20"; |
382 | |
383 | my ($a, $b, $c) = split(/\x40/, $s); |
384 | print "not " |
385 | unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a; |
386 | print "ok $test\n"; |
387 | $test++; |
388 | |
389 | my ($a, $b) = split(/\x{100}/, $s); |
390 | print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20"; |
391 | print "ok $test\n"; |
392 | $test++; |
393 | |
394 | my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s); |
395 | print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20"; |
396 | print "ok $test\n"; |
397 | $test++; |
398 | |
399 | my ($a, $b) = split(/\x40\x{80}/, $s); |
400 | print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20"; |
401 | print "ok $test\n"; |
402 | $test++; |
403 | |
404 | my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s); |
405 | print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20"; |
406 | print "ok $test\n"; |
407 | $test++; |
408 | } |
60ff4832 |
409 | |
410 | { |
411 | # bug id 20000730.004 |
412 | |
413 | use utf8; |
414 | |
415 | my $smiley = "\x{263a}"; |
416 | |
417 | for my $s ("\x{263a}", # 1 |
418 | $smiley, # 2 |
419 | |
420 | "" . $smiley, # 3 |
421 | "" . "\x{263a}", # 4 |
422 | |
423 | $smiley . "", # 5 |
424 | "\x{263a}" . "", # 6 |
425 | ) { |
426 | my $length_chars = length($s); |
427 | my $length_bytes; |
428 | { use bytes; $length_bytes = length($s) } |
429 | my @regex_chars = $s =~ m/(.)/g; |
430 | my $regex_chars = @regex_chars; |
431 | my @split_chars = split //, $s; |
432 | my $split_chars = @split_chars; |
433 | print "not " |
434 | unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq |
435 | "1/1/1/3"; |
436 | print "ok $test\n"; |
437 | $test++; |
438 | } |
439 | |
440 | for my $s ("\x{263a}" . "\x{263a}", # 7 |
441 | $smiley . $smiley, # 8 |
442 | |
443 | "\x{263a}\x{263a}", # 9 |
444 | "$smiley$smiley", # 10 |
445 | |
446 | "\x{263a}" x 2, # 11 |
447 | $smiley x 2, # 12 |
448 | ) { |
449 | my $length_chars = length($s); |
450 | my $length_bytes; |
451 | { use bytes; $length_bytes = length($s) } |
452 | my @regex_chars = $s =~ m/(.)/g; |
453 | my $regex_chars = @regex_chars; |
454 | my @split_chars = split //, $s; |
455 | my $split_chars = @split_chars; |
456 | print "not " |
457 | unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq |
458 | "2/2/2/6"; |
459 | print "ok $test\n"; |
460 | $test++; |
461 | } |
462 | } |