Commit | Line | Data |
f96ec2a2 |
1 | #!./perl |
2 | |
3 | BEGIN { |
4 | chdir 't' if -d 't'; |
20822f61 |
5 | @INC = '../lib'; |
f96ec2a2 |
6 | $ENV{PERL5LIB} = '../lib'; |
f70c35af |
7 | if ( ord("\t") != 9 ) { # skip on ebcdic platforms |
8 | print "1..0 # Skip utf8 tests on ebcdic platform.\n"; |
9 | exit; |
10 | } |
f96ec2a2 |
11 | } |
12 | |
a9917092 |
13 | print "1..103\n"; |
f96ec2a2 |
14 | |
15 | my $test = 1; |
16 | |
17 | sub ok { |
18 | my ($got,$expect) = @_; |
19 | print "# expected [$expect], got [$got]\nnot " if $got ne $expect; |
20 | print "ok $test\n"; |
21 | } |
22 | |
7bbb0251 |
23 | sub nok { |
24 | my ($got,$expect) = @_; |
25 | print "# expected not [$expect], got [$got]\nnot " if $got eq $expect; |
26 | print "ok $test\n"; |
27 | } |
28 | |
be341bce |
29 | sub ok_bytes { |
30 | use bytes; |
31 | my ($got,$expect) = @_; |
32 | print "# expected [$expect], got [$got]\nnot " if $got ne $expect; |
33 | print "ok $test\n"; |
34 | } |
35 | |
7bbb0251 |
36 | sub nok_bytes { |
37 | use bytes; |
38 | my ($got,$expect) = @_; |
39 | print "# expected not [$expect], got [$got]\nnot " if $got eq $expect; |
40 | print "ok $test\n"; |
41 | } |
be341bce |
42 | |
f96ec2a2 |
43 | { |
44 | use utf8; |
45 | $_ = ">\x{263A}<"; |
46 | s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg; |
47 | ok $_, '>☺<'; |
c5cc3500 |
48 | $test++; # 1 |
f96ec2a2 |
49 | |
50 | $_ = ">\x{263A}<"; |
51 | my $rx = "\x{80}-\x{10ffff}"; |
52 | s/([$rx])/"&#".ord($1).";"/eg; |
53 | ok $_, '>☺<'; |
c5cc3500 |
54 | $test++; # 2 |
f96ec2a2 |
55 | |
56 | $_ = ">\x{263A}<"; |
57 | my $rx = "\\x{80}-\\x{10ffff}"; |
58 | s/([$rx])/"&#".ord($1).";"/eg; |
59 | ok $_, '>☺<'; |
c5cc3500 |
60 | $test++; # 3 |
b8c5462f |
61 | |
62 | $_ = "alpha,numeric"; |
63 | m/([[:alpha:]]+)/; |
64 | ok $1, 'alpha'; |
c5cc3500 |
65 | $test++; # 4 |
b8c5462f |
66 | |
67 | $_ = "alphaNUMERICstring"; |
68 | m/([[:^lower:]]+)/; |
69 | ok $1, 'NUMERIC'; |
c5cc3500 |
70 | $test++; # 5 |
b8c5462f |
71 | |
72 | $_ = "alphaNUMERICstring"; |
73 | m/(\p{Ll}+)/; |
74 | ok $1, 'alpha'; |
c5cc3500 |
75 | $test++; # 6 |
b8c5462f |
76 | |
77 | $_ = "alphaNUMERICstring"; |
78 | m/(\p{Lu}+)/; |
79 | ok $1, 'NUMERIC'; |
c5cc3500 |
80 | $test++; # 7 |
b8c5462f |
81 | |
82 | $_ = "alpha,numeric"; |
83 | m/([\p{IsAlpha}]+)/; |
84 | ok $1, 'alpha'; |
c5cc3500 |
85 | $test++; # 8 |
b8c5462f |
86 | |
87 | $_ = "alphaNUMERICstring"; |
88 | m/([^\p{IsLower}]+)/; |
89 | ok $1, 'NUMERIC'; |
c5cc3500 |
90 | $test++; # 9 |
b8c5462f |
91 | |
0f4b6630 |
92 | $_ = "alpha123numeric456"; |
93 | m/([\p{IsDigit}]+)/; |
94 | ok $1, '123'; |
c5cc3500 |
95 | $test++; # 10 |
b8c5462f |
96 | |
0f4b6630 |
97 | $_ = "alpha123numeric456"; |
98 | m/([^\p{IsDigit}]+)/; |
99 | ok $1, 'alpha'; |
c5cc3500 |
100 | $test++; # 11 |
b8c5462f |
101 | |
0f4b6630 |
102 | $_ = ",123alpha,456numeric"; |
103 | m/([\p{IsAlnum}]+)/; |
104 | ok $1, '123alpha'; |
c5cc3500 |
105 | $test++; # 12 |
0f4b6630 |
106 | } |
3b5dab68 |
107 | |
a197cbdd |
108 | { |
109 | use utf8; |
110 | |
111 | $_ = "\x{263A}>\x{263A}\x{263A}"; |
112 | |
113 | ok length, 4; |
c5cc3500 |
114 | $test++; # 13 |
a197cbdd |
115 | |
116 | ok length((m/>(.)/)[0]), 1; |
c5cc3500 |
117 | $test++; # 14 |
a197cbdd |
118 | |
119 | ok length($&), 2; |
c5cc3500 |
120 | $test++; # 15 |
a197cbdd |
121 | |
122 | ok length($'), 1; |
c5cc3500 |
123 | $test++; # 16 |
a197cbdd |
124 | |
125 | ok length($`), 1; |
c5cc3500 |
126 | $test++; # 17 |
a197cbdd |
127 | |
128 | ok length($1), 1; |
c5cc3500 |
129 | $test++; # 18 |
a197cbdd |
130 | |
131 | ok length($tmp=$&), 2; |
c5cc3500 |
132 | $test++; # 19 |
a197cbdd |
133 | |
134 | ok length($tmp=$'), 1; |
c5cc3500 |
135 | $test++; # 20 |
a197cbdd |
136 | |
137 | ok length($tmp=$`), 1; |
c5cc3500 |
138 | $test++; # 21 |
a197cbdd |
139 | |
140 | ok length($tmp=$1), 1; |
c5cc3500 |
141 | $test++; # 22 |
a197cbdd |
142 | |
c5cc3500 |
143 | { |
be341bce |
144 | use bytes; |
c5cc3500 |
145 | |
be341bce |
146 | my $tmp = $&; |
c5cc3500 |
147 | ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272); |
148 | $test++; # 23 |
a197cbdd |
149 | |
c5cc3500 |
150 | $tmp = $'; |
151 | ok $tmp, pack("C*", 0342, 0230, 0272); |
152 | $test++; # 24 |
a197cbdd |
153 | |
c5cc3500 |
154 | $tmp = $`; |
155 | ok $tmp, pack("C*", 0342, 0230, 0272); |
156 | $test++; # 25 |
a197cbdd |
157 | |
c5cc3500 |
158 | $tmp = $1; |
159 | ok $tmp, pack("C*", 0342, 0230, 0272); |
160 | $test++; # 26 |
161 | } |
a197cbdd |
162 | |
be341bce |
163 | ok_bytes $&, pack("C*", ord(">"), 0342, 0230, 0272); |
164 | $test++; # 27 |
165 | |
166 | ok_bytes $', pack("C*", 0342, 0230, 0272); |
167 | $test++; # 28 |
168 | |
169 | ok_bytes $`, pack("C*", 0342, 0230, 0272); |
170 | $test++; # 29 |
171 | |
172 | ok_bytes $1, pack("C*", 0342, 0230, 0272); |
173 | $test++; # 30 |
174 | |
a197cbdd |
175 | { |
176 | use bytes; |
177 | no utf8; |
178 | |
179 | ok length, 10; |
be341bce |
180 | $test++; # 31 |
a197cbdd |
181 | |
182 | ok length((m/>(.)/)[0]), 1; |
be341bce |
183 | $test++; # 32 |
a197cbdd |
184 | |
185 | ok length($&), 2; |
be341bce |
186 | $test++; # 33 |
a197cbdd |
187 | |
188 | ok length($'), 5; |
be341bce |
189 | $test++; # 34 |
a197cbdd |
190 | |
191 | ok length($`), 3; |
be341bce |
192 | $test++; # 35 |
a197cbdd |
193 | |
194 | ok length($1), 1; |
be341bce |
195 | $test++; # 36 |
a197cbdd |
196 | |
197 | ok $&, pack("C*", ord(">"), 0342); |
be341bce |
198 | $test++; # 37 |
a197cbdd |
199 | |
200 | ok $', pack("C*", 0230, 0272, 0342, 0230, 0272); |
be341bce |
201 | $test++; # 38 |
a197cbdd |
202 | |
203 | ok $`, pack("C*", 0342, 0230, 0272); |
be341bce |
204 | $test++; # 39 |
a197cbdd |
205 | |
206 | ok $1, pack("C*", 0342); |
be341bce |
207 | $test++; # 40 |
a197cbdd |
208 | } |
209 | |
a197cbdd |
210 | { |
211 | no utf8; |
212 | $_="\342\230\272>\342\230\272\342\230\272"; |
213 | } |
214 | |
215 | ok length, 10; |
be341bce |
216 | $test++; # 41 |
a197cbdd |
217 | |
218 | ok length((m/>(.)/)[0]), 1; |
be341bce |
219 | $test++; # 42 |
a197cbdd |
220 | |
221 | ok length($&), 2; |
be341bce |
222 | $test++; # 43 |
a197cbdd |
223 | |
224 | ok length($'), 1; |
be341bce |
225 | $test++; # 44 |
a197cbdd |
226 | |
227 | ok length($`), 1; |
be341bce |
228 | $test++; # 45 |
a197cbdd |
229 | |
230 | ok length($1), 1; |
be341bce |
231 | $test++; # 46 |
a197cbdd |
232 | |
233 | ok length($tmp=$&), 2; |
be341bce |
234 | $test++; # 47 |
a197cbdd |
235 | |
236 | ok length($tmp=$'), 1; |
be341bce |
237 | $test++; # 48 |
a197cbdd |
238 | |
239 | ok length($tmp=$`), 1; |
be341bce |
240 | $test++; # 49 |
a197cbdd |
241 | |
242 | ok length($tmp=$1), 1; |
be341bce |
243 | $test++; # 50 |
a197cbdd |
244 | |
c5cc3500 |
245 | { |
246 | use bytes; |
a197cbdd |
247 | |
c5cc3500 |
248 | my $tmp = $&; |
249 | ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272); |
be341bce |
250 | $test++; # 51 |
a197cbdd |
251 | |
c5cc3500 |
252 | $tmp = $'; |
253 | ok $tmp, pack("C*", 0342, 0230, 0272); |
be341bce |
254 | $test++; # 52 |
a197cbdd |
255 | |
c5cc3500 |
256 | $tmp = $`; |
257 | ok $tmp, pack("C*", 0342, 0230, 0272); |
be341bce |
258 | $test++; # 53 |
a197cbdd |
259 | |
c5cc3500 |
260 | $tmp = $1; |
261 | ok $tmp, pack("C*", 0342, 0230, 0272); |
be341bce |
262 | $test++; # 54 |
c5cc3500 |
263 | } |
3b5dab68 |
264 | |
a197cbdd |
265 | { |
266 | use bytes; |
267 | no utf8; |
268 | |
269 | ok length, 10; |
be341bce |
270 | $test++; # 55 |
a197cbdd |
271 | |
272 | ok length((m/>(.)/)[0]), 1; |
be341bce |
273 | $test++; # 56 |
a197cbdd |
274 | |
275 | ok length($&), 2; |
be341bce |
276 | $test++; # 57 |
a197cbdd |
277 | |
278 | ok length($'), 5; |
be341bce |
279 | $test++; # 58 |
a197cbdd |
280 | |
281 | ok length($`), 3; |
be341bce |
282 | $test++; # 59 |
a197cbdd |
283 | |
284 | ok length($1), 1; |
be341bce |
285 | $test++; # 60 |
a197cbdd |
286 | |
287 | ok $&, pack("C*", ord(">"), 0342); |
be341bce |
288 | $test++; # 61 |
a197cbdd |
289 | |
290 | ok $', pack("C*", 0230, 0272, 0342, 0230, 0272); |
be341bce |
291 | $test++; # 62 |
a197cbdd |
292 | |
293 | ok $`, pack("C*", 0342, 0230, 0272); |
be341bce |
294 | $test++; # 63 |
a197cbdd |
295 | |
296 | ok $1, pack("C*", 0342); |
be341bce |
297 | $test++; # 64 |
a197cbdd |
298 | } |
de35ba6f |
299 | |
300 | ok "\x{ab}" =~ /^\x{ab}$/, 1; |
be341bce |
301 | $test++; # 65 |
a197cbdd |
302 | } |
aaa68c4a |
303 | |
304 | { |
305 | use utf8; |
306 | ok_bytes chr(0xe2), pack("C*", 0xc3, 0xa2); |
307 | $test++; # 66 |
308 | } |
28cb3359 |
309 | |
310 | { |
311 | use utf8; |
312 | my @a = map ord, split(//, join("", map chr, (1234, 123, 2345))); |
313 | ok "@a", "1234 123 2345"; |
314 | $test++; # 67 |
315 | } |
316 | |
317 | { |
318 | use utf8; |
319 | my $x = chr(123); |
320 | my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345))); |
321 | ok "@a", "1234 2345"; |
322 | $test++; # 68 |
323 | } |
31067593 |
324 | |
7bbb0251 |
325 | { |
da450f52 |
326 | # bug id 20001009.001 |
327 | |
89491803 |
328 | my ($a, $b); |
329 | |
330 | { use bytes; $a = "\xc3\xa4" } |
331 | { use utf8; $b = "\xe4" } # \xXX must not produce UTF-8 |
332 | |
333 | print "not " if $a eq $b; |
334 | print "ok $test\n"; $test++; |
335 | |
336 | { use utf8; print "not " if $a eq $b; } |
337 | print "ok $test\n"; $test++; |
7bbb0251 |
338 | } |
31067593 |
339 | |
340 | { |
da450f52 |
341 | # bug id 20001008.001 |
342 | |
31067593 |
343 | my @x = ("stra\337e 138","stra\337e 138"); |
344 | for (@x) { |
345 | s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; |
346 | my($latin) = /^(.+)(?:\s+\d)/; |
347 | print $latin eq "stra\337e" ? "ok $test\n" : |
348 | "#latin[$latin]\nnot ok $test\n"; |
349 | $test++; |
350 | $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a |
351 | use utf8; |
352 | $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a |
353 | } |
354 | } |
b7018214 |
355 | |
356 | { |
da450f52 |
357 | # bug id 20000819.004 |
358 | |
b7018214 |
359 | $_ = $dx = "\x{10f2}"; |
360 | s/($dx)/$dx$1/; |
361 | { |
362 | use bytes; |
363 | print "not " unless $_ eq "$dx$dx"; |
364 | print "ok $test\n"; |
365 | $test++; |
366 | } |
367 | |
368 | $_ = $dx = "\x{10f2}"; |
369 | s/($dx)/$1$dx/; |
370 | { |
371 | use bytes; |
372 | print "not " unless $_ eq "$dx$dx"; |
373 | print "ok $test\n"; |
374 | $test++; |
375 | } |
376 | |
377 | $dx = "\x{10f2}"; |
378 | $_ = "\x{10f2}\x{10f2}"; |
379 | s/($dx)($dx)/$1$2/; |
380 | { |
381 | use bytes; |
382 | print "not " unless $_ eq "$dx$dx"; |
383 | print "ok $test\n"; |
384 | $test++; |
385 | } |
386 | } |
8b72f7e2 |
387 | |
388 | { |
da450f52 |
389 | # bug id 20000323.056 |
390 | |
8b72f7e2 |
391 | print "not " unless "\x{41}" eq +v65; |
392 | print "ok $test\n"; |
393 | $test++; |
394 | |
395 | print "not " unless "\x41" eq +v65; |
396 | print "ok $test\n"; |
397 | $test++; |
398 | |
399 | print "not " unless "\x{c8}" eq +v200; |
400 | print "ok $test\n"; |
401 | $test++; |
402 | |
403 | print "not " unless "\xc8" eq +v200; |
404 | print "ok $test\n"; |
405 | $test++; |
406 | |
407 | print "not " unless "\x{221b}" eq v8731; |
408 | print "ok $test\n"; |
409 | $test++; |
410 | } |
da450f52 |
411 | |
412 | { |
413 | # bug id 20000427.003 |
414 | |
415 | use utf8; |
416 | use warnings; |
417 | use strict; |
418 | |
419 | my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}"; |
420 | |
421 | my @charlist = split //, $sushi; |
422 | my $r = ''; |
423 | foreach my $ch (@charlist) { |
424 | $r = $r . " " . sprintf "U+%04X", ord($ch); |
425 | } |
426 | |
427 | print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B"; |
428 | print "ok $test\n"; |
429 | $test++; |
430 | } |
431 | |
432 | { |
433 | # bug id 20000901.092 |
434 | # test that undef left and right of utf8 results in a valid string |
435 | |
436 | my $a; |
437 | $a .= "\x{1ff}"; |
438 | print "not " unless $a eq "\x{1ff}"; |
439 | print "ok $test\n"; |
440 | $test++; |
441 | } |
93f04dac |
442 | |
443 | { |
444 | # bug id 20000426.003 |
445 | |
446 | use utf8; |
447 | |
448 | my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20"; |
449 | |
450 | my ($a, $b, $c) = split(/\x40/, $s); |
451 | print "not " |
452 | unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a; |
453 | print "ok $test\n"; |
454 | $test++; |
455 | |
456 | my ($a, $b) = split(/\x{100}/, $s); |
457 | print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20"; |
458 | print "ok $test\n"; |
459 | $test++; |
460 | |
461 | my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s); |
462 | print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20"; |
463 | print "ok $test\n"; |
464 | $test++; |
465 | |
466 | my ($a, $b) = split(/\x40\x{80}/, $s); |
467 | print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20"; |
468 | print "ok $test\n"; |
469 | $test++; |
470 | |
471 | my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s); |
472 | print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20"; |
473 | print "ok $test\n"; |
474 | $test++; |
475 | } |
60ff4832 |
476 | |
477 | { |
478 | # bug id 20000730.004 |
479 | |
480 | use utf8; |
481 | |
482 | my $smiley = "\x{263a}"; |
483 | |
484 | for my $s ("\x{263a}", # 1 |
485 | $smiley, # 2 |
486 | |
487 | "" . $smiley, # 3 |
488 | "" . "\x{263a}", # 4 |
489 | |
490 | $smiley . "", # 5 |
491 | "\x{263a}" . "", # 6 |
492 | ) { |
493 | my $length_chars = length($s); |
494 | my $length_bytes; |
495 | { use bytes; $length_bytes = length($s) } |
496 | my @regex_chars = $s =~ m/(.)/g; |
497 | my $regex_chars = @regex_chars; |
498 | my @split_chars = split //, $s; |
499 | my $split_chars = @split_chars; |
500 | print "not " |
501 | unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq |
502 | "1/1/1/3"; |
503 | print "ok $test\n"; |
504 | $test++; |
505 | } |
506 | |
507 | for my $s ("\x{263a}" . "\x{263a}", # 7 |
508 | $smiley . $smiley, # 8 |
509 | |
510 | "\x{263a}\x{263a}", # 9 |
511 | "$smiley$smiley", # 10 |
512 | |
513 | "\x{263a}" x 2, # 11 |
514 | $smiley x 2, # 12 |
515 | ) { |
516 | my $length_chars = length($s); |
517 | my $length_bytes; |
518 | { use bytes; $length_bytes = length($s) } |
519 | my @regex_chars = $s =~ m/(.)/g; |
520 | my $regex_chars = @regex_chars; |
521 | my @split_chars = split //, $s; |
522 | my $split_chars = @split_chars; |
523 | print "not " |
524 | unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq |
525 | "2/2/2/6"; |
526 | print "ok $test\n"; |
527 | $test++; |
528 | } |
529 | } |
689440ec |
530 | |
531 | { |
532 | # ID 20001020.006 |
533 | |
534 | "x" =~ /(.)/; # unset $2 |
535 | |
536 | # Without the fix this will croak: |
537 | # Modification of a read-only value attempted at ... |
538 | "$2\x{1234}"; |
539 | |
540 | print "ok $test\n"; |
541 | $test++; |
542 | |
543 | # For symmetry with the above. |
544 | "\x{1234}$2"; |
545 | |
546 | print "ok $test\n"; |
547 | $test++; |
5280a8e5 |
548 | |
549 | *pi = \undef; |
550 | # This bug existed earlier than the $2 bug, but is fixed with the same |
551 | # patch. Without the fix this will also croak: |
552 | # Modification of a read-only value attempted at ... |
553 | "$pi\x{1234}"; |
554 | |
555 | print "ok $test\n"; |
556 | $test++; |
557 | |
558 | # For symmetry with the above. |
559 | "\x{1234}$pi"; |
560 | |
561 | print "ok $test\n"; |
562 | $test++; |
689440ec |
563 | } |