Commit | Line | Data |
f96ec2a2 |
1 | #!./perl |
2 | |
3 | BEGIN { |
4 | chdir 't' if -d 't'; |
20822f61 |
5 | @INC = '../lib'; |
f96ec2a2 |
6 | $ENV{PERL5LIB} = '../lib'; |
f70c35af |
7 | if ( ord("\t") != 9 ) { # skip on ebcdic platforms |
8 | print "1..0 # Skip utf8 tests on ebcdic platform.\n"; |
9 | exit; |
10 | } |
f96ec2a2 |
11 | } |
12 | |
0c981600 |
13 | print "1..105\n"; |
f96ec2a2 |
14 | |
15 | my $test = 1; |
16 | |
17 | sub ok { |
18 | my ($got,$expect) = @_; |
19 | print "# expected [$expect], got [$got]\nnot " if $got ne $expect; |
20 | print "ok $test\n"; |
21 | } |
22 | |
7bbb0251 |
23 | sub nok { |
24 | my ($got,$expect) = @_; |
25 | print "# expected not [$expect], got [$got]\nnot " if $got eq $expect; |
26 | print "ok $test\n"; |
27 | } |
28 | |
be341bce |
29 | sub ok_bytes { |
30 | use bytes; |
31 | my ($got,$expect) = @_; |
32 | print "# expected [$expect], got [$got]\nnot " if $got ne $expect; |
33 | print "ok $test\n"; |
34 | } |
35 | |
7bbb0251 |
36 | sub nok_bytes { |
37 | use bytes; |
38 | my ($got,$expect) = @_; |
39 | print "# expected not [$expect], got [$got]\nnot " if $got eq $expect; |
40 | print "ok $test\n"; |
41 | } |
be341bce |
42 | |
f96ec2a2 |
43 | { |
44 | use utf8; |
ffc61ed2 |
45 | |
f96ec2a2 |
46 | $_ = ">\x{263A}<"; |
47 | s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg; |
48 | ok $_, '>☺<'; |
c5cc3500 |
49 | $test++; # 1 |
f96ec2a2 |
50 | |
51 | $_ = ">\x{263A}<"; |
52 | my $rx = "\x{80}-\x{10ffff}"; |
53 | s/([$rx])/"&#".ord($1).";"/eg; |
54 | ok $_, '>☺<'; |
c5cc3500 |
55 | $test++; # 2 |
f96ec2a2 |
56 | |
57 | $_ = ">\x{263A}<"; |
58 | my $rx = "\\x{80}-\\x{10ffff}"; |
59 | s/([$rx])/"&#".ord($1).";"/eg; |
60 | ok $_, '>☺<'; |
c5cc3500 |
61 | $test++; # 3 |
b8c5462f |
62 | |
63 | $_ = "alpha,numeric"; |
64 | m/([[:alpha:]]+)/; |
65 | ok $1, 'alpha'; |
c5cc3500 |
66 | $test++; # 4 |
b8c5462f |
67 | |
68 | $_ = "alphaNUMERICstring"; |
69 | m/([[:^lower:]]+)/; |
70 | ok $1, 'NUMERIC'; |
c5cc3500 |
71 | $test++; # 5 |
b8c5462f |
72 | |
73 | $_ = "alphaNUMERICstring"; |
74 | m/(\p{Ll}+)/; |
75 | ok $1, 'alpha'; |
c5cc3500 |
76 | $test++; # 6 |
b8c5462f |
77 | |
78 | $_ = "alphaNUMERICstring"; |
79 | m/(\p{Lu}+)/; |
80 | ok $1, 'NUMERIC'; |
c5cc3500 |
81 | $test++; # 7 |
b8c5462f |
82 | |
83 | $_ = "alpha,numeric"; |
84 | m/([\p{IsAlpha}]+)/; |
85 | ok $1, 'alpha'; |
c5cc3500 |
86 | $test++; # 8 |
b8c5462f |
87 | |
88 | $_ = "alphaNUMERICstring"; |
89 | m/([^\p{IsLower}]+)/; |
90 | ok $1, 'NUMERIC'; |
c5cc3500 |
91 | $test++; # 9 |
b8c5462f |
92 | |
0f4b6630 |
93 | $_ = "alpha123numeric456"; |
94 | m/([\p{IsDigit}]+)/; |
95 | ok $1, '123'; |
c5cc3500 |
96 | $test++; # 10 |
b8c5462f |
97 | |
0f4b6630 |
98 | $_ = "alpha123numeric456"; |
99 | m/([^\p{IsDigit}]+)/; |
100 | ok $1, 'alpha'; |
c5cc3500 |
101 | $test++; # 11 |
b8c5462f |
102 | |
0f4b6630 |
103 | $_ = ",123alpha,456numeric"; |
104 | m/([\p{IsAlnum}]+)/; |
105 | ok $1, '123alpha'; |
c5cc3500 |
106 | $test++; # 12 |
0f4b6630 |
107 | } |
3b5dab68 |
108 | |
a197cbdd |
109 | { |
ffc61ed2 |
110 | # no use utf8 needed |
111 | $_ = "\x{263A}\x{263A}x\x{263A}y\x{263A}"; |
112 | |
113 | ok length($_), 6; # 13 |
114 | $test++; |
a197cbdd |
115 | |
ffc61ed2 |
116 | ($a) = m/x(.)/; |
a197cbdd |
117 | |
ffc61ed2 |
118 | ok length($a), 1; # 14 |
119 | $test++; |
a197cbdd |
120 | |
ffc61ed2 |
121 | ok length($`), 2; # 15 |
122 | $test++; |
123 | ok length($&), 2; # 16 |
124 | $test++; |
125 | ok length($'), 2; # 17 |
126 | $test++; |
a197cbdd |
127 | |
ffc61ed2 |
128 | ok length($1), 1; # 18 |
129 | $test++; |
a197cbdd |
130 | |
ffc61ed2 |
131 | ok length($b=$`), 2; # 19 |
132 | $test++; |
a197cbdd |
133 | |
ffc61ed2 |
134 | ok length($b=$&), 2; # 20 |
135 | $test++; |
a197cbdd |
136 | |
ffc61ed2 |
137 | ok length($b=$'), 2; # 21 |
138 | $test++; |
a197cbdd |
139 | |
ffc61ed2 |
140 | ok length($b=$1), 1; # 22 |
141 | $test++; |
c5cc3500 |
142 | |
ffc61ed2 |
143 | ok $a, "\x{263A}"; # 23 |
144 | $test++; |
a197cbdd |
145 | |
ffc61ed2 |
146 | ok $`, "\x{263A}\x{263A}"; # 24 |
147 | $test++; |
a197cbdd |
148 | |
ffc61ed2 |
149 | ok $&, "x\x{263A}"; # 25 |
150 | $test++; |
a197cbdd |
151 | |
ffc61ed2 |
152 | ok $', "y\x{263A}"; # 26 |
153 | $test++; |
a197cbdd |
154 | |
ffc61ed2 |
155 | ok $1, "\x{263A}"; # 27 |
156 | $test++; |
be341bce |
157 | |
ffc61ed2 |
158 | ok_bytes $a, "\342\230\272"; # 28 |
159 | $test++; |
be341bce |
160 | |
ffc61ed2 |
161 | ok_bytes $1, "\342\230\272"; # 29 |
162 | $test++; |
be341bce |
163 | |
ffc61ed2 |
164 | ok_bytes $&, "x\342\230\272"; # 30 |
165 | $test++; |
be341bce |
166 | |
a197cbdd |
167 | { |
ffc61ed2 |
168 | use utf8; # required |
169 | $_ = chr(0x263A) . chr(0x263A) . 'x' . chr(0x263A) . 'y' . chr(0x263A); |
170 | } |
a197cbdd |
171 | |
ffc61ed2 |
172 | ok length($_), 6; # 31 |
173 | $test++; |
a197cbdd |
174 | |
ffc61ed2 |
175 | ($a) = m/x(.)/; |
a197cbdd |
176 | |
ffc61ed2 |
177 | ok length($a), 1; # 32 |
178 | $test++; |
a197cbdd |
179 | |
ffc61ed2 |
180 | ok length($`), 2; # 33 |
181 | $test++; |
a197cbdd |
182 | |
ffc61ed2 |
183 | ok length($&), 2; # 34 |
184 | $test++; |
a197cbdd |
185 | |
ffc61ed2 |
186 | ok length($'), 2; # 35 |
187 | $test++; |
a197cbdd |
188 | |
ffc61ed2 |
189 | ok length($1), 1; # 36 |
190 | $test++; |
a197cbdd |
191 | |
ffc61ed2 |
192 | ok length($b=$`), 2; # 37 |
193 | $test++; |
a197cbdd |
194 | |
ffc61ed2 |
195 | ok length($b=$&), 2; # 38 |
196 | $test++; |
a197cbdd |
197 | |
ffc61ed2 |
198 | ok length($b=$'), 2; # 39 |
199 | $test++; |
a197cbdd |
200 | |
ffc61ed2 |
201 | ok length($b=$1), 1; # 40 |
202 | $test++; |
a197cbdd |
203 | |
ffc61ed2 |
204 | ok $a, "\x{263A}"; # 41 |
205 | $test++; |
a197cbdd |
206 | |
ffc61ed2 |
207 | ok $`, "\x{263A}\x{263A}"; # 42 |
208 | $test++; |
a197cbdd |
209 | |
ffc61ed2 |
210 | ok $&, "x\x{263A}"; # 43 |
211 | $test++; |
a197cbdd |
212 | |
ffc61ed2 |
213 | ok $', "y\x{263A}"; # 44 |
214 | $test++; |
a197cbdd |
215 | |
ffc61ed2 |
216 | ok $1, "\x{263A}"; # 45 |
217 | $test++; |
a197cbdd |
218 | |
ffc61ed2 |
219 | ok_bytes $a, "\342\230\272"; # 46 |
220 | $test++; |
a197cbdd |
221 | |
ffc61ed2 |
222 | ok_bytes $1, "\342\230\272"; # 47 |
223 | $test++; |
a197cbdd |
224 | |
ffc61ed2 |
225 | ok_bytes $&, "x\342\230\272"; # 48 |
226 | $test++; |
a197cbdd |
227 | |
ffc61ed2 |
228 | $_ = "\342\230\272\342\230\272x\342\230\272y\342\230\272"; |
a197cbdd |
229 | |
ffc61ed2 |
230 | ok length($_), 14; # 49 |
231 | $test++; |
a197cbdd |
232 | |
ffc61ed2 |
233 | ($a) = m/x(.)/; |
a197cbdd |
234 | |
ffc61ed2 |
235 | ok length($a), 1; # 50 |
236 | $test++; |
a197cbdd |
237 | |
ffc61ed2 |
238 | ok length($`), 6; # 51 |
239 | $test++; |
a197cbdd |
240 | |
ffc61ed2 |
241 | ok length($&), 2; # 52 |
242 | $test++; |
3b5dab68 |
243 | |
ffc61ed2 |
244 | ok length($'), 6; # 53 |
245 | $test++; |
a197cbdd |
246 | |
ffc61ed2 |
247 | ok length($1), 1; # 54 |
248 | $test++; |
a197cbdd |
249 | |
ffc61ed2 |
250 | ok length($b=$`), 6; # 55 |
251 | $test++; |
a197cbdd |
252 | |
ffc61ed2 |
253 | ok length($b=$&), 2; # 56 |
254 | $test++; |
a197cbdd |
255 | |
ffc61ed2 |
256 | ok length($b=$'), 6; # 57 |
257 | $test++; |
a197cbdd |
258 | |
ffc61ed2 |
259 | ok length($b=$1), 1; # 58 |
260 | $test++; |
a197cbdd |
261 | |
ffc61ed2 |
262 | ok $a, "\342"; # 59 |
263 | $test++; |
a197cbdd |
264 | |
ffc61ed2 |
265 | ok $`, "\342\230\272\342\230\272"; # 60 |
266 | $test++; |
a197cbdd |
267 | |
ffc61ed2 |
268 | ok $&, "x\342"; # 61 |
269 | $test++; |
a197cbdd |
270 | |
ffc61ed2 |
271 | ok $', "\230\272y\342\230\272"; # 62 |
272 | $test++; |
a197cbdd |
273 | |
ffc61ed2 |
274 | ok $1, "\342"; # 63 |
275 | $test++; |
276 | } |
de35ba6f |
277 | |
ffc61ed2 |
278 | { |
279 | use utf8; |
de35ba6f |
280 | ok "\x{ab}" =~ /^\x{ab}$/, 1; |
ffc61ed2 |
281 | $test++; # 64 |
a197cbdd |
282 | } |
aaa68c4a |
283 | |
284 | { |
285 | use utf8; |
286 | ok_bytes chr(0xe2), pack("C*", 0xc3, 0xa2); |
ffc61ed2 |
287 | $test++; # 65 |
aaa68c4a |
288 | } |
28cb3359 |
289 | |
290 | { |
291 | use utf8; |
292 | my @a = map ord, split(//, join("", map chr, (1234, 123, 2345))); |
293 | ok "@a", "1234 123 2345"; |
ffc61ed2 |
294 | $test++; # 66 |
28cb3359 |
295 | } |
296 | |
297 | { |
298 | use utf8; |
299 | my $x = chr(123); |
300 | my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345))); |
301 | ok "@a", "1234 2345"; |
ffc61ed2 |
302 | $test++; # 67 |
28cb3359 |
303 | } |
31067593 |
304 | |
7bbb0251 |
305 | { |
da450f52 |
306 | # bug id 20001009.001 |
307 | |
89491803 |
308 | my ($a, $b); |
309 | |
310 | { use bytes; $a = "\xc3\xa4" } |
311 | { use utf8; $b = "\xe4" } # \xXX must not produce UTF-8 |
312 | |
313 | print "not " if $a eq $b; |
ffc61ed2 |
314 | print "ok $test\n"; $test++; # 68 |
89491803 |
315 | |
316 | { use utf8; print "not " if $a eq $b; } |
ffc61ed2 |
317 | print "ok $test\n"; $test++; # 69 |
7bbb0251 |
318 | } |
31067593 |
319 | |
320 | { |
da450f52 |
321 | # bug id 20001008.001 |
322 | |
31067593 |
323 | my @x = ("stra\337e 138","stra\337e 138"); |
324 | for (@x) { |
325 | s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; |
326 | my($latin) = /^(.+)(?:\s+\d)/; |
ffc61ed2 |
327 | print $latin eq "stra\337e" ? "ok $test\n" : # 70, 71 |
31067593 |
328 | "#latin[$latin]\nnot ok $test\n"; |
329 | $test++; |
330 | $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a |
331 | use utf8; |
332 | $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a |
333 | } |
334 | } |
b7018214 |
335 | |
336 | { |
da450f52 |
337 | # bug id 20000427.003 |
338 | |
339 | use utf8; |
340 | use warnings; |
341 | use strict; |
342 | |
343 | my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}"; |
344 | |
345 | my @charlist = split //, $sushi; |
346 | my $r = ''; |
347 | foreach my $ch (@charlist) { |
348 | $r = $r . " " . sprintf "U+%04X", ord($ch); |
349 | } |
350 | |
351 | print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B"; |
ffc61ed2 |
352 | print "ok $test\n"; # 72 |
da450f52 |
353 | $test++; |
354 | } |
355 | |
356 | { |
93f04dac |
357 | # bug id 20000426.003 |
358 | |
359 | use utf8; |
360 | |
361 | my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20"; |
362 | |
363 | my ($a, $b, $c) = split(/\x40/, $s); |
364 | print "not " |
365 | unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a; |
366 | print "ok $test\n"; |
ffc61ed2 |
367 | $test++; # 73 |
93f04dac |
368 | |
369 | my ($a, $b) = split(/\x{100}/, $s); |
370 | print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20"; |
371 | print "ok $test\n"; |
ffc61ed2 |
372 | $test++; # 74 |
93f04dac |
373 | |
374 | my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s); |
375 | print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20"; |
376 | print "ok $test\n"; |
ffc61ed2 |
377 | $test++; # 75 |
93f04dac |
378 | |
379 | my ($a, $b) = split(/\x40\x{80}/, $s); |
380 | print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20"; |
381 | print "ok $test\n"; |
ffc61ed2 |
382 | $test++; # 76 |
93f04dac |
383 | |
384 | my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s); |
385 | print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20"; |
386 | print "ok $test\n"; |
ffc61ed2 |
387 | $test++; # 77 |
93f04dac |
388 | } |
60ff4832 |
389 | |
390 | { |
391 | # bug id 20000730.004 |
392 | |
393 | use utf8; |
394 | |
395 | my $smiley = "\x{263a}"; |
396 | |
ffc61ed2 |
397 | for my $s ("\x{263a}", # 78 |
398 | $smiley, # 79 |
60ff4832 |
399 | |
ffc61ed2 |
400 | "" . $smiley, # 80 |
401 | "" . "\x{263a}", # 81 |
60ff4832 |
402 | |
ffc61ed2 |
403 | $smiley . "", # 82 |
404 | "\x{263a}" . "", # 83 |
60ff4832 |
405 | ) { |
406 | my $length_chars = length($s); |
407 | my $length_bytes; |
408 | { use bytes; $length_bytes = length($s) } |
409 | my @regex_chars = $s =~ m/(.)/g; |
410 | my $regex_chars = @regex_chars; |
411 | my @split_chars = split //, $s; |
412 | my $split_chars = @split_chars; |
413 | print "not " |
414 | unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq |
415 | "1/1/1/3"; |
416 | print "ok $test\n"; |
417 | $test++; |
418 | } |
419 | |
ffc61ed2 |
420 | for my $s ("\x{263a}" . "\x{263a}", # 84 |
421 | $smiley . $smiley, # 85 |
60ff4832 |
422 | |
ffc61ed2 |
423 | "\x{263a}\x{263a}", # 86 |
424 | "$smiley$smiley", # 87 |
60ff4832 |
425 | |
ffc61ed2 |
426 | "\x{263a}" x 2, # 88 |
427 | $smiley x 2, # 89 |
60ff4832 |
428 | ) { |
429 | my $length_chars = length($s); |
430 | my $length_bytes; |
431 | { use bytes; $length_bytes = length($s) } |
432 | my @regex_chars = $s =~ m/(.)/g; |
433 | my $regex_chars = @regex_chars; |
434 | my @split_chars = split //, $s; |
435 | my $split_chars = @split_chars; |
436 | print "not " |
437 | unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq |
438 | "2/2/2/6"; |
439 | print "ok $test\n"; |
440 | $test++; |
441 | } |
442 | } |
ffc61ed2 |
443 | |
444 | { |
445 | use utf8; |
446 | |
447 | print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; |
448 | print "ok $test\n"; |
449 | $test++; # 90 |
450 | |
451 | print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; |
452 | print "ok $test\n"; |
453 | $test++; # 91 |
454 | |
455 | print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; |
456 | print "ok $test\n"; |
457 | $test++; # 92 |
458 | |
459 | print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; |
460 | print "ok $test\n"; |
461 | $test++; # 93 |
462 | |
463 | print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; |
464 | print "ok $test\n"; |
465 | $test++; # 94 |
466 | |
467 | print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; |
468 | print "ok $test\n"; |
469 | $test++; # 95 |
470 | |
471 | print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; |
472 | print "ok $test\n"; |
473 | $test++; # 96 |
474 | |
475 | print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; |
476 | print "ok $test\n"; |
477 | $test++; # 97 |
478 | } |
479 | |
480 | { |
481 | # the first half of 20001028.003 |
482 | |
483 | my $X = chr(1448); |
484 | my ($Y) = $X =~ /(.*)/; |
485 | print "not " unless length $Y == 1; |
486 | print "ok $test\n"; |
487 | $test++; # 98 |
488 | } |
489 | |
490 | { |
491 | # 20001108.001 |
492 | |
493 | use utf8; |
494 | my $X = "Szab\x{f3},Bal\x{e1}zs"; |
495 | my $Y = $X; |
496 | $Y =~ s/(B)/$1/ for 0..3; |
497 | print "not " unless $Y eq $X; |
498 | print "ok $test\n"; |
499 | $test++; # 99 |
500 | } |
501 | |
502 | { |
503 | # 20001114.001 |
504 | |
505 | use utf8; |
506 | use charnames ':full'; |
507 | my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}"; |
508 | print "not " unless ord($text) == 0xc4; |
509 | print "ok $test\n"; |
510 | $test++; # 100 |
511 | } |
512 | |
513 | { |
514 | # 20001205.014 |
515 | |
516 | use utf8; |
517 | |
518 | my $a = "ABC\x{263A}"; |
519 | |
520 | my @b = split( //, $a ); |
521 | |
522 | print "not " unless @b == 4; |
523 | print "ok $test\n"; |
524 | $test++; # 101 |
525 | |
526 | print "not " unless length($b[3]) == 1; |
527 | print "ok $test\n"; |
528 | $test++; # 102 |
529 | |
530 | $a =~ s/^A/Z/; |
531 | print "not " unless length($a) == 4; |
532 | print "ok $test\n"; |
533 | $test++; # 103 |
534 | } |
535 | |
536 | { |
537 | # the second half of 20001028.003 |
538 | |
539 | use utf8; |
540 | $X =~ s/^/chr(1488)/e; |
541 | print "not " unless length $X == 1; |
542 | print "ok $test\n"; |
543 | $test++; # 104 |
544 | } |
545 | |
11c06dba |
546 | { |
547 | # 20000517.001 |
548 | |
549 | my $x = "\x{100}A"; |
550 | |
551 | $x =~ s/A/B/; |
552 | |
553 | print "not " unless $x eq "\x{100}B" && length($x) == 2; |
554 | print "ok $test\n"; |
555 | $test++; # 105 |
556 | } |