Commit | Line | Data |
8ebc5c01 |
1 | #!./perl -wT |
2 | |
BEGIN {
    # Enter t/ when it exists, then put ../lib first on @INC.
    chdir 't' if -d 't';
    unshift @INC, '../lib';

    require Config;
    Config->import;

    # Without setlocale() support there is nothing to test: announce an
    # empty plan ("1..0") and stop.
    my $no_locale_support = !$Config{d_setlocale}
        || $Config{ccflags} =~ /\bD?NO_LOCALE\b/;
    if ($no_locale_support) {
        print "1..0\n";
        exit;
    }
}
12 | |
use strict;

# When true, debug() and debugf() print their diagnostics.
my $debug = 1;

# Becomes true only if POSIX loads and its locale symbols can be imported.
my $have_setlocale = 0;
eval {
    require POSIX;
    POSIX->import(':locale_h');
    $have_setlocale++;
};

# Predeclare LC_ALL so the bareword compiles under strict even when the
# POSIX import above did not happen.
use vars qw(&LC_ALL);

# The C runtime used by Visual C (and by mingw32, which shares it)
# mishandles locale names such as "en_US.ISO8859-1", so the setlocale
# part of the test is disabled for those compilers on Win32.
if ($^O eq 'MSWin32' and $Config{cc} =~ /^(cl|gcc)/i) {
    $have_setlocale = 0;
}

# Test 103 (the last one) may fail, which is sort-of okay: it points at
# something broken in the environment rather than in Perl.

printf "1..%d\n", $have_setlocale ? 103 : 98;

use vars qw(
    $a
    $English $German $French $Spanish
    @C @English @German @French @Spanish
    $Locale @Locale %UPPER %lower %bothcase @Neoalpha
);

$a = 'abc %';
41 | |
# ok($n, $result)
#   Print the result line for test number $n: "ok $n" when $result is
#   true, "not ok $n" otherwise.
sub ok {
    my ($n, $result) = @_;

    my $verdict = $result ? 'ok' : 'not ok';
    print "$verdict $n\n";
}
48 | |
49 | # First we'll do a lot of taint checking for locales. |
50 | # This is the easiest to test, actually, as any locale, |
51 | # even the default locale will taint under 'use locale'. |
52 | |
# is_tainted(LIST)
#   Return true when the eval below dies, false when it completes.
#   The join and the "kill 0" are deliberately kept in one statement
#   (the taint-detection idiom from the Camel book, second edition).
sub is_tainted {
    local $^W;    # silence warnings about undef arguments
    my $sink;
    my $survived = eval { $sink = join("", @_), kill 0; 1 };
    return !$survived;
}
58 | |
# check_taint(N, VALUE): test N passes when VALUE is tainted.
sub check_taint ($$) {
    my $tainted = is_tainted($_[1]);
    ok($_[0], $tainted);
}
62 | |
# check_taint_not(N, VALUE): test N passes when VALUE is NOT tainted.
sub check_taint_not ($$) {
    my $clean = !is_tainted($_[1]);
    ok($_[0], $clean);
}
66 | |
use locale;    # engage locale handling, and with it locale taint

# A plain literal carries no taint.
check_taint_not(1, $a);

# Every form of case mapping must taint its result.
check_taint(2, uc($a));
check_taint(3, "\U$a");
check_taint(4, ucfirst($a));
check_taint(5, "\u$a");
check_taint(6, lc($a));
check_taint(7, "\L$a");
check_taint(8, lcfirst($a));
check_taint(9, "\l$a");

# Floating-point formats must taint; integer formats must not.
check_taint(10, sprintf('%e', 123.456));
check_taint(11, sprintf('%f', 123.456));
check_taint(12, sprintf('%g', 123.456));
check_taint_not(13, sprintf('%d', 123.456));
check_taint_not(14, sprintf('%x', 123.456));

$_ = $a;        # untaint $_

$_ = uc($a);    # taint $_

check_taint(15, $_);

# \w must taint $&, $`, $', $+ and $1.
/(\w)/;
check_taint(16, $&);
check_taint(17, $`);
check_taint(18, $');
check_taint(19, $+);
check_taint(20, $1);
check_taint_not(21, $2);

# A plain "." must leave $&, $`, $', $+ and $1 untainted.
/(.)/;
check_taint_not(22, $&);
check_taint_not(23, $`);
check_taint_not(24, $');
check_taint_not(25, $+);
check_taint_not(26, $1);
check_taint_not(27, $2);

# \W must taint $&, $`, $', $+ and $1.
/(\W)/;
check_taint(28, $&);
check_taint(29, $`);
check_taint(30, $');
check_taint(31, $+);
check_taint(32, $1);
check_taint_not(33, $2);

# \s must taint $&, $`, $', $+ and $1.
/(\s)/;
check_taint(34, $&);
check_taint(35, $`);
check_taint(36, $');
check_taint(37, $+);
check_taint(38, $1);
check_taint_not(39, $2);

# \S must taint $&, $`, $', $+ and $1.
/(\S)/;
check_taint(40, $&);
check_taint(41, $`);
check_taint(42, $');
check_taint(43, $+);
check_taint(44, $1);
check_taint_not(45, $2);

$_ = $a;        # untaint $_

check_taint_not(46, $_);

# Matching a literal character must not taint anything.
/(b)/;
check_taint_not(47, $&);
check_taint_not(48, $`);
check_taint_not(49, $');
check_taint_not(50, $+);
check_taint_not(51, $1);
check_taint_not(52, $2);

$_ = $a;        # untaint $_

check_taint_not(53, $_);

# Substituting in a tainted replacement must taint $_ and nothing else.
$b = uc($a);    # taint $b
s/(.+)/$b/;

check_taint(54, $_);
check_taint_not(55, $&);
check_taint_not(56, $`);
check_taint_not(57, $');
check_taint_not(58, $+);
check_taint_not(59, $1);
check_taint_not(60, $2);

$_ = $a;        # untaint $_

# Substituting in a literal replacement must not taint anything.
s/(.+)/b/;
check_taint_not(61, $_);
check_taint_not(62, $&);
check_taint_not(63, $`);
check_taint_not(64, $');
check_taint_not(65, $+);
check_taint_not(66, $1);
check_taint_not(67, $2);

$b = $a;        # untaint $b

# Substituting on a copy must taint the copy and leave the source alone.
($b = $a) =~ s/\w/$&/;
check_taint(68, $b);        # $b should be tainted,
check_taint_not(69, $a);    # $a should not be.

$_ = $a;        # untaint $_

# \w in the pattern with \l in the replacement must taint.
s/(\w)/\l$1/;
check_taint(70, $_);
check_taint(71, $&);
check_taint(72, $`);
check_taint(73, $');
check_taint(74, $+);
check_taint(75, $1);
check_taint_not(76, $2);

$_ = $a;        # untaint $_

# ... and likewise with \L,
s/(\w)/\L$1/;
check_taint(77, $_);
check_taint(78, $&);
check_taint(79, $`);
check_taint(80, $');
check_taint(81, $+);
check_taint(82, $1);
check_taint_not(83, $2);

$_ = $a;        # untaint $_

# ... with \u,
s/(\w)/\u$1/;
check_taint(84, $_);
check_taint(85, $&);
check_taint(86, $`);
check_taint(87, $');
check_taint(88, $+);
check_taint(89, $1);
check_taint_not(90, $2);

$_ = $a;        # untaint $_

# ... and with \U.
s/(\w)/\U$1/;
check_taint(91, $_);
check_taint(92, $&);
check_taint(93, $`);
check_taint(94, $');
check_taint(95, $+);
check_taint(96, $1);
check_taint_not(97, $2);

# After all of the above, $a itself must still be clean.

check_taint_not(98, $a);
223 | |
224 | # I think we've seen quite enough of taint. |
225 | # Let us do some *real* locale work now, |
284102e8 |
226 | # unless setlocale() is missing (i.e. minitest). |
8ebc5c01 |
227 | |
# The remaining tests need a working setlocale().
exit unless $have_setlocale;

# Find locales.
#
# One record per line, four colon-separated fields:
#
#   locale name(s) : language code(s) : country code(s) : encoding(s)
#
# Each field holds a space-separated list.  An encoding that is a bare
# number N is expanded by decode_encodings() into the ISO 8859-N names.
# Language codes follow ISO 639 and country codes ISO 3166 (so Danish is
# language "da" in country "dk", Portuguese is "pt" in "pt" and "br").

my $locales = <<EOF;
Arabic:ar:dz eg sa:6 arabic8
Bulgarian:bg:bg:5
Chinese:zh:cn tw:cn.EUC eucCN eucTW euc.CN euc.TW tw.EUC
Croatian:hr:hr:2
Czech:cs:cz:2
Danish:da:dk:1
Dutch:nl:nl:1
English American British:en:au ca gb ie nz us uk:1 cp850
Estonian:et:ee:1
Finnish:fi:fi:1
French:fr:be ca ch fr:1
German:de:de at ch:1
Greek:el:gr:7 g8
Hebrew:iw:il:8 hebrew8
Hungarian:hu:hu:2
Icelandic:is:is:1
Italian:it:it:1
Japanese:ja:jp:euc eucJP jp.EUC sjis
Korean:ko:kr:
Latin:la:va:1
Latvian:lv:lv:1
Lithuanian:lt:lt:1
Polish:pl:pl:2
Portuguese:pt:pt br:1
Rumanian:ro:ro:2
Russian:ru:ru su:5 koi8 koi8r koi8u cp1251
Slovak:sk:sk:2
Slovene:sl:si:2
Spanish:es:ar bo cl co cr ec es gt mx ni pa pe py sv uy ve:1
Swedish:sv:se:1
Thai:th:th:tis620
Turkish:tr:tr:9 turkish8
EOF

my @Locale;     # locale names that setlocale() accepted
my $Locale;     # the locale currently under test
my @Alnum_;     # the characters matching \w, as returned by getalnum_()
271 | |
# getalnum_()
#   Return, sorted, every single character with a code in 0..255 that
#   matches \w.
sub getalnum_ {
    my @every_char = map { chr } 0..255;
    my @word_chars = grep { /\w/ } @every_char;
    return sort @word_chars;
}
275 | |
284102e8 |
# trylocale($name)
#   Try to select $name for LC_ALL and remember it in @Locale when
#   setlocale() accepts it.
sub trylocale {
    my ($candidate) = @_;
    push @Locale, $candidate if setlocale(LC_ALL, $candidate);
}
8ebc5c01 |
282 | |
284102e8 |
# decode_encodings($spec)
#   Expand a space-separated list of encoding names into the list of
#   names to try.
#
#   An entry that is a bare number N becomes "ISO8859-N" and "iso8859N"
#   (the HP spelling); for N == 1 the HP name "roman8" is added as well.
#   Any other entry is passed through unchanged.
#
#   $spec may be undef or empty (a record whose encodings field is empty,
#   such as the Korean one, yields undef after split); the result is then
#   the empty list, without an "uninitialized value" warning under -w.
sub decode_encodings {
    my $spec = shift;
    my @enc;

    return @enc unless defined $spec;

    foreach (split(/ /, $spec)) {
        if (/^(\d+)$/) {
            push @enc, "ISO8859-$1";
            push @enc, "iso8859$1";     # HP
            if ($1 eq '1') {
                push @enc, "roman8";    # HP
            }
        } else {
            push @enc, $_;
        }
    }

    return @enc;
}
300 | |
284102e8 |
# Probe the standard names first, then the bare ISO 8859 spellings.
trylocale($_) for "C", "POSIX";
foreach my $part (0..15) {
    trylocale($_) for "ISO8859-$part", "iso_8859_$part", "iso8859$part";
}

# Now walk the table.  The order of the attempts is kept exactly as it
# always was: name, name.encoding, lowercased-name.encoding, then
# language, language_country(.encoding) and language_COUNTRY(.encoding).
foreach my $record (split(/\n/, $locales)) {
    my ($locale_name, $language_codes, $country_codes, $encodings) =
        split(/:/, $record);
    my @enc = decode_encodings($encodings);

    foreach my $name (split(/ /, $locale_name)) {
        trylocale($name);
        trylocale("$name.$_") for @enc;

        my $lowered = lc $name;
        trylocale("$lowered.$_") for @enc;
    }

    foreach my $lang (split(/ /, $language_codes)) {
        trylocale($lang);
        foreach my $country (split(/ /, $country_codes)) {
            foreach my $tag ("${lang}_${country}", "${lang}_\U${country}") {
                trylocale($tag);
                trylocale("$tag.$_") for @enc;
            }
        }
    }
}

@Locale = sort @Locale;
341 | |
284102e8 |
# debug(LIST): print LIST, but only while $debug is true.
sub debug {
    $debug && print(@_);
}
8ebc5c01 |
345 | |
284102e8 |
# debugf(FORMAT, LIST): printf the arguments, but only while $debug is true.
sub debugf {
    $debug && printf(@_);
}
349 | |
284102e8 |
debug "# Locales = @Locale\n";

# $Problem{test number}{locale name} is set to 1 when that test failed in
# that locale, and to -1 (for every test) when the locale could not be
# selected at all.
my %Problem;

foreach $Locale (@Locale) {
    debug "# Locale = $Locale\n";

    unless (setlocale(LC_ALL, $Locale)) {
        foreach (99..103) {
            $Problem{$_}{$Locale} = -1;
        }
        next;
    }

    # Collect the \w characters only now that $Locale is in effect.
    # Doing it before the setlocale() call would classify and sort them
    # under whichever locale happened to be selected previously, and the
    # ordering check (test 103) would then compare against a foreign order.
    @Alnum_ = getalnum_();
    debug "# \\w = @Alnum_\n";

    # Sieve the uppercase and the lowercase.

    %UPPER = %lower = %bothcase = ();
    for (@Alnum_) {
        if (/[^\d_]/) { # skip digits and the _
            if (uc($_) eq $_) {
                $UPPER{$_} = $_;
            }
            if (lc($_) eq $_) {
                $lower{$_} = $_;
            }
        }
    }
    # Characters that are their own uppercase AND their own lowercase
    # have no case at all: move them out of both sets.
    foreach (keys %UPPER) {
        $bothcase{$_}++ if exists $lower{$_};
    }
    foreach (keys %lower) {
        $bothcase{$_}++ if exists $UPPER{$_};
    }
    foreach (keys %bothcase) {
        delete $UPPER{$_};
        delete $lower{$_};
    }

    debug "# UPPER    = ", join(" ", sort keys %UPPER   ), "\n";
    debug "# lower    = ", join(" ", sort keys %lower   ), "\n";
    debug "# bothcase = ", join(" ", sort keys %bothcase), "\n";

    # Find the alphabets that are not alphabets in the default locale.

    {
        no locale;

        @Neoalpha = ();
        for (keys %UPPER, keys %lower) {
            push(@Neoalpha, $_) if (/\W/);
        }
    }

    @Neoalpha = sort @Neoalpha;

    debug "# Neoalpha = @Neoalpha\n";

    if (@Neoalpha == 0) {
        # If we have no Neoalphas the remaining tests are no-ops.
        debug "# no Neoalpha, skipping tests 99..103 for locale '$Locale'\n";
        next;
    }

    # Test \w.

    debug "# testing 99 with locale '$Locale'\n";
    {
        my $word = join('', @Neoalpha);

        $word =~ /^(\w+)$/;

        if ($1 ne $word) {
            $Problem{99}{$Locale} = 1;
            debug "# failed 99 ($1 vs $word)\n";
        }
    }

    # Test #100 removed but to preserve historical test number
    # consistency we do not renumber the remaining tests.

    # Cross-check whole character set.

    debug "# testing 101 with locale '$Locale'\n";
    for (map { chr } 0..255) {
        if ((/\w/ and /\W/) or (/\d/ and /\D/) or (/\s/ and /\S/)) {
            $Problem{101}{$Locale} = 1;
            debug "# failed 101\n";
            last;
        }
    }

    # Test for read-only scalars' locale vs non-locale comparisons.

    debug "# testing 102 with locale '$Locale'\n";
    {
        no locale;
        $a = "qwerty";
        {
            use locale;
            if ($a cmp "qwerty") {
                $Problem{102}{$Locale} = 1;
                debug "# failed 102\n";
            }
        }
    }

    # This test must be the last one because its failure is not fatal.
    # The @Alnum_ should be internally consistent.
    # Thanks to Hallvard Furuseth <h.b.furuseth@usit.uio.no>
    # for inventing a way to test for ordering consistency
    # without requiring any particular order.
    # <jhi@iki.fi>

    debug "# testing 103 with locale '$Locale'\n";
    {
        my ($from, $to, $lesser, $greater,
            @test, %test, $test, $yes, $no, $sign);

        for (0..9) {
            # Select a slice.
            $from = int(($_*@Alnum_)/10);
            $to = $from + int(@Alnum_/10);
            $to = $#Alnum_ if ($to > $#Alnum_);
            $lesser  = join('', @Alnum_[$from..$to]);
            # Select a slice one character on.
            $from++; $to++;
            $to = $#Alnum_ if ($to > $#Alnum_);
            $greater = join('', @Alnum_[$from..$to]);
            ($yes, $no, $sign) = ($lesser lt $greater
                                  ? (" ", "not ", 1)
                                  : ("not ", " ", -1));
            # all these tests should FAIL (return 0).
            # Exact lt or gt cannot be tested because
            # in some locales, say, eacute and E may test equal.
            @test =
                (
                 $no.' ($lesser le $greater)', # 1
                 'not ($lesser ne $greater)', # 2
                 ' ($lesser eq $greater)', # 3
                 $yes.' ($lesser ge $greater)', # 4
                 $yes.' ($lesser ge $greater)', # 5
                 $yes.' ($greater le $lesser )', # 7
                 'not ($greater ne $lesser )', # 8
                 ' ($greater eq $lesser )', # 9
                 $no.' ($greater ge $lesser )', # 10
                 'not (($lesser cmp $greater) == -$sign)' # 12
                 );
            # Start every expression at 0.  The parentheses matter:
            # without them "x" repeats a string instead of a list and
            # only the first key would receive a value.
            @test{@test} = (0) x @test;
            $test = 0;
            for my $ti (@test) { $test{$ti} = eval $ti ; $test ||= $test{$ti} }
            if ($test) {
                $Problem{103}{$Locale} = 1;
                debug "# failed 103 at:\n";
                debug "# lesser  = '$lesser'\n";
                debug "# greater = '$greater'\n";
                debug "# lesser cmp greater = ", $lesser cmp $greater, "\n";
                debug "# greater cmp lesser = ", $greater cmp $lesser, "\n";
                debug "# (greater) from = $from, to = $to\n";
                for my $ti (@test) {
                    debugf("# %-40s %-4s", $ti,
                           $test{$ti} ? 'FAIL' : 'ok');
                    if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
                        debugf("(%s == %4d)", $1, eval $1);
                    }
                    debug "\n#";
                }

                last;
            }
        }
    }
}
284102e8 |
524 | |
no locale;

# Emit the result lines for tests 99..103.  A test is reported as failed
# when any locale recorded a problem for it.
foreach (99..103) {
    if ($Problem{$_}) {
        if ($_ == 103) {
            print "# The failure of test 103 is not necessarily fatal.\n";
            print "# It usually indicates a problem in the environment,\n";
            print "# not in Perl itself.\n";
        }
        print "not ";
    }
    print "ok $_\n";
}

my $didwarn = 0;

# For every failed test, name the locales it failed in on STDERR.
foreach (99..103) {
    if ($Problem{$_}) {
        my @f = sort keys %{ $Problem{$_} };
        my $f = join(" ", @f);
        # Wrap the list into comment lines of roughly 50..60 characters.
        $f =~ s/(.{50,60}) /$1\n#\t/g;
        warn
            "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n",
            "#\t", $f, "\n#\n",
            "# on your system may have errors because the locale test $_\n",
            "# failed in ", (@f == 1 ? "that locale" : "those locales"),
            ".\n";
        warn <<EOW;
#
# If your users are not using these locales you are safe for the moment,
# but please report this failure first to perlbug\@perl.com using the
# perlbug script (as described in the INSTALL file) so that the exact
# details of the failures can be sorted out first and then your operating
# system supplier can be alerted about these anomalies.
#
EOW
        $didwarn = 1;
    }
}

# After a warning, also list the locales that recorded no problem in any
# of the tests.
if ($didwarn) {
    my @s;

    foreach my $name (@Locale) {
        my $p = 0;
        foreach my $t (99..103) {
            $p++ if $Problem{$t}{$name};
        }
        push @s, $name if $p == 0;
    }

    # Say nothing when no locale passed: an empty "tested okay" list
    # would only mislead.
    if (@s) {
        my $s = join(" ", @s);
        $s =~ s/(.{50,60}) /$1\n#\t/g;

        warn
            "# The following locales\n#\n",
            "#\t", $s, "\n#\n",
            "# tested okay.\n#\n";
    }
}
90248788 |
584 | |
585 | # eof |