Commit | Line | Data |
8ebc5c01 |
1 | #!./perl -wT |
2 | |
3 | BEGIN { |
4 | chdir 't' if -d 't'; |
5 | @INC = '../lib'; |
b002077a |
6 | require Config; import Config; |
97a0514d |
7 | if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) { |
b002077a |
8 | print "1..0\n"; |
9 | exit; |
10 | } |
8ebc5c01 |
11 | } |
12 | |
13 | use strict; |
14 | |
15 | my $have_setlocale = 0; |
16 | eval { |
17 | require POSIX; |
18 | import POSIX ':locale_h'; |
19 | $have_setlocale++; |
20 | }; |
21 | |
6dead956 |
22 | # Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1" |
f6c6487a |
23 | # and mingw32 uses said silly CRT |
24 | $have_setlocale = 0 if $^O eq 'MSWin32' && $Config{cc} =~ /^(cl|gcc)/i; |
6dead956 |
25 | |
8ada0baa |
26 | # 103 (the last test) may fail but that is okay. |
27 | # (It indicates something broken in the environment, not Perl) |
28 | # Therefore .. only until 102, not 103. |
9fc9f3bf |
29 | print "1..", ($have_setlocale ? 102 : 98), "\n"; |
8ebc5c01 |
30 | |
31 | use vars qw($a |
32 | $English $German $French $Spanish |
33 | @C @English @German @French @Spanish |
34 | $Locale @Locale %iLocale %UPPER %lower @Neoalpha); |
35 | |
36 | $a = 'abc %'; |
37 | |
38 | sub ok { |
39 | my ($n, $result) = @_; |
40 | |
41 | print 'not ' unless ($result); |
42 | print "ok $n\n"; |
43 | } |
44 | |
45 | # First we'll do a lot of taint checking for locales. |
46 | # This is the easiest to test, actually, as any locale, |
47 | # even the default locale will taint under 'use locale'. |
48 | |
49 | sub is_tainted { # hello, camel two. |
3fe9a6f1 |
50 | local $^W; # no warnings 'undef' |
8ebc5c01 |
51 | my $dummy; |
52 | not eval { $dummy = join("", @_), kill 0; 1 } |
53 | } |
54 | |
55 | sub check_taint ($$) { |
56 | ok $_[0], is_tainted($_[1]); |
57 | } |
58 | |
59 | sub check_taint_not ($$) { |
60 | ok $_[0], not is_tainted($_[1]); |
61 | } |
62 | |
63 | use locale; # engage locale and therefore locale taint. |
64 | |
65 | check_taint_not 1, $a; |
66 | |
67 | check_taint 2, uc($a); |
68 | check_taint 3, "\U$a"; |
69 | check_taint 4, ucfirst($a); |
70 | check_taint 5, "\u$a"; |
71 | check_taint 6, lc($a); |
72 | check_taint 7, "\L$a"; |
73 | check_taint 8, lcfirst($a); |
74 | check_taint 9, "\l$a"; |
75 | |
76 | check_taint 10, sprintf('%e', 123.456); |
77 | check_taint 11, sprintf('%f', 123.456); |
78 | check_taint 12, sprintf('%g', 123.456); |
79 | check_taint_not 13, sprintf('%d', 123.456); |
80 | check_taint_not 14, sprintf('%x', 123.456); |
81 | |
82 | $_ = $a; # untaint $_ |
83 | |
84 | $_ = uc($a); # taint $_ |
85 | |
86 | check_taint 15, $_; |
87 | |
88 | /(\w)/; # taint $&, $`, $', $+, $1. |
89 | check_taint 16, $&; |
90 | check_taint 17, $`; |
91 | check_taint 18, $'; |
92 | check_taint 19, $+; |
93 | check_taint 20, $1; |
94 | check_taint_not 21, $2; |
95 | |
96 | /(.)/; # untaint $&, $`, $', $+, $1. |
97 | check_taint_not 22, $&; |
98 | check_taint_not 23, $`; |
99 | check_taint_not 24, $'; |
100 | check_taint_not 25, $+; |
101 | check_taint_not 26, $1; |
102 | check_taint_not 27, $2; |
103 | |
104 | /(\W)/; # taint $&, $`, $', $+, $1. |
105 | check_taint 28, $&; |
106 | check_taint 29, $`; |
107 | check_taint 30, $'; |
108 | check_taint 31, $+; |
109 | check_taint 32, $1; |
110 | check_taint_not 33, $2; |
111 | |
112 | /(\s)/; # taint $&, $`, $', $+, $1. |
113 | check_taint 34, $&; |
114 | check_taint 35, $`; |
115 | check_taint 36, $'; |
116 | check_taint 37, $+; |
117 | check_taint 38, $1; |
118 | check_taint_not 39, $2; |
119 | |
120 | /(\S)/; # taint $&, $`, $', $+, $1. |
121 | check_taint 40, $&; |
122 | check_taint 41, $`; |
123 | check_taint 42, $'; |
124 | check_taint 43, $+; |
125 | check_taint 44, $1; |
126 | check_taint_not 45, $2; |
127 | |
128 | $_ = $a; # untaint $_ |
129 | |
130 | check_taint_not 46, $_; |
131 | |
132 | /(b)/; # this must not taint |
133 | check_taint_not 47, $&; |
134 | check_taint_not 48, $`; |
135 | check_taint_not 49, $'; |
136 | check_taint_not 50, $+; |
137 | check_taint_not 51, $1; |
138 | check_taint_not 52, $2; |
139 | |
140 | $_ = $a; # untaint $_ |
141 | |
142 | check_taint_not 53, $_; |
143 | |
144 | $b = uc($a); # taint $b |
145 | s/(.+)/$b/; # this must taint only the $_ |
146 | |
147 | check_taint 54, $_; |
148 | check_taint_not 55, $&; |
149 | check_taint_not 56, $`; |
150 | check_taint_not 57, $'; |
151 | check_taint_not 58, $+; |
152 | check_taint_not 59, $1; |
153 | check_taint_not 60, $2; |
154 | |
155 | $_ = $a; # untaint $_ |
156 | |
157 | s/(.+)/b/; # this must not taint |
158 | check_taint_not 61, $_; |
159 | check_taint_not 62, $&; |
160 | check_taint_not 63, $`; |
161 | check_taint_not 64, $'; |
162 | check_taint_not 65, $+; |
163 | check_taint_not 66, $1; |
164 | check_taint_not 67, $2; |
165 | |
166 | $b = $a; # untaint $b |
167 | |
168 | ($b = $a) =~ s/\w/$&/; |
169 | check_taint 68, $b; # $b should be tainted. |
170 | check_taint_not 69, $a; # $a should be not. |
171 | |
172 | $_ = $a; # untaint $_ |
173 | |
174 | s/(\w)/\l$1/; # this must taint |
175 | check_taint 70, $_; |
176 | check_taint 71, $&; |
177 | check_taint 72, $`; |
178 | check_taint 73, $'; |
179 | check_taint 74, $+; |
180 | check_taint 75, $1; |
181 | check_taint_not 76, $2; |
182 | |
183 | $_ = $a; # untaint $_ |
184 | |
185 | s/(\w)/\L$1/; # this must taint |
186 | check_taint 77, $_; |
187 | check_taint 78, $&; |
188 | check_taint 79, $`; |
189 | check_taint 80, $'; |
190 | check_taint 81, $+; |
191 | check_taint 82, $1; |
192 | check_taint_not 83, $2; |
193 | |
194 | $_ = $a; # untaint $_ |
195 | |
196 | s/(\w)/\u$1/; # this must taint |
197 | check_taint 84, $_; |
198 | check_taint 85, $&; |
199 | check_taint 86, $`; |
200 | check_taint 87, $'; |
201 | check_taint 88, $+; |
202 | check_taint 89, $1; |
203 | check_taint_not 90, $2; |
204 | |
205 | $_ = $a; # untaint $_ |
206 | |
207 | s/(\w)/\U$1/; # this must taint |
208 | check_taint 91, $_; |
209 | check_taint 92, $&; |
210 | check_taint 93, $`; |
211 | check_taint 94, $'; |
212 | check_taint 95, $+; |
213 | check_taint 96, $1; |
214 | check_taint_not 97, $2; |
215 | |
216 | # After all this tainting $a should be cool. |
217 | |
218 | check_taint_not 98, $a; |
219 | |
220 | # I think we've seen quite enough of taint. |
221 | # Let us do some *real* locale work now, |
222 | # unless setlocale() is missing (i.e. minitest). |
223 | |
224 | exit unless $have_setlocale; |
225 | |
226 | sub getalnum { |
227 | sort grep /\w/, map { chr } 0..255 |
228 | } |
229 | |
230 | sub locatelocale ($$@) { |
231 | my ($lcall, $alnum, @try) = @_; |
232 | |
233 | undef $$lcall; |
234 | |
235 | for (@try) { |
236 | local $^W = 0; # suppress "Subroutine LC_ALL redefined" |
237 | if (setlocale(&LC_ALL, $_)) { |
238 | $$lcall = $_; |
239 | @$alnum = &getalnum; |
240 | last; |
241 | } |
242 | } |
243 | |
244 | @$alnum = () unless (defined $$lcall); |
245 | } |
246 | |
247 | # Find some default locale |
248 | |
249 | locatelocale(\$Locale, \@Locale, qw(C POSIX)); |
250 | |
251 | # Find some English locale |
252 | |
253 | locatelocale(\$English, \@English, |
254 | qw(en_US.ISO8859-1 en_GB.ISO8859-1 |
255 | en en_US en_UK en_IE en_CA en_AU en_NZ |
256 | english english.iso88591 |
257 | american american.iso88591 |
258 | british british.iso88591 |
259 | )); |
260 | |
261 | # Find some German locale |
262 | |
263 | locatelocale(\$German, \@German, |
264 | qw(de_DE.ISO8859-1 de_AT.ISO8859-1 de_CH.ISO8859-1 |
265 | de de_DE de_AT de_CH |
266 | german german.iso88591)); |
267 | |
268 | # Find some French locale |
269 | |
270 | locatelocale(\$French, \@French, |
271 | qw(fr_FR.ISO8859-1 fr_BE.ISO8859-1 fr_CA.ISO8859-1 fr_CH.ISO8859-1 |
272 | fr fr_FR fr_BE fr_CA fr_CH |
273 | french french.iso88591)); |
274 | |
275 | # Find some Spanish locale |
276 | |
277 | locatelocale(\$Spanish, \@Spanish, |
278 | qw(es_AR.ISO8859-1 es_BO.ISO8859-1 es_CL.ISO8859-1 |
279 | es_CO.ISO8859-1 es_CR.ISO8859-1 es_EC.ISO8859-1 |
280 | es_ES.ISO8859-1 es_GT.ISO8859-1 es_MX.ISO8859-1 |
281 | es_NI.ISO8859-1 es_PA.ISO8859-1 es_PE.ISO8859-1 |
282 | es_PY.ISO8859-1 es_SV.ISO8859-1 es_UY.ISO8859-1 es_VE.ISO8859-1 |
283 | es es_AR es_BO es_CL |
284 | es_CO es_CR es_EC |
285 | es_ES es_GT es_MX |
286 | es_NI es_PA es_PE |
287 | es_PY es_SV es_UY es_VE |
288 | spanish spanish.iso88591)); |
289 | |
290 | # Select the largest of the alpha(num)bets. |
291 | |
292 | ($Locale, @Locale) = ($English, @English) |
545c8fcc |
293 | if (@English > @Locale); |
8ebc5c01 |
294 | ($Locale, @Locale) = ($German, @German) |
545c8fcc |
295 | if (@German > @Locale); |
8ebc5c01 |
296 | ($Locale, @Locale) = ($French, @French) |
545c8fcc |
297 | if (@French > @Locale); |
8ebc5c01 |
298 | ($Locale, @Locale) = ($Spanish, @Spanish) |
545c8fcc |
299 | if (@Spanish > @Locale); |
8ebc5c01 |
300 | |
8ebc5c01 |
301 | { |
302 | local $^W = 0; |
303 | setlocale(&LC_ALL, $Locale); |
304 | } |
305 | |
4599a1de |
306 | # Sort it now that LC_ALL has been set. |
307 | |
308 | @Locale = sort @Locale; |
309 | |
310 | print "# Locale = $Locale\n"; |
311 | print "# Alnum_ = @Locale\n"; |
312 | |
8ebc5c01 |
313 | { |
314 | my $i = 0; |
315 | |
316 | for (@Locale) { |
317 | $iLocale{$_} = $i++; |
318 | } |
319 | } |
320 | |
321 | # Sieve the uppercase and the lowercase. |
322 | |
323 | for (@Locale) { |
324 | if (/[^\d_]/) { # skip digits and the _ |
325 | if (lc eq $_) { |
326 | $UPPER{$_} = uc; |
327 | } else { |
328 | $lower{$_} = lc; |
329 | } |
330 | } |
331 | } |
332 | |
8ebc5c01 |
333 | # Find the alphabets that are not alphabets in the default locale. |
334 | |
335 | { |
336 | no locale; |
337 | |
338 | for (keys %UPPER, keys %lower) { |
339 | push(@Neoalpha, $_) if (/\W/); |
340 | } |
341 | } |
342 | |
343 | @Neoalpha = sort @Neoalpha; |
344 | |
345 | # Test \w. |
346 | |
347 | { |
348 | my $word = join('', @Neoalpha); |
349 | |
350 | $word =~ /^(\w*)$/; |
351 | |
352 | print 'not ' if ($1 ne $word); |
353 | } |
9fc9f3bf |
354 | print "ok 99\n"; |
8ebc5c01 |
355 | |
356 | # Find places where the collation order differs from the default locale. |
357 | |
9fc9f3bf |
358 | print "# testing 100\n"; |
8ebc5c01 |
359 | { |
360 | my (@k, $i, $j, @d); |
361 | |
362 | { |
363 | no locale; |
364 | |
365 | @k = sort (keys %UPPER, keys %lower); |
366 | } |
367 | |
368 | for ($i = 0; $i < @k; $i++) { |
369 | for ($j = $i + 1; $j < @k; $j++) { |
370 | if ($iLocale{$k[$j]} < $iLocale{$k[$i]}) { |
371 | push(@d, [$k[$j], $k[$i]]); |
372 | } |
373 | } |
374 | } |
375 | |
376 | # Cross-check those places. |
377 | |
378 | for (@d) { |
379 | ($i, $j) = @$_; |
380 | if ($i gt $j) { |
9fc9f3bf |
381 | print "# failed 100 at:\n"; |
8ebc5c01 |
382 | print "# i = $i, j = $j, i ", |
383 | $i le $j ? 'le' : 'gt', " j\n"; |
384 | print 'not '; |
385 | last; |
386 | } |
387 | } |
388 | } |
9fc9f3bf |
389 | print "ok 100\n"; |
8ebc5c01 |
390 | |
391 | # Cross-check whole character set. |
392 | |
9fc9f3bf |
393 | print "# testing 101\n"; |
8ebc5c01 |
394 | for (map { chr } 0..255) { |
395 | if (/\w/ and /\W/) { print 'not '; last } |
396 | if (/\d/ and /\D/) { print 'not '; last } |
397 | if (/\s/ and /\S/) { print 'not '; last } |
398 | if (/\w/ and /\D/ and not /_/ and |
399 | not (exists $UPPER{$_} or exists $lower{$_})) { |
9fc9f3bf |
400 | print "# failed 101 at:\n"; |
774d564b |
401 | print "# ", ord($_), " '$_'\n"; |
8ebc5c01 |
402 | print 'not '; |
403 | last; |
404 | } |
405 | } |
9fc9f3bf |
406 | print "ok 101\n"; |
8ebc5c01 |
407 | |
fb73857a |
408 | # Test for read-onlys. |
409 | |
8ada0baa |
410 | print "# testing 102\n"; |
fb73857a |
411 | { |
412 | no locale; |
413 | $a = "qwerty"; |
414 | { |
415 | use locale; |
416 | print "not " if $a cmp "qwerty"; |
417 | } |
418 | } |
419 | print "ok 102\n"; |
420 | |
421 | # This test must be the last one because its failure is not fatal. |
8ebc5c01 |
422 | # The @Locale should be internally consistent. |
90248788 |
423 | # Thanks to Hallvard Furuseth <h.b.furuseth@usit.uio.no> |
424 | # for inventing a way to test for ordering consistency |
425 | # without requiring any particular order. |
8ada0baa |
426 | # <jhi@iki.fi> |
8ebc5c01 |
427 | |
fb73857a |
428 | print "# testing 103\n"; |
8ebc5c01 |
429 | { |
90248788 |
430 | my ($from, $to, $lesser, $greater, @test, %test, $test, $yes, $no, $sign); |
8ebc5c01 |
431 | |
432 | for (0..9) { |
433 | # Select a slice. |
434 | $from = int(($_*@Locale)/10); |
435 | $to = $from + int(@Locale/10); |
436 | $to = $#Locale if ($to > $#Locale); |
437 | $lesser = join('', @Locale[$from..$to]); |
438 | # Select a slice one character on. |
439 | $from++; $to++; |
440 | $to = $#Locale if ($to > $#Locale); |
441 | $greater = join('', @Locale[$from..$to]); |
90248788 |
442 | ($yes, $no, $sign) = ($lesser lt $greater |
443 | ? (" ", "not ", 1) |
444 | : ("not ", " ", -1)); |
445 | # all these tests should FAIL (return 0). |
774d564b |
446 | @test = |
447 | ( |
90248788 |
448 | $no.' ($lesser lt $greater)', # 0 |
449 | $no.' ($lesser le $greater)', # 1 |
fb73857a |
450 | 'not ($lesser ne $greater)', # 2 |
451 | ' ($lesser eq $greater)', # 3 |
90248788 |
452 | $yes.' ($lesser ge $greater)', # 4 |
453 | $yes.' ($lesser gt $greater)', # 5 |
454 | $yes.' ($greater lt $lesser )', # 6 |
455 | $yes.' ($greater le $lesser )', # 7 |
fb73857a |
456 | 'not ($greater ne $lesser )', # 8 |
457 | ' ($greater eq $lesser )', # 9 |
90248788 |
458 | $no.' ($greater ge $lesser )', # 10 |
459 | $no.' ($greater gt $lesser )', # 11 |
460 | 'not (($lesser cmp $greater) == -$sign)' # 12 |
774d564b |
461 | ); |
462 | @test{@test} = 0 x @test; |
463 | $test = 0; |
464 | for my $ti (@test) { $test{$ti} = eval $ti ; $test ||= $test{$ti} } |
465 | if ($test) { |
fb73857a |
466 | print "# failed 103 at:\n"; |
774d564b |
467 | print "# lesser = '$lesser'\n"; |
468 | print "# greater = '$greater'\n"; |
90248788 |
469 | print "# lesser cmp greater = ", $lesser cmp $greater, "\n"; |
470 | print "# greater cmp lesser = ", $greater cmp $lesser, "\n"; |
774d564b |
471 | print "# (greater) from = $from, to = $to\n"; |
472 | for my $ti (@test) { |
473 | printf("# %-40s %-4s", $ti, |
474 | $test{$ti} ? 'FAIL' : 'ok'); |
475 | if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) { |
476 | printf("(%s == %4d)", $1, eval $1); |
477 | } |
478 | print "\n"; |
479 | } |
480 | |
fb73857a |
481 | warn "The locale definition on your system may have errors.\n"; |
8ebc5c01 |
482 | last; |
483 | } |
484 | } |
485 | } |
90248788 |
486 | |
487 | # eof |