Commit | Line | Data |
8ebc5c01 |
1 | #!./perl -wT |
2 | |
3 | BEGIN { |
4 | chdir 't' if -d 't'; |
5 | @INC = '../lib'; |
b002077a |
6 | require Config; import Config; |
97a0514d |
7 | if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) { |
b002077a |
8 | print "1..0\n"; |
9 | exit; |
10 | } |
8ebc5c01 |
11 | } |
12 | |
13 | use strict; |
14 | |
15 | my $have_setlocale = 0; |
16 | eval { |
17 | require POSIX; |
18 | import POSIX ':locale_h'; |
19 | $have_setlocale++; |
20 | }; |
21 | |
9fc9f3bf |
22 | print "1..", ($have_setlocale ? 102 : 98), "\n"; |
8ebc5c01 |
23 | |
24 | use vars qw($a |
25 | $English $German $French $Spanish |
26 | @C @English @German @French @Spanish |
27 | $Locale @Locale %iLocale %UPPER %lower @Neoalpha); |
28 | |
29 | $a = 'abc %'; |
30 | |
31 | sub ok { |
32 | my ($n, $result) = @_; |
33 | |
34 | print 'not ' unless ($result); |
35 | print "ok $n\n"; |
36 | } |
37 | |
38 | # First we'll do a lot of taint checking for locales. |
39 | # This is the easiest to test, actually, as any locale, |
40 | # even the default locale will taint under 'use locale'. |
41 | |
42 | sub is_tainted { # hello, camel two. |
3fe9a6f1 |
43 | local $^W; # no warnings 'undef' |
8ebc5c01 |
44 | my $dummy; |
45 | not eval { $dummy = join("", @_), kill 0; 1 } |
46 | } |
47 | |
48 | sub check_taint ($$) { |
49 | ok $_[0], is_tainted($_[1]); |
50 | } |
51 | |
52 | sub check_taint_not ($$) { |
53 | ok $_[0], not is_tainted($_[1]); |
54 | } |
55 | |
56 | use locale; # engage locale and therefore locale taint. |
57 | |
58 | check_taint_not 1, $a; |
59 | |
60 | check_taint 2, uc($a); |
61 | check_taint 3, "\U$a"; |
62 | check_taint 4, ucfirst($a); |
63 | check_taint 5, "\u$a"; |
64 | check_taint 6, lc($a); |
65 | check_taint 7, "\L$a"; |
66 | check_taint 8, lcfirst($a); |
67 | check_taint 9, "\l$a"; |
68 | |
69 | check_taint 10, sprintf('%e', 123.456); |
70 | check_taint 11, sprintf('%f', 123.456); |
71 | check_taint 12, sprintf('%g', 123.456); |
72 | check_taint_not 13, sprintf('%d', 123.456); |
73 | check_taint_not 14, sprintf('%x', 123.456); |
74 | |
75 | $_ = $a; # untaint $_ |
76 | |
77 | $_ = uc($a); # taint $_ |
78 | |
79 | check_taint 15, $_; |
80 | |
81 | /(\w)/; # taint $&, $`, $', $+, $1. |
82 | check_taint 16, $&; |
83 | check_taint 17, $`; |
84 | check_taint 18, $'; |
85 | check_taint 19, $+; |
86 | check_taint 20, $1; |
87 | check_taint_not 21, $2; |
88 | |
89 | /(.)/; # untaint $&, $`, $', $+, $1. |
90 | check_taint_not 22, $&; |
91 | check_taint_not 23, $`; |
92 | check_taint_not 24, $'; |
93 | check_taint_not 25, $+; |
94 | check_taint_not 26, $1; |
95 | check_taint_not 27, $2; |
96 | |
97 | /(\W)/; # taint $&, $`, $', $+, $1. |
98 | check_taint 28, $&; |
99 | check_taint 29, $`; |
100 | check_taint 30, $'; |
101 | check_taint 31, $+; |
102 | check_taint 32, $1; |
103 | check_taint_not 33, $2; |
104 | |
105 | /(\s)/; # taint $&, $`, $', $+, $1. |
106 | check_taint 34, $&; |
107 | check_taint 35, $`; |
108 | check_taint 36, $'; |
109 | check_taint 37, $+; |
110 | check_taint 38, $1; |
111 | check_taint_not 39, $2; |
112 | |
113 | /(\S)/; # taint $&, $`, $', $+, $1. |
114 | check_taint 40, $&; |
115 | check_taint 41, $`; |
116 | check_taint 42, $'; |
117 | check_taint 43, $+; |
118 | check_taint 44, $1; |
119 | check_taint_not 45, $2; |
120 | |
121 | $_ = $a; # untaint $_ |
122 | |
123 | check_taint_not 46, $_; |
124 | |
125 | /(b)/; # this must not taint |
126 | check_taint_not 47, $&; |
127 | check_taint_not 48, $`; |
128 | check_taint_not 49, $'; |
129 | check_taint_not 50, $+; |
130 | check_taint_not 51, $1; |
131 | check_taint_not 52, $2; |
132 | |
133 | $_ = $a; # untaint $_ |
134 | |
135 | check_taint_not 53, $_; |
136 | |
137 | $b = uc($a); # taint $b |
138 | s/(.+)/$b/; # this must taint only the $_ |
139 | |
140 | check_taint 54, $_; |
141 | check_taint_not 55, $&; |
142 | check_taint_not 56, $`; |
143 | check_taint_not 57, $'; |
144 | check_taint_not 58, $+; |
145 | check_taint_not 59, $1; |
146 | check_taint_not 60, $2; |
147 | |
148 | $_ = $a; # untaint $_ |
149 | |
150 | s/(.+)/b/; # this must not taint |
151 | check_taint_not 61, $_; |
152 | check_taint_not 62, $&; |
153 | check_taint_not 63, $`; |
154 | check_taint_not 64, $'; |
155 | check_taint_not 65, $+; |
156 | check_taint_not 66, $1; |
157 | check_taint_not 67, $2; |
158 | |
159 | $b = $a; # untaint $b |
160 | |
161 | ($b = $a) =~ s/\w/$&/; |
162 | check_taint 68, $b; # $b should be tainted. |
163 | check_taint_not 69, $a; # $a should be not. |
164 | |
165 | $_ = $a; # untaint $_ |
166 | |
167 | s/(\w)/\l$1/; # this must taint |
168 | check_taint 70, $_; |
169 | check_taint 71, $&; |
170 | check_taint 72, $`; |
171 | check_taint 73, $'; |
172 | check_taint 74, $+; |
173 | check_taint 75, $1; |
174 | check_taint_not 76, $2; |
175 | |
176 | $_ = $a; # untaint $_ |
177 | |
178 | s/(\w)/\L$1/; # this must taint |
179 | check_taint 77, $_; |
180 | check_taint 78, $&; |
181 | check_taint 79, $`; |
182 | check_taint 80, $'; |
183 | check_taint 81, $+; |
184 | check_taint 82, $1; |
185 | check_taint_not 83, $2; |
186 | |
187 | $_ = $a; # untaint $_ |
188 | |
189 | s/(\w)/\u$1/; # this must taint |
190 | check_taint 84, $_; |
191 | check_taint 85, $&; |
192 | check_taint 86, $`; |
193 | check_taint 87, $'; |
194 | check_taint 88, $+; |
195 | check_taint 89, $1; |
196 | check_taint_not 90, $2; |
197 | |
198 | $_ = $a; # untaint $_ |
199 | |
200 | s/(\w)/\U$1/; # this must taint |
201 | check_taint 91, $_; |
202 | check_taint 92, $&; |
203 | check_taint 93, $`; |
204 | check_taint 94, $'; |
205 | check_taint 95, $+; |
206 | check_taint 96, $1; |
207 | check_taint_not 97, $2; |
208 | |
209 | # After all this tainting $a should be cool. |
210 | |
211 | check_taint_not 98, $a; |
212 | |
213 | # I think we've seen quite enough of taint. |
214 | # Let us do some *real* locale work now, |
215 | # unless setlocale() is missing (i.e. minitest). |
216 | |
217 | exit unless $have_setlocale; |
218 | |
219 | sub getalnum { |
220 | sort grep /\w/, map { chr } 0..255 |
221 | } |
222 | |
223 | sub locatelocale ($$@) { |
224 | my ($lcall, $alnum, @try) = @_; |
225 | |
226 | undef $$lcall; |
227 | |
228 | for (@try) { |
229 | local $^W = 0; # suppress "Subroutine LC_ALL redefined" |
230 | if (setlocale(&LC_ALL, $_)) { |
231 | $$lcall = $_; |
232 | @$alnum = &getalnum; |
233 | last; |
234 | } |
235 | } |
236 | |
237 | @$alnum = () unless (defined $$lcall); |
238 | } |
239 | |
240 | # Find some default locale |
241 | |
242 | locatelocale(\$Locale, \@Locale, qw(C POSIX)); |
243 | |
244 | # Find some English locale |
245 | |
246 | locatelocale(\$English, \@English, |
247 | qw(en_US.ISO8859-1 en_GB.ISO8859-1 |
248 | en en_US en_UK en_IE en_CA en_AU en_NZ |
249 | english english.iso88591 |
250 | american american.iso88591 |
251 | british british.iso88591 |
252 | )); |
253 | |
254 | # Find some German locale |
255 | |
256 | locatelocale(\$German, \@German, |
257 | qw(de_DE.ISO8859-1 de_AT.ISO8859-1 de_CH.ISO8859-1 |
258 | de de_DE de_AT de_CH |
259 | german german.iso88591)); |
260 | |
261 | # Find some French locale |
262 | |
263 | locatelocale(\$French, \@French, |
264 | qw(fr_FR.ISO8859-1 fr_BE.ISO8859-1 fr_CA.ISO8859-1 fr_CH.ISO8859-1 |
265 | fr fr_FR fr_BE fr_CA fr_CH |
266 | french french.iso88591)); |
267 | |
268 | # Find some Spanish locale |
269 | |
270 | locatelocale(\$Spanish, \@Spanish, |
271 | qw(es_AR.ISO8859-1 es_BO.ISO8859-1 es_CL.ISO8859-1 |
272 | es_CO.ISO8859-1 es_CR.ISO8859-1 es_EC.ISO8859-1 |
273 | es_ES.ISO8859-1 es_GT.ISO8859-1 es_MX.ISO8859-1 |
274 | es_NI.ISO8859-1 es_PA.ISO8859-1 es_PE.ISO8859-1 |
275 | es_PY.ISO8859-1 es_SV.ISO8859-1 es_UY.ISO8859-1 es_VE.ISO8859-1 |
276 | es es_AR es_BO es_CL |
277 | es_CO es_CR es_EC |
278 | es_ES es_GT es_MX |
279 | es_NI es_PA es_PE |
280 | es_PY es_SV es_UY es_VE |
281 | spanish spanish.iso88591)); |
282 | |
283 | # Select the largest of the alpha(num)bets. |
284 | |
285 | ($Locale, @Locale) = ($English, @English) |
286 | if (length(@English) > length(@Locale)); |
287 | ($Locale, @Locale) = ($German, @German) |
288 | if (length(@German) > length(@Locale)); |
289 | ($Locale, @Locale) = ($French, @French) |
290 | if (length(@French) > length(@Locale)); |
291 | ($Locale, @Locale) = ($Spanish, @Spanish) |
292 | if (length(@Spanish) > length(@Locale)); |
293 | |
294 | print "# Locale = $Locale\n"; |
295 | print "# Alnum_ = @Locale\n"; |
296 | |
297 | { |
298 | local $^W = 0; |
299 | setlocale(&LC_ALL, $Locale); |
300 | } |
301 | |
302 | { |
303 | my $i = 0; |
304 | |
305 | for (@Locale) { |
306 | $iLocale{$_} = $i++; |
307 | } |
308 | } |
309 | |
310 | # Sieve the uppercase and the lowercase. |
311 | |
312 | for (@Locale) { |
313 | if (/[^\d_]/) { # skip digits and the _ |
314 | if (lc eq $_) { |
315 | $UPPER{$_} = uc; |
316 | } else { |
317 | $lower{$_} = lc; |
318 | } |
319 | } |
320 | } |
321 | |
8ebc5c01 |
322 | # Find the alphabets that are not alphabets in the default locale. |
323 | |
324 | { |
325 | no locale; |
326 | |
327 | for (keys %UPPER, keys %lower) { |
328 | push(@Neoalpha, $_) if (/\W/); |
329 | } |
330 | } |
331 | |
332 | @Neoalpha = sort @Neoalpha; |
333 | |
334 | # Test \w. |
335 | |
336 | { |
337 | my $word = join('', @Neoalpha); |
338 | |
339 | $word =~ /^(\w*)$/; |
340 | |
341 | print 'not ' if ($1 ne $word); |
342 | } |
9fc9f3bf |
343 | print "ok 99\n"; |
8ebc5c01 |
344 | |
345 | # Find places where the collation order differs from the default locale. |
346 | |
9fc9f3bf |
347 | print "# testing 100\n"; |
8ebc5c01 |
348 | { |
349 | my (@k, $i, $j, @d); |
350 | |
351 | { |
352 | no locale; |
353 | |
354 | @k = sort (keys %UPPER, keys %lower); |
355 | } |
356 | |
357 | for ($i = 0; $i < @k; $i++) { |
358 | for ($j = $i + 1; $j < @k; $j++) { |
359 | if ($iLocale{$k[$j]} < $iLocale{$k[$i]}) { |
360 | push(@d, [$k[$j], $k[$i]]); |
361 | } |
362 | } |
363 | } |
364 | |
365 | # Cross-check those places. |
366 | |
367 | for (@d) { |
368 | ($i, $j) = @$_; |
369 | if ($i gt $j) { |
9fc9f3bf |
370 | print "# failed 100 at:\n"; |
8ebc5c01 |
371 | print "# i = $i, j = $j, i ", |
372 | $i le $j ? 'le' : 'gt', " j\n"; |
373 | print 'not '; |
374 | last; |
375 | } |
376 | } |
377 | } |
9fc9f3bf |
378 | print "ok 100\n"; |
8ebc5c01 |
379 | |
380 | # Cross-check whole character set. |
381 | |
9fc9f3bf |
382 | print "# testing 101\n"; |
8ebc5c01 |
383 | for (map { chr } 0..255) { |
384 | if (/\w/ and /\W/) { print 'not '; last } |
385 | if (/\d/ and /\D/) { print 'not '; last } |
386 | if (/\s/ and /\S/) { print 'not '; last } |
387 | if (/\w/ and /\D/ and not /_/ and |
388 | not (exists $UPPER{$_} or exists $lower{$_})) { |
9fc9f3bf |
389 | print "# failed 101 at:\n"; |
774d564b |
390 | print "# ", ord($_), " '$_'\n"; |
8ebc5c01 |
391 | print 'not '; |
392 | last; |
393 | } |
394 | } |
9fc9f3bf |
395 | print "ok 101\n"; |
8ebc5c01 |
396 | |
397 | # The @Locale should be internally consistent. |
398 | |
9fc9f3bf |
399 | print "# testing 102\n"; |
8ebc5c01 |
400 | { |
774d564b |
401 | my ($from, $to, $lesser, $greater, @test, %test, $test); |
8ebc5c01 |
402 | |
403 | for (0..9) { |
404 | # Select a slice. |
405 | $from = int(($_*@Locale)/10); |
406 | $to = $from + int(@Locale/10); |
407 | $to = $#Locale if ($to > $#Locale); |
408 | $lesser = join('', @Locale[$from..$to]); |
409 | # Select a slice one character on. |
410 | $from++; $to++; |
411 | $to = $#Locale if ($to > $#Locale); |
412 | $greater = join('', @Locale[$from..$to]); |
774d564b |
413 | @test = |
414 | ( |
415 | 'not ($lesser lt $greater)', # 0 |
416 | 'not ($lesser le $greater)', # 1 |
417 | 'not ($lesser ne $greater)', # 2 |
418 | ' ($lesser eq $greater)', # 3 |
419 | ' ($lesser ge $greater)', # 4 |
420 | ' ($lesser gt $greater)', # 5 |
421 | ' ($greater lt $lesser )', # 6 |
422 | ' ($greater le $lesser )', # 7 |
423 | 'not ($greater ne $lesser )', # 8 |
424 | ' ($greater eq $lesser )', # 9 |
425 | 'not ($greater ge $lesser )', # 10 |
426 | 'not ($greater gt $lesser )', # 11 |
427 | # Well, these two are sort of redundant |
428 | # because @Locale was derived using cmp. |
429 | 'not (($lesser cmp $greater) == -1)', # 12 |
430 | 'not (($greater cmp $lesser ) == 1)' # 13 |
431 | ); |
432 | @test{@test} = 0 x @test; |
433 | $test = 0; |
434 | for my $ti (@test) { $test{$ti} = eval $ti ; $test ||= $test{$ti} } |
435 | if ($test) { |
9fc9f3bf |
436 | print "# failed 102 at:\n"; |
774d564b |
437 | print "# lesser = '$lesser'\n"; |
438 | print "# greater = '$greater'\n"; |
439 | print "# (greater) from = $from, to = $to\n"; |
440 | for my $ti (@test) { |
441 | printf("# %-40s %-4s", $ti, |
442 | $test{$ti} ? 'FAIL' : 'ok'); |
443 | if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) { |
444 | printf("(%s == %4d)", $1, eval $1); |
445 | } |
446 | print "\n"; |
447 | } |
448 | |
8ebc5c01 |
449 | print 'not '; |
450 | last; |
451 | } |
452 | } |
453 | } |
9fc9f3bf |
454 | print "ok 102\n"; |