Commit | Line | Data |
8ebc5c01 |
1 | #!./perl -wT |
2 | |
3 | BEGIN { |
4 | chdir 't' if -d 't'; |
5 | @INC = '../lib'; |
b002077a |
6 | require Config; import Config; |
385588b3 |
7 | if ($Config{ccflags} =~ /\bD?NO_LOCALE\b/) { |
b002077a |
8 | print "1..0\n"; |
9 | exit; |
10 | } |
8ebc5c01 |
11 | } |
12 | |
13 | use strict; |
14 | |
15 | my $have_setlocale = 0; |
16 | eval { |
17 | require POSIX; |
18 | import POSIX ':locale_h'; |
19 | $have_setlocale++; |
20 | }; |
21 | |
9fc9f3bf |
22 | print "1..", ($have_setlocale ? 102 : 98), "\n"; |
8ebc5c01 |
23 | |
24 | use vars qw($a |
25 | $English $German $French $Spanish |
26 | @C @English @German @French @Spanish |
27 | $Locale @Locale %iLocale %UPPER %lower @Neoalpha); |
28 | |
29 | $a = 'abc %'; |
30 | |
31 | sub ok { |
32 | my ($n, $result) = @_; |
33 | |
34 | print 'not ' unless ($result); |
35 | print "ok $n\n"; |
36 | } |
37 | |
38 | # First we'll do a lot of taint checking for locales. |
39 | # This is the easiest to test, actually, as any locale, |
40 | # even the default locale will taint under 'use locale'. |
41 | |
42 | sub is_tainted { # hello, camel two. |
43 | my $dummy; |
44 | not eval { $dummy = join("", @_), kill 0; 1 } |
45 | } |
46 | |
47 | sub check_taint ($$) { |
48 | ok $_[0], is_tainted($_[1]); |
49 | } |
50 | |
51 | sub check_taint_not ($$) { |
52 | ok $_[0], not is_tainted($_[1]); |
53 | } |
54 | |
55 | use locale; # engage locale and therefore locale taint. |
56 | |
57 | check_taint_not 1, $a; |
58 | |
59 | check_taint 2, uc($a); |
60 | check_taint 3, "\U$a"; |
61 | check_taint 4, ucfirst($a); |
62 | check_taint 5, "\u$a"; |
63 | check_taint 6, lc($a); |
64 | check_taint 7, "\L$a"; |
65 | check_taint 8, lcfirst($a); |
66 | check_taint 9, "\l$a"; |
67 | |
68 | check_taint 10, sprintf('%e', 123.456); |
69 | check_taint 11, sprintf('%f', 123.456); |
70 | check_taint 12, sprintf('%g', 123.456); |
71 | check_taint_not 13, sprintf('%d', 123.456); |
72 | check_taint_not 14, sprintf('%x', 123.456); |
73 | |
74 | $_ = $a; # untaint $_ |
75 | |
76 | $_ = uc($a); # taint $_ |
77 | |
78 | check_taint 15, $_; |
79 | |
80 | /(\w)/; # taint $&, $`, $', $+, $1. |
81 | check_taint 16, $&; |
82 | check_taint 17, $`; |
83 | check_taint 18, $'; |
84 | check_taint 19, $+; |
85 | check_taint 20, $1; |
86 | check_taint_not 21, $2; |
87 | |
88 | /(.)/; # untaint $&, $`, $', $+, $1. |
89 | check_taint_not 22, $&; |
90 | check_taint_not 23, $`; |
91 | check_taint_not 24, $'; |
92 | check_taint_not 25, $+; |
93 | check_taint_not 26, $1; |
94 | check_taint_not 27, $2; |
95 | |
96 | /(\W)/; # taint $&, $`, $', $+, $1. |
97 | check_taint 28, $&; |
98 | check_taint 29, $`; |
99 | check_taint 30, $'; |
100 | check_taint 31, $+; |
101 | check_taint 32, $1; |
102 | check_taint_not 33, $2; |
103 | |
104 | /(\s)/; # taint $&, $`, $', $+, $1. |
105 | check_taint 34, $&; |
106 | check_taint 35, $`; |
107 | check_taint 36, $'; |
108 | check_taint 37, $+; |
109 | check_taint 38, $1; |
110 | check_taint_not 39, $2; |
111 | |
112 | /(\S)/; # taint $&, $`, $', $+, $1. |
113 | check_taint 40, $&; |
114 | check_taint 41, $`; |
115 | check_taint 42, $'; |
116 | check_taint 43, $+; |
117 | check_taint 44, $1; |
118 | check_taint_not 45, $2; |
119 | |
120 | $_ = $a; # untaint $_ |
121 | |
122 | check_taint_not 46, $_; |
123 | |
124 | /(b)/; # this must not taint |
125 | check_taint_not 47, $&; |
126 | check_taint_not 48, $`; |
127 | check_taint_not 49, $'; |
128 | check_taint_not 50, $+; |
129 | check_taint_not 51, $1; |
130 | check_taint_not 52, $2; |
131 | |
132 | $_ = $a; # untaint $_ |
133 | |
134 | check_taint_not 53, $_; |
135 | |
136 | $b = uc($a); # taint $b |
137 | s/(.+)/$b/; # this must taint only the $_ |
138 | |
139 | check_taint 54, $_; |
140 | check_taint_not 55, $&; |
141 | check_taint_not 56, $`; |
142 | check_taint_not 57, $'; |
143 | check_taint_not 58, $+; |
144 | check_taint_not 59, $1; |
145 | check_taint_not 60, $2; |
146 | |
147 | $_ = $a; # untaint $_ |
148 | |
149 | s/(.+)/b/; # this must not taint |
150 | check_taint_not 61, $_; |
151 | check_taint_not 62, $&; |
152 | check_taint_not 63, $`; |
153 | check_taint_not 64, $'; |
154 | check_taint_not 65, $+; |
155 | check_taint_not 66, $1; |
156 | check_taint_not 67, $2; |
157 | |
158 | $b = $a; # untaint $b |
159 | |
160 | ($b = $a) =~ s/\w/$&/; |
161 | check_taint 68, $b; # $b should be tainted. |
162 | check_taint_not 69, $a; # $a should be not. |
163 | |
164 | $_ = $a; # untaint $_ |
165 | |
166 | s/(\w)/\l$1/; # this must taint |
167 | check_taint 70, $_; |
168 | check_taint 71, $&; |
169 | check_taint 72, $`; |
170 | check_taint 73, $'; |
171 | check_taint 74, $+; |
172 | check_taint 75, $1; |
173 | check_taint_not 76, $2; |
174 | |
175 | $_ = $a; # untaint $_ |
176 | |
177 | s/(\w)/\L$1/; # this must taint |
178 | check_taint 77, $_; |
179 | check_taint 78, $&; |
180 | check_taint 79, $`; |
181 | check_taint 80, $'; |
182 | check_taint 81, $+; |
183 | check_taint 82, $1; |
184 | check_taint_not 83, $2; |
185 | |
186 | $_ = $a; # untaint $_ |
187 | |
188 | s/(\w)/\u$1/; # this must taint |
189 | check_taint 84, $_; |
190 | check_taint 85, $&; |
191 | check_taint 86, $`; |
192 | check_taint 87, $'; |
193 | check_taint 88, $+; |
194 | check_taint 89, $1; |
195 | check_taint_not 90, $2; |
196 | |
197 | $_ = $a; # untaint $_ |
198 | |
199 | s/(\w)/\U$1/; # this must taint |
200 | check_taint 91, $_; |
201 | check_taint 92, $&; |
202 | check_taint 93, $`; |
203 | check_taint 94, $'; |
204 | check_taint 95, $+; |
205 | check_taint 96, $1; |
206 | check_taint_not 97, $2; |
207 | |
208 | # After all this tainting $a should be cool. |
209 | |
210 | check_taint_not 98, $a; |
211 | |
212 | # I think we've seen quite enough of taint. |
213 | # Let us do some *real* locale work now, |
214 | # unless setlocale() is missing (i.e. minitest). |
215 | |
216 | exit unless $have_setlocale; |
217 | |
218 | sub getalnum { |
219 | sort grep /\w/, map { chr } 0..255 |
220 | } |
221 | |
222 | sub locatelocale ($$@) { |
223 | my ($lcall, $alnum, @try) = @_; |
224 | |
225 | undef $$lcall; |
226 | |
227 | for (@try) { |
228 | local $^W = 0; # suppress "Subroutine LC_ALL redefined" |
229 | if (setlocale(&LC_ALL, $_)) { |
230 | $$lcall = $_; |
231 | @$alnum = &getalnum; |
232 | last; |
233 | } |
234 | } |
235 | |
236 | @$alnum = () unless (defined $$lcall); |
237 | } |
238 | |
239 | # Find some default locale |
240 | |
241 | locatelocale(\$Locale, \@Locale, qw(C POSIX)); |
242 | |
243 | # Find some English locale |
244 | |
245 | locatelocale(\$English, \@English, |
246 | qw(en_US.ISO8859-1 en_GB.ISO8859-1 |
247 | en en_US en_UK en_IE en_CA en_AU en_NZ |
248 | english english.iso88591 |
249 | american american.iso88591 |
250 | british british.iso88591 |
251 | )); |
252 | |
253 | # Find some German locale |
254 | |
255 | locatelocale(\$German, \@German, |
256 | qw(de_DE.ISO8859-1 de_AT.ISO8859-1 de_CH.ISO8859-1 |
257 | de de_DE de_AT de_CH |
258 | german german.iso88591)); |
259 | |
260 | # Find some French locale |
261 | |
262 | locatelocale(\$French, \@French, |
263 | qw(fr_FR.ISO8859-1 fr_BE.ISO8859-1 fr_CA.ISO8859-1 fr_CH.ISO8859-1 |
264 | fr fr_FR fr_BE fr_CA fr_CH |
265 | french french.iso88591)); |
266 | |
267 | # Find some Spanish locale |
268 | |
269 | locatelocale(\$Spanish, \@Spanish, |
270 | qw(es_AR.ISO8859-1 es_BO.ISO8859-1 es_CL.ISO8859-1 |
271 | es_CO.ISO8859-1 es_CR.ISO8859-1 es_EC.ISO8859-1 |
272 | es_ES.ISO8859-1 es_GT.ISO8859-1 es_MX.ISO8859-1 |
273 | es_NI.ISO8859-1 es_PA.ISO8859-1 es_PE.ISO8859-1 |
274 | es_PY.ISO8859-1 es_SV.ISO8859-1 es_UY.ISO8859-1 es_VE.ISO8859-1 |
275 | es es_AR es_BO es_CL |
276 | es_CO es_CR es_EC |
277 | es_ES es_GT es_MX |
278 | es_NI es_PA es_PE |
279 | es_PY es_SV es_UY es_VE |
280 | spanish spanish.iso88591)); |
281 | |
282 | # Select the largest of the alpha(num)bets. |
283 | |
284 | ($Locale, @Locale) = ($English, @English) |
285 | if (length(@English) > length(@Locale)); |
286 | ($Locale, @Locale) = ($German, @German) |
287 | if (length(@German) > length(@Locale)); |
288 | ($Locale, @Locale) = ($French, @French) |
289 | if (length(@French) > length(@Locale)); |
290 | ($Locale, @Locale) = ($Spanish, @Spanish) |
291 | if (length(@Spanish) > length(@Locale)); |
292 | |
293 | print "# Locale = $Locale\n"; |
294 | print "# Alnum_ = @Locale\n"; |
295 | |
296 | { |
297 | local $^W = 0; |
298 | setlocale(&LC_ALL, $Locale); |
299 | } |
300 | |
301 | { |
302 | my $i = 0; |
303 | |
304 | for (@Locale) { |
305 | $iLocale{$_} = $i++; |
306 | } |
307 | } |
308 | |
309 | # Sieve the uppercase and the lowercase. |
310 | |
311 | for (@Locale) { |
312 | if (/[^\d_]/) { # skip digits and the _ |
313 | if (lc eq $_) { |
314 | $UPPER{$_} = uc; |
315 | } else { |
316 | $lower{$_} = lc; |
317 | } |
318 | } |
319 | } |
320 | |
8ebc5c01 |
321 | # Find the alphabets that are not alphabets in the default locale. |
322 | |
323 | { |
324 | no locale; |
325 | |
326 | for (keys %UPPER, keys %lower) { |
327 | push(@Neoalpha, $_) if (/\W/); |
328 | } |
329 | } |
330 | |
331 | @Neoalpha = sort @Neoalpha; |
332 | |
333 | # Test \w. |
334 | |
335 | { |
336 | my $word = join('', @Neoalpha); |
337 | |
338 | $word =~ /^(\w*)$/; |
339 | |
340 | print 'not ' if ($1 ne $word); |
341 | } |
9fc9f3bf |
342 | print "ok 99\n"; |
8ebc5c01 |
343 | |
344 | # Find places where the collation order differs from the default locale. |
345 | |
9fc9f3bf |
346 | print "# testing 100\n"; |
8ebc5c01 |
347 | { |
348 | my (@k, $i, $j, @d); |
349 | |
350 | { |
351 | no locale; |
352 | |
353 | @k = sort (keys %UPPER, keys %lower); |
354 | } |
355 | |
356 | for ($i = 0; $i < @k; $i++) { |
357 | for ($j = $i + 1; $j < @k; $j++) { |
358 | if ($iLocale{$k[$j]} < $iLocale{$k[$i]}) { |
359 | push(@d, [$k[$j], $k[$i]]); |
360 | } |
361 | } |
362 | } |
363 | |
364 | # Cross-check those places. |
365 | |
366 | for (@d) { |
367 | ($i, $j) = @$_; |
368 | if ($i gt $j) { |
9fc9f3bf |
369 | print "# failed 100 at:\n"; |
8ebc5c01 |
370 | print "# i = $i, j = $j, i ", |
371 | $i le $j ? 'le' : 'gt', " j\n"; |
372 | print 'not '; |
373 | last; |
374 | } |
375 | } |
376 | } |
9fc9f3bf |
377 | print "ok 100\n"; |
8ebc5c01 |
378 | |
379 | # Cross-check whole character set. |
380 | |
9fc9f3bf |
381 | print "# testing 101\n"; |
8ebc5c01 |
382 | for (map { chr } 0..255) { |
383 | if (/\w/ and /\W/) { print 'not '; last } |
384 | if (/\d/ and /\D/) { print 'not '; last } |
385 | if (/\s/ and /\S/) { print 'not '; last } |
386 | if (/\w/ and /\D/ and not /_/ and |
387 | not (exists $UPPER{$_} or exists $lower{$_})) { |
9fc9f3bf |
388 | print "# failed 101 at:\n"; |
774d564b |
389 | print "# ", ord($_), " '$_'\n"; |
8ebc5c01 |
390 | print 'not '; |
391 | last; |
392 | } |
393 | } |
9fc9f3bf |
394 | print "ok 101\n"; |
8ebc5c01 |
395 | |
396 | # The @Locale should be internally consistent. |
397 | |
9fc9f3bf |
398 | print "# testing 102\n"; |
8ebc5c01 |
399 | { |
774d564b |
400 | my ($from, $to, $lesser, $greater, @test, %test, $test); |
8ebc5c01 |
401 | |
402 | for (0..9) { |
403 | # Select a slice. |
404 | $from = int(($_*@Locale)/10); |
405 | $to = $from + int(@Locale/10); |
406 | $to = $#Locale if ($to > $#Locale); |
407 | $lesser = join('', @Locale[$from..$to]); |
408 | # Select a slice one character on. |
409 | $from++; $to++; |
410 | $to = $#Locale if ($to > $#Locale); |
411 | $greater = join('', @Locale[$from..$to]); |
774d564b |
412 | @test = |
413 | ( |
414 | 'not ($lesser lt $greater)', # 0 |
415 | 'not ($lesser le $greater)', # 1 |
416 | 'not ($lesser ne $greater)', # 2 |
417 | ' ($lesser eq $greater)', # 3 |
418 | ' ($lesser ge $greater)', # 4 |
419 | ' ($lesser gt $greater)', # 5 |
420 | ' ($greater lt $lesser )', # 6 |
421 | ' ($greater le $lesser )', # 7 |
422 | 'not ($greater ne $lesser )', # 8 |
423 | ' ($greater eq $lesser )', # 9 |
424 | 'not ($greater ge $lesser )', # 10 |
425 | 'not ($greater gt $lesser )', # 11 |
426 | # Well, these two are sort of redundant |
427 | # because @Locale was derived using cmp. |
428 | 'not (($lesser cmp $greater) == -1)', # 12 |
429 | 'not (($greater cmp $lesser ) == 1)' # 13 |
430 | ); |
431 | @test{@test} = 0 x @test; |
432 | $test = 0; |
433 | for my $ti (@test) { $test{$ti} = eval $ti ; $test ||= $test{$ti} } |
434 | if ($test) { |
9fc9f3bf |
435 | print "# failed 102 at:\n"; |
774d564b |
436 | print "# lesser = '$lesser'\n"; |
437 | print "# greater = '$greater'\n"; |
438 | print "# (greater) from = $from, to = $to\n"; |
439 | for my $ti (@test) { |
440 | printf("# %-40s %-4s", $ti, |
441 | $test{$ti} ? 'FAIL' : 'ok'); |
442 | if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) { |
443 | printf("(%s == %4d)", $1, eval $1); |
444 | } |
445 | print "\n"; |
446 | } |
447 | |
8ebc5c01 |
448 | print 'not '; |
449 | last; |
450 | } |
451 | } |
452 | } |
9fc9f3bf |
453 | print "ok 102\n"; |