Commit | Line | Data |
8ebc5c01 |
1 | #!./perl -wT |
2 | |
3 | BEGIN { |
4 | chdir 't' if -d 't'; |
5 | @INC = '../lib'; |
6 | } |
7 | |
8 | use strict; |
9 | |
10 | my $have_setlocale = 0; |
11 | eval { |
12 | require POSIX; |
13 | import POSIX ':locale_h'; |
14 | $have_setlocale++; |
15 | }; |
16 | |
17 | print "1..", ($have_setlocale ? 104 : 98), "\n"; |
18 | |
19 | use vars qw($a |
20 | $English $German $French $Spanish |
21 | @C @English @German @French @Spanish |
22 | $Locale @Locale %iLocale %UPPER %lower @Neoalpha); |
23 | |
24 | $a = 'abc %'; |
25 | |
26 | sub ok { |
27 | my ($n, $result) = @_; |
28 | |
29 | print 'not ' unless ($result); |
30 | print "ok $n\n"; |
31 | } |
32 | |
33 | # First we'll do a lot of taint checking for locales. |
34 | # This is the easiest to test, actually, as any locale, |
35 | # even the default locale will taint under 'use locale'. |
36 | |
37 | sub is_tainted { # hello, camel two. |
38 | my $dummy; |
39 | not eval { $dummy = join("", @_), kill 0; 1 } |
40 | } |
41 | |
42 | sub check_taint ($$) { |
43 | ok $_[0], is_tainted($_[1]); |
44 | } |
45 | |
46 | sub check_taint_not ($$) { |
47 | ok $_[0], not is_tainted($_[1]); |
48 | } |
49 | |
50 | use locale; # engage locale and therefore locale taint. |
51 | |
52 | check_taint_not 1, $a; |
53 | |
54 | check_taint 2, uc($a); |
55 | check_taint 3, "\U$a"; |
56 | check_taint 4, ucfirst($a); |
57 | check_taint 5, "\u$a"; |
58 | check_taint 6, lc($a); |
59 | check_taint 7, "\L$a"; |
60 | check_taint 8, lcfirst($a); |
61 | check_taint 9, "\l$a"; |
62 | |
63 | check_taint 10, sprintf('%e', 123.456); |
64 | check_taint 11, sprintf('%f', 123.456); |
65 | check_taint 12, sprintf('%g', 123.456); |
66 | check_taint_not 13, sprintf('%d', 123.456); |
67 | check_taint_not 14, sprintf('%x', 123.456); |
68 | |
69 | $_ = $a; # untaint $_ |
70 | |
71 | $_ = uc($a); # taint $_ |
72 | |
73 | check_taint 15, $_; |
74 | |
75 | /(\w)/; # taint $&, $`, $', $+, $1. |
76 | check_taint 16, $&; |
77 | check_taint 17, $`; |
78 | check_taint 18, $'; |
79 | check_taint 19, $+; |
80 | check_taint 20, $1; |
81 | check_taint_not 21, $2; |
82 | |
83 | /(.)/; # untaint $&, $`, $', $+, $1. |
84 | check_taint_not 22, $&; |
85 | check_taint_not 23, $`; |
86 | check_taint_not 24, $'; |
87 | check_taint_not 25, $+; |
88 | check_taint_not 26, $1; |
89 | check_taint_not 27, $2; |
90 | |
91 | /(\W)/; # taint $&, $`, $', $+, $1. |
92 | check_taint 28, $&; |
93 | check_taint 29, $`; |
94 | check_taint 30, $'; |
95 | check_taint 31, $+; |
96 | check_taint 32, $1; |
97 | check_taint_not 33, $2; |
98 | |
99 | /(\s)/; # taint $&, $`, $', $+, $1. |
100 | check_taint 34, $&; |
101 | check_taint 35, $`; |
102 | check_taint 36, $'; |
103 | check_taint 37, $+; |
104 | check_taint 38, $1; |
105 | check_taint_not 39, $2; |
106 | |
107 | /(\S)/; # taint $&, $`, $', $+, $1. |
108 | check_taint 40, $&; |
109 | check_taint 41, $`; |
110 | check_taint 42, $'; |
111 | check_taint 43, $+; |
112 | check_taint 44, $1; |
113 | check_taint_not 45, $2; |
114 | |
115 | $_ = $a; # untaint $_ |
116 | |
117 | check_taint_not 46, $_; |
118 | |
119 | /(b)/; # this must not taint |
120 | check_taint_not 47, $&; |
121 | check_taint_not 48, $`; |
122 | check_taint_not 49, $'; |
123 | check_taint_not 50, $+; |
124 | check_taint_not 51, $1; |
125 | check_taint_not 52, $2; |
126 | |
127 | $_ = $a; # untaint $_ |
128 | |
129 | check_taint_not 53, $_; |
130 | |
131 | $b = uc($a); # taint $b |
132 | s/(.+)/$b/; # this must taint only the $_ |
133 | |
134 | check_taint 54, $_; |
135 | check_taint_not 55, $&; |
136 | check_taint_not 56, $`; |
137 | check_taint_not 57, $'; |
138 | check_taint_not 58, $+; |
139 | check_taint_not 59, $1; |
140 | check_taint_not 60, $2; |
141 | |
142 | $_ = $a; # untaint $_ |
143 | |
144 | s/(.+)/b/; # this must not taint |
145 | check_taint_not 61, $_; |
146 | check_taint_not 62, $&; |
147 | check_taint_not 63, $`; |
148 | check_taint_not 64, $'; |
149 | check_taint_not 65, $+; |
150 | check_taint_not 66, $1; |
151 | check_taint_not 67, $2; |
152 | |
153 | $b = $a; # untaint $b |
154 | |
155 | ($b = $a) =~ s/\w/$&/; |
156 | check_taint 68, $b; # $b should be tainted. |
157 | check_taint_not 69, $a; # $a should be not. |
158 | |
159 | $_ = $a; # untaint $_ |
160 | |
161 | s/(\w)/\l$1/; # this must taint |
162 | check_taint 70, $_; |
163 | check_taint 71, $&; |
164 | check_taint 72, $`; |
165 | check_taint 73, $'; |
166 | check_taint 74, $+; |
167 | check_taint 75, $1; |
168 | check_taint_not 76, $2; |
169 | |
170 | $_ = $a; # untaint $_ |
171 | |
172 | s/(\w)/\L$1/; # this must taint |
173 | check_taint 77, $_; |
174 | check_taint 78, $&; |
175 | check_taint 79, $`; |
176 | check_taint 80, $'; |
177 | check_taint 81, $+; |
178 | check_taint 82, $1; |
179 | check_taint_not 83, $2; |
180 | |
181 | $_ = $a; # untaint $_ |
182 | |
183 | s/(\w)/\u$1/; # this must taint |
184 | check_taint 84, $_; |
185 | check_taint 85, $&; |
186 | check_taint 86, $`; |
187 | check_taint 87, $'; |
188 | check_taint 88, $+; |
189 | check_taint 89, $1; |
190 | check_taint_not 90, $2; |
191 | |
192 | $_ = $a; # untaint $_ |
193 | |
194 | s/(\w)/\U$1/; # this must taint |
195 | check_taint 91, $_; |
196 | check_taint 92, $&; |
197 | check_taint 93, $`; |
198 | check_taint 94, $'; |
199 | check_taint 95, $+; |
200 | check_taint 96, $1; |
201 | check_taint_not 97, $2; |
202 | |
203 | # After all this tainting $a should be cool. |
204 | |
205 | check_taint_not 98, $a; |
206 | |
207 | # I think we've seen quite enough of taint. |
208 | # Let us do some *real* locale work now, |
209 | # unless setlocale() is missing (i.e. minitest). |
210 | |
211 | exit unless $have_setlocale; |
212 | |
213 | sub getalnum { |
214 | sort grep /\w/, map { chr } 0..255 |
215 | } |
216 | |
217 | sub locatelocale ($$@) { |
218 | my ($lcall, $alnum, @try) = @_; |
219 | |
220 | undef $$lcall; |
221 | |
222 | for (@try) { |
223 | local $^W = 0; # suppress "Subroutine LC_ALL redefined" |
224 | if (setlocale(&LC_ALL, $_)) { |
225 | $$lcall = $_; |
226 | @$alnum = &getalnum; |
227 | last; |
228 | } |
229 | } |
230 | |
231 | @$alnum = () unless (defined $$lcall); |
232 | } |
233 | |
234 | # Find some default locale |
235 | |
236 | locatelocale(\$Locale, \@Locale, qw(C POSIX)); |
237 | |
238 | # Find some English locale |
239 | |
240 | locatelocale(\$English, \@English, |
241 | qw(en_US.ISO8859-1 en_GB.ISO8859-1 |
242 | en en_US en_UK en_IE en_CA en_AU en_NZ |
243 | english english.iso88591 |
244 | american american.iso88591 |
245 | british british.iso88591 |
246 | )); |
247 | |
248 | # Find some German locale |
249 | |
250 | locatelocale(\$German, \@German, |
251 | qw(de_DE.ISO8859-1 de_AT.ISO8859-1 de_CH.ISO8859-1 |
252 | de de_DE de_AT de_CH |
253 | german german.iso88591)); |
254 | |
255 | # Find some French locale |
256 | |
257 | locatelocale(\$French, \@French, |
258 | qw(fr_FR.ISO8859-1 fr_BE.ISO8859-1 fr_CA.ISO8859-1 fr_CH.ISO8859-1 |
259 | fr fr_FR fr_BE fr_CA fr_CH |
260 | french french.iso88591)); |
261 | |
262 | # Find some Spanish locale |
263 | |
264 | locatelocale(\$Spanish, \@Spanish, |
265 | qw(es_AR.ISO8859-1 es_BO.ISO8859-1 es_CL.ISO8859-1 |
266 | es_CO.ISO8859-1 es_CR.ISO8859-1 es_EC.ISO8859-1 |
267 | es_ES.ISO8859-1 es_GT.ISO8859-1 es_MX.ISO8859-1 |
268 | es_NI.ISO8859-1 es_PA.ISO8859-1 es_PE.ISO8859-1 |
269 | es_PY.ISO8859-1 es_SV.ISO8859-1 es_UY.ISO8859-1 es_VE.ISO8859-1 |
270 | es es_AR es_BO es_CL |
271 | es_CO es_CR es_EC |
272 | es_ES es_GT es_MX |
273 | es_NI es_PA es_PE |
274 | es_PY es_SV es_UY es_VE |
275 | spanish spanish.iso88591)); |
276 | |
277 | # Select the largest of the alpha(num)bets. |
278 | |
279 | ($Locale, @Locale) = ($English, @English) |
280 | if (length(@English) > length(@Locale)); |
281 | ($Locale, @Locale) = ($German, @German) |
282 | if (length(@German) > length(@Locale)); |
283 | ($Locale, @Locale) = ($French, @French) |
284 | if (length(@French) > length(@Locale)); |
285 | ($Locale, @Locale) = ($Spanish, @Spanish) |
286 | if (length(@Spanish) > length(@Locale)); |
287 | |
288 | print "# Locale = $Locale\n"; |
289 | print "# Alnum_ = @Locale\n"; |
290 | |
291 | { |
292 | local $^W = 0; |
293 | setlocale(&LC_ALL, $Locale); |
294 | } |
295 | |
296 | { |
297 | my $i = 0; |
298 | |
299 | for (@Locale) { |
300 | $iLocale{$_} = $i++; |
301 | } |
302 | } |
303 | |
304 | # Sieve the uppercase and the lowercase. |
305 | |
306 | for (@Locale) { |
307 | if (/[^\d_]/) { # skip digits and the _ |
308 | if (lc eq $_) { |
309 | $UPPER{$_} = uc; |
310 | } else { |
311 | $lower{$_} = lc; |
312 | } |
313 | } |
314 | } |
315 | |
316 | # Cross-check the upper and the lower. |
317 | # Yes, this is broken when the upper<->lower changes the number of |
318 | # the glyphs (e.g. the German sharp-s aka double-s aka sz-ligature, |
319 | # or the Dutch IJ or the Spanish LL or ...) |
320 | # But so far all the implementations do this wrong so we can do it wrong too. |
321 | |
322 | for (keys %UPPER) { |
323 | if (defined $lower{$UPPER{$_}}) { |
324 | if ($_ ne $lower{$UPPER{$_}}) { |
325 | print 'not '; |
326 | last; |
327 | } |
328 | } |
329 | } |
330 | print "ok 99\n"; |
331 | |
332 | for (keys %lower) { |
333 | if (defined $UPPER{$lower{$_}}) { |
334 | if ($_ ne $UPPER{$lower{$_}}) { |
335 | print 'not '; |
336 | last; |
337 | } |
338 | } |
339 | } |
340 | print "ok 100\n"; |
341 | |
342 | # Find the alphabets that are not alphabets in the default locale. |
343 | |
344 | { |
345 | no locale; |
346 | |
347 | for (keys %UPPER, keys %lower) { |
348 | push(@Neoalpha, $_) if (/\W/); |
349 | } |
350 | } |
351 | |
352 | @Neoalpha = sort @Neoalpha; |
353 | |
354 | # Test \w. |
355 | |
356 | { |
357 | my $word = join('', @Neoalpha); |
358 | |
359 | $word =~ /^(\w*)$/; |
360 | |
361 | print 'not ' if ($1 ne $word); |
362 | } |
363 | print "ok 101\n"; |
364 | |
365 | # Find places where the collation order differs from the default locale. |
366 | |
367 | { |
368 | my (@k, $i, $j, @d); |
369 | |
370 | { |
371 | no locale; |
372 | |
373 | @k = sort (keys %UPPER, keys %lower); |
374 | } |
375 | |
376 | for ($i = 0; $i < @k; $i++) { |
377 | for ($j = $i + 1; $j < @k; $j++) { |
378 | if ($iLocale{$k[$j]} < $iLocale{$k[$i]}) { |
379 | push(@d, [$k[$j], $k[$i]]); |
380 | } |
381 | } |
382 | } |
383 | |
384 | # Cross-check those places. |
385 | |
386 | for (@d) { |
387 | ($i, $j) = @$_; |
388 | if ($i gt $j) { |
389 | print "# i = $i, j = $j, i ", |
390 | $i le $j ? 'le' : 'gt', " j\n"; |
391 | print 'not '; |
392 | last; |
393 | } |
394 | } |
395 | } |
396 | print "ok 102\n"; |
397 | |
398 | # Cross-check whole character set. |
399 | |
400 | for (map { chr } 0..255) { |
401 | if (/\w/ and /\W/) { print 'not '; last } |
402 | if (/\d/ and /\D/) { print 'not '; last } |
403 | if (/\s/ and /\S/) { print 'not '; last } |
404 | if (/\w/ and /\D/ and not /_/ and |
405 | not (exists $UPPER{$_} or exists $lower{$_})) { |
406 | print 'not '; |
407 | last; |
408 | } |
409 | } |
410 | print "ok 103\n"; |
411 | |
412 | # The @Locale should be internally consistent. |
413 | |
414 | { |
415 | my ($from, $to, , $lesser, $greater); |
416 | |
417 | for (0..9) { |
418 | # Select a slice. |
419 | $from = int(($_*@Locale)/10); |
420 | $to = $from + int(@Locale/10); |
421 | $to = $#Locale if ($to > $#Locale); |
422 | $lesser = join('', @Locale[$from..$to]); |
423 | # Select a slice one character on. |
424 | $from++; $to++; |
425 | $to = $#Locale if ($to > $#Locale); |
426 | $greater = join('', @Locale[$from..$to]); |
427 | if (not ($lesser lt $greater) or |
428 | not ($lesser le $greater) or |
429 | not ($lesser ne $greater) or |
430 | ($lesser eq $greater) or |
431 | ($lesser ge $greater) or |
432 | ($lesser gt $greater) or |
433 | ($greater lt $lesser ) or |
434 | ($greater le $lesser ) or |
435 | not ($greater ne $lesser ) or |
436 | ($greater eq $lesser ) or |
437 | not ($greater ge $lesser ) or |
438 | not ($greater gt $lesser ) or |
439 | # Well, these two are sort of redundant because @Locale |
440 | # was derived using cmp. |
441 | not (($lesser cmp $greater) == -1) or |
442 | not (($greater cmp $lesser ) == 1) |
443 | ) { |
444 | print 'not '; |
445 | last; |
446 | } |
447 | } |
448 | } |
449 | print "ok 104\n"; |