Commit | Line | Data |
92d69e20 |
1 | #!./perl -wT |
2 | |
3 | print "1..67\n"; |
4 | |
5 | BEGIN { |
6 | chdir 't' if -d 't'; |
7 | @INC = '../lib'; |
8 | } |
9 | |
10 | use strict; |
11 | use POSIX qw(locale_h); |
12 | |
13 | use vars qw($a |
14 | $English $German $French $Spanish |
15 | @C @English @German @French @Spanish |
16 | $Locale @Locale %iLocale %UPPER %lower @Neoalpha); |
17 | |
18 | $a = 'abc %'; |
19 | |
20 | sub ok { |
21 | my ($n, $result) = @_; |
22 | |
23 | print 'not ' unless ($result); |
24 | print "ok $n\n"; |
25 | } |
26 | |
27 | # First we'll do a lot of taint checking for locales. |
28 | # This is the easiest to test, actually, as any locale, |
29 | # even the default locale will taint under 'use locale'. |
30 | |
31 | sub is_tainted { # hello, camel two. |
32 | my $dummy; |
33 | not eval { $dummy = join("", @_), kill 0; 1 } |
34 | } |
35 | |
36 | sub check_taint ($$) { |
37 | ok $_[0], is_tainted($_[1]); |
38 | } |
39 | |
40 | sub check_taint_not ($$) { |
41 | ok $_[0], not is_tainted($_[1]); |
42 | } |
43 | |
44 | use locale; # engage locale and therefore locale taint. |
45 | |
46 | check_taint_not 1, $a; |
47 | |
48 | check_taint 2, uc($a); |
49 | check_taint 3, "\U$a"; |
50 | check_taint 4, ucfirst($a); |
51 | check_taint 5, "\u$a"; |
52 | check_taint 6, lc($a); |
53 | check_taint 7, "\L$a"; |
54 | check_taint 8, lcfirst($a); |
55 | check_taint 9, "\l$a"; |
56 | |
57 | check_taint 10, sprintf('%e', 123.456); |
58 | check_taint 11, sprintf('%f', 123.456); |
59 | check_taint 12, sprintf('%g', 123.456); |
60 | check_taint_not 13, sprintf('%d', 123.456); |
61 | check_taint_not 14, sprintf('%x', 123.456); |
62 | |
63 | $_ = $a; # untaint $_ |
64 | |
65 | $_ = uc($a); # taint $_ |
66 | |
67 | check_taint 15, $_; |
68 | |
69 | /(\w)/; # taint $&, $`, $', $+, $1. |
70 | check_taint 16, $&; |
71 | check_taint 17, $`; |
72 | check_taint 18, $'; |
73 | check_taint 19, $+; |
74 | check_taint 20, $1; |
75 | check_taint_not 21, $2; |
76 | |
77 | /(\W)/; # taint $&, $`, $', $+, $1. |
78 | check_taint 22, $&; |
79 | check_taint 23, $`; |
80 | check_taint 24, $'; |
81 | check_taint 25, $+; |
82 | check_taint 26, $1; |
83 | check_taint_not 27, $2; |
84 | |
85 | /(\s)/; # taint $&, $`, $', $+, $1. |
86 | check_taint 28, $&; |
87 | check_taint 29, $`; |
88 | check_taint 30, $'; |
89 | check_taint 31, $+; |
90 | check_taint 32, $1; |
91 | check_taint_not 33, $2; |
92 | |
93 | /(\S)/; # taint $&, $`, $', $+, $1. |
94 | check_taint 34, $&; |
95 | check_taint 35, $`; |
96 | check_taint 36, $'; |
97 | check_taint 37, $+; |
98 | check_taint 38, $1; |
99 | check_taint_not 39, $2; |
100 | |
101 | $_ = $a; # untaint $_ |
102 | |
103 | check_taint_not 40, $_; |
104 | |
105 | /(b)/; # this must not taint |
106 | check_taint_not 41, $&; |
107 | check_taint_not 42, $`; |
108 | check_taint_not 43, $'; |
109 | check_taint_not 44, $+; |
110 | check_taint_not 45, $1; |
111 | check_taint_not 46, $2; |
112 | |
113 | $_ = $a; # untaint $_ |
114 | |
115 | check_taint_not 47, $_; |
116 | |
117 | $b = uc($a); # taint $b |
118 | s/(.+)/$b/; # this must taint only the $_ |
119 | |
120 | check_taint 48, $_; |
121 | check_taint_not 49, $&; |
122 | check_taint_not 50, $`; |
123 | check_taint_not 51, $'; |
124 | check_taint_not 52, $+; |
125 | check_taint_not 53, $1; |
126 | check_taint_not 54, $2; |
127 | |
128 | $_ = $a; # untaint $_ |
129 | |
130 | s/(.+)/b/; # this must not taint |
131 | check_taint_not 55, $_; |
132 | check_taint_not 56, $&; |
133 | check_taint_not 57, $`; |
134 | check_taint_not 58, $'; |
135 | check_taint_not 59, $+; |
136 | check_taint_not 60, $1; |
137 | check_taint_not 61, $2; |
138 | |
139 | check_taint_not 62, $a; |
140 | |
141 | # I think we've seen quite enough of taint. |
142 | # Let us do some *real* locale work now. |
143 | |
144 | sub getalnum { |
145 | sort grep /\w/, map { chr } 0..255 |
146 | } |
147 | |
148 | sub locatelocale ($$@) { |
149 | my ($lcall, $alnum, @try) = @_; |
150 | |
151 | undef $$lcall; |
152 | |
153 | for (@try) { |
154 | local $^W = 0; # suppress "Subroutine LC_ALL redefined" |
155 | if (setlocale(LC_ALL, $_)) { |
156 | $$lcall = $_; |
157 | @$alnum = &getalnum; |
158 | last; |
159 | } |
160 | } |
161 | |
162 | @$alnum = () unless (defined $$lcall); |
163 | } |
164 | |
165 | # Find some default locale |
166 | |
167 | locatelocale(\$Locale, \@Locale, qw(C POSIX)); |
168 | |
169 | # Find some English locale |
170 | |
171 | locatelocale(\$English, \@English, |
172 | qw(en_US.ISO8859-1 en_GB.ISO8859-1 |
173 | en en_US en_UK en_IE en_CA en_AU en_NZ |
174 | english english.iso88591 |
175 | american american.iso88591 |
176 | british british.iso88591 |
177 | )); |
178 | |
179 | # Find some German locale |
180 | |
181 | locatelocale(\$German, \@German, |
182 | qw(de_DE.ISO8859-1 de_AT.ISO8859-1 de_CH.ISO8859-1 |
183 | de de_DE de_AT de_CH |
184 | german german.iso88591)); |
185 | |
186 | # Find some French locale |
187 | |
188 | locatelocale(\$French, \@French, |
189 | qw(fr_FR.ISO8859-1 fr_BE.ISO8859-1 fr_CA.ISO8859-1 fr_CH.ISO8859-1 |
190 | fr fr_FR fr_BE fr_CA fr_CH |
191 | french french.iso88591)); |
192 | |
193 | # Find some Spanish locale |
194 | |
195 | locatelocale(\$Spanish, \@Spanish, |
196 | qw(es_AR.ISO8859-1 es_BO.ISO8859-1 es_CL.ISO8859-1 |
197 | es_CO.ISO8859-1 es_CR.ISO8859-1 es_EC.ISO8859-1 |
198 | es_ES.ISO8859-1 es_GT.ISO8859-1 es_MX.ISO8859-1 |
199 | es_NI.ISO8859-1 es_PA.ISO8859-1 es_PE.ISO8859-1 |
200 | es_PY.ISO8859-1 es_SV.ISO8859-1 es_UY.ISO8859-1 es_VE.ISO8859-1 |
201 | es es_AR es_BO es_CL |
202 | es_CO es_CR es_EC |
203 | es_ES es_GT es_MX |
204 | es_NI es_PA es_PE |
205 | es_PY es_SV es_UY es_VE |
206 | spanish spanish.iso88591)); |
207 | |
208 | # Select the largest of the alpha(num)bets. |
209 | |
210 | ($Locale, @Locale) = ($English, @English) |
211 | if (length(@English) > length(@Locale)); |
212 | ($Locale, @Locale) = ($German, @German) |
213 | if (length(@German) > length(@Locale)); |
214 | ($Locale, @Locale) = ($French, @French) |
215 | if (length(@French) > length(@Locale)); |
216 | ($Locale, @Locale) = ($Spanish, @Spanish) |
217 | if (length(@Spanish) > length(@Locale)); |
218 | |
219 | print "# Locale = $Locale\n"; |
220 | print "# Alnum_ = @Locale\n"; |
221 | |
222 | { |
223 | local $^W = 0; |
224 | setlocale(LC_ALL, $Locale); |
225 | } |
226 | |
227 | { |
228 | my $i = 0; |
229 | |
230 | for (@Locale) { |
231 | $iLocale{$_} = $i++; |
232 | } |
233 | } |
234 | |
235 | # Sieve the uppercase and the lowercase. |
236 | |
237 | for (@Locale) { |
238 | if (/[^\d_]/) { # skip digits and the _ |
239 | if (lc eq $_) { |
240 | $UPPER{$_} = uc; |
241 | } else { |
242 | $lower{$_} = lc; |
243 | } |
244 | } |
245 | } |
246 | |
247 | # Cross-check the upper and the lower. |
248 | # Yes, this is broken when the upper<->lower changes the number of |
249 | # the glyphs (e.g. the German sharp-s aka double-s aka sz-ligature. |
250 | # But so far all the implementations do this wrong so we can do it wrong too. |
251 | |
252 | for (keys %UPPER) { |
253 | if (defined $lower{$UPPER{$_}}) { |
254 | if ($_ ne $lower{$UPPER{$_}}) { |
255 | print 'not '; |
256 | last; |
257 | } |
258 | } |
259 | } |
260 | print "ok 63\n"; |
261 | |
262 | for (keys %lower) { |
263 | if (defined $UPPER{$lower{$_}}) { |
264 | if ($_ ne $UPPER{$lower{$_}}) { |
265 | print 'not '; |
266 | last; |
267 | } |
268 | } |
269 | } |
270 | print "ok 64\n"; |
271 | |
272 | # Find the alphabets that are not alphabets in the default locale. |
273 | |
274 | { |
275 | no locale; |
276 | |
277 | for (keys %UPPER, keys %lower) { |
278 | push(@Neoalpha, $_) if (/\W/); |
279 | } |
280 | } |
281 | |
282 | @Neoalpha = sort @Neoalpha; |
283 | |
284 | # Test \w. |
285 | |
286 | { |
287 | my $word = join('', @Neoalpha); |
288 | |
289 | $word =~ /^(\w*)$/; |
290 | |
291 | print 'not ' if ($1 ne $word); |
292 | } |
293 | print "ok 65\n"; |
294 | |
295 | # Find places where the collation order differs from the default locale. |
296 | |
297 | { |
298 | no locale; |
299 | |
300 | my @k = sort (keys %UPPER, keys %lower); |
301 | my ($i, $j, @d); |
302 | |
303 | for ($i = 0; $i < @k; $i++) { |
304 | for ($j = $i + 1; $j < @k; $j++) { |
305 | if ($iLocale{$k[$j]} < $iLocale{$k[$i]}) { |
306 | push(@d, [$k[$j], $k[$i]]); |
307 | } |
308 | } |
309 | } |
310 | |
311 | # Cross-check those places. |
312 | |
313 | for (@d) { |
314 | ($i, $j) = @$_; |
315 | print 'not ' if ($i le $j or not (($i cmp $j) == 1)); |
316 | } |
317 | } |
318 | print "ok 66\n"; |
319 | |
320 | # Cross-check whole character set. |
321 | |
322 | for (map { chr } 0..255) { |
323 | if (/\w/ and /\W/) { print 'not '; last } |
324 | if (/\d/ and /\D/) { print 'not '; last } |
325 | if (/\s/ and /\S/) { print 'not '; last } |
326 | if (/\w/ and /\D/ and not /_/ and |
327 | not (exists $UPPER{$_} or exists $lower{$_})) { |
328 | print 'not '; last |
329 | } |
330 | } |
331 | print "ok 67\n"; |