11 use POSIX qw(locale_h);
14 $English $German $French $Spanish
15 @C @English @German @French @Spanish
16 $Locale @Locale %iLocale %UPPER %lower @Neoalpha);
21 my ($n, $result) = @_;
23 print 'not ' unless ($result);
27 # First we'll do a lot of taint checking for locales.
28 # This is the easiest to test, actually, as any locale,
29 # even the default locale will taint under 'use locale'.
31 sub is_tainted { # hello, camel two.
33 not eval { $dummy = join("", @_), kill 0; 1 }
36 sub check_taint ($$) {
37 ok $_[0], is_tainted($_[1]);
40 sub check_taint_not ($$) {
41 ok $_[0], not is_tainted($_[1]);
44 use locale; # engage locale and therefore locale taint.
46 check_taint_not 1, $a;
48 check_taint 2, uc($a);
49 check_taint 3, "\U$a";
50 check_taint 4, ucfirst($a);
51 check_taint 5, "\u$a";
52 check_taint 6, lc($a);
53 check_taint 7, "\L$a";
54 check_taint 8, lcfirst($a);
55 check_taint 9, "\l$a";
57 check_taint 10, sprintf('%e', 123.456);
58 check_taint 11, sprintf('%f', 123.456);
59 check_taint 12, sprintf('%g', 123.456);
60 check_taint_not 13, sprintf('%d', 123.456);
61 check_taint_not 14, sprintf('%x', 123.456);
65 $_ = uc($a); # taint $_
69 /(\w)/; # taint $&, $`, $', $+, $1.
75 check_taint_not 21, $2;
77 /(\W)/; # taint $&, $`, $', $+, $1.
83 check_taint_not 27, $2;
85 /(\s)/; # taint $&, $`, $', $+, $1.
91 check_taint_not 33, $2;
93 /(\S)/; # taint $&, $`, $', $+, $1.
99 check_taint_not 39, $2;
101 $_ = $a; # untaint $_
103 check_taint_not 40, $_;
105 /(b)/; # this must not taint
106 check_taint_not 41, $&;
107 check_taint_not 42, $`;
108 check_taint_not 43, $';
109 check_taint_not 44, $+;
110 check_taint_not 45, $1;
111 check_taint_not 46, $2;
113 $_ = $a; # untaint $_
115 check_taint_not 47, $_;
117 $b = uc($a); # taint $b
118 s/(.+)/$b/; # this must taint only the $_
121 check_taint_not 49, $&;
122 check_taint_not 50, $`;
123 check_taint_not 51, $';
124 check_taint_not 52, $+;
125 check_taint_not 53, $1;
126 check_taint_not 54, $2;
128 $_ = $a; # untaint $_
130 s/(.+)/b/; # this must not taint
131 check_taint_not 55, $_;
132 check_taint_not 56, $&;
133 check_taint_not 57, $`;
134 check_taint_not 58, $';
135 check_taint_not 59, $+;
136 check_taint_not 60, $1;
137 check_taint_not 61, $2;
139 check_taint_not 62, $a;
141 # I think we've seen quite enough of taint.
142 # Let us do some *real* locale work now.
145 sort grep /\w/, map { chr } 0..255
148 sub locatelocale ($$@) {
149 my ($lcall, $alnum, @try) = @_;
154 local $^W = 0; # suppress "Subroutine LC_ALL redefined"
155 if (setlocale(LC_ALL, $_)) {
162 @$alnum = () unless (defined $$lcall);
165 # Find some default locale
167 locatelocale(\$Locale, \@Locale, qw(C POSIX));
169 # Find some English locale
171 locatelocale(\$English, \@English,
172 qw(en_US.ISO8859-1 en_GB.ISO8859-1
173 en en_US en_UK en_IE en_CA en_AU en_NZ
174 english english.iso88591
175 american american.iso88591
176 british british.iso88591
179 # Find some German locale
181 locatelocale(\$German, \@German,
182 qw(de_DE.ISO8859-1 de_AT.ISO8859-1 de_CH.ISO8859-1
184 german german.iso88591));
186 # Find some French locale
188 locatelocale(\$French, \@French,
189 qw(fr_FR.ISO8859-1 fr_BE.ISO8859-1 fr_CA.ISO8859-1 fr_CH.ISO8859-1
190 fr fr_FR fr_BE fr_CA fr_CH
191 french french.iso88591));
193 # Find some Spanish locale
195 locatelocale(\$Spanish, \@Spanish,
196 qw(es_AR.ISO8859-1 es_BO.ISO8859-1 es_CL.ISO8859-1
197 es_CO.ISO8859-1 es_CR.ISO8859-1 es_EC.ISO8859-1
198 es_ES.ISO8859-1 es_GT.ISO8859-1 es_MX.ISO8859-1
199 es_NI.ISO8859-1 es_PA.ISO8859-1 es_PE.ISO8859-1
200 es_PY.ISO8859-1 es_SV.ISO8859-1 es_UY.ISO8859-1 es_VE.ISO8859-1
205 es_PY es_SV es_UY es_VE
206 spanish spanish.iso88591));
208 # Select the largest of the alpha(num)bets.
210 ($Locale, @Locale) = ($English, @English)
211 if (length(@English) > length(@Locale));
212 ($Locale, @Locale) = ($German, @German)
213 if (length(@German) > length(@Locale));
214 ($Locale, @Locale) = ($French, @French)
215 if (length(@French) > length(@Locale));
216 ($Locale, @Locale) = ($Spanish, @Spanish)
217 if (length(@Spanish) > length(@Locale));
219 print "# Locale = $Locale\n";
220 print "# Alnum_ = @Locale\n";
224 setlocale(LC_ALL, $Locale);
235 # Sieve the uppercase and the lowercase.
238 if (/[^\d_]/) { # skip digits and the _
247 # Cross-check the upper and the lower.
248 # Yes, this is broken when the upper<->lower changes the number of
249 # the glyphs (e.g. the German sharp-s aka double-s aka sz-ligature.
250 # But so far all the implementations do this wrong so we can do it wrong too.
253 if (defined $lower{$UPPER{$_}}) {
254 if ($_ ne $lower{$UPPER{$_}}) {
263 if (defined $UPPER{$lower{$_}}) {
264 if ($_ ne $UPPER{$lower{$_}}) {
272 # Find the alphabets that are not alphabets in the default locale.
277 for (keys %UPPER, keys %lower) {
278 push(@Neoalpha, $_) if (/\W/);
282 @Neoalpha = sort @Neoalpha;
287 my $word = join('', @Neoalpha);
291 print 'not ' if ($1 ne $word);
295 # Find places where the collation order differs from the default locale.
300 my @k = sort (keys %UPPER, keys %lower);
303 for ($i = 0; $i < @k; $i++) {
304 for ($j = $i + 1; $j < @k; $j++) {
305 if ($iLocale{$k[$j]} < $iLocale{$k[$i]}) {
306 push(@d, [$k[$j], $k[$i]]);
311 # Cross-check those places.
315 print 'not ' if ($i le $j or not (($i cmp $j) == 1));
320 # Cross-check whole character set.
322 for (map { chr } 0..255) {
323 if (/\w/ and /\W/) { print 'not '; last }
324 if (/\d/ and /\D/) { print 'not '; last }
325 if (/\s/ and /\S/) { print 'not '; last }
326 if (/\w/ and /\D/ and not /_/ and
327 not (exists $UPPER{$_} or exists $lower{$_})) {