File::Copy under OS/2
[p5sagit/p5-mst-13.2.git] / t / lib / locale.t
CommitLineData
92d69e20 1#!./perl -wT
2
3print "1..67\n";
4
5BEGIN {
6 chdir 't' if -d 't';
7 @INC = '../lib';
8}
9
10use strict;
11use POSIX qw(locale_h);
12
13use vars qw($a
14 $English $German $French $Spanish
15 @C @English @German @French @Spanish
16 $Locale @Locale %iLocale %UPPER %lower @Neoalpha);
17
18$a = 'abc %';
19
20sub ok {
21 my ($n, $result) = @_;
22
23 print 'not ' unless ($result);
24 print "ok $n\n";
25}
26
27# First we'll do a lot of taint checking for locales.
28# This is the easiest to test, actually, as any locale,
29# even the default locale will taint under 'use locale'.
30
31sub is_tainted { # hello, camel two.
32 my $dummy;
33 not eval { $dummy = join("", @_), kill 0; 1 }
34}
35
36sub check_taint ($$) {
37 ok $_[0], is_tainted($_[1]);
38}
39
40sub check_taint_not ($$) {
41 ok $_[0], not is_tainted($_[1]);
42}
43
44use locale; # engage locale and therefore locale taint.
45
46check_taint_not 1, $a;
47
48check_taint 2, uc($a);
49check_taint 3, "\U$a";
50check_taint 4, ucfirst($a);
51check_taint 5, "\u$a";
52check_taint 6, lc($a);
53check_taint 7, "\L$a";
54check_taint 8, lcfirst($a);
55check_taint 9, "\l$a";
56
57check_taint 10, sprintf('%e', 123.456);
58check_taint 11, sprintf('%f', 123.456);
59check_taint 12, sprintf('%g', 123.456);
60check_taint_not 13, sprintf('%d', 123.456);
61check_taint_not 14, sprintf('%x', 123.456);
62
63$_ = $a; # untaint $_
64
65$_ = uc($a); # taint $_
66
67check_taint 15, $_;
68
69/(\w)/; # taint $&, $`, $', $+, $1.
70check_taint 16, $&;
71check_taint 17, $`;
72check_taint 18, $';
73check_taint 19, $+;
74check_taint 20, $1;
75check_taint_not 21, $2;
76
77/(\W)/; # taint $&, $`, $', $+, $1.
78check_taint 22, $&;
79check_taint 23, $`;
80check_taint 24, $';
81check_taint 25, $+;
82check_taint 26, $1;
83check_taint_not 27, $2;
84
85/(\s)/; # taint $&, $`, $', $+, $1.
86check_taint 28, $&;
87check_taint 29, $`;
88check_taint 30, $';
89check_taint 31, $+;
90check_taint 32, $1;
91check_taint_not 33, $2;
92
93/(\S)/; # taint $&, $`, $', $+, $1.
94check_taint 34, $&;
95check_taint 35, $`;
96check_taint 36, $';
97check_taint 37, $+;
98check_taint 38, $1;
99check_taint_not 39, $2;
100
101$_ = $a; # untaint $_
102
103check_taint_not 40, $_;
104
105/(b)/; # this must not taint
106check_taint_not 41, $&;
107check_taint_not 42, $`;
108check_taint_not 43, $';
109check_taint_not 44, $+;
110check_taint_not 45, $1;
111check_taint_not 46, $2;
112
113$_ = $a; # untaint $_
114
115check_taint_not 47, $_;
116
117$b = uc($a); # taint $b
118s/(.+)/$b/; # this must taint only the $_
119
120check_taint 48, $_;
121check_taint_not 49, $&;
122check_taint_not 50, $`;
123check_taint_not 51, $';
124check_taint_not 52, $+;
125check_taint_not 53, $1;
126check_taint_not 54, $2;
127
128$_ = $a; # untaint $_
129
130s/(.+)/b/; # this must not taint
131check_taint_not 55, $_;
132check_taint_not 56, $&;
133check_taint_not 57, $`;
134check_taint_not 58, $';
135check_taint_not 59, $+;
136check_taint_not 60, $1;
137check_taint_not 61, $2;
138
139check_taint_not 62, $a;
140
141# I think we've seen quite enough of taint.
142# Let us do some *real* locale work now.
143
144sub getalnum {
145 sort grep /\w/, map { chr } 0..255
146}
147
148sub locatelocale ($$@) {
149 my ($lcall, $alnum, @try) = @_;
150
151 undef $$lcall;
152
153 for (@try) {
154 local $^W = 0; # suppress "Subroutine LC_ALL redefined"
155 if (setlocale(LC_ALL, $_)) {
156 $$lcall = $_;
157 @$alnum = &getalnum;
158 last;
159 }
160 }
161
162 @$alnum = () unless (defined $$lcall);
163}
164
165# Find some default locale
166
167locatelocale(\$Locale, \@Locale, qw(C POSIX));
168
169# Find some English locale
170
171locatelocale(\$English, \@English,
172 qw(en_US.ISO8859-1 en_GB.ISO8859-1
173 en en_US en_UK en_IE en_CA en_AU en_NZ
174 english english.iso88591
175 american american.iso88591
176 british british.iso88591
177 ));
178
179# Find some German locale
180
181locatelocale(\$German, \@German,
182 qw(de_DE.ISO8859-1 de_AT.ISO8859-1 de_CH.ISO8859-1
183 de de_DE de_AT de_CH
184 german german.iso88591));
185
186# Find some French locale
187
188locatelocale(\$French, \@French,
189 qw(fr_FR.ISO8859-1 fr_BE.ISO8859-1 fr_CA.ISO8859-1 fr_CH.ISO8859-1
190 fr fr_FR fr_BE fr_CA fr_CH
191 french french.iso88591));
192
193# Find some Spanish locale
194
195locatelocale(\$Spanish, \@Spanish,
196 qw(es_AR.ISO8859-1 es_BO.ISO8859-1 es_CL.ISO8859-1
197 es_CO.ISO8859-1 es_CR.ISO8859-1 es_EC.ISO8859-1
198 es_ES.ISO8859-1 es_GT.ISO8859-1 es_MX.ISO8859-1
199 es_NI.ISO8859-1 es_PA.ISO8859-1 es_PE.ISO8859-1
200 es_PY.ISO8859-1 es_SV.ISO8859-1 es_UY.ISO8859-1 es_VE.ISO8859-1
201 es es_AR es_BO es_CL
202 es_CO es_CR es_EC
203 es_ES es_GT es_MX
204 es_NI es_PA es_PE
205 es_PY es_SV es_UY es_VE
206 spanish spanish.iso88591));
207
208# Select the largest of the alpha(num)bets.
209
210($Locale, @Locale) = ($English, @English)
211 if (length(@English) > length(@Locale));
212($Locale, @Locale) = ($German, @German)
213 if (length(@German) > length(@Locale));
214($Locale, @Locale) = ($French, @French)
215 if (length(@French) > length(@Locale));
216($Locale, @Locale) = ($Spanish, @Spanish)
217 if (length(@Spanish) > length(@Locale));
218
219print "# Locale = $Locale\n";
220print "# Alnum_ = @Locale\n";
221
222{
223 local $^W = 0;
224 setlocale(LC_ALL, $Locale);
225}
226
227{
228 my $i = 0;
229
230 for (@Locale) {
231 $iLocale{$_} = $i++;
232 }
233}
234
235# Sieve the uppercase and the lowercase.
236
237for (@Locale) {
238 if (/[^\d_]/) { # skip digits and the _
239 if (lc eq $_) {
240 $UPPER{$_} = uc;
241 } else {
242 $lower{$_} = lc;
243 }
244 }
245}
246
247# Cross-check the upper and the lower.
248# Yes, this is broken when the upper<->lower changes the number of
249# the glyphs (e.g. the German sharp-s aka double-s aka sz-ligature.
250# But so far all the implementations do this wrong so we can do it wrong too.
251
252for (keys %UPPER) {
253 if (defined $lower{$UPPER{$_}}) {
254 if ($_ ne $lower{$UPPER{$_}}) {
255 print 'not ';
256 last;
257 }
258 }
259}
260print "ok 63\n";
261
262for (keys %lower) {
263 if (defined $UPPER{$lower{$_}}) {
264 if ($_ ne $UPPER{$lower{$_}}) {
265 print 'not ';
266 last;
267 }
268 }
269}
270print "ok 64\n";
271
272# Find the alphabets that are not alphabets in the default locale.
273
274{
275 no locale;
276
277 for (keys %UPPER, keys %lower) {
278 push(@Neoalpha, $_) if (/\W/);
279 }
280}
281
282@Neoalpha = sort @Neoalpha;
283
284# Test \w.
285
286{
287 my $word = join('', @Neoalpha);
288
289 $word =~ /^(\w*)$/;
290
291 print 'not ' if ($1 ne $word);
292}
293print "ok 65\n";
294
295# Find places where the collation order differs from the default locale.
296
297{
298 no locale;
299
300 my @k = sort (keys %UPPER, keys %lower);
301 my ($i, $j, @d);
302
303 for ($i = 0; $i < @k; $i++) {
304 for ($j = $i + 1; $j < @k; $j++) {
305 if ($iLocale{$k[$j]} < $iLocale{$k[$i]}) {
306 push(@d, [$k[$j], $k[$i]]);
307 }
308 }
309 }
310
311 # Cross-check those places.
312
313 for (@d) {
314 ($i, $j) = @$_;
315 print 'not ' if ($i le $j or not (($i cmp $j) == 1));
316 }
317}
318print "ok 66\n";
319
320# Cross-check whole character set.
321
322for (map { chr } 0..255) {
323 if (/\w/ and /\W/) { print 'not '; last }
324 if (/\d/ and /\D/) { print 'not '; last }
325 if (/\s/ and /\S/) { print 'not '; last }
326 if (/\w/ and /\D/ and not /_/ and
327 not (exists $UPPER{$_} or exists $lower{$_})) {
328 print 'not '; last
329 }
330}
331print "ok 67\n";