bump patchlevel to 5.005_51
[p5sagit/p5-mst-13.2.git] / t / pragma / locale.t
CommitLineData
8ebc5c01 1#!./perl -wT
2
3BEGIN {
4 chdir 't' if -d 't';
5 @INC = '../lib';
b002077a 6 require Config; import Config;
97a0514d 7 if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) {
b002077a 8 print "1..0\n";
9 exit;
10 }
8ebc5c01 11}
12
13use strict;
14
15my $have_setlocale = 0;
16eval {
17 require POSIX;
18 import POSIX ':locale_h';
19 $have_setlocale++;
20};
21
6dead956 22# Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1"
f6c6487a 23# and mingw32 uses said silly CRT
24$have_setlocale = 0 if $^O eq 'MSWin32' && $Config{cc} =~ /^(cl|gcc)/i;
6dead956 25
9fc9f3bf 26print "1..", ($have_setlocale ? 102 : 98), "\n";
8ebc5c01 27
28use vars qw($a
29 $English $German $French $Spanish
30 @C @English @German @French @Spanish
31 $Locale @Locale %iLocale %UPPER %lower @Neoalpha);
32
33$a = 'abc %';
34
35sub ok {
36 my ($n, $result) = @_;
37
38 print 'not ' unless ($result);
39 print "ok $n\n";
40}
41
42# First we'll do a lot of taint checking for locales.
43# This is the easiest to test, actually, as any locale,
44# even the default locale will taint under 'use locale'.
45
46sub is_tainted { # hello, camel two.
3fe9a6f1 47 local $^W; # no warnings 'undef'
8ebc5c01 48 my $dummy;
49 not eval { $dummy = join("", @_), kill 0; 1 }
50}
51
52sub check_taint ($$) {
53 ok $_[0], is_tainted($_[1]);
54}
55
56sub check_taint_not ($$) {
57 ok $_[0], not is_tainted($_[1]);
58}
59
60use locale; # engage locale and therefore locale taint.
61
62check_taint_not 1, $a;
63
64check_taint 2, uc($a);
65check_taint 3, "\U$a";
66check_taint 4, ucfirst($a);
67check_taint 5, "\u$a";
68check_taint 6, lc($a);
69check_taint 7, "\L$a";
70check_taint 8, lcfirst($a);
71check_taint 9, "\l$a";
72
73check_taint 10, sprintf('%e', 123.456);
74check_taint 11, sprintf('%f', 123.456);
75check_taint 12, sprintf('%g', 123.456);
76check_taint_not 13, sprintf('%d', 123.456);
77check_taint_not 14, sprintf('%x', 123.456);
78
79$_ = $a; # untaint $_
80
81$_ = uc($a); # taint $_
82
83check_taint 15, $_;
84
85/(\w)/; # taint $&, $`, $', $+, $1.
86check_taint 16, $&;
87check_taint 17, $`;
88check_taint 18, $';
89check_taint 19, $+;
90check_taint 20, $1;
91check_taint_not 21, $2;
92
93/(.)/; # untaint $&, $`, $', $+, $1.
94check_taint_not 22, $&;
95check_taint_not 23, $`;
96check_taint_not 24, $';
97check_taint_not 25, $+;
98check_taint_not 26, $1;
99check_taint_not 27, $2;
100
101/(\W)/; # taint $&, $`, $', $+, $1.
102check_taint 28, $&;
103check_taint 29, $`;
104check_taint 30, $';
105check_taint 31, $+;
106check_taint 32, $1;
107check_taint_not 33, $2;
108
109/(\s)/; # taint $&, $`, $', $+, $1.
110check_taint 34, $&;
111check_taint 35, $`;
112check_taint 36, $';
113check_taint 37, $+;
114check_taint 38, $1;
115check_taint_not 39, $2;
116
117/(\S)/; # taint $&, $`, $', $+, $1.
118check_taint 40, $&;
119check_taint 41, $`;
120check_taint 42, $';
121check_taint 43, $+;
122check_taint 44, $1;
123check_taint_not 45, $2;
124
125$_ = $a; # untaint $_
126
127check_taint_not 46, $_;
128
129/(b)/; # this must not taint
130check_taint_not 47, $&;
131check_taint_not 48, $`;
132check_taint_not 49, $';
133check_taint_not 50, $+;
134check_taint_not 51, $1;
135check_taint_not 52, $2;
136
137$_ = $a; # untaint $_
138
139check_taint_not 53, $_;
140
141$b = uc($a); # taint $b
142s/(.+)/$b/; # this must taint only the $_
143
144check_taint 54, $_;
145check_taint_not 55, $&;
146check_taint_not 56, $`;
147check_taint_not 57, $';
148check_taint_not 58, $+;
149check_taint_not 59, $1;
150check_taint_not 60, $2;
151
152$_ = $a; # untaint $_
153
154s/(.+)/b/; # this must not taint
155check_taint_not 61, $_;
156check_taint_not 62, $&;
157check_taint_not 63, $`;
158check_taint_not 64, $';
159check_taint_not 65, $+;
160check_taint_not 66, $1;
161check_taint_not 67, $2;
162
163$b = $a; # untaint $b
164
165($b = $a) =~ s/\w/$&/;
166check_taint 68, $b; # $b should be tainted.
167check_taint_not 69, $a; # $a should be not.
168
169$_ = $a; # untaint $_
170
171s/(\w)/\l$1/; # this must taint
172check_taint 70, $_;
173check_taint 71, $&;
174check_taint 72, $`;
175check_taint 73, $';
176check_taint 74, $+;
177check_taint 75, $1;
178check_taint_not 76, $2;
179
180$_ = $a; # untaint $_
181
182s/(\w)/\L$1/; # this must taint
183check_taint 77, $_;
184check_taint 78, $&;
185check_taint 79, $`;
186check_taint 80, $';
187check_taint 81, $+;
188check_taint 82, $1;
189check_taint_not 83, $2;
190
191$_ = $a; # untaint $_
192
193s/(\w)/\u$1/; # this must taint
194check_taint 84, $_;
195check_taint 85, $&;
196check_taint 86, $`;
197check_taint 87, $';
198check_taint 88, $+;
199check_taint 89, $1;
200check_taint_not 90, $2;
201
202$_ = $a; # untaint $_
203
204s/(\w)/\U$1/; # this must taint
205check_taint 91, $_;
206check_taint 92, $&;
207check_taint 93, $`;
208check_taint 94, $';
209check_taint 95, $+;
210check_taint 96, $1;
211check_taint_not 97, $2;
212
213# After all this tainting $a should be cool.
214
215check_taint_not 98, $a;
216
217# I think we've seen quite enough of taint.
218# Let us do some *real* locale work now,
219# unless setlocale() is missing (i.e. minitest).
220
221exit unless $have_setlocale;
222
223sub getalnum {
224 sort grep /\w/, map { chr } 0..255
225}
226
227sub locatelocale ($$@) {
228 my ($lcall, $alnum, @try) = @_;
229
230 undef $$lcall;
231
232 for (@try) {
233 local $^W = 0; # suppress "Subroutine LC_ALL redefined"
234 if (setlocale(&LC_ALL, $_)) {
235 $$lcall = $_;
236 @$alnum = &getalnum;
237 last;
238 }
239 }
240
241 @$alnum = () unless (defined $$lcall);
242}
243
244# Find some default locale
245
246locatelocale(\$Locale, \@Locale, qw(C POSIX));
247
248# Find some English locale
249
250locatelocale(\$English, \@English,
251 qw(en_US.ISO8859-1 en_GB.ISO8859-1
252 en en_US en_UK en_IE en_CA en_AU en_NZ
253 english english.iso88591
254 american american.iso88591
255 british british.iso88591
256 ));
257
258# Find some German locale
259
260locatelocale(\$German, \@German,
261 qw(de_DE.ISO8859-1 de_AT.ISO8859-1 de_CH.ISO8859-1
262 de de_DE de_AT de_CH
263 german german.iso88591));
264
265# Find some French locale
266
267locatelocale(\$French, \@French,
268 qw(fr_FR.ISO8859-1 fr_BE.ISO8859-1 fr_CA.ISO8859-1 fr_CH.ISO8859-1
269 fr fr_FR fr_BE fr_CA fr_CH
270 french french.iso88591));
271
272# Find some Spanish locale
273
274locatelocale(\$Spanish, \@Spanish,
275 qw(es_AR.ISO8859-1 es_BO.ISO8859-1 es_CL.ISO8859-1
276 es_CO.ISO8859-1 es_CR.ISO8859-1 es_EC.ISO8859-1
277 es_ES.ISO8859-1 es_GT.ISO8859-1 es_MX.ISO8859-1
278 es_NI.ISO8859-1 es_PA.ISO8859-1 es_PE.ISO8859-1
279 es_PY.ISO8859-1 es_SV.ISO8859-1 es_UY.ISO8859-1 es_VE.ISO8859-1
280 es es_AR es_BO es_CL
281 es_CO es_CR es_EC
282 es_ES es_GT es_MX
283 es_NI es_PA es_PE
284 es_PY es_SV es_UY es_VE
285 spanish spanish.iso88591));
286
287# Select the largest of the alpha(num)bets.
288
289($Locale, @Locale) = ($English, @English)
545c8fcc 290 if (@English > @Locale);
8ebc5c01 291($Locale, @Locale) = ($German, @German)
545c8fcc 292 if (@German > @Locale);
8ebc5c01 293($Locale, @Locale) = ($French, @French)
545c8fcc 294 if (@French > @Locale);
8ebc5c01 295($Locale, @Locale) = ($Spanish, @Spanish)
545c8fcc 296 if (@Spanish > @Locale);
8ebc5c01 297
8ebc5c01 298{
299 local $^W = 0;
300 setlocale(&LC_ALL, $Locale);
301}
302
4599a1de 303# Sort it now that LC_ALL has been set.
304
305@Locale = sort @Locale;
306
307print "# Locale = $Locale\n";
308print "# Alnum_ = @Locale\n";
309
8ebc5c01 310{
311 my $i = 0;
312
313 for (@Locale) {
314 $iLocale{$_} = $i++;
315 }
316}
317
318# Sieve the uppercase and the lowercase.
319
320for (@Locale) {
321 if (/[^\d_]/) { # skip digits and the _
322 if (lc eq $_) {
323 $UPPER{$_} = uc;
324 } else {
325 $lower{$_} = lc;
326 }
327 }
328}
329
8ebc5c01 330# Find the alphabets that are not alphabets in the default locale.
331
332{
333 no locale;
334
335 for (keys %UPPER, keys %lower) {
336 push(@Neoalpha, $_) if (/\W/);
337 }
338}
339
340@Neoalpha = sort @Neoalpha;
341
342# Test \w.
343
344{
345 my $word = join('', @Neoalpha);
346
347 $word =~ /^(\w*)$/;
348
349 print 'not ' if ($1 ne $word);
350}
9fc9f3bf 351print "ok 99\n";
8ebc5c01 352
353# Find places where the collation order differs from the default locale.
354
9fc9f3bf 355print "# testing 100\n";
8ebc5c01 356{
357 my (@k, $i, $j, @d);
358
359 {
360 no locale;
361
362 @k = sort (keys %UPPER, keys %lower);
363 }
364
365 for ($i = 0; $i < @k; $i++) {
366 for ($j = $i + 1; $j < @k; $j++) {
367 if ($iLocale{$k[$j]} < $iLocale{$k[$i]}) {
368 push(@d, [$k[$j], $k[$i]]);
369 }
370 }
371 }
372
373 # Cross-check those places.
374
375 for (@d) {
376 ($i, $j) = @$_;
377 if ($i gt $j) {
9fc9f3bf 378 print "# failed 100 at:\n";
8ebc5c01 379 print "# i = $i, j = $j, i ",
380 $i le $j ? 'le' : 'gt', " j\n";
381 print 'not ';
382 last;
383 }
384 }
385}
9fc9f3bf 386print "ok 100\n";
8ebc5c01 387
388# Cross-check whole character set.
389
9fc9f3bf 390print "# testing 101\n";
8ebc5c01 391for (map { chr } 0..255) {
392 if (/\w/ and /\W/) { print 'not '; last }
393 if (/\d/ and /\D/) { print 'not '; last }
394 if (/\s/ and /\S/) { print 'not '; last }
395 if (/\w/ and /\D/ and not /_/ and
396 not (exists $UPPER{$_} or exists $lower{$_})) {
9fc9f3bf 397 print "# failed 101 at:\n";
774d564b 398 print "# ", ord($_), " '$_'\n";
8ebc5c01 399 print 'not ';
400 last;
401 }
402}
9fc9f3bf 403print "ok 101\n";
8ebc5c01 404
fb73857a 405# Test for read-onlys.
406
407{
408 no locale;
409 $a = "qwerty";
410 {
411 use locale;
412 print "not " if $a cmp "qwerty";
413 }
414}
415print "ok 102\n";
416
417# This test must be the last one because its failure is not fatal.
8ebc5c01 418# The @Locale should be internally consistent.
90248788 419# Thanks to Hallvard Furuseth <h.b.furuseth@usit.uio.no>
420# for inventing a way to test for ordering consistency
421# without requiring any particular order.
422# ++$jhi;#@iki.fi
8ebc5c01 423
fb73857a 424print "# testing 103\n";
8ebc5c01 425{
90248788 426 my ($from, $to, $lesser, $greater, @test, %test, $test, $yes, $no, $sign);
8ebc5c01 427
428 for (0..9) {
429 # Select a slice.
430 $from = int(($_*@Locale)/10);
431 $to = $from + int(@Locale/10);
432 $to = $#Locale if ($to > $#Locale);
433 $lesser = join('', @Locale[$from..$to]);
434 # Select a slice one character on.
435 $from++; $to++;
436 $to = $#Locale if ($to > $#Locale);
437 $greater = join('', @Locale[$from..$to]);
90248788 438 ($yes, $no, $sign) = ($lesser lt $greater
439 ? (" ", "not ", 1)
440 : ("not ", " ", -1));
441 # all these tests should FAIL (return 0).
774d564b 442 @test =
443 (
90248788 444 $no.' ($lesser lt $greater)', # 0
445 $no.' ($lesser le $greater)', # 1
fb73857a 446 'not ($lesser ne $greater)', # 2
447 ' ($lesser eq $greater)', # 3
90248788 448 $yes.' ($lesser ge $greater)', # 4
449 $yes.' ($lesser gt $greater)', # 5
450 $yes.' ($greater lt $lesser )', # 6
451 $yes.' ($greater le $lesser )', # 7
fb73857a 452 'not ($greater ne $lesser )', # 8
453 ' ($greater eq $lesser )', # 9
90248788 454 $no.' ($greater ge $lesser )', # 10
455 $no.' ($greater gt $lesser )', # 11
456 'not (($lesser cmp $greater) == -$sign)' # 12
774d564b 457 );
458 @test{@test} = 0 x @test;
459 $test = 0;
460 for my $ti (@test) { $test{$ti} = eval $ti ; $test ||= $test{$ti} }
461 if ($test) {
fb73857a 462 print "# failed 103 at:\n";
774d564b 463 print "# lesser = '$lesser'\n";
464 print "# greater = '$greater'\n";
90248788 465 print "# lesser cmp greater = ", $lesser cmp $greater, "\n";
466 print "# greater cmp lesser = ", $greater cmp $lesser, "\n";
774d564b 467 print "# (greater) from = $from, to = $to\n";
468 for my $ti (@test) {
469 printf("# %-40s %-4s", $ti,
470 $test{$ti} ? 'FAIL' : 'ok');
471 if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
472 printf("(%s == %4d)", $1, eval $1);
473 }
474 print "\n";
475 }
476
fb73857a 477 warn "The locale definition on your system may have errors.\n";
8ebc5c01 478 last;
479 }
480 }
481}
90248788 482
483# eof