Remove bad advice from perllocale.pod
[p5sagit/p5-mst-13.2.git] / t / pragma / locale.t
CommitLineData
8ebc5c01 1#!./perl -wT
2
3BEGIN {
4 chdir 't' if -d 't';
5 @INC = '../lib';
6}
7
8use strict;
9
10my $have_setlocale = 0;
11eval {
12 require POSIX;
13 import POSIX ':locale_h';
14 $have_setlocale++;
15};
16
17print "1..", ($have_setlocale ? 104 : 98), "\n";
18
19use vars qw($a
20 $English $German $French $Spanish
21 @C @English @German @French @Spanish
22 $Locale @Locale %iLocale %UPPER %lower @Neoalpha);
23
24$a = 'abc %';
25
26sub ok {
27 my ($n, $result) = @_;
28
29 print 'not ' unless ($result);
30 print "ok $n\n";
31}
32
33# First we'll do a lot of taint checking for locales.
34# This is the easiest to test, actually, as any locale,
35# even the default locale will taint under 'use locale'.
36
37sub is_tainted { # hello, camel two.
38 my $dummy;
39 not eval { $dummy = join("", @_), kill 0; 1 }
40}
41
42sub check_taint ($$) {
43 ok $_[0], is_tainted($_[1]);
44}
45
46sub check_taint_not ($$) {
47 ok $_[0], not is_tainted($_[1]);
48}
49
50use locale; # engage locale and therefore locale taint.
51
52check_taint_not 1, $a;
53
54check_taint 2, uc($a);
55check_taint 3, "\U$a";
56check_taint 4, ucfirst($a);
57check_taint 5, "\u$a";
58check_taint 6, lc($a);
59check_taint 7, "\L$a";
60check_taint 8, lcfirst($a);
61check_taint 9, "\l$a";
62
63check_taint 10, sprintf('%e', 123.456);
64check_taint 11, sprintf('%f', 123.456);
65check_taint 12, sprintf('%g', 123.456);
66check_taint_not 13, sprintf('%d', 123.456);
67check_taint_not 14, sprintf('%x', 123.456);
68
69$_ = $a; # untaint $_
70
71$_ = uc($a); # taint $_
72
73check_taint 15, $_;
74
75/(\w)/; # taint $&, $`, $', $+, $1.
76check_taint 16, $&;
77check_taint 17, $`;
78check_taint 18, $';
79check_taint 19, $+;
80check_taint 20, $1;
81check_taint_not 21, $2;
82
83/(.)/; # untaint $&, $`, $', $+, $1.
84check_taint_not 22, $&;
85check_taint_not 23, $`;
86check_taint_not 24, $';
87check_taint_not 25, $+;
88check_taint_not 26, $1;
89check_taint_not 27, $2;
90
91/(\W)/; # taint $&, $`, $', $+, $1.
92check_taint 28, $&;
93check_taint 29, $`;
94check_taint 30, $';
95check_taint 31, $+;
96check_taint 32, $1;
97check_taint_not 33, $2;
98
99/(\s)/; # taint $&, $`, $', $+, $1.
100check_taint 34, $&;
101check_taint 35, $`;
102check_taint 36, $';
103check_taint 37, $+;
104check_taint 38, $1;
105check_taint_not 39, $2;
106
107/(\S)/; # taint $&, $`, $', $+, $1.
108check_taint 40, $&;
109check_taint 41, $`;
110check_taint 42, $';
111check_taint 43, $+;
112check_taint 44, $1;
113check_taint_not 45, $2;
114
115$_ = $a; # untaint $_
116
117check_taint_not 46, $_;
118
119/(b)/; # this must not taint
120check_taint_not 47, $&;
121check_taint_not 48, $`;
122check_taint_not 49, $';
123check_taint_not 50, $+;
124check_taint_not 51, $1;
125check_taint_not 52, $2;
126
127$_ = $a; # untaint $_
128
129check_taint_not 53, $_;
130
131$b = uc($a); # taint $b
132s/(.+)/$b/; # this must taint only the $_
133
134check_taint 54, $_;
135check_taint_not 55, $&;
136check_taint_not 56, $`;
137check_taint_not 57, $';
138check_taint_not 58, $+;
139check_taint_not 59, $1;
140check_taint_not 60, $2;
141
142$_ = $a; # untaint $_
143
144s/(.+)/b/; # this must not taint
145check_taint_not 61, $_;
146check_taint_not 62, $&;
147check_taint_not 63, $`;
148check_taint_not 64, $';
149check_taint_not 65, $+;
150check_taint_not 66, $1;
151check_taint_not 67, $2;
152
153$b = $a; # untaint $b
154
155($b = $a) =~ s/\w/$&/;
156check_taint 68, $b; # $b should be tainted.
157check_taint_not 69, $a; # $a should be not.
158
159$_ = $a; # untaint $_
160
161s/(\w)/\l$1/; # this must taint
162check_taint 70, $_;
163check_taint 71, $&;
164check_taint 72, $`;
165check_taint 73, $';
166check_taint 74, $+;
167check_taint 75, $1;
168check_taint_not 76, $2;
169
170$_ = $a; # untaint $_
171
172s/(\w)/\L$1/; # this must taint
173check_taint 77, $_;
174check_taint 78, $&;
175check_taint 79, $`;
176check_taint 80, $';
177check_taint 81, $+;
178check_taint 82, $1;
179check_taint_not 83, $2;
180
181$_ = $a; # untaint $_
182
183s/(\w)/\u$1/; # this must taint
184check_taint 84, $_;
185check_taint 85, $&;
186check_taint 86, $`;
187check_taint 87, $';
188check_taint 88, $+;
189check_taint 89, $1;
190check_taint_not 90, $2;
191
192$_ = $a; # untaint $_
193
194s/(\w)/\U$1/; # this must taint
195check_taint 91, $_;
196check_taint 92, $&;
197check_taint 93, $`;
198check_taint 94, $';
199check_taint 95, $+;
200check_taint 96, $1;
201check_taint_not 97, $2;
202
203# After all this tainting $a should be cool.
204
205check_taint_not 98, $a;
206
207# I think we've seen quite enough of taint.
208# Let us do some *real* locale work now,
209# unless setlocale() is missing (i.e. minitest).
210
211exit unless $have_setlocale;
212
213sub getalnum {
214 sort grep /\w/, map { chr } 0..255
215}
216
217sub locatelocale ($$@) {
218 my ($lcall, $alnum, @try) = @_;
219
220 undef $$lcall;
221
222 for (@try) {
223 local $^W = 0; # suppress "Subroutine LC_ALL redefined"
224 if (setlocale(&LC_ALL, $_)) {
225 $$lcall = $_;
226 @$alnum = &getalnum;
227 last;
228 }
229 }
230
231 @$alnum = () unless (defined $$lcall);
232}
233
234# Find some default locale
235
236locatelocale(\$Locale, \@Locale, qw(C POSIX));
237
238# Find some English locale
239
240locatelocale(\$English, \@English,
241 qw(en_US.ISO8859-1 en_GB.ISO8859-1
242 en en_US en_UK en_IE en_CA en_AU en_NZ
243 english english.iso88591
244 american american.iso88591
245 british british.iso88591
246 ));
247
248# Find some German locale
249
250locatelocale(\$German, \@German,
251 qw(de_DE.ISO8859-1 de_AT.ISO8859-1 de_CH.ISO8859-1
252 de de_DE de_AT de_CH
253 german german.iso88591));
254
255# Find some French locale
256
257locatelocale(\$French, \@French,
258 qw(fr_FR.ISO8859-1 fr_BE.ISO8859-1 fr_CA.ISO8859-1 fr_CH.ISO8859-1
259 fr fr_FR fr_BE fr_CA fr_CH
260 french french.iso88591));
261
262# Find some Spanish locale
263
264locatelocale(\$Spanish, \@Spanish,
265 qw(es_AR.ISO8859-1 es_BO.ISO8859-1 es_CL.ISO8859-1
266 es_CO.ISO8859-1 es_CR.ISO8859-1 es_EC.ISO8859-1
267 es_ES.ISO8859-1 es_GT.ISO8859-1 es_MX.ISO8859-1
268 es_NI.ISO8859-1 es_PA.ISO8859-1 es_PE.ISO8859-1
269 es_PY.ISO8859-1 es_SV.ISO8859-1 es_UY.ISO8859-1 es_VE.ISO8859-1
270 es es_AR es_BO es_CL
271 es_CO es_CR es_EC
272 es_ES es_GT es_MX
273 es_NI es_PA es_PE
274 es_PY es_SV es_UY es_VE
275 spanish spanish.iso88591));
276
277# Select the largest of the alpha(num)bets.
278
279($Locale, @Locale) = ($English, @English)
280 if (length(@English) > length(@Locale));
281($Locale, @Locale) = ($German, @German)
282 if (length(@German) > length(@Locale));
283($Locale, @Locale) = ($French, @French)
284 if (length(@French) > length(@Locale));
285($Locale, @Locale) = ($Spanish, @Spanish)
286 if (length(@Spanish) > length(@Locale));
287
288print "# Locale = $Locale\n";
289print "# Alnum_ = @Locale\n";
290
291{
292 local $^W = 0;
293 setlocale(&LC_ALL, $Locale);
294}
295
296{
297 my $i = 0;
298
299 for (@Locale) {
300 $iLocale{$_} = $i++;
301 }
302}
303
304# Sieve the uppercase and the lowercase.
305
306for (@Locale) {
307 if (/[^\d_]/) { # skip digits and the _
308 if (lc eq $_) {
309 $UPPER{$_} = uc;
310 } else {
311 $lower{$_} = lc;
312 }
313 }
314}
315
316# Cross-check the upper and the lower.
317# Yes, this is broken when the upper<->lower changes the number of
318# the glyphs (e.g. the German sharp-s aka double-s aka sz-ligature,
319# or the Dutch IJ or the Spanish LL or ...)
320# But so far all the implementations do this wrong so we can do it wrong too.
321
322for (keys %UPPER) {
323 if (defined $lower{$UPPER{$_}}) {
324 if ($_ ne $lower{$UPPER{$_}}) {
325 print 'not ';
326 last;
327 }
328 }
329}
330print "ok 99\n";
331
332for (keys %lower) {
333 if (defined $UPPER{$lower{$_}}) {
334 if ($_ ne $UPPER{$lower{$_}}) {
335 print 'not ';
336 last;
337 }
338 }
339}
340print "ok 100\n";
341
342# Find the alphabets that are not alphabets in the default locale.
343
344{
345 no locale;
346
347 for (keys %UPPER, keys %lower) {
348 push(@Neoalpha, $_) if (/\W/);
349 }
350}
351
352@Neoalpha = sort @Neoalpha;
353
354# Test \w.
355
356{
357 my $word = join('', @Neoalpha);
358
359 $word =~ /^(\w*)$/;
360
361 print 'not ' if ($1 ne $word);
362}
363print "ok 101\n";
364
365# Find places where the collation order differs from the default locale.
366
367{
368 my (@k, $i, $j, @d);
369
370 {
371 no locale;
372
373 @k = sort (keys %UPPER, keys %lower);
374 }
375
376 for ($i = 0; $i < @k; $i++) {
377 for ($j = $i + 1; $j < @k; $j++) {
378 if ($iLocale{$k[$j]} < $iLocale{$k[$i]}) {
379 push(@d, [$k[$j], $k[$i]]);
380 }
381 }
382 }
383
384 # Cross-check those places.
385
386 for (@d) {
387 ($i, $j) = @$_;
388 if ($i gt $j) {
389 print "# i = $i, j = $j, i ",
390 $i le $j ? 'le' : 'gt', " j\n";
391 print 'not ';
392 last;
393 }
394 }
395}
396print "ok 102\n";
397
398# Cross-check whole character set.
399
400for (map { chr } 0..255) {
401 if (/\w/ and /\W/) { print 'not '; last }
402 if (/\d/ and /\D/) { print 'not '; last }
403 if (/\s/ and /\S/) { print 'not '; last }
404 if (/\w/ and /\D/ and not /_/ and
405 not (exists $UPPER{$_} or exists $lower{$_})) {
406 print 'not ';
407 last;
408 }
409}
410print "ok 103\n";
411
412# The @Locale should be internally consistent.
413
414{
415 my ($from, $to, , $lesser, $greater);
416
417 for (0..9) {
418 # Select a slice.
419 $from = int(($_*@Locale)/10);
420 $to = $from + int(@Locale/10);
421 $to = $#Locale if ($to > $#Locale);
422 $lesser = join('', @Locale[$from..$to]);
423 # Select a slice one character on.
424 $from++; $to++;
425 $to = $#Locale if ($to > $#Locale);
426 $greater = join('', @Locale[$from..$to]);
427 if (not ($lesser lt $greater) or
428 not ($lesser le $greater) or
429 not ($lesser ne $greater) or
430 ($lesser eq $greater) or
431 ($lesser ge $greater) or
432 ($lesser gt $greater) or
433 ($greater lt $lesser ) or
434 ($greater le $lesser ) or
435 not ($greater ne $lesser ) or
436 ($greater eq $lesser ) or
437 not ($greater ge $lesser ) or
438 not ($greater gt $lesser ) or
439 # Well, these two are sort of redundant because @Locale
440 # was derived using cmp.
441 not (($lesser cmp $greater) == -1) or
442 not (($greater cmp $lesser ) == 1)
443 ) {
444 print 'not ';
445 last;
446 }
447 }
448}
449print "ok 104\n";