Refresh Test::Harness to 1.15
[p5sagit/p5-mst-13.2.git] / t / pragma / locale.t
CommitLineData
8ebc5c01 1#!./perl -wT
2
3BEGIN {
4 chdir 't' if -d 't';
5 @INC = '../lib';
6}
7
8use strict;
9
10my $have_setlocale = 0;
11eval {
12 require POSIX;
13 import POSIX ':locale_h';
14 $have_setlocale++;
15};
16
17print "1..", ($have_setlocale ? 104 : 98), "\n";
18
19use vars qw($a
20 $English $German $French $Spanish
21 @C @English @German @French @Spanish
22 $Locale @Locale %iLocale %UPPER %lower @Neoalpha);
23
24$a = 'abc %';
25
26sub ok {
27 my ($n, $result) = @_;
28
29 print 'not ' unless ($result);
30 print "ok $n\n";
31}
32
33# First we'll do a lot of taint checking for locales.
34# This is the easiest to test, actually, as any locale,
35# even the default locale will taint under 'use locale'.
36
37sub is_tainted { # hello, camel two.
38 my $dummy;
39 not eval { $dummy = join("", @_), kill 0; 1 }
40}
41
42sub check_taint ($$) {
43 ok $_[0], is_tainted($_[1]);
44}
45
46sub check_taint_not ($$) {
47 ok $_[0], not is_tainted($_[1]);
48}
49
50use locale; # engage locale and therefore locale taint.
51
52check_taint_not 1, $a;
53
54check_taint 2, uc($a);
55check_taint 3, "\U$a";
56check_taint 4, ucfirst($a);
57check_taint 5, "\u$a";
58check_taint 6, lc($a);
59check_taint 7, "\L$a";
60check_taint 8, lcfirst($a);
61check_taint 9, "\l$a";
62
63check_taint 10, sprintf('%e', 123.456);
64check_taint 11, sprintf('%f', 123.456);
65check_taint 12, sprintf('%g', 123.456);
66check_taint_not 13, sprintf('%d', 123.456);
67check_taint_not 14, sprintf('%x', 123.456);
68
69$_ = $a; # untaint $_
70
71$_ = uc($a); # taint $_
72
73check_taint 15, $_;
74
75/(\w)/; # taint $&, $`, $', $+, $1.
76check_taint 16, $&;
77check_taint 17, $`;
78check_taint 18, $';
79check_taint 19, $+;
80check_taint 20, $1;
81check_taint_not 21, $2;
82
83/(.)/; # untaint $&, $`, $', $+, $1.
84check_taint_not 22, $&;
85check_taint_not 23, $`;
86check_taint_not 24, $';
87check_taint_not 25, $+;
88check_taint_not 26, $1;
89check_taint_not 27, $2;
90
91/(\W)/; # taint $&, $`, $', $+, $1.
92check_taint 28, $&;
93check_taint 29, $`;
94check_taint 30, $';
95check_taint 31, $+;
96check_taint 32, $1;
97check_taint_not 33, $2;
98
99/(\s)/; # taint $&, $`, $', $+, $1.
100check_taint 34, $&;
101check_taint 35, $`;
102check_taint 36, $';
103check_taint 37, $+;
104check_taint 38, $1;
105check_taint_not 39, $2;
106
107/(\S)/; # taint $&, $`, $', $+, $1.
108check_taint 40, $&;
109check_taint 41, $`;
110check_taint 42, $';
111check_taint 43, $+;
112check_taint 44, $1;
113check_taint_not 45, $2;
114
115$_ = $a; # untaint $_
116
117check_taint_not 46, $_;
118
119/(b)/; # this must not taint
120check_taint_not 47, $&;
121check_taint_not 48, $`;
122check_taint_not 49, $';
123check_taint_not 50, $+;
124check_taint_not 51, $1;
125check_taint_not 52, $2;
126
127$_ = $a; # untaint $_
128
129check_taint_not 53, $_;
130
131$b = uc($a); # taint $b
132s/(.+)/$b/; # this must taint only the $_
133
134check_taint 54, $_;
135check_taint_not 55, $&;
136check_taint_not 56, $`;
137check_taint_not 57, $';
138check_taint_not 58, $+;
139check_taint_not 59, $1;
140check_taint_not 60, $2;
141
142$_ = $a; # untaint $_
143
144s/(.+)/b/; # this must not taint
145check_taint_not 61, $_;
146check_taint_not 62, $&;
147check_taint_not 63, $`;
148check_taint_not 64, $';
149check_taint_not 65, $+;
150check_taint_not 66, $1;
151check_taint_not 67, $2;
152
153$b = $a; # untaint $b
154
155($b = $a) =~ s/\w/$&/;
156check_taint 68, $b; # $b should be tainted.
157check_taint_not 69, $a; # $a should be not.
158
159$_ = $a; # untaint $_
160
161s/(\w)/\l$1/; # this must taint
162check_taint 70, $_;
163check_taint 71, $&;
164check_taint 72, $`;
165check_taint 73, $';
166check_taint 74, $+;
167check_taint 75, $1;
168check_taint_not 76, $2;
169
170$_ = $a; # untaint $_
171
172s/(\w)/\L$1/; # this must taint
173check_taint 77, $_;
174check_taint 78, $&;
175check_taint 79, $`;
176check_taint 80, $';
177check_taint 81, $+;
178check_taint 82, $1;
179check_taint_not 83, $2;
180
181$_ = $a; # untaint $_
182
183s/(\w)/\u$1/; # this must taint
184check_taint 84, $_;
185check_taint 85, $&;
186check_taint 86, $`;
187check_taint 87, $';
188check_taint 88, $+;
189check_taint 89, $1;
190check_taint_not 90, $2;
191
192$_ = $a; # untaint $_
193
194s/(\w)/\U$1/; # this must taint
195check_taint 91, $_;
196check_taint 92, $&;
197check_taint 93, $`;
198check_taint 94, $';
199check_taint 95, $+;
200check_taint 96, $1;
201check_taint_not 97, $2;
202
203# After all this tainting $a should be cool.
204
205check_taint_not 98, $a;
206
207# I think we've seen quite enough of taint.
208# Let us do some *real* locale work now,
209# unless setlocale() is missing (i.e. minitest).
210
211exit unless $have_setlocale;
212
213sub getalnum {
214 sort grep /\w/, map { chr } 0..255
215}
216
217sub locatelocale ($$@) {
218 my ($lcall, $alnum, @try) = @_;
219
220 undef $$lcall;
221
222 for (@try) {
223 local $^W = 0; # suppress "Subroutine LC_ALL redefined"
224 if (setlocale(&LC_ALL, $_)) {
225 $$lcall = $_;
226 @$alnum = &getalnum;
227 last;
228 }
229 }
230
231 @$alnum = () unless (defined $$lcall);
232}
233
234# Find some default locale
235
236locatelocale(\$Locale, \@Locale, qw(C POSIX));
237
238# Find some English locale
239
240locatelocale(\$English, \@English,
241 qw(en_US.ISO8859-1 en_GB.ISO8859-1
242 en en_US en_UK en_IE en_CA en_AU en_NZ
243 english english.iso88591
244 american american.iso88591
245 british british.iso88591
246 ));
247
248# Find some German locale
249
250locatelocale(\$German, \@German,
251 qw(de_DE.ISO8859-1 de_AT.ISO8859-1 de_CH.ISO8859-1
252 de de_DE de_AT de_CH
253 german german.iso88591));
254
255# Find some French locale
256
257locatelocale(\$French, \@French,
258 qw(fr_FR.ISO8859-1 fr_BE.ISO8859-1 fr_CA.ISO8859-1 fr_CH.ISO8859-1
259 fr fr_FR fr_BE fr_CA fr_CH
260 french french.iso88591));
261
262# Find some Spanish locale
263
264locatelocale(\$Spanish, \@Spanish,
265 qw(es_AR.ISO8859-1 es_BO.ISO8859-1 es_CL.ISO8859-1
266 es_CO.ISO8859-1 es_CR.ISO8859-1 es_EC.ISO8859-1
267 es_ES.ISO8859-1 es_GT.ISO8859-1 es_MX.ISO8859-1
268 es_NI.ISO8859-1 es_PA.ISO8859-1 es_PE.ISO8859-1
269 es_PY.ISO8859-1 es_SV.ISO8859-1 es_UY.ISO8859-1 es_VE.ISO8859-1
270 es es_AR es_BO es_CL
271 es_CO es_CR es_EC
272 es_ES es_GT es_MX
273 es_NI es_PA es_PE
274 es_PY es_SV es_UY es_VE
275 spanish spanish.iso88591));
276
277# Select the largest of the alpha(num)bets.
278
279($Locale, @Locale) = ($English, @English)
280 if (length(@English) > length(@Locale));
281($Locale, @Locale) = ($German, @German)
282 if (length(@German) > length(@Locale));
283($Locale, @Locale) = ($French, @French)
284 if (length(@French) > length(@Locale));
285($Locale, @Locale) = ($Spanish, @Spanish)
286 if (length(@Spanish) > length(@Locale));
287
288print "# Locale = $Locale\n";
289print "# Alnum_ = @Locale\n";
290
291{
292 local $^W = 0;
293 setlocale(&LC_ALL, $Locale);
294}
295
296{
297 my $i = 0;
298
299 for (@Locale) {
300 $iLocale{$_} = $i++;
301 }
302}
303
304# Sieve the uppercase and the lowercase.
305
306for (@Locale) {
307 if (/[^\d_]/) { # skip digits and the _
308 if (lc eq $_) {
309 $UPPER{$_} = uc;
310 } else {
311 $lower{$_} = lc;
312 }
313 }
314}
315
316# Cross-check the upper and the lower.
317# Yes, this is broken when the upper<->lower changes the number of
318# the glyphs (e.g. the German sharp-s aka double-s aka sz-ligature,
319# or the Dutch IJ or the Spanish LL or ...)
320# But so far all the implementations do this wrong so we can do it wrong too.
321
322for (keys %UPPER) {
323 if (defined $lower{$UPPER{$_}}) {
324 if ($_ ne $lower{$UPPER{$_}}) {
325 print 'not ';
326 last;
327 }
328 }
329}
330print "ok 99\n";
331
332for (keys %lower) {
333 if (defined $UPPER{$lower{$_}}) {
334 if ($_ ne $UPPER{$lower{$_}}) {
335 print 'not ';
336 last;
337 }
338 }
339}
340print "ok 100\n";
341
342# Find the alphabets that are not alphabets in the default locale.
343
344{
345 no locale;
346
347 for (keys %UPPER, keys %lower) {
348 push(@Neoalpha, $_) if (/\W/);
349 }
350}
351
352@Neoalpha = sort @Neoalpha;
353
354# Test \w.
355
356{
357 my $word = join('', @Neoalpha);
358
359 $word =~ /^(\w*)$/;
360
361 print 'not ' if ($1 ne $word);
362}
363print "ok 101\n";
364
365# Find places where the collation order differs from the default locale.
366
774d564b 367print "# testing 102\n";
8ebc5c01 368{
369 my (@k, $i, $j, @d);
370
371 {
372 no locale;
373
374 @k = sort (keys %UPPER, keys %lower);
375 }
376
377 for ($i = 0; $i < @k; $i++) {
378 for ($j = $i + 1; $j < @k; $j++) {
379 if ($iLocale{$k[$j]} < $iLocale{$k[$i]}) {
380 push(@d, [$k[$j], $k[$i]]);
381 }
382 }
383 }
384
385 # Cross-check those places.
386
387 for (@d) {
388 ($i, $j) = @$_;
389 if ($i gt $j) {
774d564b 390 print "# failed 102 at:\n";
8ebc5c01 391 print "# i = $i, j = $j, i ",
392 $i le $j ? 'le' : 'gt', " j\n";
393 print 'not ';
394 last;
395 }
396 }
397}
398print "ok 102\n";
399
400# Cross-check whole character set.
401
774d564b 402print "# testing 103\n";
8ebc5c01 403for (map { chr } 0..255) {
404 if (/\w/ and /\W/) { print 'not '; last }
405 if (/\d/ and /\D/) { print 'not '; last }
406 if (/\s/ and /\S/) { print 'not '; last }
407 if (/\w/ and /\D/ and not /_/ and
408 not (exists $UPPER{$_} or exists $lower{$_})) {
774d564b 409 print "# failed 103 at:\n";
410 print "# ", ord($_), " '$_'\n";
8ebc5c01 411 print 'not ';
412 last;
413 }
414}
415print "ok 103\n";
416
417# The @Locale should be internally consistent.
418
774d564b 419print "# testing 104\n";
8ebc5c01 420{
774d564b 421 my ($from, $to, $lesser, $greater, @test, %test, $test);
8ebc5c01 422
423 for (0..9) {
424 # Select a slice.
425 $from = int(($_*@Locale)/10);
426 $to = $from + int(@Locale/10);
427 $to = $#Locale if ($to > $#Locale);
428 $lesser = join('', @Locale[$from..$to]);
429 # Select a slice one character on.
430 $from++; $to++;
431 $to = $#Locale if ($to > $#Locale);
432 $greater = join('', @Locale[$from..$to]);
774d564b 433 @test =
434 (
435 'not ($lesser lt $greater)', # 0
436 'not ($lesser le $greater)', # 1
437 'not ($lesser ne $greater)', # 2
438 ' ($lesser eq $greater)', # 3
439 ' ($lesser ge $greater)', # 4
440 ' ($lesser gt $greater)', # 5
441 ' ($greater lt $lesser )', # 6
442 ' ($greater le $lesser )', # 7
443 'not ($greater ne $lesser )', # 8
444 ' ($greater eq $lesser )', # 9
445 'not ($greater ge $lesser )', # 10
446 'not ($greater gt $lesser )', # 11
447 # Well, these two are sort of redundant
448 # because @Locale was derived using cmp.
449 'not (($lesser cmp $greater) == -1)', # 12
450 'not (($greater cmp $lesser ) == 1)' # 13
451 );
452 @test{@test} = 0 x @test;
453 $test = 0;
454 for my $ti (@test) { $test{$ti} = eval $ti ; $test ||= $test{$ti} }
455 if ($test) {
456 print "# failed 104 at:\n";
457 print "# lesser = '$lesser'\n";
458 print "# greater = '$greater'\n";
459 print "# (greater) from = $from, to = $to\n";
460 for my $ti (@test) {
461 printf("# %-40s %-4s", $ti,
462 $test{$ti} ? 'FAIL' : 'ok');
463 if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
464 printf("(%s == %4d)", $1, eval $1);
465 }
466 print "\n";
467 }
468
8ebc5c01 469 print 'not ';
470 last;
471 }
472 }
473}
474print "ok 104\n";