(?>) broken in RE
[p5sagit/p5-mst-13.2.git] / t / pragma / locale.t
CommitLineData
8ebc5c01 1#!./perl -wT
2
3BEGIN {
4 chdir 't' if -d 't';
5 @INC = '../lib';
b002077a 6 require Config; import Config;
97a0514d 7 if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) {
b002077a 8 print "1..0\n";
9 exit;
10 }
8ebc5c01 11}
12
13use strict;
14
15my $have_setlocale = 0;
16eval {
17 require POSIX;
18 import POSIX ':locale_h';
19 $have_setlocale++;
20};
21
6dead956 22# Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1"
f6c6487a 23# and mingw32 uses said silly CRT
24$have_setlocale = 0 if $^O eq 'MSWin32' && $Config{cc} =~ /^(cl|gcc)/i;
6dead956 25
8ada0baa 26# 103 (the last test) may fail but that is okay.
27# (It indicates something broken in the environment, not Perl)
28# Therefore .. only until 102, not 103.
9fc9f3bf 29print "1..", ($have_setlocale ? 102 : 98), "\n";
8ebc5c01 30
31use vars qw($a
32 $English $German $French $Spanish
33 @C @English @German @French @Spanish
34 $Locale @Locale %iLocale %UPPER %lower @Neoalpha);
35
36$a = 'abc %';
37
38sub ok {
39 my ($n, $result) = @_;
40
41 print 'not ' unless ($result);
42 print "ok $n\n";
43}
44
45# First we'll do a lot of taint checking for locales.
46# This is the easiest to test, actually, as any locale,
47# even the default locale will taint under 'use locale'.
48
49sub is_tainted { # hello, camel two.
3fe9a6f1 50 local $^W; # no warnings 'undef'
8ebc5c01 51 my $dummy;
52 not eval { $dummy = join("", @_), kill 0; 1 }
53}
54
55sub check_taint ($$) {
56 ok $_[0], is_tainted($_[1]);
57}
58
59sub check_taint_not ($$) {
60 ok $_[0], not is_tainted($_[1]);
61}
62
63use locale; # engage locale and therefore locale taint.
64
65check_taint_not 1, $a;
66
67check_taint 2, uc($a);
68check_taint 3, "\U$a";
69check_taint 4, ucfirst($a);
70check_taint 5, "\u$a";
71check_taint 6, lc($a);
72check_taint 7, "\L$a";
73check_taint 8, lcfirst($a);
74check_taint 9, "\l$a";
75
76check_taint 10, sprintf('%e', 123.456);
77check_taint 11, sprintf('%f', 123.456);
78check_taint 12, sprintf('%g', 123.456);
79check_taint_not 13, sprintf('%d', 123.456);
80check_taint_not 14, sprintf('%x', 123.456);
81
82$_ = $a; # untaint $_
83
84$_ = uc($a); # taint $_
85
86check_taint 15, $_;
87
88/(\w)/; # taint $&, $`, $', $+, $1.
89check_taint 16, $&;
90check_taint 17, $`;
91check_taint 18, $';
92check_taint 19, $+;
93check_taint 20, $1;
94check_taint_not 21, $2;
95
96/(.)/; # untaint $&, $`, $', $+, $1.
97check_taint_not 22, $&;
98check_taint_not 23, $`;
99check_taint_not 24, $';
100check_taint_not 25, $+;
101check_taint_not 26, $1;
102check_taint_not 27, $2;
103
104/(\W)/; # taint $&, $`, $', $+, $1.
105check_taint 28, $&;
106check_taint 29, $`;
107check_taint 30, $';
108check_taint 31, $+;
109check_taint 32, $1;
110check_taint_not 33, $2;
111
112/(\s)/; # taint $&, $`, $', $+, $1.
113check_taint 34, $&;
114check_taint 35, $`;
115check_taint 36, $';
116check_taint 37, $+;
117check_taint 38, $1;
118check_taint_not 39, $2;
119
120/(\S)/; # taint $&, $`, $', $+, $1.
121check_taint 40, $&;
122check_taint 41, $`;
123check_taint 42, $';
124check_taint 43, $+;
125check_taint 44, $1;
126check_taint_not 45, $2;
127
128$_ = $a; # untaint $_
129
130check_taint_not 46, $_;
131
132/(b)/; # this must not taint
133check_taint_not 47, $&;
134check_taint_not 48, $`;
135check_taint_not 49, $';
136check_taint_not 50, $+;
137check_taint_not 51, $1;
138check_taint_not 52, $2;
139
140$_ = $a; # untaint $_
141
142check_taint_not 53, $_;
143
144$b = uc($a); # taint $b
145s/(.+)/$b/; # this must taint only the $_
146
147check_taint 54, $_;
148check_taint_not 55, $&;
149check_taint_not 56, $`;
150check_taint_not 57, $';
151check_taint_not 58, $+;
152check_taint_not 59, $1;
153check_taint_not 60, $2;
154
155$_ = $a; # untaint $_
156
157s/(.+)/b/; # this must not taint
158check_taint_not 61, $_;
159check_taint_not 62, $&;
160check_taint_not 63, $`;
161check_taint_not 64, $';
162check_taint_not 65, $+;
163check_taint_not 66, $1;
164check_taint_not 67, $2;
165
166$b = $a; # untaint $b
167
168($b = $a) =~ s/\w/$&/;
169check_taint 68, $b; # $b should be tainted.
170check_taint_not 69, $a; # $a should be not.
171
172$_ = $a; # untaint $_
173
174s/(\w)/\l$1/; # this must taint
175check_taint 70, $_;
176check_taint 71, $&;
177check_taint 72, $`;
178check_taint 73, $';
179check_taint 74, $+;
180check_taint 75, $1;
181check_taint_not 76, $2;
182
183$_ = $a; # untaint $_
184
185s/(\w)/\L$1/; # this must taint
186check_taint 77, $_;
187check_taint 78, $&;
188check_taint 79, $`;
189check_taint 80, $';
190check_taint 81, $+;
191check_taint 82, $1;
192check_taint_not 83, $2;
193
194$_ = $a; # untaint $_
195
196s/(\w)/\u$1/; # this must taint
197check_taint 84, $_;
198check_taint 85, $&;
199check_taint 86, $`;
200check_taint 87, $';
201check_taint 88, $+;
202check_taint 89, $1;
203check_taint_not 90, $2;
204
205$_ = $a; # untaint $_
206
207s/(\w)/\U$1/; # this must taint
208check_taint 91, $_;
209check_taint 92, $&;
210check_taint 93, $`;
211check_taint 94, $';
212check_taint 95, $+;
213check_taint 96, $1;
214check_taint_not 97, $2;
215
216# After all this tainting $a should be cool.
217
218check_taint_not 98, $a;
219
220# I think we've seen quite enough of taint.
221# Let us do some *real* locale work now,
222# unless setlocale() is missing (i.e. minitest).
223
224exit unless $have_setlocale;
225
226sub getalnum {
227 sort grep /\w/, map { chr } 0..255
228}
229
230sub locatelocale ($$@) {
231 my ($lcall, $alnum, @try) = @_;
232
233 undef $$lcall;
234
235 for (@try) {
236 local $^W = 0; # suppress "Subroutine LC_ALL redefined"
237 if (setlocale(&LC_ALL, $_)) {
238 $$lcall = $_;
239 @$alnum = &getalnum;
240 last;
241 }
242 }
243
244 @$alnum = () unless (defined $$lcall);
245}
246
247# Find some default locale
248
249locatelocale(\$Locale, \@Locale, qw(C POSIX));
250
251# Find some English locale
252
253locatelocale(\$English, \@English,
254 qw(en_US.ISO8859-1 en_GB.ISO8859-1
255 en en_US en_UK en_IE en_CA en_AU en_NZ
256 english english.iso88591
257 american american.iso88591
258 british british.iso88591
259 ));
260
261# Find some German locale
262
263locatelocale(\$German, \@German,
264 qw(de_DE.ISO8859-1 de_AT.ISO8859-1 de_CH.ISO8859-1
265 de de_DE de_AT de_CH
266 german german.iso88591));
267
268# Find some French locale
269
270locatelocale(\$French, \@French,
271 qw(fr_FR.ISO8859-1 fr_BE.ISO8859-1 fr_CA.ISO8859-1 fr_CH.ISO8859-1
272 fr fr_FR fr_BE fr_CA fr_CH
273 french french.iso88591));
274
275# Find some Spanish locale
276
277locatelocale(\$Spanish, \@Spanish,
278 qw(es_AR.ISO8859-1 es_BO.ISO8859-1 es_CL.ISO8859-1
279 es_CO.ISO8859-1 es_CR.ISO8859-1 es_EC.ISO8859-1
280 es_ES.ISO8859-1 es_GT.ISO8859-1 es_MX.ISO8859-1
281 es_NI.ISO8859-1 es_PA.ISO8859-1 es_PE.ISO8859-1
282 es_PY.ISO8859-1 es_SV.ISO8859-1 es_UY.ISO8859-1 es_VE.ISO8859-1
283 es es_AR es_BO es_CL
284 es_CO es_CR es_EC
285 es_ES es_GT es_MX
286 es_NI es_PA es_PE
287 es_PY es_SV es_UY es_VE
288 spanish spanish.iso88591));
289
290# Select the largest of the alpha(num)bets.
291
292($Locale, @Locale) = ($English, @English)
545c8fcc 293 if (@English > @Locale);
8ebc5c01 294($Locale, @Locale) = ($German, @German)
545c8fcc 295 if (@German > @Locale);
8ebc5c01 296($Locale, @Locale) = ($French, @French)
545c8fcc 297 if (@French > @Locale);
8ebc5c01 298($Locale, @Locale) = ($Spanish, @Spanish)
545c8fcc 299 if (@Spanish > @Locale);
8ebc5c01 300
8ebc5c01 301{
302 local $^W = 0;
303 setlocale(&LC_ALL, $Locale);
304}
305
4599a1de 306# Sort it now that LC_ALL has been set.
307
308@Locale = sort @Locale;
309
310print "# Locale = $Locale\n";
311print "# Alnum_ = @Locale\n";
312
8ebc5c01 313{
314 my $i = 0;
315
316 for (@Locale) {
317 $iLocale{$_} = $i++;
318 }
319}
320
321# Sieve the uppercase and the lowercase.
322
323for (@Locale) {
324 if (/[^\d_]/) { # skip digits and the _
325 if (lc eq $_) {
326 $UPPER{$_} = uc;
327 } else {
328 $lower{$_} = lc;
329 }
330 }
331}
332
8ebc5c01 333# Find the alphabets that are not alphabets in the default locale.
334
335{
336 no locale;
337
338 for (keys %UPPER, keys %lower) {
339 push(@Neoalpha, $_) if (/\W/);
340 }
341}
342
343@Neoalpha = sort @Neoalpha;
344
345# Test \w.
346
347{
348 my $word = join('', @Neoalpha);
349
350 $word =~ /^(\w*)$/;
351
352 print 'not ' if ($1 ne $word);
353}
9fc9f3bf 354print "ok 99\n";
8ebc5c01 355
356# Find places where the collation order differs from the default locale.
357
9fc9f3bf 358print "# testing 100\n";
8ebc5c01 359{
360 my (@k, $i, $j, @d);
361
362 {
363 no locale;
364
365 @k = sort (keys %UPPER, keys %lower);
366 }
367
368 for ($i = 0; $i < @k; $i++) {
369 for ($j = $i + 1; $j < @k; $j++) {
370 if ($iLocale{$k[$j]} < $iLocale{$k[$i]}) {
371 push(@d, [$k[$j], $k[$i]]);
372 }
373 }
374 }
375
376 # Cross-check those places.
377
378 for (@d) {
379 ($i, $j) = @$_;
380 if ($i gt $j) {
9fc9f3bf 381 print "# failed 100 at:\n";
8ebc5c01 382 print "# i = $i, j = $j, i ",
383 $i le $j ? 'le' : 'gt', " j\n";
384 print 'not ';
385 last;
386 }
387 }
388}
9fc9f3bf 389print "ok 100\n";
8ebc5c01 390
391# Cross-check whole character set.
392
9fc9f3bf 393print "# testing 101\n";
8ebc5c01 394for (map { chr } 0..255) {
395 if (/\w/ and /\W/) { print 'not '; last }
396 if (/\d/ and /\D/) { print 'not '; last }
397 if (/\s/ and /\S/) { print 'not '; last }
398 if (/\w/ and /\D/ and not /_/ and
399 not (exists $UPPER{$_} or exists $lower{$_})) {
9fc9f3bf 400 print "# failed 101 at:\n";
774d564b 401 print "# ", ord($_), " '$_'\n";
8ebc5c01 402 print 'not ';
403 last;
404 }
405}
9fc9f3bf 406print "ok 101\n";
8ebc5c01 407
fb73857a 408# Test for read-onlys.
409
8ada0baa 410print "# testing 102\n";
fb73857a 411{
412 no locale;
413 $a = "qwerty";
414 {
415 use locale;
416 print "not " if $a cmp "qwerty";
417 }
418}
419print "ok 102\n";
420
421# This test must be the last one because its failure is not fatal.
8ebc5c01 422# The @Locale should be internally consistent.
90248788 423# Thanks to Hallvard Furuseth <h.b.furuseth@usit.uio.no>
424# for inventing a way to test for ordering consistency
425# without requiring any particular order.
8ada0baa 426# <jhi@iki.fi>
8ebc5c01 427
fb73857a 428print "# testing 103\n";
8ebc5c01 429{
90248788 430 my ($from, $to, $lesser, $greater, @test, %test, $test, $yes, $no, $sign);
8ebc5c01 431
432 for (0..9) {
433 # Select a slice.
434 $from = int(($_*@Locale)/10);
435 $to = $from + int(@Locale/10);
436 $to = $#Locale if ($to > $#Locale);
437 $lesser = join('', @Locale[$from..$to]);
438 # Select a slice one character on.
439 $from++; $to++;
440 $to = $#Locale if ($to > $#Locale);
441 $greater = join('', @Locale[$from..$to]);
90248788 442 ($yes, $no, $sign) = ($lesser lt $greater
443 ? (" ", "not ", 1)
444 : ("not ", " ", -1));
445 # all these tests should FAIL (return 0).
774d564b 446 @test =
447 (
90248788 448 $no.' ($lesser lt $greater)', # 0
449 $no.' ($lesser le $greater)', # 1
fb73857a 450 'not ($lesser ne $greater)', # 2
451 ' ($lesser eq $greater)', # 3
90248788 452 $yes.' ($lesser ge $greater)', # 4
453 $yes.' ($lesser gt $greater)', # 5
454 $yes.' ($greater lt $lesser )', # 6
455 $yes.' ($greater le $lesser )', # 7
fb73857a 456 'not ($greater ne $lesser )', # 8
457 ' ($greater eq $lesser )', # 9
90248788 458 $no.' ($greater ge $lesser )', # 10
459 $no.' ($greater gt $lesser )', # 11
460 'not (($lesser cmp $greater) == -$sign)' # 12
774d564b 461 );
462 @test{@test} = 0 x @test;
463 $test = 0;
464 for my $ti (@test) { $test{$ti} = eval $ti ; $test ||= $test{$ti} }
465 if ($test) {
fb73857a 466 print "# failed 103 at:\n";
774d564b 467 print "# lesser = '$lesser'\n";
468 print "# greater = '$greater'\n";
90248788 469 print "# lesser cmp greater = ", $lesser cmp $greater, "\n";
470 print "# greater cmp lesser = ", $greater cmp $lesser, "\n";
774d564b 471 print "# (greater) from = $from, to = $to\n";
472 for my $ti (@test) {
473 printf("# %-40s %-4s", $ti,
474 $test{$ti} ? 'FAIL' : 'ok');
475 if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
476 printf("(%s == %4d)", $1, eval $1);
477 }
478 print "\n";
479 }
480
fb73857a 481 warn "The locale definition on your system may have errors.\n";
8ebc5c01 482 last;
483 }
484 }
485}
90248788 486
487# eof