t/pragma/locale.t

   1 #!./perl -wT
   2
   3 BEGIN {
   4     chdir 't' if -d 't';
   5     @INC = '../lib';
   6     require Config; import Config;
   7     if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) {
   8         print "1..0\n";
   9         exit;
  10     }
  11 }
  12
  13 use strict;
  14
  15 my $have_setlocale = 0;
  16 eval {
  17     require POSIX;
  18     import POSIX ':locale_h';
  19     $have_setlocale++;
  20 };
  21
  22 # Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1"
  23 # and mingw32 uses said silly CRT
  24 $have_setlocale = 0 if $^O eq 'MSWin32' && $Config{cc} =~ /^(cl|gcc)/i;
  25
  26 print "1..", ($have_setlocale ? 102 : 98), "\n";
  27
  28 use vars qw($a
  29             $English $German $French $Spanish
  30             @C @English @German @French @Spanish
  31             $Locale @Locale %iLocale %UPPER %lower @Neoalpha);
  32
  33 $a = 'abc %';
  34
  35 sub ok {
  36     my ($n, $result) = @_;
  37
  38     print 'not ' unless ($result);
  39     print "ok $n\n";
  40 }
  41
  42 # First we'll do a lot of taint checking for locales.
  43 # This is the easiest to test, actually, as any locale,
  44 # even the default locale will taint under 'use locale'.
  45
  46 sub is_tainted { # hello, camel two.
  47     local $^W;  # no warnings 'undef'
  48     my $dummy;
  49     not eval { $dummy = join("", @_), kill 0; 1 }
  50 }
  51
  52 sub check_taint ($$) {
  53     ok $_[0], is_tainted($_[1]);
  54 }
  55
  56 sub check_taint_not ($$) {
  57     ok $_[0], not is_tainted($_[1]);
  58 }
  59
  60 use locale;     # engage locale and therefore locale taint.
  61
  62 check_taint_not   1, $a;
  63
  64 check_taint       2, uc($a);
  65 check_taint       3, "\U$a";
  66 check_taint       4, ucfirst($a);
  67 check_taint       5, "\u$a";
  68 check_taint       6, lc($a);
  69 check_taint       7, "\L$a";
  70 check_taint       8, lcfirst($a);
  71 check_taint       9, "\l$a";
  72
  73 check_taint      10, sprintf('%e', 123.456);
  74 check_taint      11, sprintf('%f', 123.456);
  75 check_taint      12, sprintf('%g', 123.456);
  76 check_taint_not  13, sprintf('%d', 123.456);
  77 check_taint_not  14, sprintf('%x', 123.456);
  78
  79 $_ = $a;        # untaint $_
  80
  81 $_ = uc($a);    # taint $_
  82
  83 check_taint      15, $_;
  84
  85 /(\w)/; # taint $&, $`, $', $+, $1.
  86 check_taint      16, $&;
  87 check_taint      17, $`;
  88 check_taint      18, $';
  89 check_taint      19, $+;
  90 check_taint      20, $1;
  91 check_taint_not  21, $2;
  92
  93 /(.)/;  # untaint $&, $`, $', $+, $1.
  94 check_taint_not  22, $&;
  95 check_taint_not  23, $`;
  96 check_taint_not  24, $';
  97 check_taint_not  25, $+;
  98 check_taint_not  26, $1;
  99 check_taint_not  27, $2;
 100
 101 /(\W)/; # taint $&, $`, $', $+, $1.
 102 check_taint      28, $&;
 103 check_taint      29, $`;
 104 check_taint      30, $';
 105 check_taint      31, $+;
 106 check_taint      32, $1;
 107 check_taint_not  33, $2;
 108
 109 /(\s)/; # taint $&, $`, $', $+, $1.
 110 check_taint      34, $&;
 111 check_taint      35, $`;
 112 check_taint      36, $';
 113 check_taint      37, $+;
 114 check_taint      38, $1;
 115 check_taint_not  39, $2;
 116
 117 /(\S)/; # taint $&, $`, $', $+, $1.
 118 check_taint      40, $&;
 119 check_taint      41, $`;
 120 check_taint      42, $';
 121 check_taint      43, $+;
 122 check_taint      44, $1;
 123 check_taint_not  45, $2;
 124
 125 $_ = $a;        # untaint $_
 126
 127 check_taint_not  46, $_;
 128
 129 /(b)/;          # this must not taint
 130 check_taint_not  47, $&;
 131 check_taint_not  48, $`;
 132 check_taint_not  49, $';
 133 check_taint_not  50, $+;
 134 check_taint_not  51, $1;
 135 check_taint_not  52, $2;
 136
 137 $_ = $a;        # untaint $_
 138
 139 check_taint_not  53, $_;
 140
 141 $b = uc($a);    # taint $b
 142 s/(.+)/$b/;     # this must taint only the $_
 143
 144 check_taint      54, $_;
 145 check_taint_not  55, $&;
 146 check_taint_not  56, $`;
 147 check_taint_not  57, $';
 148 check_taint_not  58, $+;
 149 check_taint_not  59, $1;
 150 check_taint_not  60, $2;
 151
 152 $_ = $a;        # untaint $_
 153
 154 s/(.+)/b/;      # this must not taint
 155 check_taint_not  61, $_;
 156 check_taint_not  62, $&;
 157 check_taint_not  63, $`;
 158 check_taint_not  64, $';
 159 check_taint_not  65, $+;
 160 check_taint_not  66, $1;
 161 check_taint_not  67, $2;
 162
 163 $b = $a;        # untaint $b
 164
 165 ($b = $a) =~ s/\w/$&/;
 166 check_taint      68, $b;        # $b should be tainted.
 167 check_taint_not  69, $a;        # $a should be not.
 168
 169 $_ = $a;        # untaint $_
 170
 171 s/(\w)/\l$1/;   # this must taint
 172 check_taint      70, $_;
 173 check_taint      71, $&;
 174 check_taint      72, $`;
 175 check_taint      73, $';
 176 check_taint      74, $+;
 177 check_taint      75, $1;
 178 check_taint_not  76, $2;
 179
 180 $_ = $a;        # untaint $_
 181
 182 s/(\w)/\L$1/;   # this must taint
 183 check_taint      77, $_;
 184 check_taint      78, $&;
 185 check_taint      79, $`;
 186 check_taint      80, $';
 187 check_taint      81, $+;
 188 check_taint      82, $1;
 189 check_taint_not  83, $2;
 190
 191 $_ = $a;        # untaint $_
 192
 193 s/(\w)/\u$1/;   # this must taint
 194 check_taint      84, $_;
 195 check_taint      85, $&;
 196 check_taint      86, $`;
 197 check_taint      87, $';
 198 check_taint      88, $+;
 199 check_taint      89, $1;
 200 check_taint_not  90, $2;
 201
 202 $_ = $a;        # untaint $_
 203
 204 s/(\w)/\U$1/;   # this must taint
 205 check_taint      91, $_;
 206 check_taint      92, $&;
 207 check_taint      93, $`;
 208 check_taint      94, $';
 209 check_taint      95, $+;
 210 check_taint      96, $1;
 211 check_taint_not  97, $2;
 212
 213 # After all this tainting $a should be cool.
 214
 215 check_taint_not  98, $a;
 216
 217 # I think we've seen quite enough of taint.
 218 # Let us do some *real* locale work now,
 219 #  unless setlocale() is missing (i.e. minitest).
 220
 221 exit unless $have_setlocale;
 222
 223 sub getalnum {
 224     sort grep /\w/, map { chr } 0..255
 225 }
 226
 227 sub locatelocale ($$@) {
 228     my ($lcall, $alnum, @try) = @_;
 229
 230     undef $$lcall;
 231
 232     for (@try) {
 233         local $^W = 0; # suppress "Subroutine LC_ALL redefined"
 234         if (setlocale(&LC_ALL, $_)) {
 235             $$lcall = $_;
 236             @$alnum = &getalnum;
 237             last;
 238         }
 239     }
 240
 241     @$alnum = () unless (defined $$lcall);
 242 }
 243
 244 # Find some default locale
 245
 246 locatelocale(\$Locale, \@Locale, qw(C POSIX));
 247
 248 # Find some English locale
 249
 250 locatelocale(\$English, \@English,
 251              qw(en_US.ISO8859-1 en_GB.ISO8859-1
 252                 en en_US en_UK en_IE en_CA en_AU en_NZ
 253                 english english.iso88591
 254                 american american.iso88591
 255                 british british.iso88591
 256                 ));
 257
 258 # Find some German locale
 259
 260 locatelocale(\$German, \@German,
 261              qw(de_DE.ISO8859-1 de_AT.ISO8859-1 de_CH.ISO8859-1
 262                 de de_DE de_AT de_CH
 263                 german german.iso88591));
 264
 265 # Find some French locale
 266
 267 locatelocale(\$French, \@French,
 268              qw(fr_FR.ISO8859-1 fr_BE.ISO8859-1 fr_CA.ISO8859-1 fr_CH.ISO8859-1
 269                 fr fr_FR fr_BE fr_CA fr_CH
 270                 french french.iso88591));
 271
 272 # Find some Spanish locale
 273
 274 locatelocale(\$Spanish, \@Spanish,
 275              qw(es_AR.ISO8859-1 es_BO.ISO8859-1 es_CL.ISO8859-1
 276                 es_CO.ISO8859-1 es_CR.ISO8859-1 es_EC.ISO8859-1
 277                 es_ES.ISO8859-1 es_GT.ISO8859-1 es_MX.ISO8859-1
 278                 es_NI.ISO8859-1 es_PA.ISO8859-1 es_PE.ISO8859-1
 279                 es_PY.ISO8859-1 es_SV.ISO8859-1 es_UY.ISO8859-1 es_VE.ISO8859-1
 280                 es es_AR es_BO es_CL
 281                 es_CO es_CR es_EC
 282                 es_ES es_GT es_MX
 283                 es_NI es_PA es_PE
 284                 es_PY es_SV es_UY es_VE
 285                 spanish spanish.iso88591));
 286
 287 # Select the largest of the alpha(num)bets.
 288
 289 ($Locale, @Locale) = ($English, @English)
 290     if (@English > @Locale);
 291 ($Locale, @Locale) = ($German, @German)
 292     if (@German  > @Locale);
 293 ($Locale, @Locale) = ($French, @French)
 294     if (@French  > @Locale);
 295 ($Locale, @Locale) = ($Spanish, @Spanish)
 296     if (@Spanish > @Locale);
 297
 298 {
 299     local $^W = 0;
 300     setlocale(&LC_ALL, $Locale);
 301 }
 302
 303 # Sort it now that LC_ALL has been set.
 304
 305 @Locale = sort @Locale;
 306
 307 print "# Locale = $Locale\n";
 308 print "# Alnum_ = @Locale\n";
 309
 310 {
 311     my $i = 0;
 312
 313     for (@Locale) {
 314         $iLocale{$_} = $i++;
 315     }
 316 }
 317
 318 # Sieve the uppercase and the lowercase.
 319
 320 for (@Locale) {
 321     if (/[^\d_]/) { # skip digits and the _
 322         if (lc eq $_) {
 323             $UPPER{$_} = uc;
 324         } else {
 325             $lower{$_} = lc;
 326         }
 327     }
 328 }
 329
 330 # Find the alphabets that are not alphabets in the default locale.
 331
 332 {
 333     no locale;
 334
 335     for (keys %UPPER, keys %lower) {
 336         push(@Neoalpha, $_) if (/\W/);
 337     }
 338 }
 339
 340 @Neoalpha = sort @Neoalpha;
 341
 342 # Test \w.
 343
 344 {
 345     my $word = join('', @Neoalpha);
 346
 347     $word =~ /^(\w*)$/;
 348
 349     print 'not ' if ($1 ne $word);
 350 }
 351 print "ok 99\n";
 352
 353 # Find places where the collation order differs from the default locale.
 354
 355 print "# testing 100\n";
 356 {
 357     my (@k, $i, $j, @d);
 358
 359     {
 360         no locale;
 361
 362         @k = sort (keys %UPPER, keys %lower);
 363     }
 364
 365     for ($i = 0; $i < @k; $i++) {
 366         for ($j = $i + 1; $j < @k; $j++) {
 367             if ($iLocale{$k[$j]} < $iLocale{$k[$i]}) {
 368                 push(@d, [$k[$j], $k[$i]]);
 369             }
 370         }
 371     }
 372
 373     # Cross-check those places.
 374
 375     for (@d) {
 376         ($i, $j) = @$_;
 377         if ($i gt $j) {
 378             print "# failed 100 at:\n";
 379             print "# i = $i, j = $j, i ",
 380                   $i le $j ? 'le' : 'gt', " j\n";
 381             print 'not ';
 382             last;
 383         }
 384     }
 385 }
 386 print "ok 100\n";
 387
 388 # Cross-check whole character set.
 389
 390 print "# testing 101\n";
 391 for (map { chr } 0..255) {
 392     if (/\w/ and /\W/) { print 'not '; last }
 393     if (/\d/ and /\D/) { print 'not '; last }
 394     if (/\s/ and /\S/) { print 'not '; last }
 395     if (/\w/ and /\D/ and not /_/ and
 396         not (exists $UPPER{$_} or exists $lower{$_})) {
 397         print "# failed 101 at:\n";
 398         print "# ", ord($_), " '$_'\n";
 399         print 'not ';
 400         last;
 401     }
 402 }
 403 print "ok 101\n";
 404
 405 # Test for read-onlys.
 406
 407 {
 408     no locale;
 409     $a = "qwerty";
 410     {
 411         use locale;
 412         print "not " if $a cmp "qwerty";
 413     }
 414 }
 415 print "ok 102\n";
 416
 417 # This test must be the last one because its failure is not fatal.
 418 # The @Locale should be internally consistent.
 419 # Thanks to Hallvard Furuseth <h.b.furuseth@usit.uio.no>
 420 # for inventing a way to test for ordering consistency
 421 # without requiring any particular order.
 422 # ++$jhi;#@iki.fi
 423
 424 print "# testing 103\n";
 425 {
 426     my ($from, $to, $lesser, $greater, @test, %test, $test, $yes, $no, $sign);
 427
 428     for (0..9) {
 429         # Select a slice.
 430         $from = int(($_*@Locale)/10);
 431         $to = $from + int(@Locale/10);
 432         $to = $#Locale if ($to > $#Locale);
 433         $lesser  = join('', @Locale[$from..$to]);
 434         # Select a slice one character on.
 435         $from++; $to++;
 436         $to = $#Locale if ($to > $#Locale);
 437         $greater = join('', @Locale[$from..$to]);
 438         ($yes, $no, $sign) = ($lesser lt $greater
 439                                 ? ("    ", "not ", 1)
 440                                 : ("not ", "    ", -1));
 441         # all these tests should FAIL (return 0).
 442         @test =
 443             (
 444              $no.'    ($lesser  lt $greater)',  # 0
 445              $no.'    ($lesser  le $greater)',  # 1
 446              'not      ($lesser  ne $greater)', # 2
 447              '         ($lesser  eq $greater)', # 3
 448              $yes.'    ($lesser  ge $greater)', # 4
 449              $yes.'    ($lesser  gt $greater)', # 5
 450              $yes.'    ($greater lt $lesser )', # 6
 451              $yes.'    ($greater le $lesser )', # 7
 452              'not      ($greater ne $lesser )', # 8
 453              '         ($greater eq $lesser )', # 9
 454              $no.'     ($greater ge $lesser )', # 10
 455              $no.'     ($greater gt $lesser )', # 11
 456              'not (($lesser cmp $greater) == -$sign)' # 12
 457              );
 458         @test{@test} = 0 x @test;
 459         $test = 0;
 460         for my $ti (@test) { $test{$ti} = eval $ti ; $test ||= $test{$ti} }
 461         if ($test) {
 462             print "# failed 103 at:\n";
 463             print "# lesser  = '$lesser'\n";
 464             print "# greater = '$greater'\n";
 465             print "# lesser cmp greater = ", $lesser cmp $greater, "\n";
 466             print "# greater cmp lesser = ", $greater cmp $lesser, "\n";
 467             print "# (greater) from = $from, to = $to\n";
 468             for my $ti (@test) {
 469                 printf("# %-40s %-4s", $ti,
 470                        $test{$ti} ? 'FAIL' : 'ok');
 471                 if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
 472                     printf("(%s == %4d)", $1, eval $1);
 473                 }
 474                 print "\n";
 475             }
 476
 477             warn "The locale definition on your system may have errors.\n";
 478             last;
 479         }
 480     }
 481 }
 482
 483 # eof