From: Jarkko Hietaniemi Date: Sat, 28 Nov 1998 17:23:15 +0000 (+0000) Subject: Revamp the locale tests. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=284102e897f98dc140b5c4416caadc04d1261661;p=p5sagit%2Fp5-mst-13.2.git Revamp the locale tests. (0) Instead of rewiring a few locales scan for them. (1) Bogus test #101 removed. (2) All the locales are checked, the lists of failed and non-failed ones are displayed. (3) The test #103 is again 'active' so that it may fail. (4) To balance (3) a hopefully pacifying message is shown if the #103 fails. p4raw-id: //depot/cfgperl@2358 --- diff --git a/t/pragma/locale.t b/t/pragma/locale.t index 7e3df8c..b53a228 100755 --- a/t/pragma/locale.t +++ b/t/pragma/locale.t @@ -2,7 +2,7 @@ BEGIN { chdir 't' if -d 't'; - @INC = '../lib'; + unshift @INC, '../lib'; require Config; import Config; if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) { print "1..0\n"; @@ -12,6 +12,8 @@ BEGIN { use strict; +my $debug = 1; + my $have_setlocale = 0; eval { require POSIX; @@ -19,19 +21,21 @@ eval { $have_setlocale++; }; +use vars qw(&LC_ALL); + # Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1" # and mingw32 uses said silly CRT $have_setlocale = 0 if $^O eq 'MSWin32' && $Config{cc} =~ /^(cl|gcc)/i; -# 103 (the last test) may fail but that is okay. +# 103 (the last test) may fail but that is sort-of okay. # (It indicates something broken in the environment, not Perl) -# Therefore .. only until 102, not 103. -print "1..", ($have_setlocale ? 102 : 98), "\n"; + +print "1..", ($have_setlocale ? 103 : 98), "\n"; use vars qw($a $English $German $French $Spanish @C @English @German @French @Spanish - $Locale @Locale %iLocale %UPPER %lower @Neoalpha); + $Locale @Locale %UPPER %lower %bothcase @Neoalpha); $a = 'abc %'; @@ -219,269 +223,363 @@ check_taint_not 98, $a; # I think we've seen quite enough of taint. # Let us do some *real* locale work now, -# unless setlocale() is missing (i.e. minitest). +# unless setlocale() is missing (i.e. minitest). exit unless $have_setlocale; -sub getalnum { +# Find locales. + +my $locales = < @Locale); -($Locale, @Locale) = ($German, @German) - if (@German > @Locale); -($Locale, @Locale) = ($French, @French) - if (@French > @Locale); -($Locale, @Locale) = ($Spanish, @Spanish) - if (@Spanish > @Locale); - -{ - local $^W = 0; - setlocale(&LC_ALL, $Locale); +trylocale("C"); +trylocale("POSIX"); +foreach (0..15) { + trylocale("ISO8859-$_"); + trylocale("iso_8859_$_"); + trylocale("iso8859$_"); } -# Sort it now that LC_ALL has been set. +foreach my $locale (split(/\n/, $locales)) { + my ($locale_name, $language_codes, $country_codes, $encodings) = + split(/:/, $locale); + my @enc = decode_encodings($encodings); + foreach my $loc (split(/ /, $locale_name)) { + trylocale($loc); + foreach my $enc (@enc) { + trylocale("$loc.$enc"); + } + $loc = lc $loc; + foreach my $enc (@enc) { + trylocale("$loc.$enc"); + } + } + foreach my $lang (split(/ /, $language_codes)) { + trylocale($lang); + foreach my $country (split(/ /, $country_codes)) { + my $lc = "${lang}_${country}"; + trylocale($lc); + foreach my $enc (@enc) { + trylocale("$lc.$enc"); + } + my $lC = "${lang}_\U${country}"; + trylocale($lC); + foreach my $enc (@enc) { + trylocale("$lC.$enc"); + } + } + } +} @Locale = sort @Locale; -print "# Locale = $Locale\n"; -print "# Alnum_ = @Locale\n"; - -{ - my $i = 0; +sub debug { + print @_ if $debug; +} - for (@Locale) { - $iLocale{$_} = $i++; - } +sub debugf { + printf @_ if $debug; } -# Sieve the uppercase and the lowercase. +debug "# Locales = @Locale\n"; -for (@Locale) { - if (/[^\d_]/) { # skip digits and the _ - if (lc eq $_) { - $UPPER{$_} = uc; - } else { - $lower{$_} = lc; +my %Problem; + +foreach $Locale (@Locale) { + debug "# Locale = $Locale\n"; + @Alnum_ = getalnum_(); + debug "# \\w = @Alnum_\n"; + + unless (setlocale(LC_ALL, $Locale)) { + foreach (99..103) { + $Problem{$_}{$Locale} = -1; } + next; } -} -# Find the alphabets that are not alphabets in the default locale. + # Sieve the uppercase and the lowercase. + + %UPPER = %lower = %bothcase = (); + for (@Alnum_) { + if (/[^\d_]/) { # skip digits and the _ + if (uc($_) eq $_) { + $UPPER{$_} = $_; + } + if (lc($_) eq $_) { + $lower{$_} = $_; + } + } + } + foreach (keys %UPPER) { + $bothcase{$_}++ if exists $lower{$_}; + } + foreach (keys %lower) { + $bothcase{$_}++ if exists $UPPER{$_}; + } + foreach (keys %bothcase) { + delete $UPPER{$_}; + delete $lower{$_}; + } + + debug "# UPPER = ", join(" ", sort keys %UPPER ), "\n"; + debug "# lower = ", join(" ", sort keys %lower ), "\n"; + debug "# bothcase = ", join(" ", sort keys %bothcase), "\n"; + + # Find the alphabets that are not alphabets in the default locale. -{ - no locale; + { + no locale; - for (keys %UPPER, keys %lower) { - push(@Neoalpha, $_) if (/\W/); + @Neoalpha = (); + for (keys %UPPER, keys %lower) { + push(@Neoalpha, $_) if (/\W/); + } } -} -@Neoalpha = sort @Neoalpha; + @Neoalpha = sort @Neoalpha; -# Test \w. + debug "# Neoalpha = @Neoalpha\n"; -{ - my $word = join('', @Neoalpha); + if (@Neoalpha == 0) { + # If we have no Neoalphas the remaining tests are no-ops. + debug "# no Neoalpha, skipping tests 99..103 for locale '$Locale'\n"; + next; + } - $word =~ /^(\w*)$/; + # Test \w. + + debug "# testing 99 with locale '$Locale'\n"; + { + my $word = join('', @Neoalpha); - print 'not ' if ($1 ne $word); -} -print "ok 99\n"; + $word =~ /^(\w+)$/; -# Find places where the collation order differs from the default locale. + if ($1 ne $word) { + $Problem{99}{$Locale} = 1; + debug "# failed 99 ($1 vs $word)\n"; + } + } -print "# testing 100\n"; -{ - my (@k, $i, $j, @d); + # Test #100 removed but to preserve historical test number + # consistency we do not renumber the remaining tests. - { - no locale; + # Cross-check whole character set. - @k = sort (keys %UPPER, keys %lower); + debug "# testing 101 with locale '$Locale'\n"; + for (map { chr } 0..255) { + if ((/\w/ and /\W/) or (/\d/ and /\D/) or (/\s/ and /\S/)) { + $Problem{101}{$Locale} = 1; + debug "# failed 101\n"; + last; + } } - for ($i = 0; $i < @k; $i++) { - for ($j = $i + 1; $j < @k; $j++) { - if ($iLocale{$k[$j]} < $iLocale{$k[$i]}) { - push(@d, [$k[$j], $k[$i]]); + # Test for read-only scalars' locale vs non-locale comparisons. + + debug "# testing 102 with locale '$Locale'\n"; + { + no locale; + $a = "qwerty"; + { + use locale; + if ($a cmp "qwerty") { + $Problem{102}{$Locale} = 1; + debug "# failed 102\n"; } } } - # Cross-check those places. - - for (@d) { - ($i, $j) = @$_; - if ($i gt $j) { - print "# failed 100 at:\n"; - print "# i = $i, j = $j, i ", - $i le $j ? 'le' : 'gt', " j\n"; - print 'not '; - last; + # This test must be the last one because its failure is not fatal. + # The @Alnum_ should be internally consistent. + # Thanks to Hallvard Furuseth + # for inventing a way to test for ordering consistency + # without requiring any particular order. + # + + debug "# testing 103 with locale '$Locale'\n"; + { + my ($from, $to, $lesser, $greater, + @test, %test, $test, $yes, $no, $sign); + + for (0..9) { + # Select a slice. + $from = int(($_*@Alnum_)/10); + $to = $from + int(@Alnum_/10); + $to = $#Alnum_ if ($to > $#Alnum_); + $lesser = join('', @Alnum_[$from..$to]); + # Select a slice one character on. + $from++; $to++; + $to = $#Alnum_ if ($to > $#Alnum_); + $greater = join('', @Alnum_[$from..$to]); + ($yes, $no, $sign) = ($lesser lt $greater + ? (" ", "not ", 1) + : ("not ", " ", -1)); + # all these tests should FAIL (return 0). + # Exact lt or gt cannot be tested because + # in some locales, say, eacute and E may test equal. + @test = + ( + $no.' ($lesser le $greater)', # 1 + 'not ($lesser ne $greater)', # 2 + ' ($lesser eq $greater)', # 3 + $yes.' ($lesser ge $greater)', # 4 + $yes.' ($lesser ge $greater)', # 5 + $yes.' ($greater le $lesser )', # 7 + 'not ($greater ne $lesser )', # 8 + ' ($greater eq $lesser )', # 9 + $no.' ($greater ge $lesser )', # 10 + 'not (($lesser cmp $greater) == -$sign)' # 12 + ); + @test{@test} = 0 x @test; + $test = 0; + for my $ti (@test) { $test{$ti} = eval $ti ; $test ||= $test{$ti} } + if ($test) { + $Problem{103}{$Locale} = 1; + debug "# failed 103 at:\n"; + debug "# lesser = '$lesser'\n"; + debug "# greater = '$greater'\n"; + debug "# lesser cmp greater = ", $lesser cmp $greater, "\n"; + debug "# greater cmp lesser = ", $greater cmp $lesser, "\n"; + debug "# (greater) from = $from, to = $to\n"; + for my $ti (@test) { + debugf("# %-40s %-4s", $ti, + $test{$ti} ? 'FAIL' : 'ok'); + if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) { + debugf("(%s == %4d)", $1, eval $1); + } + debug "\n#"; + } + + last; + } } } } -print "ok 100\n"; - -# Cross-check whole character set. - -print "# testing 101\n"; -for (map { chr } 0..255) { - if (/\w/ and /\W/) { print 'not '; last } - if (/\d/ and /\D/) { print 'not '; last } - if (/\s/ and /\S/) { print 'not '; last } - if (/\w/ and /\D/ and not /_/ and - not (exists $UPPER{$_} or exists $lower{$_})) { - print "# failed 101 at:\n"; - print "# ", ord($_), " '$_'\n"; - print 'not '; - last; + +no locale; + +foreach (99..103) { + if ($Problem{$_}) { + if ($_ == 103) { + print "# The failure of test 103 is not necessarily fatal.\n"; + print "# It usually indicates a problem in the enviroment,\n"; + print "# not in Perl itself.\n"; + } + print "not "; } + print "ok $_\n"; } -print "ok 101\n"; - -# Test for read-onlys. -print "# testing 102\n"; -{ - no locale; - $a = "qwerty"; - { - use locale; - print "not " if $a cmp "qwerty"; +my $didwarn = 0; + +foreach (99..103) { + if ($Problem{$_}) { + my @f = sort keys %{ $Problem{$_} }; + my $f = join(" ", @f); + $f =~ s/(.{50,60}) /$1\n#\t/g; + warn + "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n", + "#\t", $f, "\n#\n", + "# on your system may have errors because the locale test $_\n", + "# failed in ", (@f == 1 ? "that locale" : "those locales"), + ".\n"; + warn < -# for inventing a way to test for ordering consistency -# without requiring any particular order. -# - -print "# testing 103\n"; -{ - my ($from, $to, $lesser, $greater, @test, %test, $test, $yes, $no, $sign); - - for (0..9) { - # Select a slice. - $from = int(($_*@Locale)/10); - $to = $from + int(@Locale/10); - $to = $#Locale if ($to > $#Locale); - $lesser = join('', @Locale[$from..$to]); - # Select a slice one character on. - $from++; $to++; - $to = $#Locale if ($to > $#Locale); - $greater = join('', @Locale[$from..$to]); - ($yes, $no, $sign) = ($lesser lt $greater - ? (" ", "not ", 1) - : ("not ", " ", -1)); - # all these tests should FAIL (return 0). - @test = - ( - $no.' ($lesser lt $greater)', # 0 - $no.' ($lesser le $greater)', # 1 - 'not ($lesser ne $greater)', # 2 - ' ($lesser eq $greater)', # 3 - $yes.' ($lesser ge $greater)', # 4 - $yes.' ($lesser gt $greater)', # 5 - $yes.' ($greater lt $lesser )', # 6 - $yes.' ($greater le $lesser )', # 7 - 'not ($greater ne $lesser )', # 8 - ' ($greater eq $lesser )', # 9 - $no.' ($greater ge $lesser )', # 10 - $no.' ($greater gt $lesser )', # 11 - 'not (($lesser cmp $greater) == -$sign)' # 12 - ); - @test{@test} = 0 x @test; - $test = 0; - for my $ti (@test) { $test{$ti} = eval $ti ; $test ||= $test{$ti} } - if ($test) { - print "# failed 103 at:\n"; - print "# lesser = '$lesser'\n"; - print "# greater = '$greater'\n"; - print "# lesser cmp greater = ", $lesser cmp $greater, "\n"; - print "# greater cmp lesser = ", $greater cmp $lesser, "\n"; - print "# (greater) from = $from, to = $to\n"; - for my $ti (@test) { - printf("# %-40s %-4s", $ti, - $test{$ti} ? 'FAIL' : 'ok'); - if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) { - printf("(%s == %4d)", $1, eval $1); - } - print "\n"; - } - warn "The locale definition on your system may have errors.\n"; - last; +if ($didwarn) { + my @s; + + foreach my $l (@Locale) { + my $p = 0; + foreach my $t (99..103) { + $p++ if $Problem{$t}{$l}; } + push @s, $l if $p == 0; } + + my $s = join(" ", @s); + $s =~ s/(.{50,60}) /$1\n#\t/g; + + warn + "# The following locales\n#\n", + "#\t", $s, "\n#\n", + "# tested okay.\n#\n", } # eof