# Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1"
# and mingw32 uses said silly CRT
-$have_setlocale = 0 if (($^O eq 'MSWin32' || $^O eq 'NetWare') && $Config{cc} =~ /^(cl|gcc)/i);
+# This doesn't seem to be an issue any more, at least on Windows XP,
+# so re-enable the tests for Windows XP onwards.
+my $winxp = ($^O eq 'MSWin32' && defined &Win32::GetOSVersion &&
+ join('.', (Win32::GetOSVersion())[1..2]) >= 5.1);
+$have_setlocale = 0 if ((($^O eq 'MSWin32' && !$winxp) || $^O eq 'NetWare') &&
+ $Config{cc} =~ /^(cl|gcc)/i);
+
+# UWIN seems to loop after test 98, just skip for now
+$have_setlocale = 0 if ($^O =~ /^uwin/);
my $last = $have_setlocale ? &last : &last_without_setlocale;
print "1..$last\n";
-use vars qw(&LC_ALL);
+sub LC_ALL ();
$a = 'abc %';
$locales =~ s/Thai:th:th:11 tis620\n//;
}
-sub in_utf8 () { $^H & 0x08 }
+sub in_utf8 () { $^H & 0x08 || (${^OPEN} || "") =~ /:utf8/ }
if (in_utf8) {
require "lib/locale/utf8";
if (-x "/usr/bin/locale" && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|")) {
while (<LOCALES>) {
+ # It seems that /usr/bin/locale steadfastly outputs 8 bit data, which
+ # ain't great when we're running this testPERL_UNICODE= so that utf8
+ # locales will cause all IO hadles to default to (assume) utf8
+ next unless utf8::valid($_);
chomp;
trylocale($_);
}
trylocale($_);
}
close(LOCALES);
+} elsif ($^O eq 'openbsd' && -e '/usr/share/locale') {
+
+ # OpenBSD doesn't have a locale executable, so reading /usr/share/locale
+ # is much easier and faster than the last resort method.
+
+ opendir(LOCALES, '/usr/share/locale');
+ while ($_ = readdir(LOCALES)) {
+ chomp;
+ trylocale($_);
+ }
+ close(LOCALES);
} else {
# This is going to be slow.
setlocale(LC_ALL, "C");
-sub utf8locale { $_[0] =~ /utf-?8/i }
+if ($^O eq 'darwin') {
+ # Darwin 8/Mac OS X 10.4 and 10.5 have bad Basque locales: perl bug #35895,
+ # Apple bug ID# 4139653. It also has a problem in Byelorussian.
+ (my $v) = $Config{osvers} =~ /^(\d+)/;
+ if ($v >= 8 and $v < 10) {
+ debug "# Skipping eu_ES, be_BY locales -- buggy in Darwin\n";
+ @Locale = grep ! m/^(eu_ES|be_BY.CP1131$)/, @Locale;
+ }
+}
@Locale = sort @Locale;
-debug "# Locales = @Locale\n";
+debug "# Locales =\n";
+for ( @Locale ) {
+ debug "# $_\n";
+}
my %Problem;
my %Okay;
# Test \w.
- if (utf8locale($Locale)) {
- # utf8 and locales do not mix.
- debug "# skipping UTF-8 locale '$Locale'\n";
- push @utf8locale, $Locale;
- @utf8skip{99..102} = ();
- } else {
- my $word = join('', @Neoalpha);
+ my $word = join('', @Neoalpha);
- $word =~ /^(\w+)$/;
-
- tryneoalpha($Locale, 99, $1 eq $word);
+ my $badutf8;
+ {
+ local $SIG{__WARN__} = sub {
+ $badutf8 = $_[0] =~ /Malformed UTF-8/;
+ };
+ $Locale =~ /utf-?8/i;
}
+
+ if ($badutf8) {
+ debug "# Locale name contains bad UTF-8, skipping test 99 for locale '$Locale'\n";
+ } elsif ($Locale =~ /utf-?8/i) {
+ debug "# unknown whether locale and Unicode have the same \\w, skipping test 99 for locale '$Locale'\n";
+ push @{$Okay{99}}, $Locale;
+ } else {
+ if ($word =~ /^(\w+)$/) {
+ tryneoalpha($Locale, 99, 1);
+ } else {
+ tryneoalpha($Locale, 99, 0);
+ }
+ }
+
# Cross-check the whole 8-bit character set.
for (map { chr } 0..255) {
# case-insensitively the UPPER, and does the UPPER match
# case-insensitively the lc of the UPPER. And vice versa.
{
- if (utf8locale($Locale)) {
- # utf8 and locales do not mix.
- debug "# skipping UTF-8 locale '$Locale'\n";
- push @utf8locale, $Locale;
- $utf8skip{117}++;
- } else {
- use locale;
- use locale;
- no utf8; # so that the native 8-bit characters work
-
- my @f = ();
- foreach my $x (keys %UPPER) {
- my $y = lc $x;
- next unless uc $y eq $x;
- push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
+ use locale;
+ no utf8;
+ my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/;
+
+ my @f = ();
+ foreach my $x (keys %UPPER) {
+ my $y = lc $x;
+ next unless uc $y eq $x;
+ print "# UPPER $x lc $y ",
+ $x =~ /$y/i ? 1 : 0, " ",
+ $y =~ /$x/i ? 1 : 0, "\n" if 0;
+ #
+ # If $x and $y contain regular expression characters
+ # AND THEY lowercase (/i) to regular expression characters,
+ # regcomp() will be mightily confused. No, the \Q doesn't
+ # help here (maybe regex engine internal lowercasing
+ # is done after the \Q?) An example of this happening is
+ # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
+ # the chr(173) (the "[") is the lowercase of the chr(235).
+ #
+ # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
+ # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
+ # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
+ # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
+ # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
+ # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
+ #
+ # Similar things can happen even under (bastardised)
+ # non-EBCDIC locales: in many European countries before the
+ # advent of ISO 8859-x nationally customised versions of
+ # ISO 646 were devised, reusing certain punctuation
+ # characters for modified characters needed by the
+ # country/language. For example, the "|" might have
+ # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS.
+ #
+ if ($x =~ $re || $y =~ $re) {
+ print "# Regex characters in '$x' or '$y', skipping test 117 for locale '$Locale'\n";
+ next;
}
+ # With utf8 both will fail since the locale concept
+ # of upper/lower does not work well in Unicode.
+ push @f, $x unless $x =~ /$y/i == $y =~ /$x/i;
+
foreach my $x (keys %lower) {
my $y = uc $x;
next unless lc $y eq $x;
- push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
+ print "# lower $x uc $y ",
+ $x =~ /$y/i ? 1 : 0, " ",
+ $y =~ /$x/i ? 1 : 0, "\n" if 0;
+ if ($x =~ $re || $y =~ $re) { # See above.
+ print "# Regex characters in '$x' or '$y', skipping test 117 for locale '$Locale'\n";
+ next;
+ }
+ # With utf8 both will fail since the locale concept
+ # of upper/lower does not work well in Unicode.
+ push @f, $x unless $x =~ /$y/i == $y =~ /$x/i;
}
tryneoalpha($Locale, 117, @f == 0);
if (@f) {
if ($Problem{$_} || !defined $Okay{$_} || !@{$Okay{$_}}) {
if ($_ == 102) {
print "# The failure of test 102 is not necessarily fatal.\n";
- print "# It usually indicates a problem in the enviroment,\n";
+ print "# It usually indicates a problem in the environment,\n";
print "# not in Perl itself.\n";
}
print "not ";