X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2Flocale.t;h=666b9a93d13b7b79b69ae15f230b1633d65b9296;hb=9b5be9b5aa498a302ab752a0d1cdb335a620ede2;hp=bcbce9ed4a7f5d9f5fff01f499152e1781c6c583;hpb=b4e009bec552efad97a5f0403d80a2afdcb546d8;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/locale.t b/lib/locale.t index bcbce9e..666b9a9 100644 --- a/lib/locale.t +++ b/lib/locale.t @@ -45,11 +45,14 @@ eval { # and mingw32 uses said silly CRT $have_setlocale = 0 if (($^O eq 'MSWin32' || $^O eq 'NetWare') && $Config{cc} =~ /^(cl|gcc)/i); +# UWIN seems to loop after test 98, just skip for now +$have_setlocale = 0 if ($^O =~ /^uwin/); + my $last = $have_setlocale ? &last : &last_without_setlocale; print "1..$last\n"; -use vars qw(&LC_ALL); +sub LC_ALL (); $a = 'abc %'; @@ -379,6 +382,10 @@ delete @ENV{qw(IFS CDPATH ENV BASH_ENV)}; if (-x "/usr/bin/locale" && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|")) { while (<LOCALES>) { + # It seems that /usr/bin/locale steadfastly outputs 8 bit data, which + # ain't great when we're running this testPERL_UNICODE= so that utf8 + # locales will cause all IO hadles to default to (assume) utf8 + next unless utf8::valid($_); chomp; trylocale($_); } @@ -430,9 +437,21 @@ if (-x "/usr/bin/locale" && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|")) { setlocale(LC_ALL, "C"); +if ($^O eq 'darwin') { + # Darwin 8/Mac OS X 10.4 has bad Basque locales: perl bug #35895, + # Apple bug ID# 4139653. It also has a problem in Byelorussian. + if ($Config{osvers} ge '8' and $Config{osvers} lt '9') { + debug "# Skipping eu_ES, be_BY locales -- buggy in Darwin\n"; + @Locale = grep ! 
m/^(eu_ES|be_BY.CP1131$)/, @Locale; + } +} + @Locale = sort @Locale; -debug "# Locales = @Locale\n"; +debug "# Locales =\n"; +for ( @Locale ) { + debug "# $_\n"; +} my %Problem; my %Okay; @@ -520,7 +539,17 @@ foreach $Locale (@Locale) { my $word = join('', @Neoalpha); - if ($Locale =~ /utf-?8/i) { + my $badutf8; + { + local $SIG{__WARN__} = sub { + $badutf8 = $_[0] =~ /Malformed UTF-8/; + }; + $Locale =~ /utf-?8/i; + } + + if ($badutf8) { + debug "# Locale name contains bad UTF-8, skipping test 99 for locale '$Locale'\n"; + } elsif ($Locale =~ /utf-?8/i) { debug "# unknown whether locale and Unicode have the same \\w, skipping test 99 for locale '$Locale'\n"; push @{$Okay{99}}, $Locale; } else { @@ -722,6 +751,7 @@ foreach $Locale (@Locale) { print "# UPPER $x lc $y ", $x =~ /$y/i ? 1 : 0, " ", $y =~ /$x/i ? 1 : 0, "\n" if 0; + # # If $x and $y contain regular expression characters # AND THEY lowercase (/i) to regular expression characters, # regcomp() will be mightily confused. No, the \Q doesn't @@ -729,12 +759,22 @@ foreach $Locale (@Locale) { # is done after the \Q?) An example of this happening is # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS): # the chr(173) (the "[") is the lowercase of the chr(235). + # # Similarly losing EBCDIC locales include cs_cz, cs_CZ, # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!), # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT, # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037, # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU, # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR. + # + # Similar things can happen even under (bastardised) + # non-EBCDIC locales: in many European countries before the + # advent of ISO 8859-x nationally customised versions of + # ISO 646 were devised, reusing certain punctuation + # characters for modified characters needed by the + # country/language. For example, the "|" might have + # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS. 
+ # if ($x =~ $re || $y =~ $re) { print "# Regex characters in '$x' or '$y', skipping test 117 for locale '$Locale'\n"; next;