X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2Flocale.t;h=037e2710e6bb6b79594a0a2d2078b22ad36056c4;hb=7d161605f8e10199e4c300f0a0e3d3c71656ee35;hp=bcbce9ed4a7f5d9f5fff01f499152e1781c6c583;hpb=b4e009bec552efad97a5f0403d80a2afdcb546d8;p=p5sagit%2Fp5-mst-13.2.git

diff --git a/lib/locale.t b/lib/locale.t
index bcbce9e..037e271 100644
--- a/lib/locale.t
+++ b/lib/locale.t
@@ -45,6 +45,9 @@ eval {
 # and mingw32 uses said silly CRT
 $have_setlocale = 0 if (($^O eq 'MSWin32' || $^O eq 'NetWare') && $Config{cc} =~ /^(cl|gcc)/i);
 
+# UWIN seems to loop after test 98, just skip for now
+$have_setlocale = 0 if ($^O =~ /^uwin/);
+
 my $last = $have_setlocale ? &last : &last_without_setlocale;
 
 print "1..$last\n";
@@ -379,6 +382,10 @@ delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
 
 if (-x "/usr/bin/locale" && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|")) {
     while (<LOCALES>) {
+	# It seems that /usr/bin/locale steadfastly outputs 8 bit data, which
+	# ain't great when we're running this test with PERL_UNICODE= so that
+	# utf8 locales will cause all IO handles to default to (assume) utf8
+	next unless utf8::valid($_);
         chomp;
 	trylocale($_);
     }
@@ -432,7 +439,10 @@ setlocale(LC_ALL, "C");
 
 @Locale = sort @Locale;
 
-debug "# Locales = @Locale\n";
+debug "# Locales =\n";
+for ( @Locale ) {
+    debug "# $_\n";
+}
 
 my %Problem;
 my %Okay;
@@ -520,7 +530,17 @@ foreach $Locale (@Locale) {
     
 	my $word = join('', @Neoalpha);
 
-	if ($Locale =~ /utf-?8/i) {
+	my $badutf8;
+	{
+	    local $SIG{__WARN__} = sub {
+		$badutf8 = $_[0] =~ /Malformed UTF-8/;
+	    };
+	    $Locale =~ /utf-?8/i;
+	}
+
+	if ($badutf8) {
+	    debug "# Locale name contains bad UTF-8, skipping test 99 for locale '$Locale'\n";
+	} elsif ($Locale =~ /utf-?8/i) {
 	    debug "# unknown whether locale and Unicode have the same \\w, skipping test 99 for locale '$Locale'\n";
 	    push @{$Okay{99}}, $Locale;
 	} else {
@@ -722,6 +742,7 @@ foreach $Locale (@Locale) {
 	    print "# UPPER $x lc $y ",
 	    $x =~ /$y/i ? 1 : 0, " ",
 	    $y =~ /$x/i ? 1 : 0, "\n" if 0;
+	    #
 	    # If $x and $y contain regular expression characters
 	    # AND THEY lowercase (/i) to regular expression characters,
 	    # regcomp() will be mightily confused.  No, the \Q doesn't
@@ -729,12 +750,22 @@ foreach $Locale (@Locale) {
 	    # is done after the \Q?)  An example of this happening is
 	    # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
 	    # the chr(173) (the "[") is the lowercase of the chr(235).
+	    #
 	    # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
 	    # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
 	    # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
 	    # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
 	    # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
 	    # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
+	    #
+	    # Similar things can happen even under (bastardised)
+	    # non-EBCDIC locales: in many European countries before the
+	    # advent of ISO 8859-x nationally customised versions of
+	    # ISO 646 were devised, reusing certain punctuation
+	    # characters for modified characters needed by the
+	    # country/language.  For example, the "|" might have
+	    # stood for U+00F6 (LATIN SMALL LETTER O WITH DIAERESIS).
+	    #
 	    if ($x =~ $re || $y =~ $re) {
 		print "# Regex characters in '$x' or '$y', skipping test 117 for locale '$Locale'\n";
 		next;