Convert to using File::Spec, so that we can build Unicode files

[p5sagit/p5-mst-13.2.git] / lib / locale.t
diff --git a/lib/locale.t b/lib/locale.t

index 679aaf0..a294d2f 100644 (file)
--- a/lib/locale.t
+++ b/lib/locale.t
@@ -45,6 +45,9 @@ eval {
 # and mingw32 uses said silly CRT
 $have_setlocale = 0 if (($^O eq 'MSWin32' || $^O eq 'NetWare') && $Config{cc} =~ /^(cl|gcc)/i);
 
+# UWIN seems to loop after test 98, so skip the setlocale tests for now
+$have_setlocale = 0 if ($^O =~ /^uwin/);
+
 my $last = $have_setlocale ? &last : &last_without_setlocale;
 
 print "1..$last\n";
@@ -523,7 +526,17 @@ foreach $Locale (@Locale) {
     
        my $word = join('', @Neoalpha);
 
-       if ($Locale =~ /utf-?8/i) {
+       my $badutf8;
+       {
+           local $SIG{__WARN__} = sub {
+               $badutf8 = $_[0] =~ /Malformed UTF-8/;
+           };
+           $Locale =~ /utf-?8/i;
+       }
+
+       if ($badutf8) {
+           debug "# Locale name contains bad UTF-8, skipping test 99 for locale '$Locale'\n";
+       } elsif ($Locale =~ /utf-?8/i) {
            debug "# unknown whether locale and Unicode have the same \\w, skipping test 99 for locale '$Locale'\n";
            push @{$Okay{99}}, $Locale;
        } else {
@@ -725,6 +738,7 @@ foreach $Locale (@Locale) {
            print "# UPPER $x lc $y ",
            $x =~ /$y/i ? 1 : 0, " ",
            $y =~ /$x/i ? 1 : 0, "\n" if 0;
+           #
            # If $x and $y contain regular expression characters
            # AND THEY lowercase (/i) to regular expression characters,
            # regcomp() will be mightily confused.  No, the \Q doesn't
@@ -732,12 +746,22 @@ foreach $Locale (@Locale) {
            # is done after the \Q?)  An example of this happening is
            # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
            # the chr(173) (the "[") is the lowercase of the chr(235).
+           #
            # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
            # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
            # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
            # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
            # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
            # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
+           #
+           # Similar things can happen even under (bastardised)
+           # non-EBCDIC locales: in many European countries before the
+           # advent of ISO 8859-x nationally customised versions of
+           # ISO 646 were devised, reusing certain punctuation
+           # characters for modified characters needed by the
+           # country/language.  For example, the "|" might have
+           # stood for U+00F6, LATIN SMALL LETTER O WITH DIAERESIS.
+           #
            if ($x =~ $re || $y =~ $re) {
                print "# Regex characters in '$x' or '$y', skipping test 117 for locale '$Locale'\n";
                next;