X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2Flocale.t;h=7b273309729dc193b34c389944590e0229eef758;hb=08411240a1e5278b0232e1455d984110b1c5343b;hp=679aaf0b2e82e557a14018eabd6ae962a0beb9e1;hpb=887ef7ed525c34ffc3e744dad908d47e4dcb4c10;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/locale.t b/lib/locale.t index 679aaf0..7b27330 100644 --- a/lib/locale.t +++ b/lib/locale.t @@ -725,6 +725,7 @@ foreach $Locale (@Locale) { print "# UPPER $x lc $y ", $x =~ /$y/i ? 1 : 0, " ", $y =~ /$x/i ? 1 : 0, "\n" if 0; + # # If $x and $y contain regular expression characters # AND THEY lowercase (/i) to regular expression characters, # regcomp() will be mightily confused. No, the \Q doesn't @@ -732,12 +733,22 @@ foreach $Locale (@Locale) { # is done after the \Q?) An example of this happening is # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS): # the chr(173) (the "[") is the lowercase of the chr(235). + # # Similarly losing EBCDIC locales include cs_cz, cs_CZ, # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!), # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT, # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037, # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU, # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR. + # + # Similar things can happen even under (bastardised) + # non-EBCDIC locales: in many European countries before the + # advent of ISO 8859-x nationally customised versions of + # ISO 646 were devised, reusing certain punctuation + # characters for modified characters needed by the + # country/language. For example, the "|" might have + # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS. + # if ($x =~ $re || $y =~ $re) { print "# Regex characters in '$x' or '$y', skipping test 117 for locale '$Locale'\n"; next;