From: Jarkko Hietaniemi Date: Sun, 3 Nov 2002 17:50:08 +0000 (+0200) Subject: Re: [PATCH] UTF-8 enabling via locale (was: Re: Redhat 8 issue?) X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=61de9fb5aad39c2904a43125c7c70031be6bc679;p=p5sagit%2Fp5-mst-13.2.git Re: [PATCH] UTF-8 enabling via locale (was: Re: Redhat 8 issue?) Message-ID: <20021103155008.GA382682@lyta.hut.fi> p4raw-id: //depot/perl@18165 --- diff --git a/lib/open.pm b/lib/open.pm index 2dc1d21..007b667 100644 --- a/lib/open.pm +++ b/lib/open.pm @@ -27,6 +27,7 @@ sub _get_locale_encoding { } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) { ($country_language, $locale_encoding) = ($1, $2); } + # LANGUAGE affects only LC_MESSAGES, and only on glibc } elsif (not $locale_encoding) { if ($ENV{LC_ALL} =~ /\butf-?8\b/i || $ENV{LANG} =~ /\butf-?8\b/i) { @@ -250,7 +251,7 @@ pragma. =back -If your locale environment variables (LANGUAGE, LC_ALL, LC_CTYPE, LANG) +If your locale environment variables (LC_ALL, LC_CTYPE, LANG) contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching), the default encoding of your STDIN, STDOUT, and STDERR, and of B<any subsequent file open>, is UTF-8. diff --git a/locale.c b/locale.c index 0986614..c03451b 100644 --- a/locale.c +++ b/locale.c @@ -478,10 +478,15 @@ Perl_init_i18nl10n(pTHX_ int printwarn) /* Set PL_wantutf8 to TRUE if using PerlIO _and_ any of the following are true: - nl_langinfo(CODESET) contains /^utf-?8/i - - $ENV{LANGUAGE} contains /^utf-?8/i (only if using glibc) - - $ENV{LC_CALL} contains /^utf-?8/i + - $ENV{LC_ALL} contains /^utf-?8/i - $ENV{LC_CTYPE} contains /^utf-?8/i - - $ENV{LANG} contains /^utf-?8/i + - $ENV{LANG} contains /^utf-?8/i + LC_ALL, LC_CTYPE, and LANG obey the usual override + hierarchy of locale environment variables. (LANGUAGE + affects only LC_MESSAGES, and only under glibc.) (If present, + it overrides LC_MESSAGES for GNU gettext, and it also + can have more than one locale, separated by colons, + in case you need to know.) 
If PL_wantutf8 is true, perl.c:S_parse_body() will turn on the PerlIO :utf8 discipline on STDIN, STDOUT, STDERR, _and_ the default open discipline. @@ -491,32 +496,26 @@ Perl_init_i18nl10n(pTHX_ int printwarn) #if defined(HAS_NL_LANGINFO) && defined(CODESET) codeset = nl_langinfo(CODESET); #endif - if (codeset && - (ibcmp(codeset, "UTF-8", 5) == 0 || - ibcmp(codeset, "UTF8", 4) == 0)) - wantutf8 = TRUE; + if (codeset) + wantutf8 = (ibcmp(codeset, "UTF-8", 5) == 0 || + ibcmp(codeset, "UTF8", 4) == 0); #if defined(USE_LOCALE) -#ifdef __GLIBC__ - if (!wantutf8 && language && - (ibcmp(language, "UTF-8", 5) == 0 || - ibcmp(language, "UTF8", 4) == 0)) - wantutf8 = TRUE; -#endif - if (!wantutf8 && lc_all && - (ibcmp(lc_all, "UTF-8", 5) == 0 || - ibcmp(lc_all, "UTF8", 4) == 0)) - wantutf8 = TRUE; + else { /* nl_langinfo(CODESET) is supposed to correctly + * interpret the locale environment variables, + * but just in case it fails, let's do this manually. */ + if (lang) + wantutf8 = (ibcmp(lang, "UTF-8", 5) == 0 || + ibcmp(lang, "UTF8", 4) == 0); #ifdef USE_LOCALE_CTYPE - if (!wantutf8 && curctype && - (ibcmp(curctype, "UTF-8", 5) == 0 || - ibcmp(curctype, "UTF8", 4) == 0)) - wantutf8 = TRUE; + if (curctype) + wantutf8 = (ibcmp(curctype, "UTF-8", 5) == 0 || + ibcmp(curctype, "UTF8", 4) == 0); #endif - if (!wantutf8 && lang && - (ibcmp(lang, "UTF-8", 5) == 0 || - ibcmp(lang, "UTF8", 4) == 0)) - wantutf8 = TRUE; + if (lc_all) + wantutf8 = (ibcmp(lc_all, "UTF-8", 5) == 0 || + ibcmp(lc_all, "UTF8", 4) == 0); #endif /* USE_LOCALE */ + } if (wantutf8) PL_wantutf8 = TRUE; } diff --git a/pod/perl58delta.pod b/pod/perl58delta.pod index 9b9e529..3bef17b 100644 --- a/pod/perl58delta.pod +++ b/pod/perl58delta.pod @@ -415,12 +415,12 @@ for more information about UTF-8. 
=item * -If your environment variables (LC_ALL, LC_CTYPE, LANG, LANGUAGE) look -like you want to use UTF-8 (any of the the variables match C</utf-?8/i>), -your STDIN, STDOUT, STDERR handles and the default open layer -(see L<open>) are marked as UTF-8. (This feature, like other new -features that combine Unicode and I/O, work only if you are using -PerlIO, but that's the default.) +If your environment variables (LC_ALL, LC_CTYPE, LANG) look like you +want to use UTF-8 (any of the variables match C</utf-?8/i>), your +STDIN, STDOUT, STDERR handles and the default open layer (see L<open>) +are marked as UTF-8. (This feature, like other new features that +combine Unicode and I/O, works only if you are using PerlIO, but that's +the default.) Note that after this Perl really does assume that everything is UTF-8: for example if some input handle is not, Perl will probably very soon