# include <langinfo.h>
#endif
+#include "reentr.h"
+
/*
* Standardize the locale name from a string returned by 'setlocale'.
*
SSize_t mult = fb - fa;
if (mult < 1)
Perl_croak(aTHX_ "strxfrm() gets absurd");
- PL_collxfrm_base = (fa > mult) ? (fa - mult) : 0;
+ PL_collxfrm_base = (fa > (Size_t)mult) ? (fa - mult) : 0;
PL_collxfrm_mult = mult;
}
}
#endif /* USE_LOCALE */
+#ifdef USE_PERLIO
{
+ /* Set PL_wantutf8 to TRUE if using PerlIO _and_
+ any of the following are true:
+ - nl_langinfo(CODESET) contains /^utf-?8/i
+ - $ENV{LC_ALL} contains /^utf-?8/i
+ - $ENV{LC_CTYPE} contains /^utf-?8/i
+ - $ENV{LANG} contains /^utf-?8/i
+ The LC_ALL, LC_CTYPE, LANG obey the usual override
+ hierarchy of locale environment variables. (LANGUAGE
+ affects only LC_MESSAGES, and only under glibc.) (If present,
+ it overrides LC_MESSAGES for GNU gettext, and it also
+ can have more than one locale, separated by colons,
+ in case you need to know.)
+ If PL_wantutf8 is true, perl.c:S_parse_body()
+ will turn on the PerlIO :utf8 discipline on STDIN, STDOUT,
+ STDERR, _and_ the default open discipline.
+ */
bool wantutf8 = FALSE;
char *codeset = NULL;
#if defined(HAS_NL_LANGINFO) && defined(CODESET)
codeset = nl_langinfo(CODESET);
#endif
- if (codeset &&
- (ibcmp(codeset, "UTF-8", 5) == 0 ||
- ibcmp(codeset, "UTF8", 4) == 0))
- wantutf8 = TRUE;
-#ifdef __GLIBC__
- if (!wantutf8 && language &&
- (ibcmp(language, "UTF-8", 5) == 0 ||
- ibcmp(language, "UTF8", 4) == 0))
- wantutf8 = TRUE;
-#endif
- if (!wantutf8 && lc_all &&
- (ibcmp(lc_all, "UTF-8", 5) == 0 ||
- ibcmp(lc_all, "UTF8", 4) == 0))
- wantutf8 = TRUE;
+ if (codeset)
+ wantutf8 = (ibcmp(codeset, "UTF-8", 5) == 0 ||
+ ibcmp(codeset, "UTF8", 4) == 0);
+#if defined(USE_LOCALE)
+ else { /* nl_langinfo(CODESET) is supposed to correctly
+ * interpret the locale environment variables,
+ * but just in case it fails, let's do this manually. */
+ if (lang)
+ wantutf8 = (ibcmp(lang, "UTF-8", 5) == 0 ||
+ ibcmp(lang, "UTF8", 4) == 0);
#ifdef USE_LOCALE_CTYPE
- if (!wantutf8 && curctype &&
- (ibcmp(curctype, "UTF-8", 5) == 0 ||
- ibcmp(curctype, "UTF8", 4) == 0))
- wantutf8 = TRUE;
+ if (curctype)
+ wantutf8 = (ibcmp(curctype, "UTF-8", 5) == 0 ||
+ ibcmp(curctype, "UTF8", 4) == 0);
#endif
- if (!wantutf8 && lang &&
- (ibcmp(lang, "UTF-8", 5) == 0 ||
- ibcmp(lang, "UTF8", 4) == 0))
- wantutf8 = TRUE;
+ if (lc_all)
+ wantutf8 = (ibcmp(lc_all, "UTF-8", 5) == 0 ||
+ ibcmp(lc_all, "UTF8", 4) == 0);
+#endif /* USE_LOCALE */
+ }
if (wantutf8)
PL_wantutf8 = TRUE;
}
+#endif
#ifdef USE_LOCALE_CTYPE
if (curctype != NULL)
xused = strxfrm(xbuf + xout, s + xin, xAlloc - xout);
if (xused == -1)
goto bad;
- if (xused < xAlloc - xout)
+ if ((STRLEN)xused < xAlloc - xout)
break;
xAlloc = (2 * xAlloc) + 1;
Renew(xbuf, xAlloc, char);