/* locale.c
*
- * Copyright (c) 2001-2002, Larry Wall
+ * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+ * 2002, 2003, 2005, 2006, 2007, 2008 by Larry Wall and others
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
*/
/*
- * A Elbereth Gilthoniel,
- * silivren penna míriel
- * o menel aglar elenath!
- * Na-chaered palan-díriel
- * o galadhremmin ennorath,
- * Fanuilos, le linnathon
- * nef aear, si nef aearon!
+ * A Elbereth Gilthoniel,
+ * silivren penna míriel
+ * o menel aglar elenath!
+ * Na-chaered palan-díriel
+ * o galadhremmin ennorath,
+ * Fanuilos, le linnathon
+ * nef aear, si nef aearon!
+ *
+ * [p.238 of _The Lord of the Rings_, II/i: "Many Meetings"]
+ */
+
+/* utility functions for handling locale-specific stuff like what
+ * character represents the decimal point.
*/
#include "EXTERN.h"
# include <langinfo.h>
#endif
+#include "reentr.h"
+
+#if defined(USE_LOCALE_NUMERIC) || defined(USE_LOCALE_COLLATE)
/*
* Standardize the locale name from a string returned by 'setlocale'.
*
STATIC char *
S_stdize_locale(pTHX_ char *locs)
{
- char *s;
+ const char * const s = strchr(locs, '=');
bool okay = TRUE;
- if ((s = strchr(locs, '='))) {
- char *t;
+ PERL_ARGS_ASSERT_STDIZE_LOCALE;
+ if (s) {
+ const char * const t = strchr(s, '.');
okay = FALSE;
- if ((t = strchr(s, '.'))) {
- char *u;
-
- if ((u = strchr(t, '\n'))) {
-
- if (u[1] == 0) {
- STRLEN len = u - s;
- Move(s + 1, locs, len, char);
- locs[len] = 0;
- okay = TRUE;
- }
+ if (t) {
+ const char * const u = strchr(t, '\n');
+ if (u && (u[1] == 0)) {
+ const STRLEN len = u - s;
+ Move(s + 1, locs, len, char);
+ locs[len] = 0;
+ okay = TRUE;
}
}
}
return locs;
}
+#endif
void
Perl_set_numeric_radix(pTHX)
{
#ifdef USE_LOCALE_NUMERIC
+ dVAR;
# ifdef HAS_LOCALECONV
- struct lconv* lc;
+ const struct lconv* const lc = localeconv();
- lc = localeconv();
if (lc && lc->decimal_point) {
if (lc->decimal_point[0] == '.' && lc->decimal_point[1] == 0) {
SvREFCNT_dec(PL_numeric_radix_sv);
- PL_numeric_radix_sv = Nullsv;
+ PL_numeric_radix_sv = NULL;
}
else {
if (PL_numeric_radix_sv)
}
}
else
- PL_numeric_radix_sv = Nullsv;
+ PL_numeric_radix_sv = NULL;
# endif /* HAS_LOCALECONV */
#endif /* USE_LOCALE_NUMERIC */
}
* Set up for a new numeric locale.
*/
void
-Perl_new_numeric(pTHX_ char *newnum)
+Perl_new_numeric(pTHX_ const char *newnum)
{
#ifdef USE_LOCALE_NUMERIC
+ dVAR;
if (! newnum) {
- if (PL_numeric_name) {
- Safefree(PL_numeric_name);
- PL_numeric_name = NULL;
- }
+ Safefree(PL_numeric_name);
+ PL_numeric_name = NULL;
PL_numeric_standard = TRUE;
PL_numeric_local = TRUE;
return;
if (! PL_numeric_name || strNE(PL_numeric_name, newnum)) {
Safefree(PL_numeric_name);
PL_numeric_name = stdize_locale(savepv(newnum));
- PL_numeric_standard = (strEQ(newnum, "C") || strEQ(newnum, "POSIX"));
+ PL_numeric_standard = ((*newnum == 'C' && newnum[1] == '\0')
+ || strEQ(newnum, "POSIX"));
PL_numeric_local = TRUE;
set_numeric_radix();
}
Perl_set_numeric_standard(pTHX)
{
#ifdef USE_LOCALE_NUMERIC
+ dVAR;
if (! PL_numeric_standard) {
setlocale(LC_NUMERIC, "C");
Perl_set_numeric_local(pTHX)
{
#ifdef USE_LOCALE_NUMERIC
+ dVAR;
if (! PL_numeric_local) {
setlocale(LC_NUMERIC, PL_numeric_name);
* Set up for a new ctype locale.
*/
void
-Perl_new_ctype(pTHX_ char *newctype)
+Perl_new_ctype(pTHX_ const char *newctype)
{
#ifdef USE_LOCALE_CTYPE
-
+ dVAR;
int i;
+ PERL_ARGS_ASSERT_NEW_CTYPE;
+
for (i = 0; i < 256; i++) {
if (isUPPER_LC(i))
PL_fold_locale[i] = toLOWER_LC(i);
}
#endif /* USE_LOCALE_CTYPE */
+ PERL_ARGS_ASSERT_NEW_CTYPE;
+ PERL_UNUSED_ARG(newctype);
+ PERL_UNUSED_CONTEXT;
}
/*
* Set up for a new collation locale.
*/
void
-Perl_new_collate(pTHX_ char *newcoll)
+Perl_new_collate(pTHX_ const char *newcoll)
{
#ifdef USE_LOCALE_COLLATE
+ dVAR;
if (! newcoll) {
if (PL_collation_name) {
++PL_collation_ix;
Safefree(PL_collation_name);
PL_collation_name = stdize_locale(savepv(newcoll));
- PL_collation_standard = (strEQ(newcoll, "C") || strEQ(newcoll, "POSIX"));
+ PL_collation_standard = ((*newcoll == 'C' && newcoll[1] == '\0')
+ || strEQ(newcoll, "POSIX"));
{
/* 2: at most so many chars ('a', 'b'). */
/* 50: surely no system expands a char more. */
#define XFRMBUFSIZE (2 * 50)
char xbuf[XFRMBUFSIZE];
- Size_t fa = strxfrm(xbuf, "a", XFRMBUFSIZE);
- Size_t fb = strxfrm(xbuf, "ab", XFRMBUFSIZE);
- SSize_t mult = fb - fa;
+ const Size_t fa = strxfrm(xbuf, "a", XFRMBUFSIZE);
+ const Size_t fb = strxfrm(xbuf, "ab", XFRMBUFSIZE);
+ const SSize_t mult = fb - fa;
if (mult < 1)
Perl_croak(aTHX_ "strxfrm() gets absurd");
PL_collxfrm_base = (fa > (Size_t)mult) ? (fa - mult) : 0;
*/
#if defined(USE_LOCALE)
+ dVAR;
#ifdef USE_LOCALE_CTYPE
char *curctype = NULL;
char *curnum = NULL;
#endif /* USE_LOCALE_NUMERIC */
#ifdef __GLIBC__
- char *language = PerlEnv_getenv("LANGUAGE");
+ char * const language = PerlEnv_getenv("LANGUAGE");
#endif
- char *lc_all = PerlEnv_getenv("LC_ALL");
- char *lang = PerlEnv_getenv("LANG");
+ char * const lc_all = PerlEnv_getenv("LC_ALL");
+ char * const lang = PerlEnv_getenv("LANG");
bool setlocale_failure = FALSE;
#ifdef LOCALE_ENVIRON_REQUIRED
}
if (!setlocale_failure) {
#ifdef USE_LOCALE_CTYPE
+ Safefree(curctype);
if (! (curctype =
setlocale(LC_CTYPE,
(!done && (lang || PerlEnv_getenv("LC_CTYPE")))
- ? "" : Nullch)))
+ ? "" : NULL)))
setlocale_failure = TRUE;
else
curctype = savepv(curctype);
#endif /* USE_LOCALE_CTYPE */
#ifdef USE_LOCALE_COLLATE
+ Safefree(curcoll);
if (! (curcoll =
setlocale(LC_COLLATE,
(!done && (lang || PerlEnv_getenv("LC_COLLATE")))
- ? "" : Nullch)))
+ ? "" : NULL)))
setlocale_failure = TRUE;
else
curcoll = savepv(curcoll);
#endif /* USE_LOCALE_COLLATE */
#ifdef USE_LOCALE_NUMERIC
+ Safefree(curnum);
if (! (curnum =
setlocale(LC_NUMERIC,
(!done && (lang || PerlEnv_getenv("LC_NUMERIC")))
- ? "" : Nullch)))
+ ? "" : NULL)))
setlocale_failure = TRUE;
else
curnum = savepv(curnum);
if (!setlocale_failure) {
#ifdef USE_LOCALE_CTYPE
+ Safefree(curctype);
if (! (curctype = setlocale(LC_CTYPE, "")))
setlocale_failure = TRUE;
else
curctype = savepv(curctype);
#endif /* USE_LOCALE_CTYPE */
#ifdef USE_LOCALE_COLLATE
+ Safefree(curcoll);
if (! (curcoll = setlocale(LC_COLLATE, "")))
setlocale_failure = TRUE;
else
curcoll = savepv(curcoll);
#endif /* USE_LOCALE_COLLATE */
#ifdef USE_LOCALE_NUMERIC
+ Safefree(curnum);
if (! (curnum = setlocale(LC_NUMERIC, "")))
setlocale_failure = TRUE;
else
if (setlocale_failure) {
char *p;
- bool locwarn = (printwarn > 1 ||
+ const bool locwarn = (printwarn > 1 ||
(printwarn &&
(!(p = PerlEnv_getenv("PERL_BADLANG")) || atoi(p))));
#endif /* ! LC_ALL */
#ifdef USE_LOCALE_CTYPE
- curctype = savepv(setlocale(LC_CTYPE, Nullch));
+ Safefree(curctype);
+ curctype = savepv(setlocale(LC_CTYPE, NULL));
#endif /* USE_LOCALE_CTYPE */
#ifdef USE_LOCALE_COLLATE
- curcoll = savepv(setlocale(LC_COLLATE, Nullch));
+ Safefree(curcoll);
+ curcoll = savepv(setlocale(LC_COLLATE, NULL));
#endif /* USE_LOCALE_COLLATE */
#ifdef USE_LOCALE_NUMERIC
- curnum = savepv(setlocale(LC_NUMERIC, Nullch));
+ Safefree(curnum);
+ curnum = savepv(setlocale(LC_NUMERIC, NULL));
#endif /* USE_LOCALE_NUMERIC */
}
else {
#endif /* USE_LOCALE */
+#ifdef USE_PERLIO
{
- bool wantutf8 = FALSE;
+ /* Set PL_utf8locale to TRUE if using PerlIO _and_
+ any of the following are true:
+ - nl_langinfo(CODESET) contains /^utf-?8/i
+ - $ENV{LC_ALL} contains /^utf-?8/i
+ - $ENV{LC_CTYPE} contains /^utf-?8/i
+ - $ENV{LANG} contains /^utf-?8/i
+ The LC_ALL, LC_CTYPE, LANG obey the usual override
+ hierarchy of locale environment variables. (LANGUAGE
+ affects only LC_MESSAGES only under glibc.) (If present,
+ it overrides LC_MESSAGES for GNU gettext, and it also
+ can have more than one locale, separated by spaces,
+ in case you need to know.)
+ If PL_utf8locale and PL_unicode (set by -C or by $ENV{PERL_UNICODE})
+ are true, perl.c:S_parse_body() will turn on the PerlIO :utf8 layer
+ on STDIN, STDOUT, STDERR, _and_ the default open discipline.
+ */
+ bool utf8locale = FALSE;
char *codeset = NULL;
#if defined(HAS_NL_LANGINFO) && defined(CODESET)
codeset = nl_langinfo(CODESET);
#endif
- if (codeset &&
- (ibcmp(codeset, "UTF-8", 5) == 0 ||
- ibcmp(codeset, "UTF8", 4) == 0))
- wantutf8 = TRUE;
+ if (codeset)
+ utf8locale = (Perl_ibcmp(aTHX_ codeset, STR_WITH_LEN("UTF-8")) == 0 ||
+ Perl_ibcmp(aTHX_ codeset, STR_WITH_LEN("UTF8") ) == 0);
#if defined(USE_LOCALE)
-#ifdef __GLIBC__
- if (!wantutf8 && language &&
- (ibcmp(language, "UTF-8", 5) == 0 ||
- ibcmp(language, "UTF8", 4) == 0))
- wantutf8 = TRUE;
-#endif
- if (!wantutf8 && lc_all &&
- (ibcmp(lc_all, "UTF-8", 5) == 0 ||
- ibcmp(lc_all, "UTF8", 4) == 0))
- wantutf8 = TRUE;
+ else { /* nl_langinfo(CODESET) is supposed to correctly
+ * interpret the locale environment variables,
+ * but just in case it fails, let's do this manually. */
+ if (lang)
+ utf8locale = (Perl_ibcmp(aTHX_ lang, STR_WITH_LEN("UTF-8")) == 0 ||
+ Perl_ibcmp(aTHX_ lang, STR_WITH_LEN("UTF8") ) == 0);
#ifdef USE_LOCALE_CTYPE
- if (!wantutf8 && curctype &&
- (ibcmp(curctype, "UTF-8", 5) == 0 ||
- ibcmp(curctype, "UTF8", 4) == 0))
- wantutf8 = TRUE;
+ if (curctype)
+ utf8locale = (Perl_ibcmp(aTHX_ curctype, STR_WITH_LEN("UTF-8")) == 0 ||
+ Perl_ibcmp(aTHX_ curctype, STR_WITH_LEN("UTF8") ) == 0);
#endif
- if (!wantutf8 && lang &&
- (ibcmp(lang, "UTF-8", 5) == 0 ||
- ibcmp(lang, "UTF8", 4) == 0))
- wantutf8 = TRUE;
+ if (lc_all)
+ utf8locale = (Perl_ibcmp(aTHX_ lc_all, STR_WITH_LEN("UTF-8")) == 0 ||
+ Perl_ibcmp(aTHX_ lc_all, STR_WITH_LEN("UTF8") ) == 0);
+ }
#endif /* USE_LOCALE */
- if (wantutf8)
- PL_wantutf8 = TRUE;
+ if (utf8locale)
+ PL_utf8locale = TRUE;
+ }
+ /* Set PL_unicode to $ENV{PERL_UNICODE} if using PerlIO.
+ This is an alternative to using the -C command line switch
+ (the -C if present will override this). */
+ {
+ const char *p = PerlEnv_getenv("PERL_UNICODE");
+ PL_unicode = p ? parse_unicode_opts(&p) : 0;
+ if (PL_unicode & PERL_UNICODE_UTF8CACHEASSERT_FLAG)
+ PL_utf8cache = -1;
}
+#endif
#ifdef USE_LOCALE_CTYPE
- if (curctype != NULL)
- Safefree(curctype);
+ Safefree(curctype);
#endif /* USE_LOCALE_CTYPE */
#ifdef USE_LOCALE_COLLATE
- if (curcoll != NULL)
- Safefree(curcoll);
+ Safefree(curcoll);
#endif /* USE_LOCALE_COLLATE */
#ifdef USE_LOCALE_NUMERIC
- if (curnum != NULL)
- Safefree(curnum);
+ Safefree(curnum);
#endif /* USE_LOCALE_NUMERIC */
return ok;
}
-/* Backwards compatibility. */
-int
-Perl_init_i18nl14n(pTHX_ int printwarn)
-{
- return init_i18nl10n(printwarn);
-}
-
#ifdef USE_LOCALE_COLLATE
/*
* The real transformed data begins at offset sizeof(collationix).
* Please see sv_collxfrm() to see how this is used.
*/
+
char *
Perl_mem_collxfrm(pTHX_ const char *s, STRLEN len, STRLEN *xlen)
{
+ dVAR;
char *xbuf;
STRLEN xAlloc, xin, xout; /* xalloc is a reserved word in VC */
+ PERL_ARGS_ASSERT_MEM_COLLXFRM;
+
/* the first sizeof(collationix) bytes are used by sv_collxfrm(). */
/* the +1 is for the terminating NUL. */
xAlloc = sizeof(PL_collation_ix) + PL_collxfrm_base + (PL_collxfrm_mult * len) + 1;
- New(171, xbuf, xAlloc, char);
+ Newx(xbuf, xAlloc, char);
if (! xbuf)
goto bad;
*(U32*)xbuf = PL_collation_ix;
xout = sizeof(PL_collation_ix);
for (xin = 0; xin < len; ) {
- SSize_t xused;
+ Size_t xused;
for (;;) {
xused = strxfrm(xbuf + xout, s + xin, xAlloc - xout);
- if (xused == -1)
+ if (xused >= PERL_INT_MAX)
goto bad;
if ((STRLEN)xused < xAlloc - xout)
break;
#endif /* USE_LOCALE_COLLATE */
+/*
+ * Local variables:
+ * c-indentation-style: bsd
+ * c-basic-offset: 4
+ * indent-tabs-mode: t
+ * End:
+ *
+ * ex: set ts=8 sts=4 sw=4 noet:
+ */