#define PL_utf8_toupper (vTHX->Iutf8_toupper)
#define PL_utf8_upper (vTHX->Iutf8_upper)
#define PL_utf8_xdigit (vTHX->Iutf8_xdigit)
+#define PL_utf8locale (vTHX->Iutf8locale)
#define PL_uudmap (vTHX->Iuudmap)
#define PL_wantutf8 (vTHX->Iwantutf8)
#define PL_warnhook (vTHX->Iwarnhook)
-#define PL_widesyscalls (vTHX->Iwidesyscalls)
#define PL_xiv_arenaroot (vTHX->Ixiv_arenaroot)
#define PL_xiv_root (vTHX->Ixiv_root)
#define PL_xnv_arenaroot (vTHX->Ixnv_arenaroot)
#define PL_Iutf8_toupper PL_utf8_toupper
#define PL_Iutf8_upper PL_utf8_upper
#define PL_Iutf8_xdigit PL_utf8_xdigit
+#define PL_Iutf8locale PL_utf8locale
#define PL_Iuudmap PL_uudmap
#define PL_Iwantutf8 PL_wantutf8
#define PL_Iwarnhook PL_warnhook
-#define PL_Iwidesyscalls PL_widesyscalls
#define PL_Ixiv_arenaroot PL_xiv_arenaroot
#define PL_Ixiv_root PL_xiv_root
#define PL_Ixnv_arenaroot PL_xnv_arenaroot
goto ro_magicalize;
else
break;
+ case '\025':
+ if (len > 1 && strNE(name, "\025TF8_LOCALE"))
+ break;
+ goto ro_magicalize;
+
case '\027': /* $^W & $^WARNING_BITS */
- if (len > 1 && strNE(name, "\027ARNING_BITS")
- && strNE(name, "\027IDE_SYSTEM_CALLS"))
+ if (len > 1
+ && strNE(name, "\027ARNING_BITS")
+ )
break;
goto magicalize;
goto yes;
}
break;
+ case '\025':
+ if (len > 1 && strEQ(name, "\025TF8_LOCALE"))
+ goto yes;
case '\027': /* $^W & $^WARNING_BITS */
if (len == 1
|| (len == 12 && strEQ(name, "\027ARNING_BITS"))
- || (len == 17 && strEQ(name, "\027IDE_SYSTEM_CALLS")))
+ )
{
goto yes;
}
*/
PERLVAR(Idowarn, U8)
-PERLVAR(Iwidesyscalls, bool) /* wide system calls */
+PERLVAR(Iutf8locale, bool) /* utf8 locale detected */
PERLVAR(Idoextract, bool)
PERLVAR(Isawampersand, bool) /* must save all match strings */
PERLVAR(Iunsafe, bool)
#ifdef USE_PERLIO
{
- /* Set PL_wantutf8 to TRUE if using PerlIO _and_
+ /* Set PL_utf8locale to TRUE if using PerlIO _and_
any of the following are true:
- nl_langinfo(CODESET) contains /^utf-?8/i
- $ENV{LC_ALL} contains /^utf-?8/i
it overrides LC_MESSAGES for GNU gettext, and it also
can have more than one locale, separated by spaces,
in case you need to know.)
- If PL_wantutf8 is true, perl.c:S_parse_body()
- will turn on the PerlIO :utf8 discipline on STDIN, STDOUT,
- STDERR, _and_ the default open discipline.
+ If PL_utf8locale and PL_wantutf8 (set by -C or PERL_UTF8_LOCALE)
+ are both true, perl.c:S_parse_body() will turn on the PerlIO :utf8
+ layer on STDIN, STDOUT, STDERR, _and_ as the default open layer.
*/
- bool wantutf8 = FALSE;
+ bool utf8locale = FALSE;
char *codeset = NULL;
#if defined(HAS_NL_LANGINFO) && defined(CODESET)
codeset = nl_langinfo(CODESET);
#endif
if (codeset)
- wantutf8 = (ibcmp(codeset, "UTF-8", 5) == 0 ||
- ibcmp(codeset, "UTF8", 4) == 0);
+ utf8locale = (ibcmp(codeset, "UTF-8", 5) == 0 ||
+ ibcmp(codeset, "UTF8", 4) == 0);
#if defined(USE_LOCALE)
else { /* nl_langinfo(CODESET) is supposed to correctly
* interpret the locale environment variables,
* but just in case it fails, let's do this manually. */
if (lang)
- wantutf8 = (ibcmp(lang, "UTF-8", 5) == 0 ||
- ibcmp(lang, "UTF8", 4) == 0);
+ utf8locale = (ibcmp(lang, "UTF-8", 5) == 0 ||
+ ibcmp(lang, "UTF8", 4) == 0);
#ifdef USE_LOCALE_CTYPE
if (curctype)
- wantutf8 = (ibcmp(curctype, "UTF-8", 5) == 0 ||
- ibcmp(curctype, "UTF8", 4) == 0);
+ utf8locale = (ibcmp(curctype, "UTF-8", 5) == 0 ||
+ ibcmp(curctype, "UTF8", 4) == 0);
#endif
if (lc_all)
- wantutf8 = (ibcmp(lc_all, "UTF-8", 5) == 0 ||
- ibcmp(lc_all, "UTF8", 4) == 0);
-#endif /* USE_LOCALE */
+ utf8locale = (ibcmp(lc_all, "UTF-8", 5) == 0 ||
+ ibcmp(lc_all, "UTF8", 4) == 0);
}
- if (wantutf8)
- PL_wantutf8 = TRUE;
+#endif /* USE_LOCALE */
+ if (utf8locale)
+ PL_utf8locale = TRUE;
+ }
+ /* Set PL_wantutf8 to $ENV{PERL_UTF8_LOCALE} if using PerlIO.
+ This is an alternative to using the -C command line switch
+ (the -C if present will override this). */
+ {
+ char *p = PerlEnv_getenv("PERL_UTF8_LOCALE");
+ PL_wantutf8 = p ? (bool) atoi(p) : FALSE;
}
#endif
? (PL_taint_warn || PL_unsafe ? -1 : 1)
: 0);
break;
- case '\027': /* ^W & $^WARNING_BITS & ^WIDE_SYSTEM_CALLS */
+ case '\025': /* $^UTF8_LOCALE */
+ if (strEQ(mg->mg_ptr, "\025TF8_LOCALE"))
+ sv_setiv(sv, (IV) (PL_wantutf8 && PL_utf8locale));
+ break;
+ case '\027': /* ^W & $^WARNING_BITS */
if (*(mg->mg_ptr+1) == '\0')
sv_setiv(sv, (IV)((PL_dowarn & G_WARN_ON) ? TRUE : FALSE));
else if (strEQ(mg->mg_ptr+1, "ARNING_BITS")) {
}
SvPOK_only(sv);
}
- else if (strEQ(mg->mg_ptr+1, "IDE_SYSTEM_CALLS"))
- sv_setiv(sv, (IV)PL_widesyscalls);
break;
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9': case '&':
PL_basetime = (Time_t)(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv));
#endif
break;
- case '\027': /* ^W & $^WARNING_BITS & ^WIDE_SYSTEM_CALLS */
+ case '\025': /* $^UTF8_LOCALE */
+ if (SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv))
+ PL_wantutf8 = PL_utf8locale;
+ else
+ PL_wantutf8 = FALSE;
+ break;
+ case '\027': /* ^W & $^WARNING_BITS */
if (*(mg->mg_ptr+1) == '\0') {
if ( ! (PL_dowarn & G_WARN_ALL_MASK)) {
i = SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv);
}
}
}
- else if (strEQ(mg->mg_ptr+1, "IDE_SYSTEM_CALLS"))
- PL_widesyscalls = (bool)SvTRUE(sv);
break;
case '.':
if (PL_localizing) {
if (!PL_do_undump)
init_postdump_symbols(argc,argv,env);
- /* PL_wantutf8 is conditionally turned on by
+ /* PL_utf8locale is conditionally turned on by
* locale.c:Perl_init_i18nl10n() if the environment
- * look like the user wants to use UTF-8. */
- if (PL_wantutf8) { /* Requires init_predump_symbols(). */
+ * looks like the user wants to use UTF-8.
+ * PL_wantutf8 is turned on by -C or by $ENV{PERL_UTF8_LOCALE}. */
+ if (PL_utf8locale && PL_wantutf8) { /* Requires init_predump_symbols(). */
IO* io;
PerlIO* fp;
SV* sv;
return s + numlen;
}
case 'C':
- PL_widesyscalls = TRUE;
+ PL_wantutf8 = TRUE; /* Can be set earlier by $ENV{PERL_UTF8_LOCALE}. */
s++;
return s;
case 'F':
for (; argc > 0; argc--,argv++) {
SV *sv = newSVpv(argv[0],0);
av_push(GvAVn(PL_argvgv),sv);
- if (PL_widesyscalls)
+ if (PL_wantutf8)
(void)sv_utf8_decode(sv);
}
}
#define PL_utf8_upper (*Perl_Iutf8_upper_ptr(aTHX))
#undef PL_utf8_xdigit
#define PL_utf8_xdigit (*Perl_Iutf8_xdigit_ptr(aTHX))
+#undef PL_utf8locale
+#define PL_utf8locale (*Perl_Iutf8locale_ptr(aTHX))
#undef PL_uudmap
#define PL_uudmap (*Perl_Iuudmap_ptr(aTHX))
#undef PL_wantutf8
#define PL_wantutf8 (*Perl_Iwantutf8_ptr(aTHX))
#undef PL_warnhook
#define PL_warnhook (*Perl_Iwarnhook_ptr(aTHX))
-#undef PL_widesyscalls
-#define PL_widesyscalls (*Perl_Iwidesyscalls_ptr(aTHX))
#undef PL_xiv_arenaroot
#define PL_xiv_arenaroot (*Perl_Ixiv_arenaroot_ptr(aTHX))
#undef PL_xiv_root
=item B<-C>
-enables Perl to use the native wide character APIs on the target system.
-The magic variable C<${^WIDE_SYSTEM_CALLS}> reflects the state of
-this switch. See L<perlvar/"${^WIDE_SYSTEM_CALLS}">.
-
-This feature is currently only implemented on the Win32 platform.
+enables Perl to use the Unicode APIs on the target system.
+
+As of Perl 5.8.1, if C<-C> is used and the locale settings (the LC_ALL,
+LC_CTYPE, and LANG environment variables) indicate a UTF-8 locale,
+then STDIN, STDOUT, and STDERR are all expected to be in UTF-8,
+and C<:utf8> is the default file open layer.
+See L<perluniintro>, L<perlfunc/open>, and L<open> for more information.
+The magic variable C<${^UTF8_LOCALE}> reflects this state;
+see L<perlvar/"${^UTF8_LOCALE}">. (Another way of enabling this is
+to set the environment variable PERL_UTF8_LOCALE to a true value.)
+
+(In Perls earlier than 5.8.1 the C<-C> switch was a Win32-only switch
+that enabled the use of Unicode-aware "wide system call" Win32 APIs.
+This feature was practically unused, however, and the command line
+switch was therefore "recycled".)
=item B<-c>
external programs, from information provided by the system (such as %ENV),
or from literals and constants in the source text.
-On Windows platforms, if the C<-C> command line switch is used or the
-${^WIDE_SYSTEM_CALLS} global flag is set to C<1>, all system calls
-will use the corresponding wide-character APIs. This feature is
-available only on Windows to conform to the API standard already
-established for that platform--and there are very few non-Windows
-platforms that have Unicode-aware APIs.
-
The C<bytes> pragma will always, regardless of platform, force byte
semantics in a particular lexical scope. See L<bytes>.
=item *
-If your locale environment variables (LANGUAGE, LC_ALL, LC_CTYPE, LANG)
-contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching),
-the default encodings of your STDIN, STDOUT, and STDERR, and of
-B<any subsequent file open>, are considered to be UTF-8.
+If your locale environment variables (LC_ALL, LC_CTYPE, LANG)
+contain the strings 'UTF-8' or 'UTF8' (matched case-insensitively)
+B<and> you enable using UTF-8 either by using the C<-C> command line
+switch or by setting the PERL_UTF8_LOCALE environment variable to a true
+value, then the default encodings of your STDIN, STDOUT, and STDERR,
+and of B<any subsequent file open>, are considered to be UTF-8.
+See L<perluniintro>, L<perlfunc/open>, and L<open> for more
+information. The magic variable C<${^UTF8_LOCALE}> will also be set.
=item *
=head1 SEE ALSO
L<perluniintro>, L<encoding>, L<Encode>, L<open>, L<utf8>, L<bytes>,
-L<perlretut>, L<perlvar/"${^WIDE_SYSTEM_CALLS}">
+L<perlretut>, L<perlvar/"${^UTF8_LOCALE}">
=cut
to this sample program ensures that the output is completely UTF-8,
and removes the program's warning.
-If your locale environment variables (C<LANGUAGE>, C<LC_ALL>,
-C<LC_CTYPE>, C<LANG>) contain the strings 'UTF-8' or 'UTF8',
-regardless of case, then the default encoding of your STDIN, STDOUT,
-and STDERR and of B<any subsequent file open>, is UTF-8. Note that
-this means that Perl expects other software to work, too: if Perl has
-been led to believe that STDIN should be UTF-8, but then STDIN coming
-in from another command is not UTF-8, Perl will complain about the
+If your locale environment variables (C<LC_ALL>, C<LC_CTYPE>, C<LANG>)
+contain the strings 'UTF-8' or 'UTF8' (matched case-insensitively)
+B<and> you enable using UTF-8 either by using the C<-C> command line
+switch or by setting the PERL_UTF8_LOCALE environment variable to
+a true value, then the default encoding of your STDIN, STDOUT, and
+STDERR, and of B<any subsequent file open>, is UTF-8. Note that this
+means that Perl expects other software to work, too: if Perl has been
+led to believe that STDIN should be UTF-8, but then STDIN coming in
+from another command is not UTF-8, Perl will complain about the
malformed UTF-8.
All features that combine Unicode and I/O also require using the new
B<-T>), 0 for off, -1 when only taint warnings are enabled (i.e. with
B<-t> or B<-TU>). This variable is read-only.
+=item ${^UTF8_LOCALE}
+
+True if the locale settings indicate the use of UTF-8 and the use of
+UTF-8 was enabled either by the C<-C> command line switch or by
+setting the PERL_UTF8_LOCALE environment variable to a true value.
+This variable is read-only. If true, STDIN is expected to be in
+UTF-8, STDOUT and STDERR are in UTF-8, and C<:utf8> is the default
+file open layer. See L<perluniintro>, L<perlfunc/open>, and L<open>
+for more information.
+
=item $PERL_VERSION
=item $^V
The current set of warning checks enabled by the C<use warnings> pragma.
See the documentation of C<warnings> for more details.
-=item ${^WIDE_SYSTEM_CALLS}
-
-Global flag that enables system calls made by Perl to use wide character
-APIs native to the system, if available. This is currently only implemented
-on the Windows platform.
-
-This can also be enabled from the command line using the C<-C> switch.
-
-The initial value is typically C<0> for compatibility with Perl versions
-earlier than 5.6, but may be automatically set to C<1> by Perl if the system
-provides a user-settable default (e.g., C<$ENV{LC_CTYPE}>).
-
-The C<bytes> pragma always overrides the effect of this flag in the current
-lexical scope. See L<bytes>.
-
=item $EXECUTABLE_NAME
=item $^X