-C (or PERL_UNICODE). See perlrun/-C for more details.
p4raw-id: //depot/perl@18715
p |void |vivify_defelem |SV* sv
p |void |vivify_ref |SV* sv|U32 to_what
p |I32 |wait4pid |Pid_t pid|int* statusp|int flags
+p |U32 |parse_unicode_opts|char **popt
p |void |report_evil_fh |GV *gv|IO *io|I32 op
pd |void |report_uninit
Afpd |void |warn |const char* pat|...
#define PL_tainting (vTHX->Itainting)
#define PL_tokenbuf (vTHX->Itokenbuf)
#define PL_uid (vTHX->Iuid)
+#define PL_unicode (vTHX->Iunicode)
#define PL_unsafe (vTHX->Iunsafe)
#define PL_utf8_alnum (vTHX->Iutf8_alnum)
#define PL_utf8_alnumc (vTHX->Iutf8_alnumc)
#define PL_utf8_xdigit (vTHX->Iutf8_xdigit)
#define PL_utf8locale (vTHX->Iutf8locale)
#define PL_uudmap (vTHX->Iuudmap)
-#define PL_wantutf8 (vTHX->Iwantutf8)
#define PL_warnhook (vTHX->Iwarnhook)
+#define PL_widesyscalls (vTHX->Iwidesyscalls)
#define PL_xiv_arenaroot (vTHX->Ixiv_arenaroot)
#define PL_xiv_root (vTHX->Ixiv_root)
#define PL_xnv_arenaroot (vTHX->Ixnv_arenaroot)
#define PL_Itainting PL_tainting
#define PL_Itokenbuf PL_tokenbuf
#define PL_Iuid PL_uid
+#define PL_Iunicode PL_unicode
#define PL_Iunsafe PL_unsafe
#define PL_Iutf8_alnum PL_utf8_alnum
#define PL_Iutf8_alnumc PL_utf8_alnumc
#define PL_Iutf8_xdigit PL_utf8_xdigit
#define PL_Iutf8locale PL_utf8locale
#define PL_Iuudmap PL_uudmap
-#define PL_Iwantutf8 PL_wantutf8
#define PL_Iwarnhook PL_warnhook
+#define PL_Iwidesyscalls PL_widesyscalls
#define PL_Ixiv_arenaroot PL_xiv_arenaroot
#define PL_Ixiv_root PL_xiv_root
#define PL_Ixnv_arenaroot PL_xnv_arenaroot
else
break;
case '\025':
- if (len > 1 && strNE(name, "\025TF8_LOCALE"))
+ if (len > 1 && strNE(name, "\025NICODE"))
break;
goto ro_magicalize;
}
break;
case '\025':
- if (len > 1 && strEQ(name, "\025TF8_LOCALE"))
+ if (len > 1 && strEQ(name, "\025NICODE"))
goto yes;
case '\027': /* $^W & $^WARNING_BITS */
if (len == 1
*/
PERLVAR(Idowarn, U8)
-PERLVAR(Iutf8locale, bool) /* utf8 locale detected */
+PERLVAR(Iwidesyscalls, bool) /* unused since 5.8.1 */
PERLVAR(Idoextract, bool)
PERLVAR(Isawampersand, bool) /* must save all match strings */
PERLVAR(Iunsafe, bool)
PERLVAR(IOpSlab,I32 *)
#endif
-PERLVAR(Iwantutf8, bool) /* want utf8 as the default discipline */
+PERLVAR(Iutf8locale, bool) /* utf8 locale detected */
PERLVAR(Iutf8_idstart, SV *)
PERLVAR(Iutf8_idcont, SV *)
PERLVARI(Iin_load_module, int, 0) /* to prevent recursions in PerlIO_find_layer */
+PERLVAR(Iunicode, U32) /* Unicode features: $ENV{PERL_UNICODE} or -C */
+
/* New variables must be added to the very end, before this comment,
* for binary compatibility (the offsets of the old members must not change).
* XSUB.h provides wrapper functions via perlapi.h that make this
it overrides LC_MESSAGES for GNU gettext, and it also
can have more than one locale, separated by spaces,
in case you need to know.)
- If PL_utf8locale and PL_wantutf8 (set by -C) are true,
- perl.c:S_parse_body() will turn on the PerlIO :utf8 layer
+ If PL_utf8locale and PL_unicode (set by -C or by $ENV{PERL_UNICODE)
+ are true, perl.c:S_parse_body() will turn on the PerlIO :utf8 layer
on STDIN, STDOUT, STDERR, _and_ the default open discipline.
*/
bool utf8locale = FALSE;
if (utf8locale)
PL_utf8locale = TRUE;
}
- /* Set PL_wantutf8 to $ENV{PERL_UTF8_LOCALE} if using PerlIO.
+ /* Set PL_unicode to $ENV{PERL_UNICODE} if using PerlIO.
This is an alternative to using the -C command line switch
(the -C if present will override this). */
{
- char *p = PerlEnv_getenv("PERL_UTF8_LOCALE");
- PL_wantutf8 = p ? (bool) atoi(p) : FALSE;
+ char *p = PerlEnv_getenv("PERL_UNICODE");
+ PL_unicode = p ? parse_unicode_opts(&p) : 0;
}
#endif
? (PL_taint_warn || PL_unsafe ? -1 : 1)
: 0);
break;
- case '\025': /* $^UTF8_LOCALE */
- if (strEQ(mg->mg_ptr, "\025TF8_LOCALE"))
- sv_setiv(sv, (IV) (PL_wantutf8 && PL_utf8locale));
+ case '\025': /* $^UNICODE */
+ if (strEQ(mg->mg_ptr, "\025NICODE"))
+ sv_setuv(sv, (UV) PL_unicode);
break;
case '\027': /* ^W & $^WARNING_BITS */
if (*(mg->mg_ptr+1) == '\0')
PL_basetime = (Time_t)(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv));
#endif
break;
- case '\025': /* $^UTF8_LOCALE */
- if (SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv))
- PL_wantutf8 = PL_utf8locale;
- else
- PL_wantutf8 = FALSE;
- break;
case '\027': /* ^W & $^WARNING_BITS */
if (*(mg->mg_ptr+1) == '\0') {
if ( ! (PL_dowarn & G_WARN_ALL_MASK)) {
if (!PL_do_undump)
init_postdump_symbols(argc,argv,env);
- /* PL_utf8locale is conditionally turned on by
+ /* PL_unicode is turned on by -C or by $ENV{PERL_UNICODE}.
+ * PL_utf8locale is conditionally turned on by
* locale.c:Perl_init_i18nl10n() if the environment
- * look like the user wants to use UTF-8.
- * PL_wantutf8 is turned on by -C or by $ENV{PERL_UTF8_LOCALE}. */
- if (PL_utf8locale && PL_wantutf8) { /* Requires init_predump_symbols(). */
+ * look like the user wants to use UTF-8. */
+ if (PL_unicode) { /* Requires init_predump_symbols(). */
IO* io;
PerlIO* fp;
SV* sv;
- /* Turn on UTF-8-ness on STDIN, STDOUT, STDERR
- * _and_ the default open discipline. */
- if (PL_stdingv && (io = GvIO(PL_stdingv)) && (fp = IoIFP(io)))
- PerlIO_binmode(aTHX_ fp, IoTYPE(io), 0, ":utf8");
- if (PL_defoutgv && (io = GvIO(PL_defoutgv)) && (fp = IoOFP(io)))
- PerlIO_binmode(aTHX_ fp, IoTYPE(io), 0, ":utf8");
- if (PL_stderrgv && (io = GvIO(PL_stderrgv)) && (fp = IoOFP(io)))
- PerlIO_binmode(aTHX_ fp, IoTYPE(io), 0, ":utf8");
- if ((sv = GvSV(gv_fetchpv("\017PEN", TRUE, SVt_PV)))) {
- sv_setpvn(sv, ":utf8\0:utf8", 11);
- SvSETMAGIC(sv);
+
+ if (!(PL_unicode & PERL_UNICODE_LOCALE_FLAG) || PL_utf8locale) {
+ /* Turn on UTF-8-ness on STDIN, STDOUT, STDERR
+ * and the default open discipline. */
+ if ((PL_unicode & PERL_UNICODE_STDIN_FLAG) &&
+ PL_stdingv && (io = GvIO(PL_stdingv)) &&
+ (fp = IoIFP(io)))
+ PerlIO_binmode(aTHX_ fp, IoTYPE(io), 0, ":utf8");
+ if ((PL_unicode & PERL_UNICODE_STDOUT_FLAG) &&
+ PL_defoutgv && (io = GvIO(PL_defoutgv)) &&
+ (fp = IoOFP(io)))
+ PerlIO_binmode(aTHX_ fp, IoTYPE(io), 0, ":utf8");
+ if ((PL_unicode & PERL_UNICODE_STDERR_FLAG) &&
+ PL_stderrgv && (io = GvIO(PL_stderrgv)) &&
+ (fp = IoOFP(io)))
+ PerlIO_binmode(aTHX_ fp, IoTYPE(io), 0, ":utf8");
+ if ((PL_unicode & PERL_UNICODE_INOUT_FLAG) &&
+ (sv = GvSV(gv_fetchpv("\017PEN", TRUE, SVt_PV)))) {
+ U32 in = PL_unicode & PERL_UNICODE_IN_FLAG;
+ U32 out = PL_unicode & PERL_UNICODE_OUT_FLAG;
+ if (in) {
+ if (out)
+ sv_setpvn(sv, ":utf8\0:utf8", 11);
+ else
+ sv_setpvn(sv, ":utf8\0", 6);
+ }
+ else if (out)
+ sv_setpvn(sv, "\0:utf8", 6);
+ SvSETMAGIC(sv);
+ }
}
}
return s + numlen;
}
case 'C':
- PL_wantutf8 = TRUE; /* Can be set earlier by $ENV{PERL_UTF8_LOCALE}. */
- s++;
- if (*s == ':') {
- PL_wantutf8 = (bool) atoi(s + 1);
- for (s++; isDIGIT(*s); s++) ;
- }
+ s++;
+ PL_unicode = parse_unicode_opts(&s);
return s;
case 'F':
PL_minus_F = TRUE;
for (; argc > 0; argc--,argv++) {
SV *sv = newSVpv(argv[0],0);
av_push(GvAVn(PL_argvgv),sv);
- if (PL_wantutf8)
- (void)sv_utf8_decode(sv);
+ if (PL_unicode & PERL_UNICODE_ARGV_FLAG)
+ SvUTF8_on(sv);
+ if (PL_unicode & PERL_UNICODE_WIDESYSCALLS_FLAG) /* Sarathy? */
+ (void)sv_utf8_decode(sv);
}
}
}
#define PERL_MAGIC_UTF8_CACHESIZE 2
+#define PERL_UNICODE_STDIN_FLAG 0x0001
+#define PERL_UNICODE_STDOUT_FLAG 0x0002
+#define PERL_UNICODE_STDERR_FLAG 0x0004
+#define PERL_UNICODE_STD_FLAG 0x0007
+#define PERL_UNICODE_IN_FLAG 0x0008
+#define PERL_UNICODE_OUT_FLAG 0x0010
+#define PERL_UNICODE_INOUT_FLAG 0x0018
+#define PERL_UNICODE_ARGV_FLAG 0x0020 /* For @ARGV? */
+#define PERL_UNICODE_LOCALE_FLAG 0x0040
+#define PERL_UNICODE_WIDESYSCALLS_FLAG 0x0080 /* for Sarathy */
+
+#define PERL_UNICODE_DEFAULT_FLAGS \
+ (PERL_UNICODE_STD_FLAG | \
+ PERL_UNICODE_INOUT_FLAG | \
+ PERL_UNICODE_LOCALE_FLAG)
+
+#define PERL_UNICODE_ALL_FLAGS 0x00ff
+
+#define PERL_UNICODE_STDIN 'I'
+#define PERL_UNICODE_STDOUT 'O'
+#define PERL_UNICODE_STDERR 'E'
+#define PERL_UNICODE_STD 'S'
+#define PERL_UNICODE_IN 'i'
+#define PERL_UNICODE_OUT 'o'
+#define PERL_UNICODE_INOUT 'D'
+#define PERL_UNICODE_ARGV 'A'
+#define PERL_UNICODE_LOCALE 'L'
+#define PERL_UNICODE_WIDESYSCALLS 'W'
+
/* and finally... */
#define PERL_PATCHLEVEL_H_IMPLICIT
#include "patchlevel.h"
#define PL_tokenbuf (*Perl_Itokenbuf_ptr(aTHX))
#undef PL_uid
#define PL_uid (*Perl_Iuid_ptr(aTHX))
+#undef PL_unicode
+#define PL_unicode (*Perl_Iunicode_ptr(aTHX))
#undef PL_unsafe
#define PL_unsafe (*Perl_Iunsafe_ptr(aTHX))
#undef PL_utf8_alnum
#define PL_utf8locale (*Perl_Iutf8locale_ptr(aTHX))
#undef PL_uudmap
#define PL_uudmap (*Perl_Iuudmap_ptr(aTHX))
-#undef PL_wantutf8
-#define PL_wantutf8 (*Perl_Iwantutf8_ptr(aTHX))
#undef PL_warnhook
#define PL_warnhook (*Perl_Iwarnhook_ptr(aTHX))
+#undef PL_widesyscalls
+#define PL_widesyscalls (*Perl_Iwidesyscalls_ptr(aTHX))
#undef PL_xiv_arenaroot
#define PL_xiv_arenaroot (*Perl_Ixiv_arenaroot_ptr(aTHX))
#undef PL_xiv_root
data Perl expected. Someone's very confused, or perhaps trying to
subvert Perl's population of %ENV for nefarious purposes.
+=item Unknown Unicode option letter '%c'
+
+You specified an unknown Unicode option. See L<perlrun> documentation
+of the C<-C> switch for the list of known options.
+
+=item Unknown Unicode option value %x
+
+You specified an unknown Unicode option. See L<perlrun> documentation
+of the C<-C> switch for the list of known options.
+
=item Unknown warnings category '%s'
(F) An error issued by the C<warnings> pragma. You specified a warnings
S<[ B<-x>[I<dir>] ]>
S<[ B<-i>[I<extension>] ]>
S<[ B<-e> I<'command'> ] [ B<--> ] [ I<programfile> ] [ I<argument> ]...>
- S<[ B<-C[:I<boolean>]> ]>
+ S<[ B<-C [I<number/list>] >]> ]>
=head1 DESCRIPTION
An alternate delimiter may be specified using B<-F>.
-=item B<-C[:boolean]>
+=item B<-C [I<number/list>]>
-enables Perl to use the Unicode APIs on the target system. A bare C<-C>
-enables, C<-C:1> also enables, and C<-C:0> disables.
+The C<-C> flag controls some Unicode of the Perl Unicode features.
+
+As of 5.8.1, the C<-C> can be followed either by a number or a list
+of option letters.
+
+ I 0x0001 STDIN is assumed to be in UTF-8
+ O 0x0002 STDOUT will be in UTF-8
+ E 0x0004 STDERR will be in UTF-8
+ S 0x0007 I + O + E
+ i 0x0008 the default input layer expects UTF-8
+ o 0x0010 the default output layer enforces UTF-8
+ D 0x0018 i + o
+ A 0x0020 the @ARGV elements are supposed to be in UTF-8
+ L 0x0040 normally the IOEio (SD) are unconditional,
+ the L makes them conditional on the locale environment
+ variables (the LC_ALL, LC_TYPE, and LANG; in the order
+ of decreasing precedence)
+
+The C<-C> on its own (not followed by any number or option list) has
+the same effect as <-CSDL>. In other words, the standard I/O handles
+and the default C<open()> layer are UTF-8-fied B<but> only if the locale
+environment variables indicate a UTF-8 locale. This behavior follows
+the I<implicit> behaviour of Perl 5.8.0.
+
+You can use C<-C0> to explicitly disable all the above Unicode features.
-As of Perl 5.8.1, if C<-C> is used and the locale settings (the LC_ALL,
-LC_CTYPE, and LANG environment variables) indicate a UTF-8 locale,
-the STDIN is expected to be in UTF-8, the STDOUT and STDERR are
-expected to be in UTF-8, and C<:utf8> is the default file open layer.
See L<perluniintro>, L<perlfunc/open>, and L<open> for more information.
-The magic variable C<${^UTF8_LOCALE}> reflects this state,
-see L<perlvar/"${^UTF8_LOCALE}">. (Another way of setting this
-variable is to set the environment variable PERL_UTF8_LOCALE.)
+
+The magic variable C<${^UNICODE}> reflects the state of this setting,
+see L<perlvar/"${^UNICODE}">. (Another way of setting this variable
+is to set the environment variable PERL_UNICODE.)
(In Perls earlier than 5.8.1 the C<-C> switch was a Win32-only switch
that enabled the use of Unicode-aware "wide system call" Win32 APIs.
SYS$TIMEZONE_DIFFERENTIAL but are optional and discussed further in
L<perlvms> and in F<README.vms> in the Perl source distribution.
-=item PERL_UTF8_LOCALE
+=item PERL_UNICODE
Equivalent to the B<-C> command-line switch.
=item *
-If your locale environment variables (LC_ALL, LC_CTYPE, LANG)
-contain the strings 'UTF-8' or 'UTF8' (matched case-insensitively)
-B<and> you enable using UTF-8 either by using the C<-C> command line
-switch or setting the PERL_UTF8_LOCALE environment variable to a true
-value, then the default encodings of your STDIN, STDOUT, and STDERR,
-and of B<any subsequent file open>, are considered to be UTF-8.
-See L<perluniintro>, L<perlfunc/open>, and L<open> for more
-information. The magic variable C<${^UTF8_LOCALE}> will also be set.
+If your locale environment variables (LC_ALL, LC_CTYPE, LANG) contain
+the strings 'UTF-8' or 'UTF8' (matched case-insensitively) B<and> you
+enable using UTF-8 either by using the C<-C> command line switch or
+setting the PERL_UNICODE environment variable to be, C<"L">, an empty
+string, (see L<perlrun> and the documentation for the C<-C> switch for
+more information about the possible values), then the default
+encodings of your STDIN, STDOUT, and STDERR, and of B<any subsequent
+file open>, are considered to be UTF-8. See L<perluniintro>,
+L<perlfunc/open>, and L<open> for more information. The magic
+variable C<${^UNICODE}> will also be set.
=item *
=head1 SEE ALSO
L<perluniintro>, L<encoding>, L<Encode>, L<open>, L<utf8>, L<bytes>,
-L<perlretut>, L<perlvar/"${^UTF8_LOCALE}">
+L<perlretut>, L<perlvar/"${^UNICODE}">
=cut
If your locale environment variables (C<LC_ALL>, C<LC_CTYPE>, C<LANG>)
contain the strings 'UTF-8' or 'UTF8' (matched case-insensitively)
B<and> you enable using UTF-8 either by using the C<-C> command line
-switch or by setting the PERL_UTF8_LOCALE environment variable to
-a true value, then the default encoding of your STDIN, STDOUT, and
-STDERR, and of B<any subsequent file open>, is UTF-8. Note that this
-means that Perl expects other software to work, too: if Perl has been
-led to believe that STDIN should be UTF-8, but then STDIN coming in
-from another command is not UTF-8, Perl will complain about the
-malformed UTF-8.
+switch or by setting the PERL_UNICODE environment variable to an empty
+string, C<"">, (see L<perlrun> and the documentation for the C<-C>
+switch for more information about the possible values), then the
+default encoding of your STDIN, STDOUT, and STDERR, and of B<any
+subsequent file open>, will be UTF-8. Note that this means that Perl
+expects other software to work, too: if Perl has been led to believe
+that STDIN should be UTF-8, but then STDIN coming in from another
+command is not UTF-8, Perl will complain about the malformed UTF-8.
All features that combine Unicode and I/O also require using the new
PerlIO feature. Almost all Perl 5.8 platforms do use PerlIO, though:
B<-T>), 0 for off, -1 when only taint warnings are enabled (i.e. with
B<-t> or B<-TU>). This variable is read-only.
-=item ${^UTF8_LOCALE}
-
-Reflects whether the locale settings indicated the use of UTF-8 and that
-the use of UTF-8 was enabled either by the C<-C> command line switch or
-by setting the PERL_UTF8_LOCALE environment variable to a true value.
-This variable is read-only. If true, the STDIN is expected to be in
-UTF-8, the STDOUT and STDERR are in UTF-8, and C<:utf8> is the default
-file open layer. See L<perluniintro>, L<perlfunc/open>, and L<open>
-for more information.
+=item ${^UNICODE}
+
+Reflects certain Unicode settings of Perl. See L<perlrun> for more
+information about the possible values. This variable is set during
+Perl startup and thereafter read-only. See L<perluniintro>,
+L<perlfunc/open>, and L<open> for more information.
=item $PERL_VERSION
{
}
+U32
+Perl_parse_unicode_opts(pTHX_ char **popt)
+{
+ char *p = *popt;
+ U32 opt = 0;
+
+ if (*p) {
+ if (isDIGIT(*p)) {
+ opt = (U32) atoi(p);
+ while (isDIGIT(*p)) p++;
+ if (*p)
+ Perl_croak(aTHX_ "Unknown Unicode option letter '%c'", *p);
+ }
+ else {
+ for (; *p; p++) {
+ switch (*p) {
+ case PERL_UNICODE_STDIN:
+ opt |= PERL_UNICODE_STDIN_FLAG; break;
+ case PERL_UNICODE_STDOUT:
+ opt |= PERL_UNICODE_STDOUT_FLAG; break;
+ case PERL_UNICODE_STDERR:
+ opt |= PERL_UNICODE_STDERR_FLAG; break;
+ case PERL_UNICODE_STD:
+ opt |= PERL_UNICODE_STD_FLAG; break;
+ case PERL_UNICODE_IN:
+ opt |= PERL_UNICODE_IN_FLAG; break;
+ case PERL_UNICODE_OUT:
+ opt |= PERL_UNICODE_OUT_FLAG; break;
+ case PERL_UNICODE_INOUT:
+ opt |= PERL_UNICODE_INOUT_FLAG; break;
+ case PERL_UNICODE_LOCALE:
+ opt |= PERL_UNICODE_LOCALE_FLAG; break;
+ case PERL_UNICODE_ARGV:
+ opt |= PERL_UNICODE_ARGV_FLAG; break;
+ default:
+ Perl_croak(aTHX_
+ "Unknown Unicode option letter '%c'", *p);
+ }
+ }
+ }
+ }
+ else
+ opt = PERL_UNICODE_DEFAULT_FLAGS;
+
+ if (opt & ~PERL_UNICODE_ALL_FLAGS)
+ Perl_croak(aTHX_ "Unknown Unicode option value 0x%"UVXf,
+ (UV) (opt & ~PERL_UNICODE_ALL_FLAGS));
+
+ *popt = p;
+
+ return opt;
+}
+