#define PL_argvoutgv (PERL_GET_INTERP->Iargvoutgv)
#define PL_basetime (PERL_GET_INTERP->Ibasetime)
#define PL_beginav (PERL_GET_INTERP->Ibeginav)
+#define PL_bigchar (PERL_GET_INTERP->Ibigchar)
#define PL_bitcount (PERL_GET_INTERP->Ibitcount)
#define PL_bufend (PERL_GET_INTERP->Ibufend)
#define PL_bufptr (PERL_GET_INTERP->Ibufptr)
#define PL_argvoutgv (vTHX->Iargvoutgv)
#define PL_basetime (vTHX->Ibasetime)
#define PL_beginav (vTHX->Ibeginav)
+#define PL_bigchar (vTHX->Ibigchar)
#define PL_bitcount (vTHX->Ibitcount)
#define PL_bufend (vTHX->Ibufend)
#define PL_bufptr (vTHX->Ibufptr)
#define PL_argvoutgv (aTHXo->interp.Iargvoutgv)
#define PL_basetime (aTHXo->interp.Ibasetime)
#define PL_beginav (aTHXo->interp.Ibeginav)
+#define PL_bigchar (aTHXo->interp.Ibigchar)
#define PL_bitcount (aTHXo->interp.Ibitcount)
#define PL_bufend (aTHXo->interp.Ibufend)
#define PL_bufptr (aTHXo->interp.Ibufptr)
#define PL_Iargvoutgv PL_argvoutgv
#define PL_Ibasetime PL_basetime
#define PL_Ibeginav PL_beginav
+#define PL_Ibigchar PL_bigchar
#define PL_Ibitcount PL_bitcount
#define PL_Ibufend PL_bufend
#define PL_Ibufptr PL_bufptr
case '\017': /* $^O */
case '\020': /* $^P */
case '\024': /* $^T */
+ case '\025': /* $^U */
if (len > 1)
break;
goto magicalize;
=cut
*/
-PERLVAR(Idowarn, bool)
+PERLVAR(Idowarn, U8)
+PERLVAR(Ibigchar, bool)
PERLVAR(Idoextract, bool)
PERLVAR(Isawampersand, bool) /* must save all match strings */
PERLVAR(Iunsafe, bool)
}
die "Unknown charname '$name'" unless @off;
- # use caller 'encoding'; # Does not work at compile time?
-
my $ord = hex substr $txt, $off[0] - 4, 4;
- if ($^H & 0x8) {
- use utf8;
- return chr $ord;
+ if ($^H & 0x10) { # "use byte" in effect?
+ use byte;
+ return chr $ord if $ord <= 255;
+ my $hex = sprintf '%X=0%o', $ord, $ord;
+ my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
+ die "Character 0x$hex with name '$fname' is above 0xFF";
}
- return chr $ord if $ord <= 255;
- my $hex = sprintf '%X=0%o', $ord, $ord;
- my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
- die "Character 0x$hex with name '$fname' is above 0xFF";
+ return chr $ord;
}
sub import {
package utf8;
+$^U = 1;
+
sub import {
$^H |= 0x00000008;
$enc{caller()} = $_[1] if $_[1];
sv_setiv(sv, (IV)PL_basetime);
#endif
break;
+ case '\025': /* ^U */
+ sv_setiv(sv, (IV)PL_bigchar);
+ break;
case '\027': /* ^W & $^Warnings*/
if (*(mg->mg_ptr+1) == '\0')
sv_setiv(sv, (IV)((PL_dowarn & G_WARN_ON) ? TRUE : FALSE));
PL_basetime = (Time_t)(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv));
#endif
break;
+ case '\025': /* ^U */
+ PL_bigchar = SvTRUE(sv);
+ break;
case '\027': /* ^W & $^Warnings */
if (*(mg->mg_ptr+1) == '\0') {
if ( ! (PL_dowarn & G_WARN_ALL_MASK)) {
cop->op_ppaddr = PL_ppaddr[ OP_NEXTSTATE ];
}
cop->op_flags = flags;
- cop->op_private = (PL_hints & HINT_UTF8);
+ cop->op_private = (PL_hints & (HINT_UTF8|HINT_BYTE));
#ifdef NATIVE_HINTS
cop->op_private |= NATIVE_HINTS;
#endif
: G_SCALAR) \
: dowantarray())
+/* NOTE: OP_NEXTSTATE, OP_DBSTATE, and OP_SETSTATE (i.e. COPs) carry lower
+ * bits of PL_hints in op_private */
+
/* Private for lvalues */
#define OPpLVAL_INTRO 128 /* Lvalue must be localized or lvalue sub */
#define PL_basetime (*Perl_Ibasetime_ptr(aTHXo))
#undef PL_beginav
#define PL_beginav (*Perl_Ibeginav_ptr(aTHXo))
+#undef PL_bigchar
+#define PL_bigchar (*Perl_Ibigchar_ptr(aTHXo))
#undef PL_bitcount
#define PL_bitcount (*Perl_Ibitcount_ptr(aTHXo))
#undef PL_bufend
epoch (beginning of 1970). The values returned by the B<-M>, B<-A>,
and B<-C> filetests are based on this value.
+=item $^U
+
+Global flag that switches on Unicode character support in the Perl
+interpreter. The initial value is usually C<0> for compatibility
+with Perl versions earlier than 5.6, but may be automatically set
+to C<1> by Perl if the system provides a user-settable default
+(e.g., C<$ENV{LC_CTYPE}>). It is also implicitly set to C<1>
+whenever the utf8 pragma is loaded.
+
+Setting it to C<1> has the following effects:
+
+=over
+
+=item *
+
+C<chr> produces UTF-8 encoded Unicode characters. These are the same
+as the corresponding ASCII characters if the argument is less than 128.
+
+=item *
+
+The C<%c> format in C<sprintf> generates a UTF-8 encoded Unicode
+character. This is the same as the corresponding ASCII character
+if the argument is less than 128.
+
+=item *
+
+Any system calls made by Perl will use wide character APIs native to
+the system, if available. This is currently only implemented on the
+Windows platform.
+
+=back
+
+The C<byte> pragma overrides the value of this flag in the current
+lexical scope. See L<byte>.
+
=item $^V
The revision, version, and subversion of the Perl interpreter, represented
SvUTF8_off(TARG); /* decontaminate */
(void)SvUPGRADE(TARG,SVt_PV);
- if (value >= 128 && !IN_BYTE) {
+ if (value >= 128 && PL_bigchar && !IN_BYTE) {
SvGROW(TARG,8);
tmps = SvPVX(TARG);
tmps = (char*)uv_to_utf8((U8*)tmps, (UV)value);
Move(ptr,SvPVX(sv)+tlen,len,char);
SvCUR(sv) += len;
*SvEND(sv) = '\0';
- (void)SvPOK_only(sv); /* validate pointer */
+ (void)SvPOK_only_UTF8(sv); /* validate pointer */
SvTAINT(sv);
}
return;
if (s = SvPV(sstr, len))
sv_catpvn(dstr,s,len);
+ if (SvUTF8(sstr))
+ SvUTF8_on(dstr);
}
/*
ptr = SvPVX(sv);
Move(ptr,SvPVX(sv)+tlen,len+1,char);
SvCUR(sv) += len;
- (void)SvPOK_only(sv); /* validate pointer */
+ (void)SvPOK_only_UTF8(sv); /* validate pointer */
SvTAINT(sv);
}
uv = va_arg(*args, int);
else
uv = (svix < svmax) ? SvIVx(svargs[svix++]) : 0;
- if (uv >= 128 && !IN_BYTE) {
+ if (uv >= 128 && PL_bigchar && !IN_BYTE) {
eptr = (char*)utf8buf;
elen = uv_to_utf8((U8*)eptr, uv) - utf8buf;
is_utf = TRUE;
#define SvOK(sv) (SvFLAGS(sv) & SVf_OK)
#define SvOK_off(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC| \
- SVf_IVisUV), \
+ SVf_IVisUV|SVf_UTF8), \
SvOOK_off(sv))
-#define SvOK_off_exc_UV(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC), \
+#define SvOK_off_exc_UV(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC| \
+ SVf_UTF8), \
SvOOK_off(sv))
#define SvOKp(sv) (SvFLAGS(sv) & (SVp_IOK|SVp_NOK|SVp_POK))
#define SvPOK(sv) (SvFLAGS(sv) & SVf_POK)
#define SvPOK_on(sv) (SvFLAGS(sv) |= (SVf_POK|SVp_POK))
#define SvPOK_off(sv) (SvFLAGS(sv) &= ~(SVf_POK|SVp_POK))
-#define SvPOK_only(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC|SVf_IVisUV), \
+#define SvPOK_only(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC| \
+ SVf_IVisUV|SVf_UTF8), \
+ SvFLAGS(sv) |= (SVf_POK|SVp_POK))
+#define SvPOK_only_UTF8(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC| \
+ SVf_IVisUV), \
SvFLAGS(sv) |= (SVf_POK|SVp_POK))
#define SvOOK(sv) (SvFLAGS(sv) & SVf_OOK)
print "ok 1\n";
{
- no utf8; # UTEST can switch it on
+ use byte; # UTEST can switch utf8 on
print "# \$res=$res \$\@='$@'\nnot "
if $res = eval <<'EOE'
/* Use CP_UTF8 when mode is UTF8 */
#define A2WHELPER(lpa, lpw, nBytes)\
- lpw[0] = 0, MultiByteToWideChar((IN_UTF8) ? CP_UTF8 : CP_ACP, 0, lpa, -1, lpw, (nBytes/sizeof(WCHAR)))
+ lpw[0] = 0, MultiByteToWideChar((IN_BYTE) ? CP_ACP : CP_UTF8, 0, \
+ lpa, -1, lpw, (nBytes/sizeof(WCHAR)))
#define W2AHELPER(lpw, lpa, nChars)\
- lpa[0] = '\0', WideCharToMultiByte((IN_UTF8) ? CP_UTF8 : CP_ACP, 0, lpw, -1, (LPSTR)lpa, nChars, NULL, NULL)
+ lpa[0] = '\0', WideCharToMultiByte((IN_BYTE) ? CP_ACP : CP_UTF8, 0, \
+ lpw, -1, (LPSTR)lpa, nChars, NULL, NULL)
-#define USING_WIDE() (PerlEnv_os_id() == VER_PLATFORM_WIN32_NT)
+#define USING_WIDE() (PL_bigchar && PerlEnv_os_id() == VER_PLATFORM_WIN32_NT)
#ifdef USE_ITHREADS
# define PERL_WAIT_FOR_CHILDREN \