From: Gurusamy Sarathy Date: Mon, 31 Jan 2000 20:19:34 +0000 (+0000) Subject: introduce $^U, a global bit to indicate whether system X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=d41ff1b8ad987cfcb928deba4254681c1a4c0e36;p=p5sagit%2Fp5-mst-13.2.git introduce $^U, a global bit to indicate whether system calls should using widechar APIs; chr and sprintf "%c" also follow this flag in the absense of "use byte"; "use utf8" sets $^U=1 (this appears kludgey) p4raw-id: //depot/perl@4937 --- diff --git a/embedvar.h b/embedvar.h index 342f543..c9a0cec 100644 --- a/embedvar.h +++ b/embedvar.h @@ -196,6 +196,7 @@ #define PL_argvoutgv (PERL_GET_INTERP->Iargvoutgv) #define PL_basetime (PERL_GET_INTERP->Ibasetime) #define PL_beginav (PERL_GET_INTERP->Ibeginav) +#define PL_bigchar (PERL_GET_INTERP->Ibigchar) #define PL_bitcount (PERL_GET_INTERP->Ibitcount) #define PL_bufend (PERL_GET_INTERP->Ibufend) #define PL_bufptr (PERL_GET_INTERP->Ibufptr) @@ -460,6 +461,7 @@ #define PL_argvoutgv (vTHX->Iargvoutgv) #define PL_basetime (vTHX->Ibasetime) #define PL_beginav (vTHX->Ibeginav) +#define PL_bigchar (vTHX->Ibigchar) #define PL_bitcount (vTHX->Ibitcount) #define PL_bufend (vTHX->Ibufend) #define PL_bufptr (vTHX->Ibufptr) @@ -861,6 +863,7 @@ #define PL_argvoutgv (aTHXo->interp.Iargvoutgv) #define PL_basetime (aTHXo->interp.Ibasetime) #define PL_beginav (aTHXo->interp.Ibeginav) +#define PL_bigchar (aTHXo->interp.Ibigchar) #define PL_bitcount (aTHXo->interp.Ibitcount) #define PL_bufend (aTHXo->interp.Ibufend) #define PL_bufptr (aTHXo->interp.Ibufptr) @@ -1126,6 +1129,7 @@ #define PL_Iargvoutgv PL_argvoutgv #define PL_Ibasetime PL_basetime #define PL_Ibeginav PL_beginav +#define PL_Ibigchar PL_bigchar #define PL_Ibitcount PL_bitcount #define PL_Ibufend PL_bufend #define PL_Ibufptr PL_bufptr diff --git a/gv.c b/gv.c index acd8501..b8fef0d 100644 --- a/gv.c +++ b/gv.c @@ -837,6 +837,7 @@ Perl_gv_fetchpv(pTHX_ const char *nambeg, I32 add, I32 sv_type) case '\017': /* $^O */ case '\020': /* $^P */ case '\024': /* $^T */ + case '\025': /* $^U */ if (len > 1) break; goto magicalize; diff --git a/intrpvar.h b/intrpvar.h index 2dde0dc..869897d 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -44,7 +44,8 @@ The C variable which corresponds to Perl's $^W warning variable. =cut */ -PERLVAR(Idowarn, bool) +PERLVAR(Idowarn, U8) +PERLVAR(Ibigchar, bool) PERLVAR(Idoextract, bool) PERLVAR(Isawampersand, bool) /* must save all match strings */ PERLVAR(Iunsafe, bool) diff --git a/lib/charnames.pm b/lib/charnames.pm index bd97983..59350b2 100644 --- a/lib/charnames.pm +++ b/lib/charnames.pm @@ -29,17 +29,15 @@ sub charnames { } die "Unknown charname '$name'" unless @off; - # use caller 'encoding'; # Does not work at compile time? - my $ord = hex substr $txt, $off[0] - 4, 4; - if ($^H & 0x8) { - use utf8; - return chr $ord; + if ($^H & 0x10) { # "use byte" in effect? + use byte; + return chr $ord if $ord <= 255; + my $hex = sprintf '%X=0%o', $ord, $ord; + my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2; + die "Character 0x$hex with name '$fname' is above 0xFF"; } - return chr $ord if $ord <= 255; - my $hex = sprintf '%X=0%o', $ord, $ord; - my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2; - die "Character 0x$hex with name '$fname' is above 0xFF"; + return chr $ord; } sub import { diff --git a/lib/utf8.pm b/lib/utf8.pm index 5ddd4ba..691de0d 100644 --- a/lib/utf8.pm +++ b/lib/utf8.pm @@ -1,5 +1,7 @@ package utf8; +$^U = 1; + sub import { $^H |= 0x00000008; $enc{caller()} = $_[1] if $_[1]; diff --git a/mg.c b/mg.c index 3ba3d08..f0c3bf3 100644 --- a/mg.c +++ b/mg.c @@ -567,6 +567,9 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) sv_setiv(sv, (IV)PL_basetime); #endif break; + case '\025': /* ^U */ + sv_setiv(sv, (IV)PL_bigchar); + break; case '\027': /* ^W & $^Warnings*/ if (*(mg->mg_ptr+1) == '\0') sv_setiv(sv, (IV)((PL_dowarn & G_WARN_ON) ? TRUE : FALSE)); @@ -1707,6 +1710,9 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) PL_basetime = (Time_t)(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv)); #endif break; + case '\025': /* ^U */ + PL_bigchar = SvTRUE(sv); + break; case '\027': /* ^W & $^Warnings */ if (*(mg->mg_ptr+1) == '\0') { if ( ! (PL_dowarn & G_WARN_ALL_MASK)) { diff --git a/op.c b/op.c index fdfdf27..6bb7876 100644 --- a/op.c +++ b/op.c @@ -3401,7 +3401,7 @@ Perl_newSTATEOP(pTHX_ I32 flags, char *label, OP *o) cop->op_ppaddr = PL_ppaddr[ OP_NEXTSTATE ]; } cop->op_flags = flags; - cop->op_private = (PL_hints & HINT_UTF8); + cop->op_private = (PL_hints & (HINT_UTF8|HINT_BYTE)); #ifdef NATIVE_HINTS cop->op_private |= NATIVE_HINTS; #endif diff --git a/op.h b/op.h index 8bc8276..2360f9b 100644 --- a/op.h +++ b/op.h @@ -107,6 +107,9 @@ Deprecated. Use C instead. : G_SCALAR) \ : dowantarray()) +/* NOTE: OP_NEXTSTATE, OP_DBSTATE, and OP_SETSTATE (i.e. COPs) carry lower + * bits of PL_hints in op_private */ + /* Private for lvalues */ #define OPpLVAL_INTRO 128 /* Lvalue must be localized or lvalue sub */ diff --git a/perlapi.h b/perlapi.h index b2b8a32..22117ed 100644 --- a/perlapi.h +++ b/perlapi.h @@ -130,6 +130,8 @@ START_EXTERN_C #define PL_basetime (*Perl_Ibasetime_ptr(aTHXo)) #undef PL_beginav #define PL_beginav (*Perl_Ibeginav_ptr(aTHXo)) +#undef PL_bigchar +#define PL_bigchar (*Perl_Ibigchar_ptr(aTHXo)) #undef PL_bitcount #define PL_bitcount (*Perl_Ibitcount_ptr(aTHXo)) #undef PL_bufend diff --git a/pod/perlvar.pod b/pod/perlvar.pod index 3094251..3393fd9 100644 --- a/pod/perlvar.pod +++ b/pod/perlvar.pod @@ -856,6 +856,41 @@ The time at which the program began running, in seconds since the epoch (beginning of 1970). The values returned by the B<-M>, B<-A>, and B<-C> filetests are based on this value. +=item $^U + +Global flag that switches on Unicode character support in the Perl +interpreter. The initial value is usually C<0> for compatibility +with Perl versions earlier than 5.6, but may be automatically set +to C<1> by Perl if the system provides a user-settable default +(e.g., C<$ENV{LC_CTYPE}>). It is also implicitly set to C<1> +whenever the utf8 pragma is loaded. + +Setting it to C<1> has the following effects: + +=over + +=item * + +C produces UTF-8 encoded Unicode characters. These are the same +as the corresponding ASCII characters if the argument is less than 128. + +=item * + +The C<%c> format in C generates a UTF-8 encoded Unicode +character. This is the same as the corresponding ASCII character +if the argument is less than 128. + +=item * + +Any system calls made by Perl will use wide character APIs native to +the system, if available. This is currently only implemented on the +Windows platform. + +=back + +The C pragma overrides the value of this flag in the current +lexical scope. See L. + =item $^V The revision, version, and subversion of the Perl interpreter, represented diff --git a/pp.c b/pp.c index 45654a9..aec5073 100644 --- a/pp.c +++ b/pp.c @@ -2202,7 +2202,7 @@ PP(pp_chr) SvUTF8_off(TARG); /* decontaminate */ (void)SvUPGRADE(TARG,SVt_PV); - if (value >= 128 && !IN_BYTE) { + if (value >= 128 && PL_bigchar && !IN_BYTE) { SvGROW(TARG,8); tmps = SvPVX(TARG); tmps = (char*)uv_to_utf8((U8*)tmps, (UV)value); diff --git a/sv.c b/sv.c index d76752f..0697d8e 100644 --- a/sv.c +++ b/sv.c @@ -3046,7 +3046,7 @@ Perl_sv_catpvn(pTHX_ register SV *sv, register const char *ptr, register STRLEN Move(ptr,SvPVX(sv)+tlen,len,char); SvCUR(sv) += len; *SvEND(sv) = '\0'; - (void)SvPOK_only(sv); /* validate pointer */ + (void)SvPOK_only_UTF8(sv); /* validate pointer */ SvTAINT(sv); } @@ -3083,6 +3083,8 @@ Perl_sv_catsv(pTHX_ SV *dstr, register SV *sstr) return; if (s = SvPV(sstr, len)) sv_catpvn(dstr,s,len); + if (SvUTF8(sstr)) + SvUTF8_on(dstr); } /* @@ -3125,7 +3127,7 @@ Perl_sv_catpv(pTHX_ register SV *sv, register const char *ptr) ptr = SvPVX(sv); Move(ptr,SvPVX(sv)+tlen,len+1,char); SvCUR(sv) += len; - (void)SvPOK_only(sv); /* validate pointer */ + (void)SvPOK_only_UTF8(sv); /* validate pointer */ SvTAINT(sv); } @@ -5828,7 +5830,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV uv = va_arg(*args, int); else uv = (svix < svmax) ? SvIVx(svargs[svix++]) : 0; - if (uv >= 128 && !IN_BYTE) { + if (uv >= 128 && PL_bigchar && !IN_BYTE) { eptr = (char*)utf8buf; elen = uv_to_utf8((U8*)eptr, uv) - utf8buf; is_utf = TRUE; diff --git a/sv.h b/sv.h index 91fd17b..d8cd487 100644 --- a/sv.h +++ b/sv.h @@ -503,9 +503,10 @@ Set the length of the string which is in the SV. See C. #define SvOK(sv) (SvFLAGS(sv) & SVf_OK) #define SvOK_off(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC| \ - SVf_IVisUV), \ + SVf_IVisUV|SVf_UTF8), \ SvOOK_off(sv)) -#define SvOK_off_exc_UV(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC), \ +#define SvOK_off_exc_UV(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC| \ + SVf_UTF8), \ SvOOK_off(sv)) #define SvOKp(sv) (SvFLAGS(sv) & (SVp_IOK|SVp_NOK|SVp_POK)) @@ -547,7 +548,11 @@ Set the length of the string which is in the SV. See C. #define SvPOK(sv) (SvFLAGS(sv) & SVf_POK) #define SvPOK_on(sv) (SvFLAGS(sv) |= (SVf_POK|SVp_POK)) #define SvPOK_off(sv) (SvFLAGS(sv) &= ~(SVf_POK|SVp_POK)) -#define SvPOK_only(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC|SVf_IVisUV), \ +#define SvPOK_only(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC| \ + SVf_IVisUV|SVf_UTF8), \ + SvFLAGS(sv) |= (SVf_POK|SVp_POK)) +#define SvPOK_only_UTF8(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC| \ + SVf_IVisUV), \ SvFLAGS(sv) |= (SVf_POK|SVp_POK)) #define SvOOK(sv) (SvFLAGS(sv) & SVf_OOK) diff --git a/t/lib/charnames.t b/t/lib/charnames.t index 9775b14..8494989 100644 --- a/t/lib/charnames.t +++ b/t/lib/charnames.t @@ -16,7 +16,7 @@ print "not " unless "Here\N{EXCLAMATION MARK}?" eq "Here\041?"; print "ok 1\n"; { - no utf8; # UTEST can switch it on + use byte; # UTEST can switch utf8 on print "# \$res=$res \$\@='$@'\nnot " if $res = eval <<'EOE' diff --git a/win32/win32.h b/win32/win32.h index 69a4caf..4fed26a 100644 --- a/win32/win32.h +++ b/win32/win32.h @@ -422,12 +422,14 @@ struct interp_intern { /* Use CP_UTF8 when mode is UTF8 */ #define A2WHELPER(lpa, lpw, nBytes)\ - lpw[0] = 0, MultiByteToWideChar((IN_UTF8) ? CP_UTF8 : CP_ACP, 0, lpa, -1, lpw, (nBytes/sizeof(WCHAR))) + lpw[0] = 0, MultiByteToWideChar((IN_BYTE) ? CP_ACP : CP_UTF8, 0, \ + lpa, -1, lpw, (nBytes/sizeof(WCHAR))) #define W2AHELPER(lpw, lpa, nChars)\ - lpa[0] = '\0', WideCharToMultiByte((IN_UTF8) ? CP_UTF8 : CP_ACP, 0, lpw, -1, (LPSTR)lpa, nChars, NULL, NULL) + lpa[0] = '\0', WideCharToMultiByte((IN_BYTE) ? CP_ACP : CP_UTF8, 0, \ + lpw, -1, (LPSTR)lpa, nChars, NULL, NULL) -#define USING_WIDE() (PerlEnv_os_id() == VER_PLATFORM_WIN32_NT) +#define USING_WIDE() (PL_bigchar && PerlEnv_os_id() == VER_PLATFORM_WIN32_NT) #ifdef USE_ITHREADS # define PERL_WAIT_FOR_CHILDREN \