Tweak for MULTIPLICITY/USE_PERLIO
Nick Ing-Simmons [Sat, 30 Dec 2000 19:47:51 +0000 (19:47 +0000)]
p4raw-id: //depot/perlio@8272

toke.c

diff --git a/toke.c b/toke.c
index 46278e8..8b5f7f4 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1336,6 +1336,8 @@ S_scan_const(pTHX_ char *start)
 
        /* backslashes */
        if (*s == '\\' && s+1 < send) {
+           bool to_be_utf8 = FALSE;
+
            s++;
 
            /* some backslashes we leave behind */
@@ -1404,7 +1406,7 @@ S_scan_const(pTHX_ char *start)
                    else {
                        STRLEN len = 1;         /* allow underscores */
                        uv = (UV)scan_hex(s + 1, e - s - 1, &len);
-                       has_utf8 = TRUE;
+                       to_be_utf8 = TRUE;
                    }
                    s = e + 1;
                }
@@ -1421,8 +1423,8 @@ S_scan_const(pTHX_ char *start)
                 * There will always enough room in sv since such escapes will
                 * be longer than any utf8 sequence they can end up as
                 */
-               if (uv > 127 || has_utf8) {
-                   if (!this_utf8 && !has_utf8 && uv > 255) {
+               if (uv > 127) {
+                   if (!has_utf8 && (to_be_utf8 || uv > 255)) {
                        /* might need to recode whatever we have accumulated so far
                         * if it contains any hibit chars
                         */
@@ -1454,7 +1456,7 @@ S_scan_const(pTHX_ char *start)
                         }
                     }
 
-                    if (has_utf8 || uv > 255) {
+                    if (to_be_utf8 || uv > 255) {
                        d = (char*)uv_to_utf8((U8*)d, uv);
                        has_utf8 = TRUE;
                     }
@@ -3120,6 +3122,9 @@ Perl_yylex(pTHX)
                if (*d == '}') {
                    char minus = (PL_tokenbuf[0] == '-');
                    s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
+                   if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, 0) &&
+                       PL_nextval[PL_nexttoke-1].opval)
+                     SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke-1].opval)->op_sv);
                    if (minus)
                        force_next('-');
                }
@@ -3774,6 +3779,8 @@ Perl_yylex(pTHX)
            CLINE;
            yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
            yylval.opval->op_private = OPpCONST_BARE;
+           if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len))
+             SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
            TERM(WORD);
        }
 
@@ -3915,7 +3922,7 @@ Perl_yylex(pTHX)
                    /* If not a declared subroutine, it's an indirect object. */
                    /* (But it's an indir obj regardless for sort.) */
 
-                   if ( !immediate_paren && (PL_last_lop_op == OP_SORT || 
+                   if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
                          ((!gv || !GvCVu(gv)) &&
                         (PL_last_lop_op != OP_MAPSTART &&
                         PL_last_lop_op != OP_GREPSTART))))
@@ -3933,6 +3940,8 @@ Perl_yylex(pTHX)
                if (*s == '=' && s[1] == '>') {
                    CLINE;
                    sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
+                   if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len))
+                     SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
                    TERM(WORD);
                }
 
@@ -4104,6 +4113,10 @@ Perl_yylex(pTHX)
                    }
                }
 #endif
+#ifdef PERLIO_LAYERS
+               if (UTF && !IN_BYTE)
+                   PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
+#endif
                PL_rsfp = Nullfp;
            }
            goto fake_eof;
@@ -4634,6 +4647,7 @@ Perl_yylex(pTHX)
                int warned = 0;
                d = SvPV_force(PL_lex_stuff, len);
                while (len) {
+                   SV *sv;
                    for (; isSPACE(*d) && len; --len, ++d) ;
                    if (len) {
                        char *b = d;
@@ -4654,8 +4668,11 @@ Perl_yylex(pTHX)
                        else {
                            for (; !isSPACE(*d) && len; --len, ++d) ;
                        }
+                       sv = newSVpvn(b, d-b);
+                       if (DO_UTF8(PL_lex_stuff))
+                           SvUTF8_on(sv);
                        words = append_elem(OP_LIST, words,
-                                           newSVOP(OP_CONST, 0, tokeq(newSVpvn(b, d-b))));
+                                           newSVOP(OP_CONST, 0, tokeq(sv)));
                    }
                }
                if (words) {
@@ -6248,7 +6265,9 @@ S_scan_trans(pTHX_ char *start)
            squash = OPpTRANS_SQUASH;
        s++;
     }
-    o->op_private = del|squash|complement;
+    o->op_private = del|squash|complement|
+      (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
+      (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
 
     PL_lex_op = o;
     yylval.ival = OP_TRANS;
@@ -6442,6 +6461,8 @@ retval:
        Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
     }
     SvREFCNT_dec(herewas);
+    if (UTF && !IN_BYTE && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
+       SvUTF8_on(tmpstr);
     PL_lex_stuff = tmpstr;
     yylval.ival = op_type;
     return s;
@@ -7205,7 +7226,8 @@ vstring:
                SvREADONLY_on(sv);
                if (utf8) {
                    SvUTF8_on(sv);
-                   sv_utf8_downgrade(sv, TRUE);
+                   if (!UTF||IN_BYTE)
+                     sv_utf8_downgrade(sv, TRUE);
                }
            }
        }