/* backslashes */
if (*s == '\\' && s+1 < send) {
+ bool to_be_utf8 = FALSE;
+
s++;
/* some backslashes we leave behind */
else {
STRLEN len = 1; /* allow underscores */
uv = (UV)scan_hex(s + 1, e - s - 1, &len);
- has_utf8 = TRUE;
+ to_be_utf8 = TRUE;
}
s = e + 1;
}
* There will always enough room in sv since such escapes will
* be longer than any utf8 sequence they can end up as
*/
- if (uv > 127 || has_utf8) {
- if (!this_utf8 && !has_utf8 && uv > 255) {
+ if (uv > 127) {
+ if (!has_utf8 && (to_be_utf8 || uv > 255)) {
/* might need to recode whatever we have accumulated so far
* if it contains any hibit chars
*/
}
}
- if (has_utf8 || uv > 255) {
+ if (to_be_utf8 || uv > 255) {
d = (char*)uv_to_utf8((U8*)d, uv);
has_utf8 = TRUE;
}
if (*d == '}') {
char minus = (PL_tokenbuf[0] == '-');
s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
+ if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, 0) &&
+ PL_nextval[PL_nexttoke-1].opval)
+ SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke-1].opval)->op_sv);
if (minus)
force_next('-');
}
CLINE;
yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
yylval.opval->op_private = OPpCONST_BARE;
+ if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len))
+ SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
TERM(WORD);
}
/* If not a declared subroutine, it's an indirect object. */
/* (But it's an indir obj regardless for sort.) */
- if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
+ if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
((!gv || !GvCVu(gv)) &&
(PL_last_lop_op != OP_MAPSTART &&
PL_last_lop_op != OP_GREPSTART))))
if (*s == '=' && s[1] == '>') {
CLINE;
sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
+ if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len))
+ SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
TERM(WORD);
}
}
}
#endif
+#ifdef PERLIO_LAYERS
+ if (UTF && !IN_BYTE)
+ PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
+#endif
PL_rsfp = Nullfp;
}
goto fake_eof;
int warned = 0;
d = SvPV_force(PL_lex_stuff, len);
while (len) {
+ SV *sv;
for (; isSPACE(*d) && len; --len, ++d) ;
if (len) {
char *b = d;
else {
for (; !isSPACE(*d) && len; --len, ++d) ;
}
+ sv = newSVpvn(b, d-b);
+ if (DO_UTF8(PL_lex_stuff))
+ SvUTF8_on(sv);
words = append_elem(OP_LIST, words,
- newSVOP(OP_CONST, 0, tokeq(newSVpvn(b, d-b))));
+ newSVOP(OP_CONST, 0, tokeq(sv)));
}
}
if (words) {
squash = OPpTRANS_SQUASH;
s++;
}
- o->op_private = del|squash|complement;
+ o->op_private = del|squash|complement|
+ (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
+ (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
PL_lex_op = o;
yylval.ival = OP_TRANS;
Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
}
SvREFCNT_dec(herewas);
+ if (UTF && !IN_BYTE && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
+ SvUTF8_on(tmpstr);
PL_lex_stuff = tmpstr;
yylval.ival = op_type;
return s;
SvREADONLY_on(sv);
if (utf8) {
SvUTF8_on(sv);
- sv_utf8_downgrade(sv, TRUE);
+ if (!UTF||IN_BYTE)
+ sv_utf8_downgrade(sv, TRUE);
}
}
}