I32 offs = LvTARGOFF(sv);
I32 rem = LvTARGLEN(sv);
+ if (SvUTF8(lsv))
+ sv_pos_u2b(lsv, &offs, &rem);
if (offs > len)
offs = len;
if (rem + offs > len)
rem = len - offs;
sv_setpvn(sv, tmps + offs, (STRLEN)rem);
- if (DO_UTF8(lsv))
+ if (SvUTF8(lsv))
SvUTF8_on(sv);
return 0;
}
int
Perl_magic_setsubstr(pTHX_ SV *sv, MAGIC *mg)
{
- STRLEN littlelen;
- char *tmps = SvPV(sv, littlelen);
+ STRLEN len;
+ char *tmps = SvPV(sv, len);
+ SV *lsv = LvTARG(sv);
+ I32 lvoff = LvTARGOFF(sv);
+ I32 lvlen = LvTARGLEN(sv);
if (DO_UTF8(sv)) {
- I32 bigoff = LvTARGOFF(sv);
- I32 biglen = LvTARGLEN(sv);
- U8 *s, *a, *b;
-
- sv_utf8_upgrade(LvTARG(sv));
- /* sv_utf8_upgrade() might have moved and/or resized
- * the string to be replaced, we must rediscover it. --jhi */
- s = (U8*)SvPVX(LvTARG(sv));
- a = utf8_hop(s, bigoff);
- b = utf8_hop(a, biglen);
- sv_insert(LvTARG(sv), a - s, b - a, tmps, littlelen);
- SvUTF8_on(LvTARG(sv));
+ sv_utf8_upgrade(lsv);
+ sv_pos_u2b(lsv, &lvoff, &lvlen);
+ sv_insert(lsv, lvoff, lvlen, tmps, len);
+ SvUTF8_on(lsv);
+ }
+ else if (SvUTF8(lsv)) {
+ sv_pos_u2b(lsv, &lvoff, &lvlen);
+ tmps = bytes_to_utf8(tmps, &len);
+ sv_insert(lsv, lvoff, lvlen, tmps, len);
+ Safefree(tmps);
}
else
- sv_insert(LvTARG(sv), LvTARGOFF(sv), LvTARGLEN(sv), tmps, littlelen);
+ sv_insert(lsv, lvoff, lvlen, tmps, len);
return 0;
}
#!./perl
-print "1..149\n";
+print "1..162\n";
#P = start of string Q = start of substr R = end of substr S = end of string
substr($x, 3, 1) eq "\x{FF}" &&
substr($x, 4, 1) eq "\x{F3}";
+# And tests for already-UTF8 one
+
+$x = "\x{101}\x{F2}\x{F3}";
+substr($x, 0, 1) = "\x{100}";
+ok 150, length($x) == 3 &&
+ $x eq "\x{100}\xF2\xF3" &&
+ substr($x, 0, 1) eq "\x{100}" &&
+ substr($x, 1, 1) eq "\x{F2}" &&
+ substr($x, 2, 1) eq "\x{F3}";
+
+$x = "\x{101}\x{F2}\x{F3}";
+substr($x, 0, 1) = "\x{100}\x{FF}";
+ok 151, length($x) == 4 &&
+ $x eq "\x{100}\x{FF}\xF2\xF3" &&
+ substr($x, 0, 1) eq "\x{100}" &&
+ substr($x, 1, 1) eq "\x{FF}" &&
+ substr($x, 2, 1) eq "\x{F2}" &&
+ substr($x, 3, 1) eq "\x{F3}";
+
+$x = "\x{101}\x{F2}\x{F3}";
+substr($x, 0, 2) = "\x{100}\xFF";
+ok 152, length($x) == 3 &&
+ $x eq "\x{100}\xFF\xF3" &&
+ substr($x, 0, 1) eq "\x{100}" &&
+ substr($x, 1, 1) eq "\x{FF}" &&
+ substr($x, 2, 1) eq "\x{F3}";
+
+$x = "\x{101}\x{F2}\x{F3}";
+substr($x, 1, 1) = "\x{100}\xFF";
+ok 153, length($x) == 4 &&
+ $x eq "\x{101}\x{100}\xFF\xF3" &&
+ substr($x, 0, 1) eq "\x{101}" &&
+ substr($x, 1, 1) eq "\x{100}" &&
+ substr($x, 2, 1) eq "\x{FF}" &&
+ substr($x, 3, 1) eq "\x{F3}";
+
+$x = "\x{101}\x{F2}\x{F3}";
+substr($x, 2, 1) = "\x{100}\xFF";
+ok 154, length($x) == 4 &&
+ $x eq "\x{101}\xF2\x{100}\xFF" &&
+ substr($x, 0, 1) eq "\x{101}" &&
+ substr($x, 1, 1) eq "\x{F2}" &&
+ substr($x, 2, 1) eq "\x{100}" &&
+ substr($x, 3, 1) eq "\x{FF}";
+
+$x = "\x{101}\x{F2}\x{F3}";
+substr($x, 3, 1) = "\x{100}\xFF";
+ok 155, length($x) == 5 &&
+ $x eq "\x{101}\x{F2}\x{F3}\x{100}\xFF" &&
+ substr($x, 0, 1) eq "\x{101}" &&
+ substr($x, 1, 1) eq "\x{F2}" &&
+ substr($x, 2, 1) eq "\x{F3}" &&
+ substr($x, 3, 1) eq "\x{100}" &&
+ substr($x, 4, 1) eq "\x{FF}";
+
+$x = "\x{101}\x{F2}\x{F3}";
+substr($x, -1, 1) = "\x{100}\xFF";
+ok 156, length($x) == 4 &&
+ $x eq "\x{101}\xF2\x{100}\xFF" &&
+ substr($x, 0, 1) eq "\x{101}" &&
+ substr($x, 1, 1) eq "\x{F2}" &&
+ substr($x, 2, 1) eq "\x{100}" &&
+ substr($x, 3, 1) eq "\x{FF}";
+
+$x = "\x{101}\x{F2}\x{F3}";
+substr($x, -1, 0) = "\x{100}\xFF";
+ok 157, length($x) == 5 &&
+ $x eq "\x{101}\xF2\x{100}\xFF\xF3" &&
+ substr($x, 0, 1) eq "\x{101}" &&
+ substr($x, 1, 1) eq "\x{F2}" &&
+ substr($x, 2, 1) eq "\x{100}" &&
+ substr($x, 3, 1) eq "\x{FF}" &&
+ substr($x, 4, 1) eq "\x{F3}";
+
+$x = "\x{101}\x{F2}\x{F3}";
+substr($x, 0, -1) = "\x{100}\xFF";
+ok 158, length($x) == 3 &&
+ $x eq "\x{100}\xFF\xF3" &&
+ substr($x, 0, 1) eq "\x{100}" &&
+ substr($x, 1, 1) eq "\x{FF}" &&
+ substr($x, 2, 1) eq "\x{F3}";
+
+$x = "\x{101}\x{F2}\x{F3}";
+substr($x, 0, -2) = "\x{100}\xFF";
+ok 159, length($x) == 4 &&
+ $x eq "\x{100}\xFF\xF2\xF3" &&
+ substr($x, 0, 1) eq "\x{100}" &&
+ substr($x, 1, 1) eq "\x{FF}" &&
+ substr($x, 2, 1) eq "\x{F2}" &&
+ substr($x, 3, 1) eq "\x{F3}";
+
+$x = "\x{101}\x{F2}\x{F3}";
+substr($x, 0, -3) = "\x{100}\xFF";
+ok 160, length($x) == 5 &&
+ $x eq "\x{100}\xFF\x{101}\x{F2}\x{F3}" &&
+ substr($x, 0, 1) eq "\x{100}" &&
+ substr($x, 1, 1) eq "\x{FF}" &&
+ substr($x, 2, 1) eq "\x{101}" &&
+ substr($x, 3, 1) eq "\x{F2}" &&
+ substr($x, 4, 1) eq "\x{F3}";
+
+$x = "\x{101}\x{F2}\x{F3}";
+substr($x, 1, -1) = "\x{100}\xFF";
+ok 161, length($x) == 4 &&
+ $x eq "\x{101}\x{100}\xFF\xF3" &&
+ substr($x, 0, 1) eq "\x{101}" &&
+ substr($x, 1, 1) eq "\x{100}" &&
+ substr($x, 2, 1) eq "\x{FF}" &&
+ substr($x, 3, 1) eq "\x{F3}";
+
+$x = "\x{101}\x{F2}\x{F3}";
+substr($x, -1, -1) = "\x{100}\xFF";
+ok 162, length($x) == 5 &&
+ $x eq "\x{101}\xF2\x{100}\xFF\xF3" &&
+ substr($x, 0, 1) eq "\x{101}" &&
+ substr($x, 1, 1) eq "\x{F2}" &&
+ substr($x, 2, 1) eq "\x{100}" &&
+ substr($x, 3, 1) eq "\x{FF}" &&
+ substr($x, 4, 1) eq "\x{F3}";
S_sublex_done(pTHX)
{
if (!PL_lex_starts++) {
+ SV *sv = newSVpvn("",0);
+ if (SvUTF8(PL_linestr))
+ SvUTF8_on(sv);
PL_expect = XOPERATOR;
- yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
+ yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
return THING;
}
register char *d = SvPVX(sv); /* destination for copies */
bool dorange = FALSE; /* are we in a translit range? */
bool didrange = FALSE; /* did we just finish a range? */
- bool has_utf8 = FALSE; /* embedded \x{} */
+ bool has_utf8 = (PL_linestr && SvUTF8(PL_linestr));
+ /* the constant is UTF8 */
UV uv;
I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
/* backslashes */
if (*s == '\\' && s+1 < send) {
- bool to_be_utf8 = FALSE;
-
s++;
/* some backslashes we leave behind */
else {
STRLEN len = 1; /* allow underscores */
uv = (UV)scan_hex(s + 1, e - s - 1, &len);
- if (PL_hints & HINT_UTF8)
- to_be_utf8 = TRUE;
}
s = e + 1;
}
* repertoire. --jhi */
if (uv > 127) {
- if (!has_utf8 && (to_be_utf8 || uv > 255)) {
+ if (!has_utf8 && uv > 255) {
/* Might need to recode whatever we have
* accumulated so far if it contains any
* hibit chars.
}
}
- if (to_be_utf8 || has_utf8 || uv > 255) {
+ if (has_utf8 || uv > 255) {
d = (char*)uv_to_utf8((U8*)d, uv);
has_utf8 = TRUE;
}
TOKEN('(');
case KEY_qq:
+ case KEY_qu:
s = scan_str(s,FALSE,FALSE);
+ if (tmp == KEY_qu && is_utf8_string((U8*)s, SvCUR(PL_lex_stuff)))
+ SvUTF8_on(PL_lex_stuff);
if (!s)
missingterm((char*)0);
yylval.ival = OP_STRINGIFY;
if (strEQ(d,"q")) return KEY_q;
if (strEQ(d,"qr")) return KEY_qr;
if (strEQ(d,"qq")) return KEY_qq;
+ if (strEQ(d,"qu")) return KEY_qu;
if (strEQ(d,"qw")) return KEY_qw;
if (strEQ(d,"qx")) return KEY_qx;
}
while (isDIGIT(*pos) || *pos == '_')
pos++;
if (!isALPHA(*pos)) {
- UV rev;
+ UV rev, revmax = 0;
U8 tmpbuf[UTF8_MAXLEN+1];
U8 *tmpend;
- bool utf8 = FALSE;
s++; /* get past 'v' */
sv = NEWSV(92,5);
}
}
tmpend = uv_to_utf8(tmpbuf, rev);
- utf8 = utf8 || rev > 127;
+ if (rev > revmax)
+ revmax = rev;
sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
if (*pos == '.' && isDIGIT(pos[1]))
s = ++pos;
SvPOK_on(sv);
SvREADONLY_on(sv);
- if (utf8) {
+ if (revmax > 127) {
SvUTF8_on(sv);
- if (!UTF||IN_BYTE)
+ if (revmax < 256)
sv_utf8_downgrade(sv, TRUE);
}
}