From: Jarkko Hietaniemi Date: Fri, 5 Jan 2001 15:02:38 +0000 (+0000) Subject: "\x{FF}\xFF" was broken, the \xFF was appended in its X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=301d3d20746da77e204598157e568e8c22a220b1;p=p5sagit%2Fp5-mst-13.2.git "\x{FF}\xFF" was broken, the \xFF was appended in its raw 8-bit form to the UTF-8 string. p4raw-id: //depot/perl@8330 --- diff --git a/toke.c b/toke.c index 9b23896..09a2e48 100644 --- a/toke.c +++ b/toke.c @@ -1397,16 +1397,21 @@ S_scan_const(pTHX_ char *start) NUM_ESCAPE_INSERT: /* Insert oct or hex escaped character. - * There will always enough room in sv since such escapes will - * be longer than any utf8 sequence they can end up as - */ + * There will always be enough room in sv since such + * escapes will be longer than any UTF-8 sequence + * they can end up as. */ if (uv > 127) { if (!has_utf8 && (to_be_utf8 || uv > 255)) { - /* might need to recode whatever we have accumulated so far - * if it contains any hibit chars + /* Might need to recode whatever we have + * accumulated so far if it contains any + * hibit chars. + * + * (Can't we keep track of that and avoid + * this rescan? --jhi) */ int hicount = 0; char *c; + for (c = SvPVX(sv); c < d; c++) { if (UTF8_IS_CONTINUED(*c)) hicount++; @@ -1416,7 +1421,10 @@ S_scan_const(pTHX_ char *start) char *src, *dst; U8 tmpbuf[UTF8_MAXLEN+1]; U8 *tmpend; - d = SvGROW(sv, SvCUR(sv) + hicount + 1) + (d - old_pvx); + + d = SvGROW(sv, + SvCUR(sv) + hicount + 1) + + (d - old_pvx); src = d - 1; d += hicount; @@ -1436,7 +1444,7 @@ S_scan_const(pTHX_ char *start) } } - if (to_be_utf8 || uv > 255) { + if (to_be_utf8 || (has_utf8 && uv > 127) || uv > 255) { d = (char*)uv_to_utf8((U8*)d, uv); has_utf8 = TRUE; }