else {
STRLEN len = 1; /* allow underscores */
uv = (UV)scan_hex(s + 1, e - s - 1, &len);
- to_be_utf8 = TRUE;
+ if (PL_hints & HINT_UTF8)
+ to_be_utf8 = TRUE;
}
s = e + 1;
}
NUM_ESCAPE_INSERT:
/* Insert oct or hex escaped character.
- * There will always enough room in sv since such escapes will
- * be longer than any utf8 sequence they can end up as
- */
+ * There will always be enough room in sv since such
+ * escapes will be longer than any UTF-8 sequence
+ * they can end up as. */
+
+ /* This spot is wrong for EBCDIC. Characters like
+ * the lowercase letters and digits are >127 in EBCDIC,
+ * so here they would need to be mapped to the Unicode
+ * repertoire. --jhi */
+
if (uv > 127) {
if (!has_utf8 && (to_be_utf8 || uv > 255)) {
- /* might need to recode whatever we have accumulated so far
- * if it contains any hibit chars
+ /* Might need to recode whatever we have
+ * accumulated so far if it contains any
+ * hibit chars.
+ *
+ * (Can't we keep track of that and avoid
+ * this rescan? --jhi)
*/
int hicount = 0;
char *c;
+
for (c = SvPVX(sv); c < d; c++) {
if (UTF8_IS_CONTINUED(*c))
hicount++;
if (hicount) {
char *old_pvx = SvPVX(sv);
char *src, *dst;
- d = SvGROW(sv, SvCUR(sv) + hicount + 1) + (d - old_pvx);
+
+ d = SvGROW(sv,
+ SvCUR(sv) + hicount + 1) +
+ (d - old_pvx);
src = d - 1;
d += hicount;
while (src < dst) {
if (UTF8_IS_CONTINUED(*src)) {
- dst--;
- uv_to_utf8((U8*)dst, (U8)*src--);
- dst--;
+ *dst-- = UTF8_EIGHT_BIT_LO(*src);
+ *dst-- = UTF8_EIGHT_BIT_HI(*src--);
}
else {
*dst-- = *src--;
}
}
- if (to_be_utf8 || uv > 255) {
+ if (to_be_utf8 || has_utf8 || uv > 255) {
d = (char*)uv_to_utf8((U8*)d, uv);
has_utf8 = TRUE;
}
STRLEN len;
GV *gv = Nullgv;
GV **gvp = 0;
+ bool bof = FALSE;
/* check if there's an identifier for us to look at */
if (PL_pending_ident) {
goto retry;
}
do {
- bool bof = PL_rsfp ? TRUE : FALSE;
+ bof = PL_rsfp ? TRUE : FALSE;
if (bof) {
#ifdef PERLIO_IS_STDIO
# ifdef __GNU_LIBRARY__