const char * const end = pv + count; /* end of string */
octbuf[0] = esc;
- if (!flags & PERL_PV_ESCAPE_NOCLEAR)
+ if (!flags & PERL_PV_ESCAPE_NOCLEAR) {
+ /* This won't alter the UTF-8 flag */
sv_setpvn(dsv, "", 0);
+ }
if ((flags & PERL_PV_ESCAPE_UNI_DETECT) && is_utf8_string((U8*)pv, count))
isuni = 1;
sv_catpvn(dsv, octbuf, chsize);
wrote += chsize;
} else {
- const char string = (char) c;
- sv_catpvn(dsv, &string, 1);
+ /* If PERL_PV_ESCAPE_NOBACKSLASH is set then bytes in the range
+ 128-255 can be appended raw to the dsv. If dsv happens to be
+ UTF-8 then we need catpvf to upgrade them for us.
+ Or add a new API call sv_catpvc(). Think about that name, and
+ how to keep it clear that it's unlike the s of catpvs, which is
+ really an array octets, not a string. */
+ Perl_sv_catpvf( aTHX_ dsv, "%c", c);
wrote++;
}
if ( flags & PERL_PV_ESCAPE_FIRSTCHAR )
STATIC void
S_put_byte(pTHX_ SV *sv, int c)
{
- if (isCNTRL(c) || c == 255 || !isPRINT(c))
+ /* Our definition of isPRINT() ignores locales, so only bytes that are
+ not part of UTF-8 are considered printable. I assume that the same
+ holds for UTF-EBCDIC.
+ Also, code point 255 is not printable in either (it's E0 in EBCDIC,
+ which Wikipedia says:
+
+ EO, or Eight Ones, is an 8-bit EBCDIC character code represented as all
+ ones (binary 1111 1111, hexadecimal FF). It is similar, but not
+ identical, to the ASCII delete (DEL) or rubout control character.
+ ) So the old condition can be simplified to !isPRINT(c) */
+ if (!isPRINT(c))
Perl_sv_catpvf(aTHX_ sv, "\\%o", c);
else {
const unsigned char string = (unsigned char) c;