From: Jarkko Hietaniemi Date: Wed, 21 Mar 2001 03:48:24 +0000 (+0000) Subject: First stab at making Data::Dumper to grok Unicode. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=dc71dc59fcd7e213f3e4a961182792e0d7ceae78;p=p5sagit%2Fp5-mst-13.2.git First stab at making Data::Dumper to grok Unicode. TODO: tests, EBCDICify. p4raw-id: //depot/perl@9274 --- diff --git a/ext/Data/Dumper/Dumper.pm b/ext/Data/Dumper/Dumper.pm index b092a22..4f15bbb 100644 --- a/ext/Data/Dumper/Dumper.pm +++ b/ext/Data/Dumper/Dumper.pm @@ -553,6 +553,8 @@ my %esc = ( sub qquote { local($_) = shift; s/([\\\"\@\$])/\\$1/g; + my $bytes; { use bytes; $bytes = length } + s/([^\x00-\x7f])/'\x{'.sprintf("%x",ord($1)).'}'/ge if $bytes > length; return qq("$_") unless /[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~]/; # fast exit diff --git a/ext/Data/Dumper/Dumper.xs b/ext/Data/Dumper/Dumper.xs index 25e72b1..2cb89e5 100644 --- a/ext/Data/Dumper/Dumper.xs +++ b/ext/Data/Dumper/Dumper.xs @@ -22,6 +22,7 @@ static I32 num_q (char *s, STRLEN slen); static I32 esc_q (char *dest, char *src, STRLEN slen); +static I32 esc_q_utf8 (SV *sv, char *src, STRLEN slen); static SV *sv_x (pTHX_ SV *sv, char *str, STRLEN len, I32 n); static I32 DD_dump (pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, AV *postav, I32 *levelp, I32 indent, @@ -96,6 +97,52 @@ esc_q(register char *d, register char *s, register STRLEN slen) return ret; } +static I32 +esc_q_utf8(SV* sv, register char *src, register STRLEN slen) +{ + char *s, *send, *r; + STRLEN grow = 0, j = 1, l; + bool dquote = FALSE; + + /* this will need EBCDICification */ + for (s = src, send = src + slen; s < send; s += UTF8SKIP(s)) { + UV k = utf8_to_uvchr((U8*)s, &l); + + grow += + (*s == '"' || *s == '\\') ? 2 : + (k < 0x80 ? 1 : UNISKIP(k) + 1 + 4); /* 4: \x{} */ + } + sv_grow(sv, SvCUR(sv)+3+grow); /* 3: ""\0 */ + r = SvPVX(sv) + SvCUR(sv); + + for (s = src; s < send; s += UTF8SKIP(s)) { + UV k = utf8_to_uvchr((U8*)s, &l); + + if (*s == '"' || *s == '\\') { + r[j++] = '\\'; + r[j++] = *s; + } + else if (k < 0x80) + r[j++] = k; + else { + r[j++] = '\\'; + r[j++] = 'x'; + r[j++] = '{'; + j += sprintf(r + j, "%x", k); + r[j++] = '}'; + dquote = TRUE; + } + } + if (dquote) + r[0] = r[j++] = '"'; + else + r[0] = r[j++] = '\''; + r[j++] = '\0'; + SvCUR_set(sv, SvCUR(sv) + j); + + return j; +} + /* append a repeated string to an SV */ static SV * sv_x(pTHX_ SV *sv, register char *str, STRLEN len, I32 n) @@ -456,38 +503,59 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, (void)hv_iterinit((HV*)ival); i = 0; while ((entry = hv_iternext((HV*)ival))) { - char *nkey; + char *nkey = NULL; I32 nticks = 0; + SV* keysv; + STRLEN keylen; + bool do_utf8 = FALSE; if (i) sv_catpvn(retval, ",", 1); i++; - key = hv_iterkey(entry, &klen); - hval = hv_iterval((HV*)ival, entry); - - if (quotekeys || needs_quote(key)) { - nticks = num_q(key, klen); - New(0, nkey, klen+nticks+3, char); - nkey[0] = '\''; - if (nticks) - klen += esc_q(nkey+1, key, klen); - else - (void)Copy(key, nkey+1, klen, char); - nkey[++klen] = '\''; - nkey[++klen] = '\0'; + keysv = hv_iterkeysv(entry); + hval = hv_iterval((HV*)ival, entry); + + do_utf8 = DO_UTF8(keysv); + key = SvPV(keysv, keylen); + klen = keylen; + + if (do_utf8) { + char *okey = SvPVX(retval) + SvCUR(retval); + I32 nlen; + + sv_catsv(retval, totpad); + sv_catsv(retval, ipad); + nlen = esc_q_utf8(retval, key, klen); + + sname = newSVsv(iname); + sv_catpvn(sname, okey, nlen); + sv_catpvn(sname, "}", 1); } else { - New(0, nkey, klen, char); - (void)Copy(key, nkey, klen, char); - } - - sname = newSVsv(iname); - sv_catpvn(sname, nkey, klen); - sv_catpvn(sname, "}", 1); + if (quotekeys || needs_quote(key)) { + nticks = num_q(key, klen); + New(0, nkey, klen+nticks+3, char); + nkey[0] = '\''; + if (nticks) + klen += esc_q(nkey+1, key, klen); + else + (void)Copy(key, nkey+1, klen, char); + nkey[++klen] = '\''; + nkey[++klen] = '\0'; + } + else { + New(0, nkey, klen, char); + (void)Copy(key, nkey, klen, char); + } - sv_catsv(retval, totpad); - sv_catsv(retval, ipad); - sv_catpvn(retval, nkey, klen); + sname = newSVsv(iname); + sv_catpvn(sname, nkey, klen); + sv_catpvn(sname, "}", 1); + + sv_catsv(retval, totpad); + sv_catsv(retval, ipad); + sv_catpvn(retval, nkey, klen); + } sv_catpvn(retval, " => ", 4); if (indent >= 2) { char *extra; @@ -662,14 +730,18 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, } else { c = SvPV(val, i); - sv_grow(retval, SvCUR(retval)+3+2*i); - r = SvPVX(retval)+SvCUR(retval); - r[0] = '\''; - i += esc_q(r+1, c, i); - ++i; - r[i++] = '\''; - r[i] = '\0'; - SvCUR_set(retval, SvCUR(retval)+i); + if (DO_UTF8(val)) + i += esc_q_utf8(retval, c, i); + else { + sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */ + r = SvPVX(retval) + SvCUR(retval); + r[0] = '\''; + i += esc_q(r+1, c, i); + ++i; + r[i++] = '\''; + r[i] = '\0'; + SvCUR_set(retval, SvCUR(retval)+i); + } } }