Accessing unicode keys in tie hashes via hv_exists was broken.
[p5sagit/p5-mst-13.2.git] / ext / Data / Dumper / Dumper.xs
CommitLineData
c5be433b 1#define PERL_NO_GET_CONTEXT
823edd99 2#include "EXTERN.h"
3#include "perl.h"
4#include "XSUB.h"
823edd99 5
20ce7b12 6static I32 num_q (char *s, STRLEN slen);
7static I32 esc_q (char *dest, char *src, STRLEN slen);
6cde4e94 8static I32 esc_q_utf8 (pTHX_ SV *sv, char *src, STRLEN slen);
cea2e8a9 9static SV *sv_x (pTHX_ SV *sv, char *str, STRLEN len, I32 n);
10static I32 DD_dump (pTHX_ SV *val, char *name, STRLEN namelen, SV *retval,
20ce7b12 11 HV *seenhv, AV *postav, I32 *levelp, I32 indent,
30b4f386 12 SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
20ce7b12 13 SV *freezer, SV *toaster,
a2126434 14 I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
e9105f86 15 I32 maxdepth, SV *sortkeys);
823edd99 16
fec5e1eb 17#if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
18
19# ifdef EBCDIC
20# define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
21# else
22# define UNI_TO_NATIVE(ch) (ch)
23# endif
24
25UV
26Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
27{
28 UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
29 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
30 return UNI_TO_NATIVE(uv);
31}
32
33# if !defined(PERL_IMPLICIT_CONTEXT)
34# define utf8_to_uvchr Perl_utf8_to_uvchr
35# else
36# define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
37# endif
38
39#endif /* PERL_VERSION <= 6 */
40
41/* Changes in 5.7 series mean that now IOK is only set if scalar is
42 precisely integer but in 5.6 and earlier we need to do a more
43 complex test */
44#if PERL_VERSION <= 6
45#define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
46#else
47#define DD_is_integer(sv) SvIOK(sv)
48#endif
49
823edd99 50/* does a string need to be protected? */
51static I32
52needs_quote(register char *s)
53{
54TOP:
55 if (s[0] == ':') {
56 if (*++s) {
57 if (*s++ != ':')
58 return 1;
59 }
60 else
61 return 1;
62 }
63 if (isIDFIRST(*s)) {
64 while (*++s)
7b0972df 65 if (!isALNUM(*s)) {
823edd99 66 if (*s == ':')
67 goto TOP;
68 else
69 return 1;
7b0972df 70 }
823edd99 71 }
6cde4e94 72 else
823edd99 73 return 1;
74 return 0;
75}
76
77/* count the number of "'"s and "\"s in string */
78static I32
6c1ab3c2 79num_q(register char *s, register STRLEN slen)
823edd99 80{
81 register I32 ret = 0;
6c1ab3c2 82
83 while (slen > 0) {
823edd99 84 if (*s == '\'' || *s == '\\')
85 ++ret;
86 ++s;
6c1ab3c2 87 --slen;
823edd99 88 }
89 return ret;
90}
91
92
93/* returns number of chars added to escape "'"s and "\"s in s */
94/* slen number of characters in s will be escaped */
95/* destination must be long enough for additional chars */
96static I32
97esc_q(register char *d, register char *s, register STRLEN slen)
98{
99 register I32 ret = 0;
6cde4e94 100
823edd99 101 while (slen > 0) {
102 switch (*s) {
103 case '\'':
104 case '\\':
105 *d = '\\';
106 ++d; ++ret;
107 default:
108 *d = *s;
109 ++d; ++s; --slen;
110 break;
111 }
112 }
113 return ret;
114}
115
dc71dc59 116static I32
6cde4e94 117esc_q_utf8(pTHX_ SV* sv, register char *src, register STRLEN slen)
dc71dc59 118{
f052740f 119 char *s, *send, *r, *rstart;
120 STRLEN j, cur = SvCUR(sv);
121 /* Could count 128-255 and 256+ in two variables, if we want to
122 be like &qquote and make a distinction. */
123 STRLEN grow = 0; /* bytes needed to represent chars 128+ */
124 /* STRLEN topbit_grow = 0; bytes needed to represent chars 128-255 */
125 STRLEN backslashes = 0;
126 STRLEN single_quotes = 0;
127 STRLEN qq_escapables = 0; /* " $ @ will need a \ in "" strings. */
128 STRLEN normal = 0;
6cde4e94 129
dc71dc59 130 /* this will need EBCDICification */
131 for (s = src, send = src + slen; s < send; s += UTF8SKIP(s)) {
f052740f 132 UV k = utf8_to_uvchr((U8*)s, NULL);
133
134 if (k > 127) {
135 /* 4: \x{} then count the number of hex digits. */
136 grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
137#if UVSIZE == 4
138 8 /* We may allocate a bit more than the minimum here. */
139#else
140 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
141#endif
142 );
143 } else if (k == '\\') {
144 backslashes++;
145 } else if (k == '\'') {
146 single_quotes++;
147 } else if (k == '"' || k == '$' || k == '@') {
148 qq_escapables++;
149 } else {
150 normal++;
151 }
dc71dc59 152 }
f052740f 153 if (grow) {
154 /* We have something needing hex. 3 is ""\0 */
34231210 155 sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
156 + 2*qq_escapables + normal);
f052740f 157 rstart = r = SvPVX(sv) + cur;
158
159 *r++ = '"';
160
161 for (s = src; s < send; s += UTF8SKIP(s)) {
162 UV k = utf8_to_uvchr((U8*)s, NULL);
163
164 if (k == '"' || k == '\\' || k == '$' || k == '@') {
165 *r++ = '\\';
7c436af3 166 *r++ = (char)k;
f052740f 167 }
168 else if (k < 0x80)
7c436af3 169 *r++ = (char)k;
f052740f 170 else {
f13a2bc0 171 /* The return value of sprintf() is unportable.
172 * In modern systems it returns (int) the number of characters,
173 * but in older systems it might return (char*) the original
174 * buffer, or it might even be (void). The easiest portable
175 * thing to do is probably use sprintf() in void context and
176 * then strlen(buffer) for the length. The more proper way
177 * would of course be to figure out the prototype of sprintf.
178 * --jhi */
179 sprintf(r, "\\x{%"UVxf"}", k);
180 r += strlen(r);
f052740f 181 }
182 }
183 *r++ = '"';
184 } else {
185 /* Single quotes. */
34231210 186 sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
187 + qq_escapables + normal);
f052740f 188 rstart = r = SvPVX(sv) + cur;
189 *r++ = '\'';
190 for (s = src; s < send; s ++) {
191 char k = *s;
192 if (k == '\'' || k == '\\')
193 *r++ = '\\';
194 *r++ = k;
195 }
196 *r++ = '\'';
dc71dc59 197 }
f052740f 198 *r = '\0';
199 j = r - rstart;
200 SvCUR_set(sv, cur + j);
dc71dc59 201
202 return j;
203}
204
823edd99 205/* append a repeated string to an SV */
206static SV *
cea2e8a9 207sv_x(pTHX_ SV *sv, register char *str, STRLEN len, I32 n)
823edd99 208{
209 if (sv == Nullsv)
7820172a 210 sv = newSVpvn("", 0);
823edd99 211 else
212 assert(SvTYPE(sv) >= SVt_PV);
213
214 if (n > 0) {
215 SvGROW(sv, len*n + SvCUR(sv) + 1);
216 if (len == 1) {
217 char *start = SvPVX(sv) + SvCUR(sv);
218 SvCUR(sv) += n;
219 start[n] = '\0';
220 while (n > 0)
221 start[--n] = str[0];
222 }
223 else
224 while (n > 0) {
225 sv_catpvn(sv, str, len);
226 --n;
227 }
228 }
229 return sv;
230}
231
232/*
233 * This ought to be split into smaller functions. (it is one long function since
234 * it exactly parallels the perl version, which was one long thing for
235 * efficiency raisins.) Ugggh!
236 */
237static I32
cea2e8a9 238DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
823edd99 239 AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
30b4f386 240 SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
e9105f86 241 I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
823edd99 242{
243 char tmpbuf[128];
244 U32 i;
245 char *c, *r, *realpack, id[128];
246 SV **svp;
7820172a 247 SV *sv, *ipad, *ival;
823edd99 248 SV *blesspad = Nullsv;
7820172a 249 AV *seenentry = Nullav;
823edd99 250 char *iname;
251 STRLEN inamelen, idlen = 0;
823edd99 252 U32 realtype;
253
254 if (!val)
255 return 0;
256
823edd99 257 realtype = SvTYPE(val);
6cde4e94 258
823edd99 259 if (SvGMAGICAL(val))
260 mg_get(val);
823edd99 261 if (SvROK(val)) {
262
263 if (SvOBJECT(SvRV(val)) && freezer &&
264 SvPOK(freezer) && SvCUR(freezer))
265 {
266 dSP; ENTER; SAVETMPS; PUSHMARK(sp);
267 XPUSHs(val); PUTBACK;
268 i = perl_call_method(SvPVX(freezer), G_EVAL|G_SCALAR);
269 SPAGAIN;
7820172a 270 if (SvTRUE(ERRSV))
35c1215d 271 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
823edd99 272 else if (i)
273 val = newSVsv(POPs);
274 PUTBACK; FREETMPS; LEAVE;
275 if (i)
276 (void)sv_2mortal(val);
277 }
278
279 ival = SvRV(val);
823edd99 280 realtype = SvTYPE(ival);
c623ac67 281 (void) sprintf(id, "0x%"UVxf, PTR2UV(ival));
823edd99 282 idlen = strlen(id);
283 if (SvOBJECT(ival))
284 realpack = HvNAME(SvSTASH(ival));
285 else
286 realpack = Nullch;
7820172a 287
288 /* if it has a name, we need to either look it up, or keep a tab
289 * on it so we know when we hit it later
290 */
291 if (namelen) {
292 if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
293 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
294 {
295 SV *othername;
296 if ((svp = av_fetch(seenentry, 0, FALSE))
297 && (othername = *svp))
298 {
299 if (purity && *levelp > 0) {
300 SV *postentry;
301
302 if (realtype == SVt_PVHV)
303 sv_catpvn(retval, "{}", 2);
304 else if (realtype == SVt_PVAV)
305 sv_catpvn(retval, "[]", 2);
306 else
5df59fb6 307 sv_catpvn(retval, "do{my $o}", 9);
7820172a 308 postentry = newSVpvn(name, namelen);
309 sv_catpvn(postentry, " = ", 3);
310 sv_catsv(postentry, othername);
311 av_push(postav, postentry);
312 }
313 else {
314 if (name[0] == '@' || name[0] == '%') {
315 if ((SvPVX(othername))[0] == '\\' &&
316 (SvPVX(othername))[1] == name[0]) {
317 sv_catpvn(retval, SvPVX(othername)+1,
318 SvCUR(othername)-1);
319 }
320 else {
321 sv_catpvn(retval, name, 1);
322 sv_catpvn(retval, "{", 1);
323 sv_catsv(retval, othername);
324 sv_catpvn(retval, "}", 1);
325 }
823edd99 326 }
7820172a 327 else
823edd99 328 sv_catsv(retval, othername);
823edd99 329 }
7820172a 330 return 1;
331 }
332 else {
333 warn("ref name not found for %s", id);
334 return 0;
823edd99 335 }
823edd99 336 }
7820172a 337 else { /* store our name and continue */
338 SV *namesv;
339 if (name[0] == '@' || name[0] == '%') {
340 namesv = newSVpvn("\\", 1);
341 sv_catpvn(namesv, name, namelen);
342 }
343 else if (realtype == SVt_PVCV && name[0] == '*') {
344 namesv = newSVpvn("\\", 2);
345 sv_catpvn(namesv, name, namelen);
346 (SvPVX(namesv))[1] = '&';
347 }
348 else
349 namesv = newSVpvn(name, namelen);
350 seenentry = newAV();
351 av_push(seenentry, namesv);
352 (void)SvREFCNT_inc(val);
353 av_push(seenentry, val);
354 (void)hv_store(seenhv, id, strlen(id),
fec5e1eb 355 newRV_inc((SV*)seenentry), 0);
7820172a 356 SvREFCNT_dec(seenentry);
823edd99 357 }
823edd99 358 }
823edd99 359
a2126434 360 if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) {
361 STRLEN rlen;
362 char *rval = SvPV(val, rlen);
363 char *slash = strchr(rval, '/');
364 sv_catpvn(retval, "qr/", 3);
365 while (slash) {
366 sv_catpvn(retval, rval, slash-rval);
367 sv_catpvn(retval, "\\/", 2);
368 rlen -= slash-rval+1;
369 rval = slash+1;
370 slash = strchr(rval, '/');
7894fbab 371 }
a2126434 372 sv_catpvn(retval, rval, rlen);
373 sv_catpvn(retval, "/", 1);
374 return 1;
375 }
376
377 /* If purity is not set and maxdepth is set, then check depth:
378 * if we have reached maximum depth, return the string
379 * representation of the thing we are currently examining
6cde4e94 380 * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
a2126434 381 */
382 if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
383 STRLEN vallen;
384 char *valstr = SvPV(val,vallen);
385 sv_catpvn(retval, "'", 1);
386 sv_catpvn(retval, valstr, vallen);
387 sv_catpvn(retval, "'", 1);
388 return 1;
389 }
390
391 if (realpack) { /* we have a blessed ref */
392 STRLEN blesslen;
393 char *blessstr = SvPV(bless, blesslen);
394 sv_catpvn(retval, blessstr, blesslen);
395 sv_catpvn(retval, "( ", 2);
396 if (indent >= 2) {
397 blesspad = apad;
398 apad = newSVsv(apad);
399 sv_x(aTHX_ apad, " ", 1, blesslen+2);
823edd99 400 }
401 }
402
7894fbab 403 (*levelp)++;
404 ipad = sv_x(aTHX_ Nullsv, SvPVX(xpad), SvCUR(xpad), *levelp);
405
7820172a 406 if (realtype <= SVt_PVBM) { /* scalar ref */
407 SV *namesv = newSVpvn("${", 2);
408 sv_catpvn(namesv, name, namelen);
409 sv_catpvn(namesv, "}", 1);
6cde4e94 410 if (realpack) { /* blessed */
823edd99 411 sv_catpvn(retval, "do{\\(my $o = ", 13);
cea2e8a9 412 DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
30b4f386 413 postav, levelp, indent, pad, xpad, apad, sep, pair,
a2126434 414 freezer, toaster, purity, deepcopy, quotekeys, bless,
e9105f86 415 maxdepth, sortkeys);
823edd99 416 sv_catpvn(retval, ")}", 2);
7820172a 417 } /* plain */
823edd99 418 else {
419 sv_catpvn(retval, "\\", 1);
cea2e8a9 420 DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
30b4f386 421 postav, levelp, indent, pad, xpad, apad, sep, pair,
a2126434 422 freezer, toaster, purity, deepcopy, quotekeys, bless,
e9105f86 423 maxdepth, sortkeys);
823edd99 424 }
7820172a 425 SvREFCNT_dec(namesv);
426 }
427 else if (realtype == SVt_PVGV) { /* glob ref */
428 SV *namesv = newSVpvn("*{", 2);
429 sv_catpvn(namesv, name, namelen);
430 sv_catpvn(namesv, "}", 1);
431 sv_catpvn(retval, "\\", 1);
cea2e8a9 432 DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
30b4f386 433 postav, levelp, indent, pad, xpad, apad, sep, pair,
a2126434 434 freezer, toaster, purity, deepcopy, quotekeys, bless,
e9105f86 435 maxdepth, sortkeys);
7820172a 436 SvREFCNT_dec(namesv);
823edd99 437 }
438 else if (realtype == SVt_PVAV) {
439 SV *totpad;
440 I32 ix = 0;
441 I32 ixmax = av_len((AV *)ival);
6cde4e94 442
823edd99 443 SV *ixsv = newSViv(0);
444 /* allowing for a 24 char wide array index */
445 New(0, iname, namelen+28, char);
446 (void)strcpy(iname, name);
447 inamelen = namelen;
448 if (name[0] == '@') {
449 sv_catpvn(retval, "(", 1);
450 iname[0] = '$';
451 }
452 else {
453 sv_catpvn(retval, "[", 1);
7820172a 454 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
455 /*if (namelen > 0
456 && name[namelen-1] != ']' && name[namelen-1] != '}'
457 && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
458 if ((namelen > 0
459 && name[namelen-1] != ']' && name[namelen-1] != '}')
460 || (namelen > 4
461 && (name[1] == '{'
462 || (name[0] == '\\' && name[2] == '{'))))
463 {
823edd99 464 iname[inamelen++] = '-'; iname[inamelen++] = '>';
465 iname[inamelen] = '\0';
466 }
467 }
468 if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
469 (instr(iname+inamelen-8, "{SCALAR}") ||
470 instr(iname+inamelen-7, "{ARRAY}") ||
471 instr(iname+inamelen-6, "{HASH}"))) {
472 iname[inamelen++] = '-'; iname[inamelen++] = '>';
473 }
474 iname[inamelen++] = '['; iname[inamelen] = '\0';
475 totpad = newSVsv(sep);
476 sv_catsv(totpad, pad);
477 sv_catsv(totpad, apad);
478
479 for (ix = 0; ix <= ixmax; ++ix) {
480 STRLEN ilen;
481 SV *elem;
482 svp = av_fetch((AV*)ival, ix, FALSE);
483 if (svp)
484 elem = *svp;
485 else
3280af22 486 elem = &PL_sv_undef;
823edd99 487
488 ilen = inamelen;
489 sv_setiv(ixsv, ix);
7b0972df 490 (void) sprintf(iname+ilen, "%"IVdf, (IV)ix);
823edd99 491 ilen = strlen(iname);
492 iname[ilen++] = ']'; iname[ilen] = '\0';
493 if (indent >= 3) {
494 sv_catsv(retval, totpad);
495 sv_catsv(retval, ipad);
496 sv_catpvn(retval, "#", 1);
497 sv_catsv(retval, ixsv);
498 }
499 sv_catsv(retval, totpad);
500 sv_catsv(retval, ipad);
cea2e8a9 501 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
30b4f386 502 levelp, indent, pad, xpad, apad, sep, pair,
a2126434 503 freezer, toaster, purity, deepcopy, quotekeys, bless,
e9105f86 504 maxdepth, sortkeys);
823edd99 505 if (ix < ixmax)
506 sv_catpvn(retval, ",", 1);
507 }
508 if (ixmax >= 0) {
cea2e8a9 509 SV *opad = sv_x(aTHX_ Nullsv, SvPVX(xpad), SvCUR(xpad), (*levelp)-1);
823edd99 510 sv_catsv(retval, totpad);
511 sv_catsv(retval, opad);
512 SvREFCNT_dec(opad);
513 }
514 if (name[0] == '@')
515 sv_catpvn(retval, ")", 1);
516 else
517 sv_catpvn(retval, "]", 1);
518 SvREFCNT_dec(ixsv);
519 SvREFCNT_dec(totpad);
520 Safefree(iname);
521 }
522 else if (realtype == SVt_PVHV) {
523 SV *totpad, *newapad;
524 SV *iname, *sname;
525 HE *entry;
526 char *key;
527 I32 klen;
528 SV *hval;
e9105f86 529 AV *keys = Nullav;
6cde4e94 530
7820172a 531 iname = newSVpvn(name, namelen);
823edd99 532 if (name[0] == '%') {
533 sv_catpvn(retval, "(", 1);
534 (SvPVX(iname))[0] = '$';
535 }
536 else {
537 sv_catpvn(retval, "{", 1);
7820172a 538 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
539 if ((namelen > 0
540 && name[namelen-1] != ']' && name[namelen-1] != '}')
541 || (namelen > 4
542 && (name[1] == '{'
543 || (name[0] == '\\' && name[2] == '{'))))
544 {
823edd99 545 sv_catpvn(iname, "->", 2);
546 }
547 }
548 if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
549 (instr(name+namelen-8, "{SCALAR}") ||
550 instr(name+namelen-7, "{ARRAY}") ||
551 instr(name+namelen-6, "{HASH}"))) {
552 sv_catpvn(iname, "->", 2);
553 }
554 sv_catpvn(iname, "{", 1);
555 totpad = newSVsv(sep);
556 sv_catsv(totpad, pad);
557 sv_catsv(totpad, apad);
6cde4e94 558
e9105f86 559 /* If requested, get a sorted/filtered array of hash keys */
560 if (sortkeys) {
561 if (sortkeys == &PL_sv_yes) {
fec5e1eb 562#if PERL_VERSION < 8
563 sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
564#else
e9105f86 565 keys = newAV();
566 (void)hv_iterinit((HV*)ival);
20d72259 567 while ((entry = hv_iternext((HV*)ival))) {
e9105f86 568 sv = hv_iterkeysv(entry);
569 SvREFCNT_inc(sv);
570 av_push(keys, sv);
571 }
fec5e1eb 572# ifdef USE_LOCALE_NUMERIC
e9105f86 573 sortsv(AvARRAY(keys),
574 av_len(keys)+1,
3c253d0e 575 IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
fec5e1eb 576# else
503ec68f 577 sortsv(AvARRAY(keys),
578 av_len(keys)+1,
579 Perl_sv_cmp);
fec5e1eb 580# endif
02a99678 581#endif
e9105f86 582 }
fec5e1eb 583 if (sortkeys != &PL_sv_yes) {
e9105f86 584 dSP; ENTER; SAVETMPS; PUSHMARK(sp);
585 XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
586 i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
587 SPAGAIN;
588 if (i) {
589 sv = POPs;
590 if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
591 keys = (AV*)SvREFCNT_inc(SvRV(sv));
592 }
593 if (! keys)
594 warn("Sortkeys subroutine did not return ARRAYREF\n");
595 PUTBACK; FREETMPS; LEAVE;
596 }
597 if (keys)
598 sv_2mortal((SV*)keys);
599 }
600 else
601 (void)hv_iterinit((HV*)ival);
ecfc8647 602
603 /* foreach (keys %hash) */
604 for (i = 0; 1; i++) {
fdce9ba9 605 char *nkey;
606 char *nkey_buffer = NULL;
823edd99 607 I32 nticks = 0;
dc71dc59 608 SV* keysv;
609 STRLEN keylen;
fdce9ba9 610 I32 nlen;
dc71dc59 611 bool do_utf8 = FALSE;
ecfc8647 612
91f3b821 613 if ((sortkeys && !(keys && (I32)i <= av_len(keys))) ||
ecfc8647 614 !(entry = hv_iternext((HV *)ival)))
615 break;
616
823edd99 617 if (i)
618 sv_catpvn(retval, ",", 1);
e9105f86 619
620 if (sortkeys) {
621 char *key;
622 svp = av_fetch(keys, i, FALSE);
623 keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
624 key = SvPV(keysv, keylen);
d075f8ed 625 svp = hv_fetch((HV*)ival, key,
6e21dc91 626 SvUTF8(keysv) ? -(I32)keylen : keylen, 0);
e9105f86 627 hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
628 }
629 else {
630 keysv = hv_iterkeysv(entry);
631 hval = hv_iterval((HV*)ival, entry);
632 }
633
dc71dc59 634 do_utf8 = DO_UTF8(keysv);
635 key = SvPV(keysv, keylen);
636 klen = keylen;
637
fdce9ba9 638 sv_catsv(retval, totpad);
639 sv_catsv(retval, ipad);
640 /* old logic was first to check utf8 flag, and if utf8 always
641 call esc_q_utf8. This caused test to break under -Mutf8,
642 because there even strings like 'c' have utf8 flag on.
643 Hence with quotekeys == 0 the XS code would still '' quote
644 them based on flags, whereas the perl code would not,
645 based on regexps.
646 The perl code is correct.
647 needs_quote() decides that anything that isn't a valid
648 perl identifier needs to be quoted, hence only correctly
649 formed strings with no characters outside [A-Za-z0-9_:]
650 won't need quoting. None of those characters are used in
651 the byte encoding of utf8, so anything with utf8
652 encoded characters in will need quoting. Hence strings
653 with utf8 encoded characters in will end up inside do_utf8
654 just like before, but now strings with utf8 flag set but
655 only ascii characters will end up in the unquoted section.
656
657 There should also be less tests for the (probably currently)
658 more common doesn't need quoting case.
659 The code is also smaller (22044 vs 22260) because I've been
30b4f386 660 able to pull the common logic out to both sides. */
fdce9ba9 661 if (quotekeys || needs_quote(key)) {
662 if (do_utf8) {
663 STRLEN ocur = SvCUR(retval);
664 nlen = esc_q_utf8(aTHX_ retval, key, klen);
665 nkey = SvPVX(retval) + ocur;
666 }
667 else {
dc71dc59 668 nticks = num_q(key, klen);
fdce9ba9 669 New(0, nkey_buffer, klen+nticks+3, char);
670 nkey = nkey_buffer;
dc71dc59 671 nkey[0] = '\'';
672 if (nticks)
673 klen += esc_q(nkey+1, key, klen);
674 else
675 (void)Copy(key, nkey+1, klen, char);
676 nkey[++klen] = '\'';
677 nkey[++klen] = '\0';
fdce9ba9 678 nlen = klen;
679 sv_catpvn(retval, nkey, klen);
dc71dc59 680 }
fdce9ba9 681 }
682 else {
683 nkey = key;
684 nlen = klen;
685 sv_catpvn(retval, nkey, klen);
dc71dc59 686 }
fdce9ba9 687 sname = newSVsv(iname);
688 sv_catpvn(sname, nkey, nlen);
689 sv_catpvn(sname, "}", 1);
690
30b4f386 691 sv_catsv(retval, pair);
823edd99 692 if (indent >= 2) {
693 char *extra;
694 I32 elen = 0;
695 newapad = newSVsv(apad);
696 New(0, extra, klen+4+1, char);
697 while (elen < (klen+4))
698 extra[elen++] = ' ';
699 extra[elen] = '\0';
700 sv_catpvn(newapad, extra, elen);
701 Safefree(extra);
702 }
703 else
704 newapad = apad;
705
cea2e8a9 706 DD_dump(aTHX_ hval, SvPVX(sname), SvCUR(sname), retval, seenhv,
30b4f386 707 postav, levelp, indent, pad, xpad, newapad, sep, pair,
a2126434 708 freezer, toaster, purity, deepcopy, quotekeys, bless,
e9105f86 709 maxdepth, sortkeys);
823edd99 710 SvREFCNT_dec(sname);
fdce9ba9 711 Safefree(nkey_buffer);
823edd99 712 if (indent >= 2)
713 SvREFCNT_dec(newapad);
714 }
715 if (i) {
cea2e8a9 716 SV *opad = sv_x(aTHX_ Nullsv, SvPVX(xpad), SvCUR(xpad), *levelp-1);
823edd99 717 sv_catsv(retval, totpad);
718 sv_catsv(retval, opad);
719 SvREFCNT_dec(opad);
720 }
721 if (name[0] == '%')
722 sv_catpvn(retval, ")", 1);
723 else
724 sv_catpvn(retval, "}", 1);
725 SvREFCNT_dec(iname);
726 SvREFCNT_dec(totpad);
727 }
728 else if (realtype == SVt_PVCV) {
729 sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
730 if (purity)
731 warn("Encountered CODE ref, using dummy placeholder");
732 }
733 else {
734 warn("cannot handle ref type %ld", realtype);
735 }
736
737 if (realpack) { /* free blessed allocs */
738 if (indent >= 2) {
739 SvREFCNT_dec(apad);
740 apad = blesspad;
741 }
742 sv_catpvn(retval, ", '", 3);
743 sv_catpvn(retval, realpack, strlen(realpack));
744 sv_catpvn(retval, "' )", 3);
745 if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
746 sv_catpvn(retval, "->", 2);
747 sv_catsv(retval, toaster);
748 sv_catpvn(retval, "()", 2);
749 }
750 }
751 SvREFCNT_dec(ipad);
752 (*levelp)--;
753 }
754 else {
755 STRLEN i;
756
757 if (namelen) {
c623ac67 758 (void) sprintf(id, "0x%"UVxf, PTR2UV(val));
823edd99 759 if ((svp = hv_fetch(seenhv, id, (idlen = strlen(id)), FALSE)) &&
760 (sv = *svp) && SvROK(sv) &&
7820172a 761 (seenentry = (AV*)SvRV(sv)))
762 {
823edd99 763 SV *othername;
7820172a 764 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
765 && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
766 {
767 sv_catpvn(retval, "${", 2);
823edd99 768 sv_catsv(retval, othername);
7820172a 769 sv_catpvn(retval, "}", 1);
823edd99 770 return 1;
771 }
772 }
3bef8b4a 773 else if (val != &PL_sv_undef) {
823edd99 774 SV *namesv;
7820172a 775 namesv = newSVpvn("\\", 1);
823edd99 776 sv_catpvn(namesv, name, namelen);
777 seenentry = newAV();
778 av_push(seenentry, namesv);
fec5e1eb 779 av_push(seenentry, newRV_inc(val));
780 (void)hv_store(seenhv, id, strlen(id), newRV_inc((SV*)seenentry), 0);
823edd99 781 SvREFCNT_dec(seenentry);
782 }
783 }
7820172a 784
fec5e1eb 785 if (DD_is_integer(val)) {
823edd99 786 STRLEN len;
0e8b3009 787 if (SvIsUV(val))
5e8f63cb 788 (void) sprintf(tmpbuf, "%"UVuf, SvUV(val));
0e8b3009 789 else
790 (void) sprintf(tmpbuf, "%"IVdf, SvIV(val));
823edd99 791 len = strlen(tmpbuf);
c4cce848 792 if (SvPOK(val)) {
793 /* Need to check to see if this is a string such as " 0".
794 I'm assuming from sprintf isn't going to clash with utf8.
795 Is this valid on EBCDIC? */
796 STRLEN pvlen;
797 const char *pv = SvPV(val, pvlen);
798 if (pvlen != len || memNE(pv, tmpbuf, len))
799 goto integer_came_from_string;
800 }
801 if (len > 10) {
802 /* Looks like we're on a 64 bit system. Make it a string so that
803 if a 32 bit system reads the number it will cope better. */
804 sv_catpvf(retval, "'%s'", tmpbuf);
805 } else
806 sv_catpvn(retval, tmpbuf, len);
823edd99 807 }
808 else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
809 c = SvPV(val, i);
810 ++c; --i; /* just get the name */
811 if (i >= 6 && strncmp(c, "main::", 6) == 0) {
812 c += 4;
813 i -= 4;
814 }
815 if (needs_quote(c)) {
816 sv_grow(retval, SvCUR(retval)+6+2*i);
817 r = SvPVX(retval)+SvCUR(retval);
818 r[0] = '*'; r[1] = '{'; r[2] = '\'';
819 i += esc_q(r+3, c, i);
820 i += 3;
821 r[i++] = '\''; r[i++] = '}';
822 r[i] = '\0';
823 }
824 else {
825 sv_grow(retval, SvCUR(retval)+i+2);
826 r = SvPVX(retval)+SvCUR(retval);
827 r[0] = '*'; strcpy(r+1, c);
828 i++;
829 }
7820172a 830 SvCUR_set(retval, SvCUR(retval)+i);
823edd99 831
832 if (purity) {
833 static char *entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
834 static STRLEN sizes[] = { 8, 7, 6 };
835 SV *e;
7820172a 836 SV *nname = newSVpvn("", 0);
837 SV *newapad = newSVpvn("", 0);
823edd99 838 GV *gv = (GV*)val;
839 I32 j;
840
841 for (j=0; j<3; j++) {
842 e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
7820172a 843 if (!e)
844 continue;
845 if (j == 0 && !SvOK(e))
846 continue;
847
848 {
823edd99 849 I32 nlevel = 0;
7820172a 850 SV *postentry = newSVpvn(r,i);
823edd99 851
852 sv_setsv(nname, postentry);
853 sv_catpvn(nname, entries[j], sizes[j]);
854 sv_catpvn(postentry, " = ", 3);
855 av_push(postav, postentry);
fec5e1eb 856 e = newRV_inc(e);
823edd99 857
858 SvCUR(newapad) = 0;
859 if (indent >= 2)
cea2e8a9 860 (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
823edd99 861
cea2e8a9 862 DD_dump(aTHX_ e, SvPVX(nname), SvCUR(nname), postentry,
823edd99 863 seenhv, postav, &nlevel, indent, pad, xpad,
30b4f386 864 newapad, sep, pair, freezer, toaster, purity,
e9105f86 865 deepcopy, quotekeys, bless, maxdepth,
866 sortkeys);
823edd99 867 SvREFCNT_dec(e);
868 }
869 }
870
871 SvREFCNT_dec(newapad);
872 SvREFCNT_dec(nname);
873 }
874 }
7820172a 875 else if (val == &PL_sv_undef || !SvOK(val)) {
876 sv_catpvn(retval, "undef", 5);
877 }
823edd99 878 else {
c4cce848 879 integer_came_from_string:
823edd99 880 c = SvPV(val, i);
dc71dc59 881 if (DO_UTF8(val))
6cde4e94 882 i += esc_q_utf8(aTHX_ retval, c, i);
dc71dc59 883 else {
884 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
885 r = SvPVX(retval) + SvCUR(retval);
886 r[0] = '\'';
887 i += esc_q(r+1, c, i);
888 ++i;
889 r[i++] = '\'';
890 r[i] = '\0';
891 SvCUR_set(retval, SvCUR(retval)+i);
892 }
823edd99 893 }
823edd99 894 }
895
7820172a 896 if (idlen) {
897 if (deepcopy)
898 (void)hv_delete(seenhv, id, idlen, G_DISCARD);
899 else if (namelen && seenentry) {
900 SV *mark = *av_fetch(seenentry, 2, TRUE);
901 sv_setiv(mark,1);
902 }
903 }
823edd99 904 return 1;
905}
906
907
908MODULE = Data::Dumper PACKAGE = Data::Dumper PREFIX = Data_Dumper_
909
910#
911# This is the exact equivalent of Dump. Well, almost. The things that are
912# different as of now (due to Laziness):
913# * doesnt do double-quotes yet.
914#
915
916void
917Data_Dumper_Dumpxs(href, ...)
918 SV *href;
919 PROTOTYPE: $;$$
920 PPCODE:
921 {
922 HV *hv;
923 SV *retval, *valstr;
924 HV *seenhv = Nullhv;
925 AV *postav, *todumpav, *namesav;
926 I32 level = 0;
497b47a8 927 I32 indent, terse, i, imax, postlen;
823edd99 928 SV **svp;
30b4f386 929 SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
e9105f86 930 SV *freezer, *toaster, *bless, *sortkeys;
7b0972df 931 I32 purity, deepcopy, quotekeys, maxdepth = 0;
823edd99 932 char tmpbuf[1024];
933 I32 gimme = GIMME;
934
935 if (!SvROK(href)) { /* call new to get an object first */
0f1923bd 936 if (items < 2)
937 croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
823edd99 938
939 ENTER;
940 SAVETMPS;
941
942 PUSHMARK(sp);
943 XPUSHs(href);
0f1923bd 944 XPUSHs(sv_2mortal(newSVsv(ST(1))));
945 if (items >= 3)
946 XPUSHs(sv_2mortal(newSVsv(ST(2))));
823edd99 947 PUTBACK;
948 i = perl_call_method("new", G_SCALAR);
949 SPAGAIN;
950 if (i)
951 href = newSVsv(POPs);
952
953 PUTBACK;
954 FREETMPS;
955 LEAVE;
956 if (i)
957 (void)sv_2mortal(href);
958 }
959
960 todumpav = namesav = Nullav;
961 seenhv = Nullhv;
30b4f386 962 val = pad = xpad = apad = sep = pair = varname
3280af22 963 = freezer = toaster = bless = &PL_sv_undef;
823edd99 964 name = sv_newmortal();
965 indent = 2;
497b47a8 966 terse = purity = deepcopy = 0;
823edd99 967 quotekeys = 1;
6cde4e94 968
7820172a 969 retval = newSVpvn("", 0);
823edd99 970 if (SvROK(href)
971 && (hv = (HV*)SvRV((SV*)href))
972 && SvTYPE(hv) == SVt_PVHV) {
973
974 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
975 seenhv = (HV*)SvRV(*svp);
976 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
977 todumpav = (AV*)SvRV(*svp);
978 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
979 namesav = (AV*)SvRV(*svp);
980 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
981 indent = SvIV(*svp);
982 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
983 purity = SvIV(*svp);
984 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
985 terse = SvTRUE(*svp);
497b47a8 986#if 0 /* useqq currently unused */
823edd99 987 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
988 useqq = SvTRUE(*svp);
497b47a8 989#endif
823edd99 990 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
991 pad = *svp;
992 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
993 xpad = *svp;
994 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
995 apad = *svp;
996 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
997 sep = *svp;
30b4f386 998 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
999 pair = *svp;
823edd99 1000 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1001 varname = *svp;
1002 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
1003 freezer = *svp;
1004 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
1005 toaster = *svp;
1006 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
1007 deepcopy = SvTRUE(*svp);
1008 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
1009 quotekeys = SvTRUE(*svp);
1010 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
1011 bless = *svp;
a2126434 1012 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
1013 maxdepth = SvIV(*svp);
e9105f86 1014 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1015 sortkeys = *svp;
1016 if (! SvTRUE(sortkeys))
1017 sortkeys = NULL;
1018 else if (! (SvROK(sortkeys) &&
1019 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1020 {
1021 /* flag to use qsortsv() for sorting hash keys */
1022 sortkeys = &PL_sv_yes;
1023 }
1024 }
823edd99 1025 postav = newAV();
1026
1027 if (todumpav)
1028 imax = av_len(todumpav);
1029 else
1030 imax = -1;
7820172a 1031 valstr = newSVpvn("",0);
823edd99 1032 for (i = 0; i <= imax; ++i) {
1033 SV *newapad;
6cde4e94 1034
823edd99 1035 av_clear(postav);
1036 if ((svp = av_fetch(todumpav, i, FALSE)))
1037 val = *svp;
1038 else
3280af22 1039 val = &PL_sv_undef;
823edd99 1040 if ((svp = av_fetch(namesav, i, TRUE)))
1041 sv_setsv(name, *svp);
1042 else
8063af02 1043 (void)SvOK_off(name);
6cde4e94 1044
823edd99 1045 if (SvOK(name)) {
1046 if ((SvPVX(name))[0] == '*') {
1047 if (SvROK(val)) {
1048 switch (SvTYPE(SvRV(val))) {
1049 case SVt_PVAV:
1050 (SvPVX(name))[0] = '@';
1051 break;
1052 case SVt_PVHV:
1053 (SvPVX(name))[0] = '%';
1054 break;
1055 case SVt_PVCV:
1056 (SvPVX(name))[0] = '*';
1057 break;
1058 default:
1059 (SvPVX(name))[0] = '$';
1060 break;
1061 }
1062 }
1063 else
1064 (SvPVX(name))[0] = '$';
1065 }
1066 else if ((SvPVX(name))[0] != '$')
1067 sv_insert(name, 0, 0, "$", 1);
1068 }
1069 else {
1070 STRLEN nchars = 0;
1071 sv_setpvn(name, "$", 1);
1072 sv_catsv(name, varname);
faccc32b 1073 (void) sprintf(tmpbuf, "%"IVdf, (IV)(i+1));
823edd99 1074 nchars = strlen(tmpbuf);
1075 sv_catpvn(name, tmpbuf, nchars);
1076 }
6cde4e94 1077
823edd99 1078 if (indent >= 2) {
cea2e8a9 1079 SV *tmpsv = sv_x(aTHX_ Nullsv, " ", 1, SvCUR(name)+3);
823edd99 1080 newapad = newSVsv(apad);
1081 sv_catsv(newapad, tmpsv);
1082 SvREFCNT_dec(tmpsv);
1083 }
1084 else
1085 newapad = apad;
6cde4e94 1086
cea2e8a9 1087 DD_dump(aTHX_ val, SvPVX(name), SvCUR(name), valstr, seenhv,
30b4f386 1088 postav, &level, indent, pad, xpad, newapad, sep, pair,
823edd99 1089 freezer, toaster, purity, deepcopy, quotekeys,
e9105f86 1090 bless, maxdepth, sortkeys);
6cde4e94 1091
823edd99 1092 if (indent >= 2)
1093 SvREFCNT_dec(newapad);
1094
1095 postlen = av_len(postav);
1096 if (postlen >= 0 || !terse) {
1097 sv_insert(valstr, 0, 0, " = ", 3);
1098 sv_insert(valstr, 0, 0, SvPVX(name), SvCUR(name));
1099 sv_catpvn(valstr, ";", 1);
1100 }
1101 sv_catsv(retval, pad);
1102 sv_catsv(retval, valstr);
1103 sv_catsv(retval, sep);
1104 if (postlen >= 0) {
1105 I32 i;
1106 sv_catsv(retval, pad);
1107 for (i = 0; i <= postlen; ++i) {
1108 SV *elem;
1109 svp = av_fetch(postav, i, FALSE);
1110 if (svp && (elem = *svp)) {
1111 sv_catsv(retval, elem);
1112 if (i < postlen) {
1113 sv_catpvn(retval, ";", 1);
1114 sv_catsv(retval, sep);
1115 sv_catsv(retval, pad);
1116 }
1117 }
1118 }
1119 sv_catpvn(retval, ";", 1);
1120 sv_catsv(retval, sep);
1121 }
1122 sv_setpvn(valstr, "", 0);
1123 if (gimme == G_ARRAY) {
1124 XPUSHs(sv_2mortal(retval));
1125 if (i < imax) /* not the last time thro ? */
7820172a 1126 retval = newSVpvn("",0);
823edd99 1127 }
1128 }
1129 SvREFCNT_dec(postav);
1130 SvREFCNT_dec(valstr);
1131 }
1132 else
1133 croak("Call to new() method failed to return HASH ref");
1134 if (gimme == G_SCALAR)
1135 XPUSHs(sv_2mortal(retval));
1136 }