Re: Data::Dumper bug?
[p5sagit/p5-mst-13.2.git] / ext / Data / Dumper / Dumper.xs
1 #define PERL_NO_GET_CONTEXT
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5
6 static I32 num_q (char *s, STRLEN slen);
7 static I32 esc_q (char *dest, char *src, STRLEN slen);
8 static I32 esc_q_utf8 (pTHX_ SV *sv, char *src, STRLEN slen);
9 static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
10 static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
11                     HV *seenhv, AV *postav, I32 *levelp, I32 indent,
12                     SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
13                     SV *freezer, SV *toaster,
14                     I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
15                     I32 maxdepth, SV *sortkeys);
16
17 #ifndef HvNAME_get
18 #define HvNAME_get HvNAME
19 #endif
20
21 #if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
22
23 # ifdef EBCDIC
24 #  define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
25 # else
26 #  define UNI_TO_NATIVE(ch) (ch)
27 # endif
28
29 UV
30 Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
31 {
32     UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
33                     ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
34     return UNI_TO_NATIVE(uv);
35 }
36
37 # if !defined(PERL_IMPLICIT_CONTEXT)
38 #  define utf8_to_uvchr      Perl_utf8_to_uvchr
39 # else
40 #  define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
41 # endif
42
43 #endif /* PERL_VERSION <= 6 */
44
45 /* Changes in 5.7 series mean that now IOK is only set if scalar is
46    precisely integer but in 5.6 and earlier we need to do a more
47    complex test  */
48 #if PERL_VERSION <= 6
49 #define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
50 #else
51 #define DD_is_integer(sv) SvIOK(sv)
52 #endif
53
54 /* does a string need to be protected? */
55 static I32
56 needs_quote(register char *s)
57 {
58 TOP:
59     if (s[0] == ':') {
60         if (*++s) {
61             if (*s++ != ':')
62                 return 1;
63         }
64         else
65             return 1;
66     }
67     if (isIDFIRST(*s)) {
68         while (*++s)
69             if (!isALNUM(*s)) {
70                 if (*s == ':')
71                     goto TOP;
72                 else
73                     return 1;
74             }
75     }
76     else
77         return 1;
78     return 0;
79 }
80
81 /* count the number of "'"s and "\"s in string */
82 static I32
83 num_q(register char *s, register STRLEN slen)
84 {
85     register I32 ret = 0;
86
87     while (slen > 0) {
88         if (*s == '\'' || *s == '\\')
89             ++ret;
90         ++s;
91         --slen;
92     }
93     return ret;
94 }
95
96
97 /* returns number of chars added to escape "'"s and "\"s in s */
98 /* slen number of characters in s will be escaped */
99 /* destination must be long enough for additional chars */
100 static I32
101 esc_q(register char *d, register char *s, register STRLEN slen)
102 {
103     register I32 ret = 0;
104
105     while (slen > 0) {
106         switch (*s) {
107         case '\'':
108         case '\\':
109             *d = '\\';
110             ++d; ++ret;
111         default:
112             *d = *s;
113             ++d; ++s; --slen;
114             break;
115         }
116     }
117     return ret;
118 }
119
120 static I32
121 esc_q_utf8(pTHX_ SV* sv, register char *src, register STRLEN slen)
122 {
123     char *s, *send, *r, *rstart;
124     STRLEN j, cur = SvCUR(sv);
125     /* Could count 128-255 and 256+ in two variables, if we want to
126        be like &qquote and make a distinction.  */
127     STRLEN grow = 0;    /* bytes needed to represent chars 128+ */
128     /* STRLEN topbit_grow = 0;  bytes needed to represent chars 128-255 */
129     STRLEN backslashes = 0;
130     STRLEN single_quotes = 0;
131     STRLEN qq_escapables = 0;   /* " $ @ will need a \ in "" strings.  */
132     STRLEN normal = 0;
133
134     /* this will need EBCDICification */
135     for (s = src, send = src + slen; s < send; s += UTF8SKIP(s)) {
136         UV k = utf8_to_uvchr((U8*)s, NULL);
137
138         if (k > 127) {
139             /* 4: \x{} then count the number of hex digits.  */
140             grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
141 #if UVSIZE == 4
142                 8 /* We may allocate a bit more than the minimum here.  */
143 #else
144                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
145 #endif
146                 );
147         } else if (k == '\\') {
148             backslashes++;
149         } else if (k == '\'') {
150             single_quotes++;
151         } else if (k == '"' || k == '$' || k == '@') {
152             qq_escapables++;
153         } else {
154             normal++;
155         }
156     }
157     if (grow) {
158         /* We have something needing hex. 3 is ""\0 */
159         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
160                 + 2*qq_escapables + normal);
161         rstart = r = SvPVX(sv) + cur;
162
163         *r++ = '"';
164
165         for (s = src; s < send; s += UTF8SKIP(s)) {
166             UV k = utf8_to_uvchr((U8*)s, NULL);
167
168             if (k == '"' || k == '\\' || k == '$' || k == '@') {
169                 *r++ = '\\';
170                 *r++ = (char)k;
171             }
172             else if (k < 0x80)
173                 *r++ = (char)k;
174             else {
175               /* The return value of sprintf() is unportable.
176                * In modern systems it returns (int) the number of characters,
177                * but in older systems it might return (char*) the original
178                * buffer, or it might even be (void).  The easiest portable
179                * thing to do is probably use sprintf() in void context and
180                * then strlen(buffer) for the length.  The more proper way
181                * would of course be to figure out the prototype of sprintf.
182                * --jhi */
183                 sprintf(r, "\\x{%"UVxf"}", k);
184                 r += strlen(r);
185             }
186         }
187         *r++ = '"';
188     } else {
189         /* Single quotes.  */
190         sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
191                 + qq_escapables + normal);
192         rstart = r = SvPVX(sv) + cur;
193         *r++ = '\'';
194         for (s = src; s < send; s ++) {
195             char k = *s;
196             if (k == '\'' || k == '\\')
197                 *r++ = '\\';
198             *r++ = k;
199         }
200         *r++ = '\'';
201     }
202     *r = '\0';
203     j = r - rstart;
204     SvCUR_set(sv, cur + j);
205
206     return j;
207 }
208
209 /* append a repeated string to an SV */
210 static SV *
211 sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
212 {
213     if (sv == Nullsv)
214         sv = newSVpvn("", 0);
215     else
216         assert(SvTYPE(sv) >= SVt_PV);
217
218     if (n > 0) {
219         SvGROW(sv, len*n + SvCUR(sv) + 1);
220         if (len == 1) {
221             char *start = SvPVX(sv) + SvCUR(sv);
222             SvCUR_set(sv, SvCUR(sv) + n);
223             start[n] = '\0';
224             while (n > 0)
225                 start[--n] = str[0];
226         }
227         else
228             while (n > 0) {
229                 sv_catpvn(sv, str, len);
230                 --n;
231             }
232     }
233     return sv;
234 }
235
236 /*
237  * This ought to be split into smaller functions. (it is one long function since
238  * it exactly parallels the perl version, which was one long thing for
239  * efficiency raisins.)  Ugggh!
240  */
241 static I32
242 DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
243         AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
244         SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
245         I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
246 {
247     char tmpbuf[128];
248     U32 i;
249     char *c, *r, *realpack, id[128];
250     SV **svp;
251     SV *sv, *ipad, *ival;
252     SV *blesspad = Nullsv;
253     AV *seenentry = Nullav;
254     char *iname;
255     STRLEN inamelen, idlen = 0;
256     U32 realtype;
257
258     if (!val)
259         return 0;
260
261     realtype = SvTYPE(val);
262
263     if (SvGMAGICAL(val))
264         mg_get(val);
265     if (SvROK(val)) {
266
267         /* If a freeze method is provided and the object has it, call
268            it.  Warn on errors. */
269         if (SvOBJECT(SvRV(val)) && freezer &&
270             SvPOK(freezer) && SvCUR(freezer) &&
271             gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(freezer), 
272                          SvCUR(freezer), -1) != NULL)
273         {
274             dSP; ENTER; SAVETMPS; PUSHMARK(sp);
275             XPUSHs(val); PUTBACK;
276             i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID);
277             SPAGAIN;
278             if (SvTRUE(ERRSV))
279                 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
280             PUTBACK; FREETMPS; LEAVE;
281         }
282         
283         ival = SvRV(val);
284         realtype = SvTYPE(ival);
285         (void) sprintf(id, "0x%"UVxf, PTR2UV(ival));
286         idlen = strlen(id);
287         if (SvOBJECT(ival))
288             realpack = HvNAME_get(SvSTASH(ival));
289         else
290             realpack = Nullch;
291
292         /* if it has a name, we need to either look it up, or keep a tab
293          * on it so we know when we hit it later
294          */
295         if (namelen) {
296             if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
297                 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
298             {
299                 SV *othername;
300                 if ((svp = av_fetch(seenentry, 0, FALSE))
301                     && (othername = *svp))
302                 {
303                     if (purity && *levelp > 0) {
304                         SV *postentry;
305                         
306                         if (realtype == SVt_PVHV)
307                             sv_catpvn(retval, "{}", 2);
308                         else if (realtype == SVt_PVAV)
309                             sv_catpvn(retval, "[]", 2);
310                         else
311                             sv_catpvn(retval, "do{my $o}", 9);
312                         postentry = newSVpvn(name, namelen);
313                         sv_catpvn(postentry, " = ", 3);
314                         sv_catsv(postentry, othername);
315                         av_push(postav, postentry);
316                     }
317                     else {
318                         if (name[0] == '@' || name[0] == '%') {
319                             if ((SvPVX_const(othername))[0] == '\\' &&
320                                 (SvPVX_const(othername))[1] == name[0]) {
321                                 sv_catpvn(retval, SvPVX_const(othername)+1,
322                                           SvCUR(othername)-1);
323                             }
324                             else {
325                                 sv_catpvn(retval, name, 1);
326                                 sv_catpvn(retval, "{", 1);
327                                 sv_catsv(retval, othername);
328                                 sv_catpvn(retval, "}", 1);
329                             }
330                         }
331                         else
332                             sv_catsv(retval, othername);
333                     }
334                     return 1;
335                 }
336                 else {
337                     warn("ref name not found for %s", id);
338                     return 0;
339                 }
340             }
341             else {   /* store our name and continue */
342                 SV *namesv;
343                 if (name[0] == '@' || name[0] == '%') {
344                     namesv = newSVpvn("\\", 1);
345                     sv_catpvn(namesv, name, namelen);
346                 }
347                 else if (realtype == SVt_PVCV && name[0] == '*') {
348                     namesv = newSVpvn("\\", 2);
349                     sv_catpvn(namesv, name, namelen);
350                     (SvPVX(namesv))[1] = '&';
351                 }
352                 else
353                     namesv = newSVpvn(name, namelen);
354                 seenentry = newAV();
355                 av_push(seenentry, namesv);
356                 (void)SvREFCNT_inc(val);
357                 av_push(seenentry, val);
358                 (void)hv_store(seenhv, id, strlen(id),
359                                newRV_inc((SV*)seenentry), 0);
360                 SvREFCNT_dec(seenentry);
361             }
362         }
363
364         if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) {
365             STRLEN rlen;
366             char *rval = SvPV(val, rlen);
367             char *slash = strchr(rval, '/');
368             sv_catpvn(retval, "qr/", 3);
369             while (slash) {
370                 sv_catpvn(retval, rval, slash-rval);
371                 sv_catpvn(retval, "\\/", 2);
372                 rlen -= slash-rval+1;
373                 rval = slash+1;
374                 slash = strchr(rval, '/');
375             }
376             sv_catpvn(retval, rval, rlen);
377             sv_catpvn(retval, "/", 1);
378             return 1;
379         }
380
381         /* If purity is not set and maxdepth is set, then check depth:
382          * if we have reached maximum depth, return the string
383          * representation of the thing we are currently examining
384          * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
385          */
386         if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
387             STRLEN vallen;
388             char *valstr = SvPV(val,vallen);
389             sv_catpvn(retval, "'", 1);
390             sv_catpvn(retval, valstr, vallen);
391             sv_catpvn(retval, "'", 1);
392             return 1;
393         }
394
395         if (realpack) {                         /* we have a blessed ref */
396             STRLEN blesslen;
397             char *blessstr = SvPV(bless, blesslen);
398             sv_catpvn(retval, blessstr, blesslen);
399             sv_catpvn(retval, "( ", 2);
400             if (indent >= 2) {
401                 blesspad = apad;
402                 apad = newSVsv(apad);
403                 sv_x(aTHX_ apad, " ", 1, blesslen+2);
404             }
405         }
406
407         (*levelp)++;
408         ipad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp);
409
410         if (realtype <= SVt_PVBM) {                          /* scalar ref */
411             SV *namesv = newSVpvn("${", 2);
412             sv_catpvn(namesv, name, namelen);
413             sv_catpvn(namesv, "}", 1);
414             if (realpack) {                                  /* blessed */
415                 sv_catpvn(retval, "do{\\(my $o = ", 13);
416                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
417                         postav, levelp, indent, pad, xpad, apad, sep, pair,
418                         freezer, toaster, purity, deepcopy, quotekeys, bless,
419                         maxdepth, sortkeys);
420                 sv_catpvn(retval, ")}", 2);
421             }                                                /* plain */
422             else {
423                 sv_catpvn(retval, "\\", 1);
424                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
425                         postav, levelp, indent, pad, xpad, apad, sep, pair,
426                         freezer, toaster, purity, deepcopy, quotekeys, bless,
427                         maxdepth, sortkeys);
428             }
429             SvREFCNT_dec(namesv);
430         }
431         else if (realtype == SVt_PVGV) {                     /* glob ref */
432             SV *namesv = newSVpvn("*{", 2);
433             sv_catpvn(namesv, name, namelen);
434             sv_catpvn(namesv, "}", 1);
435             sv_catpvn(retval, "\\", 1);
436             DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
437                     postav, levelp,     indent, pad, xpad, apad, sep, pair,
438                     freezer, toaster, purity, deepcopy, quotekeys, bless,
439                     maxdepth, sortkeys);
440             SvREFCNT_dec(namesv);
441         }
442         else if (realtype == SVt_PVAV) {
443             SV *totpad;
444             I32 ix = 0;
445             I32 ixmax = av_len((AV *)ival);
446         
447             SV *ixsv = newSViv(0);
448             /* allowing for a 24 char wide array index */
449             New(0, iname, namelen+28, char);
450             (void)strcpy(iname, name);
451             inamelen = namelen;
452             if (name[0] == '@') {
453                 sv_catpvn(retval, "(", 1);
454                 iname[0] = '$';
455             }
456             else {
457                 sv_catpvn(retval, "[", 1);
458                 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
459                 /*if (namelen > 0
460                     && name[namelen-1] != ']' && name[namelen-1] != '}'
461                     && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
462                 if ((namelen > 0
463                      && name[namelen-1] != ']' && name[namelen-1] != '}')
464                     || (namelen > 4
465                         && (name[1] == '{'
466                             || (name[0] == '\\' && name[2] == '{'))))
467                 {
468                     iname[inamelen++] = '-'; iname[inamelen++] = '>';
469                     iname[inamelen] = '\0';
470                 }
471             }
472             if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
473                 (instr(iname+inamelen-8, "{SCALAR}") ||
474                  instr(iname+inamelen-7, "{ARRAY}") ||
475                  instr(iname+inamelen-6, "{HASH}"))) {
476                 iname[inamelen++] = '-'; iname[inamelen++] = '>';
477             }
478             iname[inamelen++] = '['; iname[inamelen] = '\0';
479             totpad = newSVsv(sep);
480             sv_catsv(totpad, pad);
481             sv_catsv(totpad, apad);
482
483             for (ix = 0; ix <= ixmax; ++ix) {
484                 STRLEN ilen;
485                 SV *elem;
486                 svp = av_fetch((AV*)ival, ix, FALSE);
487                 if (svp)
488                     elem = *svp;
489                 else
490                     elem = &PL_sv_undef;
491                 
492                 ilen = inamelen;
493                 sv_setiv(ixsv, ix);
494                 (void) sprintf(iname+ilen, "%"IVdf, (IV)ix);
495                 ilen = strlen(iname);
496                 iname[ilen++] = ']'; iname[ilen] = '\0';
497                 if (indent >= 3) {
498                     sv_catsv(retval, totpad);
499                     sv_catsv(retval, ipad);
500                     sv_catpvn(retval, "#", 1);
501                     sv_catsv(retval, ixsv);
502                 }
503                 sv_catsv(retval, totpad);
504                 sv_catsv(retval, ipad);
505                 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
506                         levelp, indent, pad, xpad, apad, sep, pair,
507                         freezer, toaster, purity, deepcopy, quotekeys, bless,
508                         maxdepth, sortkeys);
509                 if (ix < ixmax)
510                     sv_catpvn(retval, ",", 1);
511             }
512             if (ixmax >= 0) {
513                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1);
514                 sv_catsv(retval, totpad);
515                 sv_catsv(retval, opad);
516                 SvREFCNT_dec(opad);
517             }
518             if (name[0] == '@')
519                 sv_catpvn(retval, ")", 1);
520             else
521                 sv_catpvn(retval, "]", 1);
522             SvREFCNT_dec(ixsv);
523             SvREFCNT_dec(totpad);
524             Safefree(iname);
525         }
526         else if (realtype == SVt_PVHV) {
527             SV *totpad, *newapad;
528             SV *iname, *sname;
529             HE *entry;
530             char *key;
531             I32 klen;
532             SV *hval;
533             AV *keys = Nullav;
534         
535             iname = newSVpvn(name, namelen);
536             if (name[0] == '%') {
537                 sv_catpvn(retval, "(", 1);
538                 (SvPVX(iname))[0] = '$';
539             }
540             else {
541                 sv_catpvn(retval, "{", 1);
542                 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
543                 if ((namelen > 0
544                      && name[namelen-1] != ']' && name[namelen-1] != '}')
545                     || (namelen > 4
546                         && (name[1] == '{'
547                             || (name[0] == '\\' && name[2] == '{'))))
548                 {
549                     sv_catpvn(iname, "->", 2);
550                 }
551             }
552             if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
553                 (instr(name+namelen-8, "{SCALAR}") ||
554                  instr(name+namelen-7, "{ARRAY}") ||
555                  instr(name+namelen-6, "{HASH}"))) {
556                 sv_catpvn(iname, "->", 2);
557             }
558             sv_catpvn(iname, "{", 1);
559             totpad = newSVsv(sep);
560             sv_catsv(totpad, pad);
561             sv_catsv(totpad, apad);
562         
563             /* If requested, get a sorted/filtered array of hash keys */
564             if (sortkeys) {
565                 if (sortkeys == &PL_sv_yes) {
566 #if PERL_VERSION < 8
567                     sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
568 #else
569                     keys = newAV();
570                     (void)hv_iterinit((HV*)ival);
571                     while ((entry = hv_iternext((HV*)ival))) {
572                         sv = hv_iterkeysv(entry);
573                         SvREFCNT_inc(sv);
574                         av_push(keys, sv);
575                     }
576 # ifdef USE_LOCALE_NUMERIC
577                     sortsv(AvARRAY(keys), 
578                            av_len(keys)+1, 
579                            IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
580 # else
581                     sortsv(AvARRAY(keys), 
582                            av_len(keys)+1, 
583                            Perl_sv_cmp);
584 # endif
585 #endif
586                 }
587                 if (sortkeys != &PL_sv_yes) {
588                     dSP; ENTER; SAVETMPS; PUSHMARK(sp);
589                     XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
590                     i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
591                     SPAGAIN;
592                     if (i) {
593                         sv = POPs;
594                         if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
595                             keys = (AV*)SvREFCNT_inc(SvRV(sv));
596                     }
597                     if (! keys)
598                         warn("Sortkeys subroutine did not return ARRAYREF\n");
599                     PUTBACK; FREETMPS; LEAVE;
600                 }
601                 if (keys)
602                     sv_2mortal((SV*)keys);
603             }
604             else
605                 (void)hv_iterinit((HV*)ival);
606
607             /* foreach (keys %hash) */
608             for (i = 0; 1; i++) {
609                 char *nkey;
610                 char *nkey_buffer = NULL;
611                 I32 nticks = 0;
612                 SV* keysv;
613                 STRLEN keylen;
614                 I32 nlen;
615                 bool do_utf8 = FALSE;
616
617                if (sortkeys) {
618                    if (!(keys && (I32)i <= av_len(keys))) break;
619                } else {
620                    if (!(entry = hv_iternext((HV *)ival))) break;
621                }
622
623                 if (i)
624                     sv_catpvn(retval, ",", 1);
625
626                 if (sortkeys) {
627                     char *key;
628                     svp = av_fetch(keys, i, FALSE);
629                     keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
630                     key = SvPV(keysv, keylen);
631                     svp = hv_fetch((HV*)ival, key,
632                                    SvUTF8(keysv) ? -(I32)keylen : keylen, 0);
633                     hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
634                 }
635                 else {
636                     keysv = hv_iterkeysv(entry);
637                     hval = hv_iterval((HV*)ival, entry);
638                 }
639
640                 do_utf8 = DO_UTF8(keysv);
641                 key = SvPV(keysv, keylen);
642                 klen = keylen;
643
644                 sv_catsv(retval, totpad);
645                 sv_catsv(retval, ipad);
646                 /* old logic was first to check utf8 flag, and if utf8 always
647                    call esc_q_utf8.  This caused test to break under -Mutf8,
648                    because there even strings like 'c' have utf8 flag on.
649                    Hence with quotekeys == 0 the XS code would still '' quote
650                    them based on flags, whereas the perl code would not,
651                    based on regexps.
652                    The perl code is correct.
653                    needs_quote() decides that anything that isn't a valid
654                    perl identifier needs to be quoted, hence only correctly
655                    formed strings with no characters outside [A-Za-z0-9_:]
656                    won't need quoting.  None of those characters are used in
657                    the byte encoding of utf8, so anything with utf8
658                    encoded characters in will need quoting. Hence strings
659                    with utf8 encoded characters in will end up inside do_utf8
660                    just like before, but now strings with utf8 flag set but
661                    only ascii characters will end up in the unquoted section.
662
663                    There should also be less tests for the (probably currently)
664                    more common doesn't need quoting case.
665                    The code is also smaller (22044 vs 22260) because I've been
666                    able to pull the common logic out to both sides.  */
667                 if (quotekeys || needs_quote(key)) {
668                     if (do_utf8) {
669                         STRLEN ocur = SvCUR(retval);
670                         nlen = esc_q_utf8(aTHX_ retval, key, klen);
671                         nkey = SvPVX(retval) + ocur;
672                     }
673                     else {
674                         nticks = num_q(key, klen);
675                         New(0, nkey_buffer, klen+nticks+3, char);
676                         nkey = nkey_buffer;
677                         nkey[0] = '\'';
678                         if (nticks)
679                             klen += esc_q(nkey+1, key, klen);
680                         else
681                             (void)Copy(key, nkey+1, klen, char);
682                         nkey[++klen] = '\'';
683                         nkey[++klen] = '\0';
684                         nlen = klen;
685                         sv_catpvn(retval, nkey, klen);
686                     }
687                 }
688                 else {
689                     nkey = key;
690                     nlen = klen;
691                     sv_catpvn(retval, nkey, klen);
692                 }
693                 sname = newSVsv(iname);
694                 sv_catpvn(sname, nkey, nlen);
695                 sv_catpvn(sname, "}", 1);
696
697                 sv_catsv(retval, pair);
698                 if (indent >= 2) {
699                     char *extra;
700                     I32 elen = 0;
701                     newapad = newSVsv(apad);
702                     New(0, extra, klen+4+1, char);
703                     while (elen < (klen+4))
704                         extra[elen++] = ' ';
705                     extra[elen] = '\0';
706                     sv_catpvn(newapad, extra, elen);
707                     Safefree(extra);
708                 }
709                 else
710                     newapad = apad;
711
712                 DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
713                         postav, levelp, indent, pad, xpad, newapad, sep, pair,
714                         freezer, toaster, purity, deepcopy, quotekeys, bless,
715                         maxdepth, sortkeys);
716                 SvREFCNT_dec(sname);
717                 Safefree(nkey_buffer);
718                 if (indent >= 2)
719                     SvREFCNT_dec(newapad);
720             }
721             if (i) {
722                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp-1);
723                 sv_catsv(retval, totpad);
724                 sv_catsv(retval, opad);
725                 SvREFCNT_dec(opad);
726             }
727             if (name[0] == '%')
728                 sv_catpvn(retval, ")", 1);
729             else
730                 sv_catpvn(retval, "}", 1);
731             SvREFCNT_dec(iname);
732             SvREFCNT_dec(totpad);
733         }
734         else if (realtype == SVt_PVCV) {
735             sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
736             if (purity)
737                 warn("Encountered CODE ref, using dummy placeholder");
738         }
739         else {
740             warn("cannot handle ref type %ld", realtype);
741         }
742
743         if (realpack) {  /* free blessed allocs */
744             if (indent >= 2) {
745                 SvREFCNT_dec(apad);
746                 apad = blesspad;
747             }
748             sv_catpvn(retval, ", '", 3);
749             sv_catpvn(retval, realpack, strlen(realpack));
750             sv_catpvn(retval, "' )", 3);
751             if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
752                 sv_catpvn(retval, "->", 2);
753                 sv_catsv(retval, toaster);
754                 sv_catpvn(retval, "()", 2);
755             }
756         }
757         SvREFCNT_dec(ipad);
758         (*levelp)--;
759     }
760     else {
761         STRLEN i;
762         
763         if (namelen) {
764             (void) sprintf(id, "0x%"UVxf, PTR2UV(val));
765             if ((svp = hv_fetch(seenhv, id, (idlen = strlen(id)), FALSE)) &&
766                 (sv = *svp) && SvROK(sv) &&
767                 (seenentry = (AV*)SvRV(sv)))
768             {
769                 SV *othername;
770                 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
771                     && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
772                 {
773                     sv_catpvn(retval, "${", 2);
774                     sv_catsv(retval, othername);
775                     sv_catpvn(retval, "}", 1);
776                     return 1;
777                 }
778             }
779             else if (val != &PL_sv_undef) {
780                 SV *namesv;
781                 namesv = newSVpvn("\\", 1);
782                 sv_catpvn(namesv, name, namelen);
783                 seenentry = newAV();
784                 av_push(seenentry, namesv);
785                 av_push(seenentry, newRV_inc(val));
786                 (void)hv_store(seenhv, id, strlen(id), newRV_inc((SV*)seenentry), 0);
787                 SvREFCNT_dec(seenentry);
788             }
789         }
790
791         if (DD_is_integer(val)) {
792             STRLEN len;
793             if (SvIsUV(val))
794               (void) sprintf(tmpbuf, "%"UVuf, SvUV(val));
795             else
796               (void) sprintf(tmpbuf, "%"IVdf, SvIV(val));
797             len = strlen(tmpbuf);
798             if (SvPOK(val)) {
799               /* Need to check to see if this is a string such as " 0".
800                  I'm assuming from sprintf isn't going to clash with utf8.
801                  Is this valid on EBCDIC?  */
802               STRLEN pvlen;
803               const char *pv = SvPV(val, pvlen);
804               if (pvlen != len || memNE(pv, tmpbuf, len))
805                 goto integer_came_from_string;
806             }
807             if (len > 10) {
808               /* Looks like we're on a 64 bit system.  Make it a string so that
809                  if a 32 bit system reads the number it will cope better.  */
810               sv_catpvf(retval, "'%s'", tmpbuf);
811             } else
812               sv_catpvn(retval, tmpbuf, len);
813         }
814         else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
815             c = SvPV(val, i);
816             ++c; --i;                   /* just get the name */
817             if (i >= 6 && strncmp(c, "main::", 6) == 0) {
818                 c += 4;
819                 i -= 4;
820             }
821             if (needs_quote(c)) {
822                 sv_grow(retval, SvCUR(retval)+6+2*i);
823                 r = SvPVX(retval)+SvCUR(retval);
824                 r[0] = '*'; r[1] = '{'; r[2] = '\'';
825                 i += esc_q(r+3, c, i);
826                 i += 3;
827                 r[i++] = '\''; r[i++] = '}';
828                 r[i] = '\0';
829             }
830             else {
831                 sv_grow(retval, SvCUR(retval)+i+2);
832                 r = SvPVX(retval)+SvCUR(retval);
833                 r[0] = '*'; strcpy(r+1, c);
834                 i++;
835             }
836             SvCUR_set(retval, SvCUR(retval)+i);
837
838             if (purity) {
839                 static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
840                 static const STRLEN sizes[] = { 8, 7, 6 };
841                 SV *e;
842                 SV *nname = newSVpvn("", 0);
843                 SV *newapad = newSVpvn("", 0);
844                 GV *gv = (GV*)val;
845                 I32 j;
846                 
847                 for (j=0; j<3; j++) {
848                     e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
849                     if (!e)
850                         continue;
851                     if (j == 0 && !SvOK(e))
852                         continue;
853
854                     {
855                         I32 nlevel = 0;
856                         SV *postentry = newSVpvn(r,i);
857                         
858                         sv_setsv(nname, postentry);
859                         sv_catpvn(nname, entries[j], sizes[j]);
860                         sv_catpvn(postentry, " = ", 3);
861                         av_push(postav, postentry);
862                         e = newRV_inc(e);
863                         
864                         SvCUR_set(newapad, 0);
865                         if (indent >= 2)
866                             (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
867                         
868                         DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry,
869                                 seenhv, postav, &nlevel, indent, pad, xpad,
870                                 newapad, sep, pair, freezer, toaster, purity,
871                                 deepcopy, quotekeys, bless, maxdepth, 
872                                 sortkeys);
873                         SvREFCNT_dec(e);
874                     }
875                 }
876                 
877                 SvREFCNT_dec(newapad);
878                 SvREFCNT_dec(nname);
879             }
880         }
881         else if (val == &PL_sv_undef || !SvOK(val)) {
882             sv_catpvn(retval, "undef", 5);
883         }
884         else {
885         integer_came_from_string:
886             c = SvPV(val, i);
887             if (DO_UTF8(val))
888                 i += esc_q_utf8(aTHX_ retval, c, i);
889             else {
890                 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
891                 r = SvPVX(retval) + SvCUR(retval);
892                 r[0] = '\'';
893                 i += esc_q(r+1, c, i);
894                 ++i;
895                 r[i++] = '\'';
896                 r[i] = '\0';
897                 SvCUR_set(retval, SvCUR(retval)+i);
898             }
899         }
900     }
901
902     if (idlen) {
903         if (deepcopy)
904             (void)hv_delete(seenhv, id, idlen, G_DISCARD);
905         else if (namelen && seenentry) {
906             SV *mark = *av_fetch(seenentry, 2, TRUE);
907             sv_setiv(mark,1);
908         }
909     }
910     return 1;
911 }
912
913
914 MODULE = Data::Dumper           PACKAGE = Data::Dumper         PREFIX = Data_Dumper_
915
916 #
917 # This is the exact equivalent of Dump.  Well, almost. The things that are
918 # different as of now (due to Laziness):
919 #   * doesnt do double-quotes yet.
920 #
921
922 void
923 Data_Dumper_Dumpxs(href, ...)
924         SV      *href;
925         PROTOTYPE: $;$$
926         PPCODE:
927         {
928             HV *hv;
929             SV *retval, *valstr;
930             HV *seenhv = Nullhv;
931             AV *postav, *todumpav, *namesav;
932             I32 level = 0;
933             I32 indent, terse, i, imax, postlen;
934             SV **svp;
935             SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
936             SV *freezer, *toaster, *bless, *sortkeys;
937             I32 purity, deepcopy, quotekeys, maxdepth = 0;
938             char tmpbuf[1024];
939             I32 gimme = GIMME;
940
941             if (!SvROK(href)) {         /* call new to get an object first */
942                 if (items < 2)
943                     croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
944                 
945                 ENTER;
946                 SAVETMPS;
947                 
948                 PUSHMARK(sp);
949                 XPUSHs(href);
950                 XPUSHs(sv_2mortal(newSVsv(ST(1))));
951                 if (items >= 3)
952                     XPUSHs(sv_2mortal(newSVsv(ST(2))));
953                 PUTBACK;
954                 i = perl_call_method("new", G_SCALAR);
955                 SPAGAIN;
956                 if (i)
957                     href = newSVsv(POPs);
958
959                 PUTBACK;
960                 FREETMPS;
961                 LEAVE;
962                 if (i)
963                     (void)sv_2mortal(href);
964             }
965
966             todumpav = namesav = Nullav;
967             seenhv = Nullhv;
968             val = pad = xpad = apad = sep = pair = varname
969                 = freezer = toaster = bless = &PL_sv_undef;
970             name = sv_newmortal();
971             indent = 2;
972             terse = purity = deepcopy = 0;
973             quotekeys = 1;
974         
975             retval = newSVpvn("", 0);
976             if (SvROK(href)
977                 && (hv = (HV*)SvRV((SV*)href))
978                 && SvTYPE(hv) == SVt_PVHV)              {
979
980                 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
981                     seenhv = (HV*)SvRV(*svp);
982                 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
983                     todumpav = (AV*)SvRV(*svp);
984                 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
985                     namesav = (AV*)SvRV(*svp);
986                 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
987                     indent = SvIV(*svp);
988                 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
989                     purity = SvIV(*svp);
990                 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
991                     terse = SvTRUE(*svp);
992 #if 0 /* useqq currently unused */
993                 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
994                     useqq = SvTRUE(*svp);
995 #endif
996                 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
997                     pad = *svp;
998                 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
999                     xpad = *svp;
1000                 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
1001                     apad = *svp;
1002                 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
1003                     sep = *svp;
1004                 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
1005                     pair = *svp;
1006                 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1007                     varname = *svp;
1008                 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
1009                     freezer = *svp;
1010                 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
1011                     toaster = *svp;
1012                 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
1013                     deepcopy = SvTRUE(*svp);
1014                 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
1015                     quotekeys = SvTRUE(*svp);
1016                 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
1017                     bless = *svp;
1018                 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
1019                     maxdepth = SvIV(*svp);
1020                 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1021                     sortkeys = *svp;
1022                     if (! SvTRUE(sortkeys))
1023                         sortkeys = NULL;
1024                     else if (! (SvROK(sortkeys) &&
1025                                 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1026                     {
1027                         /* flag to use qsortsv() for sorting hash keys */       
1028                         sortkeys = &PL_sv_yes; 
1029                     }
1030                 }
1031                 postav = newAV();
1032
1033                 if (todumpav)
1034                     imax = av_len(todumpav);
1035                 else
1036                     imax = -1;
1037                 valstr = newSVpvn("",0);
1038                 for (i = 0; i <= imax; ++i) {
1039                     SV *newapad;
1040                 
1041                     av_clear(postav);
1042                     if ((svp = av_fetch(todumpav, i, FALSE)))
1043                         val = *svp;
1044                     else
1045                         val = &PL_sv_undef;
1046                     if ((svp = av_fetch(namesav, i, TRUE))) {
1047                         sv_setsv(name, *svp);
1048                         if (SvOK(*svp) && !SvPOK(*svp))
1049                             (void)SvPV_nolen_const(name);
1050                     }
1051                     else
1052                         (void)SvOK_off(name);
1053                 
1054                     if (SvPOK(name)) {
1055                         if ((SvPVX_const(name))[0] == '*') {
1056                             if (SvROK(val)) {
1057                                 switch (SvTYPE(SvRV(val))) {
1058                                 case SVt_PVAV:
1059                                     (SvPVX(name))[0] = '@';
1060                                     break;
1061                                 case SVt_PVHV:
1062                                     (SvPVX(name))[0] = '%';
1063                                     break;
1064                                 case SVt_PVCV:
1065                                     (SvPVX(name))[0] = '*';
1066                                     break;
1067                                 default:
1068                                     (SvPVX(name))[0] = '$';
1069                                     break;
1070                                 }
1071                             }
1072                             else
1073                                 (SvPVX(name))[0] = '$';
1074                         }
1075                         else if ((SvPVX_const(name))[0] != '$')
1076                             sv_insert(name, 0, 0, "$", 1);
1077                     }
1078                     else {
1079                         STRLEN nchars = 0;
1080                         sv_setpvn(name, "$", 1);
1081                         sv_catsv(name, varname);
1082                         (void) sprintf(tmpbuf, "%"IVdf, (IV)(i+1));
1083                         nchars = strlen(tmpbuf);
1084                         sv_catpvn(name, tmpbuf, nchars);
1085                     }
1086                 
1087                     if (indent >= 2) {
1088                         SV *tmpsv = sv_x(aTHX_ Nullsv, " ", 1, SvCUR(name)+3);
1089                         newapad = newSVsv(apad);
1090                         sv_catsv(newapad, tmpsv);
1091                         SvREFCNT_dec(tmpsv);
1092                     }
1093                     else
1094                         newapad = apad;
1095                 
1096                     DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
1097                             postav, &level, indent, pad, xpad, newapad, sep, pair,
1098                             freezer, toaster, purity, deepcopy, quotekeys,
1099                             bless, maxdepth, sortkeys);
1100                 
1101                     if (indent >= 2)
1102                         SvREFCNT_dec(newapad);
1103
1104                     postlen = av_len(postav);
1105                     if (postlen >= 0 || !terse) {
1106                         sv_insert(valstr, 0, 0, " = ", 3);
1107                         sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name));
1108                         sv_catpvn(valstr, ";", 1);
1109                     }
1110                     sv_catsv(retval, pad);
1111                     sv_catsv(retval, valstr);
1112                     sv_catsv(retval, sep);
1113                     if (postlen >= 0) {
1114                         I32 i;
1115                         sv_catsv(retval, pad);
1116                         for (i = 0; i <= postlen; ++i) {
1117                             SV *elem;
1118                             svp = av_fetch(postav, i, FALSE);
1119                             if (svp && (elem = *svp)) {
1120                                 sv_catsv(retval, elem);
1121                                 if (i < postlen) {
1122                                     sv_catpvn(retval, ";", 1);
1123                                     sv_catsv(retval, sep);
1124                                     sv_catsv(retval, pad);
1125                                 }
1126                             }
1127                         }
1128                         sv_catpvn(retval, ";", 1);
1129                             sv_catsv(retval, sep);
1130                     }
1131                     sv_setpvn(valstr, "", 0);
1132                     if (gimme == G_ARRAY) {
1133                         XPUSHs(sv_2mortal(retval));
1134                         if (i < imax)   /* not the last time thro ? */
1135                             retval = newSVpvn("",0);
1136                     }
1137                 }
1138                 SvREFCNT_dec(postav);
1139                 SvREFCNT_dec(valstr);
1140             }
1141             else
1142                 croak("Call to new() method failed to return HASH ref");
1143             if (gimme == G_SCALAR)
1144                 XPUSHs(sv_2mortal(retval));
1145         }