b09a023bcd2bacf04797135add0eb62b6824a897
[p5sagit/p5-mst-13.2.git] / ext / Data / Dumper / Dumper.xs
1 #define PERL_NO_GET_CONTEXT
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5 #include "ppport.h"
6
7 #if PERL_VERSION < 6
8 #  define DD_USE_OLD_ID_FORMAT
9 #endif
10
11 static I32 num_q (const char *s, STRLEN slen);
12 static I32 esc_q (char *dest, const char *src, STRLEN slen);
13 static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
14 static I32 needs_quote(register const char *s);
15 static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
16 static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
17                     HV *seenhv, AV *postav, I32 *levelp, I32 indent,
18                     SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
19                     SV *freezer, SV *toaster,
20                     I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
21                     I32 maxdepth, SV *sortkeys);
22
23 #ifndef HvNAME_get
24 #define HvNAME_get HvNAME
25 #endif
26
27 #if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
28
29 # ifdef EBCDIC
30 #  define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
31 # else
32 #  define UNI_TO_NATIVE(ch) (ch)
33 # endif
34
35 UV
36 Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
37 {
38     const UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
39                     ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
40     return UNI_TO_NATIVE(uv);
41 }
42
43 # if !defined(PERL_IMPLICIT_CONTEXT)
44 #  define utf8_to_uvchr      Perl_utf8_to_uvchr
45 # else
46 #  define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
47 # endif
48
49 #endif /* PERL_VERSION <= 6 */
50
51 /* Changes in 5.7 series mean that now IOK is only set if scalar is
52    precisely integer but in 5.6 and earlier we need to do a more
53    complex test  */
54 #if PERL_VERSION <= 6
55 #define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
56 #else
57 #define DD_is_integer(sv) SvIOK(sv)
58 #endif
59
60 /* does a string need to be protected? */
61 static I32
62 needs_quote(register const char *s)
63 {
64 TOP:
65     if (s[0] == ':') {
66         if (*++s) {
67             if (*s++ != ':')
68                 return 1;
69         }
70         else
71             return 1;
72     }
73     if (isIDFIRST(*s)) {
74         while (*++s)
75             if (!isALNUM(*s)) {
76                 if (*s == ':')
77                     goto TOP;
78                 else
79                     return 1;
80             }
81     }
82     else
83         return 1;
84     return 0;
85 }
86
87 /* count the number of "'"s and "\"s in string */
88 static I32
89 num_q(register const char *s, register STRLEN slen)
90 {
91     register I32 ret = 0;
92
93     while (slen > 0) {
94         if (*s == '\'' || *s == '\\')
95             ++ret;
96         ++s;
97         --slen;
98     }
99     return ret;
100 }
101
102
103 /* returns number of chars added to escape "'"s and "\"s in s */
104 /* slen number of characters in s will be escaped */
105 /* destination must be long enough for additional chars */
106 static I32
107 esc_q(register char *d, register const char *s, register STRLEN slen)
108 {
109     register I32 ret = 0;
110
111     while (slen > 0) {
112         switch (*s) {
113         case '\'':
114         case '\\':
115             *d = '\\';
116             ++d; ++ret;
117         default:
118             *d = *s;
119             ++d; ++s; --slen;
120             break;
121         }
122     }
123     return ret;
124 }
125
126 static I32
127 esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
128 {
129     char *r, *rstart;
130     const char *s = src;
131     const char * const send = src + slen;
132     STRLEN j, cur = SvCUR(sv);
133     /* Could count 128-255 and 256+ in two variables, if we want to
134        be like &qquote and make a distinction.  */
135     STRLEN grow = 0;    /* bytes needed to represent chars 128+ */
136     /* STRLEN topbit_grow = 0;  bytes needed to represent chars 128-255 */
137     STRLEN backslashes = 0;
138     STRLEN single_quotes = 0;
139     STRLEN qq_escapables = 0;   /* " $ @ will need a \ in "" strings.  */
140     STRLEN normal = 0;
141
142     /* this will need EBCDICification */
143     for (s = src; s < send; s += UTF8SKIP(s)) {
144         const UV k = utf8_to_uvchr((U8*)s, NULL);
145
146 #ifdef EBCDIC
147         if (!isprint(k) || k > 256) {
148 #else
149         if (k > 127) {
150 #endif
151             /* 4: \x{} then count the number of hex digits.  */
152             grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
153 #if UVSIZE == 4
154                 8 /* We may allocate a bit more than the minimum here.  */
155 #else
156                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
157 #endif
158                 );
159         } else if (k == '\\') {
160             backslashes++;
161         } else if (k == '\'') {
162             single_quotes++;
163         } else if (k == '"' || k == '$' || k == '@') {
164             qq_escapables++;
165         } else {
166             normal++;
167         }
168     }
169     if (grow) {
170         /* We have something needing hex. 3 is ""\0 */
171         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
172                 + 2*qq_escapables + normal);
173         rstart = r = SvPVX(sv) + cur;
174
175         *r++ = '"';
176
177         for (s = src; s < send; s += UTF8SKIP(s)) {
178             const UV k = utf8_to_uvchr((U8*)s, NULL);
179
180             if (k == '"' || k == '\\' || k == '$' || k == '@') {
181                 *r++ = '\\';
182                 *r++ = (char)k;
183             }
184             else
185 #ifdef EBCDIC
186               if (isprint(k) && k < 256)
187 #else
188               if (k < 0x80)
189 #endif
190                 *r++ = (char)k;
191             else {
192                 r = r + my_sprintf(r, "\\x{%"UVxf"}", k);
193             }
194         }
195         *r++ = '"';
196     } else {
197         /* Single quotes.  */
198         sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
199                 + qq_escapables + normal);
200         rstart = r = SvPVX(sv) + cur;
201         *r++ = '\'';
202         for (s = src; s < send; s ++) {
203             const char k = *s;
204             if (k == '\'' || k == '\\')
205                 *r++ = '\\';
206             *r++ = k;
207         }
208         *r++ = '\'';
209     }
210     *r = '\0';
211     j = r - rstart;
212     SvCUR_set(sv, cur + j);
213
214     return j;
215 }
216
217 /* append a repeated string to an SV */
218 static SV *
219 sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
220 {
221     if (!sv)
222         sv = newSVpvn("", 0);
223 #ifdef DEBUGGING
224     else
225         assert(SvTYPE(sv) >= SVt_PV);
226 #endif
227
228     if (n > 0) {
229         SvGROW(sv, len*n + SvCUR(sv) + 1);
230         if (len == 1) {
231             char * const start = SvPVX(sv) + SvCUR(sv);
232             SvCUR_set(sv, SvCUR(sv) + n);
233             start[n] = '\0';
234             while (n > 0)
235                 start[--n] = str[0];
236         }
237         else
238             while (n > 0) {
239                 sv_catpvn(sv, str, len);
240                 --n;
241             }
242     }
243     return sv;
244 }
245
246 /*
247  * This ought to be split into smaller functions. (it is one long function since
248  * it exactly parallels the perl version, which was one long thing for
249  * efficiency raisins.)  Ugggh!
250  */
251 static I32
252 DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
253         AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
254         SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
255         I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
256 {
257     char tmpbuf[128];
258     U32 i;
259     char *c, *r, *realpack;
260 #ifdef DD_USE_OLD_ID_FORMAT
261     char id[128];
262 #else
263     UV id_buffer;
264     char *const id = (char *)&id_buffer;
265 #endif
266     SV **svp;
267     SV *sv, *ipad, *ival;
268     SV *blesspad = Nullsv;
269     AV *seenentry = NULL;
270     char *iname;
271     STRLEN inamelen, idlen = 0;
272     U32 realtype;
273
274     if (!val)
275         return 0;
276
277     /* If the ouput buffer has less than some arbitary amount of space
278        remaining, then enlarge it. For the test case (25M of output),
279        *1.1 was slower, *2.0 was the same, so the first guess of 1.5 is
280         deemed to be good enough.  */
281     if (SvTYPE(retval) >= SVt_PV && (SvLEN(retval) - SvCUR(retval)) < 42) {
282         sv_grow(retval, SvCUR(retval) * 3 / 2);
283     }
284
285     realtype = SvTYPE(val);
286
287     if (SvGMAGICAL(val))
288         mg_get(val);
289     if (SvROK(val)) {
290
291         /* If a freeze method is provided and the object has it, call
292            it.  Warn on errors. */
293         if (SvOBJECT(SvRV(val)) && freezer &&
294             SvPOK(freezer) && SvCUR(freezer) &&
295             gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(freezer), 
296                          SvCUR(freezer), -1) != NULL)
297         {
298             dSP; ENTER; SAVETMPS; PUSHMARK(sp);
299             XPUSHs(val); PUTBACK;
300             i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID);
301             SPAGAIN;
302             if (SvTRUE(ERRSV))
303                 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
304             PUTBACK; FREETMPS; LEAVE;
305         }
306         
307         ival = SvRV(val);
308         realtype = SvTYPE(ival);
309 #ifdef DD_USE_OLD_ID_FORMAT
310         idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(ival));
311 #else
312         id_buffer = PTR2UV(ival);
313         idlen = sizeof(id_buffer);
314 #endif
315         if (SvOBJECT(ival))
316             realpack = HvNAME_get(SvSTASH(ival));
317         else
318             realpack = Nullch;
319
320         /* if it has a name, we need to either look it up, or keep a tab
321          * on it so we know when we hit it later
322          */
323         if (namelen) {
324             if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
325                 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
326             {
327                 SV *othername;
328                 if ((svp = av_fetch(seenentry, 0, FALSE))
329                     && (othername = *svp))
330                 {
331                     if (purity && *levelp > 0) {
332                         SV *postentry;
333                         
334                         if (realtype == SVt_PVHV)
335                             sv_catpvn(retval, "{}", 2);
336                         else if (realtype == SVt_PVAV)
337                             sv_catpvn(retval, "[]", 2);
338                         else
339                             sv_catpvn(retval, "do{my $o}", 9);
340                         postentry = newSVpvn(name, namelen);
341                         sv_catpvn(postentry, " = ", 3);
342                         sv_catsv(postentry, othername);
343                         av_push(postav, postentry);
344                     }
345                     else {
346                         if (name[0] == '@' || name[0] == '%') {
347                             if ((SvPVX_const(othername))[0] == '\\' &&
348                                 (SvPVX_const(othername))[1] == name[0]) {
349                                 sv_catpvn(retval, SvPVX_const(othername)+1,
350                                           SvCUR(othername)-1);
351                             }
352                             else {
353                                 sv_catpvn(retval, name, 1);
354                                 sv_catpvn(retval, "{", 1);
355                                 sv_catsv(retval, othername);
356                                 sv_catpvn(retval, "}", 1);
357                             }
358                         }
359                         else
360                             sv_catsv(retval, othername);
361                     }
362                     return 1;
363                 }
364                 else {
365 #ifdef DD_USE_OLD_ID_FORMAT
366                     warn("ref name not found for %s", id);
367 #else
368                     warn("ref name not found for 0x%"UVxf, PTR2UV(ival));
369 #endif
370                     return 0;
371                 }
372             }
373             else {   /* store our name and continue */
374                 SV *namesv;
375                 if (name[0] == '@' || name[0] == '%') {
376                     namesv = newSVpvn("\\", 1);
377                     sv_catpvn(namesv, name, namelen);
378                 }
379                 else if (realtype == SVt_PVCV && name[0] == '*') {
380                     namesv = newSVpvn("\\", 2);
381                     sv_catpvn(namesv, name, namelen);
382                     (SvPVX(namesv))[1] = '&';
383                 }
384                 else
385                     namesv = newSVpvn(name, namelen);
386                 seenentry = newAV();
387                 av_push(seenentry, namesv);
388                 (void)SvREFCNT_inc(val);
389                 av_push(seenentry, val);
390                 (void)hv_store(seenhv, id, idlen,
391                                newRV_inc((SV*)seenentry), 0);
392                 SvREFCNT_dec(seenentry);
393             }
394         }
395
396         if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) {
397             STRLEN rlen;
398             const char *rval = SvPV(val, rlen);
399             const char *slash = strchr(rval, '/');
400             sv_catpvn(retval, "qr/", 3);
401             while (slash) {
402                 sv_catpvn(retval, rval, slash-rval);
403                 sv_catpvn(retval, "\\/", 2);
404                 rlen -= slash-rval+1;
405                 rval = slash+1;
406                 slash = strchr(rval, '/');
407             }
408             sv_catpvn(retval, rval, rlen);
409             sv_catpvn(retval, "/", 1);
410             return 1;
411         }
412
413         /* If purity is not set and maxdepth is set, then check depth:
414          * if we have reached maximum depth, return the string
415          * representation of the thing we are currently examining
416          * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
417          */
418         if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
419             STRLEN vallen;
420             const char * const valstr = SvPV(val,vallen);
421             sv_catpvn(retval, "'", 1);
422             sv_catpvn(retval, valstr, vallen);
423             sv_catpvn(retval, "'", 1);
424             return 1;
425         }
426
427         if (realpack) {                         /* we have a blessed ref */
428             STRLEN blesslen;
429             const char * const blessstr = SvPV(bless, blesslen);
430             sv_catpvn(retval, blessstr, blesslen);
431             sv_catpvn(retval, "( ", 2);
432             if (indent >= 2) {
433                 blesspad = apad;
434                 apad = newSVsv(apad);
435                 sv_x(aTHX_ apad, " ", 1, blesslen+2);
436             }
437         }
438
439         (*levelp)++;
440         ipad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp);
441
442         if (
443 #if PERL_VERSION < 9
444                 realtype <= SVt_PVBM
445 #else
446                 realtype <= SVt_PVMG
447 #endif
448         ) {                          /* scalar ref */
449             SV * const namesv = newSVpvn("${", 2);
450             sv_catpvn(namesv, name, namelen);
451             sv_catpvn(namesv, "}", 1);
452             if (realpack) {                                  /* blessed */
453                 sv_catpvn(retval, "do{\\(my $o = ", 13);
454                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
455                         postav, levelp, indent, pad, xpad, apad, sep, pair,
456                         freezer, toaster, purity, deepcopy, quotekeys, bless,
457                         maxdepth, sortkeys);
458                 sv_catpvn(retval, ")}", 2);
459             }                                                /* plain */
460             else {
461                 sv_catpvn(retval, "\\", 1);
462                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
463                         postav, levelp, indent, pad, xpad, apad, sep, pair,
464                         freezer, toaster, purity, deepcopy, quotekeys, bless,
465                         maxdepth, sortkeys);
466             }
467             SvREFCNT_dec(namesv);
468         }
469         else if (realtype == SVt_PVGV) {                     /* glob ref */
470             SV * const namesv = newSVpvn("*{", 2);
471             sv_catpvn(namesv, name, namelen);
472             sv_catpvn(namesv, "}", 1);
473             sv_catpvn(retval, "\\", 1);
474             DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
475                     postav, levelp,     indent, pad, xpad, apad, sep, pair,
476                     freezer, toaster, purity, deepcopy, quotekeys, bless,
477                     maxdepth, sortkeys);
478             SvREFCNT_dec(namesv);
479         }
480         else if (realtype == SVt_PVAV) {
481             SV *totpad;
482             I32 ix = 0;
483             const I32 ixmax = av_len((AV *)ival);
484         
485             SV * const ixsv = newSViv(0);
486             /* allowing for a 24 char wide array index */
487             New(0, iname, namelen+28, char);
488             (void)strcpy(iname, name);
489             inamelen = namelen;
490             if (name[0] == '@') {
491                 sv_catpvn(retval, "(", 1);
492                 iname[0] = '$';
493             }
494             else {
495                 sv_catpvn(retval, "[", 1);
496                 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
497                 /*if (namelen > 0
498                     && name[namelen-1] != ']' && name[namelen-1] != '}'
499                     && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
500                 if ((namelen > 0
501                      && name[namelen-1] != ']' && name[namelen-1] != '}')
502                     || (namelen > 4
503                         && (name[1] == '{'
504                             || (name[0] == '\\' && name[2] == '{'))))
505                 {
506                     iname[inamelen++] = '-'; iname[inamelen++] = '>';
507                     iname[inamelen] = '\0';
508                 }
509             }
510             if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
511                 (instr(iname+inamelen-8, "{SCALAR}") ||
512                  instr(iname+inamelen-7, "{ARRAY}") ||
513                  instr(iname+inamelen-6, "{HASH}"))) {
514                 iname[inamelen++] = '-'; iname[inamelen++] = '>';
515             }
516             iname[inamelen++] = '['; iname[inamelen] = '\0';
517             totpad = newSVsv(sep);
518             sv_catsv(totpad, pad);
519             sv_catsv(totpad, apad);
520
521             for (ix = 0; ix <= ixmax; ++ix) {
522                 STRLEN ilen;
523                 SV *elem;
524                 svp = av_fetch((AV*)ival, ix, FALSE);
525                 if (svp)
526                     elem = *svp;
527                 else
528                     elem = &PL_sv_undef;
529                 
530                 ilen = inamelen;
531                 sv_setiv(ixsv, ix);
532                 ilen = ilen + my_sprintf(iname+ilen, "%"IVdf, (IV)ix);
533                 iname[ilen++] = ']'; iname[ilen] = '\0';
534                 if (indent >= 3) {
535                     sv_catsv(retval, totpad);
536                     sv_catsv(retval, ipad);
537                     sv_catpvn(retval, "#", 1);
538                     sv_catsv(retval, ixsv);
539                 }
540                 sv_catsv(retval, totpad);
541                 sv_catsv(retval, ipad);
542                 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
543                         levelp, indent, pad, xpad, apad, sep, pair,
544                         freezer, toaster, purity, deepcopy, quotekeys, bless,
545                         maxdepth, sortkeys);
546                 if (ix < ixmax)
547                     sv_catpvn(retval, ",", 1);
548             }
549             if (ixmax >= 0) {
550                 SV * const opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1);
551                 sv_catsv(retval, totpad);
552                 sv_catsv(retval, opad);
553                 SvREFCNT_dec(opad);
554             }
555             if (name[0] == '@')
556                 sv_catpvn(retval, ")", 1);
557             else
558                 sv_catpvn(retval, "]", 1);
559             SvREFCNT_dec(ixsv);
560             SvREFCNT_dec(totpad);
561             Safefree(iname);
562         }
563         else if (realtype == SVt_PVHV) {
564             SV *totpad, *newapad;
565             SV *sname;
566             HE *entry;
567             char *key;
568             I32 klen;
569             SV *hval;
570             AV *keys = NULL;
571         
572             SV * const iname = newSVpvn(name, namelen);
573             if (name[0] == '%') {
574                 sv_catpvn(retval, "(", 1);
575                 (SvPVX(iname))[0] = '$';
576             }
577             else {
578                 sv_catpvn(retval, "{", 1);
579                 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
580                 if ((namelen > 0
581                      && name[namelen-1] != ']' && name[namelen-1] != '}')
582                     || (namelen > 4
583                         && (name[1] == '{'
584                             || (name[0] == '\\' && name[2] == '{'))))
585                 {
586                     sv_catpvn(iname, "->", 2);
587                 }
588             }
589             if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
590                 (instr(name+namelen-8, "{SCALAR}") ||
591                  instr(name+namelen-7, "{ARRAY}") ||
592                  instr(name+namelen-6, "{HASH}"))) {
593                 sv_catpvn(iname, "->", 2);
594             }
595             sv_catpvn(iname, "{", 1);
596             totpad = newSVsv(sep);
597             sv_catsv(totpad, pad);
598             sv_catsv(totpad, apad);
599         
600             /* If requested, get a sorted/filtered array of hash keys */
601             if (sortkeys) {
602                 if (sortkeys == &PL_sv_yes) {
603 #if PERL_VERSION < 8
604                     sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
605 #else
606                     keys = newAV();
607                     (void)hv_iterinit((HV*)ival);
608                     while ((entry = hv_iternext((HV*)ival))) {
609                         sv = hv_iterkeysv(entry);
610                         SvREFCNT_inc(sv);
611                         av_push(keys, sv);
612                     }
613 # ifdef USE_LOCALE_NUMERIC
614                     sortsv(AvARRAY(keys), 
615                            av_len(keys)+1, 
616                            IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
617 # else
618                     sortsv(AvARRAY(keys), 
619                            av_len(keys)+1, 
620                            Perl_sv_cmp);
621 # endif
622 #endif
623                 }
624                 if (sortkeys != &PL_sv_yes) {
625                     dSP; ENTER; SAVETMPS; PUSHMARK(sp);
626                     XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
627                     i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
628                     SPAGAIN;
629                     if (i) {
630                         sv = POPs;
631                         if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
632                             keys = (AV*)SvREFCNT_inc(SvRV(sv));
633                     }
634                     if (! keys)
635                         warn("Sortkeys subroutine did not return ARRAYREF\n");
636                     PUTBACK; FREETMPS; LEAVE;
637                 }
638                 if (keys)
639                     sv_2mortal((SV*)keys);
640             }
641             else
642                 (void)hv_iterinit((HV*)ival);
643
644             /* foreach (keys %hash) */
645             for (i = 0; 1; i++) {
646                 char *nkey;
647                 char *nkey_buffer = NULL;
648                 I32 nticks = 0;
649                 SV* keysv;
650                 STRLEN keylen;
651                 I32 nlen;
652                 bool do_utf8 = FALSE;
653
654                if (sortkeys) {
655                    if (!(keys && (I32)i <= av_len(keys))) break;
656                } else {
657                    if (!(entry = hv_iternext((HV *)ival))) break;
658                }
659
660                 if (i)
661                     sv_catpvn(retval, ",", 1);
662
663                 if (sortkeys) {
664                     char *key;
665                     svp = av_fetch(keys, i, FALSE);
666                     keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
667                     key = SvPV(keysv, keylen);
668                     svp = hv_fetch((HV*)ival, key,
669                                    SvUTF8(keysv) ? -(I32)keylen : keylen, 0);
670                     hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
671                 }
672                 else {
673                     keysv = hv_iterkeysv(entry);
674                     hval = hv_iterval((HV*)ival, entry);
675                 }
676
677                 key = SvPV(keysv, keylen);
678                 do_utf8 = DO_UTF8(keysv);
679                 klen = keylen;
680
681                 sv_catsv(retval, totpad);
682                 sv_catsv(retval, ipad);
683                 /* old logic was first to check utf8 flag, and if utf8 always
684                    call esc_q_utf8.  This caused test to break under -Mutf8,
685                    because there even strings like 'c' have utf8 flag on.
686                    Hence with quotekeys == 0 the XS code would still '' quote
687                    them based on flags, whereas the perl code would not,
688                    based on regexps.
689                    The perl code is correct.
690                    needs_quote() decides that anything that isn't a valid
691                    perl identifier needs to be quoted, hence only correctly
692                    formed strings with no characters outside [A-Za-z0-9_:]
693                    won't need quoting.  None of those characters are used in
694                    the byte encoding of utf8, so anything with utf8
695                    encoded characters in will need quoting. Hence strings
696                    with utf8 encoded characters in will end up inside do_utf8
697                    just like before, but now strings with utf8 flag set but
698                    only ascii characters will end up in the unquoted section.
699
700                    There should also be less tests for the (probably currently)
701                    more common doesn't need quoting case.
702                    The code is also smaller (22044 vs 22260) because I've been
703                    able to pull the common logic out to both sides.  */
704                 if (quotekeys || needs_quote(key)) {
705                     if (do_utf8) {
706                         STRLEN ocur = SvCUR(retval);
707                         nlen = esc_q_utf8(aTHX_ retval, key, klen);
708                         nkey = SvPVX(retval) + ocur;
709                     }
710                     else {
711                         nticks = num_q(key, klen);
712                         New(0, nkey_buffer, klen+nticks+3, char);
713                         nkey = nkey_buffer;
714                         nkey[0] = '\'';
715                         if (nticks)
716                             klen += esc_q(nkey+1, key, klen);
717                         else
718                             (void)Copy(key, nkey+1, klen, char);
719                         nkey[++klen] = '\'';
720                         nkey[++klen] = '\0';
721                         nlen = klen;
722                         sv_catpvn(retval, nkey, klen);
723                     }
724                 }
725                 else {
726                     nkey = key;
727                     nlen = klen;
728                     sv_catpvn(retval, nkey, klen);
729                 }
730                 sname = newSVsv(iname);
731                 sv_catpvn(sname, nkey, nlen);
732                 sv_catpvn(sname, "}", 1);
733
734                 sv_catsv(retval, pair);
735                 if (indent >= 2) {
736                     char *extra;
737                     I32 elen = 0;
738                     newapad = newSVsv(apad);
739                     New(0, extra, klen+4+1, char);
740                     while (elen < (klen+4))
741                         extra[elen++] = ' ';
742                     extra[elen] = '\0';
743                     sv_catpvn(newapad, extra, elen);
744                     Safefree(extra);
745                 }
746                 else
747                     newapad = apad;
748
749                 DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
750                         postav, levelp, indent, pad, xpad, newapad, sep, pair,
751                         freezer, toaster, purity, deepcopy, quotekeys, bless,
752                         maxdepth, sortkeys);
753                 SvREFCNT_dec(sname);
754                 Safefree(nkey_buffer);
755                 if (indent >= 2)
756                     SvREFCNT_dec(newapad);
757             }
758             if (i) {
759                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp-1);
760                 sv_catsv(retval, totpad);
761                 sv_catsv(retval, opad);
762                 SvREFCNT_dec(opad);
763             }
764             if (name[0] == '%')
765                 sv_catpvn(retval, ")", 1);
766             else
767                 sv_catpvn(retval, "}", 1);
768             SvREFCNT_dec(iname);
769             SvREFCNT_dec(totpad);
770         }
771         else if (realtype == SVt_PVCV) {
772             sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
773             if (purity)
774                 warn("Encountered CODE ref, using dummy placeholder");
775         }
776         else {
777             warn("cannot handle ref type %ld", realtype);
778         }
779
780         if (realpack) {  /* free blessed allocs */
781             if (indent >= 2) {
782                 SvREFCNT_dec(apad);
783                 apad = blesspad;
784             }
785             sv_catpvn(retval, ", '", 3);
786             sv_catpvn(retval, realpack, strlen(realpack));
787             sv_catpvn(retval, "' )", 3);
788             if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
789                 sv_catpvn(retval, "->", 2);
790                 sv_catsv(retval, toaster);
791                 sv_catpvn(retval, "()", 2);
792             }
793         }
794         SvREFCNT_dec(ipad);
795         (*levelp)--;
796     }
797     else {
798         STRLEN i;
799         
800         if (namelen) {
801 #ifdef DD_USE_OLD_ID_FORMAT
802             idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(val));
803 #else
804             id_buffer = PTR2UV(val);
805             idlen = sizeof(id_buffer);
806 #endif
807             if ((svp = hv_fetch(seenhv, id, idlen, FALSE)) &&
808                 (sv = *svp) && SvROK(sv) &&
809                 (seenentry = (AV*)SvRV(sv)))
810             {
811                 SV *othername;
812                 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
813                     && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
814                 {
815                     sv_catpvn(retval, "${", 2);
816                     sv_catsv(retval, othername);
817                     sv_catpvn(retval, "}", 1);
818                     return 1;
819                 }
820             }
821             else if (val != &PL_sv_undef) {
822                 SV * const namesv = newSVpvn("\\", 1);
823                 sv_catpvn(namesv, name, namelen);
824                 seenentry = newAV();
825                 av_push(seenentry, namesv);
826                 av_push(seenentry, newRV_inc(val));
827                 (void)hv_store(seenhv, id, idlen, newRV_inc((SV*)seenentry), 0);
828                 SvREFCNT_dec(seenentry);
829             }
830         }
831
832         if (DD_is_integer(val)) {
833             STRLEN len;
834             if (SvIsUV(val))
835               len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"UVuf, SvUV(val));
836             else
837               len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, SvIV(val));
838             if (SvPOK(val)) {
839               /* Need to check to see if this is a string such as " 0".
840                  I'm assuming from sprintf isn't going to clash with utf8.
841                  Is this valid on EBCDIC?  */
842               STRLEN pvlen;
843               const char * const pv = SvPV(val, pvlen);
844               if (pvlen != len || memNE(pv, tmpbuf, len))
845                 goto integer_came_from_string;
846             }
847             if (len > 10) {
848               /* Looks like we're on a 64 bit system.  Make it a string so that
849                  if a 32 bit system reads the number it will cope better.  */
850               sv_catpvf(retval, "'%s'", tmpbuf);
851             } else
852               sv_catpvn(retval, tmpbuf, len);
853         }
854         else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
855             c = SvPV(val, i);
856             ++c; --i;                   /* just get the name */
857             if (i >= 6 && strncmp(c, "main::", 6) == 0) {
858                 c += 4;
859                 i -= 4;
860             }
861             if (needs_quote(c)) {
862                 sv_grow(retval, SvCUR(retval)+6+2*i);
863                 r = SvPVX(retval)+SvCUR(retval);
864                 r[0] = '*'; r[1] = '{'; r[2] = '\'';
865                 i += esc_q(r+3, c, i);
866                 i += 3;
867                 r[i++] = '\''; r[i++] = '}';
868                 r[i] = '\0';
869             }
870             else {
871                 sv_grow(retval, SvCUR(retval)+i+2);
872                 r = SvPVX(retval)+SvCUR(retval);
873                 r[0] = '*'; strcpy(r+1, c);
874                 i++;
875             }
876             SvCUR_set(retval, SvCUR(retval)+i);
877
878             if (purity) {
879                 static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
880                 static const STRLEN sizes[] = { 8, 7, 6 };
881                 SV *e;
882                 SV * const nname = newSVpvn("", 0);
883                 SV * const newapad = newSVpvn("", 0);
884                 GV * const gv = (GV*)val;
885                 I32 j;
886                 
887                 for (j=0; j<3; j++) {
888                     e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
889                     if (!e)
890                         continue;
891                     if (j == 0 && !SvOK(e))
892                         continue;
893
894                     {
895                         I32 nlevel = 0;
896                         SV *postentry = newSVpvn(r,i);
897                         
898                         sv_setsv(nname, postentry);
899                         sv_catpvn(nname, entries[j], sizes[j]);
900                         sv_catpvn(postentry, " = ", 3);
901                         av_push(postav, postentry);
902                         e = newRV_inc(e);
903                         
904                         SvCUR_set(newapad, 0);
905                         if (indent >= 2)
906                             (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
907                         
908                         DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry,
909                                 seenhv, postav, &nlevel, indent, pad, xpad,
910                                 newapad, sep, pair, freezer, toaster, purity,
911                                 deepcopy, quotekeys, bless, maxdepth, 
912                                 sortkeys);
913                         SvREFCNT_dec(e);
914                     }
915                 }
916                 
917                 SvREFCNT_dec(newapad);
918                 SvREFCNT_dec(nname);
919             }
920         }
921         else if (val == &PL_sv_undef || !SvOK(val)) {
922             sv_catpvn(retval, "undef", 5);
923         }
924         else {
925         integer_came_from_string:
926             c = SvPV(val, i);
927             if (DO_UTF8(val))
928                 i += esc_q_utf8(aTHX_ retval, c, i);
929             else {
930                 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
931                 r = SvPVX(retval) + SvCUR(retval);
932                 r[0] = '\'';
933                 i += esc_q(r+1, c, i);
934                 ++i;
935                 r[i++] = '\'';
936                 r[i] = '\0';
937                 SvCUR_set(retval, SvCUR(retval)+i);
938             }
939         }
940     }
941
942     if (idlen) {
943         if (deepcopy)
944             (void)hv_delete(seenhv, id, idlen, G_DISCARD);
945         else if (namelen && seenentry) {
946             SV *mark = *av_fetch(seenentry, 2, TRUE);
947             sv_setiv(mark,1);
948         }
949     }
950     return 1;
951 }
952
953
954 MODULE = Data::Dumper           PACKAGE = Data::Dumper         PREFIX = Data_Dumper_
955
956 #
957 # This is the exact equivalent of Dump.  Well, almost. The things that are
958 # different as of now (due to Laziness):
959 #   * doesnt do double-quotes yet.
960 #
961
962 void
963 Data_Dumper_Dumpxs(href, ...)
964         SV      *href;
965         PROTOTYPE: $;$$
966         PPCODE:
967         {
968             HV *hv;
969             SV *retval, *valstr;
970             HV *seenhv = NULL;
971             AV *postav, *todumpav, *namesav;
972             I32 level = 0;
973             I32 indent, terse, i, imax, postlen;
974             SV **svp;
975             SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
976             SV *freezer, *toaster, *bless, *sortkeys;
977             I32 purity, deepcopy, quotekeys, maxdepth = 0;
978             char tmpbuf[1024];
979             I32 gimme = GIMME;
980
981             if (!SvROK(href)) {         /* call new to get an object first */
982                 if (items < 2)
983                     croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
984                 
985                 ENTER;
986                 SAVETMPS;
987                 
988                 PUSHMARK(sp);
989                 XPUSHs(href);
990                 XPUSHs(sv_2mortal(newSVsv(ST(1))));
991                 if (items >= 3)
992                     XPUSHs(sv_2mortal(newSVsv(ST(2))));
993                 PUTBACK;
994                 i = perl_call_method("new", G_SCALAR);
995                 SPAGAIN;
996                 if (i)
997                     href = newSVsv(POPs);
998
999                 PUTBACK;
1000                 FREETMPS;
1001                 LEAVE;
1002                 if (i)
1003                     (void)sv_2mortal(href);
1004             }
1005
1006             todumpav = namesav = NULL;
1007             seenhv = NULL;
1008             val = pad = xpad = apad = sep = pair = varname
1009                 = freezer = toaster = bless = sortkeys = &PL_sv_undef;
1010             name = sv_newmortal();
1011             indent = 2;
1012             terse = purity = deepcopy = 0;
1013             quotekeys = 1;
1014         
1015             retval = newSVpvn("", 0);
1016             if (SvROK(href)
1017                 && (hv = (HV*)SvRV((SV*)href))
1018                 && SvTYPE(hv) == SVt_PVHV)              {
1019
1020                 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
1021                     seenhv = (HV*)SvRV(*svp);
1022                 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
1023                     todumpav = (AV*)SvRV(*svp);
1024                 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
1025                     namesav = (AV*)SvRV(*svp);
1026                 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
1027                     indent = SvIV(*svp);
1028                 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
1029                     purity = SvIV(*svp);
1030                 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
1031                     terse = SvTRUE(*svp);
1032 #if 0 /* useqq currently unused */
1033                 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
1034                     useqq = SvTRUE(*svp);
1035 #endif
1036                 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
1037                     pad = *svp;
1038                 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
1039                     xpad = *svp;
1040                 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
1041                     apad = *svp;
1042                 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
1043                     sep = *svp;
1044                 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
1045                     pair = *svp;
1046                 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1047                     varname = *svp;
1048                 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
1049                     freezer = *svp;
1050                 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
1051                     toaster = *svp;
1052                 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
1053                     deepcopy = SvTRUE(*svp);
1054                 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
1055                     quotekeys = SvTRUE(*svp);
1056                 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
1057                     bless = *svp;
1058                 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
1059                     maxdepth = SvIV(*svp);
1060                 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1061                     sortkeys = *svp;
1062                     if (! SvTRUE(sortkeys))
1063                         sortkeys = NULL;
1064                     else if (! (SvROK(sortkeys) &&
1065                                 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1066                     {
1067                         /* flag to use qsortsv() for sorting hash keys */       
1068                         sortkeys = &PL_sv_yes; 
1069                     }
1070                 }
1071                 postav = newAV();
1072
1073                 if (todumpav)
1074                     imax = av_len(todumpav);
1075                 else
1076                     imax = -1;
1077                 valstr = newSVpvn("",0);
1078                 for (i = 0; i <= imax; ++i) {
1079                     SV *newapad;
1080                 
1081                     av_clear(postav);
1082                     if ((svp = av_fetch(todumpav, i, FALSE)))
1083                         val = *svp;
1084                     else
1085                         val = &PL_sv_undef;
1086                     if ((svp = av_fetch(namesav, i, TRUE))) {
1087                         sv_setsv(name, *svp);
1088                         if (SvOK(*svp) && !SvPOK(*svp))
1089                             (void)SvPV_nolen_const(name);
1090                     }
1091                     else
1092                         (void)SvOK_off(name);
1093                 
1094                     if (SvPOK(name)) {
1095                         if ((SvPVX_const(name))[0] == '*') {
1096                             if (SvROK(val)) {
1097                                 switch (SvTYPE(SvRV(val))) {
1098                                 case SVt_PVAV:
1099                                     (SvPVX(name))[0] = '@';
1100                                     break;
1101                                 case SVt_PVHV:
1102                                     (SvPVX(name))[0] = '%';
1103                                     break;
1104                                 case SVt_PVCV:
1105                                     (SvPVX(name))[0] = '*';
1106                                     break;
1107                                 default:
1108                                     (SvPVX(name))[0] = '$';
1109                                     break;
1110                                 }
1111                             }
1112                             else
1113                                 (SvPVX(name))[0] = '$';
1114                         }
1115                         else if ((SvPVX_const(name))[0] != '$')
1116                             sv_insert(name, 0, 0, "$", 1);
1117                     }
1118                     else {
1119                         STRLEN nchars;
1120                         sv_setpvn(name, "$", 1);
1121                         sv_catsv(name, varname);
1122                         nchars = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, (IV)(i+1));
1123                         sv_catpvn(name, tmpbuf, nchars);
1124                     }
1125                 
1126                     if (indent >= 2) {
1127                         SV * const tmpsv = sv_x(aTHX_ NULL, " ", 1, SvCUR(name)+3);
1128                         newapad = newSVsv(apad);
1129                         sv_catsv(newapad, tmpsv);
1130                         SvREFCNT_dec(tmpsv);
1131                     }
1132                     else
1133                         newapad = apad;
1134                 
1135                     DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
1136                             postav, &level, indent, pad, xpad, newapad, sep, pair,
1137                             freezer, toaster, purity, deepcopy, quotekeys,
1138                             bless, maxdepth, sortkeys);
1139                 
1140                     if (indent >= 2)
1141                         SvREFCNT_dec(newapad);
1142
1143                     postlen = av_len(postav);
1144                     if (postlen >= 0 || !terse) {
1145                         sv_insert(valstr, 0, 0, " = ", 3);
1146                         sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name));
1147                         sv_catpvn(valstr, ";", 1);
1148                     }
1149                     sv_catsv(retval, pad);
1150                     sv_catsv(retval, valstr);
1151                     sv_catsv(retval, sep);
1152                     if (postlen >= 0) {
1153                         I32 i;
1154                         sv_catsv(retval, pad);
1155                         for (i = 0; i <= postlen; ++i) {
1156                             SV *elem;
1157                             svp = av_fetch(postav, i, FALSE);
1158                             if (svp && (elem = *svp)) {
1159                                 sv_catsv(retval, elem);
1160                                 if (i < postlen) {
1161                                     sv_catpvn(retval, ";", 1);
1162                                     sv_catsv(retval, sep);
1163                                     sv_catsv(retval, pad);
1164                                 }
1165                             }
1166                         }
1167                         sv_catpvn(retval, ";", 1);
1168                             sv_catsv(retval, sep);
1169                     }
1170                     sv_setpvn(valstr, "", 0);
1171                     if (gimme == G_ARRAY) {
1172                         XPUSHs(sv_2mortal(retval));
1173                         if (i < imax)   /* not the last time thro ? */
1174                             retval = newSVpvn("",0);
1175                     }
1176                 }
1177                 SvREFCNT_dec(postav);
1178                 SvREFCNT_dec(valstr);
1179             }
1180             else
1181                 croak("Call to new() method failed to return HASH ref");
1182             if (gimme == G_SCALAR)
1183                 XPUSHs(sv_2mortal(retval));
1184         }