Data::Dumper doesn't need ppport.h in core
[p5sagit/p5-mst-13.2.git] / ext / Data / Dumper / Dumper.xs
1 #define PERL_NO_GET_CONTEXT
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5 #ifdef USE_PPPORT_H
6 #  include "ppport.h"
7 #endif
8
9 #if PERL_VERSION < 6
10 #  define DD_USE_OLD_ID_FORMAT
11 #endif
12
13 static I32 num_q (const char *s, STRLEN slen);
14 static I32 esc_q (char *dest, const char *src, STRLEN slen);
15 static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
16 static I32 needs_quote(register const char *s);
17 static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
18 static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
19                     HV *seenhv, AV *postav, I32 *levelp, I32 indent,
20                     SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
21                     SV *freezer, SV *toaster,
22                     I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
23                     I32 maxdepth, SV *sortkeys);
24
25 #ifndef HvNAME_get
26 #define HvNAME_get HvNAME
27 #endif
28
29 #if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
30
31 # ifdef EBCDIC
32 #  define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
33 # else
34 #  define UNI_TO_NATIVE(ch) (ch)
35 # endif
36
37 UV
38 Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
39 {
40     const UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
41                     ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
42     return UNI_TO_NATIVE(uv);
43 }
44
45 # if !defined(PERL_IMPLICIT_CONTEXT)
46 #  define utf8_to_uvchr      Perl_utf8_to_uvchr
47 # else
48 #  define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
49 # endif
50
51 #endif /* PERL_VERSION <= 6 */
52
53 /* Changes in 5.7 series mean that now IOK is only set if scalar is
54    precisely integer but in 5.6 and earlier we need to do a more
55    complex test  */
56 #if PERL_VERSION <= 6
57 #define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
58 #else
59 #define DD_is_integer(sv) SvIOK(sv)
60 #endif
61
62 /* does a string need to be protected? */
63 static I32
64 needs_quote(register const char *s)
65 {
66 TOP:
67     if (s[0] == ':') {
68         if (*++s) {
69             if (*s++ != ':')
70                 return 1;
71         }
72         else
73             return 1;
74     }
75     if (isIDFIRST(*s)) {
76         while (*++s)
77             if (!isALNUM(*s)) {
78                 if (*s == ':')
79                     goto TOP;
80                 else
81                     return 1;
82             }
83     }
84     else
85         return 1;
86     return 0;
87 }
88
89 /* count the number of "'"s and "\"s in string */
90 static I32
91 num_q(register const char *s, register STRLEN slen)
92 {
93     register I32 ret = 0;
94
95     while (slen > 0) {
96         if (*s == '\'' || *s == '\\')
97             ++ret;
98         ++s;
99         --slen;
100     }
101     return ret;
102 }
103
104
105 /* returns number of chars added to escape "'"s and "\"s in s */
106 /* slen number of characters in s will be escaped */
107 /* destination must be long enough for additional chars */
108 static I32
109 esc_q(register char *d, register const char *s, register STRLEN slen)
110 {
111     register I32 ret = 0;
112
113     while (slen > 0) {
114         switch (*s) {
115         case '\'':
116         case '\\':
117             *d = '\\';
118             ++d; ++ret;
119         default:
120             *d = *s;
121             ++d; ++s; --slen;
122             break;
123         }
124     }
125     return ret;
126 }
127
128 static I32
129 esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
130 {
131     char *r, *rstart;
132     const char *s = src;
133     const char * const send = src + slen;
134     STRLEN j, cur = SvCUR(sv);
135     /* Could count 128-255 and 256+ in two variables, if we want to
136        be like &qquote and make a distinction.  */
137     STRLEN grow = 0;    /* bytes needed to represent chars 128+ */
138     /* STRLEN topbit_grow = 0;  bytes needed to represent chars 128-255 */
139     STRLEN backslashes = 0;
140     STRLEN single_quotes = 0;
141     STRLEN qq_escapables = 0;   /* " $ @ will need a \ in "" strings.  */
142     STRLEN normal = 0;
143
144     /* this will need EBCDICification */
145     for (s = src; s < send; s += UTF8SKIP(s)) {
146         const UV k = utf8_to_uvchr((U8*)s, NULL);
147
148 #ifdef EBCDIC
149         if (!isprint(k) || k > 256) {
150 #else
151         if (k > 127) {
152 #endif
153             /* 4: \x{} then count the number of hex digits.  */
154             grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
155 #if UVSIZE == 4
156                 8 /* We may allocate a bit more than the minimum here.  */
157 #else
158                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
159 #endif
160                 );
161         } else if (k == '\\') {
162             backslashes++;
163         } else if (k == '\'') {
164             single_quotes++;
165         } else if (k == '"' || k == '$' || k == '@') {
166             qq_escapables++;
167         } else {
168             normal++;
169         }
170     }
171     if (grow) {
172         /* We have something needing hex. 3 is ""\0 */
173         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
174                 + 2*qq_escapables + normal);
175         rstart = r = SvPVX(sv) + cur;
176
177         *r++ = '"';
178
179         for (s = src; s < send; s += UTF8SKIP(s)) {
180             const UV k = utf8_to_uvchr((U8*)s, NULL);
181
182             if (k == '"' || k == '\\' || k == '$' || k == '@') {
183                 *r++ = '\\';
184                 *r++ = (char)k;
185             }
186             else
187 #ifdef EBCDIC
188               if (isprint(k) && k < 256)
189 #else
190               if (k < 0x80)
191 #endif
192                 *r++ = (char)k;
193             else {
194                 r = r + my_sprintf(r, "\\x{%"UVxf"}", k);
195             }
196         }
197         *r++ = '"';
198     } else {
199         /* Single quotes.  */
200         sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
201                 + qq_escapables + normal);
202         rstart = r = SvPVX(sv) + cur;
203         *r++ = '\'';
204         for (s = src; s < send; s ++) {
205             const char k = *s;
206             if (k == '\'' || k == '\\')
207                 *r++ = '\\';
208             *r++ = k;
209         }
210         *r++ = '\'';
211     }
212     *r = '\0';
213     j = r - rstart;
214     SvCUR_set(sv, cur + j);
215
216     return j;
217 }
218
219 /* append a repeated string to an SV */
220 static SV *
221 sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
222 {
223     if (!sv)
224         sv = newSVpvn("", 0);
225 #ifdef DEBUGGING
226     else
227         assert(SvTYPE(sv) >= SVt_PV);
228 #endif
229
230     if (n > 0) {
231         SvGROW(sv, len*n + SvCUR(sv) + 1);
232         if (len == 1) {
233             char * const start = SvPVX(sv) + SvCUR(sv);
234             SvCUR_set(sv, SvCUR(sv) + n);
235             start[n] = '\0';
236             while (n > 0)
237                 start[--n] = str[0];
238         }
239         else
240             while (n > 0) {
241                 sv_catpvn(sv, str, len);
242                 --n;
243             }
244     }
245     return sv;
246 }
247
248 /*
249  * This ought to be split into smaller functions. (it is one long function since
250  * it exactly parallels the perl version, which was one long thing for
251  * efficiency raisins.)  Ugggh!
252  */
253 static I32
254 DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
255         AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
256         SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
257         I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
258 {
259     char tmpbuf[128];
260     U32 i;
261     char *c, *r, *realpack;
262 #ifdef DD_USE_OLD_ID_FORMAT
263     char id[128];
264 #else
265     UV id_buffer;
266     char *const id = (char *)&id_buffer;
267 #endif
268     SV **svp;
269     SV *sv, *ipad, *ival;
270     SV *blesspad = Nullsv;
271     AV *seenentry = NULL;
272     char *iname;
273     STRLEN inamelen, idlen = 0;
274     U32 realtype;
275
276     if (!val)
277         return 0;
278
279     /* If the ouput buffer has less than some arbitary amount of space
280        remaining, then enlarge it. For the test case (25M of output),
281        *1.1 was slower, *2.0 was the same, so the first guess of 1.5 is
282         deemed to be good enough.  */
283     if (SvTYPE(retval) >= SVt_PV && (SvLEN(retval) - SvCUR(retval)) < 42) {
284         sv_grow(retval, SvCUR(retval) * 3 / 2);
285     }
286
287     realtype = SvTYPE(val);
288
289     if (SvGMAGICAL(val))
290         mg_get(val);
291     if (SvROK(val)) {
292
293         /* If a freeze method is provided and the object has it, call
294            it.  Warn on errors. */
295         if (SvOBJECT(SvRV(val)) && freezer &&
296             SvPOK(freezer) && SvCUR(freezer) &&
297             gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(freezer), 
298                          SvCUR(freezer), -1) != NULL)
299         {
300             dSP; ENTER; SAVETMPS; PUSHMARK(sp);
301             XPUSHs(val); PUTBACK;
302             i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID);
303             SPAGAIN;
304             if (SvTRUE(ERRSV))
305                 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
306             PUTBACK; FREETMPS; LEAVE;
307         }
308         
309         ival = SvRV(val);
310         realtype = SvTYPE(ival);
311 #ifdef DD_USE_OLD_ID_FORMAT
312         idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(ival));
313 #else
314         id_buffer = PTR2UV(ival);
315         idlen = sizeof(id_buffer);
316 #endif
317         if (SvOBJECT(ival))
318             realpack = HvNAME_get(SvSTASH(ival));
319         else
320             realpack = Nullch;
321
322         /* if it has a name, we need to either look it up, or keep a tab
323          * on it so we know when we hit it later
324          */
325         if (namelen) {
326             if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
327                 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
328             {
329                 SV *othername;
330                 if ((svp = av_fetch(seenentry, 0, FALSE))
331                     && (othername = *svp))
332                 {
333                     if (purity && *levelp > 0) {
334                         SV *postentry;
335                         
336                         if (realtype == SVt_PVHV)
337                             sv_catpvn(retval, "{}", 2);
338                         else if (realtype == SVt_PVAV)
339                             sv_catpvn(retval, "[]", 2);
340                         else
341                             sv_catpvn(retval, "do{my $o}", 9);
342                         postentry = newSVpvn(name, namelen);
343                         sv_catpvn(postentry, " = ", 3);
344                         sv_catsv(postentry, othername);
345                         av_push(postav, postentry);
346                     }
347                     else {
348                         if (name[0] == '@' || name[0] == '%') {
349                             if ((SvPVX_const(othername))[0] == '\\' &&
350                                 (SvPVX_const(othername))[1] == name[0]) {
351                                 sv_catpvn(retval, SvPVX_const(othername)+1,
352                                           SvCUR(othername)-1);
353                             }
354                             else {
355                                 sv_catpvn(retval, name, 1);
356                                 sv_catpvn(retval, "{", 1);
357                                 sv_catsv(retval, othername);
358                                 sv_catpvn(retval, "}", 1);
359                             }
360                         }
361                         else
362                             sv_catsv(retval, othername);
363                     }
364                     return 1;
365                 }
366                 else {
367 #ifdef DD_USE_OLD_ID_FORMAT
368                     warn("ref name not found for %s", id);
369 #else
370                     warn("ref name not found for 0x%"UVxf, PTR2UV(ival));
371 #endif
372                     return 0;
373                 }
374             }
375             else {   /* store our name and continue */
376                 SV *namesv;
377                 if (name[0] == '@' || name[0] == '%') {
378                     namesv = newSVpvn("\\", 1);
379                     sv_catpvn(namesv, name, namelen);
380                 }
381                 else if (realtype == SVt_PVCV && name[0] == '*') {
382                     namesv = newSVpvn("\\", 2);
383                     sv_catpvn(namesv, name, namelen);
384                     (SvPVX(namesv))[1] = '&';
385                 }
386                 else
387                     namesv = newSVpvn(name, namelen);
388                 seenentry = newAV();
389                 av_push(seenentry, namesv);
390                 (void)SvREFCNT_inc(val);
391                 av_push(seenentry, val);
392                 (void)hv_store(seenhv, id, idlen,
393                                newRV_inc((SV*)seenentry), 0);
394                 SvREFCNT_dec(seenentry);
395             }
396         }
397
398         if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) {
399             STRLEN rlen;
400             const char *rval = SvPV(val, rlen);
401             const char *slash = strchr(rval, '/');
402             sv_catpvn(retval, "qr/", 3);
403             while (slash) {
404                 sv_catpvn(retval, rval, slash-rval);
405                 sv_catpvn(retval, "\\/", 2);
406                 rlen -= slash-rval+1;
407                 rval = slash+1;
408                 slash = strchr(rval, '/');
409             }
410             sv_catpvn(retval, rval, rlen);
411             sv_catpvn(retval, "/", 1);
412             return 1;
413         }
414
415         /* If purity is not set and maxdepth is set, then check depth:
416          * if we have reached maximum depth, return the string
417          * representation of the thing we are currently examining
418          * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
419          */
420         if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
421             STRLEN vallen;
422             const char * const valstr = SvPV(val,vallen);
423             sv_catpvn(retval, "'", 1);
424             sv_catpvn(retval, valstr, vallen);
425             sv_catpvn(retval, "'", 1);
426             return 1;
427         }
428
429         if (realpack) {                         /* we have a blessed ref */
430             STRLEN blesslen;
431             const char * const blessstr = SvPV(bless, blesslen);
432             sv_catpvn(retval, blessstr, blesslen);
433             sv_catpvn(retval, "( ", 2);
434             if (indent >= 2) {
435                 blesspad = apad;
436                 apad = newSVsv(apad);
437                 sv_x(aTHX_ apad, " ", 1, blesslen+2);
438             }
439         }
440
441         (*levelp)++;
442         ipad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp);
443
444         if (
445 #if PERL_VERSION < 9
446                 realtype <= SVt_PVBM
447 #else
448                 realtype <= SVt_PVMG
449 #endif
450         ) {                          /* scalar ref */
451             SV * const namesv = newSVpvn("${", 2);
452             sv_catpvn(namesv, name, namelen);
453             sv_catpvn(namesv, "}", 1);
454             if (realpack) {                                  /* blessed */
455                 sv_catpvn(retval, "do{\\(my $o = ", 13);
456                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
457                         postav, levelp, indent, pad, xpad, apad, sep, pair,
458                         freezer, toaster, purity, deepcopy, quotekeys, bless,
459                         maxdepth, sortkeys);
460                 sv_catpvn(retval, ")}", 2);
461             }                                                /* plain */
462             else {
463                 sv_catpvn(retval, "\\", 1);
464                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
465                         postav, levelp, indent, pad, xpad, apad, sep, pair,
466                         freezer, toaster, purity, deepcopy, quotekeys, bless,
467                         maxdepth, sortkeys);
468             }
469             SvREFCNT_dec(namesv);
470         }
471         else if (realtype == SVt_PVGV) {                     /* glob ref */
472             SV * const namesv = newSVpvn("*{", 2);
473             sv_catpvn(namesv, name, namelen);
474             sv_catpvn(namesv, "}", 1);
475             sv_catpvn(retval, "\\", 1);
476             DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
477                     postav, levelp,     indent, pad, xpad, apad, sep, pair,
478                     freezer, toaster, purity, deepcopy, quotekeys, bless,
479                     maxdepth, sortkeys);
480             SvREFCNT_dec(namesv);
481         }
482         else if (realtype == SVt_PVAV) {
483             SV *totpad;
484             I32 ix = 0;
485             const I32 ixmax = av_len((AV *)ival);
486         
487             SV * const ixsv = newSViv(0);
488             /* allowing for a 24 char wide array index */
489             New(0, iname, namelen+28, char);
490             (void)strcpy(iname, name);
491             inamelen = namelen;
492             if (name[0] == '@') {
493                 sv_catpvn(retval, "(", 1);
494                 iname[0] = '$';
495             }
496             else {
497                 sv_catpvn(retval, "[", 1);
498                 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
499                 /*if (namelen > 0
500                     && name[namelen-1] != ']' && name[namelen-1] != '}'
501                     && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
502                 if ((namelen > 0
503                      && name[namelen-1] != ']' && name[namelen-1] != '}')
504                     || (namelen > 4
505                         && (name[1] == '{'
506                             || (name[0] == '\\' && name[2] == '{'))))
507                 {
508                     iname[inamelen++] = '-'; iname[inamelen++] = '>';
509                     iname[inamelen] = '\0';
510                 }
511             }
512             if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
513                 (instr(iname+inamelen-8, "{SCALAR}") ||
514                  instr(iname+inamelen-7, "{ARRAY}") ||
515                  instr(iname+inamelen-6, "{HASH}"))) {
516                 iname[inamelen++] = '-'; iname[inamelen++] = '>';
517             }
518             iname[inamelen++] = '['; iname[inamelen] = '\0';
519             totpad = newSVsv(sep);
520             sv_catsv(totpad, pad);
521             sv_catsv(totpad, apad);
522
523             for (ix = 0; ix <= ixmax; ++ix) {
524                 STRLEN ilen;
525                 SV *elem;
526                 svp = av_fetch((AV*)ival, ix, FALSE);
527                 if (svp)
528                     elem = *svp;
529                 else
530                     elem = &PL_sv_undef;
531                 
532                 ilen = inamelen;
533                 sv_setiv(ixsv, ix);
534                 ilen = ilen + my_sprintf(iname+ilen, "%"IVdf, (IV)ix);
535                 iname[ilen++] = ']'; iname[ilen] = '\0';
536                 if (indent >= 3) {
537                     sv_catsv(retval, totpad);
538                     sv_catsv(retval, ipad);
539                     sv_catpvn(retval, "#", 1);
540                     sv_catsv(retval, ixsv);
541                 }
542                 sv_catsv(retval, totpad);
543                 sv_catsv(retval, ipad);
544                 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
545                         levelp, indent, pad, xpad, apad, sep, pair,
546                         freezer, toaster, purity, deepcopy, quotekeys, bless,
547                         maxdepth, sortkeys);
548                 if (ix < ixmax)
549                     sv_catpvn(retval, ",", 1);
550             }
551             if (ixmax >= 0) {
552                 SV * const opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1);
553                 sv_catsv(retval, totpad);
554                 sv_catsv(retval, opad);
555                 SvREFCNT_dec(opad);
556             }
557             if (name[0] == '@')
558                 sv_catpvn(retval, ")", 1);
559             else
560                 sv_catpvn(retval, "]", 1);
561             SvREFCNT_dec(ixsv);
562             SvREFCNT_dec(totpad);
563             Safefree(iname);
564         }
565         else if (realtype == SVt_PVHV) {
566             SV *totpad, *newapad;
567             SV *sname;
568             HE *entry;
569             char *key;
570             I32 klen;
571             SV *hval;
572             AV *keys = NULL;
573         
574             SV * const iname = newSVpvn(name, namelen);
575             if (name[0] == '%') {
576                 sv_catpvn(retval, "(", 1);
577                 (SvPVX(iname))[0] = '$';
578             }
579             else {
580                 sv_catpvn(retval, "{", 1);
581                 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
582                 if ((namelen > 0
583                      && name[namelen-1] != ']' && name[namelen-1] != '}')
584                     || (namelen > 4
585                         && (name[1] == '{'
586                             || (name[0] == '\\' && name[2] == '{'))))
587                 {
588                     sv_catpvn(iname, "->", 2);
589                 }
590             }
591             if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
592                 (instr(name+namelen-8, "{SCALAR}") ||
593                  instr(name+namelen-7, "{ARRAY}") ||
594                  instr(name+namelen-6, "{HASH}"))) {
595                 sv_catpvn(iname, "->", 2);
596             }
597             sv_catpvn(iname, "{", 1);
598             totpad = newSVsv(sep);
599             sv_catsv(totpad, pad);
600             sv_catsv(totpad, apad);
601         
602             /* If requested, get a sorted/filtered array of hash keys */
603             if (sortkeys) {
604                 if (sortkeys == &PL_sv_yes) {
605 #if PERL_VERSION < 8
606                     sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
607 #else
608                     keys = newAV();
609                     (void)hv_iterinit((HV*)ival);
610                     while ((entry = hv_iternext((HV*)ival))) {
611                         sv = hv_iterkeysv(entry);
612                         SvREFCNT_inc(sv);
613                         av_push(keys, sv);
614                     }
615 # ifdef USE_LOCALE_NUMERIC
616                     sortsv(AvARRAY(keys), 
617                            av_len(keys)+1, 
618                            IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
619 # else
620                     sortsv(AvARRAY(keys), 
621                            av_len(keys)+1, 
622                            Perl_sv_cmp);
623 # endif
624 #endif
625                 }
626                 if (sortkeys != &PL_sv_yes) {
627                     dSP; ENTER; SAVETMPS; PUSHMARK(sp);
628                     XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
629                     i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
630                     SPAGAIN;
631                     if (i) {
632                         sv = POPs;
633                         if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
634                             keys = (AV*)SvREFCNT_inc(SvRV(sv));
635                     }
636                     if (! keys)
637                         warn("Sortkeys subroutine did not return ARRAYREF\n");
638                     PUTBACK; FREETMPS; LEAVE;
639                 }
640                 if (keys)
641                     sv_2mortal((SV*)keys);
642             }
643             else
644                 (void)hv_iterinit((HV*)ival);
645
646             /* foreach (keys %hash) */
647             for (i = 0; 1; i++) {
648                 char *nkey;
649                 char *nkey_buffer = NULL;
650                 I32 nticks = 0;
651                 SV* keysv;
652                 STRLEN keylen;
653                 I32 nlen;
654                 bool do_utf8 = FALSE;
655
656                if (sortkeys) {
657                    if (!(keys && (I32)i <= av_len(keys))) break;
658                } else {
659                    if (!(entry = hv_iternext((HV *)ival))) break;
660                }
661
662                 if (i)
663                     sv_catpvn(retval, ",", 1);
664
665                 if (sortkeys) {
666                     char *key;
667                     svp = av_fetch(keys, i, FALSE);
668                     keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
669                     key = SvPV(keysv, keylen);
670                     svp = hv_fetch((HV*)ival, key,
671                                    SvUTF8(keysv) ? -(I32)keylen : keylen, 0);
672                     hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
673                 }
674                 else {
675                     keysv = hv_iterkeysv(entry);
676                     hval = hv_iterval((HV*)ival, entry);
677                 }
678
679                 key = SvPV(keysv, keylen);
680                 do_utf8 = DO_UTF8(keysv);
681                 klen = keylen;
682
683                 sv_catsv(retval, totpad);
684                 sv_catsv(retval, ipad);
685                 /* old logic was first to check utf8 flag, and if utf8 always
686                    call esc_q_utf8.  This caused test to break under -Mutf8,
687                    because there even strings like 'c' have utf8 flag on.
688                    Hence with quotekeys == 0 the XS code would still '' quote
689                    them based on flags, whereas the perl code would not,
690                    based on regexps.
691                    The perl code is correct.
692                    needs_quote() decides that anything that isn't a valid
693                    perl identifier needs to be quoted, hence only correctly
694                    formed strings with no characters outside [A-Za-z0-9_:]
695                    won't need quoting.  None of those characters are used in
696                    the byte encoding of utf8, so anything with utf8
697                    encoded characters in will need quoting. Hence strings
698                    with utf8 encoded characters in will end up inside do_utf8
699                    just like before, but now strings with utf8 flag set but
700                    only ascii characters will end up in the unquoted section.
701
702                    There should also be less tests for the (probably currently)
703                    more common doesn't need quoting case.
704                    The code is also smaller (22044 vs 22260) because I've been
705                    able to pull the common logic out to both sides.  */
706                 if (quotekeys || needs_quote(key)) {
707                     if (do_utf8) {
708                         STRLEN ocur = SvCUR(retval);
709                         nlen = esc_q_utf8(aTHX_ retval, key, klen);
710                         nkey = SvPVX(retval) + ocur;
711                     }
712                     else {
713                         nticks = num_q(key, klen);
714                         New(0, nkey_buffer, klen+nticks+3, char);
715                         nkey = nkey_buffer;
716                         nkey[0] = '\'';
717                         if (nticks)
718                             klen += esc_q(nkey+1, key, klen);
719                         else
720                             (void)Copy(key, nkey+1, klen, char);
721                         nkey[++klen] = '\'';
722                         nkey[++klen] = '\0';
723                         nlen = klen;
724                         sv_catpvn(retval, nkey, klen);
725                     }
726                 }
727                 else {
728                     nkey = key;
729                     nlen = klen;
730                     sv_catpvn(retval, nkey, klen);
731                 }
732                 sname = newSVsv(iname);
733                 sv_catpvn(sname, nkey, nlen);
734                 sv_catpvn(sname, "}", 1);
735
736                 sv_catsv(retval, pair);
737                 if (indent >= 2) {
738                     char *extra;
739                     I32 elen = 0;
740                     newapad = newSVsv(apad);
741                     New(0, extra, klen+4+1, char);
742                     while (elen < (klen+4))
743                         extra[elen++] = ' ';
744                     extra[elen] = '\0';
745                     sv_catpvn(newapad, extra, elen);
746                     Safefree(extra);
747                 }
748                 else
749                     newapad = apad;
750
751                 DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
752                         postav, levelp, indent, pad, xpad, newapad, sep, pair,
753                         freezer, toaster, purity, deepcopy, quotekeys, bless,
754                         maxdepth, sortkeys);
755                 SvREFCNT_dec(sname);
756                 Safefree(nkey_buffer);
757                 if (indent >= 2)
758                     SvREFCNT_dec(newapad);
759             }
760             if (i) {
761                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp-1);
762                 sv_catsv(retval, totpad);
763                 sv_catsv(retval, opad);
764                 SvREFCNT_dec(opad);
765             }
766             if (name[0] == '%')
767                 sv_catpvn(retval, ")", 1);
768             else
769                 sv_catpvn(retval, "}", 1);
770             SvREFCNT_dec(iname);
771             SvREFCNT_dec(totpad);
772         }
773         else if (realtype == SVt_PVCV) {
774             sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
775             if (purity)
776                 warn("Encountered CODE ref, using dummy placeholder");
777         }
778         else {
779             warn("cannot handle ref type %ld", realtype);
780         }
781
782         if (realpack) {  /* free blessed allocs */
783             if (indent >= 2) {
784                 SvREFCNT_dec(apad);
785                 apad = blesspad;
786             }
787             sv_catpvn(retval, ", '", 3);
788             sv_catpvn(retval, realpack, strlen(realpack));
789             sv_catpvn(retval, "' )", 3);
790             if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
791                 sv_catpvn(retval, "->", 2);
792                 sv_catsv(retval, toaster);
793                 sv_catpvn(retval, "()", 2);
794             }
795         }
796         SvREFCNT_dec(ipad);
797         (*levelp)--;
798     }
799     else {
800         STRLEN i;
801         
802         if (namelen) {
803 #ifdef DD_USE_OLD_ID_FORMAT
804             idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(val));
805 #else
806             id_buffer = PTR2UV(val);
807             idlen = sizeof(id_buffer);
808 #endif
809             if ((svp = hv_fetch(seenhv, id, idlen, FALSE)) &&
810                 (sv = *svp) && SvROK(sv) &&
811                 (seenentry = (AV*)SvRV(sv)))
812             {
813                 SV *othername;
814                 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
815                     && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
816                 {
817                     sv_catpvn(retval, "${", 2);
818                     sv_catsv(retval, othername);
819                     sv_catpvn(retval, "}", 1);
820                     return 1;
821                 }
822             }
823             else if (val != &PL_sv_undef) {
824                 SV * const namesv = newSVpvn("\\", 1);
825                 sv_catpvn(namesv, name, namelen);
826                 seenentry = newAV();
827                 av_push(seenentry, namesv);
828                 av_push(seenentry, newRV_inc(val));
829                 (void)hv_store(seenhv, id, idlen, newRV_inc((SV*)seenentry), 0);
830                 SvREFCNT_dec(seenentry);
831             }
832         }
833
834         if (DD_is_integer(val)) {
835             STRLEN len;
836             if (SvIsUV(val))
837               len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"UVuf, SvUV(val));
838             else
839               len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, SvIV(val));
840             if (SvPOK(val)) {
841               /* Need to check to see if this is a string such as " 0".
842                  I'm assuming from sprintf isn't going to clash with utf8.
843                  Is this valid on EBCDIC?  */
844               STRLEN pvlen;
845               const char * const pv = SvPV(val, pvlen);
846               if (pvlen != len || memNE(pv, tmpbuf, len))
847                 goto integer_came_from_string;
848             }
849             if (len > 10) {
850               /* Looks like we're on a 64 bit system.  Make it a string so that
851                  if a 32 bit system reads the number it will cope better.  */
852               sv_catpvf(retval, "'%s'", tmpbuf);
853             } else
854               sv_catpvn(retval, tmpbuf, len);
855         }
856         else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
857             c = SvPV(val, i);
858             ++c; --i;                   /* just get the name */
859             if (i >= 6 && strncmp(c, "main::", 6) == 0) {
860                 c += 4;
861                 i -= 4;
862             }
863             if (needs_quote(c)) {
864                 sv_grow(retval, SvCUR(retval)+6+2*i);
865                 r = SvPVX(retval)+SvCUR(retval);
866                 r[0] = '*'; r[1] = '{'; r[2] = '\'';
867                 i += esc_q(r+3, c, i);
868                 i += 3;
869                 r[i++] = '\''; r[i++] = '}';
870                 r[i] = '\0';
871             }
872             else {
873                 sv_grow(retval, SvCUR(retval)+i+2);
874                 r = SvPVX(retval)+SvCUR(retval);
875                 r[0] = '*'; strcpy(r+1, c);
876                 i++;
877             }
878             SvCUR_set(retval, SvCUR(retval)+i);
879
880             if (purity) {
881                 static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
882                 static const STRLEN sizes[] = { 8, 7, 6 };
883                 SV *e;
884                 SV * const nname = newSVpvn("", 0);
885                 SV * const newapad = newSVpvn("", 0);
886                 GV * const gv = (GV*)val;
887                 I32 j;
888                 
889                 for (j=0; j<3; j++) {
890                     e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
891                     if (!e)
892                         continue;
893                     if (j == 0 && !SvOK(e))
894                         continue;
895
896                     {
897                         I32 nlevel = 0;
898                         SV *postentry = newSVpvn(r,i);
899                         
900                         sv_setsv(nname, postentry);
901                         sv_catpvn(nname, entries[j], sizes[j]);
902                         sv_catpvn(postentry, " = ", 3);
903                         av_push(postav, postentry);
904                         e = newRV_inc(e);
905                         
906                         SvCUR_set(newapad, 0);
907                         if (indent >= 2)
908                             (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
909                         
910                         DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry,
911                                 seenhv, postav, &nlevel, indent, pad, xpad,
912                                 newapad, sep, pair, freezer, toaster, purity,
913                                 deepcopy, quotekeys, bless, maxdepth, 
914                                 sortkeys);
915                         SvREFCNT_dec(e);
916                     }
917                 }
918                 
919                 SvREFCNT_dec(newapad);
920                 SvREFCNT_dec(nname);
921             }
922         }
923         else if (val == &PL_sv_undef || !SvOK(val)) {
924             sv_catpvn(retval, "undef", 5);
925         }
926         else {
927         integer_came_from_string:
928             c = SvPV(val, i);
929             if (DO_UTF8(val))
930                 i += esc_q_utf8(aTHX_ retval, c, i);
931             else {
932                 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
933                 r = SvPVX(retval) + SvCUR(retval);
934                 r[0] = '\'';
935                 i += esc_q(r+1, c, i);
936                 ++i;
937                 r[i++] = '\'';
938                 r[i] = '\0';
939                 SvCUR_set(retval, SvCUR(retval)+i);
940             }
941         }
942     }
943
944     if (idlen) {
945         if (deepcopy)
946             (void)hv_delete(seenhv, id, idlen, G_DISCARD);
947         else if (namelen && seenentry) {
948             SV *mark = *av_fetch(seenentry, 2, TRUE);
949             sv_setiv(mark,1);
950         }
951     }
952     return 1;
953 }
954
955
956 MODULE = Data::Dumper           PACKAGE = Data::Dumper         PREFIX = Data_Dumper_
957
958 #
959 # This is the exact equivalent of Dump.  Well, almost. The things that are
960 # different as of now (due to Laziness):
961 #   * doesnt do double-quotes yet.
962 #
963
964 void
965 Data_Dumper_Dumpxs(href, ...)
966         SV      *href;
967         PROTOTYPE: $;$$
968         PPCODE:
969         {
970             HV *hv;
971             SV *retval, *valstr;
972             HV *seenhv = NULL;
973             AV *postav, *todumpav, *namesav;
974             I32 level = 0;
975             I32 indent, terse, i, imax, postlen;
976             SV **svp;
977             SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
978             SV *freezer, *toaster, *bless, *sortkeys;
979             I32 purity, deepcopy, quotekeys, maxdepth = 0;
980             char tmpbuf[1024];
981             I32 gimme = GIMME;
982
983             if (!SvROK(href)) {         /* call new to get an object first */
984                 if (items < 2)
985                     croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
986                 
987                 ENTER;
988                 SAVETMPS;
989                 
990                 PUSHMARK(sp);
991                 XPUSHs(href);
992                 XPUSHs(sv_2mortal(newSVsv(ST(1))));
993                 if (items >= 3)
994                     XPUSHs(sv_2mortal(newSVsv(ST(2))));
995                 PUTBACK;
996                 i = perl_call_method("new", G_SCALAR);
997                 SPAGAIN;
998                 if (i)
999                     href = newSVsv(POPs);
1000
1001                 PUTBACK;
1002                 FREETMPS;
1003                 LEAVE;
1004                 if (i)
1005                     (void)sv_2mortal(href);
1006             }
1007
1008             todumpav = namesav = NULL;
1009             seenhv = NULL;
1010             val = pad = xpad = apad = sep = pair = varname
1011                 = freezer = toaster = bless = sortkeys = &PL_sv_undef;
1012             name = sv_newmortal();
1013             indent = 2;
1014             terse = purity = deepcopy = 0;
1015             quotekeys = 1;
1016         
1017             retval = newSVpvn("", 0);
1018             if (SvROK(href)
1019                 && (hv = (HV*)SvRV((SV*)href))
1020                 && SvTYPE(hv) == SVt_PVHV)              {
1021
1022                 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
1023                     seenhv = (HV*)SvRV(*svp);
1024                 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
1025                     todumpav = (AV*)SvRV(*svp);
1026                 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
1027                     namesav = (AV*)SvRV(*svp);
1028                 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
1029                     indent = SvIV(*svp);
1030                 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
1031                     purity = SvIV(*svp);
1032                 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
1033                     terse = SvTRUE(*svp);
1034 #if 0 /* useqq currently unused */
1035                 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
1036                     useqq = SvTRUE(*svp);
1037 #endif
1038                 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
1039                     pad = *svp;
1040                 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
1041                     xpad = *svp;
1042                 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
1043                     apad = *svp;
1044                 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
1045                     sep = *svp;
1046                 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
1047                     pair = *svp;
1048                 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1049                     varname = *svp;
1050                 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
1051                     freezer = *svp;
1052                 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
1053                     toaster = *svp;
1054                 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
1055                     deepcopy = SvTRUE(*svp);
1056                 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
1057                     quotekeys = SvTRUE(*svp);
1058                 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
1059                     bless = *svp;
1060                 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
1061                     maxdepth = SvIV(*svp);
1062                 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1063                     sortkeys = *svp;
1064                     if (! SvTRUE(sortkeys))
1065                         sortkeys = NULL;
1066                     else if (! (SvROK(sortkeys) &&
1067                                 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1068                     {
1069                         /* flag to use qsortsv() for sorting hash keys */       
1070                         sortkeys = &PL_sv_yes; 
1071                     }
1072                 }
1073                 postav = newAV();
1074
1075                 if (todumpav)
1076                     imax = av_len(todumpav);
1077                 else
1078                     imax = -1;
1079                 valstr = newSVpvn("",0);
1080                 for (i = 0; i <= imax; ++i) {
1081                     SV *newapad;
1082                 
1083                     av_clear(postav);
1084                     if ((svp = av_fetch(todumpav, i, FALSE)))
1085                         val = *svp;
1086                     else
1087                         val = &PL_sv_undef;
1088                     if ((svp = av_fetch(namesav, i, TRUE))) {
1089                         sv_setsv(name, *svp);
1090                         if (SvOK(*svp) && !SvPOK(*svp))
1091                             (void)SvPV_nolen_const(name);
1092                     }
1093                     else
1094                         (void)SvOK_off(name);
1095                 
1096                     if (SvPOK(name)) {
1097                         if ((SvPVX_const(name))[0] == '*') {
1098                             if (SvROK(val)) {
1099                                 switch (SvTYPE(SvRV(val))) {
1100                                 case SVt_PVAV:
1101                                     (SvPVX(name))[0] = '@';
1102                                     break;
1103                                 case SVt_PVHV:
1104                                     (SvPVX(name))[0] = '%';
1105                                     break;
1106                                 case SVt_PVCV:
1107                                     (SvPVX(name))[0] = '*';
1108                                     break;
1109                                 default:
1110                                     (SvPVX(name))[0] = '$';
1111                                     break;
1112                                 }
1113                             }
1114                             else
1115                                 (SvPVX(name))[0] = '$';
1116                         }
1117                         else if ((SvPVX_const(name))[0] != '$')
1118                             sv_insert(name, 0, 0, "$", 1);
1119                     }
1120                     else {
1121                         STRLEN nchars;
1122                         sv_setpvn(name, "$", 1);
1123                         sv_catsv(name, varname);
1124                         nchars = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, (IV)(i+1));
1125                         sv_catpvn(name, tmpbuf, nchars);
1126                     }
1127                 
1128                     if (indent >= 2) {
1129                         SV * const tmpsv = sv_x(aTHX_ NULL, " ", 1, SvCUR(name)+3);
1130                         newapad = newSVsv(apad);
1131                         sv_catsv(newapad, tmpsv);
1132                         SvREFCNT_dec(tmpsv);
1133                     }
1134                     else
1135                         newapad = apad;
1136                 
1137                     DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
1138                             postav, &level, indent, pad, xpad, newapad, sep, pair,
1139                             freezer, toaster, purity, deepcopy, quotekeys,
1140                             bless, maxdepth, sortkeys);
1141                 
1142                     if (indent >= 2)
1143                         SvREFCNT_dec(newapad);
1144
1145                     postlen = av_len(postav);
1146                     if (postlen >= 0 || !terse) {
1147                         sv_insert(valstr, 0, 0, " = ", 3);
1148                         sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name));
1149                         sv_catpvn(valstr, ";", 1);
1150                     }
1151                     sv_catsv(retval, pad);
1152                     sv_catsv(retval, valstr);
1153                     sv_catsv(retval, sep);
1154                     if (postlen >= 0) {
1155                         I32 i;
1156                         sv_catsv(retval, pad);
1157                         for (i = 0; i <= postlen; ++i) {
1158                             SV *elem;
1159                             svp = av_fetch(postav, i, FALSE);
1160                             if (svp && (elem = *svp)) {
1161                                 sv_catsv(retval, elem);
1162                                 if (i < postlen) {
1163                                     sv_catpvn(retval, ";", 1);
1164                                     sv_catsv(retval, sep);
1165                                     sv_catsv(retval, pad);
1166                                 }
1167                             }
1168                         }
1169                         sv_catpvn(retval, ";", 1);
1170                             sv_catsv(retval, sep);
1171                     }
1172                     sv_setpvn(valstr, "", 0);
1173                     if (gimme == G_ARRAY) {
1174                         XPUSHs(sv_2mortal(retval));
1175                         if (i < imax)   /* not the last time thro ? */
1176                             retval = newSVpvn("",0);
1177                     }
1178                 }
1179                 SvREFCNT_dec(postav);
1180                 SvREFCNT_dec(valstr);
1181             }
1182             else
1183                 croak("Call to new() method failed to return HASH ref");
1184             if (gimme == G_SCALAR)
1185                 XPUSHs(sv_2mortal(retval));
1186         }