5ed117a63112119efa072cd9693226e6b503c5b4
[p5sagit/p5-mst-13.2.git] / ext / Data / Dumper / Dumper.xs
1 #define PERL_NO_GET_CONTEXT
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5 #include "ppport.h"
6
7 #if PERL_VERSION < 6
8 #  define DD_USE_OLD_ID_FORMAT
9 #endif
10
11 static I32 num_q (const char *s, STRLEN slen);
12 static I32 esc_q (char *dest, const char *src, STRLEN slen);
13 static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
14 static I32 needs_quote(register const char *s);
15 static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
16 static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
17                     HV *seenhv, AV *postav, I32 *levelp, I32 indent,
18                     SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
19                     SV *freezer, SV *toaster,
20                     I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
21                     I32 maxdepth, SV *sortkeys);
22
23 #ifndef HvNAME_get
24 #define HvNAME_get HvNAME
25 #endif
26
27 #if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
28
29 # ifdef EBCDIC
30 #  define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
31 # else
32 #  define UNI_TO_NATIVE(ch) (ch)
33 # endif
34
35 UV
36 Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
37 {
38     const UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
39                     ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
40     return UNI_TO_NATIVE(uv);
41 }
42
43 # if !defined(PERL_IMPLICIT_CONTEXT)
44 #  define utf8_to_uvchr      Perl_utf8_to_uvchr
45 # else
46 #  define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
47 # endif
48
49 #endif /* PERL_VERSION <= 6 */
50
51 /* Changes in 5.7 series mean that now IOK is only set if scalar is
52    precisely integer but in 5.6 and earlier we need to do a more
53    complex test  */
54 #if PERL_VERSION <= 6
55 #define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
56 #else
57 #define DD_is_integer(sv) SvIOK(sv)
58 #endif
59
60 /* does a string need to be protected? */
61 static I32
62 needs_quote(register const char *s)
63 {
64 TOP:
65     if (s[0] == ':') {
66         if (*++s) {
67             if (*s++ != ':')
68                 return 1;
69         }
70         else
71             return 1;
72     }
73     if (isIDFIRST(*s)) {
74         while (*++s)
75             if (!isALNUM(*s)) {
76                 if (*s == ':')
77                     goto TOP;
78                 else
79                     return 1;
80             }
81     }
82     else
83         return 1;
84     return 0;
85 }
86
87 /* count the number of "'"s and "\"s in string */
88 static I32
89 num_q(register const char *s, register STRLEN slen)
90 {
91     register I32 ret = 0;
92
93     while (slen > 0) {
94         if (*s == '\'' || *s == '\\')
95             ++ret;
96         ++s;
97         --slen;
98     }
99     return ret;
100 }
101
102
103 /* returns number of chars added to escape "'"s and "\"s in s */
104 /* slen number of characters in s will be escaped */
105 /* destination must be long enough for additional chars */
106 static I32
107 esc_q(register char *d, register const char *s, register STRLEN slen)
108 {
109     register I32 ret = 0;
110
111     while (slen > 0) {
112         switch (*s) {
113         case '\'':
114         case '\\':
115             *d = '\\';
116             ++d; ++ret;
117         default:
118             *d = *s;
119             ++d; ++s; --slen;
120             break;
121         }
122     }
123     return ret;
124 }
125
126 static I32
127 esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
128 {
129     char *r, *rstart;
130     const char *s = src;
131     const char * const send = src + slen;
132     STRLEN j, cur = SvCUR(sv);
133     /* Could count 128-255 and 256+ in two variables, if we want to
134        be like &qquote and make a distinction.  */
135     STRLEN grow = 0;    /* bytes needed to represent chars 128+ */
136     /* STRLEN topbit_grow = 0;  bytes needed to represent chars 128-255 */
137     STRLEN backslashes = 0;
138     STRLEN single_quotes = 0;
139     STRLEN qq_escapables = 0;   /* " $ @ will need a \ in "" strings.  */
140     STRLEN normal = 0;
141
142     /* this will need EBCDICification */
143     for (s = src; s < send; s += UTF8SKIP(s)) {
144         const UV k = utf8_to_uvchr((U8*)s, NULL);
145
146 #ifdef EBCDIC
147         if (!isprint(k) || k > 256) {
148 #else
149         if (k > 127) {
150 #endif
151             /* 4: \x{} then count the number of hex digits.  */
152             grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
153 #if UVSIZE == 4
154                 8 /* We may allocate a bit more than the minimum here.  */
155 #else
156                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
157 #endif
158                 );
159         } else if (k == '\\') {
160             backslashes++;
161         } else if (k == '\'') {
162             single_quotes++;
163         } else if (k == '"' || k == '$' || k == '@') {
164             qq_escapables++;
165         } else {
166             normal++;
167         }
168     }
169     if (grow) {
170         /* We have something needing hex. 3 is ""\0 */
171         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
172                 + 2*qq_escapables + normal);
173         rstart = r = SvPVX(sv) + cur;
174
175         *r++ = '"';
176
177         for (s = src; s < send; s += UTF8SKIP(s)) {
178             const UV k = utf8_to_uvchr((U8*)s, NULL);
179
180             if (k == '"' || k == '\\' || k == '$' || k == '@') {
181                 *r++ = '\\';
182                 *r++ = (char)k;
183             }
184             else
185 #ifdef EBCDIC
186               if (isprint(k) && k < 256)
187 #else
188               if (k < 0x80)
189 #endif
190                 *r++ = (char)k;
191             else {
192                 r = r + my_sprintf(r, "\\x{%"UVxf"}", k);
193             }
194         }
195         *r++ = '"';
196     } else {
197         /* Single quotes.  */
198         sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
199                 + qq_escapables + normal);
200         rstart = r = SvPVX(sv) + cur;
201         *r++ = '\'';
202         for (s = src; s < send; s ++) {
203             const char k = *s;
204             if (k == '\'' || k == '\\')
205                 *r++ = '\\';
206             *r++ = k;
207         }
208         *r++ = '\'';
209     }
210     *r = '\0';
211     j = r - rstart;
212     SvCUR_set(sv, cur + j);
213
214     return j;
215 }
216
217 /* append a repeated string to an SV */
218 static SV *
219 sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
220 {
221     if (!sv)
222         sv = newSVpvn("", 0);
223 #ifdef DEBUGGING
224     else
225         assert(SvTYPE(sv) >= SVt_PV);
226 #endif
227
228     if (n > 0) {
229         SvGROW(sv, len*n + SvCUR(sv) + 1);
230         if (len == 1) {
231             char * const start = SvPVX(sv) + SvCUR(sv);
232             SvCUR_set(sv, SvCUR(sv) + n);
233             start[n] = '\0';
234             while (n > 0)
235                 start[--n] = str[0];
236         }
237         else
238             while (n > 0) {
239                 sv_catpvn(sv, str, len);
240                 --n;
241             }
242     }
243     return sv;
244 }
245
246 /*
247  * This ought to be split into smaller functions. (it is one long function since
248  * it exactly parallels the perl version, which was one long thing for
249  * efficiency raisins.)  Ugggh!
250  */
251 static I32
252 DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
253         AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
254         SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
255         I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
256 {
257     char tmpbuf[128];
258     U32 i;
259     char *c, *r, *realpack;
260 #ifdef DD_USE_OLD_ID_FORMAT
261     char id[128];
262 #else
263     UV id_buffer;
264     char *const id = (char *)&id_buffer;
265 #endif
266     SV **svp;
267     SV *sv, *ipad, *ival;
268     SV *blesspad = Nullsv;
269     AV *seenentry = NULL;
270     char *iname;
271     STRLEN inamelen, idlen = 0;
272     U32 realtype;
273
274     if (!val)
275         return 0;
276
277     /* If the ouput buffer has less than some arbitary amount of space
278        remaining, then enlarge it. For the test case (25M of output),
279        *1.1 was slower, *2.0 was the same, so the first guess of 1.5 is
280         deemed to be good enough.  */
281     if (SvTYPE(retval) >= SVt_PV && (SvLEN(retval) - SvCUR(retval)) < 42) {
282         sv_grow(retval, SvCUR(retval) * 3 / 2);
283     }
284
285     realtype = SvTYPE(val);
286
287     if (SvGMAGICAL(val))
288         mg_get(val);
289     if (SvROK(val)) {
290
291         /* If a freeze method is provided and the object has it, call
292            it.  Warn on errors. */
293         if (SvOBJECT(SvRV(val)) && freezer &&
294             SvPOK(freezer) && SvCUR(freezer) &&
295             gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(freezer), 
296                          SvCUR(freezer), -1) != NULL)
297         {
298             dSP; ENTER; SAVETMPS; PUSHMARK(sp);
299             XPUSHs(val); PUTBACK;
300             i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID);
301             SPAGAIN;
302             if (SvTRUE(ERRSV))
303                 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
304             PUTBACK; FREETMPS; LEAVE;
305         }
306         
307         ival = SvRV(val);
308         realtype = SvTYPE(ival);
309 #ifdef DD_USE_OLD_ID_FORMAT
310         idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(ival));
311 #else
312         id_buffer = PTR2UV(ival);
313         idlen = sizeof(id_buffer);
314 #endif
315         if (SvOBJECT(ival))
316             realpack = HvNAME_get(SvSTASH(ival));
317         else
318             realpack = Nullch;
319
320         /* if it has a name, we need to either look it up, or keep a tab
321          * on it so we know when we hit it later
322          */
323         if (namelen) {
324             if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
325                 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
326             {
327                 SV *othername;
328                 if ((svp = av_fetch(seenentry, 0, FALSE))
329                     && (othername = *svp))
330                 {
331                     if (purity && *levelp > 0) {
332                         SV *postentry;
333                         
334                         if (realtype == SVt_PVHV)
335                             sv_catpvn(retval, "{}", 2);
336                         else if (realtype == SVt_PVAV)
337                             sv_catpvn(retval, "[]", 2);
338                         else
339                             sv_catpvn(retval, "do{my $o}", 9);
340                         postentry = newSVpvn(name, namelen);
341                         sv_catpvn(postentry, " = ", 3);
342                         sv_catsv(postentry, othername);
343                         av_push(postav, postentry);
344                     }
345                     else {
346                         if (name[0] == '@' || name[0] == '%') {
347                             if ((SvPVX_const(othername))[0] == '\\' &&
348                                 (SvPVX_const(othername))[1] == name[0]) {
349                                 sv_catpvn(retval, SvPVX_const(othername)+1,
350                                           SvCUR(othername)-1);
351                             }
352                             else {
353                                 sv_catpvn(retval, name, 1);
354                                 sv_catpvn(retval, "{", 1);
355                                 sv_catsv(retval, othername);
356                                 sv_catpvn(retval, "}", 1);
357                             }
358                         }
359                         else
360                             sv_catsv(retval, othername);
361                     }
362                     return 1;
363                 }
364                 else {
365 #ifdef DD_USE_OLD_ID_FORMAT
366                     warn("ref name not found for %s", id);
367 #else
368                     warn("ref name not found for 0x%"UVxf, PTR2UV(ival));
369 #endif
370                     return 0;
371                 }
372             }
373             else {   /* store our name and continue */
374                 SV *namesv;
375                 if (name[0] == '@' || name[0] == '%') {
376                     namesv = newSVpvn("\\", 1);
377                     sv_catpvn(namesv, name, namelen);
378                 }
379                 else if (realtype == SVt_PVCV && name[0] == '*') {
380                     namesv = newSVpvn("\\", 2);
381                     sv_catpvn(namesv, name, namelen);
382                     (SvPVX(namesv))[1] = '&';
383                 }
384                 else
385                     namesv = newSVpvn(name, namelen);
386                 seenentry = newAV();
387                 av_push(seenentry, namesv);
388                 (void)SvREFCNT_inc(val);
389                 av_push(seenentry, val);
390                 (void)hv_store(seenhv, id, idlen,
391                                newRV_inc((SV*)seenentry), 0);
392                 SvREFCNT_dec(seenentry);
393             }
394         }
395
396         if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) {
397             STRLEN rlen;
398             const char *rval = SvPV(val, rlen);
399             const char *slash = strchr(rval, '/');
400             sv_catpvn(retval, "qr/", 3);
401             while (slash) {
402                 sv_catpvn(retval, rval, slash-rval);
403                 sv_catpvn(retval, "\\/", 2);
404                 rlen -= slash-rval+1;
405                 rval = slash+1;
406                 slash = strchr(rval, '/');
407             }
408             sv_catpvn(retval, rval, rlen);
409             sv_catpvn(retval, "/", 1);
410             return 1;
411         }
412
413         /* If purity is not set and maxdepth is set, then check depth:
414          * if we have reached maximum depth, return the string
415          * representation of the thing we are currently examining
416          * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
417          */
418         if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
419             STRLEN vallen;
420             const char * const valstr = SvPV(val,vallen);
421             sv_catpvn(retval, "'", 1);
422             sv_catpvn(retval, valstr, vallen);
423             sv_catpvn(retval, "'", 1);
424             return 1;
425         }
426
427         if (realpack) {                         /* we have a blessed ref */
428             STRLEN blesslen;
429             const char * const blessstr = SvPV(bless, blesslen);
430             sv_catpvn(retval, blessstr, blesslen);
431             sv_catpvn(retval, "( ", 2);
432             if (indent >= 2) {
433                 blesspad = apad;
434                 apad = newSVsv(apad);
435                 sv_x(aTHX_ apad, " ", 1, blesslen+2);
436             }
437         }
438
439         (*levelp)++;
440         ipad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp);
441
442         if (realtype <= SVt_PVBM) {                          /* scalar ref */
443             SV * const namesv = newSVpvn("${", 2);
444             sv_catpvn(namesv, name, namelen);
445             sv_catpvn(namesv, "}", 1);
446             if (realpack) {                                  /* blessed */
447                 sv_catpvn(retval, "do{\\(my $o = ", 13);
448                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
449                         postav, levelp, indent, pad, xpad, apad, sep, pair,
450                         freezer, toaster, purity, deepcopy, quotekeys, bless,
451                         maxdepth, sortkeys);
452                 sv_catpvn(retval, ")}", 2);
453             }                                                /* plain */
454             else {
455                 sv_catpvn(retval, "\\", 1);
456                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
457                         postav, levelp, indent, pad, xpad, apad, sep, pair,
458                         freezer, toaster, purity, deepcopy, quotekeys, bless,
459                         maxdepth, sortkeys);
460             }
461             SvREFCNT_dec(namesv);
462         }
463         else if (realtype == SVt_PVGV) {                     /* glob ref */
464             SV * const namesv = newSVpvn("*{", 2);
465             sv_catpvn(namesv, name, namelen);
466             sv_catpvn(namesv, "}", 1);
467             sv_catpvn(retval, "\\", 1);
468             DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
469                     postav, levelp,     indent, pad, xpad, apad, sep, pair,
470                     freezer, toaster, purity, deepcopy, quotekeys, bless,
471                     maxdepth, sortkeys);
472             SvREFCNT_dec(namesv);
473         }
474         else if (realtype == SVt_PVAV) {
475             SV *totpad;
476             I32 ix = 0;
477             const I32 ixmax = av_len((AV *)ival);
478         
479             SV * const ixsv = newSViv(0);
480             /* allowing for a 24 char wide array index */
481             New(0, iname, namelen+28, char);
482             (void)strcpy(iname, name);
483             inamelen = namelen;
484             if (name[0] == '@') {
485                 sv_catpvn(retval, "(", 1);
486                 iname[0] = '$';
487             }
488             else {
489                 sv_catpvn(retval, "[", 1);
490                 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
491                 /*if (namelen > 0
492                     && name[namelen-1] != ']' && name[namelen-1] != '}'
493                     && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
494                 if ((namelen > 0
495                      && name[namelen-1] != ']' && name[namelen-1] != '}')
496                     || (namelen > 4
497                         && (name[1] == '{'
498                             || (name[0] == '\\' && name[2] == '{'))))
499                 {
500                     iname[inamelen++] = '-'; iname[inamelen++] = '>';
501                     iname[inamelen] = '\0';
502                 }
503             }
504             if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
505                 (instr(iname+inamelen-8, "{SCALAR}") ||
506                  instr(iname+inamelen-7, "{ARRAY}") ||
507                  instr(iname+inamelen-6, "{HASH}"))) {
508                 iname[inamelen++] = '-'; iname[inamelen++] = '>';
509             }
510             iname[inamelen++] = '['; iname[inamelen] = '\0';
511             totpad = newSVsv(sep);
512             sv_catsv(totpad, pad);
513             sv_catsv(totpad, apad);
514
515             for (ix = 0; ix <= ixmax; ++ix) {
516                 STRLEN ilen;
517                 SV *elem;
518                 svp = av_fetch((AV*)ival, ix, FALSE);
519                 if (svp)
520                     elem = *svp;
521                 else
522                     elem = &PL_sv_undef;
523                 
524                 ilen = inamelen;
525                 sv_setiv(ixsv, ix);
526                 ilen = ilen + my_sprintf(iname+ilen, "%"IVdf, (IV)ix);
527                 iname[ilen++] = ']'; iname[ilen] = '\0';
528                 if (indent >= 3) {
529                     sv_catsv(retval, totpad);
530                     sv_catsv(retval, ipad);
531                     sv_catpvn(retval, "#", 1);
532                     sv_catsv(retval, ixsv);
533                 }
534                 sv_catsv(retval, totpad);
535                 sv_catsv(retval, ipad);
536                 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
537                         levelp, indent, pad, xpad, apad, sep, pair,
538                         freezer, toaster, purity, deepcopy, quotekeys, bless,
539                         maxdepth, sortkeys);
540                 if (ix < ixmax)
541                     sv_catpvn(retval, ",", 1);
542             }
543             if (ixmax >= 0) {
544                 SV * const opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1);
545                 sv_catsv(retval, totpad);
546                 sv_catsv(retval, opad);
547                 SvREFCNT_dec(opad);
548             }
549             if (name[0] == '@')
550                 sv_catpvn(retval, ")", 1);
551             else
552                 sv_catpvn(retval, "]", 1);
553             SvREFCNT_dec(ixsv);
554             SvREFCNT_dec(totpad);
555             Safefree(iname);
556         }
557         else if (realtype == SVt_PVHV) {
558             SV *totpad, *newapad;
559             SV *sname;
560             HE *entry;
561             char *key;
562             I32 klen;
563             SV *hval;
564             AV *keys = NULL;
565         
566             SV * const iname = newSVpvn(name, namelen);
567             if (name[0] == '%') {
568                 sv_catpvn(retval, "(", 1);
569                 (SvPVX(iname))[0] = '$';
570             }
571             else {
572                 sv_catpvn(retval, "{", 1);
573                 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
574                 if ((namelen > 0
575                      && name[namelen-1] != ']' && name[namelen-1] != '}')
576                     || (namelen > 4
577                         && (name[1] == '{'
578                             || (name[0] == '\\' && name[2] == '{'))))
579                 {
580                     sv_catpvn(iname, "->", 2);
581                 }
582             }
583             if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
584                 (instr(name+namelen-8, "{SCALAR}") ||
585                  instr(name+namelen-7, "{ARRAY}") ||
586                  instr(name+namelen-6, "{HASH}"))) {
587                 sv_catpvn(iname, "->", 2);
588             }
589             sv_catpvn(iname, "{", 1);
590             totpad = newSVsv(sep);
591             sv_catsv(totpad, pad);
592             sv_catsv(totpad, apad);
593         
594             /* If requested, get a sorted/filtered array of hash keys */
595             if (sortkeys) {
596                 if (sortkeys == &PL_sv_yes) {
597 #if PERL_VERSION < 8
598                     sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
599 #else
600                     keys = newAV();
601                     (void)hv_iterinit((HV*)ival);
602                     while ((entry = hv_iternext((HV*)ival))) {
603                         sv = hv_iterkeysv(entry);
604                         SvREFCNT_inc(sv);
605                         av_push(keys, sv);
606                     }
607 # ifdef USE_LOCALE_NUMERIC
608                     sortsv(AvARRAY(keys), 
609                            av_len(keys)+1, 
610                            IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
611 # else
612                     sortsv(AvARRAY(keys), 
613                            av_len(keys)+1, 
614                            Perl_sv_cmp);
615 # endif
616 #endif
617                 }
618                 if (sortkeys != &PL_sv_yes) {
619                     dSP; ENTER; SAVETMPS; PUSHMARK(sp);
620                     XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
621                     i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
622                     SPAGAIN;
623                     if (i) {
624                         sv = POPs;
625                         if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
626                             keys = (AV*)SvREFCNT_inc(SvRV(sv));
627                     }
628                     if (! keys)
629                         warn("Sortkeys subroutine did not return ARRAYREF\n");
630                     PUTBACK; FREETMPS; LEAVE;
631                 }
632                 if (keys)
633                     sv_2mortal((SV*)keys);
634             }
635             else
636                 (void)hv_iterinit((HV*)ival);
637
638             /* foreach (keys %hash) */
639             for (i = 0; 1; i++) {
640                 char *nkey;
641                 char *nkey_buffer = NULL;
642                 I32 nticks = 0;
643                 SV* keysv;
644                 STRLEN keylen;
645                 I32 nlen;
646                 bool do_utf8 = FALSE;
647
648                if (sortkeys) {
649                    if (!(keys && (I32)i <= av_len(keys))) break;
650                } else {
651                    if (!(entry = hv_iternext((HV *)ival))) break;
652                }
653
654                 if (i)
655                     sv_catpvn(retval, ",", 1);
656
657                 if (sortkeys) {
658                     char *key;
659                     svp = av_fetch(keys, i, FALSE);
660                     keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
661                     key = SvPV(keysv, keylen);
662                     svp = hv_fetch((HV*)ival, key,
663                                    SvUTF8(keysv) ? -(I32)keylen : keylen, 0);
664                     hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
665                 }
666                 else {
667                     keysv = hv_iterkeysv(entry);
668                     hval = hv_iterval((HV*)ival, entry);
669                 }
670
671                 key = SvPV(keysv, keylen);
672                 do_utf8 = DO_UTF8(keysv);
673                 klen = keylen;
674
675                 sv_catsv(retval, totpad);
676                 sv_catsv(retval, ipad);
677                 /* old logic was first to check utf8 flag, and if utf8 always
678                    call esc_q_utf8.  This caused test to break under -Mutf8,
679                    because there even strings like 'c' have utf8 flag on.
680                    Hence with quotekeys == 0 the XS code would still '' quote
681                    them based on flags, whereas the perl code would not,
682                    based on regexps.
683                    The perl code is correct.
684                    needs_quote() decides that anything that isn't a valid
685                    perl identifier needs to be quoted, hence only correctly
686                    formed strings with no characters outside [A-Za-z0-9_:]
687                    won't need quoting.  None of those characters are used in
688                    the byte encoding of utf8, so anything with utf8
689                    encoded characters in will need quoting. Hence strings
690                    with utf8 encoded characters in will end up inside do_utf8
691                    just like before, but now strings with utf8 flag set but
692                    only ascii characters will end up in the unquoted section.
693
694                    There should also be less tests for the (probably currently)
695                    more common doesn't need quoting case.
696                    The code is also smaller (22044 vs 22260) because I've been
697                    able to pull the common logic out to both sides.  */
698                 if (quotekeys || needs_quote(key)) {
699                     if (do_utf8) {
700                         STRLEN ocur = SvCUR(retval);
701                         nlen = esc_q_utf8(aTHX_ retval, key, klen);
702                         nkey = SvPVX(retval) + ocur;
703                     }
704                     else {
705                         nticks = num_q(key, klen);
706                         New(0, nkey_buffer, klen+nticks+3, char);
707                         nkey = nkey_buffer;
708                         nkey[0] = '\'';
709                         if (nticks)
710                             klen += esc_q(nkey+1, key, klen);
711                         else
712                             (void)Copy(key, nkey+1, klen, char);
713                         nkey[++klen] = '\'';
714                         nkey[++klen] = '\0';
715                         nlen = klen;
716                         sv_catpvn(retval, nkey, klen);
717                     }
718                 }
719                 else {
720                     nkey = key;
721                     nlen = klen;
722                     sv_catpvn(retval, nkey, klen);
723                 }
724                 sname = newSVsv(iname);
725                 sv_catpvn(sname, nkey, nlen);
726                 sv_catpvn(sname, "}", 1);
727
728                 sv_catsv(retval, pair);
729                 if (indent >= 2) {
730                     char *extra;
731                     I32 elen = 0;
732                     newapad = newSVsv(apad);
733                     New(0, extra, klen+4+1, char);
734                     while (elen < (klen+4))
735                         extra[elen++] = ' ';
736                     extra[elen] = '\0';
737                     sv_catpvn(newapad, extra, elen);
738                     Safefree(extra);
739                 }
740                 else
741                     newapad = apad;
742
743                 DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
744                         postav, levelp, indent, pad, xpad, newapad, sep, pair,
745                         freezer, toaster, purity, deepcopy, quotekeys, bless,
746                         maxdepth, sortkeys);
747                 SvREFCNT_dec(sname);
748                 Safefree(nkey_buffer);
749                 if (indent >= 2)
750                     SvREFCNT_dec(newapad);
751             }
752             if (i) {
753                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp-1);
754                 sv_catsv(retval, totpad);
755                 sv_catsv(retval, opad);
756                 SvREFCNT_dec(opad);
757             }
758             if (name[0] == '%')
759                 sv_catpvn(retval, ")", 1);
760             else
761                 sv_catpvn(retval, "}", 1);
762             SvREFCNT_dec(iname);
763             SvREFCNT_dec(totpad);
764         }
765         else if (realtype == SVt_PVCV) {
766             sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
767             if (purity)
768                 warn("Encountered CODE ref, using dummy placeholder");
769         }
770         else {
771             warn("cannot handle ref type %ld", realtype);
772         }
773
774         if (realpack) {  /* free blessed allocs */
775             if (indent >= 2) {
776                 SvREFCNT_dec(apad);
777                 apad = blesspad;
778             }
779             sv_catpvn(retval, ", '", 3);
780             sv_catpvn(retval, realpack, strlen(realpack));
781             sv_catpvn(retval, "' )", 3);
782             if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
783                 sv_catpvn(retval, "->", 2);
784                 sv_catsv(retval, toaster);
785                 sv_catpvn(retval, "()", 2);
786             }
787         }
788         SvREFCNT_dec(ipad);
789         (*levelp)--;
790     }
791     else {
792         STRLEN i;
793         
794         if (namelen) {
795 #ifdef DD_USE_OLD_ID_FORMAT
796             idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(val));
797 #else
798             id_buffer = PTR2UV(val);
799             idlen = sizeof(id_buffer);
800 #endif
801             if ((svp = hv_fetch(seenhv, id, idlen, FALSE)) &&
802                 (sv = *svp) && SvROK(sv) &&
803                 (seenentry = (AV*)SvRV(sv)))
804             {
805                 SV *othername;
806                 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
807                     && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
808                 {
809                     sv_catpvn(retval, "${", 2);
810                     sv_catsv(retval, othername);
811                     sv_catpvn(retval, "}", 1);
812                     return 1;
813                 }
814             }
815             else if (val != &PL_sv_undef) {
816                 SV * const namesv = newSVpvn("\\", 1);
817                 sv_catpvn(namesv, name, namelen);
818                 seenentry = newAV();
819                 av_push(seenentry, namesv);
820                 av_push(seenentry, newRV_inc(val));
821                 (void)hv_store(seenhv, id, idlen, newRV_inc((SV*)seenentry), 0);
822                 SvREFCNT_dec(seenentry);
823             }
824         }
825
826         if (DD_is_integer(val)) {
827             STRLEN len;
828             if (SvIsUV(val))
829               len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"UVuf, SvUV(val));
830             else
831               len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, SvIV(val));
832             if (SvPOK(val)) {
833               /* Need to check to see if this is a string such as " 0".
834                  I'm assuming from sprintf isn't going to clash with utf8.
835                  Is this valid on EBCDIC?  */
836               STRLEN pvlen;
837               const char * const pv = SvPV(val, pvlen);
838               if (pvlen != len || memNE(pv, tmpbuf, len))
839                 goto integer_came_from_string;
840             }
841             if (len > 10) {
842               /* Looks like we're on a 64 bit system.  Make it a string so that
843                  if a 32 bit system reads the number it will cope better.  */
844               sv_catpvf(retval, "'%s'", tmpbuf);
845             } else
846               sv_catpvn(retval, tmpbuf, len);
847         }
848         else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
849             c = SvPV(val, i);
850             ++c; --i;                   /* just get the name */
851             if (i >= 6 && strncmp(c, "main::", 6) == 0) {
852                 c += 4;
853                 i -= 4;
854             }
855             if (needs_quote(c)) {
856                 sv_grow(retval, SvCUR(retval)+6+2*i);
857                 r = SvPVX(retval)+SvCUR(retval);
858                 r[0] = '*'; r[1] = '{'; r[2] = '\'';
859                 i += esc_q(r+3, c, i);
860                 i += 3;
861                 r[i++] = '\''; r[i++] = '}';
862                 r[i] = '\0';
863             }
864             else {
865                 sv_grow(retval, SvCUR(retval)+i+2);
866                 r = SvPVX(retval)+SvCUR(retval);
867                 r[0] = '*'; strcpy(r+1, c);
868                 i++;
869             }
870             SvCUR_set(retval, SvCUR(retval)+i);
871
872             if (purity) {
873                 static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
874                 static const STRLEN sizes[] = { 8, 7, 6 };
875                 SV *e;
876                 SV * const nname = newSVpvn("", 0);
877                 SV * const newapad = newSVpvn("", 0);
878                 GV * const gv = (GV*)val;
879                 I32 j;
880                 
881                 for (j=0; j<3; j++) {
882                     e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
883                     if (!e)
884                         continue;
885                     if (j == 0 && !SvOK(e))
886                         continue;
887
888                     {
889                         I32 nlevel = 0;
890                         SV *postentry = newSVpvn(r,i);
891                         
892                         sv_setsv(nname, postentry);
893                         sv_catpvn(nname, entries[j], sizes[j]);
894                         sv_catpvn(postentry, " = ", 3);
895                         av_push(postav, postentry);
896                         e = newRV_inc(e);
897                         
898                         SvCUR_set(newapad, 0);
899                         if (indent >= 2)
900                             (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
901                         
902                         DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry,
903                                 seenhv, postav, &nlevel, indent, pad, xpad,
904                                 newapad, sep, pair, freezer, toaster, purity,
905                                 deepcopy, quotekeys, bless, maxdepth, 
906                                 sortkeys);
907                         SvREFCNT_dec(e);
908                     }
909                 }
910                 
911                 SvREFCNT_dec(newapad);
912                 SvREFCNT_dec(nname);
913             }
914         }
915         else if (val == &PL_sv_undef || !SvOK(val)) {
916             sv_catpvn(retval, "undef", 5);
917         }
918         else {
919         integer_came_from_string:
920             c = SvPV(val, i);
921             if (DO_UTF8(val))
922                 i += esc_q_utf8(aTHX_ retval, c, i);
923             else {
924                 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
925                 r = SvPVX(retval) + SvCUR(retval);
926                 r[0] = '\'';
927                 i += esc_q(r+1, c, i);
928                 ++i;
929                 r[i++] = '\'';
930                 r[i] = '\0';
931                 SvCUR_set(retval, SvCUR(retval)+i);
932             }
933         }
934     }
935
936     if (idlen) {
937         if (deepcopy)
938             (void)hv_delete(seenhv, id, idlen, G_DISCARD);
939         else if (namelen && seenentry) {
940             SV *mark = *av_fetch(seenentry, 2, TRUE);
941             sv_setiv(mark,1);
942         }
943     }
944     return 1;
945 }
946
947
948 MODULE = Data::Dumper           PACKAGE = Data::Dumper         PREFIX = Data_Dumper_
949
950 #
951 # This is the exact equivalent of Dump.  Well, almost. The things that are
952 # different as of now (due to Laziness):
953 #   * doesnt do double-quotes yet.
954 #
955
956 void
957 Data_Dumper_Dumpxs(href, ...)
958         SV      *href;
959         PROTOTYPE: $;$$
960         PPCODE:
961         {
962             HV *hv;
963             SV *retval, *valstr;
964             HV *seenhv = NULL;
965             AV *postav, *todumpav, *namesav;
966             I32 level = 0;
967             I32 indent, terse, i, imax, postlen;
968             SV **svp;
969             SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
970             SV *freezer, *toaster, *bless, *sortkeys;
971             I32 purity, deepcopy, quotekeys, maxdepth = 0;
972             char tmpbuf[1024];
973             I32 gimme = GIMME;
974
975             if (!SvROK(href)) {         /* call new to get an object first */
976                 if (items < 2)
977                     croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
978                 
979                 ENTER;
980                 SAVETMPS;
981                 
982                 PUSHMARK(sp);
983                 XPUSHs(href);
984                 XPUSHs(sv_2mortal(newSVsv(ST(1))));
985                 if (items >= 3)
986                     XPUSHs(sv_2mortal(newSVsv(ST(2))));
987                 PUTBACK;
988                 i = perl_call_method("new", G_SCALAR);
989                 SPAGAIN;
990                 if (i)
991                     href = newSVsv(POPs);
992
993                 PUTBACK;
994                 FREETMPS;
995                 LEAVE;
996                 if (i)
997                     (void)sv_2mortal(href);
998             }
999
1000             todumpav = namesav = NULL;
1001             seenhv = NULL;
1002             val = pad = xpad = apad = sep = pair = varname
1003                 = freezer = toaster = bless = sortkeys = &PL_sv_undef;
1004             name = sv_newmortal();
1005             indent = 2;
1006             terse = purity = deepcopy = 0;
1007             quotekeys = 1;
1008         
1009             retval = newSVpvn("", 0);
1010             if (SvROK(href)
1011                 && (hv = (HV*)SvRV((SV*)href))
1012                 && SvTYPE(hv) == SVt_PVHV)              {
1013
1014                 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
1015                     seenhv = (HV*)SvRV(*svp);
1016                 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
1017                     todumpav = (AV*)SvRV(*svp);
1018                 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
1019                     namesav = (AV*)SvRV(*svp);
1020                 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
1021                     indent = SvIV(*svp);
1022                 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
1023                     purity = SvIV(*svp);
1024                 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
1025                     terse = SvTRUE(*svp);
1026 #if 0 /* useqq currently unused */
1027                 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
1028                     useqq = SvTRUE(*svp);
1029 #endif
1030                 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
1031                     pad = *svp;
1032                 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
1033                     xpad = *svp;
1034                 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
1035                     apad = *svp;
1036                 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
1037                     sep = *svp;
1038                 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
1039                     pair = *svp;
1040                 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1041                     varname = *svp;
1042                 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
1043                     freezer = *svp;
1044                 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
1045                     toaster = *svp;
1046                 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
1047                     deepcopy = SvTRUE(*svp);
1048                 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
1049                     quotekeys = SvTRUE(*svp);
1050                 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
1051                     bless = *svp;
1052                 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
1053                     maxdepth = SvIV(*svp);
1054                 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1055                     sortkeys = *svp;
1056                     if (! SvTRUE(sortkeys))
1057                         sortkeys = NULL;
1058                     else if (! (SvROK(sortkeys) &&
1059                                 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1060                     {
1061                         /* flag to use qsortsv() for sorting hash keys */       
1062                         sortkeys = &PL_sv_yes; 
1063                     }
1064                 }
1065                 postav = newAV();
1066
1067                 if (todumpav)
1068                     imax = av_len(todumpav);
1069                 else
1070                     imax = -1;
1071                 valstr = newSVpvn("",0);
1072                 for (i = 0; i <= imax; ++i) {
1073                     SV *newapad;
1074                 
1075                     av_clear(postav);
1076                     if ((svp = av_fetch(todumpav, i, FALSE)))
1077                         val = *svp;
1078                     else
1079                         val = &PL_sv_undef;
1080                     if ((svp = av_fetch(namesav, i, TRUE))) {
1081                         sv_setsv(name, *svp);
1082                         if (SvOK(*svp) && !SvPOK(*svp))
1083                             (void)SvPV_nolen_const(name);
1084                     }
1085                     else
1086                         (void)SvOK_off(name);
1087                 
1088                     if (SvPOK(name)) {
1089                         if ((SvPVX_const(name))[0] == '*') {
1090                             if (SvROK(val)) {
1091                                 switch (SvTYPE(SvRV(val))) {
1092                                 case SVt_PVAV:
1093                                     (SvPVX(name))[0] = '@';
1094                                     break;
1095                                 case SVt_PVHV:
1096                                     (SvPVX(name))[0] = '%';
1097                                     break;
1098                                 case SVt_PVCV:
1099                                     (SvPVX(name))[0] = '*';
1100                                     break;
1101                                 default:
1102                                     (SvPVX(name))[0] = '$';
1103                                     break;
1104                                 }
1105                             }
1106                             else
1107                                 (SvPVX(name))[0] = '$';
1108                         }
1109                         else if ((SvPVX_const(name))[0] != '$')
1110                             sv_insert(name, 0, 0, "$", 1);
1111                     }
1112                     else {
1113                         STRLEN nchars;
1114                         sv_setpvn(name, "$", 1);
1115                         sv_catsv(name, varname);
1116                         nchars = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, (IV)(i+1));
1117                         sv_catpvn(name, tmpbuf, nchars);
1118                     }
1119                 
1120                     if (indent >= 2) {
1121                         SV * const tmpsv = sv_x(aTHX_ NULL, " ", 1, SvCUR(name)+3);
1122                         newapad = newSVsv(apad);
1123                         sv_catsv(newapad, tmpsv);
1124                         SvREFCNT_dec(tmpsv);
1125                     }
1126                     else
1127                         newapad = apad;
1128                 
1129                     DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
1130                             postav, &level, indent, pad, xpad, newapad, sep, pair,
1131                             freezer, toaster, purity, deepcopy, quotekeys,
1132                             bless, maxdepth, sortkeys);
1133                 
1134                     if (indent >= 2)
1135                         SvREFCNT_dec(newapad);
1136
1137                     postlen = av_len(postav);
1138                     if (postlen >= 0 || !terse) {
1139                         sv_insert(valstr, 0, 0, " = ", 3);
1140                         sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name));
1141                         sv_catpvn(valstr, ";", 1);
1142                     }
1143                     sv_catsv(retval, pad);
1144                     sv_catsv(retval, valstr);
1145                     sv_catsv(retval, sep);
1146                     if (postlen >= 0) {
1147                         I32 i;
1148                         sv_catsv(retval, pad);
1149                         for (i = 0; i <= postlen; ++i) {
1150                             SV *elem;
1151                             svp = av_fetch(postav, i, FALSE);
1152                             if (svp && (elem = *svp)) {
1153                                 sv_catsv(retval, elem);
1154                                 if (i < postlen) {
1155                                     sv_catpvn(retval, ";", 1);
1156                                     sv_catsv(retval, sep);
1157                                     sv_catsv(retval, pad);
1158                                 }
1159                             }
1160                         }
1161                         sv_catpvn(retval, ";", 1);
1162                             sv_catsv(retval, sep);
1163                     }
1164                     sv_setpvn(valstr, "", 0);
1165                     if (gimme == G_ARRAY) {
1166                         XPUSHs(sv_2mortal(retval));
1167                         if (i < imax)   /* not the last time thro ? */
1168                             retval = newSVpvn("",0);
1169                     }
1170                 }
1171                 SvREFCNT_dec(postav);
1172                 SvREFCNT_dec(valstr);
1173             }
1174             else
1175                 croak("Call to new() method failed to return HASH ref");
1176             if (gimme == G_SCALAR)
1177                 XPUSHs(sv_2mortal(retval));
1178         }