Use packed addresses for the seen tracking hash, rather than
[p5sagit/p5-mst-13.2.git] / ext / Data / Dumper / Dumper.xs
1 #define PERL_NO_GET_CONTEXT
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5 #include "ppport.h"
6
7 #if PERL_VERSION < 6
8 #  define DD_USE_OLD_ID_FORMAT
9 #endif
10
11 static I32 num_q (const char *s, STRLEN slen);
12 static I32 esc_q (char *dest, const char *src, STRLEN slen);
13 static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
14 static I32 needs_quote(register const char *s);
15 static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
16 static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
17                     HV *seenhv, AV *postav, I32 *levelp, I32 indent,
18                     SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
19                     SV *freezer, SV *toaster,
20                     I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
21                     I32 maxdepth, SV *sortkeys);
22
23 #ifndef HvNAME_get
24 #define HvNAME_get HvNAME
25 #endif
26
27 #if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
28
29 # ifdef EBCDIC
30 #  define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
31 # else
32 #  define UNI_TO_NATIVE(ch) (ch)
33 # endif
34
35 UV
36 Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
37 {
38     const UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
39                     ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
40     return UNI_TO_NATIVE(uv);
41 }
42
43 # if !defined(PERL_IMPLICIT_CONTEXT)
44 #  define utf8_to_uvchr      Perl_utf8_to_uvchr
45 # else
46 #  define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
47 # endif
48
49 #endif /* PERL_VERSION <= 6 */
50
51 /* Changes in 5.7 series mean that now IOK is only set if scalar is
52    precisely integer but in 5.6 and earlier we need to do a more
53    complex test  */
54 #if PERL_VERSION <= 6
55 #define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
56 #else
57 #define DD_is_integer(sv) SvIOK(sv)
58 #endif
59
60 /* does a string need to be protected? */
61 static I32
62 needs_quote(register const char *s)
63 {
64 TOP:
65     if (s[0] == ':') {
66         if (*++s) {
67             if (*s++ != ':')
68                 return 1;
69         }
70         else
71             return 1;
72     }
73     if (isIDFIRST(*s)) {
74         while (*++s)
75             if (!isALNUM(*s)) {
76                 if (*s == ':')
77                     goto TOP;
78                 else
79                     return 1;
80             }
81     }
82     else
83         return 1;
84     return 0;
85 }
86
87 /* count the number of "'"s and "\"s in string */
88 static I32
89 num_q(register const char *s, register STRLEN slen)
90 {
91     register I32 ret = 0;
92
93     while (slen > 0) {
94         if (*s == '\'' || *s == '\\')
95             ++ret;
96         ++s;
97         --slen;
98     }
99     return ret;
100 }
101
102
103 /* returns number of chars added to escape "'"s and "\"s in s */
104 /* slen number of characters in s will be escaped */
105 /* destination must be long enough for additional chars */
106 static I32
107 esc_q(register char *d, register const char *s, register STRLEN slen)
108 {
109     register I32 ret = 0;
110
111     while (slen > 0) {
112         switch (*s) {
113         case '\'':
114         case '\\':
115             *d = '\\';
116             ++d; ++ret;
117         default:
118             *d = *s;
119             ++d; ++s; --slen;
120             break;
121         }
122     }
123     return ret;
124 }
125
126 static I32
127 esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
128 {
129     char *r, *rstart;
130     const char *s = src;
131     const char * const send = src + slen;
132     STRLEN j, cur = SvCUR(sv);
133     /* Could count 128-255 and 256+ in two variables, if we want to
134        be like &qquote and make a distinction.  */
135     STRLEN grow = 0;    /* bytes needed to represent chars 128+ */
136     /* STRLEN topbit_grow = 0;  bytes needed to represent chars 128-255 */
137     STRLEN backslashes = 0;
138     STRLEN single_quotes = 0;
139     STRLEN qq_escapables = 0;   /* " $ @ will need a \ in "" strings.  */
140     STRLEN normal = 0;
141
142     /* this will need EBCDICification */
143     for (s = src; s < send; s += UTF8SKIP(s)) {
144         const UV k = utf8_to_uvchr((U8*)s, NULL);
145
146 #ifdef EBCDIC
147         if (!isprint(k) || k > 256) {
148 #else
149         if (k > 127) {
150 #endif
151             /* 4: \x{} then count the number of hex digits.  */
152             grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
153 #if UVSIZE == 4
154                 8 /* We may allocate a bit more than the minimum here.  */
155 #else
156                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
157 #endif
158                 );
159         } else if (k == '\\') {
160             backslashes++;
161         } else if (k == '\'') {
162             single_quotes++;
163         } else if (k == '"' || k == '$' || k == '@') {
164             qq_escapables++;
165         } else {
166             normal++;
167         }
168     }
169     if (grow) {
170         /* We have something needing hex. 3 is ""\0 */
171         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
172                 + 2*qq_escapables + normal);
173         rstart = r = SvPVX(sv) + cur;
174
175         *r++ = '"';
176
177         for (s = src; s < send; s += UTF8SKIP(s)) {
178             const UV k = utf8_to_uvchr((U8*)s, NULL);
179
180             if (k == '"' || k == '\\' || k == '$' || k == '@') {
181                 *r++ = '\\';
182                 *r++ = (char)k;
183             }
184             else
185 #ifdef EBCDIC
186               if (isprint(k) && k < 256)
187 #else
188               if (k < 0x80)
189 #endif
190                 *r++ = (char)k;
191             else {
192                 r = r + my_sprintf(r, "\\x{%"UVxf"}", k);
193             }
194         }
195         *r++ = '"';
196     } else {
197         /* Single quotes.  */
198         sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
199                 + qq_escapables + normal);
200         rstart = r = SvPVX(sv) + cur;
201         *r++ = '\'';
202         for (s = src; s < send; s ++) {
203             const char k = *s;
204             if (k == '\'' || k == '\\')
205                 *r++ = '\\';
206             *r++ = k;
207         }
208         *r++ = '\'';
209     }
210     *r = '\0';
211     j = r - rstart;
212     SvCUR_set(sv, cur + j);
213
214     return j;
215 }
216
217 /* append a repeated string to an SV */
218 static SV *
219 sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
220 {
221     if (!sv)
222         sv = newSVpvn("", 0);
223 #ifdef DEBUGGING
224     else
225         assert(SvTYPE(sv) >= SVt_PV);
226 #endif
227
228     if (n > 0) {
229         SvGROW(sv, len*n + SvCUR(sv) + 1);
230         if (len == 1) {
231             char * const start = SvPVX(sv) + SvCUR(sv);
232             SvCUR_set(sv, SvCUR(sv) + n);
233             start[n] = '\0';
234             while (n > 0)
235                 start[--n] = str[0];
236         }
237         else
238             while (n > 0) {
239                 sv_catpvn(sv, str, len);
240                 --n;
241             }
242     }
243     return sv;
244 }
245
246 /*
247  * This ought to be split into smaller functions. (it is one long function since
248  * it exactly parallels the perl version, which was one long thing for
249  * efficiency raisins.)  Ugggh!
250  */
251 static I32
252 DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
253         AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
254         SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
255         I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
256 {
257     char tmpbuf[128];
258     U32 i;
259     char *c, *r, *realpack;
260 #ifdef DD_USE_OLD_ID_FORMAT
261     char id[128];
262 #else
263     UV id_buffer;
264     char *const id = (char *)&id_buffer;
265 #endif
266     SV **svp;
267     SV *sv, *ipad, *ival;
268     SV *blesspad = Nullsv;
269     AV *seenentry = NULL;
270     char *iname;
271     STRLEN inamelen, idlen = 0;
272     U32 realtype;
273
274     if (!val)
275         return 0;
276
277     realtype = SvTYPE(val);
278
279     if (SvGMAGICAL(val))
280         mg_get(val);
281     if (SvROK(val)) {
282
283         /* If a freeze method is provided and the object has it, call
284            it.  Warn on errors. */
285         if (SvOBJECT(SvRV(val)) && freezer &&
286             SvPOK(freezer) && SvCUR(freezer) &&
287             gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(freezer), 
288                          SvCUR(freezer), -1) != NULL)
289         {
290             dSP; ENTER; SAVETMPS; PUSHMARK(sp);
291             XPUSHs(val); PUTBACK;
292             i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID);
293             SPAGAIN;
294             if (SvTRUE(ERRSV))
295                 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
296             PUTBACK; FREETMPS; LEAVE;
297         }
298         
299         ival = SvRV(val);
300         realtype = SvTYPE(ival);
301 #ifdef DD_USE_OLD_ID_FORMAT
302         idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(ival));
303 #else
304         id_buffer = PTR2UV(ival);
305         idlen = sizeof(id_buffer);
306 #endif
307         if (SvOBJECT(ival))
308             realpack = HvNAME_get(SvSTASH(ival));
309         else
310             realpack = Nullch;
311
312         /* if it has a name, we need to either look it up, or keep a tab
313          * on it so we know when we hit it later
314          */
315         if (namelen) {
316             if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
317                 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
318             {
319                 SV *othername;
320                 if ((svp = av_fetch(seenentry, 0, FALSE))
321                     && (othername = *svp))
322                 {
323                     if (purity && *levelp > 0) {
324                         SV *postentry;
325                         
326                         if (realtype == SVt_PVHV)
327                             sv_catpvn(retval, "{}", 2);
328                         else if (realtype == SVt_PVAV)
329                             sv_catpvn(retval, "[]", 2);
330                         else
331                             sv_catpvn(retval, "do{my $o}", 9);
332                         postentry = newSVpvn(name, namelen);
333                         sv_catpvn(postentry, " = ", 3);
334                         sv_catsv(postentry, othername);
335                         av_push(postav, postentry);
336                     }
337                     else {
338                         if (name[0] == '@' || name[0] == '%') {
339                             if ((SvPVX_const(othername))[0] == '\\' &&
340                                 (SvPVX_const(othername))[1] == name[0]) {
341                                 sv_catpvn(retval, SvPVX_const(othername)+1,
342                                           SvCUR(othername)-1);
343                             }
344                             else {
345                                 sv_catpvn(retval, name, 1);
346                                 sv_catpvn(retval, "{", 1);
347                                 sv_catsv(retval, othername);
348                                 sv_catpvn(retval, "}", 1);
349                             }
350                         }
351                         else
352                             sv_catsv(retval, othername);
353                     }
354                     return 1;
355                 }
356                 else {
357 #ifdef DD_USE_OLD_ID_FORMAT
358                     warn("ref name not found for %s", id);
359 #else
360                     warn("ref name not found for 0x%"UVxf, PTR2UV(ival));
361 #endif
362                     return 0;
363                 }
364             }
365             else {   /* store our name and continue */
366                 SV *namesv;
367                 if (name[0] == '@' || name[0] == '%') {
368                     namesv = newSVpvn("\\", 1);
369                     sv_catpvn(namesv, name, namelen);
370                 }
371                 else if (realtype == SVt_PVCV && name[0] == '*') {
372                     namesv = newSVpvn("\\", 2);
373                     sv_catpvn(namesv, name, namelen);
374                     (SvPVX(namesv))[1] = '&';
375                 }
376                 else
377                     namesv = newSVpvn(name, namelen);
378                 seenentry = newAV();
379                 av_push(seenentry, namesv);
380                 (void)SvREFCNT_inc(val);
381                 av_push(seenentry, val);
382                 (void)hv_store(seenhv, id, idlen,
383                                newRV_inc((SV*)seenentry), 0);
384                 SvREFCNT_dec(seenentry);
385             }
386         }
387
388         if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) {
389             STRLEN rlen;
390             const char *rval = SvPV(val, rlen);
391             const char *slash = strchr(rval, '/');
392             sv_catpvn(retval, "qr/", 3);
393             while (slash) {
394                 sv_catpvn(retval, rval, slash-rval);
395                 sv_catpvn(retval, "\\/", 2);
396                 rlen -= slash-rval+1;
397                 rval = slash+1;
398                 slash = strchr(rval, '/');
399             }
400             sv_catpvn(retval, rval, rlen);
401             sv_catpvn(retval, "/", 1);
402             return 1;
403         }
404
405         /* If purity is not set and maxdepth is set, then check depth:
406          * if we have reached maximum depth, return the string
407          * representation of the thing we are currently examining
408          * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
409          */
410         if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
411             STRLEN vallen;
412             const char * const valstr = SvPV(val,vallen);
413             sv_catpvn(retval, "'", 1);
414             sv_catpvn(retval, valstr, vallen);
415             sv_catpvn(retval, "'", 1);
416             return 1;
417         }
418
419         if (realpack) {                         /* we have a blessed ref */
420             STRLEN blesslen;
421             const char * const blessstr = SvPV(bless, blesslen);
422             sv_catpvn(retval, blessstr, blesslen);
423             sv_catpvn(retval, "( ", 2);
424             if (indent >= 2) {
425                 blesspad = apad;
426                 apad = newSVsv(apad);
427                 sv_x(aTHX_ apad, " ", 1, blesslen+2);
428             }
429         }
430
431         (*levelp)++;
432         ipad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp);
433
434         if (realtype <= SVt_PVBM) {                          /* scalar ref */
435             SV * const namesv = newSVpvn("${", 2);
436             sv_catpvn(namesv, name, namelen);
437             sv_catpvn(namesv, "}", 1);
438             if (realpack) {                                  /* blessed */
439                 sv_catpvn(retval, "do{\\(my $o = ", 13);
440                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
441                         postav, levelp, indent, pad, xpad, apad, sep, pair,
442                         freezer, toaster, purity, deepcopy, quotekeys, bless,
443                         maxdepth, sortkeys);
444                 sv_catpvn(retval, ")}", 2);
445             }                                                /* plain */
446             else {
447                 sv_catpvn(retval, "\\", 1);
448                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
449                         postav, levelp, indent, pad, xpad, apad, sep, pair,
450                         freezer, toaster, purity, deepcopy, quotekeys, bless,
451                         maxdepth, sortkeys);
452             }
453             SvREFCNT_dec(namesv);
454         }
455         else if (realtype == SVt_PVGV) {                     /* glob ref */
456             SV * const namesv = newSVpvn("*{", 2);
457             sv_catpvn(namesv, name, namelen);
458             sv_catpvn(namesv, "}", 1);
459             sv_catpvn(retval, "\\", 1);
460             DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
461                     postav, levelp,     indent, pad, xpad, apad, sep, pair,
462                     freezer, toaster, purity, deepcopy, quotekeys, bless,
463                     maxdepth, sortkeys);
464             SvREFCNT_dec(namesv);
465         }
466         else if (realtype == SVt_PVAV) {
467             SV *totpad;
468             I32 ix = 0;
469             const I32 ixmax = av_len((AV *)ival);
470         
471             SV * const ixsv = newSViv(0);
472             /* allowing for a 24 char wide array index */
473             New(0, iname, namelen+28, char);
474             (void)strcpy(iname, name);
475             inamelen = namelen;
476             if (name[0] == '@') {
477                 sv_catpvn(retval, "(", 1);
478                 iname[0] = '$';
479             }
480             else {
481                 sv_catpvn(retval, "[", 1);
482                 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
483                 /*if (namelen > 0
484                     && name[namelen-1] != ']' && name[namelen-1] != '}'
485                     && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
486                 if ((namelen > 0
487                      && name[namelen-1] != ']' && name[namelen-1] != '}')
488                     || (namelen > 4
489                         && (name[1] == '{'
490                             || (name[0] == '\\' && name[2] == '{'))))
491                 {
492                     iname[inamelen++] = '-'; iname[inamelen++] = '>';
493                     iname[inamelen] = '\0';
494                 }
495             }
496             if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
497                 (instr(iname+inamelen-8, "{SCALAR}") ||
498                  instr(iname+inamelen-7, "{ARRAY}") ||
499                  instr(iname+inamelen-6, "{HASH}"))) {
500                 iname[inamelen++] = '-'; iname[inamelen++] = '>';
501             }
502             iname[inamelen++] = '['; iname[inamelen] = '\0';
503             totpad = newSVsv(sep);
504             sv_catsv(totpad, pad);
505             sv_catsv(totpad, apad);
506
507             for (ix = 0; ix <= ixmax; ++ix) {
508                 STRLEN ilen;
509                 SV *elem;
510                 svp = av_fetch((AV*)ival, ix, FALSE);
511                 if (svp)
512                     elem = *svp;
513                 else
514                     elem = &PL_sv_undef;
515                 
516                 ilen = inamelen;
517                 sv_setiv(ixsv, ix);
518                 ilen = ilen + my_sprintf(iname+ilen, "%"IVdf, (IV)ix);
519                 iname[ilen++] = ']'; iname[ilen] = '\0';
520                 if (indent >= 3) {
521                     sv_catsv(retval, totpad);
522                     sv_catsv(retval, ipad);
523                     sv_catpvn(retval, "#", 1);
524                     sv_catsv(retval, ixsv);
525                 }
526                 sv_catsv(retval, totpad);
527                 sv_catsv(retval, ipad);
528                 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
529                         levelp, indent, pad, xpad, apad, sep, pair,
530                         freezer, toaster, purity, deepcopy, quotekeys, bless,
531                         maxdepth, sortkeys);
532                 if (ix < ixmax)
533                     sv_catpvn(retval, ",", 1);
534             }
535             if (ixmax >= 0) {
536                 SV * const opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1);
537                 sv_catsv(retval, totpad);
538                 sv_catsv(retval, opad);
539                 SvREFCNT_dec(opad);
540             }
541             if (name[0] == '@')
542                 sv_catpvn(retval, ")", 1);
543             else
544                 sv_catpvn(retval, "]", 1);
545             SvREFCNT_dec(ixsv);
546             SvREFCNT_dec(totpad);
547             Safefree(iname);
548         }
549         else if (realtype == SVt_PVHV) {
550             SV *totpad, *newapad;
551             SV *sname;
552             HE *entry;
553             char *key;
554             I32 klen;
555             SV *hval;
556             AV *keys = NULL;
557         
558             SV * const iname = newSVpvn(name, namelen);
559             if (name[0] == '%') {
560                 sv_catpvn(retval, "(", 1);
561                 (SvPVX(iname))[0] = '$';
562             }
563             else {
564                 sv_catpvn(retval, "{", 1);
565                 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
566                 if ((namelen > 0
567                      && name[namelen-1] != ']' && name[namelen-1] != '}')
568                     || (namelen > 4
569                         && (name[1] == '{'
570                             || (name[0] == '\\' && name[2] == '{'))))
571                 {
572                     sv_catpvn(iname, "->", 2);
573                 }
574             }
575             if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
576                 (instr(name+namelen-8, "{SCALAR}") ||
577                  instr(name+namelen-7, "{ARRAY}") ||
578                  instr(name+namelen-6, "{HASH}"))) {
579                 sv_catpvn(iname, "->", 2);
580             }
581             sv_catpvn(iname, "{", 1);
582             totpad = newSVsv(sep);
583             sv_catsv(totpad, pad);
584             sv_catsv(totpad, apad);
585         
586             /* If requested, get a sorted/filtered array of hash keys */
587             if (sortkeys) {
588                 if (sortkeys == &PL_sv_yes) {
589 #if PERL_VERSION < 8
590                     sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
591 #else
592                     keys = newAV();
593                     (void)hv_iterinit((HV*)ival);
594                     while ((entry = hv_iternext((HV*)ival))) {
595                         sv = hv_iterkeysv(entry);
596                         SvREFCNT_inc(sv);
597                         av_push(keys, sv);
598                     }
599 # ifdef USE_LOCALE_NUMERIC
600                     sortsv(AvARRAY(keys), 
601                            av_len(keys)+1, 
602                            IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
603 # else
604                     sortsv(AvARRAY(keys), 
605                            av_len(keys)+1, 
606                            Perl_sv_cmp);
607 # endif
608 #endif
609                 }
610                 if (sortkeys != &PL_sv_yes) {
611                     dSP; ENTER; SAVETMPS; PUSHMARK(sp);
612                     XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
613                     i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
614                     SPAGAIN;
615                     if (i) {
616                         sv = POPs;
617                         if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
618                             keys = (AV*)SvREFCNT_inc(SvRV(sv));
619                     }
620                     if (! keys)
621                         warn("Sortkeys subroutine did not return ARRAYREF\n");
622                     PUTBACK; FREETMPS; LEAVE;
623                 }
624                 if (keys)
625                     sv_2mortal((SV*)keys);
626             }
627             else
628                 (void)hv_iterinit((HV*)ival);
629
630             /* foreach (keys %hash) */
631             for (i = 0; 1; i++) {
632                 char *nkey;
633                 char *nkey_buffer = NULL;
634                 I32 nticks = 0;
635                 SV* keysv;
636                 STRLEN keylen;
637                 I32 nlen;
638                 bool do_utf8 = FALSE;
639
640                if (sortkeys) {
641                    if (!(keys && (I32)i <= av_len(keys))) break;
642                } else {
643                    if (!(entry = hv_iternext((HV *)ival))) break;
644                }
645
646                 if (i)
647                     sv_catpvn(retval, ",", 1);
648
649                 if (sortkeys) {
650                     char *key;
651                     svp = av_fetch(keys, i, FALSE);
652                     keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
653                     key = SvPV(keysv, keylen);
654                     svp = hv_fetch((HV*)ival, key,
655                                    SvUTF8(keysv) ? -(I32)keylen : keylen, 0);
656                     hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
657                 }
658                 else {
659                     keysv = hv_iterkeysv(entry);
660                     hval = hv_iterval((HV*)ival, entry);
661                 }
662
663                 key = SvPV(keysv, keylen);
664                 do_utf8 = DO_UTF8(keysv);
665                 klen = keylen;
666
667                 sv_catsv(retval, totpad);
668                 sv_catsv(retval, ipad);
669                 /* old logic was first to check utf8 flag, and if utf8 always
670                    call esc_q_utf8.  This caused test to break under -Mutf8,
671                    because there even strings like 'c' have utf8 flag on.
672                    Hence with quotekeys == 0 the XS code would still '' quote
673                    them based on flags, whereas the perl code would not,
674                    based on regexps.
675                    The perl code is correct.
676                    needs_quote() decides that anything that isn't a valid
677                    perl identifier needs to be quoted, hence only correctly
678                    formed strings with no characters outside [A-Za-z0-9_:]
679                    won't need quoting.  None of those characters are used in
680                    the byte encoding of utf8, so anything with utf8
681                    encoded characters in will need quoting. Hence strings
682                    with utf8 encoded characters in will end up inside do_utf8
683                    just like before, but now strings with utf8 flag set but
684                    only ascii characters will end up in the unquoted section.
685
686                    There should also be less tests for the (probably currently)
687                    more common doesn't need quoting case.
688                    The code is also smaller (22044 vs 22260) because I've been
689                    able to pull the common logic out to both sides.  */
690                 if (quotekeys || needs_quote(key)) {
691                     if (do_utf8) {
692                         STRLEN ocur = SvCUR(retval);
693                         nlen = esc_q_utf8(aTHX_ retval, key, klen);
694                         nkey = SvPVX(retval) + ocur;
695                     }
696                     else {
697                         nticks = num_q(key, klen);
698                         New(0, nkey_buffer, klen+nticks+3, char);
699                         nkey = nkey_buffer;
700                         nkey[0] = '\'';
701                         if (nticks)
702                             klen += esc_q(nkey+1, key, klen);
703                         else
704                             (void)Copy(key, nkey+1, klen, char);
705                         nkey[++klen] = '\'';
706                         nkey[++klen] = '\0';
707                         nlen = klen;
708                         sv_catpvn(retval, nkey, klen);
709                     }
710                 }
711                 else {
712                     nkey = key;
713                     nlen = klen;
714                     sv_catpvn(retval, nkey, klen);
715                 }
716                 sname = newSVsv(iname);
717                 sv_catpvn(sname, nkey, nlen);
718                 sv_catpvn(sname, "}", 1);
719
720                 sv_catsv(retval, pair);
721                 if (indent >= 2) {
722                     char *extra;
723                     I32 elen = 0;
724                     newapad = newSVsv(apad);
725                     New(0, extra, klen+4+1, char);
726                     while (elen < (klen+4))
727                         extra[elen++] = ' ';
728                     extra[elen] = '\0';
729                     sv_catpvn(newapad, extra, elen);
730                     Safefree(extra);
731                 }
732                 else
733                     newapad = apad;
734
735                 DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
736                         postav, levelp, indent, pad, xpad, newapad, sep, pair,
737                         freezer, toaster, purity, deepcopy, quotekeys, bless,
738                         maxdepth, sortkeys);
739                 SvREFCNT_dec(sname);
740                 Safefree(nkey_buffer);
741                 if (indent >= 2)
742                     SvREFCNT_dec(newapad);
743             }
744             if (i) {
745                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp-1);
746                 sv_catsv(retval, totpad);
747                 sv_catsv(retval, opad);
748                 SvREFCNT_dec(opad);
749             }
750             if (name[0] == '%')
751                 sv_catpvn(retval, ")", 1);
752             else
753                 sv_catpvn(retval, "}", 1);
754             SvREFCNT_dec(iname);
755             SvREFCNT_dec(totpad);
756         }
757         else if (realtype == SVt_PVCV) {
758             sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
759             if (purity)
760                 warn("Encountered CODE ref, using dummy placeholder");
761         }
762         else {
763             warn("cannot handle ref type %ld", realtype);
764         }
765
766         if (realpack) {  /* free blessed allocs */
767             if (indent >= 2) {
768                 SvREFCNT_dec(apad);
769                 apad = blesspad;
770             }
771             sv_catpvn(retval, ", '", 3);
772             sv_catpvn(retval, realpack, strlen(realpack));
773             sv_catpvn(retval, "' )", 3);
774             if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
775                 sv_catpvn(retval, "->", 2);
776                 sv_catsv(retval, toaster);
777                 sv_catpvn(retval, "()", 2);
778             }
779         }
780         SvREFCNT_dec(ipad);
781         (*levelp)--;
782     }
783     else {
784         STRLEN i;
785         
786         if (namelen) {
787 #ifdef DD_USE_OLD_ID_FORMAT
788             idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(val));
789 #else
790             id_buffer = PTR2UV(val);
791             idlen = sizeof(id_buffer);
792 #endif
793             if ((svp = hv_fetch(seenhv, id, idlen, FALSE)) &&
794                 (sv = *svp) && SvROK(sv) &&
795                 (seenentry = (AV*)SvRV(sv)))
796             {
797                 SV *othername;
798                 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
799                     && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
800                 {
801                     sv_catpvn(retval, "${", 2);
802                     sv_catsv(retval, othername);
803                     sv_catpvn(retval, "}", 1);
804                     return 1;
805                 }
806             }
807             else if (val != &PL_sv_undef) {
808                 SV * const namesv = newSVpvn("\\", 1);
809                 sv_catpvn(namesv, name, namelen);
810                 seenentry = newAV();
811                 av_push(seenentry, namesv);
812                 av_push(seenentry, newRV_inc(val));
813                 (void)hv_store(seenhv, id, idlen, newRV_inc((SV*)seenentry), 0);
814                 SvREFCNT_dec(seenentry);
815             }
816         }
817
818         if (DD_is_integer(val)) {
819             STRLEN len;
820             if (SvIsUV(val))
821               len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"UVuf, SvUV(val));
822             else
823               len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, SvIV(val));
824             if (SvPOK(val)) {
825               /* Need to check to see if this is a string such as " 0".
826                  I'm assuming from sprintf isn't going to clash with utf8.
827                  Is this valid on EBCDIC?  */
828               STRLEN pvlen;
829               const char * const pv = SvPV(val, pvlen);
830               if (pvlen != len || memNE(pv, tmpbuf, len))
831                 goto integer_came_from_string;
832             }
833             if (len > 10) {
834               /* Looks like we're on a 64 bit system.  Make it a string so that
835                  if a 32 bit system reads the number it will cope better.  */
836               sv_catpvf(retval, "'%s'", tmpbuf);
837             } else
838               sv_catpvn(retval, tmpbuf, len);
839         }
840         else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
841             c = SvPV(val, i);
842             ++c; --i;                   /* just get the name */
843             if (i >= 6 && strncmp(c, "main::", 6) == 0) {
844                 c += 4;
845                 i -= 4;
846             }
847             if (needs_quote(c)) {
848                 sv_grow(retval, SvCUR(retval)+6+2*i);
849                 r = SvPVX(retval)+SvCUR(retval);
850                 r[0] = '*'; r[1] = '{'; r[2] = '\'';
851                 i += esc_q(r+3, c, i);
852                 i += 3;
853                 r[i++] = '\''; r[i++] = '}';
854                 r[i] = '\0';
855             }
856             else {
857                 sv_grow(retval, SvCUR(retval)+i+2);
858                 r = SvPVX(retval)+SvCUR(retval);
859                 r[0] = '*'; strcpy(r+1, c);
860                 i++;
861             }
862             SvCUR_set(retval, SvCUR(retval)+i);
863
864             if (purity) {
865                 static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
866                 static const STRLEN sizes[] = { 8, 7, 6 };
867                 SV *e;
868                 SV * const nname = newSVpvn("", 0);
869                 SV * const newapad = newSVpvn("", 0);
870                 GV * const gv = (GV*)val;
871                 I32 j;
872                 
873                 for (j=0; j<3; j++) {
874                     e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
875                     if (!e)
876                         continue;
877                     if (j == 0 && !SvOK(e))
878                         continue;
879
880                     {
881                         I32 nlevel = 0;
882                         SV *postentry = newSVpvn(r,i);
883                         
884                         sv_setsv(nname, postentry);
885                         sv_catpvn(nname, entries[j], sizes[j]);
886                         sv_catpvn(postentry, " = ", 3);
887                         av_push(postav, postentry);
888                         e = newRV_inc(e);
889                         
890                         SvCUR_set(newapad, 0);
891                         if (indent >= 2)
892                             (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
893                         
894                         DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry,
895                                 seenhv, postav, &nlevel, indent, pad, xpad,
896                                 newapad, sep, pair, freezer, toaster, purity,
897                                 deepcopy, quotekeys, bless, maxdepth, 
898                                 sortkeys);
899                         SvREFCNT_dec(e);
900                     }
901                 }
902                 
903                 SvREFCNT_dec(newapad);
904                 SvREFCNT_dec(nname);
905             }
906         }
907         else if (val == &PL_sv_undef || !SvOK(val)) {
908             sv_catpvn(retval, "undef", 5);
909         }
910         else {
911         integer_came_from_string:
912             c = SvPV(val, i);
913             if (DO_UTF8(val))
914                 i += esc_q_utf8(aTHX_ retval, c, i);
915             else {
916                 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
917                 r = SvPVX(retval) + SvCUR(retval);
918                 r[0] = '\'';
919                 i += esc_q(r+1, c, i);
920                 ++i;
921                 r[i++] = '\'';
922                 r[i] = '\0';
923                 SvCUR_set(retval, SvCUR(retval)+i);
924             }
925         }
926     }
927
928     if (idlen) {
929         if (deepcopy)
930             (void)hv_delete(seenhv, id, idlen, G_DISCARD);
931         else if (namelen && seenentry) {
932             SV *mark = *av_fetch(seenentry, 2, TRUE);
933             sv_setiv(mark,1);
934         }
935     }
936     return 1;
937 }
938
939
940 MODULE = Data::Dumper           PACKAGE = Data::Dumper         PREFIX = Data_Dumper_
941
942 #
943 # This is the exact equivalent of Dump.  Well, almost. The things that are
944 # different as of now (due to Laziness):
945 #   * doesnt do double-quotes yet.
946 #
947
948 void
949 Data_Dumper_Dumpxs(href, ...)
950         SV      *href;
951         PROTOTYPE: $;$$
952         PPCODE:
953         {
954             HV *hv;
955             SV *retval, *valstr;
956             HV *seenhv = NULL;
957             AV *postav, *todumpav, *namesav;
958             I32 level = 0;
959             I32 indent, terse, i, imax, postlen;
960             SV **svp;
961             SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
962             SV *freezer, *toaster, *bless, *sortkeys;
963             I32 purity, deepcopy, quotekeys, maxdepth = 0;
964             char tmpbuf[1024];
965             I32 gimme = GIMME;
966
967             if (!SvROK(href)) {         /* call new to get an object first */
968                 if (items < 2)
969                     croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
970                 
971                 ENTER;
972                 SAVETMPS;
973                 
974                 PUSHMARK(sp);
975                 XPUSHs(href);
976                 XPUSHs(sv_2mortal(newSVsv(ST(1))));
977                 if (items >= 3)
978                     XPUSHs(sv_2mortal(newSVsv(ST(2))));
979                 PUTBACK;
980                 i = perl_call_method("new", G_SCALAR);
981                 SPAGAIN;
982                 if (i)
983                     href = newSVsv(POPs);
984
985                 PUTBACK;
986                 FREETMPS;
987                 LEAVE;
988                 if (i)
989                     (void)sv_2mortal(href);
990             }
991
992             todumpav = namesav = NULL;
993             seenhv = NULL;
994             val = pad = xpad = apad = sep = pair = varname
995                 = freezer = toaster = bless = sortkeys = &PL_sv_undef;
996             name = sv_newmortal();
997             indent = 2;
998             terse = purity = deepcopy = 0;
999             quotekeys = 1;
1000         
1001             retval = newSVpvn("", 0);
1002             if (SvROK(href)
1003                 && (hv = (HV*)SvRV((SV*)href))
1004                 && SvTYPE(hv) == SVt_PVHV)              {
1005
1006                 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
1007                     seenhv = (HV*)SvRV(*svp);
1008                 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
1009                     todumpav = (AV*)SvRV(*svp);
1010                 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
1011                     namesav = (AV*)SvRV(*svp);
1012                 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
1013                     indent = SvIV(*svp);
1014                 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
1015                     purity = SvIV(*svp);
1016                 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
1017                     terse = SvTRUE(*svp);
1018 #if 0 /* useqq currently unused */
1019                 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
1020                     useqq = SvTRUE(*svp);
1021 #endif
1022                 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
1023                     pad = *svp;
1024                 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
1025                     xpad = *svp;
1026                 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
1027                     apad = *svp;
1028                 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
1029                     sep = *svp;
1030                 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
1031                     pair = *svp;
1032                 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1033                     varname = *svp;
1034                 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
1035                     freezer = *svp;
1036                 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
1037                     toaster = *svp;
1038                 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
1039                     deepcopy = SvTRUE(*svp);
1040                 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
1041                     quotekeys = SvTRUE(*svp);
1042                 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
1043                     bless = *svp;
1044                 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
1045                     maxdepth = SvIV(*svp);
1046                 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1047                     sortkeys = *svp;
1048                     if (! SvTRUE(sortkeys))
1049                         sortkeys = NULL;
1050                     else if (! (SvROK(sortkeys) &&
1051                                 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1052                     {
1053                         /* flag to use qsortsv() for sorting hash keys */       
1054                         sortkeys = &PL_sv_yes; 
1055                     }
1056                 }
1057                 postav = newAV();
1058
1059                 if (todumpav)
1060                     imax = av_len(todumpav);
1061                 else
1062                     imax = -1;
1063                 valstr = newSVpvn("",0);
1064                 for (i = 0; i <= imax; ++i) {
1065                     SV *newapad;
1066                 
1067                     av_clear(postav);
1068                     if ((svp = av_fetch(todumpav, i, FALSE)))
1069                         val = *svp;
1070                     else
1071                         val = &PL_sv_undef;
1072                     if ((svp = av_fetch(namesav, i, TRUE))) {
1073                         sv_setsv(name, *svp);
1074                         if (SvOK(*svp) && !SvPOK(*svp))
1075                             (void)SvPV_nolen_const(name);
1076                     }
1077                     else
1078                         (void)SvOK_off(name);
1079                 
1080                     if (SvPOK(name)) {
1081                         if ((SvPVX_const(name))[0] == '*') {
1082                             if (SvROK(val)) {
1083                                 switch (SvTYPE(SvRV(val))) {
1084                                 case SVt_PVAV:
1085                                     (SvPVX(name))[0] = '@';
1086                                     break;
1087                                 case SVt_PVHV:
1088                                     (SvPVX(name))[0] = '%';
1089                                     break;
1090                                 case SVt_PVCV:
1091                                     (SvPVX(name))[0] = '*';
1092                                     break;
1093                                 default:
1094                                     (SvPVX(name))[0] = '$';
1095                                     break;
1096                                 }
1097                             }
1098                             else
1099                                 (SvPVX(name))[0] = '$';
1100                         }
1101                         else if ((SvPVX_const(name))[0] != '$')
1102                             sv_insert(name, 0, 0, "$", 1);
1103                     }
1104                     else {
1105                         STRLEN nchars;
1106                         sv_setpvn(name, "$", 1);
1107                         sv_catsv(name, varname);
1108                         nchars = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, (IV)(i+1));
1109                         sv_catpvn(name, tmpbuf, nchars);
1110                     }
1111                 
1112                     if (indent >= 2) {
1113                         SV * const tmpsv = sv_x(aTHX_ NULL, " ", 1, SvCUR(name)+3);
1114                         newapad = newSVsv(apad);
1115                         sv_catsv(newapad, tmpsv);
1116                         SvREFCNT_dec(tmpsv);
1117                     }
1118                     else
1119                         newapad = apad;
1120                 
1121                     DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
1122                             postav, &level, indent, pad, xpad, newapad, sep, pair,
1123                             freezer, toaster, purity, deepcopy, quotekeys,
1124                             bless, maxdepth, sortkeys);
1125                 
1126                     if (indent >= 2)
1127                         SvREFCNT_dec(newapad);
1128
1129                     postlen = av_len(postav);
1130                     if (postlen >= 0 || !terse) {
1131                         sv_insert(valstr, 0, 0, " = ", 3);
1132                         sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name));
1133                         sv_catpvn(valstr, ";", 1);
1134                     }
1135                     sv_catsv(retval, pad);
1136                     sv_catsv(retval, valstr);
1137                     sv_catsv(retval, sep);
1138                     if (postlen >= 0) {
1139                         I32 i;
1140                         sv_catsv(retval, pad);
1141                         for (i = 0; i <= postlen; ++i) {
1142                             SV *elem;
1143                             svp = av_fetch(postav, i, FALSE);
1144                             if (svp && (elem = *svp)) {
1145                                 sv_catsv(retval, elem);
1146                                 if (i < postlen) {
1147                                     sv_catpvn(retval, ";", 1);
1148                                     sv_catsv(retval, sep);
1149                                     sv_catsv(retval, pad);
1150                                 }
1151                             }
1152                         }
1153                         sv_catpvn(retval, ";", 1);
1154                             sv_catsv(retval, sep);
1155                     }
1156                     sv_setpvn(valstr, "", 0);
1157                     if (gimme == G_ARRAY) {
1158                         XPUSHs(sv_2mortal(retval));
1159                         if (i < imax)   /* not the last time thro ? */
1160                             retval = newSVpvn("",0);
1161                     }
1162                 }
1163                 SvREFCNT_dec(postav);
1164                 SvREFCNT_dec(valstr);
1165             }
1166             else
1167                 croak("Call to new() method failed to return HASH ref");
1168             if (gimme == G_SCALAR)
1169                 XPUSHs(sv_2mortal(retval));
1170         }