Yves said that the XS version of Data::Dumper was inefficient because
[p5sagit/p5-mst-13.2.git] / ext / Data / Dumper / Dumper.xs
1 #define PERL_NO_GET_CONTEXT
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5 #include "ppport.h"
6
7 #if PERL_VERSION < 6
8 #  define DD_USE_OLD_ID_FORMAT
9 #endif
10
11 static I32 num_q (const char *s, STRLEN slen);
12 static I32 esc_q (char *dest, const char *src, STRLEN slen);
13 static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
14 static I32 needs_quote(register const char *s);
15 static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
16 static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
17                     HV *seenhv, AV *postav, I32 *levelp, I32 indent,
18                     SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
19                     SV *freezer, SV *toaster,
20                     I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
21                     I32 maxdepth, SV *sortkeys);
22
23 #ifndef HvNAME_get
24 #define HvNAME_get HvNAME
25 #endif
26
27 #if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
28
29 # ifdef EBCDIC
30 #  define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
31 # else
32 #  define UNI_TO_NATIVE(ch) (ch)
33 # endif
34
35 UV
36 Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
37 {
38     const UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
39                     ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
40     return UNI_TO_NATIVE(uv);
41 }
42
43 # if !defined(PERL_IMPLICIT_CONTEXT)
44 #  define utf8_to_uvchr      Perl_utf8_to_uvchr
45 # else
46 #  define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
47 # endif
48
49 #endif /* PERL_VERSION <= 6 */
50
51 /* Changes in 5.7 series mean that now IOK is only set if scalar is
52    precisely integer but in 5.6 and earlier we need to do a more
53    complex test  */
54 #if PERL_VERSION <= 6
55 #define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
56 #else
57 #define DD_is_integer(sv) SvIOK(sv)
58 #endif
59
60 /* does a string need to be protected? */
61 static I32
62 needs_quote(register const char *s)
63 {
64 TOP:
65     if (s[0] == ':') {
66         if (*++s) {
67             if (*s++ != ':')
68                 return 1;
69         }
70         else
71             return 1;
72     }
73     if (isIDFIRST(*s)) {
74         while (*++s)
75             if (!isALNUM(*s)) {
76                 if (*s == ':')
77                     goto TOP;
78                 else
79                     return 1;
80             }
81     }
82     else
83         return 1;
84     return 0;
85 }
86
87 /* count the number of "'"s and "\"s in string */
88 static I32
89 num_q(register const char *s, register STRLEN slen)
90 {
91     register I32 ret = 0;
92
93     while (slen > 0) {
94         if (*s == '\'' || *s == '\\')
95             ++ret;
96         ++s;
97         --slen;
98     }
99     return ret;
100 }
101
102
103 /* returns number of chars added to escape "'"s and "\"s in s */
104 /* slen number of characters in s will be escaped */
105 /* destination must be long enough for additional chars */
106 static I32
107 esc_q(register char *d, register const char *s, register STRLEN slen)
108 {
109     register I32 ret = 0;
110
111     while (slen > 0) {
112         switch (*s) {
113         case '\'':
114         case '\\':
115             *d = '\\';
116             ++d; ++ret;
117         default:
118             *d = *s;
119             ++d; ++s; --slen;
120             break;
121         }
122     }
123     return ret;
124 }
125
126 static I32
127 esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
128 {
129     char *r, *rstart;
130     const char *s = src;
131     const char * const send = src + slen;
132     STRLEN j, cur = SvCUR(sv);
133     /* Could count 128-255 and 256+ in two variables, if we want to
134        be like &qquote and make a distinction.  */
135     STRLEN grow = 0;    /* bytes needed to represent chars 128+ */
136     /* STRLEN topbit_grow = 0;  bytes needed to represent chars 128-255 */
137     STRLEN backslashes = 0;
138     STRLEN single_quotes = 0;
139     STRLEN qq_escapables = 0;   /* " $ @ will need a \ in "" strings.  */
140     STRLEN normal = 0;
141
142     /* this will need EBCDICification */
143     for (s = src; s < send; s += UTF8SKIP(s)) {
144         const UV k = utf8_to_uvchr((U8*)s, NULL);
145
146 #ifdef EBCDIC
147         if (!isprint(k) || k > 256) {
148 #else
149         if (k > 127) {
150 #endif
151             /* 4: \x{} then count the number of hex digits.  */
152             grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
153 #if UVSIZE == 4
154                 8 /* We may allocate a bit more than the minimum here.  */
155 #else
156                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
157 #endif
158                 );
159         } else if (k == '\\') {
160             backslashes++;
161         } else if (k == '\'') {
162             single_quotes++;
163         } else if (k == '"' || k == '$' || k == '@') {
164             qq_escapables++;
165         } else {
166             normal++;
167         }
168     }
169     if (grow) {
170         /* We have something needing hex. 3 is ""\0 */
171         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
172                 + 2*qq_escapables + normal);
173         rstart = r = SvPVX(sv) + cur;
174
175         *r++ = '"';
176
177         for (s = src; s < send; s += UTF8SKIP(s)) {
178             const UV k = utf8_to_uvchr((U8*)s, NULL);
179
180             if (k == '"' || k == '\\' || k == '$' || k == '@') {
181                 *r++ = '\\';
182                 *r++ = (char)k;
183             }
184             else
185 #ifdef EBCDIC
186               if (isprint(k) && k < 256)
187 #else
188               if (k < 0x80)
189 #endif
190                 *r++ = (char)k;
191             else {
192                 r = r + my_sprintf(r, "\\x{%"UVxf"}", k);
193             }
194         }
195         *r++ = '"';
196     } else {
197         /* Single quotes.  */
198         sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
199                 + qq_escapables + normal);
200         rstart = r = SvPVX(sv) + cur;
201         *r++ = '\'';
202         for (s = src; s < send; s ++) {
203             const char k = *s;
204             if (k == '\'' || k == '\\')
205                 *r++ = '\\';
206             *r++ = k;
207         }
208         *r++ = '\'';
209     }
210     *r = '\0';
211     j = r - rstart;
212     SvCUR_set(sv, cur + j);
213
214     return j;
215 }
216
217 /* append a repeated string to an SV */
218 static SV *
219 sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
220 {
221     if (!sv)
222         sv = newSVpvn("", 0);
223 #ifdef DEBUGGING
224     else
225         assert(SvTYPE(sv) >= SVt_PV);
226 #endif
227
228     if (n > 0) {
229         SvGROW(sv, len*n + SvCUR(sv) + 1);
230         if (len == 1) {
231             char * const start = SvPVX(sv) + SvCUR(sv);
232             SvCUR_set(sv, SvCUR(sv) + n);
233             start[n] = '\0';
234             while (n > 0)
235                 start[--n] = str[0];
236         }
237         else
238             while (n > 0) {
239                 sv_catpvn(sv, str, len);
240                 --n;
241             }
242     }
243     return sv;
244 }
245
246 /*
247  * This ought to be split into smaller functions. (it is one long function since
248  * it exactly parallels the perl version, which was one long thing for
249  * efficiency raisins.)  Ugggh!
250  */
251 static I32
252 DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
253         AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
254         SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
255         I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
256 {
257     char tmpbuf[128];
258     U32 i;
259     char *c, *r, *realpack;
260 #ifdef DD_USE_OLD_ID_FORMAT
261     char id[128];
262 #else
263     UV id_buffer;
264     char *const id = (char *)&id_buffer;
265 #endif
266     SV **svp;
267     SV *sv, *ipad, *ival;
268     SV *blesspad = Nullsv;
269     AV *seenentry = NULL;
270     char *iname;
271     STRLEN inamelen, idlen = 0;
272     U32 realtype;
273
274     if (!val)
275         return 0;
276
277     if (SvTYPE(retval) >= SVt_PV && (SvLEN(retval) - SvCUR(retval)) < 40) {
278         sv_grow(retval, SvCUR(retval) * 1.5);
279     }
280     realtype = SvTYPE(val);
281
282     if (SvGMAGICAL(val))
283         mg_get(val);
284     if (SvROK(val)) {
285
286         /* If a freeze method is provided and the object has it, call
287            it.  Warn on errors. */
288         if (SvOBJECT(SvRV(val)) && freezer &&
289             SvPOK(freezer) && SvCUR(freezer) &&
290             gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(freezer), 
291                          SvCUR(freezer), -1) != NULL)
292         {
293             dSP; ENTER; SAVETMPS; PUSHMARK(sp);
294             XPUSHs(val); PUTBACK;
295             i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID);
296             SPAGAIN;
297             if (SvTRUE(ERRSV))
298                 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
299             PUTBACK; FREETMPS; LEAVE;
300         }
301         
302         ival = SvRV(val);
303         realtype = SvTYPE(ival);
304 #ifdef DD_USE_OLD_ID_FORMAT
305         idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(ival));
306 #else
307         id_buffer = PTR2UV(ival);
308         idlen = sizeof(id_buffer);
309 #endif
310         if (SvOBJECT(ival))
311             realpack = HvNAME_get(SvSTASH(ival));
312         else
313             realpack = Nullch;
314
315         /* if it has a name, we need to either look it up, or keep a tab
316          * on it so we know when we hit it later
317          */
318         if (namelen) {
319             if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
320                 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
321             {
322                 SV *othername;
323                 if ((svp = av_fetch(seenentry, 0, FALSE))
324                     && (othername = *svp))
325                 {
326                     if (purity && *levelp > 0) {
327                         SV *postentry;
328                         
329                         if (realtype == SVt_PVHV)
330                             sv_catpvn(retval, "{}", 2);
331                         else if (realtype == SVt_PVAV)
332                             sv_catpvn(retval, "[]", 2);
333                         else
334                             sv_catpvn(retval, "do{my $o}", 9);
335                         postentry = newSVpvn(name, namelen);
336                         sv_catpvn(postentry, " = ", 3);
337                         sv_catsv(postentry, othername);
338                         av_push(postav, postentry);
339                     }
340                     else {
341                         if (name[0] == '@' || name[0] == '%') {
342                             if ((SvPVX_const(othername))[0] == '\\' &&
343                                 (SvPVX_const(othername))[1] == name[0]) {
344                                 sv_catpvn(retval, SvPVX_const(othername)+1,
345                                           SvCUR(othername)-1);
346                             }
347                             else {
348                                 sv_catpvn(retval, name, 1);
349                                 sv_catpvn(retval, "{", 1);
350                                 sv_catsv(retval, othername);
351                                 sv_catpvn(retval, "}", 1);
352                             }
353                         }
354                         else
355                             sv_catsv(retval, othername);
356                     }
357                     return 1;
358                 }
359                 else {
360 #ifdef DD_USE_OLD_ID_FORMAT
361                     warn("ref name not found for %s", id);
362 #else
363                     warn("ref name not found for 0x%"UVxf, PTR2UV(ival));
364 #endif
365                     return 0;
366                 }
367             }
368             else {   /* store our name and continue */
369                 SV *namesv;
370                 if (name[0] == '@' || name[0] == '%') {
371                     namesv = newSVpvn("\\", 1);
372                     sv_catpvn(namesv, name, namelen);
373                 }
374                 else if (realtype == SVt_PVCV && name[0] == '*') {
375                     namesv = newSVpvn("\\", 2);
376                     sv_catpvn(namesv, name, namelen);
377                     (SvPVX(namesv))[1] = '&';
378                 }
379                 else
380                     namesv = newSVpvn(name, namelen);
381                 seenentry = newAV();
382                 av_push(seenentry, namesv);
383                 (void)SvREFCNT_inc(val);
384                 av_push(seenentry, val);
385                 (void)hv_store(seenhv, id, idlen,
386                                newRV_inc((SV*)seenentry), 0);
387                 SvREFCNT_dec(seenentry);
388             }
389         }
390
391         if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) {
392             STRLEN rlen;
393             const char *rval = SvPV(val, rlen);
394             const char *slash = strchr(rval, '/');
395             sv_catpvn(retval, "qr/", 3);
396             while (slash) {
397                 sv_catpvn(retval, rval, slash-rval);
398                 sv_catpvn(retval, "\\/", 2);
399                 rlen -= slash-rval+1;
400                 rval = slash+1;
401                 slash = strchr(rval, '/');
402             }
403             sv_catpvn(retval, rval, rlen);
404             sv_catpvn(retval, "/", 1);
405             return 1;
406         }
407
408         /* If purity is not set and maxdepth is set, then check depth:
409          * if we have reached maximum depth, return the string
410          * representation of the thing we are currently examining
411          * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
412          */
413         if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
414             STRLEN vallen;
415             const char * const valstr = SvPV(val,vallen);
416             sv_catpvn(retval, "'", 1);
417             sv_catpvn(retval, valstr, vallen);
418             sv_catpvn(retval, "'", 1);
419             return 1;
420         }
421
422         if (realpack) {                         /* we have a blessed ref */
423             STRLEN blesslen;
424             const char * const blessstr = SvPV(bless, blesslen);
425             sv_catpvn(retval, blessstr, blesslen);
426             sv_catpvn(retval, "( ", 2);
427             if (indent >= 2) {
428                 blesspad = apad;
429                 apad = newSVsv(apad);
430                 sv_x(aTHX_ apad, " ", 1, blesslen+2);
431             }
432         }
433
434         (*levelp)++;
435         ipad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp);
436
437         if (realtype <= SVt_PVBM) {                          /* scalar ref */
438             SV * const namesv = newSVpvn("${", 2);
439             sv_catpvn(namesv, name, namelen);
440             sv_catpvn(namesv, "}", 1);
441             if (realpack) {                                  /* blessed */
442                 sv_catpvn(retval, "do{\\(my $o = ", 13);
443                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
444                         postav, levelp, indent, pad, xpad, apad, sep, pair,
445                         freezer, toaster, purity, deepcopy, quotekeys, bless,
446                         maxdepth, sortkeys);
447                 sv_catpvn(retval, ")}", 2);
448             }                                                /* plain */
449             else {
450                 sv_catpvn(retval, "\\", 1);
451                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
452                         postav, levelp, indent, pad, xpad, apad, sep, pair,
453                         freezer, toaster, purity, deepcopy, quotekeys, bless,
454                         maxdepth, sortkeys);
455             }
456             SvREFCNT_dec(namesv);
457         }
458         else if (realtype == SVt_PVGV) {                     /* glob ref */
459             SV * const namesv = newSVpvn("*{", 2);
460             sv_catpvn(namesv, name, namelen);
461             sv_catpvn(namesv, "}", 1);
462             sv_catpvn(retval, "\\", 1);
463             DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
464                     postav, levelp,     indent, pad, xpad, apad, sep, pair,
465                     freezer, toaster, purity, deepcopy, quotekeys, bless,
466                     maxdepth, sortkeys);
467             SvREFCNT_dec(namesv);
468         }
469         else if (realtype == SVt_PVAV) {
470             SV *totpad;
471             I32 ix = 0;
472             const I32 ixmax = av_len((AV *)ival);
473         
474             SV * const ixsv = newSViv(0);
475             /* allowing for a 24 char wide array index */
476             New(0, iname, namelen+28, char);
477             (void)strcpy(iname, name);
478             inamelen = namelen;
479             if (name[0] == '@') {
480                 sv_catpvn(retval, "(", 1);
481                 iname[0] = '$';
482             }
483             else {
484                 sv_catpvn(retval, "[", 1);
485                 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
486                 /*if (namelen > 0
487                     && name[namelen-1] != ']' && name[namelen-1] != '}'
488                     && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
489                 if ((namelen > 0
490                      && name[namelen-1] != ']' && name[namelen-1] != '}')
491                     || (namelen > 4
492                         && (name[1] == '{'
493                             || (name[0] == '\\' && name[2] == '{'))))
494                 {
495                     iname[inamelen++] = '-'; iname[inamelen++] = '>';
496                     iname[inamelen] = '\0';
497                 }
498             }
499             if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
500                 (instr(iname+inamelen-8, "{SCALAR}") ||
501                  instr(iname+inamelen-7, "{ARRAY}") ||
502                  instr(iname+inamelen-6, "{HASH}"))) {
503                 iname[inamelen++] = '-'; iname[inamelen++] = '>';
504             }
505             iname[inamelen++] = '['; iname[inamelen] = '\0';
506             totpad = newSVsv(sep);
507             sv_catsv(totpad, pad);
508             sv_catsv(totpad, apad);
509
510             for (ix = 0; ix <= ixmax; ++ix) {
511                 STRLEN ilen;
512                 SV *elem;
513                 svp = av_fetch((AV*)ival, ix, FALSE);
514                 if (svp)
515                     elem = *svp;
516                 else
517                     elem = &PL_sv_undef;
518                 
519                 ilen = inamelen;
520                 sv_setiv(ixsv, ix);
521                 ilen = ilen + my_sprintf(iname+ilen, "%"IVdf, (IV)ix);
522                 iname[ilen++] = ']'; iname[ilen] = '\0';
523                 if (indent >= 3) {
524                     sv_catsv(retval, totpad);
525                     sv_catsv(retval, ipad);
526                     sv_catpvn(retval, "#", 1);
527                     sv_catsv(retval, ixsv);
528                 }
529                 sv_catsv(retval, totpad);
530                 sv_catsv(retval, ipad);
531                 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
532                         levelp, indent, pad, xpad, apad, sep, pair,
533                         freezer, toaster, purity, deepcopy, quotekeys, bless,
534                         maxdepth, sortkeys);
535                 if (ix < ixmax)
536                     sv_catpvn(retval, ",", 1);
537             }
538             if (ixmax >= 0) {
539                 SV * const opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1);
540                 sv_catsv(retval, totpad);
541                 sv_catsv(retval, opad);
542                 SvREFCNT_dec(opad);
543             }
544             if (name[0] == '@')
545                 sv_catpvn(retval, ")", 1);
546             else
547                 sv_catpvn(retval, "]", 1);
548             SvREFCNT_dec(ixsv);
549             SvREFCNT_dec(totpad);
550             Safefree(iname);
551         }
552         else if (realtype == SVt_PVHV) {
553             SV *totpad, *newapad;
554             SV *sname;
555             HE *entry;
556             char *key;
557             I32 klen;
558             SV *hval;
559             AV *keys = NULL;
560         
561             SV * const iname = newSVpvn(name, namelen);
562             if (name[0] == '%') {
563                 sv_catpvn(retval, "(", 1);
564                 (SvPVX(iname))[0] = '$';
565             }
566             else {
567                 sv_catpvn(retval, "{", 1);
568                 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
569                 if ((namelen > 0
570                      && name[namelen-1] != ']' && name[namelen-1] != '}')
571                     || (namelen > 4
572                         && (name[1] == '{'
573                             || (name[0] == '\\' && name[2] == '{'))))
574                 {
575                     sv_catpvn(iname, "->", 2);
576                 }
577             }
578             if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
579                 (instr(name+namelen-8, "{SCALAR}") ||
580                  instr(name+namelen-7, "{ARRAY}") ||
581                  instr(name+namelen-6, "{HASH}"))) {
582                 sv_catpvn(iname, "->", 2);
583             }
584             sv_catpvn(iname, "{", 1);
585             totpad = newSVsv(sep);
586             sv_catsv(totpad, pad);
587             sv_catsv(totpad, apad);
588         
589             /* If requested, get a sorted/filtered array of hash keys */
590             if (sortkeys) {
591                 if (sortkeys == &PL_sv_yes) {
592 #if PERL_VERSION < 8
593                     sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
594 #else
595                     keys = newAV();
596                     (void)hv_iterinit((HV*)ival);
597                     while ((entry = hv_iternext((HV*)ival))) {
598                         sv = hv_iterkeysv(entry);
599                         SvREFCNT_inc(sv);
600                         av_push(keys, sv);
601                     }
602 # ifdef USE_LOCALE_NUMERIC
603                     sortsv(AvARRAY(keys), 
604                            av_len(keys)+1, 
605                            IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
606 # else
607                     sortsv(AvARRAY(keys), 
608                            av_len(keys)+1, 
609                            Perl_sv_cmp);
610 # endif
611 #endif
612                 }
613                 if (sortkeys != &PL_sv_yes) {
614                     dSP; ENTER; SAVETMPS; PUSHMARK(sp);
615                     XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
616                     i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
617                     SPAGAIN;
618                     if (i) {
619                         sv = POPs;
620                         if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
621                             keys = (AV*)SvREFCNT_inc(SvRV(sv));
622                     }
623                     if (! keys)
624                         warn("Sortkeys subroutine did not return ARRAYREF\n");
625                     PUTBACK; FREETMPS; LEAVE;
626                 }
627                 if (keys)
628                     sv_2mortal((SV*)keys);
629             }
630             else
631                 (void)hv_iterinit((HV*)ival);
632
633             /* foreach (keys %hash) */
634             for (i = 0; 1; i++) {
635                 char *nkey;
636                 char *nkey_buffer = NULL;
637                 I32 nticks = 0;
638                 SV* keysv;
639                 STRLEN keylen;
640                 I32 nlen;
641                 bool do_utf8 = FALSE;
642
643                if (sortkeys) {
644                    if (!(keys && (I32)i <= av_len(keys))) break;
645                } else {
646                    if (!(entry = hv_iternext((HV *)ival))) break;
647                }
648
649                 if (i)
650                     sv_catpvn(retval, ",", 1);
651
652                 if (sortkeys) {
653                     char *key;
654                     svp = av_fetch(keys, i, FALSE);
655                     keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
656                     key = SvPV(keysv, keylen);
657                     svp = hv_fetch((HV*)ival, key,
658                                    SvUTF8(keysv) ? -(I32)keylen : keylen, 0);
659                     hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
660                 }
661                 else {
662                     keysv = hv_iterkeysv(entry);
663                     hval = hv_iterval((HV*)ival, entry);
664                 }
665
666                 key = SvPV(keysv, keylen);
667                 do_utf8 = DO_UTF8(keysv);
668                 klen = keylen;
669
670                 sv_catsv(retval, totpad);
671                 sv_catsv(retval, ipad);
672                 /* old logic was first to check utf8 flag, and if utf8 always
673                    call esc_q_utf8.  This caused test to break under -Mutf8,
674                    because there even strings like 'c' have utf8 flag on.
675                    Hence with quotekeys == 0 the XS code would still '' quote
676                    them based on flags, whereas the perl code would not,
677                    based on regexps.
678                    The perl code is correct.
679                    needs_quote() decides that anything that isn't a valid
680                    perl identifier needs to be quoted, hence only correctly
681                    formed strings with no characters outside [A-Za-z0-9_:]
682                    won't need quoting.  None of those characters are used in
683                    the byte encoding of utf8, so anything with utf8
684                    encoded characters in will need quoting. Hence strings
685                    with utf8 encoded characters in will end up inside do_utf8
686                    just like before, but now strings with utf8 flag set but
687                    only ascii characters will end up in the unquoted section.
688
689                    There should also be less tests for the (probably currently)
690                    more common doesn't need quoting case.
691                    The code is also smaller (22044 vs 22260) because I've been
692                    able to pull the common logic out to both sides.  */
693                 if (quotekeys || needs_quote(key)) {
694                     if (do_utf8) {
695                         STRLEN ocur = SvCUR(retval);
696                         nlen = esc_q_utf8(aTHX_ retval, key, klen);
697                         nkey = SvPVX(retval) + ocur;
698                     }
699                     else {
700                         nticks = num_q(key, klen);
701                         New(0, nkey_buffer, klen+nticks+3, char);
702                         nkey = nkey_buffer;
703                         nkey[0] = '\'';
704                         if (nticks)
705                             klen += esc_q(nkey+1, key, klen);
706                         else
707                             (void)Copy(key, nkey+1, klen, char);
708                         nkey[++klen] = '\'';
709                         nkey[++klen] = '\0';
710                         nlen = klen;
711                         sv_catpvn(retval, nkey, klen);
712                     }
713                 }
714                 else {
715                     nkey = key;
716                     nlen = klen;
717                     sv_catpvn(retval, nkey, klen);
718                 }
719                 sname = newSVsv(iname);
720                 sv_catpvn(sname, nkey, nlen);
721                 sv_catpvn(sname, "}", 1);
722
723                 sv_catsv(retval, pair);
724                 if (indent >= 2) {
725                     char *extra;
726                     I32 elen = 0;
727                     newapad = newSVsv(apad);
728                     New(0, extra, klen+4+1, char);
729                     while (elen < (klen+4))
730                         extra[elen++] = ' ';
731                     extra[elen] = '\0';
732                     sv_catpvn(newapad, extra, elen);
733                     Safefree(extra);
734                 }
735                 else
736                     newapad = apad;
737
738                 DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
739                         postav, levelp, indent, pad, xpad, newapad, sep, pair,
740                         freezer, toaster, purity, deepcopy, quotekeys, bless,
741                         maxdepth, sortkeys);
742                 SvREFCNT_dec(sname);
743                 Safefree(nkey_buffer);
744                 if (indent >= 2)
745                     SvREFCNT_dec(newapad);
746             }
747             if (i) {
748                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp-1);
749                 sv_catsv(retval, totpad);
750                 sv_catsv(retval, opad);
751                 SvREFCNT_dec(opad);
752             }
753             if (name[0] == '%')
754                 sv_catpvn(retval, ")", 1);
755             else
756                 sv_catpvn(retval, "}", 1);
757             SvREFCNT_dec(iname);
758             SvREFCNT_dec(totpad);
759         }
760         else if (realtype == SVt_PVCV) {
761             sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
762             if (purity)
763                 warn("Encountered CODE ref, using dummy placeholder");
764         }
765         else {
766             warn("cannot handle ref type %ld", realtype);
767         }
768
769         if (realpack) {  /* free blessed allocs */
770             if (indent >= 2) {
771                 SvREFCNT_dec(apad);
772                 apad = blesspad;
773             }
774             sv_catpvn(retval, ", '", 3);
775             sv_catpvn(retval, realpack, strlen(realpack));
776             sv_catpvn(retval, "' )", 3);
777             if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
778                 sv_catpvn(retval, "->", 2);
779                 sv_catsv(retval, toaster);
780                 sv_catpvn(retval, "()", 2);
781             }
782         }
783         SvREFCNT_dec(ipad);
784         (*levelp)--;
785     }
786     else {
787         STRLEN i;
788         
789         if (namelen) {
790 #ifdef DD_USE_OLD_ID_FORMAT
791             idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(val));
792 #else
793             id_buffer = PTR2UV(val);
794             idlen = sizeof(id_buffer);
795 #endif
796             if ((svp = hv_fetch(seenhv, id, idlen, FALSE)) &&
797                 (sv = *svp) && SvROK(sv) &&
798                 (seenentry = (AV*)SvRV(sv)))
799             {
800                 SV *othername;
801                 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
802                     && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
803                 {
804                     sv_catpvn(retval, "${", 2);
805                     sv_catsv(retval, othername);
806                     sv_catpvn(retval, "}", 1);
807                     return 1;
808                 }
809             }
810             else if (val != &PL_sv_undef) {
811                 SV * const namesv = newSVpvn("\\", 1);
812                 sv_catpvn(namesv, name, namelen);
813                 seenentry = newAV();
814                 av_push(seenentry, namesv);
815                 av_push(seenentry, newRV_inc(val));
816                 (void)hv_store(seenhv, id, idlen, newRV_inc((SV*)seenentry), 0);
817                 SvREFCNT_dec(seenentry);
818             }
819         }
820
821         if (DD_is_integer(val)) {
822             STRLEN len;
823             if (SvIsUV(val))
824               len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"UVuf, SvUV(val));
825             else
826               len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, SvIV(val));
827             if (SvPOK(val)) {
828               /* Need to check to see if this is a string such as " 0".
829                  I'm assuming from sprintf isn't going to clash with utf8.
830                  Is this valid on EBCDIC?  */
831               STRLEN pvlen;
832               const char * const pv = SvPV(val, pvlen);
833               if (pvlen != len || memNE(pv, tmpbuf, len))
834                 goto integer_came_from_string;
835             }
836             if (len > 10) {
837               /* Looks like we're on a 64 bit system.  Make it a string so that
838                  if a 32 bit system reads the number it will cope better.  */
839               sv_catpvf(retval, "'%s'", tmpbuf);
840             } else
841               sv_catpvn(retval, tmpbuf, len);
842         }
843         else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
844             c = SvPV(val, i);
845             ++c; --i;                   /* just get the name */
846             if (i >= 6 && strncmp(c, "main::", 6) == 0) {
847                 c += 4;
848                 i -= 4;
849             }
850             if (needs_quote(c)) {
851                 sv_grow(retval, SvCUR(retval)+6+2*i);
852                 r = SvPVX(retval)+SvCUR(retval);
853                 r[0] = '*'; r[1] = '{'; r[2] = '\'';
854                 i += esc_q(r+3, c, i);
855                 i += 3;
856                 r[i++] = '\''; r[i++] = '}';
857                 r[i] = '\0';
858             }
859             else {
860                 sv_grow(retval, SvCUR(retval)+i+2);
861                 r = SvPVX(retval)+SvCUR(retval);
862                 r[0] = '*'; strcpy(r+1, c);
863                 i++;
864             }
865             SvCUR_set(retval, SvCUR(retval)+i);
866
867             if (purity) {
868                 static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
869                 static const STRLEN sizes[] = { 8, 7, 6 };
870                 SV *e;
871                 SV * const nname = newSVpvn("", 0);
872                 SV * const newapad = newSVpvn("", 0);
873                 GV * const gv = (GV*)val;
874                 I32 j;
875                 
876                 for (j=0; j<3; j++) {
877                     e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
878                     if (!e)
879                         continue;
880                     if (j == 0 && !SvOK(e))
881                         continue;
882
883                     {
884                         I32 nlevel = 0;
885                         SV *postentry = newSVpvn(r,i);
886                         
887                         sv_setsv(nname, postentry);
888                         sv_catpvn(nname, entries[j], sizes[j]);
889                         sv_catpvn(postentry, " = ", 3);
890                         av_push(postav, postentry);
891                         e = newRV_inc(e);
892                         
893                         SvCUR_set(newapad, 0);
894                         if (indent >= 2)
895                             (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
896                         
897                         DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry,
898                                 seenhv, postav, &nlevel, indent, pad, xpad,
899                                 newapad, sep, pair, freezer, toaster, purity,
900                                 deepcopy, quotekeys, bless, maxdepth, 
901                                 sortkeys);
902                         SvREFCNT_dec(e);
903                     }
904                 }
905                 
906                 SvREFCNT_dec(newapad);
907                 SvREFCNT_dec(nname);
908             }
909         }
910         else if (val == &PL_sv_undef || !SvOK(val)) {
911             sv_catpvn(retval, "undef", 5);
912         }
913         else {
914         integer_came_from_string:
915             c = SvPV(val, i);
916             if (DO_UTF8(val))
917                 i += esc_q_utf8(aTHX_ retval, c, i);
918             else {
919                 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
920                 r = SvPVX(retval) + SvCUR(retval);
921                 r[0] = '\'';
922                 i += esc_q(r+1, c, i);
923                 ++i;
924                 r[i++] = '\'';
925                 r[i] = '\0';
926                 SvCUR_set(retval, SvCUR(retval)+i);
927             }
928         }
929     }
930
931     if (idlen) {
932         if (deepcopy)
933             (void)hv_delete(seenhv, id, idlen, G_DISCARD);
934         else if (namelen && seenentry) {
935             SV *mark = *av_fetch(seenentry, 2, TRUE);
936             sv_setiv(mark,1);
937         }
938     }
939     return 1;
940 }
941
942
943 MODULE = Data::Dumper           PACKAGE = Data::Dumper         PREFIX = Data_Dumper_
944
945 #
946 # This is the exact equivalent of Dump.  Well, almost. The things that are
947 # different as of now (due to Laziness):
948 #   * doesnt do double-quotes yet.
949 #
950
951 void
952 Data_Dumper_Dumpxs(href, ...)
953         SV      *href;
954         PROTOTYPE: $;$$
955         PPCODE:
956         {
957             HV *hv;
958             SV *retval, *valstr;
959             HV *seenhv = NULL;
960             AV *postav, *todumpav, *namesav;
961             I32 level = 0;
962             I32 indent, terse, i, imax, postlen;
963             SV **svp;
964             SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
965             SV *freezer, *toaster, *bless, *sortkeys;
966             I32 purity, deepcopy, quotekeys, maxdepth = 0;
967             char tmpbuf[1024];
968             I32 gimme = GIMME;
969
970             if (!SvROK(href)) {         /* call new to get an object first */
971                 if (items < 2)
972                     croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
973                 
974                 ENTER;
975                 SAVETMPS;
976                 
977                 PUSHMARK(sp);
978                 XPUSHs(href);
979                 XPUSHs(sv_2mortal(newSVsv(ST(1))));
980                 if (items >= 3)
981                     XPUSHs(sv_2mortal(newSVsv(ST(2))));
982                 PUTBACK;
983                 i = perl_call_method("new", G_SCALAR);
984                 SPAGAIN;
985                 if (i)
986                     href = newSVsv(POPs);
987
988                 PUTBACK;
989                 FREETMPS;
990                 LEAVE;
991                 if (i)
992                     (void)sv_2mortal(href);
993             }
994
995             todumpav = namesav = NULL;
996             seenhv = NULL;
997             val = pad = xpad = apad = sep = pair = varname
998                 = freezer = toaster = bless = sortkeys = &PL_sv_undef;
999             name = sv_newmortal();
1000             indent = 2;
1001             terse = purity = deepcopy = 0;
1002             quotekeys = 1;
1003         
1004             retval = newSVpvn("", 0);
1005             if (SvROK(href)
1006                 && (hv = (HV*)SvRV((SV*)href))
1007                 && SvTYPE(hv) == SVt_PVHV)              {
1008
1009                 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
1010                     seenhv = (HV*)SvRV(*svp);
1011                 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
1012                     todumpav = (AV*)SvRV(*svp);
1013                 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
1014                     namesav = (AV*)SvRV(*svp);
1015                 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
1016                     indent = SvIV(*svp);
1017                 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
1018                     purity = SvIV(*svp);
1019                 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
1020                     terse = SvTRUE(*svp);
1021 #if 0 /* useqq currently unused */
1022                 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
1023                     useqq = SvTRUE(*svp);
1024 #endif
1025                 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
1026                     pad = *svp;
1027                 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
1028                     xpad = *svp;
1029                 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
1030                     apad = *svp;
1031                 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
1032                     sep = *svp;
1033                 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
1034                     pair = *svp;
1035                 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1036                     varname = *svp;
1037                 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
1038                     freezer = *svp;
1039                 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
1040                     toaster = *svp;
1041                 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
1042                     deepcopy = SvTRUE(*svp);
1043                 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
1044                     quotekeys = SvTRUE(*svp);
1045                 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
1046                     bless = *svp;
1047                 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
1048                     maxdepth = SvIV(*svp);
1049                 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1050                     sortkeys = *svp;
1051                     if (! SvTRUE(sortkeys))
1052                         sortkeys = NULL;
1053                     else if (! (SvROK(sortkeys) &&
1054                                 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1055                     {
1056                         /* flag to use qsortsv() for sorting hash keys */       
1057                         sortkeys = &PL_sv_yes; 
1058                     }
1059                 }
1060                 postav = newAV();
1061
1062                 if (todumpav)
1063                     imax = av_len(todumpav);
1064                 else
1065                     imax = -1;
1066                 valstr = newSVpvn("",0);
1067                 for (i = 0; i <= imax; ++i) {
1068                     SV *newapad;
1069                 
1070                     av_clear(postav);
1071                     if ((svp = av_fetch(todumpav, i, FALSE)))
1072                         val = *svp;
1073                     else
1074                         val = &PL_sv_undef;
1075                     if ((svp = av_fetch(namesav, i, TRUE))) {
1076                         sv_setsv(name, *svp);
1077                         if (SvOK(*svp) && !SvPOK(*svp))
1078                             (void)SvPV_nolen_const(name);
1079                     }
1080                     else
1081                         (void)SvOK_off(name);
1082                 
1083                     if (SvPOK(name)) {
1084                         if ((SvPVX_const(name))[0] == '*') {
1085                             if (SvROK(val)) {
1086                                 switch (SvTYPE(SvRV(val))) {
1087                                 case SVt_PVAV:
1088                                     (SvPVX(name))[0] = '@';
1089                                     break;
1090                                 case SVt_PVHV:
1091                                     (SvPVX(name))[0] = '%';
1092                                     break;
1093                                 case SVt_PVCV:
1094                                     (SvPVX(name))[0] = '*';
1095                                     break;
1096                                 default:
1097                                     (SvPVX(name))[0] = '$';
1098                                     break;
1099                                 }
1100                             }
1101                             else
1102                                 (SvPVX(name))[0] = '$';
1103                         }
1104                         else if ((SvPVX_const(name))[0] != '$')
1105                             sv_insert(name, 0, 0, "$", 1);
1106                     }
1107                     else {
1108                         STRLEN nchars;
1109                         sv_setpvn(name, "$", 1);
1110                         sv_catsv(name, varname);
1111                         nchars = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, (IV)(i+1));
1112                         sv_catpvn(name, tmpbuf, nchars);
1113                     }
1114                 
1115                     if (indent >= 2) {
1116                         SV * const tmpsv = sv_x(aTHX_ NULL, " ", 1, SvCUR(name)+3);
1117                         newapad = newSVsv(apad);
1118                         sv_catsv(newapad, tmpsv);
1119                         SvREFCNT_dec(tmpsv);
1120                     }
1121                     else
1122                         newapad = apad;
1123                 
1124                     DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
1125                             postav, &level, indent, pad, xpad, newapad, sep, pair,
1126                             freezer, toaster, purity, deepcopy, quotekeys,
1127                             bless, maxdepth, sortkeys);
1128                 
1129                     if (indent >= 2)
1130                         SvREFCNT_dec(newapad);
1131
1132                     postlen = av_len(postav);
1133                     if (postlen >= 0 || !terse) {
1134                         sv_insert(valstr, 0, 0, " = ", 3);
1135                         sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name));
1136                         sv_catpvn(valstr, ";", 1);
1137                     }
1138                     sv_catsv(retval, pad);
1139                     sv_catsv(retval, valstr);
1140                     sv_catsv(retval, sep);
1141                     if (postlen >= 0) {
1142                         I32 i;
1143                         sv_catsv(retval, pad);
1144                         for (i = 0; i <= postlen; ++i) {
1145                             SV *elem;
1146                             svp = av_fetch(postav, i, FALSE);
1147                             if (svp && (elem = *svp)) {
1148                                 sv_catsv(retval, elem);
1149                                 if (i < postlen) {
1150                                     sv_catpvn(retval, ";", 1);
1151                                     sv_catsv(retval, sep);
1152                                     sv_catsv(retval, pad);
1153                                 }
1154                             }
1155                         }
1156                         sv_catpvn(retval, ";", 1);
1157                             sv_catsv(retval, sep);
1158                     }
1159                     sv_setpvn(valstr, "", 0);
1160                     if (gimme == G_ARRAY) {
1161                         XPUSHs(sv_2mortal(retval));
1162                         if (i < imax)   /* not the last time thro ? */
1163                             retval = newSVpvn("",0);
1164                     }
1165                 }
1166                 SvREFCNT_dec(postav);
1167                 SvREFCNT_dec(valstr);
1168             }
1169             else
1170                 croak("Call to new() method failed to return HASH ref");
1171             if (gimme == G_SCALAR)
1172                 XPUSHs(sv_2mortal(retval));
1173         }