Data::Dumper consting
[p5sagit/p5-mst-13.2.git] / ext / Data / Dumper / Dumper.xs
1 #define PERL_NO_GET_CONTEXT
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5
6 static I32 num_q (const char *s, STRLEN slen);
7 static I32 esc_q (char *dest, const char *src, STRLEN slen);
8 static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
9 static I32 needs_quote(register const char *s);
10 static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
11 static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
12                     HV *seenhv, AV *postav, I32 *levelp, I32 indent,
13                     SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
14                     SV *freezer, SV *toaster,
15                     I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
16                     I32 maxdepth, SV *sortkeys);
17
18 #ifndef HvNAME_get
19 #define HvNAME_get HvNAME
20 #endif
21
22 #if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
23
24 # ifdef EBCDIC
25 #  define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
26 # else
27 #  define UNI_TO_NATIVE(ch) (ch)
28 # endif
29
30 UV
31 Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
32 {
33     const UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
34                     ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
35     return UNI_TO_NATIVE(uv);
36 }
37
38 # if !defined(PERL_IMPLICIT_CONTEXT)
39 #  define utf8_to_uvchr      Perl_utf8_to_uvchr
40 # else
41 #  define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
42 # endif
43
44 #endif /* PERL_VERSION <= 6 */
45
46 /* Changes in 5.7 series mean that now IOK is only set if scalar is
47    precisely integer but in 5.6 and earlier we need to do a more
48    complex test  */
49 #if PERL_VERSION <= 6
50 #define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
51 #else
52 #define DD_is_integer(sv) SvIOK(sv)
53 #endif
54
55 /* does a string need to be protected? */
56 static I32
57 needs_quote(register const char *s)
58 {
59 TOP:
60     if (s[0] == ':') {
61         if (*++s) {
62             if (*s++ != ':')
63                 return 1;
64         }
65         else
66             return 1;
67     }
68     if (isIDFIRST(*s)) {
69         while (*++s)
70             if (!isALNUM(*s)) {
71                 if (*s == ':')
72                     goto TOP;
73                 else
74                     return 1;
75             }
76     }
77     else
78         return 1;
79     return 0;
80 }
81
82 /* count the number of "'"s and "\"s in string */
83 static I32
84 num_q(register const char *s, register STRLEN slen)
85 {
86     register I32 ret = 0;
87
88     while (slen > 0) {
89         if (*s == '\'' || *s == '\\')
90             ++ret;
91         ++s;
92         --slen;
93     }
94     return ret;
95 }
96
97
98 /* returns number of chars added to escape "'"s and "\"s in s */
99 /* slen number of characters in s will be escaped */
100 /* destination must be long enough for additional chars */
101 static I32
102 esc_q(register char *d, register const char *s, register STRLEN slen)
103 {
104     register I32 ret = 0;
105
106     while (slen > 0) {
107         switch (*s) {
108         case '\'':
109         case '\\':
110             *d = '\\';
111             ++d; ++ret;
112         default:
113             *d = *s;
114             ++d; ++s; --slen;
115             break;
116         }
117     }
118     return ret;
119 }
120
121 static I32
122 esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
123 {
124     char *r, *rstart;
125     const char *s = src;
126     const char * const send = src + slen;
127     STRLEN j, cur = SvCUR(sv);
128     /* Could count 128-255 and 256+ in two variables, if we want to
129        be like &qquote and make a distinction.  */
130     STRLEN grow = 0;    /* bytes needed to represent chars 128+ */
131     /* STRLEN topbit_grow = 0;  bytes needed to represent chars 128-255 */
132     STRLEN backslashes = 0;
133     STRLEN single_quotes = 0;
134     STRLEN qq_escapables = 0;   /* " $ @ will need a \ in "" strings.  */
135     STRLEN normal = 0;
136
137     /* this will need EBCDICification */
138     for (s = src; s < send; s += UTF8SKIP(s)) {
139         const UV k = utf8_to_uvchr((U8*)s, NULL);
140
141         if (k > 127) {
142             /* 4: \x{} then count the number of hex digits.  */
143             grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
144 #if UVSIZE == 4
145                 8 /* We may allocate a bit more than the minimum here.  */
146 #else
147                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
148 #endif
149                 );
150         } else if (k == '\\') {
151             backslashes++;
152         } else if (k == '\'') {
153             single_quotes++;
154         } else if (k == '"' || k == '$' || k == '@') {
155             qq_escapables++;
156         } else {
157             normal++;
158         }
159     }
160     if (grow) {
161         /* We have something needing hex. 3 is ""\0 */
162         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
163                 + 2*qq_escapables + normal);
164         rstart = r = SvPVX(sv) + cur;
165
166         *r++ = '"';
167
168         for (s = src; s < send; s += UTF8SKIP(s)) {
169             const UV k = utf8_to_uvchr((U8*)s, NULL);
170
171             if (k == '"' || k == '\\' || k == '$' || k == '@') {
172                 *r++ = '\\';
173                 *r++ = (char)k;
174             }
175             else if (k < 0x80)
176                 *r++ = (char)k;
177             else {
178               /* The return value of sprintf() is unportable.
179                * In modern systems it returns (int) the number of characters,
180                * but in older systems it might return (char*) the original
181                * buffer, or it might even be (void).  The easiest portable
182                * thing to do is probably use sprintf() in void context and
183                * then strlen(buffer) for the length.  The more proper way
184                * would of course be to figure out the prototype of sprintf.
185                * --jhi */
186                 sprintf(r, "\\x{%"UVxf"}", k);
187                 r += strlen(r);
188             }
189         }
190         *r++ = '"';
191     } else {
192         /* Single quotes.  */
193         sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
194                 + qq_escapables + normal);
195         rstart = r = SvPVX(sv) + cur;
196         *r++ = '\'';
197         for (s = src; s < send; s ++) {
198             const char k = *s;
199             if (k == '\'' || k == '\\')
200                 *r++ = '\\';
201             *r++ = k;
202         }
203         *r++ = '\'';
204     }
205     *r = '\0';
206     j = r - rstart;
207     SvCUR_set(sv, cur + j);
208
209     return j;
210 }
211
212 /* append a repeated string to an SV */
213 static SV *
214 sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
215 {
216     if (!sv)
217         sv = newSVpvn("", 0);
218 #ifdef DEBUGGING
219     else
220         assert(SvTYPE(sv) >= SVt_PV);
221 #endif
222
223     if (n > 0) {
224         SvGROW(sv, len*n + SvCUR(sv) + 1);
225         if (len == 1) {
226             char * const start = SvPVX(sv) + SvCUR(sv);
227             SvCUR_set(sv, SvCUR(sv) + n);
228             start[n] = '\0';
229             while (n > 0)
230                 start[--n] = str[0];
231         }
232         else
233             while (n > 0) {
234                 sv_catpvn(sv, str, len);
235                 --n;
236             }
237     }
238     return sv;
239 }
240
241 /*
242  * This ought to be split into smaller functions. (it is one long function since
243  * it exactly parallels the perl version, which was one long thing for
244  * efficiency raisins.)  Ugggh!
245  */
246 static I32
247 DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
248         AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
249         SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
250         I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
251 {
252     char tmpbuf[128];
253     U32 i;
254     char *c, *r, *realpack, id[128];
255     SV **svp;
256     SV *sv, *ipad, *ival;
257     SV *blesspad = Nullsv;
258     AV *seenentry = NULL;
259     char *iname;
260     STRLEN inamelen, idlen = 0;
261     U32 realtype;
262
263     if (!val)
264         return 0;
265
266     realtype = SvTYPE(val);
267
268     if (SvGMAGICAL(val))
269         mg_get(val);
270     if (SvROK(val)) {
271
272         /* If a freeze method is provided and the object has it, call
273            it.  Warn on errors. */
274         if (SvOBJECT(SvRV(val)) && freezer &&
275             SvPOK(freezer) && SvCUR(freezer) &&
276             gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(freezer), 
277                          SvCUR(freezer), -1) != NULL)
278         {
279             dSP; ENTER; SAVETMPS; PUSHMARK(sp);
280             XPUSHs(val); PUTBACK;
281             i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID);
282             SPAGAIN;
283             if (SvTRUE(ERRSV))
284                 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
285             PUTBACK; FREETMPS; LEAVE;
286         }
287         
288         ival = SvRV(val);
289         realtype = SvTYPE(ival);
290         (void) sprintf(id, "0x%"UVxf, PTR2UV(ival));
291         idlen = strlen(id);
292         if (SvOBJECT(ival))
293             realpack = HvNAME_get(SvSTASH(ival));
294         else
295             realpack = Nullch;
296
297         /* if it has a name, we need to either look it up, or keep a tab
298          * on it so we know when we hit it later
299          */
300         if (namelen) {
301             if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
302                 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
303             {
304                 SV *othername;
305                 if ((svp = av_fetch(seenentry, 0, FALSE))
306                     && (othername = *svp))
307                 {
308                     if (purity && *levelp > 0) {
309                         SV *postentry;
310                         
311                         if (realtype == SVt_PVHV)
312                             sv_catpvn(retval, "{}", 2);
313                         else if (realtype == SVt_PVAV)
314                             sv_catpvn(retval, "[]", 2);
315                         else
316                             sv_catpvn(retval, "do{my $o}", 9);
317                         postentry = newSVpvn(name, namelen);
318                         sv_catpvn(postentry, " = ", 3);
319                         sv_catsv(postentry, othername);
320                         av_push(postav, postentry);
321                     }
322                     else {
323                         if (name[0] == '@' || name[0] == '%') {
324                             if ((SvPVX_const(othername))[0] == '\\' &&
325                                 (SvPVX_const(othername))[1] == name[0]) {
326                                 sv_catpvn(retval, SvPVX_const(othername)+1,
327                                           SvCUR(othername)-1);
328                             }
329                             else {
330                                 sv_catpvn(retval, name, 1);
331                                 sv_catpvn(retval, "{", 1);
332                                 sv_catsv(retval, othername);
333                                 sv_catpvn(retval, "}", 1);
334                             }
335                         }
336                         else
337                             sv_catsv(retval, othername);
338                     }
339                     return 1;
340                 }
341                 else {
342                     warn("ref name not found for %s", id);
343                     return 0;
344                 }
345             }
346             else {   /* store our name and continue */
347                 SV *namesv;
348                 if (name[0] == '@' || name[0] == '%') {
349                     namesv = newSVpvn("\\", 1);
350                     sv_catpvn(namesv, name, namelen);
351                 }
352                 else if (realtype == SVt_PVCV && name[0] == '*') {
353                     namesv = newSVpvn("\\", 2);
354                     sv_catpvn(namesv, name, namelen);
355                     (SvPVX(namesv))[1] = '&';
356                 }
357                 else
358                     namesv = newSVpvn(name, namelen);
359                 seenentry = newAV();
360                 av_push(seenentry, namesv);
361                 (void)SvREFCNT_inc(val);
362                 av_push(seenentry, val);
363                 (void)hv_store(seenhv, id, strlen(id),
364                                newRV_inc((SV*)seenentry), 0);
365                 SvREFCNT_dec(seenentry);
366             }
367         }
368
369         if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) {
370             STRLEN rlen;
371             const char *rval = SvPV(val, rlen);
372             const char *slash = strchr(rval, '/');
373             sv_catpvn(retval, "qr/", 3);
374             while (slash) {
375                 sv_catpvn(retval, rval, slash-rval);
376                 sv_catpvn(retval, "\\/", 2);
377                 rlen -= slash-rval+1;
378                 rval = slash+1;
379                 slash = strchr(rval, '/');
380             }
381             sv_catpvn(retval, rval, rlen);
382             sv_catpvn(retval, "/", 1);
383             return 1;
384         }
385
386         /* If purity is not set and maxdepth is set, then check depth:
387          * if we have reached maximum depth, return the string
388          * representation of the thing we are currently examining
389          * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
390          */
391         if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
392             STRLEN vallen;
393             const char * const valstr = SvPV(val,vallen);
394             sv_catpvn(retval, "'", 1);
395             sv_catpvn(retval, valstr, vallen);
396             sv_catpvn(retval, "'", 1);
397             return 1;
398         }
399
400         if (realpack) {                         /* we have a blessed ref */
401             STRLEN blesslen;
402             const char * const blessstr = SvPV(bless, blesslen);
403             sv_catpvn(retval, blessstr, blesslen);
404             sv_catpvn(retval, "( ", 2);
405             if (indent >= 2) {
406                 blesspad = apad;
407                 apad = newSVsv(apad);
408                 sv_x(aTHX_ apad, " ", 1, blesslen+2);
409             }
410         }
411
412         (*levelp)++;
413         ipad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp);
414
415         if (realtype <= SVt_PVBM) {                          /* scalar ref */
416             SV * const namesv = newSVpvn("${", 2);
417             sv_catpvn(namesv, name, namelen);
418             sv_catpvn(namesv, "}", 1);
419             if (realpack) {                                  /* blessed */
420                 sv_catpvn(retval, "do{\\(my $o = ", 13);
421                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
422                         postav, levelp, indent, pad, xpad, apad, sep, pair,
423                         freezer, toaster, purity, deepcopy, quotekeys, bless,
424                         maxdepth, sortkeys);
425                 sv_catpvn(retval, ")}", 2);
426             }                                                /* plain */
427             else {
428                 sv_catpvn(retval, "\\", 1);
429                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
430                         postav, levelp, indent, pad, xpad, apad, sep, pair,
431                         freezer, toaster, purity, deepcopy, quotekeys, bless,
432                         maxdepth, sortkeys);
433             }
434             SvREFCNT_dec(namesv);
435         }
436         else if (realtype == SVt_PVGV) {                     /* glob ref */
437             SV * const namesv = newSVpvn("*{", 2);
438             sv_catpvn(namesv, name, namelen);
439             sv_catpvn(namesv, "}", 1);
440             sv_catpvn(retval, "\\", 1);
441             DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
442                     postav, levelp,     indent, pad, xpad, apad, sep, pair,
443                     freezer, toaster, purity, deepcopy, quotekeys, bless,
444                     maxdepth, sortkeys);
445             SvREFCNT_dec(namesv);
446         }
447         else if (realtype == SVt_PVAV) {
448             SV *totpad;
449             I32 ix = 0;
450             const I32 ixmax = av_len((AV *)ival);
451         
452             SV * const ixsv = newSViv(0);
453             /* allowing for a 24 char wide array index */
454             New(0, iname, namelen+28, char);
455             (void)strcpy(iname, name);
456             inamelen = namelen;
457             if (name[0] == '@') {
458                 sv_catpvn(retval, "(", 1);
459                 iname[0] = '$';
460             }
461             else {
462                 sv_catpvn(retval, "[", 1);
463                 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
464                 /*if (namelen > 0
465                     && name[namelen-1] != ']' && name[namelen-1] != '}'
466                     && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
467                 if ((namelen > 0
468                      && name[namelen-1] != ']' && name[namelen-1] != '}')
469                     || (namelen > 4
470                         && (name[1] == '{'
471                             || (name[0] == '\\' && name[2] == '{'))))
472                 {
473                     iname[inamelen++] = '-'; iname[inamelen++] = '>';
474                     iname[inamelen] = '\0';
475                 }
476             }
477             if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
478                 (instr(iname+inamelen-8, "{SCALAR}") ||
479                  instr(iname+inamelen-7, "{ARRAY}") ||
480                  instr(iname+inamelen-6, "{HASH}"))) {
481                 iname[inamelen++] = '-'; iname[inamelen++] = '>';
482             }
483             iname[inamelen++] = '['; iname[inamelen] = '\0';
484             totpad = newSVsv(sep);
485             sv_catsv(totpad, pad);
486             sv_catsv(totpad, apad);
487
488             for (ix = 0; ix <= ixmax; ++ix) {
489                 STRLEN ilen;
490                 SV *elem;
491                 svp = av_fetch((AV*)ival, ix, FALSE);
492                 if (svp)
493                     elem = *svp;
494                 else
495                     elem = &PL_sv_undef;
496                 
497                 ilen = inamelen;
498                 sv_setiv(ixsv, ix);
499                 (void) sprintf(iname+ilen, "%"IVdf, (IV)ix);
500                 ilen = strlen(iname);
501                 iname[ilen++] = ']'; iname[ilen] = '\0';
502                 if (indent >= 3) {
503                     sv_catsv(retval, totpad);
504                     sv_catsv(retval, ipad);
505                     sv_catpvn(retval, "#", 1);
506                     sv_catsv(retval, ixsv);
507                 }
508                 sv_catsv(retval, totpad);
509                 sv_catsv(retval, ipad);
510                 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
511                         levelp, indent, pad, xpad, apad, sep, pair,
512                         freezer, toaster, purity, deepcopy, quotekeys, bless,
513                         maxdepth, sortkeys);
514                 if (ix < ixmax)
515                     sv_catpvn(retval, ",", 1);
516             }
517             if (ixmax >= 0) {
518                 SV * const opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1);
519                 sv_catsv(retval, totpad);
520                 sv_catsv(retval, opad);
521                 SvREFCNT_dec(opad);
522             }
523             if (name[0] == '@')
524                 sv_catpvn(retval, ")", 1);
525             else
526                 sv_catpvn(retval, "]", 1);
527             SvREFCNT_dec(ixsv);
528             SvREFCNT_dec(totpad);
529             Safefree(iname);
530         }
531         else if (realtype == SVt_PVHV) {
532             SV *totpad, *newapad;
533             SV *sname;
534             HE *entry;
535             char *key;
536             I32 klen;
537             SV *hval;
538             AV *keys = NULL;
539         
540             SV * const iname = newSVpvn(name, namelen);
541             if (name[0] == '%') {
542                 sv_catpvn(retval, "(", 1);
543                 (SvPVX(iname))[0] = '$';
544             }
545             else {
546                 sv_catpvn(retval, "{", 1);
547                 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
548                 if ((namelen > 0
549                      && name[namelen-1] != ']' && name[namelen-1] != '}')
550                     || (namelen > 4
551                         && (name[1] == '{'
552                             || (name[0] == '\\' && name[2] == '{'))))
553                 {
554                     sv_catpvn(iname, "->", 2);
555                 }
556             }
557             if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
558                 (instr(name+namelen-8, "{SCALAR}") ||
559                  instr(name+namelen-7, "{ARRAY}") ||
560                  instr(name+namelen-6, "{HASH}"))) {
561                 sv_catpvn(iname, "->", 2);
562             }
563             sv_catpvn(iname, "{", 1);
564             totpad = newSVsv(sep);
565             sv_catsv(totpad, pad);
566             sv_catsv(totpad, apad);
567         
568             /* If requested, get a sorted/filtered array of hash keys */
569             if (sortkeys) {
570                 if (sortkeys == &PL_sv_yes) {
571 #if PERL_VERSION < 8
572                     sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
573 #else
574                     keys = newAV();
575                     (void)hv_iterinit((HV*)ival);
576                     while ((entry = hv_iternext((HV*)ival))) {
577                         sv = hv_iterkeysv(entry);
578                         SvREFCNT_inc(sv);
579                         av_push(keys, sv);
580                     }
581 # ifdef USE_LOCALE_NUMERIC
582                     sortsv(AvARRAY(keys), 
583                            av_len(keys)+1, 
584                            IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
585 # else
586                     sortsv(AvARRAY(keys), 
587                            av_len(keys)+1, 
588                            Perl_sv_cmp);
589 # endif
590 #endif
591                 }
592                 if (sortkeys != &PL_sv_yes) {
593                     dSP; ENTER; SAVETMPS; PUSHMARK(sp);
594                     XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
595                     i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
596                     SPAGAIN;
597                     if (i) {
598                         sv = POPs;
599                         if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
600                             keys = (AV*)SvREFCNT_inc(SvRV(sv));
601                     }
602                     if (! keys)
603                         warn("Sortkeys subroutine did not return ARRAYREF\n");
604                     PUTBACK; FREETMPS; LEAVE;
605                 }
606                 if (keys)
607                     sv_2mortal((SV*)keys);
608             }
609             else
610                 (void)hv_iterinit((HV*)ival);
611
612             /* foreach (keys %hash) */
613             for (i = 0; 1; i++) {
614                 char *nkey;
615                 char *nkey_buffer = NULL;
616                 I32 nticks = 0;
617                 SV* keysv;
618                 STRLEN keylen;
619                 I32 nlen;
620                 bool do_utf8 = FALSE;
621
622                if (sortkeys) {
623                    if (!(keys && (I32)i <= av_len(keys))) break;
624                } else {
625                    if (!(entry = hv_iternext((HV *)ival))) break;
626                }
627
628                 if (i)
629                     sv_catpvn(retval, ",", 1);
630
631                 if (sortkeys) {
632                     char *key;
633                     svp = av_fetch(keys, i, FALSE);
634                     keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
635                     key = SvPV(keysv, keylen);
636                     svp = hv_fetch((HV*)ival, key,
637                                    SvUTF8(keysv) ? -(I32)keylen : keylen, 0);
638                     hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
639                 }
640                 else {
641                     keysv = hv_iterkeysv(entry);
642                     hval = hv_iterval((HV*)ival, entry);
643                 }
644
645                 do_utf8 = DO_UTF8(keysv);
646                 key = SvPV(keysv, keylen);
647                 klen = keylen;
648
649                 sv_catsv(retval, totpad);
650                 sv_catsv(retval, ipad);
651                 /* old logic was first to check utf8 flag, and if utf8 always
652                    call esc_q_utf8.  This caused test to break under -Mutf8,
653                    because there even strings like 'c' have utf8 flag on.
654                    Hence with quotekeys == 0 the XS code would still '' quote
655                    them based on flags, whereas the perl code would not,
656                    based on regexps.
657                    The perl code is correct.
658                    needs_quote() decides that anything that isn't a valid
659                    perl identifier needs to be quoted, hence only correctly
660                    formed strings with no characters outside [A-Za-z0-9_:]
661                    won't need quoting.  None of those characters are used in
662                    the byte encoding of utf8, so anything with utf8
663                    encoded characters in will need quoting. Hence strings
664                    with utf8 encoded characters in will end up inside do_utf8
665                    just like before, but now strings with utf8 flag set but
666                    only ascii characters will end up in the unquoted section.
667
668                    There should also be less tests for the (probably currently)
669                    more common doesn't need quoting case.
670                    The code is also smaller (22044 vs 22260) because I've been
671                    able to pull the common logic out to both sides.  */
672                 if (quotekeys || needs_quote(key)) {
673                     if (do_utf8) {
674                         STRLEN ocur = SvCUR(retval);
675                         nlen = esc_q_utf8(aTHX_ retval, key, klen);
676                         nkey = SvPVX(retval) + ocur;
677                     }
678                     else {
679                         nticks = num_q(key, klen);
680                         New(0, nkey_buffer, klen+nticks+3, char);
681                         nkey = nkey_buffer;
682                         nkey[0] = '\'';
683                         if (nticks)
684                             klen += esc_q(nkey+1, key, klen);
685                         else
686                             (void)Copy(key, nkey+1, klen, char);
687                         nkey[++klen] = '\'';
688                         nkey[++klen] = '\0';
689                         nlen = klen;
690                         sv_catpvn(retval, nkey, klen);
691                     }
692                 }
693                 else {
694                     nkey = key;
695                     nlen = klen;
696                     sv_catpvn(retval, nkey, klen);
697                 }
698                 sname = newSVsv(iname);
699                 sv_catpvn(sname, nkey, nlen);
700                 sv_catpvn(sname, "}", 1);
701
702                 sv_catsv(retval, pair);
703                 if (indent >= 2) {
704                     char *extra;
705                     I32 elen = 0;
706                     newapad = newSVsv(apad);
707                     New(0, extra, klen+4+1, char);
708                     while (elen < (klen+4))
709                         extra[elen++] = ' ';
710                     extra[elen] = '\0';
711                     sv_catpvn(newapad, extra, elen);
712                     Safefree(extra);
713                 }
714                 else
715                     newapad = apad;
716
717                 DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
718                         postav, levelp, indent, pad, xpad, newapad, sep, pair,
719                         freezer, toaster, purity, deepcopy, quotekeys, bless,
720                         maxdepth, sortkeys);
721                 SvREFCNT_dec(sname);
722                 Safefree(nkey_buffer);
723                 if (indent >= 2)
724                     SvREFCNT_dec(newapad);
725             }
726             if (i) {
727                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp-1);
728                 sv_catsv(retval, totpad);
729                 sv_catsv(retval, opad);
730                 SvREFCNT_dec(opad);
731             }
732             if (name[0] == '%')
733                 sv_catpvn(retval, ")", 1);
734             else
735                 sv_catpvn(retval, "}", 1);
736             SvREFCNT_dec(iname);
737             SvREFCNT_dec(totpad);
738         }
739         else if (realtype == SVt_PVCV) {
740             sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
741             if (purity)
742                 warn("Encountered CODE ref, using dummy placeholder");
743         }
744         else {
745             warn("cannot handle ref type %ld", realtype);
746         }
747
748         if (realpack) {  /* free blessed allocs */
749             if (indent >= 2) {
750                 SvREFCNT_dec(apad);
751                 apad = blesspad;
752             }
753             sv_catpvn(retval, ", '", 3);
754             sv_catpvn(retval, realpack, strlen(realpack));
755             sv_catpvn(retval, "' )", 3);
756             if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
757                 sv_catpvn(retval, "->", 2);
758                 sv_catsv(retval, toaster);
759                 sv_catpvn(retval, "()", 2);
760             }
761         }
762         SvREFCNT_dec(ipad);
763         (*levelp)--;
764     }
765     else {
766         STRLEN i;
767         
768         if (namelen) {
769             (void) sprintf(id, "0x%"UVxf, PTR2UV(val));
770             if ((svp = hv_fetch(seenhv, id, (idlen = strlen(id)), FALSE)) &&
771                 (sv = *svp) && SvROK(sv) &&
772                 (seenentry = (AV*)SvRV(sv)))
773             {
774                 SV *othername;
775                 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
776                     && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
777                 {
778                     sv_catpvn(retval, "${", 2);
779                     sv_catsv(retval, othername);
780                     sv_catpvn(retval, "}", 1);
781                     return 1;
782                 }
783             }
784             else if (val != &PL_sv_undef) {
785                 SV * const namesv = newSVpvn("\\", 1);
786                 sv_catpvn(namesv, name, namelen);
787                 seenentry = newAV();
788                 av_push(seenentry, namesv);
789                 av_push(seenentry, newRV_inc(val));
790                 (void)hv_store(seenhv, id, strlen(id), newRV_inc((SV*)seenentry), 0);
791                 SvREFCNT_dec(seenentry);
792             }
793         }
794
795         if (DD_is_integer(val)) {
796             STRLEN len;
797             if (SvIsUV(val))
798               (void) sprintf(tmpbuf, "%"UVuf, SvUV(val));
799             else
800               (void) sprintf(tmpbuf, "%"IVdf, SvIV(val));
801             len = strlen(tmpbuf);
802             if (SvPOK(val)) {
803               /* Need to check to see if this is a string such as " 0".
804                  I'm assuming from sprintf isn't going to clash with utf8.
805                  Is this valid on EBCDIC?  */
806               STRLEN pvlen;
807               const char * const pv = SvPV(val, pvlen);
808               if (pvlen != len || memNE(pv, tmpbuf, len))
809                 goto integer_came_from_string;
810             }
811             if (len > 10) {
812               /* Looks like we're on a 64 bit system.  Make it a string so that
813                  if a 32 bit system reads the number it will cope better.  */
814               sv_catpvf(retval, "'%s'", tmpbuf);
815             } else
816               sv_catpvn(retval, tmpbuf, len);
817         }
818         else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
819             c = SvPV(val, i);
820             ++c; --i;                   /* just get the name */
821             if (i >= 6 && strncmp(c, "main::", 6) == 0) {
822                 c += 4;
823                 i -= 4;
824             }
825             if (needs_quote(c)) {
826                 sv_grow(retval, SvCUR(retval)+6+2*i);
827                 r = SvPVX(retval)+SvCUR(retval);
828                 r[0] = '*'; r[1] = '{'; r[2] = '\'';
829                 i += esc_q(r+3, c, i);
830                 i += 3;
831                 r[i++] = '\''; r[i++] = '}';
832                 r[i] = '\0';
833             }
834             else {
835                 sv_grow(retval, SvCUR(retval)+i+2);
836                 r = SvPVX(retval)+SvCUR(retval);
837                 r[0] = '*'; strcpy(r+1, c);
838                 i++;
839             }
840             SvCUR_set(retval, SvCUR(retval)+i);
841
842             if (purity) {
843                 static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
844                 static const STRLEN sizes[] = { 8, 7, 6 };
845                 SV *e;
846                 SV * const nname = newSVpvn("", 0);
847                 SV * const newapad = newSVpvn("", 0);
848                 GV * const gv = (GV*)val;
849                 I32 j;
850                 
851                 for (j=0; j<3; j++) {
852                     e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
853                     if (!e)
854                         continue;
855                     if (j == 0 && !SvOK(e))
856                         continue;
857
858                     {
859                         I32 nlevel = 0;
860                         SV *postentry = newSVpvn(r,i);
861                         
862                         sv_setsv(nname, postentry);
863                         sv_catpvn(nname, entries[j], sizes[j]);
864                         sv_catpvn(postentry, " = ", 3);
865                         av_push(postav, postentry);
866                         e = newRV_inc(e);
867                         
868                         SvCUR_set(newapad, 0);
869                         if (indent >= 2)
870                             (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
871                         
872                         DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry,
873                                 seenhv, postav, &nlevel, indent, pad, xpad,
874                                 newapad, sep, pair, freezer, toaster, purity,
875                                 deepcopy, quotekeys, bless, maxdepth, 
876                                 sortkeys);
877                         SvREFCNT_dec(e);
878                     }
879                 }
880                 
881                 SvREFCNT_dec(newapad);
882                 SvREFCNT_dec(nname);
883             }
884         }
885         else if (val == &PL_sv_undef || !SvOK(val)) {
886             sv_catpvn(retval, "undef", 5);
887         }
888         else {
889         integer_came_from_string:
890             c = SvPV(val, i);
891             if (DO_UTF8(val))
892                 i += esc_q_utf8(aTHX_ retval, c, i);
893             else {
894                 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
895                 r = SvPVX(retval) + SvCUR(retval);
896                 r[0] = '\'';
897                 i += esc_q(r+1, c, i);
898                 ++i;
899                 r[i++] = '\'';
900                 r[i] = '\0';
901                 SvCUR_set(retval, SvCUR(retval)+i);
902             }
903         }
904     }
905
906     if (idlen) {
907         if (deepcopy)
908             (void)hv_delete(seenhv, id, idlen, G_DISCARD);
909         else if (namelen && seenentry) {
910             SV *mark = *av_fetch(seenentry, 2, TRUE);
911             sv_setiv(mark,1);
912         }
913     }
914     return 1;
915 }
916
917
918 MODULE = Data::Dumper           PACKAGE = Data::Dumper         PREFIX = Data_Dumper_
919
920 #
921 # This is the exact equivalent of Dump.  Well, almost. The things that are
922 # different as of now (due to Laziness):
923 #   * doesnt do double-quotes yet.
924 #
925
926 void
927 Data_Dumper_Dumpxs(href, ...)
928         SV      *href;
929         PROTOTYPE: $;$$
930         PPCODE:
931         {
932             HV *hv;
933             SV *retval, *valstr;
934             HV *seenhv = NULL;
935             AV *postav, *todumpav, *namesav;
936             I32 level = 0;
937             I32 indent, terse, i, imax, postlen;
938             SV **svp;
939             SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
940             SV *freezer, *toaster, *bless, *sortkeys;
941             I32 purity, deepcopy, quotekeys, maxdepth = 0;
942             char tmpbuf[1024];
943             I32 gimme = GIMME;
944
945             if (!SvROK(href)) {         /* call new to get an object first */
946                 if (items < 2)
947                     croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
948                 
949                 ENTER;
950                 SAVETMPS;
951                 
952                 PUSHMARK(sp);
953                 XPUSHs(href);
954                 XPUSHs(sv_2mortal(newSVsv(ST(1))));
955                 if (items >= 3)
956                     XPUSHs(sv_2mortal(newSVsv(ST(2))));
957                 PUTBACK;
958                 i = perl_call_method("new", G_SCALAR);
959                 SPAGAIN;
960                 if (i)
961                     href = newSVsv(POPs);
962
963                 PUTBACK;
964                 FREETMPS;
965                 LEAVE;
966                 if (i)
967                     (void)sv_2mortal(href);
968             }
969
970             todumpav = namesav = NULL;
971             seenhv = NULL;
972             val = pad = xpad = apad = sep = pair = varname
973                 = freezer = toaster = bless = sortkeys = &PL_sv_undef;
974             name = sv_newmortal();
975             indent = 2;
976             terse = purity = deepcopy = 0;
977             quotekeys = 1;
978         
979             retval = newSVpvn("", 0);
980             if (SvROK(href)
981                 && (hv = (HV*)SvRV((SV*)href))
982                 && SvTYPE(hv) == SVt_PVHV)              {
983
984                 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
985                     seenhv = (HV*)SvRV(*svp);
986                 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
987                     todumpav = (AV*)SvRV(*svp);
988                 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
989                     namesav = (AV*)SvRV(*svp);
990                 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
991                     indent = SvIV(*svp);
992                 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
993                     purity = SvIV(*svp);
994                 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
995                     terse = SvTRUE(*svp);
996 #if 0 /* useqq currently unused */
997                 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
998                     useqq = SvTRUE(*svp);
999 #endif
1000                 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
1001                     pad = *svp;
1002                 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
1003                     xpad = *svp;
1004                 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
1005                     apad = *svp;
1006                 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
1007                     sep = *svp;
1008                 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
1009                     pair = *svp;
1010                 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1011                     varname = *svp;
1012                 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
1013                     freezer = *svp;
1014                 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
1015                     toaster = *svp;
1016                 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
1017                     deepcopy = SvTRUE(*svp);
1018                 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
1019                     quotekeys = SvTRUE(*svp);
1020                 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
1021                     bless = *svp;
1022                 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
1023                     maxdepth = SvIV(*svp);
1024                 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1025                     sortkeys = *svp;
1026                     if (! SvTRUE(sortkeys))
1027                         sortkeys = NULL;
1028                     else if (! (SvROK(sortkeys) &&
1029                                 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1030                     {
1031                         /* flag to use qsortsv() for sorting hash keys */       
1032                         sortkeys = &PL_sv_yes; 
1033                     }
1034                 }
1035                 postav = newAV();
1036
1037                 if (todumpav)
1038                     imax = av_len(todumpav);
1039                 else
1040                     imax = -1;
1041                 valstr = newSVpvn("",0);
1042                 for (i = 0; i <= imax; ++i) {
1043                     SV *newapad;
1044                 
1045                     av_clear(postav);
1046                     if ((svp = av_fetch(todumpav, i, FALSE)))
1047                         val = *svp;
1048                     else
1049                         val = &PL_sv_undef;
1050                     if ((svp = av_fetch(namesav, i, TRUE))) {
1051                         sv_setsv(name, *svp);
1052                         if (SvOK(*svp) && !SvPOK(*svp))
1053                             (void)SvPV_nolen_const(name);
1054                     }
1055                     else
1056                         (void)SvOK_off(name);
1057                 
1058                     if (SvPOK(name)) {
1059                         if ((SvPVX_const(name))[0] == '*') {
1060                             if (SvROK(val)) {
1061                                 switch (SvTYPE(SvRV(val))) {
1062                                 case SVt_PVAV:
1063                                     (SvPVX(name))[0] = '@';
1064                                     break;
1065                                 case SVt_PVHV:
1066                                     (SvPVX(name))[0] = '%';
1067                                     break;
1068                                 case SVt_PVCV:
1069                                     (SvPVX(name))[0] = '*';
1070                                     break;
1071                                 default:
1072                                     (SvPVX(name))[0] = '$';
1073                                     break;
1074                                 }
1075                             }
1076                             else
1077                                 (SvPVX(name))[0] = '$';
1078                         }
1079                         else if ((SvPVX_const(name))[0] != '$')
1080                             sv_insert(name, 0, 0, "$", 1);
1081                     }
1082                     else {
1083                         STRLEN nchars;
1084                         sv_setpvn(name, "$", 1);
1085                         sv_catsv(name, varname);
1086                         (void) sprintf(tmpbuf, "%"IVdf, (IV)(i+1));
1087                         nchars = strlen(tmpbuf);
1088                         sv_catpvn(name, tmpbuf, nchars);
1089                     }
1090                 
1091                     if (indent >= 2) {
1092                         SV * const tmpsv = sv_x(aTHX_ NULL, " ", 1, SvCUR(name)+3);
1093                         newapad = newSVsv(apad);
1094                         sv_catsv(newapad, tmpsv);
1095                         SvREFCNT_dec(tmpsv);
1096                     }
1097                     else
1098                         newapad = apad;
1099                 
1100                     DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
1101                             postav, &level, indent, pad, xpad, newapad, sep, pair,
1102                             freezer, toaster, purity, deepcopy, quotekeys,
1103                             bless, maxdepth, sortkeys);
1104                 
1105                     if (indent >= 2)
1106                         SvREFCNT_dec(newapad);
1107
1108                     postlen = av_len(postav);
1109                     if (postlen >= 0 || !terse) {
1110                         sv_insert(valstr, 0, 0, " = ", 3);
1111                         sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name));
1112                         sv_catpvn(valstr, ";", 1);
1113                     }
1114                     sv_catsv(retval, pad);
1115                     sv_catsv(retval, valstr);
1116                     sv_catsv(retval, sep);
1117                     if (postlen >= 0) {
1118                         I32 i;
1119                         sv_catsv(retval, pad);
1120                         for (i = 0; i <= postlen; ++i) {
1121                             SV *elem;
1122                             svp = av_fetch(postav, i, FALSE);
1123                             if (svp && (elem = *svp)) {
1124                                 sv_catsv(retval, elem);
1125                                 if (i < postlen) {
1126                                     sv_catpvn(retval, ";", 1);
1127                                     sv_catsv(retval, sep);
1128                                     sv_catsv(retval, pad);
1129                                 }
1130                             }
1131                         }
1132                         sv_catpvn(retval, ";", 1);
1133                             sv_catsv(retval, sep);
1134                     }
1135                     sv_setpvn(valstr, "", 0);
1136                     if (gimme == G_ARRAY) {
1137                         XPUSHs(sv_2mortal(retval));
1138                         if (i < imax)   /* not the last time thro ? */
1139                             retval = newSVpvn("",0);
1140                     }
1141                 }
1142                 SvREFCNT_dec(postav);
1143                 SvREFCNT_dec(valstr);
1144             }
1145             else
1146                 croak("Call to new() method failed to return HASH ref");
1147             if (gimme == G_SCALAR)
1148                 XPUSHs(sv_2mortal(retval));
1149         }