s/Nullav/NULL/g
[p5sagit/p5-mst-13.2.git] / ext / Data / Dumper / Dumper.xs
1 #define PERL_NO_GET_CONTEXT
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5
6 static I32 num_q (char *s, STRLEN slen);
7 static I32 esc_q (char *dest, char *src, STRLEN slen);
8 static I32 esc_q_utf8 (pTHX_ SV *sv, char *src, STRLEN slen);
9 static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
10 static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
11                     HV *seenhv, AV *postav, I32 *levelp, I32 indent,
12                     SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
13                     SV *freezer, SV *toaster,
14                     I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
15                     I32 maxdepth, SV *sortkeys);
16
17 #ifndef HvNAME_get
18 #define HvNAME_get HvNAME
19 #endif
20
21 #if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
22
23 # ifdef EBCDIC
24 #  define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
25 # else
26 #  define UNI_TO_NATIVE(ch) (ch)
27 # endif
28
29 UV
30 Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
31 {
32     UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
33                     ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
34     return UNI_TO_NATIVE(uv);
35 }
36
37 # if !defined(PERL_IMPLICIT_CONTEXT)
38 #  define utf8_to_uvchr      Perl_utf8_to_uvchr
39 # else
40 #  define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
41 # endif
42
43 #endif /* PERL_VERSION <= 6 */
44
45 /* Changes in 5.7 series mean that now IOK is only set if scalar is
46    precisely integer but in 5.6 and earlier we need to do a more
47    complex test  */
48 #if PERL_VERSION <= 6
49 #define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
50 #else
51 #define DD_is_integer(sv) SvIOK(sv)
52 #endif
53
54 /* does a string need to be protected? */
55 static I32
56 needs_quote(register char *s)
57 {
58 TOP:
59     if (s[0] == ':') {
60         if (*++s) {
61             if (*s++ != ':')
62                 return 1;
63         }
64         else
65             return 1;
66     }
67     if (isIDFIRST(*s)) {
68         while (*++s)
69             if (!isALNUM(*s)) {
70                 if (*s == ':')
71                     goto TOP;
72                 else
73                     return 1;
74             }
75     }
76     else
77         return 1;
78     return 0;
79 }
80
81 /* count the number of "'"s and "\"s in string */
82 static I32
83 num_q(register char *s, register STRLEN slen)
84 {
85     register I32 ret = 0;
86
87     while (slen > 0) {
88         if (*s == '\'' || *s == '\\')
89             ++ret;
90         ++s;
91         --slen;
92     }
93     return ret;
94 }
95
96
97 /* returns number of chars added to escape "'"s and "\"s in s */
98 /* slen number of characters in s will be escaped */
99 /* destination must be long enough for additional chars */
100 static I32
101 esc_q(register char *d, register char *s, register STRLEN slen)
102 {
103     register I32 ret = 0;
104
105     while (slen > 0) {
106         switch (*s) {
107         case '\'':
108         case '\\':
109             *d = '\\';
110             ++d; ++ret;
111         default:
112             *d = *s;
113             ++d; ++s; --slen;
114             break;
115         }
116     }
117     return ret;
118 }
119
120 static I32
121 esc_q_utf8(pTHX_ SV* sv, register char *src, register STRLEN slen)
122 {
123     char *s, *send, *r, *rstart;
124     STRLEN j, cur = SvCUR(sv);
125     /* Could count 128-255 and 256+ in two variables, if we want to
126        be like &qquote and make a distinction.  */
127     STRLEN grow = 0;    /* bytes needed to represent chars 128+ */
128     /* STRLEN topbit_grow = 0;  bytes needed to represent chars 128-255 */
129     STRLEN backslashes = 0;
130     STRLEN single_quotes = 0;
131     STRLEN qq_escapables = 0;   /* " $ @ will need a \ in "" strings.  */
132     STRLEN normal = 0;
133
134     /* this will need EBCDICification */
135     for (s = src, send = src + slen; s < send; s += UTF8SKIP(s)) {
136         UV k = utf8_to_uvchr((U8*)s, NULL);
137
138         if (k > 127) {
139             /* 4: \x{} then count the number of hex digits.  */
140             grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
141 #if UVSIZE == 4
142                 8 /* We may allocate a bit more than the minimum here.  */
143 #else
144                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
145 #endif
146                 );
147         } else if (k == '\\') {
148             backslashes++;
149         } else if (k == '\'') {
150             single_quotes++;
151         } else if (k == '"' || k == '$' || k == '@') {
152             qq_escapables++;
153         } else {
154             normal++;
155         }
156     }
157     if (grow) {
158         /* We have something needing hex. 3 is ""\0 */
159         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
160                 + 2*qq_escapables + normal);
161         rstart = r = SvPVX(sv) + cur;
162
163         *r++ = '"';
164
165         for (s = src; s < send; s += UTF8SKIP(s)) {
166             UV k = utf8_to_uvchr((U8*)s, NULL);
167
168             if (k == '"' || k == '\\' || k == '$' || k == '@') {
169                 *r++ = '\\';
170                 *r++ = (char)k;
171             }
172             else if (k < 0x80)
173                 *r++ = (char)k;
174             else {
175               /* The return value of sprintf() is unportable.
176                * In modern systems it returns (int) the number of characters,
177                * but in older systems it might return (char*) the original
178                * buffer, or it might even be (void).  The easiest portable
179                * thing to do is probably use sprintf() in void context and
180                * then strlen(buffer) for the length.  The more proper way
181                * would of course be to figure out the prototype of sprintf.
182                * --jhi */
183                 sprintf(r, "\\x{%"UVxf"}", k);
184                 r += strlen(r);
185             }
186         }
187         *r++ = '"';
188     } else {
189         /* Single quotes.  */
190         sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
191                 + qq_escapables + normal);
192         rstart = r = SvPVX(sv) + cur;
193         *r++ = '\'';
194         for (s = src; s < send; s ++) {
195             char k = *s;
196             if (k == '\'' || k == '\\')
197                 *r++ = '\\';
198             *r++ = k;
199         }
200         *r++ = '\'';
201     }
202     *r = '\0';
203     j = r - rstart;
204     SvCUR_set(sv, cur + j);
205
206     return j;
207 }
208
209 /* append a repeated string to an SV */
210 static SV *
211 sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
212 {
213     if (sv == Nullsv)
214         sv = newSVpvn("", 0);
215 #ifdef DEBUGGING
216     else
217         assert(SvTYPE(sv) >= SVt_PV);
218 #endif
219
220     if (n > 0) {
221         SvGROW(sv, len*n + SvCUR(sv) + 1);
222         if (len == 1) {
223             char *start = SvPVX(sv) + SvCUR(sv);
224             SvCUR_set(sv, SvCUR(sv) + n);
225             start[n] = '\0';
226             while (n > 0)
227                 start[--n] = str[0];
228         }
229         else
230             while (n > 0) {
231                 sv_catpvn(sv, str, len);
232                 --n;
233             }
234     }
235     return sv;
236 }
237
238 /*
239  * This ought to be split into smaller functions. (it is one long function since
240  * it exactly parallels the perl version, which was one long thing for
241  * efficiency raisins.)  Ugggh!
242  */
243 static I32
244 DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
245         AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
246         SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
247         I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
248 {
249     char tmpbuf[128];
250     U32 i;
251     char *c, *r, *realpack, id[128];
252     SV **svp;
253     SV *sv, *ipad, *ival;
254     SV *blesspad = Nullsv;
255     AV *seenentry = NULL;
256     char *iname;
257     STRLEN inamelen, idlen = 0;
258     U32 realtype;
259
260     if (!val)
261         return 0;
262
263     realtype = SvTYPE(val);
264
265     if (SvGMAGICAL(val))
266         mg_get(val);
267     if (SvROK(val)) {
268
269         /* If a freeze method is provided and the object has it, call
270            it.  Warn on errors. */
271         if (SvOBJECT(SvRV(val)) && freezer &&
272             SvPOK(freezer) && SvCUR(freezer) &&
273             gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(freezer), 
274                          SvCUR(freezer), -1) != NULL)
275         {
276             dSP; ENTER; SAVETMPS; PUSHMARK(sp);
277             XPUSHs(val); PUTBACK;
278             i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID);
279             SPAGAIN;
280             if (SvTRUE(ERRSV))
281                 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
282             PUTBACK; FREETMPS; LEAVE;
283         }
284         
285         ival = SvRV(val);
286         realtype = SvTYPE(ival);
287         (void) sprintf(id, "0x%"UVxf, PTR2UV(ival));
288         idlen = strlen(id);
289         if (SvOBJECT(ival))
290             realpack = HvNAME_get(SvSTASH(ival));
291         else
292             realpack = Nullch;
293
294         /* if it has a name, we need to either look it up, or keep a tab
295          * on it so we know when we hit it later
296          */
297         if (namelen) {
298             if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
299                 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
300             {
301                 SV *othername;
302                 if ((svp = av_fetch(seenentry, 0, FALSE))
303                     && (othername = *svp))
304                 {
305                     if (purity && *levelp > 0) {
306                         SV *postentry;
307                         
308                         if (realtype == SVt_PVHV)
309                             sv_catpvn(retval, "{}", 2);
310                         else if (realtype == SVt_PVAV)
311                             sv_catpvn(retval, "[]", 2);
312                         else
313                             sv_catpvn(retval, "do{my $o}", 9);
314                         postentry = newSVpvn(name, namelen);
315                         sv_catpvn(postentry, " = ", 3);
316                         sv_catsv(postentry, othername);
317                         av_push(postav, postentry);
318                     }
319                     else {
320                         if (name[0] == '@' || name[0] == '%') {
321                             if ((SvPVX_const(othername))[0] == '\\' &&
322                                 (SvPVX_const(othername))[1] == name[0]) {
323                                 sv_catpvn(retval, SvPVX_const(othername)+1,
324                                           SvCUR(othername)-1);
325                             }
326                             else {
327                                 sv_catpvn(retval, name, 1);
328                                 sv_catpvn(retval, "{", 1);
329                                 sv_catsv(retval, othername);
330                                 sv_catpvn(retval, "}", 1);
331                             }
332                         }
333                         else
334                             sv_catsv(retval, othername);
335                     }
336                     return 1;
337                 }
338                 else {
339                     warn("ref name not found for %s", id);
340                     return 0;
341                 }
342             }
343             else {   /* store our name and continue */
344                 SV *namesv;
345                 if (name[0] == '@' || name[0] == '%') {
346                     namesv = newSVpvn("\\", 1);
347                     sv_catpvn(namesv, name, namelen);
348                 }
349                 else if (realtype == SVt_PVCV && name[0] == '*') {
350                     namesv = newSVpvn("\\", 2);
351                     sv_catpvn(namesv, name, namelen);
352                     (SvPVX(namesv))[1] = '&';
353                 }
354                 else
355                     namesv = newSVpvn(name, namelen);
356                 seenentry = newAV();
357                 av_push(seenentry, namesv);
358                 (void)SvREFCNT_inc(val);
359                 av_push(seenentry, val);
360                 (void)hv_store(seenhv, id, strlen(id),
361                                newRV_inc((SV*)seenentry), 0);
362                 SvREFCNT_dec(seenentry);
363             }
364         }
365
366         if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) {
367             STRLEN rlen;
368             char *rval = SvPV(val, rlen);
369             char *slash = strchr(rval, '/');
370             sv_catpvn(retval, "qr/", 3);
371             while (slash) {
372                 sv_catpvn(retval, rval, slash-rval);
373                 sv_catpvn(retval, "\\/", 2);
374                 rlen -= slash-rval+1;
375                 rval = slash+1;
376                 slash = strchr(rval, '/');
377             }
378             sv_catpvn(retval, rval, rlen);
379             sv_catpvn(retval, "/", 1);
380             return 1;
381         }
382
383         /* If purity is not set and maxdepth is set, then check depth:
384          * if we have reached maximum depth, return the string
385          * representation of the thing we are currently examining
386          * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
387          */
388         if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
389             STRLEN vallen;
390             char *valstr = SvPV(val,vallen);
391             sv_catpvn(retval, "'", 1);
392             sv_catpvn(retval, valstr, vallen);
393             sv_catpvn(retval, "'", 1);
394             return 1;
395         }
396
397         if (realpack) {                         /* we have a blessed ref */
398             STRLEN blesslen;
399             char *blessstr = SvPV(bless, blesslen);
400             sv_catpvn(retval, blessstr, blesslen);
401             sv_catpvn(retval, "( ", 2);
402             if (indent >= 2) {
403                 blesspad = apad;
404                 apad = newSVsv(apad);
405                 sv_x(aTHX_ apad, " ", 1, blesslen+2);
406             }
407         }
408
409         (*levelp)++;
410         ipad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp);
411
412         if (realtype <= SVt_PVBM) {                          /* scalar ref */
413             SV *namesv = newSVpvn("${", 2);
414             sv_catpvn(namesv, name, namelen);
415             sv_catpvn(namesv, "}", 1);
416             if (realpack) {                                  /* blessed */
417                 sv_catpvn(retval, "do{\\(my $o = ", 13);
418                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
419                         postav, levelp, indent, pad, xpad, apad, sep, pair,
420                         freezer, toaster, purity, deepcopy, quotekeys, bless,
421                         maxdepth, sortkeys);
422                 sv_catpvn(retval, ")}", 2);
423             }                                                /* plain */
424             else {
425                 sv_catpvn(retval, "\\", 1);
426                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
427                         postav, levelp, indent, pad, xpad, apad, sep, pair,
428                         freezer, toaster, purity, deepcopy, quotekeys, bless,
429                         maxdepth, sortkeys);
430             }
431             SvREFCNT_dec(namesv);
432         }
433         else if (realtype == SVt_PVGV) {                     /* glob ref */
434             SV *namesv = newSVpvn("*{", 2);
435             sv_catpvn(namesv, name, namelen);
436             sv_catpvn(namesv, "}", 1);
437             sv_catpvn(retval, "\\", 1);
438             DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
439                     postav, levelp,     indent, pad, xpad, apad, sep, pair,
440                     freezer, toaster, purity, deepcopy, quotekeys, bless,
441                     maxdepth, sortkeys);
442             SvREFCNT_dec(namesv);
443         }
444         else if (realtype == SVt_PVAV) {
445             SV *totpad;
446             I32 ix = 0;
447             I32 ixmax = av_len((AV *)ival);
448         
449             SV *ixsv = newSViv(0);
450             /* allowing for a 24 char wide array index */
451             New(0, iname, namelen+28, char);
452             (void)strcpy(iname, name);
453             inamelen = namelen;
454             if (name[0] == '@') {
455                 sv_catpvn(retval, "(", 1);
456                 iname[0] = '$';
457             }
458             else {
459                 sv_catpvn(retval, "[", 1);
460                 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
461                 /*if (namelen > 0
462                     && name[namelen-1] != ']' && name[namelen-1] != '}'
463                     && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
464                 if ((namelen > 0
465                      && name[namelen-1] != ']' && name[namelen-1] != '}')
466                     || (namelen > 4
467                         && (name[1] == '{'
468                             || (name[0] == '\\' && name[2] == '{'))))
469                 {
470                     iname[inamelen++] = '-'; iname[inamelen++] = '>';
471                     iname[inamelen] = '\0';
472                 }
473             }
474             if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
475                 (instr(iname+inamelen-8, "{SCALAR}") ||
476                  instr(iname+inamelen-7, "{ARRAY}") ||
477                  instr(iname+inamelen-6, "{HASH}"))) {
478                 iname[inamelen++] = '-'; iname[inamelen++] = '>';
479             }
480             iname[inamelen++] = '['; iname[inamelen] = '\0';
481             totpad = newSVsv(sep);
482             sv_catsv(totpad, pad);
483             sv_catsv(totpad, apad);
484
485             for (ix = 0; ix <= ixmax; ++ix) {
486                 STRLEN ilen;
487                 SV *elem;
488                 svp = av_fetch((AV*)ival, ix, FALSE);
489                 if (svp)
490                     elem = *svp;
491                 else
492                     elem = &PL_sv_undef;
493                 
494                 ilen = inamelen;
495                 sv_setiv(ixsv, ix);
496                 (void) sprintf(iname+ilen, "%"IVdf, (IV)ix);
497                 ilen = strlen(iname);
498                 iname[ilen++] = ']'; iname[ilen] = '\0';
499                 if (indent >= 3) {
500                     sv_catsv(retval, totpad);
501                     sv_catsv(retval, ipad);
502                     sv_catpvn(retval, "#", 1);
503                     sv_catsv(retval, ixsv);
504                 }
505                 sv_catsv(retval, totpad);
506                 sv_catsv(retval, ipad);
507                 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
508                         levelp, indent, pad, xpad, apad, sep, pair,
509                         freezer, toaster, purity, deepcopy, quotekeys, bless,
510                         maxdepth, sortkeys);
511                 if (ix < ixmax)
512                     sv_catpvn(retval, ",", 1);
513             }
514             if (ixmax >= 0) {
515                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1);
516                 sv_catsv(retval, totpad);
517                 sv_catsv(retval, opad);
518                 SvREFCNT_dec(opad);
519             }
520             if (name[0] == '@')
521                 sv_catpvn(retval, ")", 1);
522             else
523                 sv_catpvn(retval, "]", 1);
524             SvREFCNT_dec(ixsv);
525             SvREFCNT_dec(totpad);
526             Safefree(iname);
527         }
528         else if (realtype == SVt_PVHV) {
529             SV *totpad, *newapad;
530             SV *iname, *sname;
531             HE *entry;
532             char *key;
533             I32 klen;
534             SV *hval;
535             AV *keys = NULL;
536         
537             iname = newSVpvn(name, namelen);
538             if (name[0] == '%') {
539                 sv_catpvn(retval, "(", 1);
540                 (SvPVX(iname))[0] = '$';
541             }
542             else {
543                 sv_catpvn(retval, "{", 1);
544                 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
545                 if ((namelen > 0
546                      && name[namelen-1] != ']' && name[namelen-1] != '}')
547                     || (namelen > 4
548                         && (name[1] == '{'
549                             || (name[0] == '\\' && name[2] == '{'))))
550                 {
551                     sv_catpvn(iname, "->", 2);
552                 }
553             }
554             if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
555                 (instr(name+namelen-8, "{SCALAR}") ||
556                  instr(name+namelen-7, "{ARRAY}") ||
557                  instr(name+namelen-6, "{HASH}"))) {
558                 sv_catpvn(iname, "->", 2);
559             }
560             sv_catpvn(iname, "{", 1);
561             totpad = newSVsv(sep);
562             sv_catsv(totpad, pad);
563             sv_catsv(totpad, apad);
564         
565             /* If requested, get a sorted/filtered array of hash keys */
566             if (sortkeys) {
567                 if (sortkeys == &PL_sv_yes) {
568 #if PERL_VERSION < 8
569                     sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
570 #else
571                     keys = newAV();
572                     (void)hv_iterinit((HV*)ival);
573                     while ((entry = hv_iternext((HV*)ival))) {
574                         sv = hv_iterkeysv(entry);
575                         SvREFCNT_inc(sv);
576                         av_push(keys, sv);
577                     }
578 # ifdef USE_LOCALE_NUMERIC
579                     sortsv(AvARRAY(keys), 
580                            av_len(keys)+1, 
581                            IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
582 # else
583                     sortsv(AvARRAY(keys), 
584                            av_len(keys)+1, 
585                            Perl_sv_cmp);
586 # endif
587 #endif
588                 }
589                 if (sortkeys != &PL_sv_yes) {
590                     dSP; ENTER; SAVETMPS; PUSHMARK(sp);
591                     XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
592                     i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
593                     SPAGAIN;
594                     if (i) {
595                         sv = POPs;
596                         if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
597                             keys = (AV*)SvREFCNT_inc(SvRV(sv));
598                     }
599                     if (! keys)
600                         warn("Sortkeys subroutine did not return ARRAYREF\n");
601                     PUTBACK; FREETMPS; LEAVE;
602                 }
603                 if (keys)
604                     sv_2mortal((SV*)keys);
605             }
606             else
607                 (void)hv_iterinit((HV*)ival);
608
609             /* foreach (keys %hash) */
610             for (i = 0; 1; i++) {
611                 char *nkey;
612                 char *nkey_buffer = NULL;
613                 I32 nticks = 0;
614                 SV* keysv;
615                 STRLEN keylen;
616                 I32 nlen;
617                 bool do_utf8 = FALSE;
618
619                if (sortkeys) {
620                    if (!(keys && (I32)i <= av_len(keys))) break;
621                } else {
622                    if (!(entry = hv_iternext((HV *)ival))) break;
623                }
624
625                 if (i)
626                     sv_catpvn(retval, ",", 1);
627
628                 if (sortkeys) {
629                     char *key;
630                     svp = av_fetch(keys, i, FALSE);
631                     keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
632                     key = SvPV(keysv, keylen);
633                     svp = hv_fetch((HV*)ival, key,
634                                    SvUTF8(keysv) ? -(I32)keylen : keylen, 0);
635                     hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
636                 }
637                 else {
638                     keysv = hv_iterkeysv(entry);
639                     hval = hv_iterval((HV*)ival, entry);
640                 }
641
642                 do_utf8 = DO_UTF8(keysv);
643                 key = SvPV(keysv, keylen);
644                 klen = keylen;
645
646                 sv_catsv(retval, totpad);
647                 sv_catsv(retval, ipad);
648                 /* old logic was first to check utf8 flag, and if utf8 always
649                    call esc_q_utf8.  This caused test to break under -Mutf8,
650                    because there even strings like 'c' have utf8 flag on.
651                    Hence with quotekeys == 0 the XS code would still '' quote
652                    them based on flags, whereas the perl code would not,
653                    based on regexps.
654                    The perl code is correct.
655                    needs_quote() decides that anything that isn't a valid
656                    perl identifier needs to be quoted, hence only correctly
657                    formed strings with no characters outside [A-Za-z0-9_:]
658                    won't need quoting.  None of those characters are used in
659                    the byte encoding of utf8, so anything with utf8
660                    encoded characters in will need quoting. Hence strings
661                    with utf8 encoded characters in will end up inside do_utf8
662                    just like before, but now strings with utf8 flag set but
663                    only ascii characters will end up in the unquoted section.
664
665                    There should also be less tests for the (probably currently)
666                    more common doesn't need quoting case.
667                    The code is also smaller (22044 vs 22260) because I've been
668                    able to pull the common logic out to both sides.  */
669                 if (quotekeys || needs_quote(key)) {
670                     if (do_utf8) {
671                         STRLEN ocur = SvCUR(retval);
672                         nlen = esc_q_utf8(aTHX_ retval, key, klen);
673                         nkey = SvPVX(retval) + ocur;
674                     }
675                     else {
676                         nticks = num_q(key, klen);
677                         New(0, nkey_buffer, klen+nticks+3, char);
678                         nkey = nkey_buffer;
679                         nkey[0] = '\'';
680                         if (nticks)
681                             klen += esc_q(nkey+1, key, klen);
682                         else
683                             (void)Copy(key, nkey+1, klen, char);
684                         nkey[++klen] = '\'';
685                         nkey[++klen] = '\0';
686                         nlen = klen;
687                         sv_catpvn(retval, nkey, klen);
688                     }
689                 }
690                 else {
691                     nkey = key;
692                     nlen = klen;
693                     sv_catpvn(retval, nkey, klen);
694                 }
695                 sname = newSVsv(iname);
696                 sv_catpvn(sname, nkey, nlen);
697                 sv_catpvn(sname, "}", 1);
698
699                 sv_catsv(retval, pair);
700                 if (indent >= 2) {
701                     char *extra;
702                     I32 elen = 0;
703                     newapad = newSVsv(apad);
704                     New(0, extra, klen+4+1, char);
705                     while (elen < (klen+4))
706                         extra[elen++] = ' ';
707                     extra[elen] = '\0';
708                     sv_catpvn(newapad, extra, elen);
709                     Safefree(extra);
710                 }
711                 else
712                     newapad = apad;
713
714                 DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
715                         postav, levelp, indent, pad, xpad, newapad, sep, pair,
716                         freezer, toaster, purity, deepcopy, quotekeys, bless,
717                         maxdepth, sortkeys);
718                 SvREFCNT_dec(sname);
719                 Safefree(nkey_buffer);
720                 if (indent >= 2)
721                     SvREFCNT_dec(newapad);
722             }
723             if (i) {
724                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp-1);
725                 sv_catsv(retval, totpad);
726                 sv_catsv(retval, opad);
727                 SvREFCNT_dec(opad);
728             }
729             if (name[0] == '%')
730                 sv_catpvn(retval, ")", 1);
731             else
732                 sv_catpvn(retval, "}", 1);
733             SvREFCNT_dec(iname);
734             SvREFCNT_dec(totpad);
735         }
736         else if (realtype == SVt_PVCV) {
737             sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
738             if (purity)
739                 warn("Encountered CODE ref, using dummy placeholder");
740         }
741         else {
742             warn("cannot handle ref type %ld", realtype);
743         }
744
745         if (realpack) {  /* free blessed allocs */
746             if (indent >= 2) {
747                 SvREFCNT_dec(apad);
748                 apad = blesspad;
749             }
750             sv_catpvn(retval, ", '", 3);
751             sv_catpvn(retval, realpack, strlen(realpack));
752             sv_catpvn(retval, "' )", 3);
753             if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
754                 sv_catpvn(retval, "->", 2);
755                 sv_catsv(retval, toaster);
756                 sv_catpvn(retval, "()", 2);
757             }
758         }
759         SvREFCNT_dec(ipad);
760         (*levelp)--;
761     }
762     else {
763         STRLEN i;
764         
765         if (namelen) {
766             (void) sprintf(id, "0x%"UVxf, PTR2UV(val));
767             if ((svp = hv_fetch(seenhv, id, (idlen = strlen(id)), FALSE)) &&
768                 (sv = *svp) && SvROK(sv) &&
769                 (seenentry = (AV*)SvRV(sv)))
770             {
771                 SV *othername;
772                 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
773                     && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
774                 {
775                     sv_catpvn(retval, "${", 2);
776                     sv_catsv(retval, othername);
777                     sv_catpvn(retval, "}", 1);
778                     return 1;
779                 }
780             }
781             else if (val != &PL_sv_undef) {
782                 SV *namesv;
783                 namesv = newSVpvn("\\", 1);
784                 sv_catpvn(namesv, name, namelen);
785                 seenentry = newAV();
786                 av_push(seenentry, namesv);
787                 av_push(seenentry, newRV_inc(val));
788                 (void)hv_store(seenhv, id, strlen(id), newRV_inc((SV*)seenentry), 0);
789                 SvREFCNT_dec(seenentry);
790             }
791         }
792
793         if (DD_is_integer(val)) {
794             STRLEN len;
795             if (SvIsUV(val))
796               (void) sprintf(tmpbuf, "%"UVuf, SvUV(val));
797             else
798               (void) sprintf(tmpbuf, "%"IVdf, SvIV(val));
799             len = strlen(tmpbuf);
800             if (SvPOK(val)) {
801               /* Need to check to see if this is a string such as " 0".
802                  I'm assuming from sprintf isn't going to clash with utf8.
803                  Is this valid on EBCDIC?  */
804               STRLEN pvlen;
805               const char *pv = SvPV(val, pvlen);
806               if (pvlen != len || memNE(pv, tmpbuf, len))
807                 goto integer_came_from_string;
808             }
809             if (len > 10) {
810               /* Looks like we're on a 64 bit system.  Make it a string so that
811                  if a 32 bit system reads the number it will cope better.  */
812               sv_catpvf(retval, "'%s'", tmpbuf);
813             } else
814               sv_catpvn(retval, tmpbuf, len);
815         }
816         else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
817             c = SvPV(val, i);
818             ++c; --i;                   /* just get the name */
819             if (i >= 6 && strncmp(c, "main::", 6) == 0) {
820                 c += 4;
821                 i -= 4;
822             }
823             if (needs_quote(c)) {
824                 sv_grow(retval, SvCUR(retval)+6+2*i);
825                 r = SvPVX(retval)+SvCUR(retval);
826                 r[0] = '*'; r[1] = '{'; r[2] = '\'';
827                 i += esc_q(r+3, c, i);
828                 i += 3;
829                 r[i++] = '\''; r[i++] = '}';
830                 r[i] = '\0';
831             }
832             else {
833                 sv_grow(retval, SvCUR(retval)+i+2);
834                 r = SvPVX(retval)+SvCUR(retval);
835                 r[0] = '*'; strcpy(r+1, c);
836                 i++;
837             }
838             SvCUR_set(retval, SvCUR(retval)+i);
839
840             if (purity) {
841                 static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
842                 static const STRLEN sizes[] = { 8, 7, 6 };
843                 SV *e;
844                 SV *nname = newSVpvn("", 0);
845                 SV *newapad = newSVpvn("", 0);
846                 GV *gv = (GV*)val;
847                 I32 j;
848                 
849                 for (j=0; j<3; j++) {
850                     e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
851                     if (!e)
852                         continue;
853                     if (j == 0 && !SvOK(e))
854                         continue;
855
856                     {
857                         I32 nlevel = 0;
858                         SV *postentry = newSVpvn(r,i);
859                         
860                         sv_setsv(nname, postentry);
861                         sv_catpvn(nname, entries[j], sizes[j]);
862                         sv_catpvn(postentry, " = ", 3);
863                         av_push(postav, postentry);
864                         e = newRV_inc(e);
865                         
866                         SvCUR_set(newapad, 0);
867                         if (indent >= 2)
868                             (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
869                         
870                         DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry,
871                                 seenhv, postav, &nlevel, indent, pad, xpad,
872                                 newapad, sep, pair, freezer, toaster, purity,
873                                 deepcopy, quotekeys, bless, maxdepth, 
874                                 sortkeys);
875                         SvREFCNT_dec(e);
876                     }
877                 }
878                 
879                 SvREFCNT_dec(newapad);
880                 SvREFCNT_dec(nname);
881             }
882         }
883         else if (val == &PL_sv_undef || !SvOK(val)) {
884             sv_catpvn(retval, "undef", 5);
885         }
886         else {
887         integer_came_from_string:
888             c = SvPV(val, i);
889             if (DO_UTF8(val))
890                 i += esc_q_utf8(aTHX_ retval, c, i);
891             else {
892                 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
893                 r = SvPVX(retval) + SvCUR(retval);
894                 r[0] = '\'';
895                 i += esc_q(r+1, c, i);
896                 ++i;
897                 r[i++] = '\'';
898                 r[i] = '\0';
899                 SvCUR_set(retval, SvCUR(retval)+i);
900             }
901         }
902     }
903
904     if (idlen) {
905         if (deepcopy)
906             (void)hv_delete(seenhv, id, idlen, G_DISCARD);
907         else if (namelen && seenentry) {
908             SV *mark = *av_fetch(seenentry, 2, TRUE);
909             sv_setiv(mark,1);
910         }
911     }
912     return 1;
913 }
914
915
916 MODULE = Data::Dumper           PACKAGE = Data::Dumper         PREFIX = Data_Dumper_
917
918 #
919 # This is the exact equivalent of Dump.  Well, almost. The things that are
920 # different as of now (due to Laziness):
921 #   * doesnt do double-quotes yet.
922 #
923
924 void
925 Data_Dumper_Dumpxs(href, ...)
926         SV      *href;
927         PROTOTYPE: $;$$
928         PPCODE:
929         {
930             HV *hv;
931             SV *retval, *valstr;
932             HV *seenhv = Nullhv;
933             AV *postav, *todumpav, *namesav;
934             I32 level = 0;
935             I32 indent, terse, i, imax, postlen;
936             SV **svp;
937             SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
938             SV *freezer, *toaster, *bless, *sortkeys;
939             I32 purity, deepcopy, quotekeys, maxdepth = 0;
940             char tmpbuf[1024];
941             I32 gimme = GIMME;
942
943             if (!SvROK(href)) {         /* call new to get an object first */
944                 if (items < 2)
945                     croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
946                 
947                 ENTER;
948                 SAVETMPS;
949                 
950                 PUSHMARK(sp);
951                 XPUSHs(href);
952                 XPUSHs(sv_2mortal(newSVsv(ST(1))));
953                 if (items >= 3)
954                     XPUSHs(sv_2mortal(newSVsv(ST(2))));
955                 PUTBACK;
956                 i = perl_call_method("new", G_SCALAR);
957                 SPAGAIN;
958                 if (i)
959                     href = newSVsv(POPs);
960
961                 PUTBACK;
962                 FREETMPS;
963                 LEAVE;
964                 if (i)
965                     (void)sv_2mortal(href);
966             }
967
968             todumpav = namesav = NULL;
969             seenhv = Nullhv;
970             val = pad = xpad = apad = sep = pair = varname
971                 = freezer = toaster = bless = sortkeys = &PL_sv_undef;
972             name = sv_newmortal();
973             indent = 2;
974             terse = purity = deepcopy = 0;
975             quotekeys = 1;
976         
977             retval = newSVpvn("", 0);
978             if (SvROK(href)
979                 && (hv = (HV*)SvRV((SV*)href))
980                 && SvTYPE(hv) == SVt_PVHV)              {
981
982                 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
983                     seenhv = (HV*)SvRV(*svp);
984                 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
985                     todumpav = (AV*)SvRV(*svp);
986                 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
987                     namesav = (AV*)SvRV(*svp);
988                 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
989                     indent = SvIV(*svp);
990                 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
991                     purity = SvIV(*svp);
992                 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
993                     terse = SvTRUE(*svp);
994 #if 0 /* useqq currently unused */
995                 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
996                     useqq = SvTRUE(*svp);
997 #endif
998                 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
999                     pad = *svp;
1000                 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
1001                     xpad = *svp;
1002                 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
1003                     apad = *svp;
1004                 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
1005                     sep = *svp;
1006                 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
1007                     pair = *svp;
1008                 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1009                     varname = *svp;
1010                 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
1011                     freezer = *svp;
1012                 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
1013                     toaster = *svp;
1014                 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
1015                     deepcopy = SvTRUE(*svp);
1016                 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
1017                     quotekeys = SvTRUE(*svp);
1018                 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
1019                     bless = *svp;
1020                 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
1021                     maxdepth = SvIV(*svp);
1022                 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1023                     sortkeys = *svp;
1024                     if (! SvTRUE(sortkeys))
1025                         sortkeys = NULL;
1026                     else if (! (SvROK(sortkeys) &&
1027                                 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1028                     {
1029                         /* flag to use qsortsv() for sorting hash keys */       
1030                         sortkeys = &PL_sv_yes; 
1031                     }
1032                 }
1033                 postav = newAV();
1034
1035                 if (todumpav)
1036                     imax = av_len(todumpav);
1037                 else
1038                     imax = -1;
1039                 valstr = newSVpvn("",0);
1040                 for (i = 0; i <= imax; ++i) {
1041                     SV *newapad;
1042                 
1043                     av_clear(postav);
1044                     if ((svp = av_fetch(todumpav, i, FALSE)))
1045                         val = *svp;
1046                     else
1047                         val = &PL_sv_undef;
1048                     if ((svp = av_fetch(namesav, i, TRUE))) {
1049                         sv_setsv(name, *svp);
1050                         if (SvOK(*svp) && !SvPOK(*svp))
1051                             (void)SvPV_nolen_const(name);
1052                     }
1053                     else
1054                         (void)SvOK_off(name);
1055                 
1056                     if (SvPOK(name)) {
1057                         if ((SvPVX_const(name))[0] == '*') {
1058                             if (SvROK(val)) {
1059                                 switch (SvTYPE(SvRV(val))) {
1060                                 case SVt_PVAV:
1061                                     (SvPVX(name))[0] = '@';
1062                                     break;
1063                                 case SVt_PVHV:
1064                                     (SvPVX(name))[0] = '%';
1065                                     break;
1066                                 case SVt_PVCV:
1067                                     (SvPVX(name))[0] = '*';
1068                                     break;
1069                                 default:
1070                                     (SvPVX(name))[0] = '$';
1071                                     break;
1072                                 }
1073                             }
1074                             else
1075                                 (SvPVX(name))[0] = '$';
1076                         }
1077                         else if ((SvPVX_const(name))[0] != '$')
1078                             sv_insert(name, 0, 0, "$", 1);
1079                     }
1080                     else {
1081                         STRLEN nchars = 0;
1082                         sv_setpvn(name, "$", 1);
1083                         sv_catsv(name, varname);
1084                         (void) sprintf(tmpbuf, "%"IVdf, (IV)(i+1));
1085                         nchars = strlen(tmpbuf);
1086                         sv_catpvn(name, tmpbuf, nchars);
1087                     }
1088                 
1089                     if (indent >= 2) {
1090                         SV *tmpsv = sv_x(aTHX_ Nullsv, " ", 1, SvCUR(name)+3);
1091                         newapad = newSVsv(apad);
1092                         sv_catsv(newapad, tmpsv);
1093                         SvREFCNT_dec(tmpsv);
1094                     }
1095                     else
1096                         newapad = apad;
1097                 
1098                     DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
1099                             postav, &level, indent, pad, xpad, newapad, sep, pair,
1100                             freezer, toaster, purity, deepcopy, quotekeys,
1101                             bless, maxdepth, sortkeys);
1102                 
1103                     if (indent >= 2)
1104                         SvREFCNT_dec(newapad);
1105
1106                     postlen = av_len(postav);
1107                     if (postlen >= 0 || !terse) {
1108                         sv_insert(valstr, 0, 0, " = ", 3);
1109                         sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name));
1110                         sv_catpvn(valstr, ";", 1);
1111                     }
1112                     sv_catsv(retval, pad);
1113                     sv_catsv(retval, valstr);
1114                     sv_catsv(retval, sep);
1115                     if (postlen >= 0) {
1116                         I32 i;
1117                         sv_catsv(retval, pad);
1118                         for (i = 0; i <= postlen; ++i) {
1119                             SV *elem;
1120                             svp = av_fetch(postav, i, FALSE);
1121                             if (svp && (elem = *svp)) {
1122                                 sv_catsv(retval, elem);
1123                                 if (i < postlen) {
1124                                     sv_catpvn(retval, ";", 1);
1125                                     sv_catsv(retval, sep);
1126                                     sv_catsv(retval, pad);
1127                                 }
1128                             }
1129                         }
1130                         sv_catpvn(retval, ";", 1);
1131                             sv_catsv(retval, sep);
1132                     }
1133                     sv_setpvn(valstr, "", 0);
1134                     if (gimme == G_ARRAY) {
1135                         XPUSHs(sv_2mortal(retval));
1136                         if (i < imax)   /* not the last time thro ? */
1137                             retval = newSVpvn("",0);
1138                     }
1139                 }
1140                 SvREFCNT_dec(postav);
1141                 SvREFCNT_dec(valstr);
1142             }
1143             else
1144                 croak("Call to new() method failed to return HASH ref");
1145             if (gimme == G_SCALAR)
1146                 XPUSHs(sv_2mortal(retval));
1147         }