Replace some sprintf()'s in Data::Dumper with my_snprintf()'s
[p5sagit/p5-mst-13.2.git] / ext / Data / Dumper / Dumper.xs
1 #define PERL_NO_GET_CONTEXT
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5 #include "ppport.h"
6
7 static I32 num_q (const char *s, STRLEN slen);
8 static I32 esc_q (char *dest, const char *src, STRLEN slen);
9 static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
10 static I32 needs_quote(register const char *s);
11 static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
12 static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
13                     HV *seenhv, AV *postav, I32 *levelp, I32 indent,
14                     SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
15                     SV *freezer, SV *toaster,
16                     I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
17                     I32 maxdepth, SV *sortkeys);
18
19 #ifndef HvNAME_get
20 #define HvNAME_get HvNAME
21 #endif
22
23 #if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
24
25 # ifdef EBCDIC
26 #  define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
27 # else
28 #  define UNI_TO_NATIVE(ch) (ch)
29 # endif
30
31 UV
32 Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
33 {
34     const UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
35                     ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
36     return UNI_TO_NATIVE(uv);
37 }
38
39 # if !defined(PERL_IMPLICIT_CONTEXT)
40 #  define utf8_to_uvchr      Perl_utf8_to_uvchr
41 # else
42 #  define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
43 # endif
44
45 #endif /* PERL_VERSION <= 6 */
46
47 /* Changes in 5.7 series mean that now IOK is only set if scalar is
48    precisely integer but in 5.6 and earlier we need to do a more
49    complex test  */
50 #if PERL_VERSION <= 6
51 #define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
52 #else
53 #define DD_is_integer(sv) SvIOK(sv)
54 #endif
55
56 /* does a string need to be protected? */
57 static I32
58 needs_quote(register const char *s)
59 {
60 TOP:
61     if (s[0] == ':') {
62         if (*++s) {
63             if (*s++ != ':')
64                 return 1;
65         }
66         else
67             return 1;
68     }
69     if (isIDFIRST(*s)) {
70         while (*++s)
71             if (!isALNUM(*s)) {
72                 if (*s == ':')
73                     goto TOP;
74                 else
75                     return 1;
76             }
77     }
78     else
79         return 1;
80     return 0;
81 }
82
83 /* count the number of "'"s and "\"s in string */
84 static I32
85 num_q(register const char *s, register STRLEN slen)
86 {
87     register I32 ret = 0;
88
89     while (slen > 0) {
90         if (*s == '\'' || *s == '\\')
91             ++ret;
92         ++s;
93         --slen;
94     }
95     return ret;
96 }
97
98
99 /* returns number of chars added to escape "'"s and "\"s in s */
100 /* slen number of characters in s will be escaped */
101 /* destination must be long enough for additional chars */
102 static I32
103 esc_q(register char *d, register const char *s, register STRLEN slen)
104 {
105     register I32 ret = 0;
106
107     while (slen > 0) {
108         switch (*s) {
109         case '\'':
110         case '\\':
111             *d = '\\';
112             ++d; ++ret;
113         default:
114             *d = *s;
115             ++d; ++s; --slen;
116             break;
117         }
118     }
119     return ret;
120 }
121
122 static I32
123 esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
124 {
125     char *r, *rstart;
126     const char *s = src;
127     const char * const send = src + slen;
128     STRLEN j, cur = SvCUR(sv);
129     /* Could count 128-255 and 256+ in two variables, if we want to
130        be like &qquote and make a distinction.  */
131     STRLEN grow = 0;    /* bytes needed to represent chars 128+ */
132     /* STRLEN topbit_grow = 0;  bytes needed to represent chars 128-255 */
133     STRLEN backslashes = 0;
134     STRLEN single_quotes = 0;
135     STRLEN qq_escapables = 0;   /* " $ @ will need a \ in "" strings.  */
136     STRLEN normal = 0;
137
138     /* this will need EBCDICification */
139     for (s = src; s < send; s += UTF8SKIP(s)) {
140         const UV k = utf8_to_uvchr((U8*)s, NULL);
141
142 #ifdef EBCDIC
143         if (!isprint(k) || k > 256) {
144 #else
145         if (k > 127) {
146 #endif
147             /* 4: \x{} then count the number of hex digits.  */
148             grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
149 #if UVSIZE == 4
150                 8 /* We may allocate a bit more than the minimum here.  */
151 #else
152                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
153 #endif
154                 );
155         } else if (k == '\\') {
156             backslashes++;
157         } else if (k == '\'') {
158             single_quotes++;
159         } else if (k == '"' || k == '$' || k == '@') {
160             qq_escapables++;
161         } else {
162             normal++;
163         }
164     }
165     if (grow) {
166         /* We have something needing hex. 3 is ""\0 */
167         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
168                 + 2*qq_escapables + normal);
169         rstart = r = SvPVX(sv) + cur;
170
171         *r++ = '"';
172
173         for (s = src; s < send; s += UTF8SKIP(s)) {
174             const UV k = utf8_to_uvchr((U8*)s, NULL);
175
176             if (k == '"' || k == '\\' || k == '$' || k == '@') {
177                 *r++ = '\\';
178                 *r++ = (char)k;
179             }
180             else
181 #ifdef EBCDIC
182               if (isprint(k) && k < 256)
183 #else
184               if (k < 0x80)
185 #endif
186                 *r++ = (char)k;
187             else {
188               /* The return value of sprintf() is unportable.
189                * In modern systems it returns (int) the number of characters,
190                * but in older systems it might return (char*) the original
191                * buffer, or it might even be (void).  The easiest portable
192                * thing to do is probably use sprintf() in void context and
193                * then strlen(buffer) for the length.  The more proper way
194                * would of course be to figure out the prototype of sprintf.
195                * --jhi */
196                 sprintf(r, "\\x{%"UVxf"}", k);
197                 r += strlen(r);
198             }
199         }
200         *r++ = '"';
201     } else {
202         /* Single quotes.  */
203         sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
204                 + qq_escapables + normal);
205         rstart = r = SvPVX(sv) + cur;
206         *r++ = '\'';
207         for (s = src; s < send; s ++) {
208             const char k = *s;
209             if (k == '\'' || k == '\\')
210                 *r++ = '\\';
211             *r++ = k;
212         }
213         *r++ = '\'';
214     }
215     *r = '\0';
216     j = r - rstart;
217     SvCUR_set(sv, cur + j);
218
219     return j;
220 }
221
222 /* append a repeated string to an SV */
223 static SV *
224 sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
225 {
226     if (!sv)
227         sv = newSVpvn("", 0);
228 #ifdef DEBUGGING
229     else
230         assert(SvTYPE(sv) >= SVt_PV);
231 #endif
232
233     if (n > 0) {
234         SvGROW(sv, len*n + SvCUR(sv) + 1);
235         if (len == 1) {
236             char * const start = SvPVX(sv) + SvCUR(sv);
237             SvCUR_set(sv, SvCUR(sv) + n);
238             start[n] = '\0';
239             while (n > 0)
240                 start[--n] = str[0];
241         }
242         else
243             while (n > 0) {
244                 sv_catpvn(sv, str, len);
245                 --n;
246             }
247     }
248     return sv;
249 }
250
251 /*
252  * This ought to be split into smaller functions. (it is one long function since
253  * it exactly parallels the perl version, which was one long thing for
254  * efficiency raisins.)  Ugggh!
255  */
256 static I32
257 DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
258         AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
259         SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
260         I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
261 {
262     char tmpbuf[128];
263     U32 i;
264     char *c, *r, *realpack, id[128];
265     SV **svp;
266     SV *sv, *ipad, *ival;
267     SV *blesspad = Nullsv;
268     AV *seenentry = NULL;
269     char *iname;
270     STRLEN inamelen, idlen = 0;
271     U32 realtype;
272
273     if (!val)
274         return 0;
275
276     realtype = SvTYPE(val);
277
278     if (SvGMAGICAL(val))
279         mg_get(val);
280     if (SvROK(val)) {
281
282         /* If a freeze method is provided and the object has it, call
283            it.  Warn on errors. */
284         if (SvOBJECT(SvRV(val)) && freezer &&
285             SvPOK(freezer) && SvCUR(freezer) &&
286             gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(freezer), 
287                          SvCUR(freezer), -1) != NULL)
288         {
289             dSP; ENTER; SAVETMPS; PUSHMARK(sp);
290             XPUSHs(val); PUTBACK;
291             i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID);
292             SPAGAIN;
293             if (SvTRUE(ERRSV))
294                 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
295             PUTBACK; FREETMPS; LEAVE;
296         }
297         
298         ival = SvRV(val);
299         realtype = SvTYPE(ival);
300         (void) my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(ival));
301         idlen = strlen(id);
302         if (SvOBJECT(ival))
303             realpack = HvNAME_get(SvSTASH(ival));
304         else
305             realpack = Nullch;
306
307         /* if it has a name, we need to either look it up, or keep a tab
308          * on it so we know when we hit it later
309          */
310         if (namelen) {
311             if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
312                 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
313             {
314                 SV *othername;
315                 if ((svp = av_fetch(seenentry, 0, FALSE))
316                     && (othername = *svp))
317                 {
318                     if (purity && *levelp > 0) {
319                         SV *postentry;
320                         
321                         if (realtype == SVt_PVHV)
322                             sv_catpvn(retval, "{}", 2);
323                         else if (realtype == SVt_PVAV)
324                             sv_catpvn(retval, "[]", 2);
325                         else
326                             sv_catpvn(retval, "do{my $o}", 9);
327                         postentry = newSVpvn(name, namelen);
328                         sv_catpvn(postentry, " = ", 3);
329                         sv_catsv(postentry, othername);
330                         av_push(postav, postentry);
331                     }
332                     else {
333                         if (name[0] == '@' || name[0] == '%') {
334                             if ((SvPVX_const(othername))[0] == '\\' &&
335                                 (SvPVX_const(othername))[1] == name[0]) {
336                                 sv_catpvn(retval, SvPVX_const(othername)+1,
337                                           SvCUR(othername)-1);
338                             }
339                             else {
340                                 sv_catpvn(retval, name, 1);
341                                 sv_catpvn(retval, "{", 1);
342                                 sv_catsv(retval, othername);
343                                 sv_catpvn(retval, "}", 1);
344                             }
345                         }
346                         else
347                             sv_catsv(retval, othername);
348                     }
349                     return 1;
350                 }
351                 else {
352                     warn("ref name not found for %s", id);
353                     return 0;
354                 }
355             }
356             else {   /* store our name and continue */
357                 SV *namesv;
358                 if (name[0] == '@' || name[0] == '%') {
359                     namesv = newSVpvn("\\", 1);
360                     sv_catpvn(namesv, name, namelen);
361                 }
362                 else if (realtype == SVt_PVCV && name[0] == '*') {
363                     namesv = newSVpvn("\\", 2);
364                     sv_catpvn(namesv, name, namelen);
365                     (SvPVX(namesv))[1] = '&';
366                 }
367                 else
368                     namesv = newSVpvn(name, namelen);
369                 seenentry = newAV();
370                 av_push(seenentry, namesv);
371                 (void)SvREFCNT_inc(val);
372                 av_push(seenentry, val);
373                 (void)hv_store(seenhv, id, strlen(id),
374                                newRV_inc((SV*)seenentry), 0);
375                 SvREFCNT_dec(seenentry);
376             }
377         }
378
379         if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) {
380             STRLEN rlen;
381             const char *rval = SvPV(val, rlen);
382             const char *slash = strchr(rval, '/');
383             sv_catpvn(retval, "qr/", 3);
384             while (slash) {
385                 sv_catpvn(retval, rval, slash-rval);
386                 sv_catpvn(retval, "\\/", 2);
387                 rlen -= slash-rval+1;
388                 rval = slash+1;
389                 slash = strchr(rval, '/');
390             }
391             sv_catpvn(retval, rval, rlen);
392             sv_catpvn(retval, "/", 1);
393             return 1;
394         }
395
396         /* If purity is not set and maxdepth is set, then check depth:
397          * if we have reached maximum depth, return the string
398          * representation of the thing we are currently examining
399          * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
400          */
401         if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
402             STRLEN vallen;
403             const char * const valstr = SvPV(val,vallen);
404             sv_catpvn(retval, "'", 1);
405             sv_catpvn(retval, valstr, vallen);
406             sv_catpvn(retval, "'", 1);
407             return 1;
408         }
409
410         if (realpack) {                         /* we have a blessed ref */
411             STRLEN blesslen;
412             const char * const blessstr = SvPV(bless, blesslen);
413             sv_catpvn(retval, blessstr, blesslen);
414             sv_catpvn(retval, "( ", 2);
415             if (indent >= 2) {
416                 blesspad = apad;
417                 apad = newSVsv(apad);
418                 sv_x(aTHX_ apad, " ", 1, blesslen+2);
419             }
420         }
421
422         (*levelp)++;
423         ipad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp);
424
425         if (realtype <= SVt_PVBM) {                          /* scalar ref */
426             SV * const namesv = newSVpvn("${", 2);
427             sv_catpvn(namesv, name, namelen);
428             sv_catpvn(namesv, "}", 1);
429             if (realpack) {                                  /* blessed */
430                 sv_catpvn(retval, "do{\\(my $o = ", 13);
431                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
432                         postav, levelp, indent, pad, xpad, apad, sep, pair,
433                         freezer, toaster, purity, deepcopy, quotekeys, bless,
434                         maxdepth, sortkeys);
435                 sv_catpvn(retval, ")}", 2);
436             }                                                /* plain */
437             else {
438                 sv_catpvn(retval, "\\", 1);
439                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
440                         postav, levelp, indent, pad, xpad, apad, sep, pair,
441                         freezer, toaster, purity, deepcopy, quotekeys, bless,
442                         maxdepth, sortkeys);
443             }
444             SvREFCNT_dec(namesv);
445         }
446         else if (realtype == SVt_PVGV) {                     /* glob ref */
447             SV * const namesv = newSVpvn("*{", 2);
448             sv_catpvn(namesv, name, namelen);
449             sv_catpvn(namesv, "}", 1);
450             sv_catpvn(retval, "\\", 1);
451             DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
452                     postav, levelp,     indent, pad, xpad, apad, sep, pair,
453                     freezer, toaster, purity, deepcopy, quotekeys, bless,
454                     maxdepth, sortkeys);
455             SvREFCNT_dec(namesv);
456         }
457         else if (realtype == SVt_PVAV) {
458             SV *totpad;
459             I32 ix = 0;
460             const I32 ixmax = av_len((AV *)ival);
461         
462             SV * const ixsv = newSViv(0);
463             /* allowing for a 24 char wide array index */
464             New(0, iname, namelen+28, char);
465             (void)strcpy(iname, name);
466             inamelen = namelen;
467             if (name[0] == '@') {
468                 sv_catpvn(retval, "(", 1);
469                 iname[0] = '$';
470             }
471             else {
472                 sv_catpvn(retval, "[", 1);
473                 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
474                 /*if (namelen > 0
475                     && name[namelen-1] != ']' && name[namelen-1] != '}'
476                     && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
477                 if ((namelen > 0
478                      && name[namelen-1] != ']' && name[namelen-1] != '}')
479                     || (namelen > 4
480                         && (name[1] == '{'
481                             || (name[0] == '\\' && name[2] == '{'))))
482                 {
483                     iname[inamelen++] = '-'; iname[inamelen++] = '>';
484                     iname[inamelen] = '\0';
485                 }
486             }
487             if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
488                 (instr(iname+inamelen-8, "{SCALAR}") ||
489                  instr(iname+inamelen-7, "{ARRAY}") ||
490                  instr(iname+inamelen-6, "{HASH}"))) {
491                 iname[inamelen++] = '-'; iname[inamelen++] = '>';
492             }
493             iname[inamelen++] = '['; iname[inamelen] = '\0';
494             totpad = newSVsv(sep);
495             sv_catsv(totpad, pad);
496             sv_catsv(totpad, apad);
497
498             for (ix = 0; ix <= ixmax; ++ix) {
499                 STRLEN ilen;
500                 SV *elem;
501                 svp = av_fetch((AV*)ival, ix, FALSE);
502                 if (svp)
503                     elem = *svp;
504                 else
505                     elem = &PL_sv_undef;
506                 
507                 ilen = inamelen;
508                 sv_setiv(ixsv, ix);
509                 (void) sprintf(iname+ilen, "%"IVdf, (IV)ix);
510                 ilen = strlen(iname);
511                 iname[ilen++] = ']'; iname[ilen] = '\0';
512                 if (indent >= 3) {
513                     sv_catsv(retval, totpad);
514                     sv_catsv(retval, ipad);
515                     sv_catpvn(retval, "#", 1);
516                     sv_catsv(retval, ixsv);
517                 }
518                 sv_catsv(retval, totpad);
519                 sv_catsv(retval, ipad);
520                 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
521                         levelp, indent, pad, xpad, apad, sep, pair,
522                         freezer, toaster, purity, deepcopy, quotekeys, bless,
523                         maxdepth, sortkeys);
524                 if (ix < ixmax)
525                     sv_catpvn(retval, ",", 1);
526             }
527             if (ixmax >= 0) {
528                 SV * const opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1);
529                 sv_catsv(retval, totpad);
530                 sv_catsv(retval, opad);
531                 SvREFCNT_dec(opad);
532             }
533             if (name[0] == '@')
534                 sv_catpvn(retval, ")", 1);
535             else
536                 sv_catpvn(retval, "]", 1);
537             SvREFCNT_dec(ixsv);
538             SvREFCNT_dec(totpad);
539             Safefree(iname);
540         }
541         else if (realtype == SVt_PVHV) {
542             SV *totpad, *newapad;
543             SV *sname;
544             HE *entry;
545             char *key;
546             I32 klen;
547             SV *hval;
548             AV *keys = NULL;
549         
550             SV * const iname = newSVpvn(name, namelen);
551             if (name[0] == '%') {
552                 sv_catpvn(retval, "(", 1);
553                 (SvPVX(iname))[0] = '$';
554             }
555             else {
556                 sv_catpvn(retval, "{", 1);
557                 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
558                 if ((namelen > 0
559                      && name[namelen-1] != ']' && name[namelen-1] != '}')
560                     || (namelen > 4
561                         && (name[1] == '{'
562                             || (name[0] == '\\' && name[2] == '{'))))
563                 {
564                     sv_catpvn(iname, "->", 2);
565                 }
566             }
567             if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
568                 (instr(name+namelen-8, "{SCALAR}") ||
569                  instr(name+namelen-7, "{ARRAY}") ||
570                  instr(name+namelen-6, "{HASH}"))) {
571                 sv_catpvn(iname, "->", 2);
572             }
573             sv_catpvn(iname, "{", 1);
574             totpad = newSVsv(sep);
575             sv_catsv(totpad, pad);
576             sv_catsv(totpad, apad);
577         
578             /* If requested, get a sorted/filtered array of hash keys */
579             if (sortkeys) {
580                 if (sortkeys == &PL_sv_yes) {
581 #if PERL_VERSION < 8
582                     sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
583 #else
584                     keys = newAV();
585                     (void)hv_iterinit((HV*)ival);
586                     while ((entry = hv_iternext((HV*)ival))) {
587                         sv = hv_iterkeysv(entry);
588                         SvREFCNT_inc(sv);
589                         av_push(keys, sv);
590                     }
591 # ifdef USE_LOCALE_NUMERIC
592                     sortsv(AvARRAY(keys), 
593                            av_len(keys)+1, 
594                            IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
595 # else
596                     sortsv(AvARRAY(keys), 
597                            av_len(keys)+1, 
598                            Perl_sv_cmp);
599 # endif
600 #endif
601                 }
602                 if (sortkeys != &PL_sv_yes) {
603                     dSP; ENTER; SAVETMPS; PUSHMARK(sp);
604                     XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
605                     i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
606                     SPAGAIN;
607                     if (i) {
608                         sv = POPs;
609                         if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
610                             keys = (AV*)SvREFCNT_inc(SvRV(sv));
611                     }
612                     if (! keys)
613                         warn("Sortkeys subroutine did not return ARRAYREF\n");
614                     PUTBACK; FREETMPS; LEAVE;
615                 }
616                 if (keys)
617                     sv_2mortal((SV*)keys);
618             }
619             else
620                 (void)hv_iterinit((HV*)ival);
621
622             /* foreach (keys %hash) */
623             for (i = 0; 1; i++) {
624                 char *nkey;
625                 char *nkey_buffer = NULL;
626                 I32 nticks = 0;
627                 SV* keysv;
628                 STRLEN keylen;
629                 I32 nlen;
630                 bool do_utf8 = FALSE;
631
632                if (sortkeys) {
633                    if (!(keys && (I32)i <= av_len(keys))) break;
634                } else {
635                    if (!(entry = hv_iternext((HV *)ival))) break;
636                }
637
638                 if (i)
639                     sv_catpvn(retval, ",", 1);
640
641                 if (sortkeys) {
642                     char *key;
643                     svp = av_fetch(keys, i, FALSE);
644                     keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
645                     key = SvPV(keysv, keylen);
646                     svp = hv_fetch((HV*)ival, key,
647                                    SvUTF8(keysv) ? -(I32)keylen : keylen, 0);
648                     hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
649                 }
650                 else {
651                     keysv = hv_iterkeysv(entry);
652                     hval = hv_iterval((HV*)ival, entry);
653                 }
654
655                 key = SvPV(keysv, keylen);
656                 do_utf8 = DO_UTF8(keysv);
657                 klen = keylen;
658
659                 sv_catsv(retval, totpad);
660                 sv_catsv(retval, ipad);
661                 /* old logic was first to check utf8 flag, and if utf8 always
662                    call esc_q_utf8.  This caused test to break under -Mutf8,
663                    because there even strings like 'c' have utf8 flag on.
664                    Hence with quotekeys == 0 the XS code would still '' quote
665                    them based on flags, whereas the perl code would not,
666                    based on regexps.
667                    The perl code is correct.
668                    needs_quote() decides that anything that isn't a valid
669                    perl identifier needs to be quoted, hence only correctly
670                    formed strings with no characters outside [A-Za-z0-9_:]
671                    won't need quoting.  None of those characters are used in
672                    the byte encoding of utf8, so anything with utf8
673                    encoded characters in will need quoting. Hence strings
674                    with utf8 encoded characters in will end up inside do_utf8
675                    just like before, but now strings with utf8 flag set but
676                    only ascii characters will end up in the unquoted section.
677
678                    There should also be less tests for the (probably currently)
679                    more common doesn't need quoting case.
680                    The code is also smaller (22044 vs 22260) because I've been
681                    able to pull the common logic out to both sides.  */
682                 if (quotekeys || needs_quote(key)) {
683                     if (do_utf8) {
684                         STRLEN ocur = SvCUR(retval);
685                         nlen = esc_q_utf8(aTHX_ retval, key, klen);
686                         nkey = SvPVX(retval) + ocur;
687                     }
688                     else {
689                         nticks = num_q(key, klen);
690                         New(0, nkey_buffer, klen+nticks+3, char);
691                         nkey = nkey_buffer;
692                         nkey[0] = '\'';
693                         if (nticks)
694                             klen += esc_q(nkey+1, key, klen);
695                         else
696                             (void)Copy(key, nkey+1, klen, char);
697                         nkey[++klen] = '\'';
698                         nkey[++klen] = '\0';
699                         nlen = klen;
700                         sv_catpvn(retval, nkey, klen);
701                     }
702                 }
703                 else {
704                     nkey = key;
705                     nlen = klen;
706                     sv_catpvn(retval, nkey, klen);
707                 }
708                 sname = newSVsv(iname);
709                 sv_catpvn(sname, nkey, nlen);
710                 sv_catpvn(sname, "}", 1);
711
712                 sv_catsv(retval, pair);
713                 if (indent >= 2) {
714                     char *extra;
715                     I32 elen = 0;
716                     newapad = newSVsv(apad);
717                     New(0, extra, klen+4+1, char);
718                     while (elen < (klen+4))
719                         extra[elen++] = ' ';
720                     extra[elen] = '\0';
721                     sv_catpvn(newapad, extra, elen);
722                     Safefree(extra);
723                 }
724                 else
725                     newapad = apad;
726
727                 DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
728                         postav, levelp, indent, pad, xpad, newapad, sep, pair,
729                         freezer, toaster, purity, deepcopy, quotekeys, bless,
730                         maxdepth, sortkeys);
731                 SvREFCNT_dec(sname);
732                 Safefree(nkey_buffer);
733                 if (indent >= 2)
734                     SvREFCNT_dec(newapad);
735             }
736             if (i) {
737                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp-1);
738                 sv_catsv(retval, totpad);
739                 sv_catsv(retval, opad);
740                 SvREFCNT_dec(opad);
741             }
742             if (name[0] == '%')
743                 sv_catpvn(retval, ")", 1);
744             else
745                 sv_catpvn(retval, "}", 1);
746             SvREFCNT_dec(iname);
747             SvREFCNT_dec(totpad);
748         }
749         else if (realtype == SVt_PVCV) {
750             sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
751             if (purity)
752                 warn("Encountered CODE ref, using dummy placeholder");
753         }
754         else {
755             warn("cannot handle ref type %ld", realtype);
756         }
757
758         if (realpack) {  /* free blessed allocs */
759             if (indent >= 2) {
760                 SvREFCNT_dec(apad);
761                 apad = blesspad;
762             }
763             sv_catpvn(retval, ", '", 3);
764             sv_catpvn(retval, realpack, strlen(realpack));
765             sv_catpvn(retval, "' )", 3);
766             if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
767                 sv_catpvn(retval, "->", 2);
768                 sv_catsv(retval, toaster);
769                 sv_catpvn(retval, "()", 2);
770             }
771         }
772         SvREFCNT_dec(ipad);
773         (*levelp)--;
774     }
775     else {
776         STRLEN i;
777         
778         if (namelen) {
779             (void) my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(val));
780             if ((svp = hv_fetch(seenhv, id, (idlen = strlen(id)), FALSE)) &&
781                 (sv = *svp) && SvROK(sv) &&
782                 (seenentry = (AV*)SvRV(sv)))
783             {
784                 SV *othername;
785                 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
786                     && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
787                 {
788                     sv_catpvn(retval, "${", 2);
789                     sv_catsv(retval, othername);
790                     sv_catpvn(retval, "}", 1);
791                     return 1;
792                 }
793             }
794             else if (val != &PL_sv_undef) {
795                 SV * const namesv = newSVpvn("\\", 1);
796                 sv_catpvn(namesv, name, namelen);
797                 seenentry = newAV();
798                 av_push(seenentry, namesv);
799                 av_push(seenentry, newRV_inc(val));
800                 (void)hv_store(seenhv, id, strlen(id), newRV_inc((SV*)seenentry), 0);
801                 SvREFCNT_dec(seenentry);
802             }
803         }
804
805         if (DD_is_integer(val)) {
806             STRLEN len;
807             if (SvIsUV(val))
808               (void) my_snprintf(tmpbuf, sizeof(tmpbuf), "%"UVuf, SvUV(val));
809             else
810               (void) my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, SvIV(val));
811             len = strlen(tmpbuf);
812             if (SvPOK(val)) {
813               /* Need to check to see if this is a string such as " 0".
814                  I'm assuming from sprintf isn't going to clash with utf8.
815                  Is this valid on EBCDIC?  */
816               STRLEN pvlen;
817               const char * const pv = SvPV(val, pvlen);
818               if (pvlen != len || memNE(pv, tmpbuf, len))
819                 goto integer_came_from_string;
820             }
821             if (len > 10) {
822               /* Looks like we're on a 64 bit system.  Make it a string so that
823                  if a 32 bit system reads the number it will cope better.  */
824               sv_catpvf(retval, "'%s'", tmpbuf);
825             } else
826               sv_catpvn(retval, tmpbuf, len);
827         }
828         else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
829             c = SvPV(val, i);
830             ++c; --i;                   /* just get the name */
831             if (i >= 6 && strncmp(c, "main::", 6) == 0) {
832                 c += 4;
833                 i -= 4;
834             }
835             if (needs_quote(c)) {
836                 sv_grow(retval, SvCUR(retval)+6+2*i);
837                 r = SvPVX(retval)+SvCUR(retval);
838                 r[0] = '*'; r[1] = '{'; r[2] = '\'';
839                 i += esc_q(r+3, c, i);
840                 i += 3;
841                 r[i++] = '\''; r[i++] = '}';
842                 r[i] = '\0';
843             }
844             else {
845                 sv_grow(retval, SvCUR(retval)+i+2);
846                 r = SvPVX(retval)+SvCUR(retval);
847                 r[0] = '*'; strcpy(r+1, c);
848                 i++;
849             }
850             SvCUR_set(retval, SvCUR(retval)+i);
851
852             if (purity) {
853                 static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
854                 static const STRLEN sizes[] = { 8, 7, 6 };
855                 SV *e;
856                 SV * const nname = newSVpvn("", 0);
857                 SV * const newapad = newSVpvn("", 0);
858                 GV * const gv = (GV*)val;
859                 I32 j;
860                 
861                 for (j=0; j<3; j++) {
862                     e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
863                     if (!e)
864                         continue;
865                     if (j == 0 && !SvOK(e))
866                         continue;
867
868                     {
869                         I32 nlevel = 0;
870                         SV *postentry = newSVpvn(r,i);
871                         
872                         sv_setsv(nname, postentry);
873                         sv_catpvn(nname, entries[j], sizes[j]);
874                         sv_catpvn(postentry, " = ", 3);
875                         av_push(postav, postentry);
876                         e = newRV_inc(e);
877                         
878                         SvCUR_set(newapad, 0);
879                         if (indent >= 2)
880                             (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
881                         
882                         DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry,
883                                 seenhv, postav, &nlevel, indent, pad, xpad,
884                                 newapad, sep, pair, freezer, toaster, purity,
885                                 deepcopy, quotekeys, bless, maxdepth, 
886                                 sortkeys);
887                         SvREFCNT_dec(e);
888                     }
889                 }
890                 
891                 SvREFCNT_dec(newapad);
892                 SvREFCNT_dec(nname);
893             }
894         }
895         else if (val == &PL_sv_undef || !SvOK(val)) {
896             sv_catpvn(retval, "undef", 5);
897         }
898         else {
899         integer_came_from_string:
900             c = SvPV(val, i);
901             if (DO_UTF8(val))
902                 i += esc_q_utf8(aTHX_ retval, c, i);
903             else {
904                 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
905                 r = SvPVX(retval) + SvCUR(retval);
906                 r[0] = '\'';
907                 i += esc_q(r+1, c, i);
908                 ++i;
909                 r[i++] = '\'';
910                 r[i] = '\0';
911                 SvCUR_set(retval, SvCUR(retval)+i);
912             }
913         }
914     }
915
916     if (idlen) {
917         if (deepcopy)
918             (void)hv_delete(seenhv, id, idlen, G_DISCARD);
919         else if (namelen && seenentry) {
920             SV *mark = *av_fetch(seenentry, 2, TRUE);
921             sv_setiv(mark,1);
922         }
923     }
924     return 1;
925 }
926
927
928 MODULE = Data::Dumper           PACKAGE = Data::Dumper         PREFIX = Data_Dumper_
929
930 #
931 # This is the exact equivalent of Dump.  Well, almost. The things that are
932 # different as of now (due to Laziness):
933 #   * doesnt do double-quotes yet.
934 #
935
936 void
937 Data_Dumper_Dumpxs(href, ...)
938         SV      *href;
939         PROTOTYPE: $;$$
940         PPCODE:
941         {
942             HV *hv;
943             SV *retval, *valstr;
944             HV *seenhv = NULL;
945             AV *postav, *todumpav, *namesav;
946             I32 level = 0;
947             I32 indent, terse, i, imax, postlen;
948             SV **svp;
949             SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
950             SV *freezer, *toaster, *bless, *sortkeys;
951             I32 purity, deepcopy, quotekeys, maxdepth = 0;
952             char tmpbuf[1024];
953             I32 gimme = GIMME;
954
955             if (!SvROK(href)) {         /* call new to get an object first */
956                 if (items < 2)
957                     croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
958                 
959                 ENTER;
960                 SAVETMPS;
961                 
962                 PUSHMARK(sp);
963                 XPUSHs(href);
964                 XPUSHs(sv_2mortal(newSVsv(ST(1))));
965                 if (items >= 3)
966                     XPUSHs(sv_2mortal(newSVsv(ST(2))));
967                 PUTBACK;
968                 i = perl_call_method("new", G_SCALAR);
969                 SPAGAIN;
970                 if (i)
971                     href = newSVsv(POPs);
972
973                 PUTBACK;
974                 FREETMPS;
975                 LEAVE;
976                 if (i)
977                     (void)sv_2mortal(href);
978             }
979
980             todumpav = namesav = NULL;
981             seenhv = NULL;
982             val = pad = xpad = apad = sep = pair = varname
983                 = freezer = toaster = bless = sortkeys = &PL_sv_undef;
984             name = sv_newmortal();
985             indent = 2;
986             terse = purity = deepcopy = 0;
987             quotekeys = 1;
988         
989             retval = newSVpvn("", 0);
990             if (SvROK(href)
991                 && (hv = (HV*)SvRV((SV*)href))
992                 && SvTYPE(hv) == SVt_PVHV)              {
993
994                 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
995                     seenhv = (HV*)SvRV(*svp);
996                 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
997                     todumpav = (AV*)SvRV(*svp);
998                 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
999                     namesav = (AV*)SvRV(*svp);
1000                 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
1001                     indent = SvIV(*svp);
1002                 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
1003                     purity = SvIV(*svp);
1004                 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
1005                     terse = SvTRUE(*svp);
1006 #if 0 /* useqq currently unused */
1007                 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
1008                     useqq = SvTRUE(*svp);
1009 #endif
1010                 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
1011                     pad = *svp;
1012                 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
1013                     xpad = *svp;
1014                 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
1015                     apad = *svp;
1016                 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
1017                     sep = *svp;
1018                 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
1019                     pair = *svp;
1020                 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1021                     varname = *svp;
1022                 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
1023                     freezer = *svp;
1024                 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
1025                     toaster = *svp;
1026                 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
1027                     deepcopy = SvTRUE(*svp);
1028                 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
1029                     quotekeys = SvTRUE(*svp);
1030                 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
1031                     bless = *svp;
1032                 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
1033                     maxdepth = SvIV(*svp);
1034                 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1035                     sortkeys = *svp;
1036                     if (! SvTRUE(sortkeys))
1037                         sortkeys = NULL;
1038                     else if (! (SvROK(sortkeys) &&
1039                                 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1040                     {
1041                         /* flag to use qsortsv() for sorting hash keys */       
1042                         sortkeys = &PL_sv_yes; 
1043                     }
1044                 }
1045                 postav = newAV();
1046
1047                 if (todumpav)
1048                     imax = av_len(todumpav);
1049                 else
1050                     imax = -1;
1051                 valstr = newSVpvn("",0);
1052                 for (i = 0; i <= imax; ++i) {
1053                     SV *newapad;
1054                 
1055                     av_clear(postav);
1056                     if ((svp = av_fetch(todumpav, i, FALSE)))
1057                         val = *svp;
1058                     else
1059                         val = &PL_sv_undef;
1060                     if ((svp = av_fetch(namesav, i, TRUE))) {
1061                         sv_setsv(name, *svp);
1062                         if (SvOK(*svp) && !SvPOK(*svp))
1063                             (void)SvPV_nolen_const(name);
1064                     }
1065                     else
1066                         (void)SvOK_off(name);
1067                 
1068                     if (SvPOK(name)) {
1069                         if ((SvPVX_const(name))[0] == '*') {
1070                             if (SvROK(val)) {
1071                                 switch (SvTYPE(SvRV(val))) {
1072                                 case SVt_PVAV:
1073                                     (SvPVX(name))[0] = '@';
1074                                     break;
1075                                 case SVt_PVHV:
1076                                     (SvPVX(name))[0] = '%';
1077                                     break;
1078                                 case SVt_PVCV:
1079                                     (SvPVX(name))[0] = '*';
1080                                     break;
1081                                 default:
1082                                     (SvPVX(name))[0] = '$';
1083                                     break;
1084                                 }
1085                             }
1086                             else
1087                                 (SvPVX(name))[0] = '$';
1088                         }
1089                         else if ((SvPVX_const(name))[0] != '$')
1090                             sv_insert(name, 0, 0, "$", 1);
1091                     }
1092                     else {
1093                         STRLEN nchars;
1094                         sv_setpvn(name, "$", 1);
1095                         sv_catsv(name, varname);
1096                         (void) my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, (IV)(i+1));
1097                         nchars = strlen(tmpbuf);
1098                         sv_catpvn(name, tmpbuf, nchars);
1099                     }
1100                 
1101                     if (indent >= 2) {
1102                         SV * const tmpsv = sv_x(aTHX_ NULL, " ", 1, SvCUR(name)+3);
1103                         newapad = newSVsv(apad);
1104                         sv_catsv(newapad, tmpsv);
1105                         SvREFCNT_dec(tmpsv);
1106                     }
1107                     else
1108                         newapad = apad;
1109                 
1110                     DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
1111                             postav, &level, indent, pad, xpad, newapad, sep, pair,
1112                             freezer, toaster, purity, deepcopy, quotekeys,
1113                             bless, maxdepth, sortkeys);
1114                 
1115                     if (indent >= 2)
1116                         SvREFCNT_dec(newapad);
1117
1118                     postlen = av_len(postav);
1119                     if (postlen >= 0 || !terse) {
1120                         sv_insert(valstr, 0, 0, " = ", 3);
1121                         sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name));
1122                         sv_catpvn(valstr, ";", 1);
1123                     }
1124                     sv_catsv(retval, pad);
1125                     sv_catsv(retval, valstr);
1126                     sv_catsv(retval, sep);
1127                     if (postlen >= 0) {
1128                         I32 i;
1129                         sv_catsv(retval, pad);
1130                         for (i = 0; i <= postlen; ++i) {
1131                             SV *elem;
1132                             svp = av_fetch(postav, i, FALSE);
1133                             if (svp && (elem = *svp)) {
1134                                 sv_catsv(retval, elem);
1135                                 if (i < postlen) {
1136                                     sv_catpvn(retval, ";", 1);
1137                                     sv_catsv(retval, sep);
1138                                     sv_catsv(retval, pad);
1139                                 }
1140                             }
1141                         }
1142                         sv_catpvn(retval, ";", 1);
1143                             sv_catsv(retval, sep);
1144                     }
1145                     sv_setpvn(valstr, "", 0);
1146                     if (gimme == G_ARRAY) {
1147                         XPUSHs(sv_2mortal(retval));
1148                         if (i < imax)   /* not the last time thro ? */
1149                             retval = newSVpvn("",0);
1150                     }
1151                 }
1152                 SvREFCNT_dec(postav);
1153                 SvREFCNT_dec(valstr);
1154             }
1155             else
1156                 croak("Call to new() method failed to return HASH ref");
1157             if (gimme == G_SCALAR)
1158                 XPUSHs(sv_2mortal(retval));
1159         }