Integrate mainline + lib/open.t patch from Chromatic
[p5sagit/p5-mst-13.2.git] / ext / Data / Dumper / Dumper.xs
index d3cf292..d0eb917 100644 (file)
 
 static I32 num_q (char *s, STRLEN slen);
 static I32 esc_q (char *dest, char *src, STRLEN slen);
+static I32 esc_q_utf8 (pTHX_ SV *sv, char *src, STRLEN slen);
 static SV *sv_x (pTHX_ SV *sv, char *str, STRLEN len, I32 n);
 static I32 DD_dump (pTHX_ SV *val, char *name, STRLEN namelen, SV *retval,
                    HV *seenhv, AV *postav, I32 *levelp, I32 indent,
                    SV *pad, SV *xpad, SV *apad, SV *sep,
                    SV *freezer, SV *toaster,
                    I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
-                   I32 maxdepth);
+                   I32 maxdepth, SV *sortkeys);
 
 /* does a string need to be protected? */
 static I32
@@ -52,7 +53,7 @@ TOP:
                    return 1;
            }
     }
-    else 
+    else
        return 1;
     return 0;
 }
@@ -80,7 +81,7 @@ static I32
 esc_q(register char *d, register char *s, register STRLEN slen)
 {
     register I32 ret = 0;
-    
+
     while (slen > 0) {
        switch (*s) {
        case '\'':
@@ -96,6 +97,52 @@ esc_q(register char *d, register char *s, register STRLEN slen)
     return ret;
 }
 
+/* Append src (slen bytes of UTF-8) to sv as a quoted Perl string literal.
+ * Chars >= 0x80 become \x{...} and force "..." quoting, else '...' is used.
+ * Backslash and the active delimiter are escaped, plus $ and @ inside "..."
+ * so the result cannot interpolate.  Returns bytes appended, quotes included. */
+static I32
+esc_q_utf8(pTHX_ SV* sv, register char *src, register STRLEN slen)
+{
+    char *s, *send, *r;
+    STRLEN grow = 0, j = 1, l;   /* j = 1: r[0] is reserved for the quote */
+    bool dquote = FALSE;
+
+    /* this will need EBCDICification */
+    /* Pass 1: pick the quote style and reserve worst-case room. */
+    for (s = src, send = src + slen; s < send; s += UTF8SKIP(s)) {
+        UV k = utf8_to_uvchr((U8*)s, &l);
+
+        if (k < 0x80)
+            grow += 2;                  /* char + possible backslash */
+        else {
+            /* 4: \x{} then the hex digits (UNISKIP can undercount them) */
+            grow += 4 + (k <= 0xFFFF ? 4 : 2 * sizeof(UV));
+            dquote = TRUE;
+        }
+    }
+    sv_grow(sv, SvCUR(sv)+3+grow); /* 3: ""\0 */
+    r = SvPVX(sv) + SvCUR(sv);
+
+    /* Pass 2: emit, now that the delimiter is known. */
+    for (s = src; s < send; s += UTF8SKIP(s)) {
+        UV k = utf8_to_uvchr((U8*)s, &l);
+
+        if (k >= 0x80)
+            j += sprintf(r + j, "\\x{%" UVxf "}", k);
+        else {
+            if (*s == '\\' || (dquote ? (*s == '"' || *s == '$' || *s == '@')
+                                       : *s == '\''))
+                r[j++] = '\\';
+            r[j++] = *s;
+        }
+    }
+    r[0] = r[j++] = dquote ? '"' : '\'';
+    r[j] = '\0';
+    SvCUR_set(sv, SvCUR(sv) + j);
+    return j;
+}
+
 /* append a repeated string to an SV */
 static SV *
 sv_x(pTHX_ SV *sv, register char *str, STRLEN len, I32 n)
@@ -132,7 +179,7 @@ static I32
 DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
        AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
        SV *apad, SV *sep, SV *freezer, SV *toaster, I32 purity,
-       I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth)
+       I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
 {
     char tmpbuf[128];
     U32 i;
@@ -143,15 +190,13 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
     AV *seenentry = Nullav;
     char *iname;
     STRLEN inamelen, idlen = 0;
-    U32 flags;
     U32 realtype;
 
     if (!val)
        return 0;
 
-    flags = SvFLAGS(val);
     realtype = SvTYPE(val);
-    
+
     if (SvGMAGICAL(val))
         mg_get(val);
     if (SvROK(val)) {
@@ -174,7 +219,6 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
        }
        
        ival = SvRV(val);
-       flags = SvFLAGS(ival);
        realtype = SvTYPE(ival);
         (void) sprintf(id, "0x%lx", (unsigned long)ival);
        idlen = strlen(id);
@@ -275,7 +319,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
        /* If purity is not set and maxdepth is set, then check depth:
         * if we have reached maximum depth, return the string
         * representation of the thing we are currently examining
-        * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)'). 
+        * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
         */
        if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
            STRLEN vallen;
@@ -305,12 +349,12 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
            SV *namesv = newSVpvn("${", 2);
            sv_catpvn(namesv, name, namelen);
            sv_catpvn(namesv, "}", 1);
-           if (realpack) {                                  /* blessed */ 
+           if (realpack) {                                  /* blessed */
                sv_catpvn(retval, "do{\\(my $o = ", 13);
                DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
                        postav, levelp, indent, pad, xpad, apad, sep,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
-                       maxdepth);
+                       maxdepth, sortkeys);
                sv_catpvn(retval, ")}", 2);
            }                                                /* plain */
            else {
@@ -318,7 +362,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
                        postav, levelp, indent, pad, xpad, apad, sep,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
-                       maxdepth);
+                       maxdepth, sortkeys);
            }
            SvREFCNT_dec(namesv);
        }
@@ -330,14 +374,14 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
            DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
                    postav, levelp,     indent, pad, xpad, apad, sep,
                    freezer, toaster, purity, deepcopy, quotekeys, bless,
-                   maxdepth);
+                   maxdepth, sortkeys);
            SvREFCNT_dec(namesv);
        }
        else if (realtype == SVt_PVAV) {
            SV *totpad;
            I32 ix = 0;
            I32 ixmax = av_len((AV *)ival);
-           
+       
            SV *ixsv = newSViv(0);
            /* allowing for a 24 char wide array index */
            New(0, iname, namelen+28, char);
@@ -399,7 +443,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
                        levelp, indent, pad, xpad, apad, sep,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
-                       maxdepth);
+                       maxdepth, sortkeys);
                if (ix < ixmax)
                    sv_catpvn(retval, ",", 1);
            }
@@ -424,7 +468,8 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
            char *key;
            I32 klen;
            SV *hval;
-           
+           AV *keys = Nullav;
+       
            iname = newSVpvn(name, namelen);
            if (name[0] == '%') {
                sv_catpvn(retval, "(", 1);
@@ -452,42 +497,108 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
            totpad = newSVsv(sep);
            sv_catsv(totpad, pad);
            sv_catsv(totpad, apad);
-           
-           (void)hv_iterinit((HV*)ival);
+       
+           /* If requested, get a sorted/filtered array of hash keys */
+           if (sortkeys) {
+               if (sortkeys == &PL_sv_yes) {
+                   keys = newAV();
+                   (void)hv_iterinit((HV*)ival);
+                   while (entry = hv_iternext((HV*)ival)) {
+                       sv = hv_iterkeysv(entry);
+                       SvREFCNT_inc(sv);
+                       av_push(keys, sv);
+                   }
+                   sortsv(AvARRAY(keys), 
+                          av_len(keys)+1, 
+                          Perl_sv_cmp_locale);
+               }
+               else {
+                   dSP; ENTER; SAVETMPS; PUSHMARK(sp);
+                   XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
+                   i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
+                   SPAGAIN;
+                   if (i) {
+                       sv = POPs;
+                       if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
+                           keys = (AV*)SvREFCNT_inc(SvRV(sv));
+                   }
+                   if (! keys)
+                       warn("Sortkeys subroutine did not return ARRAYREF\n");
+                   PUTBACK; FREETMPS; LEAVE;
+               }
+               if (keys)
+                   sv_2mortal((SV*)keys);
+           }
+           else
+               (void)hv_iterinit((HV*)ival);
            i = 0;
-           while ((entry = hv_iternext((HV*)ival)))  {
-               char *nkey;
+           while (sortkeys ? (void*)(keys && (i <= av_len(keys))) : 
+                             (void*)((entry = hv_iternext((HV*)ival))) )                   {
+               char *nkey = NULL;
                I32 nticks = 0;
+               SV* keysv;
+               STRLEN keylen;
+               bool do_utf8 = FALSE;
                
                if (i)
                    sv_catpvn(retval, ",", 1);
-               i++;
-               key = hv_iterkey(entry, &klen);
-               hval = hv_iterval((HV*)ival, entry);
-
-               if (quotekeys || needs_quote(key)) {
-                   nticks = num_q(key, klen);
-                   New(0, nkey, klen+nticks+3, char);
-                   nkey[0] = '\'';
-                   if (nticks)
-                       klen += esc_q(nkey+1, key, klen);
-                   else
-                       (void)Copy(key, nkey+1, klen, char);
-                   nkey[++klen] = '\'';
-                   nkey[++klen] = '\0';
+
+               if (sortkeys) {
+                   char *key;
+                   svp = av_fetch(keys, i, FALSE);
+                   keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
+                   key = SvPV(keysv, keylen);
+                   svp = hv_fetch((HV*)ival, key, keylen, 0);
+                   hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
                }
                else {
-                   New(0, nkey, klen, char);
-                   (void)Copy(key, nkey, klen, char);
+                   keysv = hv_iterkeysv(entry);
+                   hval = hv_iterval((HV*)ival, entry);
                }
-               
-               sname = newSVsv(iname);
-               sv_catpvn(sname, nkey, klen);
-               sv_catpvn(sname, "}", 1);
 
-               sv_catsv(retval, totpad);
-               sv_catsv(retval, ipad);
-               sv_catpvn(retval, nkey, klen);
+               i++;
+
+               do_utf8 = DO_UTF8(keysv);
+               key = SvPV(keysv, keylen);
+               klen = keylen;
+
+               if (do_utf8) {
+                   char *okey = SvPVX(retval) + SvCUR(retval);
+                   I32 nlen;
+
+                   sv_catsv(retval, totpad);
+                   sv_catsv(retval, ipad);
+                   nlen = esc_q_utf8(aTHX_ retval, key, klen);
+
+                   sname = newSVsv(iname);
+                   sv_catpvn(sname, okey, nlen);
+                   sv_catpvn(sname, "}", 1);
+               }
+               else {
+                   if (quotekeys || needs_quote(key)) {
+                       nticks = num_q(key, klen);
+                       New(0, nkey, klen+nticks+3, char);
+                       nkey[0] = '\'';
+                       if (nticks)
+                           klen += esc_q(nkey+1, key, klen);
+                       else
+                           (void)Copy(key, nkey+1, klen, char);
+                       nkey[++klen] = '\'';
+                       nkey[++klen] = '\0';
+                   }
+                   else {
+                       New(0, nkey, klen, char);
+                       (void)Copy(key, nkey, klen, char);
+                   }
+
+                   sname = newSVsv(iname);
+                   sv_catpvn(sname, nkey, klen);
+                   sv_catpvn(sname, "}", 1);
+
+                   sv_catsv(retval, totpad);
+                   sv_catsv(retval, ipad);
+                   sv_catpvn(retval, nkey, klen);
+               }
                sv_catpvn(retval, " => ", 4);
                if (indent >= 2) {
                    char *extra;
@@ -506,7 +617,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                DD_dump(aTHX_ hval, SvPVX(sname), SvCUR(sname), retval, seenhv,
                        postav, levelp, indent, pad, xpad, newapad, sep,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
-                       maxdepth);
+                       maxdepth, sortkeys);
                SvREFCNT_dec(sname);
                Safefree(nkey);
                if (indent >= 2)
@@ -648,7 +759,8 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                        DD_dump(aTHX_ e, SvPVX(nname), SvCUR(nname), postentry,
                                seenhv, postav, &nlevel, indent, pad, xpad,
                                newapad, sep, freezer, toaster, purity,
-                               deepcopy, quotekeys, bless, maxdepth);
+                               deepcopy, quotekeys, bless, maxdepth, 
+                               sortkeys);
                        SvREFCNT_dec(e);
                    }
                }
@@ -662,14 +774,18 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
        }
        else {
            c = SvPV(val, i);
-           sv_grow(retval, SvCUR(retval)+3+2*i);
-           r = SvPVX(retval)+SvCUR(retval);
-           r[0] = '\'';
-           i += esc_q(r+1, c, i);
-           ++i;
-           r[i++] = '\'';
-           r[i] = '\0';
-           SvCUR_set(retval, SvCUR(retval)+i);
+           if (DO_UTF8(val))
+               i += esc_q_utf8(aTHX_ retval, c, i);
+           else {
+               sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
+               r = SvPVX(retval) + SvCUR(retval);
+               r[0] = '\'';
+               i += esc_q(r+1, c, i);
+               ++i;
+               r[i++] = '\'';
+               r[i] = '\0';
+               SvCUR_set(retval, SvCUR(retval)+i);
+           }
        }
     }
 
@@ -704,10 +820,10 @@ Data_Dumper_Dumpxs(href, ...)
            HV *seenhv = Nullhv;
            AV *postav, *todumpav, *namesav;
            I32 level = 0;
-           I32 indent, terse, useqq, i, imax, postlen;
+           I32 indent, terse, i, imax, postlen;
            SV **svp;
-           SV *val, *name, *pad, *xpad, *apad, *sep, *tmp, *varname;
-           SV *freezer, *toaster, *bless;
+           SV *val, *name, *pad, *xpad, *apad, *sep, *varname;
+           SV *freezer, *toaster, *bless, *sortkeys;
            I32 purity, deepcopy, quotekeys, maxdepth = 0;
            char tmpbuf[1024];
            I32 gimme = GIMME;
@@ -739,13 +855,13 @@ Data_Dumper_Dumpxs(href, ...)
 
            todumpav = namesav = Nullav;
            seenhv = Nullhv;
-           val = pad = xpad = apad = sep = tmp = varname
+           val = pad = xpad = apad = sep = varname
                = freezer = toaster = bless = &PL_sv_undef;
            name = sv_newmortal();
            indent = 2;
-           terse = useqq = purity = deepcopy = 0;
+           terse = purity = deepcopy = 0;
            quotekeys = 1;
-           
+       
            retval = newSVpvn("", 0);
            if (SvROK(href)
                && (hv = (HV*)SvRV((SV*)href))
@@ -763,8 +879,10 @@ Data_Dumper_Dumpxs(href, ...)
                    purity = SvIV(*svp);
                if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
                    terse = SvTRUE(*svp);
+#if 0 /* useqq currently unused */
                if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
                    useqq = SvTRUE(*svp);
+#endif
                if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
                    pad = *svp;
                if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
@@ -787,6 +905,17 @@ Data_Dumper_Dumpxs(href, ...)
                    bless = *svp;
                if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
                    maxdepth = SvIV(*svp);
+               if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
+                   sortkeys = *svp;
+                   if (! SvTRUE(sortkeys))
+                       sortkeys = NULL;
+                   else if (! (SvROK(sortkeys) &&
+                               SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
+                   {
+                       /* flag to use sortsv() for sorting hash keys */
+                       sortkeys = &PL_sv_yes; 
+                   }
+               }
                postav = newAV();
 
                if (todumpav)
@@ -796,7 +925,7 @@ Data_Dumper_Dumpxs(href, ...)
                valstr = newSVpvn("",0);
                for (i = 0; i <= imax; ++i) {
                    SV *newapad;
-                   
+               
                    av_clear(postav);
                    if ((svp = av_fetch(todumpav, i, FALSE)))
                        val = *svp;
@@ -805,8 +934,8 @@ Data_Dumper_Dumpxs(href, ...)
                    if ((svp = av_fetch(namesav, i, TRUE)))
                        sv_setsv(name, *svp);
                    else
-                       SvOK_off(name);
-                   
+                       (void)SvOK_off(name);
+               
                    if (SvOK(name)) {
                        if ((SvPVX(name))[0] == '*') {
                            if (SvROK(val)) {
@@ -839,7 +968,7 @@ Data_Dumper_Dumpxs(href, ...)
                        nchars = strlen(tmpbuf);
                        sv_catpvn(name, tmpbuf, nchars);
                    }
-                   
+               
                    if (indent >= 2) {
                        SV *tmpsv = sv_x(aTHX_ Nullsv, " ", 1, SvCUR(name)+3);
                        newapad = newSVsv(apad);
@@ -848,12 +977,12 @@ Data_Dumper_Dumpxs(href, ...)
                    }
                    else
                        newapad = apad;
-                   
+               
                    DD_dump(aTHX_ val, SvPVX(name), SvCUR(name), valstr, seenhv,
                            postav, &level, indent, pad, xpad, newapad, sep,
                            freezer, toaster, purity, deepcopy, quotekeys,
-                           bless, maxdepth);
-                   
+                           bless, maxdepth, sortkeys);
+               
                    if (indent >= 2)
                        SvREFCNT_dec(newapad);