Integrate mainline
[p5sagit/p5-mst-13.2.git] / ext / Data / Dumper / Dumper.xs
index b9fb54b..20e4af8 100644 (file)
@@ -29,7 +29,7 @@ static I32 DD_dump (pTHX_ SV *val, char *name, STRLEN namelen, SV *retval,
                    SV *pad, SV *xpad, SV *apad, SV *sep,
                    SV *freezer, SV *toaster,
                    I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
-                   I32 maxdepth);
+                   I32 maxdepth, SV *sortkeys);
 
 /* does a string need to be protected? */
 static I32
@@ -100,45 +100,79 @@ esc_q(register char *d, register char *s, register STRLEN slen)
 static I32
 esc_q_utf8(pTHX_ SV* sv, register char *src, register STRLEN slen)
 {
-    char *s, *send, *r;
-    STRLEN grow = 0, j = 1, l;
-    bool dquote = FALSE;
+    char *s, *send, *r, *rstart;
+    STRLEN j, cur = SvCUR(sv);
+    /* Could count 128-255 and 256+ in two variables, if we want to
+       be like &qquote and make a distinction.  */
+    STRLEN grow = 0;   /* bytes needed to represent chars 128+ */
+    /* STRLEN topbit_grow = 0; bytes needed to represent chars 128-255 */
+    STRLEN backslashes = 0;
+    STRLEN single_quotes = 0;
+    STRLEN qq_escapables = 0;  /* " $ @ will need a \ in "" strings.  */
+    STRLEN normal = 0;
 
     /* this will need EBCDICification */
     for (s = src, send = src + slen; s < send; s += UTF8SKIP(s)) {
-        UV k = utf8_to_uvchr((U8*)s, &l);
+        UV k = utf8_to_uvchr((U8*)s, NULL);
 
-       grow +=
-         (*s == '"' || *s == '\\') ? 2 :
-         (k < 0x80 ? 1 : UNISKIP(k) + 1 + 4); /* 4: \x{} */
+        if (k > 127) {
+            /* 4: \x{} then count the number of hex digits.  */
+            grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
+#if UVSIZE == 4
+                8 /* We may allocate a bit more than the minimum here.  */
+#else
+                k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
+#endif
+                );
+        } else if (k == '\\') {
+            backslashes++;
+        } else if (k == '\'') {
+            single_quotes++;
+        } else if (k == '"' || k == '$' || k == '@') {
+            qq_escapables++;
+        } else {
+            normal++;
+        }
     }
-    sv_grow(sv, SvCUR(sv)+3+grow); /* 3: ""\0 */
-    r = SvPVX(sv) + SvCUR(sv);
+    if (grow) {
+        /* We have something needing hex. 3 is ""\0 */
+        sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
+               + 2*qq_escapables + normal);
+        rstart = r = SvPVX(sv) + cur;
 
-    for (s = src; s < send; s += UTF8SKIP(s)) {
-        UV k = utf8_to_uvchr((U8*)s, &l);
+        *r++ = '"';
 
-       if (*s == '"' || *s == '\\') {
-           r[j++] = '\\';
-           r[j++] = *s;
-       }
-       else if (k < 0x80)
-           r[j++] = k;
-       else {
-           r[j++] = '\\';
-           r[j++] = 'x';
-           r[j++] = '{';
-           j += sprintf(r + j, "%"UVxf, k);
-           r[j++] = '}';
-           dquote = TRUE;
-       }
+        for (s = src; s < send; s += UTF8SKIP(s)) {
+            UV k = utf8_to_uvchr((U8*)s, NULL);
+
+            if (k == '"' || k == '\\' || k == '$' || k == '@') {
+                *r++ = '\\';
+                *r++ = k;
+            }
+            else if (k < 0x80)
+                *r++ = k;
+            else {
+                r += sprintf(r, "\\x{%"UVxf"}", k);
+            }
+        }
+        *r++ = '"';
+    } else {
+        /* Single quotes.  */
+        sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
+               + qq_escapables + normal);
+        rstart = r = SvPVX(sv) + cur;
+        *r++ = '\'';
+        for (s = src; s < send; s ++) {
+            char k = *s;
+            if (k == '\'' || k == '\\')
+                *r++ = '\\';
+            *r++ = k;
+        }
+        *r++ = '\'';
     }
-    if (dquote)
-      r[0] = r[j++] = '"';
-    else
-      r[0] = r[j++] = '\'';
-    r[j++] = '\0';
-    SvCUR_set(sv, SvCUR(sv) + j);
+    *r = '\0';
+    j = r - rstart;
+    SvCUR_set(sv, cur + j);
 
     return j;
 }
@@ -179,7 +213,7 @@ static I32
 DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
        AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
        SV *apad, SV *sep, SV *freezer, SV *toaster, I32 purity,
-       I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth)
+       I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
 {
     char tmpbuf[128];
     U32 i;
@@ -354,7 +388,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
                        postav, levelp, indent, pad, xpad, apad, sep,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
-                       maxdepth);
+                       maxdepth, sortkeys);
                sv_catpvn(retval, ")}", 2);
            }                                                /* plain */
            else {
@@ -362,7 +396,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
                        postav, levelp, indent, pad, xpad, apad, sep,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
-                       maxdepth);
+                       maxdepth, sortkeys);
            }
            SvREFCNT_dec(namesv);
        }
@@ -374,7 +408,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
            DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
                    postav, levelp,     indent, pad, xpad, apad, sep,
                    freezer, toaster, purity, deepcopy, quotekeys, bless,
-                   maxdepth);
+                   maxdepth, sortkeys);
            SvREFCNT_dec(namesv);
        }
        else if (realtype == SVt_PVAV) {
@@ -443,7 +477,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
                        levelp, indent, pad, xpad, apad, sep,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
-                       maxdepth);
+                       maxdepth, sortkeys);
                if (ix < ixmax)
                    sv_catpvn(retval, ",", 1);
            }
@@ -468,6 +502,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
            char *key;
            I32 klen;
            SV *hval;
+           AV *keys = Nullav;
        
            iname = newSVpvn(name, namelen);
            if (name[0] == '%') {
@@ -497,20 +532,73 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
            sv_catsv(totpad, pad);
            sv_catsv(totpad, apad);
        
-           (void)hv_iterinit((HV*)ival);
-           i = 0;
-           while ((entry = hv_iternext((HV*)ival)))  {
+           /* If requested, get a sorted/filtered array of hash keys */
+           if (sortkeys) {
+               if (sortkeys == &PL_sv_yes) {
+                   keys = newAV();
+                   (void)hv_iterinit((HV*)ival);
+                   while ((entry = hv_iternext((HV*)ival))) {
+                       sv = hv_iterkeysv(entry);
+                       SvREFCNT_inc(sv);
+                       av_push(keys, sv);
+                   }
+#ifdef USE_LOCALE_NUMERIC
+                   sortsv(AvARRAY(keys), 
+                          av_len(keys)+1, 
+                          IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
+#else
+                   sortsv(AvARRAY(keys), 
+                          av_len(keys)+1, 
+                          Perl_sv_cmp);
+#endif
+               }
+               else {
+                   dSP; ENTER; SAVETMPS; PUSHMARK(sp);
+                   XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
+                   i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
+                   SPAGAIN;
+                   if (i) {
+                       sv = POPs;
+                       if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
+                           keys = (AV*)SvREFCNT_inc(SvRV(sv));
+                   }
+                   if (! keys)
+                       warn("Sortkeys subroutine did not return ARRAYREF\n");
+                   PUTBACK; FREETMPS; LEAVE;
+               }
+               if (keys)
+                   sv_2mortal((SV*)keys);
+           }
+           else
+               (void)hv_iterinit((HV*)ival);
+
+            /* foreach (keys %hash) */
+            for (i = 0; 1; i++) {
                char *nkey = NULL;
                I32 nticks = 0;
                SV* keysv;
                STRLEN keylen;
                bool do_utf8 = FALSE;
-               
+
+                if ((sortkeys && !(keys && i <= av_len(keys))) ||
+                    !(entry = hv_iternext((HV *)ival)))
+                    break;
+
                if (i)
                    sv_catpvn(retval, ",", 1);
-               i++;
-               keysv = hv_iterkeysv(entry);
-               hval  = hv_iterval((HV*)ival, entry);
+
+               if (sortkeys) {
+                   char *key;
+                   svp = av_fetch(keys, i, FALSE);
+                   keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
+                   key = SvPV(keysv, keylen);
+                   svp = hv_fetch((HV*)ival, key, keylen, 0);
+                   hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
+               }
+               else {
+                   keysv = hv_iterkeysv(entry);
+                   hval = hv_iterval((HV*)ival, entry);
+               }
 
                do_utf8 = DO_UTF8(keysv);
                key = SvPV(keysv, keylen);
@@ -571,7 +659,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                DD_dump(aTHX_ hval, SvPVX(sname), SvCUR(sname), retval, seenhv,
                        postav, levelp, indent, pad, xpad, newapad, sep,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
-                       maxdepth);
+                       maxdepth, sortkeys);
                SvREFCNT_dec(sname);
                Safefree(nkey);
                if (indent >= 2)
@@ -654,7 +742,26 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
            else
              (void) sprintf(tmpbuf, "%"IVdf, SvIV(val));
             len = strlen(tmpbuf);
-           sv_catpvn(retval, tmpbuf, len);
+            /* For 5.6.x and earlier will need to change this test to check
+               NV if NOK, as there NOK trumps IOK, and NV=3.5,IV=3 is valid.
+               Current code will Dump that as $VAR1 = 3;
+               Changes in 5.7 series mean that now IOK is only set if scalar
+               is precisely integer.  */
+            if (SvPOK(val)) {
+              /* Need to check to see if this is a string such as " 0".
+                 I'm assuming from sprintf isn't going to clash with utf8.
+                 Is this valid on EBCDIC?  */
+              STRLEN pvlen;
+              const char *pv = SvPV(val, pvlen);
+              if (pvlen != len || memNE(pv, tmpbuf, len))
+                goto integer_came_from_string;
+            }
+            if (len > 10) {
+              /* Looks like we're on a 64 bit system.  Make it a string so that
+                 if a 32 bit system reads the number it will cope better.  */
+              sv_catpvf(retval, "'%s'", tmpbuf);
+            } else
+              sv_catpvn(retval, tmpbuf, len);
        }
        else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
            c = SvPV(val, i);
@@ -713,7 +820,8 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                        DD_dump(aTHX_ e, SvPVX(nname), SvCUR(nname), postentry,
                                seenhv, postav, &nlevel, indent, pad, xpad,
                                newapad, sep, freezer, toaster, purity,
-                               deepcopy, quotekeys, bless, maxdepth);
+                               deepcopy, quotekeys, bless, maxdepth, 
+                               sortkeys);
                        SvREFCNT_dec(e);
                    }
                }
@@ -726,6 +834,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
            sv_catpvn(retval, "undef", 5);
        }
        else {
+        integer_came_from_string:
            c = SvPV(val, i);
            if (DO_UTF8(val))
                i += esc_q_utf8(aTHX_ retval, c, i);
@@ -776,7 +885,7 @@ Data_Dumper_Dumpxs(href, ...)
            I32 indent, terse, i, imax, postlen;
            SV **svp;
            SV *val, *name, *pad, *xpad, *apad, *sep, *varname;
-           SV *freezer, *toaster, *bless;
+           SV *freezer, *toaster, *bless, *sortkeys;
            I32 purity, deepcopy, quotekeys, maxdepth = 0;
            char tmpbuf[1024];
            I32 gimme = GIMME;
@@ -858,6 +967,17 @@ Data_Dumper_Dumpxs(href, ...)
                    bless = *svp;
                if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
                    maxdepth = SvIV(*svp);
+               if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
+                   sortkeys = *svp;
+                   if (! SvTRUE(sortkeys))
+                       sortkeys = NULL;
+                   else if (! (SvROK(sortkeys) &&
+                               SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
+                   {
+                       /* flag to use qsortsv() for sorting hash keys */       
+                       sortkeys = &PL_sv_yes; 
+                   }
+               }
                postav = newAV();
 
                if (todumpav)
@@ -923,7 +1043,7 @@ Data_Dumper_Dumpxs(href, ...)
                    DD_dump(aTHX_ val, SvPVX(name), SvCUR(name), valstr, seenhv,
                            postav, &level, indent, pad, xpad, newapad, sep,
                            freezer, toaster, purity, deepcopy, quotekeys,
-                           bless, maxdepth);
+                           bless, maxdepth, sortkeys);
                
                    if (indent >= 2)
                        SvREFCNT_dec(newapad);