Re: [PATCH] Re: Data::Dumper tests with -Mutf8
Nicholas Clark [Sat, 4 May 2002 12:38:58 +0000 (13:38 +0100)]
Message-ID: <20020504113857.GC317@Bagpuss.unfortu.net>

p4raw-id: //depot/perl@16390

ext/Data/Dumper/Dumper.xs
ext/Data/Dumper/t/dumper.t

index 0e12cbf..11a97d2 100644 (file)
@@ -574,10 +574,12 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
 
             /* foreach (keys %hash) */
             for (i = 0; 1; i++) {
-               char *nkey = NULL;
+               char *nkey;
+                char *nkey_buffer = NULL;
                I32 nticks = 0;
                SV* keysv;
                STRLEN keylen;
+                I32 nlen;
                bool do_utf8 = FALSE;
 
                 if ((sortkeys && !(keys && (I32)i <= av_len(keys))) ||
@@ -605,23 +607,39 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                key = SvPV(keysv, keylen);
                klen = keylen;
 
-               if (do_utf8) {
-                   STRLEN ocur;
-                   I32 nlen;
+                sv_catsv(retval, totpad);
+                sv_catsv(retval, ipad);
+                /* old logic was first to check utf8 flag, and if utf8 always
+                   call esc_q_utf8.  This caused test to break under -Mutf8,
+                   because there even strings like 'c' have utf8 flag on.
+                   Hence with quotekeys == 0 the XS code would still '' quote
+                   them based on flags, whereas the perl code would not,
+                   based on regexps.
+                   The perl code is correct.
+                   needs_quote() decides that anything that isn't a valid
+                   perl identifier needs to be quoted, hence only correctly
+                   formed strings with no characters outside [A-Za-z0-9_:]
+                   won't need quoting.  None of those characters are used in
+                   the byte encoding of utf8, so anything with utf8
+                   encoded characters in will need quoting. Hence strings
+                   with utf8 encoded characters in will end up inside do_utf8
+                   just like before, but now strings with utf8 flag set but
+                   only ascii characters will end up in the unquoted section.
 
-                   sv_catsv(retval, totpad);
-                   sv_catsv(retval, ipad);
-                    ocur = SvCUR(retval);
-                   nlen = esc_q_utf8(aTHX_ retval, key, klen);
-
-                   sname = newSVsv(iname);
-                   sv_catpvn(sname, SvPVX(retval) + ocur, nlen);
-                   sv_catpvn(sname, "}", 1);
-               }
-               else {
-                   if (quotekeys || needs_quote(key)) {
+                   There should also be less tests for the (probably currently)
+                   more common doesn't need quoting case.
+                   The code is also smaller (22044 vs 22260) because I've been
+                   able to pull the comon logic out to both sides.  */
+                if (quotekeys || needs_quote(key)) {
+                    if (do_utf8) {
+                        STRLEN ocur = SvCUR(retval);
+                        nlen = esc_q_utf8(aTHX_ retval, key, klen);
+                        nkey = SvPVX(retval) + ocur;
+                    }
+                    else {
                        nticks = num_q(key, klen);
-                       New(0, nkey, klen+nticks+3, char);
+                       New(0, nkey_buffer, klen+nticks+3, char);
+                        nkey = nkey_buffer;
                        nkey[0] = '\'';
                        if (nticks)
                            klen += esc_q(nkey+1, key, klen);
@@ -629,20 +647,19 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                            (void)Copy(key, nkey+1, klen, char);
                        nkey[++klen] = '\'';
                        nkey[++klen] = '\0';
+                        nlen = klen;
+                        sv_catpvn(retval, nkey, klen);
                    }
-                   else {
-                       New(0, nkey, klen, char);
-                       (void)Copy(key, nkey, klen, char);
-                   }
-
-                   sname = newSVsv(iname);
-                   sv_catpvn(sname, nkey, klen);
-                   sv_catpvn(sname, "}", 1);
-
-                   sv_catsv(retval, totpad);
-                   sv_catsv(retval, ipad);
-                   sv_catpvn(retval, nkey, klen);
+                }
+                else {
+                    nkey = key;
+                    nlen = klen;
+                    sv_catpvn(retval, nkey, klen);
                }
+                sname = newSVsv(iname);
+                sv_catpvn(sname, nkey, nlen);
+                sv_catpvn(sname, "}", 1);
+
                sv_catpvn(retval, " => ", 4);
                if (indent >= 2) {
                    char *extra;
@@ -663,7 +680,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
                        maxdepth, sortkeys);
                SvREFCNT_dec(sname);
-               Safefree(nkey);
+               Safefree(nkey_buffer);
                if (indent >= 2)
                    SvREFCNT_dec(newapad);
            }
index 2955a7f..9c7f0a6 100755 (executable)
@@ -67,11 +67,11 @@ sub TEST {
 $Data::Dumper::Useperl = 1;
 if (defined &Data::Dumper::Dumpxs) {
   print "### XS extension loaded, will run XS tests\n";
-  $TMAX = 351; $XS = 1;
+  $TMAX = 357; $XS = 1;
 }
 else {
   print "### XS extensions not loaded, will NOT run XS tests\n";
-  $TMAX = 177; $XS = 0;
+  $TMAX = 180; $XS = 0;
 }
 
 print "1..$TMAX\n";
@@ -1334,3 +1334,22 @@ EOT
     TEST q(Data::Dumper->Dumpxs([\\*ping, \\%ping], ['*ping', '*pong'])) if $XS;
   }
 }
+
+# XS for quotekeys==0 was not being defensive enough against utf8 flagged
+# scalars
+
+{
+  $WANT = <<'EOT';
+#$VAR1 = {
+#  perl => 'rocks'
+#};
+EOT
+  local $Data::Dumper::Quotekeys = 0;
+  my $k = 'perl' . chr 256;
+  chop $k;
+  %foo = ($k => 'rocks');
+
+  TEST q(Data::Dumper->Dump([\\%foo])), "quotekeys == 0 for utf8 flagged ASCII";
+  TEST q(Data::Dumper->Dumpxs([\\%foo])),
+    "XS quotekeys == 0 for utf8 flagged ASCII" if $XS;
+}