Storable should cope if the string to thaw happens to be utf8 encoded
Nicholas Clark [Thu, 17 Jun 2004 12:03:48 +0000 (12:03 +0000)]
And anything with bytes >=256 is corrupt

p4raw-id: //depot/perl@22943

ext/Storable/ChangeLog
ext/Storable/README
ext/Storable/Storable.pm
ext/Storable/Storable.xs
ext/Storable/t/utf8.t

index 6359932..4745df4 100644 (file)
@@ -1,3 +1,12 @@
+Thu Jun 17 12:26:43 BST 2004   Nicholas Clark <nick@ccl4.org>
+
+    Version 2.13
+
+        1. Don't change the type of top level overloaded references to RV -
+          they are perfectly correct as PVMG
+       2. Storable needs to cope with incoming frozen data that happens to be
+          utf8 encoded.
+
 Wed Mar 17 15:40:29 GMT 2004   Nicholas Clark <nick@ccl4.org>
 
     Version 2.12
index 551f2c4..db46b43 100644 (file)
@@ -1,4 +1,4 @@
-                         Storable 2.12
+                         Storable 2.13
                Copyright (c) 1995-2000, Raphael Manfredi
                Copyright (c) 2001-2004, Larry Wall
 
index 6c8cb3b..3a361ba 100644 (file)
@@ -21,7 +21,7 @@ package Storable; @ISA = qw(Exporter DynaLoader);
 use AutoLoader;
 use vars qw($canonical $forgive_me $VERSION);
 
-$VERSION = '2.12';
+$VERSION = '2.13';
 *AUTOLOAD = \&AutoLoader::AUTOLOAD;            # Grrr...
 
 #
index a8cd003..9a7662f 100644 (file)
@@ -5797,8 +5797,46 @@ static SV *do_retrieve(
 
        KBUFINIT();                                     /* Allocate hash key reading pool once */
 
-       if (!f && in)
+       if (!f && in) {
+#ifdef SvUTF8_on
+               if (SvUTF8(in)) {
+                       STRLEN length;
+                       const char *orig = SvPV(in, length);
+                       char *asbytes;
+                       /* This is quite deliberate. I want the UTF8 routines
+                          to encounter the '\0' which perl adds at the end
+                          of all scalars, so that any new string also has
+                          this.
+                       */
+                       I32 len32 = length + 1;
+                       bool is_utf8 = TRUE;
+
+                       /* Just casting the &klen to (STRLEN) won't work
+                          well if STRLEN and I32 are of different widths.
+                          --jhi */
+                       asbytes = (char*)bytes_from_utf8((U8*)orig,
+                                                        &len32,
+                                                        &is_utf8);
+                       if (is_utf8) {
+                               CROAK(("Frozen string corrupt - contains characters outside 0-255"));
+                       }
+                       if (asbytes != orig) {
+                               /* String has been converted.
+                                  There is no need to keep any reference to
+                                  the old string.  */
+                               in = sv_newmortal();
+                               /* We donate the SV the malloc()ed string
+                                  bytes_from_utf8 returned us.  */
+                               SvUPGRADE(in, SVt_PV);
+                               SvPOK_on(in);
+                               SvPVX(in) = asbytes;
+                               SvLEN(in) = len32;
+                               SvCUR(in) = len32 - 1;
+                       }
+               }
+#endif
                MBUF_SAVE_AND_LOAD(in);
+       }
 
        /*
         * Magic number verifications.
index 6b8724b..ab0f62c 100644 (file)
@@ -1,3 +1,4 @@
+
 #!./perl -w
 #
 #  Copyright (c) 1995-2000, Raphael Manfredi
@@ -30,7 +31,7 @@ sub ok;
 
 use Storable qw(thaw freeze);
 
-print "1..3\n";
+print "1..6\n";
 
 my $x = chr(1234);
 ok 1, $x eq ${thaw freeze \$x};
@@ -43,3 +44,20 @@ ok 2, $x eq ${thaw freeze \$x};
 $x = chr (175) . chr (256);
 chop $x;
 ok 3, $x eq ${thaw freeze \$x};
+
+# Storable needs to cope if a frozen string happens to be internall utf8
+# encoded
+
+$x = chr 256;
+my $data = freeze \$x;
+ok 4, $x eq ${thaw $data};
+
+$data .= chr 256;
+chop $data;
+ok 5, $x eq ${thaw $data};
+
+
+$data .= chr 256;
+# This definately isn't valid
+eval {thaw $data};
+ok 6, $@ =~ /corrupt.*characters outside/;