For cases FF_LINESNGL and FF_LINEGLOB in pp_formline, take great care
Nicholas Clark [Sun, 13 Jul 2008 21:02:43 +0000 (21:02 +0000)]
to call get magic exactly once. This doesn't just avoid logical errors
with tied variables, it actually avoids panics (or worse) because a
pointer is retained to the string returned by the (first) call to
SvPV_const() for a future sv_chop(), and any future call to get magic
can invalidate the buffer that that pointer points to.
Also this removes the original crazy code that would set then reset the
length of a scalar, so as to only copy an initial portion of it, and
also copy the entire scalar including trailing newline (which might
require allocating more memory), only to immediately remove the newline
from the copy by reducing the length by one.

p4raw-id: //depot/perl@34139

pp_ctl.c
t/op/write.t

index 6a0c804..dc6c215 100644 (file)
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -771,39 +771,69 @@ PP(pp_formline)
                item_is_utf8 = DO_UTF8(sv);
                itemsize = len;
                if (itemsize) {
-                   bool chopped = FALSE;
+                   STRLEN to_copy = itemsize;
                    const char *const send = s + len;
+                   const U8 *source;
+                   U8 *tmp = NULL;
+
                    gotsome = TRUE;
                    chophere = s + itemsize;
                    while (s < send) {
                        if (*s++ == '\n') {
                            if (oneline) {
-                               chopped = TRUE;
+                               to_copy = s - SvPVX_const(sv) - 1;
                                chophere = s;
                                break;
                            } else {
                                if (s == send) {
                                    itemsize--;
-                                   chopped = TRUE;
+                                   to_copy--;
                                } else
                                    lines++;
                            }
                        }
                    }
-                   SvCUR_set(PL_formtarget, t - SvPVX_const(PL_formtarget));
-                   if (oneline) {
-                       SvCUR_set(sv, chophere - item);
-                       sv_catsv(PL_formtarget, sv);
-                       SvCUR_set(sv, itemsize);
-                   } else
-                       sv_catsv(PL_formtarget, sv);
-                   if (chopped)
-                       SvCUR_set(PL_formtarget, SvCUR(PL_formtarget) - 1);
-                   SvGROW(PL_formtarget, SvCUR(PL_formtarget) + fudge + 1);
+                   if (targ_is_utf8 && !item_is_utf8) {
+                       source = tmp = bytes_to_utf8((U8*)SvPVX(sv), &to_copy);
+                       SvCUR_set(PL_formtarget,
+                                 t - SvPVX_const(PL_formtarget));
+                   } else {
+                       if (item_is_utf8 && !targ_is_utf8) {
+                           /* Upgrade targ to UTF8, and then we reduce it to
+                              a problem we have a simple solution for.  */
+                           SvCUR_set(PL_formtarget,
+                                     t - SvPVX_const(PL_formtarget));
+                           targ_is_utf8 = TRUE;
+                           /* Don't need get magic.  */
+                           sv_utf8_upgrade_flags(PL_formtarget, 0);
+                       } else {
+                           SvCUR_set(PL_formtarget,
+                                     t - SvPVX_const(PL_formtarget));
+                       }
+                       source = (U8 *) SvPVX(sv);
+
+                       /* Easy. They agree.  */
+                       assert (item_is_utf8 == targ_is_utf8);
+                   }
+                   SvGROW(PL_formtarget,
+                          SvCUR(PL_formtarget) + to_copy + fudge + 1);
                    t = SvPVX(PL_formtarget) + SvCUR(PL_formtarget);
+
+                   Copy(source, t, to_copy, char);
+                   t += to_copy;
+                   SvCUR_set(PL_formtarget, SvCUR(PL_formtarget) + to_copy);
                    if (item_is_utf8) {
-                       targ_is_utf8 = TRUE;
-                       sv_pos_b2u(sv, &itemsize);
+                       if (SvGMAGICAL(sv)) {
+                           /* Mustn't call sv_pos_b2u() as it does a second
+                              mg_get(). Is this a bug? Do we need a _flags()
+                              variant? */
+                           itemsize = utf8_length(source, source + itemsize);
+                       } else {
+                           sv_pos_b2u(sv, &itemsize);
+                       }
+                       assert(!tmp);
+                   } else if (tmp) {
+                       Safefree(tmp);
                    }
                }
                break;
index 976713f..96e99e4 100755 (executable)
@@ -61,7 +61,7 @@ for my $tref ( @NumTests ){
 my $bas_tests = 20;
 
 # number of tests in section 3
-my $bug_tests = 4 + 3 * 3 * 5 * 2;
+my $bug_tests = 4 + 3 * 3 * 5 * 2 * 3;
 
 # number of tests in section 4
 my $hmb_tests = 35;
@@ -512,7 +512,28 @@ for my $tref ( @NumTests ){
 }
 
 {
-  my ($pound_utf8, $pm_utf8) = map { my $a = "$_\x{100}"; chop $a; $a} 
+  package Count;
+
+  sub TIESCALAR {
+    my $class = shift;
+    bless [shift, 0, 0], $class;
+  }
+
+  sub FETCH {
+    my $self = shift;
+    ++$self->[1];
+    $self->[0];
+  }
+
+  sub STORE {
+    my $self = shift;
+    ++$self->[2];
+    $self->[0] = shift;
+  }
+}
+
+{
+  my ($pound_utf8, $pm_utf8) = map { my $a = "$_\x{100}"; chop $a; $a}
     my ($pound, $pm) = ("\xA3", "\xB1");
 
   foreach my $first ('N', $pound, $pound_utf8) {
@@ -521,16 +542,27 @@ for my $tref ( @NumTests ){
                          "$base\nMoo!\n",) {
        foreach (['^*', qr/(.+)/], ['@*', qr/(.*?)$/s]) {
          my ($format, $re) = @$_;
-         my $name = "$first, $second $format";
-         $name =~ s/\n/\\n/g;
-
-         my ($copy1, $copy2) = ($first, $second);
-         $first =~ /(.+)/ or die $first;
-         my $expect = "1${1}2";
-         $second =~ $re or die $second;
-         $expect .= " 3${1}4";
-
-         is swrite("1^*2 3${format}4", $copy1, $copy2), $expect, $name;
+         foreach my $class ('', 'Count') {
+           my $name = "$first, $second $format $class";
+           $name =~ s/\n/\\n/g;
+
+           $first =~ /(.+)/ or die $first;
+           my $expect = "1${1}2";
+           $second =~ $re or die $second;
+           $expect .= " 3${1}4";
+
+           if ($class) {
+             my $copy1 = $first;
+             my $copy2;
+             tie $copy2, $class, $second;
+             is swrite("1^*2 3${format}4", $copy1, $copy2), $expect, $name;
+             my $obj = tied $copy2;
+             is $obj->[1], 1, 'value read exactly once';
+           } else {
+             my ($copy1, $copy2) = ($first, $second);
+             is swrite("1^*2 3${format}4", $copy1, $copy2), $expect, $name;
+           }
+         }
        }
       }
     }