From: Nicholas Clark Date: Sun, 13 Jul 2008 21:02:43 +0000 (+0000) Subject: For cases FF_LINESNGL and FF_LINEGLOB in pp_formline, take great care X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e8e72d41921b8365de491c29d17ce128c04ed94d;p=p5sagit%2Fp5-mst-13.2.git For cases FF_LINESNGL and FF_LINEGLOB in pp_formline, take great care to call get magic exactly once. This doesn't just avoid logical errors with tied variables, it actually avoids panics (or worse) because a pointer is retained to the string returned by the (first) call to SvPV_const() for a future sv_chop(), and any future call to get magic can invalidate the buffer that that pointer points to. Also this removes the original crazy code that would set then reset the length of a scalar, so as to only copy an initial portion of it, and also copy the entire scalar including trailing newline (which might require allocating more memory), only to immediately remove the newline from the copy by reducing the length by one. p4raw-id: //depot/perl@34139 --- diff --git a/pp_ctl.c b/pp_ctl.c index 6a0c804..dc6c215 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -771,39 +771,69 @@ PP(pp_formline) item_is_utf8 = DO_UTF8(sv); itemsize = len; if (itemsize) { - bool chopped = FALSE; + STRLEN to_copy = itemsize; const char *const send = s + len; + const U8 *source; + U8 *tmp = NULL; + gotsome = TRUE; chophere = s + itemsize; while (s < send) { if (*s++ == '\n') { if (oneline) { - chopped = TRUE; + to_copy = s - SvPVX_const(sv) - 1; chophere = s; break; } else { if (s == send) { itemsize--; - chopped = TRUE; + to_copy--; } else lines++; } } } - SvCUR_set(PL_formtarget, t - SvPVX_const(PL_formtarget)); - if (oneline) { - SvCUR_set(sv, chophere - item); - sv_catsv(PL_formtarget, sv); - SvCUR_set(sv, itemsize); - } else - sv_catsv(PL_formtarget, sv); - if (chopped) - SvCUR_set(PL_formtarget, SvCUR(PL_formtarget) - 1); - SvGROW(PL_formtarget, SvCUR(PL_formtarget) + fudge + 1); + if (targ_is_utf8 && !item_is_utf8) { + source = tmp = bytes_to_utf8((U8*)SvPVX(sv), &to_copy); + SvCUR_set(PL_formtarget, + t - SvPVX_const(PL_formtarget)); + } else { + if (item_is_utf8 && !targ_is_utf8) { + /* Upgrade targ to UTF8, and then we reduce it to + a problem we have a simple solution for. */ + SvCUR_set(PL_formtarget, + t - SvPVX_const(PL_formtarget)); + targ_is_utf8 = TRUE; + /* Don't need get magic. */ + sv_utf8_upgrade_flags(PL_formtarget, 0); + } else { + SvCUR_set(PL_formtarget, + t - SvPVX_const(PL_formtarget)); + } + source = (U8 *) SvPVX(sv); + + /* Easy. They agree. */ + assert (item_is_utf8 == targ_is_utf8); + } + SvGROW(PL_formtarget, + SvCUR(PL_formtarget) + to_copy + fudge + 1); t = SvPVX(PL_formtarget) + SvCUR(PL_formtarget); + + Copy(source, t, to_copy, char); + t += to_copy; + SvCUR_set(PL_formtarget, SvCUR(PL_formtarget) + to_copy); if (item_is_utf8) { - targ_is_utf8 = TRUE; - sv_pos_b2u(sv, &itemsize); + if (SvGMAGICAL(sv)) { + /* Mustn't call sv_pos_b2u() as it does a second + mg_get(). Is this a bug? Do we need a _flags() + variant? */ + itemsize = utf8_length(source, source + itemsize); + } else { + sv_pos_b2u(sv, &itemsize); + } + assert(!tmp); + } else if (tmp) { + Safefree(tmp); } } break; diff --git a/t/op/write.t b/t/op/write.t index 976713f..96e99e4 100755 --- a/t/op/write.t +++ b/t/op/write.t @@ -61,7 +61,7 @@ for my $tref ( @NumTests ){ my $bas_tests = 20; # number of tests in section 3 -my $bug_tests = 4 + 3 * 3 * 5 * 2; +my $bug_tests = 4 + 3 * 3 * 5 * 2 * 3; # number of tests in section 4 my $hmb_tests = 35; @@ -512,7 +512,28 @@ for my $tref ( @NumTests ){ } { - my ($pound_utf8, $pm_utf8) = map { my $a = "$_\x{100}"; chop $a; $a} + package Count; + + sub TIESCALAR { + my $class = shift; + bless [shift, 0, 0], $class; + } + + sub FETCH { + my $self = shift; + ++$self->[1]; + $self->[0]; + } + + sub STORE { + my $self = shift; + ++$self->[2]; + $self->[0] = shift; + } +} + +{ + my ($pound_utf8, $pm_utf8) = map { my $a = "$_\x{100}"; chop $a; $a} my ($pound, $pm) = ("\xA3", "\xB1"); foreach my $first ('N', $pound, $pound_utf8) { @@ -521,16 +542,27 @@ for my $tref ( @NumTests ){ "$base\nMoo!\n",) { foreach (['^*', qr/(.+)/], ['@*', qr/(.*?)$/s]) { my ($format, $re) = @$_; - my $name = "$first, $second $format"; - $name =~ s/\n/\\n/g; - - my ($copy1, $copy2) = ($first, $second); - $first =~ /(.+)/ or die $first; - my $expect = "1${1}2"; - $second =~ $re or die $second; - $expect .= " 3${1}4"; - - is swrite("1^*2 3${format}4", $copy1, $copy2), $expect, $name; + foreach my $class ('', 'Count') { + my $name = "$first, $second $format $class"; + $name =~ s/\n/\\n/g; + + $first =~ /(.+)/ or die $first; + my $expect = "1${1}2"; + $second =~ $re or die $second; + $expect .= " 3${1}4"; + + if ($class) { + my $copy1 = $first; + my $copy2; + tie $copy2, $class, $second; + is swrite("1^*2 3${format}4", $copy1, $copy2), $expect, $name; + my $obj = tied $copy2; + is $obj->[1], 1, 'value read exactly once'; + } else { + my ($copy1, $copy2) = ($first, $second); + is swrite("1^*2 3${format}4", $copy1, $copy2), $expect, $name; + } + } } } }