From: John Peacock Date: Mon, 10 Sep 2001 16:34:30 +0000 (-0400) Subject: PATCH Resubmission - was Re: [ID 20010902.001] v strings over 2*31 barf X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e83d50c9254af4011034b5ec3368b06bb6254055;p=p5sagit%2Fp5-mst-13.2.git PATCH Resubmission - was Re: [ID 20010902.001] v strings over 2*31 barf Message-ID: <3B9D23D6.90BCCC25@rowman.com> p4raw-id: //depot/perl@11986 --- diff --git a/sv.c b/sv.c index dcca51c..a7883af 100644 --- a/sv.c +++ b/sv.c @@ -7955,13 +7955,15 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV if (!veclen) continue; if (vec_utf) - iv = (IV)utf8n_to_uvchr(vecstr, veclen, &ulen, 0); + uv = utf8n_to_uvchr(vecstr, veclen, &ulen, UTF8_ALLOW_ANYUV); else { - iv = *vecstr; + uv = *vecstr; ulen = 1; } vecstr += ulen; veclen -= ulen; + if (plus) + esignbuf[esignlen++] = plus; } else if (args) { switch (intsize) { @@ -7986,14 +7988,17 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV #endif } } - if (iv >= 0) { - uv = iv; - if (plus) - esignbuf[esignlen++] = plus; - } - else { - uv = -iv; - esignbuf[esignlen++] = '-'; + if ( !vectorize ) /* we already set uv above */ + { + if (iv >= 0) { + uv = iv; + if (plus) + esignbuf[esignlen++] = plus; + } + else { + uv = -iv; + esignbuf[esignlen++] = '-'; + } } base = 10; goto integer; @@ -8035,7 +8040,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV if (!veclen) continue; if (vec_utf) - uv = utf8n_to_uvchr(vecstr, veclen, &ulen, 0); + uv = utf8n_to_uvchr(vecstr, veclen, &ulen, UTF8_ALLOW_ANYUV); else { uv = *vecstr; ulen = 1; diff --git a/t/op/ver.t b/t/op/ver.t index 58408b6..4ccc84c 100755 --- a/t/op/ver.t +++ b/t/op/ver.t @@ -3,50 +3,42 @@ BEGIN { chdir 't' if -d 't'; @INC = '../lib'; + $SIG{'__WARN__'} = sub { warn $_[0] if $DOWARN }; } -print "1..39\n"; +$DOWARN = 1; # enable run-time warnings now -my $test = 1; +use Config; +$tests = $Config{'uvsize'} == 8 ? 47 : 44; -sub okeq { - my $ok = $_[0] eq $_[1];; - print "not " unless $ok; - print "ok ", $test++; - print " # $_[2]" if !$ok && @_ == 3; - print "\n"; -} +require Test::More; +Test::More->import( tests => $tests ); -sub skip { print "ok ", $test++, " # Skip: $_[0]\n" } +eval { use v5.5.640; }; +is( $@, '', "use v5.5.640; $@"); -use v5.5.640; -require v5.5.640; -print "ok $test\n"; ++$test; +require_ok('v5.5.640'); # printing characters should work if (ord("\t") == 9) { # ASCII - print v111; - print v107.32; - print "$test\n"; ++$test; + is('ok ',v111.107.32,'ASCII printing characters'); # hash keys too $h{v111.107} = "ok"; - print "$h{ok} $test\n"; ++$test; + is('ok',$h{v111.107},'ASCII hash keys'); } else { # EBCDIC - print v150; - print v146.64; - print "$test\n"; ++$test; + is('ok ',v150.146.64,'EBCDIC printing characters'); # hash keys too $h{v150.146} = "ok"; - print "$h{ok} $test\n"; ++$test; + is('ok',$h{v111.107},'ASCII hash keys'); } # poetry optimization should also sub v77 { "ok" } $x = v77; -print "$x $test\n"; ++$test; +is('ok',$x,'poetry optimization'); # but not when dots are involved if (ord("\t") == 9) { # ASCII @@ -55,15 +47,16 @@ if (ord("\t") == 9) { # ASCII else { $x = v212.213.214; } -okeq($x, "MNO"); +is($x, 'MNO','poetry optimization with dots'); -okeq(v1.20.300.4000, "\x{1}\x{14}\x{12c}\x{fa0}"); +is(v1.20.300.4000, "\x{1}\x{14}\x{12c}\x{fa0}",'compare embedded \x{} string'); # # now do the same without the "v" -use 5.5.640; -require 5.5.640; -print "ok $test\n"; ++$test; +eval { use 5.5.640; }; +is( $@, '', "use 5.5.640; $@"); + +require_ok('5.5.640'); # hash keys too if (ord("\t") == 9) { # ASCII @@ -72,7 +65,7 @@ if (ord("\t") == 9) { # ASCII else { $h{150.146.64} = "ok"; } -print "$h{ok } $test\n"; ++$test; +is('ok',$h{ok },'hash keys w/o v'); if (ord("\t") == 9) { # ASCII $x = 77.78.79; @@ -80,131 +73,117 @@ if (ord("\t") == 9) { # ASCII else { $x = 212.213.214; } -okeq($x, "MNO"); +is($x, 'MNO','poetry optimization with dots w/o v'); -okeq(1.20.300.4000, "\x{1}\x{14}\x{12c}\x{fa0}"); +is(1.20.300.4000, "\x{1}\x{14}\x{12c}\x{fa0}",'compare embedded \x{} string w/o v'); # test sprintf("%vd"...) etc if (ord("\t") == 9) { # ASCII - okeq(sprintf("%vd", "Perl"), '80.101.114.108'); + is(sprintf("%vd", "Perl"), '80.101.114.108', 'ASCII sprintf("%vd", "Perl")'); } else { - okeq(sprintf("%vd", "Perl"), '215.133.153.147'); + is(sprintf("%vd", "Perl"), '215.133.153.147', 'EBCDIC sprintf("%vd", "Perl")'); } -okeq(sprintf("%vd", v1.22.333.4444), '1.22.333.4444'); +is(sprintf("%vd", v1.22.333.4444), '1.22.333.4444', 'sprintf("%vd", v1.22.333.4444)'); if (ord("\t") == 9) { # ASCII - okeq(sprintf("%vx", "Perl"), '50.65.72.6c'); + is(sprintf("%vx", "Perl"), '50.65.72.6c', 'ASCII sprintf("%vx", "Perl")'); } else { - okeq(sprintf("%vx", "Perl"), 'd7.85.99.93'); + is(sprintf("%vx", "Perl"), 'd7.85.99.93', 'EBCDIC sprintf("%vx", "Perl")'); } -okeq(sprintf("%vX", 1.22.333.4444), '1.16.14D.115C'); +is(sprintf("%vX", 1.22.333.4444), '1.16.14D.115C','ASCII sprintf("%vX", 1.22.333.4444)'); if (ord("\t") == 9) { # ASCII - okeq(sprintf("%#*vo", ":", "Perl"), '0120:0145:0162:0154'); + is(sprintf("%#*vo", ":", "Perl"), '0120:0145:0162:0154', 'ASCII sprintf("%vo", "Perl")'); } else { - okeq(sprintf("%#*vo", ":", "Perl"), '0327:0205:0231:0223'); + is(sprintf("%#*vo", ":", "Perl"), '0327:0205:0231:0223', 'EBCDIC sprintf("%vo", "Perl")'); } -okeq(sprintf("%*vb", "##", v1.22.333.4444), - '1##10110##101001101##1000101011100'); +is(sprintf("%*vb", "##", v1.22.333.4444), + '1##10110##101001101##1000101011100', 'sprintf("%vb", 1.22.333.4444)'); -okeq(sprintf("%vd", join("", map { chr } +is(sprintf("%vd", join("", map { chr } unpack 'U*', pack('U*',2001,2002,2003))), - '2001.2002.2003'); + '2001.2002.2003','unpack/pack U*'); { use bytes; if (ord("\t") == 9) { # ASCII - okeq(sprintf("%vd", "Perl"), '80.101.114.108'); + is(sprintf("%vd", "Perl"), '80.101.114.108', 'ASCII sprintf("%vd", "Perl") w/use bytes'); } else { - okeq(sprintf("%vd", "Perl"), '215.133.153.147'); + is(sprintf("%vd", "Perl"), '215.133.153.147', 'EBCDIC sprintf("%vd", "Perl") w/use bytes'); } if (ord("\t") == 9) { # ASCII - okeq(sprintf("%vd", 1.22.333.4444), '1.22.197.141.225.133.156'); + is(sprintf("%vd", 1.22.333.4444), '1.22.197.141.225.133.156', 'ASCII sprintf("%vd", v1.22.333.4444 w/use bytes'); } else { - okeq(sprintf("%vd", 1.22.333.4444), '1.22.142.84.187.81.112'); + is(sprintf("%vd", 1.22.333.4444), '1.22.142.84.187.81.112', 'EBCDIC sprintf("%vd", v1.22.333.4444 w/use bytes'); } if (ord("\t") == 9) { # ASCII - okeq(sprintf("%vx", "Perl"), '50.65.72.6c'); + is(sprintf("%vx", "Perl"), '50.65.72.6c', 'ASCII sprintf("%vx", "Perl")'); } else { - okeq(sprintf("%vx", "Perl"), 'd7.85.99.93'); + is(sprintf("%vx", "Perl"), 'd7.85.99.93', 'EBCDIC sprintf("%vx", "Perl")'); } if (ord("\t") == 9) { # ASCII - okeq(sprintf("%vX", v1.22.333.4444), '1.16.C5.8D.E1.85.9C'); + is(sprintf("%vX", v1.22.333.4444), '1.16.C5.8D.E1.85.9C', 'ASCII sprintf("%vX", v1.22.333.4444)'); } else { - okeq(sprintf("%vX", v1.22.333.4444), '1.16.8E.54.BB.51.70'); + is(sprintf("%vX", v1.22.333.4444), '1.16.8E.54.BB.51.70', 'EBCDIC sprintf("%vX", v1.22.333.4444)'); } if (ord("\t") == 9) { # ASCII - okeq(sprintf("%#*vo", ":", "Perl"), '0120:0145:0162:0154'); + is(sprintf("%#*vo", ":", "Perl"), '0120:0145:0162:0154', 'ASCII sprintf("%#*vo", ":", "Perl")'); } else { - okeq(sprintf("%#*vo", ":", "Perl"), '0327:0205:0231:0223'); + is(sprintf("%#*vo", ":", "Perl"), '0327:0205:0231:0223', 'EBCDIC sprintf("%#*vo", ":", "Perl")'); } if (ord("\t") == 9) { # ASCII - okeq(sprintf("%*vb", "##", v1.22.333.4444), - '1##10110##11000101##10001101##11100001##10000101##10011100'); + is(sprintf("%*vb", "##", v1.22.333.4444), + '1##10110##11000101##10001101##11100001##10000101##10011100', + 'ASCII sprintf("%*vb", "##", v1.22.333.4444)'); } else { - okeq(sprintf("%*vb", "##", v1.22.333.4444), - '1##10110##10001110##1010100##10111011##1010001##1110000'); + is(sprintf("%*vb", "##", v1.22.333.4444), + '1##10110##10001110##1010100##10111011##1010001##1110000', + 'EBCDIC sprintf("%*vb", "##", v1.22.333.4444)'); } } { - # 24..28 - # bug id 20000323.056 - print "not " unless "\x{41}" eq +v65; - print "ok $test\n"; - $test++; - - print "not " unless "\x41" eq +v65; - print "ok $test\n"; - $test++; - - print "not " unless "\x{c8}" eq +v200; - print "ok $test\n"; - $test++; - - print "not " unless "\xc8" eq +v200; - print "ok $test\n"; - $test++; - - print "not " unless "\x{221b}" eq v8731; - print "ok $test\n"; - $test++; + is( "\x{41}", +v65, 'bug id 20000323.056'); + is( "\x41", +v65, 'bug id 20000323.056'); + is( "\x{c8}", +v200, 'bug id 20000323.056'); + is( "\xc8", +v200, 'bug id 20000323.056'); + is( "\x{221b}", +v8731, 'bug id 20000323.056'); } # See if the things Camel-III says are true: 29..33 # Chapter 2 pp67/68 my $vs = v1.20.300.4000; -okeq($vs,"\x{1}\x{14}\x{12c}\x{fa0}","v-string ne \\x{}"); -okeq($vs,chr(1).chr(20).chr(300).chr(4000),"v-string ne chr()"); -okeq('foo',((chr(193) eq 'A') ? v134.150.150 : v102.111.111),"v-string ne ''"); +is($vs,"\x{1}\x{14}\x{12c}\x{fa0}","v-string ne \\x{}"); +is($vs,chr(1).chr(20).chr(300).chr(4000),"v-string ne chr()"); +is('foo',((chr(193) eq 'A') ? v134.150.150 : v102.111.111),"v-string ne ''"); # Chapter 15, pp403 # See if sane addr and gethostbyaddr() work eval { require Socket; gethostbyaddr(v127.0.0.1, Socket::AF_INET) }; if ($@) { - # No - so don't test insane fails. + # No - so do not test insane fails. $@ =~ s/\n/\n# /g; skip("No Socket::AF_INET # $@"); } @@ -212,27 +191,38 @@ else { my $ip = v2004.148.0.1; my $host; eval { $host = gethostbyaddr($ip,Socket::AF_INET) }; - okeq($@ =~ /Wide character/,1,"Non-bytes leak to gethostbyaddr"); + ok($@ =~ /Wide character/,"Non-bytes leak to gethostbyaddr"); } # Chapter 28, pp671 -okeq(v5.6.0 lt v5.7.0,1,"v5.6.0 lt v5.7.0 fails"); - -# 34..37: part of 20000323.059 -okeq(v200,chr(200),"v200 ne chr(200)"); -okeq(v200,+v200,"v200 ne +v200"); -okeq(v200,eval("v200"),'v200 ne "v200"'); -okeq(v200,eval("+v200"),'v200 ne eval("+v200")'); - -# There have been no actual tests for $] itself until now -my ($REVISION,$VERSION,$SUBVERSION) = split '\.', sprintf("%vd",$^V); -my $v = sprintf("%d.%.3d%.3d",$REVISION,$VERSION,$SUBVERSION); -okeq($v,"$]","\$^V and \$] do not match (string)"); -$v = $REVISION+$VERSION/1000+$SUBVERSION/1000000; -if ( $v == $] ) { - print "ok $test"; +ok(v5.6.0 lt v5.7.0, "v5.6.0 lt v5.7.0"); + +# part of 20000323.059 +is(v200, chr(200), "v200 eq chr(200)" ); +is(v200, +v200, "v200 eq +v200" ); +is(v200, eval( "v200"), 'v200 eq "v200"' ); +is(v200, eval("+v200"), 'v200 eq eval("+v200")' ); + +# Tests for string/numeric value of $] itself +my ($revision,$version,$subversion) = split '\.', sprintf("%vd",$^V); + +my $v = sprintf("%d.%.3d%.3d",$revision,$version,$subversion); + +ok( $v eq "$]", "\$^V eq \$] (string)"); + +$v = $revision + $version/1000 + $subversion/1000000; + +ok( $v == $], "\$^V == \$] (numeric)" ); + +# [ID 20010902.001] check if v-strings handle full UV range or not +if ( $Config{'uvsize'} >= 4 ) { + is( sprintf("%vd", v2147483647.2147483648), '2147483647.2147483648', 'v-string > IV_MAX[32-bit]' ); + is( sprintf("%vd", v3141592653), '3141592653', 'IV_MAX < v-string < UV_MAX[32-bit]'); + is( sprintf("%vd", v4294967295), '4294967295', 'v-string == UV_MAX[32-bit] - 1'); } -else { - print "not ok $test \# \$^V and \$] do not match (numerically)"; + +if ( $Config{'uvsize'} >= 8 ) { + is( sprintf("%vd", v9223372036854775807.9223372036854775808), '9223372036854775807.9223372036854775808', 'v-string > IV_MAX[64-bit]' ); + is( sprintf("%vd", v17446744073709551615), '17446744073709551615', 'IV_MAX < v-string < UV_MAX[64-bit]'); + is( sprintf("%vd", v18446744073709551615), '18446744073709551615', 'v-string == UV_MAX[64-bit] - 1'); } -$test++; #in case anyone is adding more tests diff --git a/utf8.h b/utf8.h index c475d0f..01c6199 100644 --- a/utf8.h +++ b/utf8.h @@ -153,7 +153,7 @@ END_EXTERN_C UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|\ UTF8_ALLOW_FFFF|UTF8_ALLOW_LONG) #define UTF8_ALLOW_ANY 0x00ff -#define UTF8_CHECK_ONLY 0x0100 +#define UTF8_CHECK_ONLY 0x0200 #define UNICODE_SURROGATE_FIRST 0xd800 #define UNICODE_SURROGATE_LAST 0xdfff