X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=t%2Fpragma%2Futf8.t;h=c3538c0cb5927295ad211983e8b3f23b30df223e;hb=22d4bb9ccb8701e68f9243547d7e3a3c55f70908;hp=d1546feeafa0319f131b5d9342e7d49f82389e13;hpb=4b19af017623bfa3bb72bb164598a517f586e0d3;p=p5sagit%2Fp5-mst-13.2.git diff --git a/t/pragma/utf8.t b/t/pragma/utf8.t index d1546fe..c3538c0 100755 --- a/t/pragma/utf8.t +++ b/t/pragma/utf8.t @@ -2,7 +2,7 @@ BEGIN { chdir 't' if -d 't'; - unshift @INC, '../lib'; + @INC = '../lib'; $ENV{PERL5LIB} = '../lib'; if ( ord("\t") != 9 ) { # skip on ebcdic platforms print "1..0 # Skip utf8 tests on ebcdic platform.\n"; @@ -10,7 +10,7 @@ BEGIN { } } -print "1..66\n"; +print "1..99\n"; my $test = 1; @@ -20,6 +20,12 @@ sub ok { print "ok $test\n"; } +sub nok { + my ($got,$expect) = @_; + print "# expected not [$expect], got [$got]\nnot " if $got eq $expect; + print "ok $test\n"; +} + sub ok_bytes { use bytes; my ($got,$expect) = @_; @@ -27,6 +33,12 @@ sub ok_bytes { print "ok $test\n"; } +sub nok_bytes { + use bytes; + my ($got,$expect) = @_; + print "# expected not [$expect], got [$got]\nnot " if $got eq $expect; + print "ok $test\n"; +} { use utf8; @@ -295,3 +307,221 @@ sub ok_bytes { ok_bytes chr(0xe2), pack("C*", 0xc3, 0xa2); $test++; # 66 } + +{ + use utf8; + my @a = map ord, split(//, join("", map chr, (1234, 123, 2345))); + ok "@a", "1234 123 2345"; + $test++; # 67 +} + +{ + use utf8; + my $x = chr(123); + my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345))); + ok "@a", "1234 2345"; + $test++; # 68 +} + +{ + # bug id 20001009.001 + + my($a,$b); + { use bytes; $a = "\xc3\xa4"; } + { use utf8; $b = "\xe4"; } + { use bytes; ok_bytes $a, $b; $test++; } # 69 + { use utf8; nok $a, $b; $test++; } # 70 +} + +{ + # bug id 20001008.001 + + my @x = ("stra\337e 138","stra\337e 138"); + for (@x) { + s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; + my($latin) = /^(.+)(?:\s+\d)/; + print $latin eq "stra\337e" ? "ok $test\n" : + "#latin[$latin]\nnot ok $test\n"; + $test++; + $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a + use utf8; + $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a + } +} + +{ + # bug id 20000819.004 + + $_ = $dx = "\x{10f2}"; + s/($dx)/$dx$1/; + { + use bytes; + print "not " unless $_ eq "$dx$dx"; + print "ok $test\n"; + $test++; + } + + $_ = $dx = "\x{10f2}"; + s/($dx)/$1$dx/; + { + use bytes; + print "not " unless $_ eq "$dx$dx"; + print "ok $test\n"; + $test++; + } + + $dx = "\x{10f2}"; + $_ = "\x{10f2}\x{10f2}"; + s/($dx)($dx)/$1$2/; + { + use bytes; + print "not " unless $_ eq "$dx$dx"; + print "ok $test\n"; + $test++; + } +} + +{ + # bug id 20000323.056 + + use utf8; + + print "not " unless "\x{41}" eq +v65; + print "ok $test\n"; + $test++; + + print "not " unless "\x41" eq +v65; + print "ok $test\n"; + $test++; + + print "not " unless "\x{c8}" eq +v200; + print "ok $test\n"; + $test++; + + print "not " unless "\xc8" eq +v200; + print "ok $test\n"; + $test++; + + print "not " unless "\x{221b}" eq v8731; + print "ok $test\n"; + $test++; +} + +{ + # bug id 20000427.003 + + use utf8; + use warnings; + use strict; + + my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}"; + + my @charlist = split //, $sushi; + my $r = ''; + foreach my $ch (@charlist) { + $r = $r . " " . sprintf "U+%04X", ord($ch); + } + + print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B"; + print "ok $test\n"; + $test++; +} + +{ + # bug id 20000901.092 + # test that undef left and right of utf8 results in a valid string + + my $a; + $a .= "\x{1ff}"; + print "not " unless $a eq "\x{1ff}"; + print "ok $test\n"; + $test++; +} + +{ + # bug id 20000426.003 + + use utf8; + + my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20"; + + my ($a, $b, $c) = split(/\x40/, $s); + print "not " + unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a; + print "ok $test\n"; + $test++; + + my ($a, $b) = split(/\x{100}/, $s); + print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20"; + print "ok $test\n"; + $test++; + + my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s); + print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20"; + print "ok $test\n"; + $test++; + + my ($a, $b) = split(/\x40\x{80}/, $s); + print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20"; + print "ok $test\n"; + $test++; + + my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s); + print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20"; + print "ok $test\n"; + $test++; +} + +{ + # bug id 20000730.004 + + use utf8; + + my $smiley = "\x{263a}"; + + for my $s ("\x{263a}", # 1 + $smiley, # 2 + + "" . $smiley, # 3 + "" . "\x{263a}", # 4 + + $smiley . "", # 5 + "\x{263a}" . "", # 6 + ) { + my $length_chars = length($s); + my $length_bytes; + { use bytes; $length_bytes = length($s) } + my @regex_chars = $s =~ m/(.)/g; + my $regex_chars = @regex_chars; + my @split_chars = split //, $s; + my $split_chars = @split_chars; + print "not " + unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq + "1/1/1/3"; + print "ok $test\n"; + $test++; + } + + for my $s ("\x{263a}" . "\x{263a}", # 7 + $smiley . $smiley, # 8 + + "\x{263a}\x{263a}", # 9 + "$smiley$smiley", # 10 + + "\x{263a}" x 2, # 11 + $smiley x 2, # 12 + ) { + my $length_chars = length($s); + my $length_bytes; + { use bytes; $length_bytes = length($s) } + my @regex_chars = $s =~ m/(.)/g; + my $regex_chars = @regex_chars; + my @split_chars = split //, $s; + my $split_chars = @split_chars; + print "not " + unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq + "2/2/2/6"; + print "ok $test\n"; + $test++; + } +}