X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=t%2Funi%2Fclass.t;h=bf85d6e99527842201480a24726a4b06baa74306;hb=a2309040b8fe324ae09c064137c624b4292d93c1;hp=72ba7e364cb2b10c7fb3cc93ab8044f1e8ebae48;hpb=12ac2576dfc10fd43d91903e7602870c10b4f00f;p=p5sagit%2Fp5-mst-13.2.git diff --git a/t/uni/class.t b/t/uni/class.t index 72ba7e3..bf85d6e 100644 --- a/t/uni/class.t +++ b/t/uni/class.t @@ -4,7 +4,7 @@ BEGIN { require "test.pl"; } -plan tests => 4334; +plan tests => 4784; sub MyUniClass { <updir; - # the %utf8::... hashes are already in existence # because utf8_pva.pl was run by utf8_heavy.pl +*utf8::PropertyAlias = *utf8::PropertyAlias; # thwart a warning + +no warnings 'utf8'; # we do not want warnings about surrogates etc + +sub char_range { + my ($h1, $h2) = @_; + + my $str; + + if (ord('A') == 193 && $h1 < 256) { + my $h3 = ($h2 || $h1) + 1; + if ($h3 - $h1 == 1) { + $str = join "", pack 'U*', $h1 .. $h3; # Using pack since chr doesn't generate Unicode chars for value < 256. + } elsif ($h3 - $h1 > 1) { + for (my $i = $h1; $i <= $h3; $i++) { + $str = join "", $str, pack 'U*', $i; + } + } + } else { + $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); + } + + return $str; +} + # non-General Category and non-Script while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) { my $prop_name = $utf8::PropertyAlias{$abbrev}; @@ -77,8 +130,9 @@ while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) { ); next unless -e $filename; - my ($h1, $h2) = map hex, split /\t/, (do $filename); - my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); + my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; + + my $str = char_range($h1, $h2); for my $p ($prop_name, $abbrev) { for my $c ($files->{$_}, $_) { @@ -97,36 +151,59 @@ for my $p ('gc', 'sc') { ); next unless -e $filename; - my ($h1, $h2) = map hex, split /\t/, (do $filename); - my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); + my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; + + my $str = char_range($h1, $h2); for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) { for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) { is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1)); is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1)); - is($str =~ /(\p{$y}+)/ && $1, substr($str, 0, -1)); - is($str =~ /(\P{$y}+)/ && $1, substr($str, -1)); + SKIP: { + skip("surrogate", 1) if $abbr eq 'cs'; + test_regexp ($str, $y); + } } } } } # test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.) -for (keys %utf8::PA_reverse) { - my $filename = File::Spec->catfile( - $updir => lib => unicore => lib => gc_sc => "$utf8::PA_reverse{$_}.pl" - ); +SKIP: +{ + skip "Can't reliably derive class names from file names", 576 if $^O eq 'VMS'; - next unless -e $filename; - my ($h1, $h2) = map hex, split /\t/, (do $filename); - my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); - - for my $x ('gc', 'General Category') { - for my $y ($_, $utf8::PA_reverse{$_}) { - is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1)); - is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1)); - is($str =~ /(\p{$y}+)/ && $1, substr($str, 0, -1)); - is($str =~ /(\P{$y}+)/ && $1, substr($str, -1)); + # On case tolerant filesystems, Cf.pl will cause a -e test for cf.pl to + # return true. Try to work around this by reading the filenames explicitly + # to get a case sensitive test. N.B. This will fail if filename case is + # not preserved because you might go looking for a class name of CF or cf + # when you really want Cf. Storing case sensitive data in filenames is + # simply not portable. + + my %files; + + my $dirname = File::Spec->catdir($updir => lib => unicore => lib => 'gc_sc'); + opendir D, $dirname or die $!; + @files{readdir(D)} = (); + closedir D; + + for (keys %utf8::PA_reverse) { + my $leafname = "$utf8::PA_reverse{$_}.pl"; + next unless exists $files{$leafname}; + + my $filename = File::Spec->catfile($dirname, $leafname); + + my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; + + my $str = char_range($h1, $h2); + + for my $x ('gc', 'General Category') { + print "# $filename $x $_, $utf8::PA_reverse{$_}\n"; + for my $y ($_, $utf8::PA_reverse{$_}) { + is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1)); + is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1)); + test_regexp ($str, $y); + } } } } @@ -138,16 +215,20 @@ for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) { ); next unless -e $filename; - my ($h1, $h2) = map hex, split /\t/, (do $filename); - my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); - my $blk = $_; + print "# In$_ $filename\n"; - is($str =~ /(\p{$blk}+)/ && $1, substr($str, 0, -1)); - is($str =~ /(\P{$blk}+)/ && $1, substr($str, -1)); + my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; - $blk =~ s/^In/Block:/; + my $str = char_range($h1, $h2); - is($str =~ /(\p{$blk}+)/ && $1, substr($str, 0, -1)); - is($str =~ /(\P{$blk}+)/ && $1, substr($str, -1)); + my $blk = $_; + + SKIP: { + skip($blk, 2) if $blk =~ /surrogates/i; + test_regexp ($str, $blk); + $blk =~ s/^In/Block:/; + test_regexp ($str, $blk); + } } +