installperl patch
[p5sagit/p5-mst-13.2.git] / t / uni / class.t
CommitLineData
0f1b7392 1BEGIN {
2 chdir 't' if -d 't';
3 @INC = qw(../lib .);
4 require "test.pl";
5}
6
98fbe989 7plan tests => 4784;
0f1b7392 8
9sub MyUniClass {
10 <<END;
110030 004F
12END
13}
14
15sub Other::Class {
16 <<END;
170040 005F
18END
19}
20
21sub A::B::Intersection {
22 <<END;
23+main::MyUniClass
24&Other::Class
25END
26}
27
cd1c2c69 28sub test_regexp ($$) {
29 # test that given string consists of N-1 chars matching $qr1, and 1
30 # char matching $qr2
31 my ($str, $blk) = @_;
32
33 # constructing these objects here makes the last test loop go much faster
34 my $qr1 = qr/(\p{$blk}+)/;
35 if ($str =~ $qr1) {
36 is($1, substr($str, 0, -1)); # all except last char
37 }
38 else {
39 fail('first N-1 chars did not match');
40 }
41
42 my $qr2 = qr/(\P{$blk}+)/;
43 if ($str =~ $qr2) {
44 is($1, substr($str, -1)); # only last char
45 }
46 else {
47 fail('last char did not match');
48 }
49}
50
51use strict;
0f1b7392 52
250d67eb 53my $str;
54
55if (ord('A') == 193) {
56 $str = join "", map chr($_), 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61, 0xF0 .. 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F, 0x7C, 0xC1 .. 0xC9, 0xD1 .. 0xD9, 0xE2 .. 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D, 0x79, 0x81 .. 0x89, 0x91 .. 0x96; # IBM-1047
57} else {
58 $str = join "", map chr($_), 0x20 .. 0x6F;
59}
0f1b7392 60
61# make sure it finds built-in class
62is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
12ac2576 63is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
0f1b7392 64
65# make sure it finds user-defined class
66is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO');
67
68# make sure it finds class in other package
69is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');
70
71# make sure it finds class in other OTHER package
72is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO');
12ac2576 73
74# all of these should look in lib/unicore/bc/AL.pl
75$str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}";
76is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
77is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}");
78is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
79is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}");
80
81# make sure InGreek works
82$str = "[\x{038B}\x{038C}\x{038D}]";
83
84is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
85is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
86is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
87is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
88is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
89
12ac2576 90use File::Spec;
91my $updir = File::Spec->updir;
92
12ac2576 93# the %utf8::... hashes are already in existence
94# because utf8_pva.pl was run by utf8_heavy.pl
95
85827533 96*utf8::PropertyAlias = *utf8::PropertyAlias; # thwart a warning
97
98no warnings 'utf8'; # we do not want warnings about surrogates etc
99
250d67eb 100sub char_range {
101 my ($h1, $h2) = @_;
102
103 my $str;
104
105 if (ord('A') == 193 && $h1 < 256) {
106 my $h3 = ($h2 || $h1) + 1;
107 if ($h3 - $h1 == 1) {
108 $str = join "", pack 'U*', $h1 .. $h3; # Using pack since chr doesn't generate Unicode chars for value < 256.
109 } elsif ($h3 - $h1 > 1) {
110 for (my $i = $h1; $i <= $h3; $i++) {
111 $str = join "", $str, pack 'U*', $i;
112 }
113 }
114 } else {
115 $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
116 }
117
118 return $str;
119}
120
12ac2576 121# non-General Category and non-Script
122while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) {
123 my $prop_name = $utf8::PropertyAlias{$abbrev};
124 next unless $prop_name;
125 next if $abbrev eq "gc_sc";
126
127 for (sort keys %$files) {
128 my $filename = File::Spec->catfile(
129 $updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl"
130 );
131
132 next unless -e $filename;
85827533 133 my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
250d67eb 134
135 my $str = char_range($h1, $h2);
12ac2576 136
137 for my $p ($prop_name, $abbrev) {
138 for my $c ($files->{$_}, $_) {
139 is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1));
140 is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1));
141 }
142 }
143 }
144}
145
146# General Category and Script
147for my $p ('gc', 'sc') {
148 while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) {
149 my $filename = File::Spec->catfile(
150 $updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl"
151 );
152
153 next unless -e $filename;
85827533 154 my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
250d67eb 155
156 my $str = char_range($h1, $h2);
12ac2576 157
158 for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) {
159 for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) {
160 is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
161 is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
98fbe989 162 SKIP: {
163 skip("surrogate", 1) if $abbr eq 'cs';
164 test_regexp ($str, $y);
165 }
12ac2576 166 }
167 }
168 }
169}
170
171# test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.)
53cd5480 172SKIP:
32d0b1dc 173{
a4cec36d 174 skip "Can't reliably derive class names from file names", 576 if $^O eq 'VMS';
53cd5480 175
176 # On case tolerant filesystems, Cf.pl will cause a -e test for cf.pl to
177 # return true. Try to work around this by reading the filenames explicitly
178 # to get a case sensitive test. N.B. This will fail if filename case is
179 # not preserved because you might go looking for a class name of CF or cf
180 # when you really want Cf. Storing case sensitive data in filenames is
181 # simply not portable.
182
32d0b1dc 183 my %files;
12ac2576 184
cd1c2c69 185 my $dirname = File::Spec->catdir($updir => lib => unicore => lib => 'gc_sc');
32d0b1dc 186 opendir D, $dirname or die $!;
85827533 187 @files{readdir(D)} = ();
32d0b1dc 188 closedir D;
189
190 for (keys %utf8::PA_reverse) {
191 my $leafname = "$utf8::PA_reverse{$_}.pl";
192 next unless exists $files{$leafname};
12ac2576 193
32d0b1dc 194 my $filename = File::Spec->catfile($dirname, $leafname);
195
85827533 196 my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
250d67eb 197
198 my $str = char_range($h1, $h2);
32d0b1dc 199
200 for my $x ('gc', 'General Category') {
201 print "# $filename $x $_, $utf8::PA_reverse{$_}\n";
202 for my $y ($_, $utf8::PA_reverse{$_}) {
203 is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
204 is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
cd1c2c69 205 test_regexp ($str, $y);
32d0b1dc 206 }
12ac2576 207 }
208 }
209}
210
211# test the blocks (InFoobar)
212for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) {
213 my $filename = File::Spec->catfile(
214 $updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl"
215 );
216
217 next unless -e $filename;
cd1c2c69 218
219 print "# In$_ $filename\n";
220
85827533 221 my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
250d67eb 222
223 my $str = char_range($h1, $h2);
12ac2576 224
225 my $blk = $_;
226
98fbe989 227 SKIP: {
228 skip($blk, 2) if $blk =~ /surrogates/i;
229 test_regexp ($str, $blk);
230 $blk =~ s/^In/Block:/;
231 test_regexp ($str, $blk);
232 }
12ac2576 233}
cd1c2c69 234