Commit | Line | Data |
0f1b7392 |
1 | BEGIN { |
2 | chdir 't' if -d 't'; |
3 | @INC = qw(../lib .); |
4 | require "test.pl"; |
5 | } |
6 | |
a2bd7410 |
7 | plan tests => 4670; |
0f1b7392 |
8 | |
9 | sub MyUniClass { |
10 | <<END; |
11 | 0030 004F |
12 | END |
13 | } |
14 | |
15 | sub Other::Class { |
16 | <<END; |
17 | 0040 005F |
18 | END |
19 | } |
20 | |
21 | sub A::B::Intersection { |
22 | <<END; |
23 | +main::MyUniClass |
24 | &Other::Class |
25 | END |
26 | } |
27 | |
cd1c2c69 |
28 | sub test_regexp ($$) { |
29 | # test that given string consists of N-1 chars matching $qr1, and 1 |
30 | # char matching $qr2 |
31 | my ($str, $blk) = @_; |
32 | |
33 | # constructing these objects here makes the last test loop go much faster |
34 | my $qr1 = qr/(\p{$blk}+)/; |
35 | if ($str =~ $qr1) { |
36 | is($1, substr($str, 0, -1)); # all except last char |
37 | } |
38 | else { |
39 | fail('first N-1 chars did not match'); |
40 | } |
41 | |
42 | my $qr2 = qr/(\P{$blk}+)/; |
43 | if ($str =~ $qr2) { |
44 | is($1, substr($str, -1)); # only last char |
45 | } |
46 | else { |
47 | fail('last char did not match'); |
48 | } |
49 | } |
50 | |
51 | use strict; |
0f1b7392 |
52 | |
53 | my $str = join "", map chr($_), 0x20 .. 0x6F; |
54 | |
55 | # make sure it finds built-in class |
56 | is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'); |
12ac2576 |
57 | is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'); |
0f1b7392 |
58 | |
59 | # make sure it finds user-defined class |
60 | is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO'); |
61 | |
62 | # make sure it finds class in other package |
63 | is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_'); |
64 | |
65 | # make sure it finds class in other OTHER package |
66 | is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO'); |
12ac2576 |
67 | |
68 | # all of these should look in lib/unicore/bc/AL.pl |
69 | $str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}"; |
70 | is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}"); |
71 | is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}"); |
72 | is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}"); |
73 | is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}"); |
74 | |
75 | # make sure InGreek works |
76 | $str = "[\x{038B}\x{038C}\x{038D}]"; |
77 | |
78 | is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); |
79 | is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); |
80 | is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); |
81 | is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); |
82 | is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); |
83 | |
12ac2576 |
84 | use File::Spec; |
85 | my $updir = File::Spec->updir; |
86 | |
12ac2576 |
87 | # the %utf8::... hashes are already in existence |
88 | # because utf8_pva.pl was run by utf8_heavy.pl |
89 | |
85827533 |
90 | *utf8::PropertyAlias = *utf8::PropertyAlias; # thwart a warning |
91 | |
92 | no warnings 'utf8'; # we do not want warnings about surrogates etc |
93 | |
12ac2576 |
94 | # non-General Category and non-Script |
95 | while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) { |
96 | my $prop_name = $utf8::PropertyAlias{$abbrev}; |
97 | next unless $prop_name; |
98 | next if $abbrev eq "gc_sc"; |
99 | |
100 | for (sort keys %$files) { |
101 | my $filename = File::Spec->catfile( |
102 | $updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl" |
103 | ); |
104 | |
105 | next unless -e $filename; |
85827533 |
106 | my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; |
12ac2576 |
107 | my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); |
108 | |
109 | for my $p ($prop_name, $abbrev) { |
110 | for my $c ($files->{$_}, $_) { |
111 | is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1)); |
112 | is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1)); |
113 | } |
114 | } |
115 | } |
116 | } |
117 | |
118 | # General Category and Script |
119 | for my $p ('gc', 'sc') { |
120 | while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) { |
121 | my $filename = File::Spec->catfile( |
122 | $updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl" |
123 | ); |
124 | |
125 | next unless -e $filename; |
85827533 |
126 | my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; |
12ac2576 |
127 | my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); |
128 | |
129 | for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) { |
130 | for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) { |
131 | is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1)); |
132 | is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1)); |
cd1c2c69 |
133 | test_regexp ($str, $y); |
12ac2576 |
134 | } |
135 | } |
136 | } |
137 | } |
138 | |
139 | # test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.) |
53cd5480 |
140 | SKIP: |
32d0b1dc |
141 | { |
26961258 |
142 | skip "Can't reliably derive class names from file names", 592 if $^O eq 'VMS'; |
53cd5480 |
143 | |
144 | # On case tolerant filesystems, Cf.pl will cause a -e test for cf.pl to |
145 | # return true. Try to work around this by reading the filenames explicitly |
146 | # to get a case sensitive test. N.B. This will fail if filename case is |
147 | # not preserved because you might go looking for a class name of CF or cf |
148 | # when you really want Cf. Storing case sensitive data in filenames is |
149 | # simply not portable. |
150 | |
32d0b1dc |
151 | my %files; |
12ac2576 |
152 | |
cd1c2c69 |
153 | my $dirname = File::Spec->catdir($updir => lib => unicore => lib => 'gc_sc'); |
32d0b1dc |
154 | opendir D, $dirname or die $!; |
85827533 |
155 | @files{readdir(D)} = (); |
32d0b1dc |
156 | closedir D; |
157 | |
158 | for (keys %utf8::PA_reverse) { |
159 | my $leafname = "$utf8::PA_reverse{$_}.pl"; |
160 | next unless exists $files{$leafname}; |
12ac2576 |
161 | |
32d0b1dc |
162 | my $filename = File::Spec->catfile($dirname, $leafname); |
163 | |
85827533 |
164 | my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; |
32d0b1dc |
165 | my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); |
166 | |
167 | for my $x ('gc', 'General Category') { |
168 | print "# $filename $x $_, $utf8::PA_reverse{$_}\n"; |
169 | for my $y ($_, $utf8::PA_reverse{$_}) { |
170 | is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1)); |
171 | is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1)); |
cd1c2c69 |
172 | test_regexp ($str, $y); |
32d0b1dc |
173 | } |
12ac2576 |
174 | } |
175 | } |
176 | } |
177 | |
178 | # test the blocks (InFoobar) |
179 | for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) { |
180 | my $filename = File::Spec->catfile( |
181 | $updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl" |
182 | ); |
183 | |
184 | next unless -e $filename; |
cd1c2c69 |
185 | |
186 | print "# In$_ $filename\n"; |
187 | |
85827533 |
188 | my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; |
12ac2576 |
189 | my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); |
190 | |
191 | my $blk = $_; |
192 | |
cd1c2c69 |
193 | test_regexp ($str, $blk); |
12ac2576 |
194 | $blk =~ s/^In/Block:/; |
cd1c2c69 |
195 | test_regexp ($str, $blk); |
12ac2576 |
196 | } |
cd1c2c69 |
197 | |