21 sub A::B::Intersection {
28 sub test_regexp ($$) {
29 # test that given string consists of N-1 chars matching $qr1, and 1
33 # constructing these objects here makes the last test loop go much faster
34 my $qr1 = qr/(\p{$blk}+)/;
36 is($1, substr($str, 0, -1)); # all except last char
39 fail('first N-1 chars did not match');
42 my $qr2 = qr/(\P{$blk}+)/;
44 is($1, substr($str, -1)); # only last char
47 fail('last char did not match');
55 if (ord('A') == 193) {
56 $str = join "", map chr($_), 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61, 0xF0 .. 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F, 0x7C, 0xC1 .. 0xC9, 0xD1 .. 0xD9, 0xE2 .. 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D, 0x79, 0x81 .. 0x89, 0x91 .. 0x96; # IBM-1047
58 $str = join "", map chr($_), 0x20 .. 0x6F;
61 # make sure it finds built-in class
62 is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
63 is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
65 # make sure it finds user-defined class
66 is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO');
68 # make sure it finds class in other package
69 is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');
71 # make sure it finds class in other OTHER package
72 is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO');
74 # all of these should look in lib/unicore/bc/AL.pl
75 $str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}";
76 is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
77 is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}");
78 is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
79 is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}");
81 # make sure InGreek works
82 $str = "[\x{038B}\x{038C}\x{038D}]";
84 is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
85 is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
86 is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
87 is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
88 is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
91 my $updir = File::Spec->updir;
93 # the %utf8::... hashes are already in existence
94 # because utf8_pva.pl was run by utf8_heavy.pl
96 *utf8::PropertyAlias = *utf8::PropertyAlias; # thwart a warning
98 no warnings 'utf8'; # we do not want warnings about surrogates etc
105 if (ord('A') == 193 && $h1 < 256) {
106 my $h3 = ($h2 || $h1) + 1;
107 if ($h3 - $h1 == 1) {
108 $str = join "", pack 'U*', $h1 .. $h3; # Using pack since chr doesn't generate Unicode chars for value < 256.
109 } elsif ($h3 - $h1 > 1) {
110 for (my $i = $h1; $i <= $h3; $i++) {
111 $str = join "", $str, pack 'U*', $i;
115 $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
121 # non-General Category and non-Script
122 while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) {
123 my $prop_name = $utf8::PropertyAlias{$abbrev};
124 next unless $prop_name;
125 next if $abbrev eq "gc_sc";
127 for (sort keys %$files) {
128 my $filename = File::Spec->catfile(
129 $updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl"
132 next unless -e $filename;
133 my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
135 my $str = char_range($h1, $h2);
137 for my $p ($prop_name, $abbrev) {
138 for my $c ($files->{$_}, $_) {
139 is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1));
140 is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1));
146 # General Category and Script
147 for my $p ('gc', 'sc') {
148 while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) {
149 my $filename = File::Spec->catfile(
150 $updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl"
153 next unless -e $filename;
154 my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
156 my $str = char_range($h1, $h2);
158 for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) {
159 for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) {
160 is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
161 is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
163 skip("surrogate", 1) if $abbr eq 'cs';
164 test_regexp ($str, $y);
171 # test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.)
174 skip "Can't reliably derive class names from file names", 576 if $^O eq 'VMS';
176 # On case tolerant filesystems, Cf.pl will cause a -e test for cf.pl to
177 # return true. Try to work around this by reading the filenames explicitly
178 # to get a case sensitive test. N.B. This will fail if filename case is
179 # not preserved because you might go looking for a class name of CF or cf
180 # when you really want Cf. Storing case sensitive data in filenames is
181 # simply not portable.
185 my $dirname = File::Spec->catdir($updir => lib => unicore => lib => 'gc_sc');
186 opendir D, $dirname or die $!;
187 @files{readdir(D)} = ();
190 for (keys %utf8::PA_reverse) {
191 my $leafname = "$utf8::PA_reverse{$_}.pl";
192 next unless exists $files{$leafname};
194 my $filename = File::Spec->catfile($dirname, $leafname);
196 my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
198 my $str = char_range($h1, $h2);
200 for my $x ('gc', 'General Category') {
201 print "# $filename $x $_, $utf8::PA_reverse{$_}\n";
202 for my $y ($_, $utf8::PA_reverse{$_}) {
203 is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
204 is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
205 test_regexp ($str, $y);
211 # test the blocks (InFoobar)
212 for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) {
213 my $filename = File::Spec->catfile(
214 $updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl"
217 next unless -e $filename;
219 print "# In$_ $filename\n";
221 my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
223 my $str = char_range($h1, $h2);
228 skip($blk, 2) if $blk =~ /surrogates/i;
229 test_regexp ($str, $blk);
230 $blk =~ s/^In/Block:/;
231 test_regexp ($str, $blk);