21 sub A::B::Intersection {
28 sub test_regexp ($$) {
29 # test that given string consists of N-1 chars matching $qr1, and 1
33 # constructing these objects here makes the last test loop go much faster
34 my $qr1 = qr/(\p{$blk}+)/;
36 is($1, substr($str, 0, -1)); # all except last char
39 fail('first N-1 chars did not match');
42 my $qr2 = qr/(\P{$blk}+)/;
44 is($1, substr($str, -1)); # only last char
47 fail('last char did not match');
55 if (ord('A') == 193) {
56 $str = join "", map chr($_), 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61, 0xF0 .. 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F, 0x7C, 0xC1 .. 0xC9, 0xD1 .. 0xD9, 0xE2 .. 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D, 0x79, 0x81 .. 0x89, 0x91 .. 0x96; # IBM-1047
58 $str = join "", map chr($_), 0x20 .. 0x6F;
61 # make sure it finds built-in class
62 is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
63 is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
65 # make sure it finds user-defined class
66 is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO');
68 # make sure it finds class in other package
69 is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');
71 # make sure it finds class in other OTHER package
72 is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO');
74 # all of these should look in lib/unicore/bc/AL.pl
75 $str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}";
76 is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
77 is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}");
78 is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
79 is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}");
81 # make sure InGreek works
82 $str = "[\x{038B}\x{038C}\x{038D}]";
84 is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
85 is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
86 is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
87 is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
88 is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
91 my $updir = File::Spec->updir;
93 # the %utf8::... hashes are already in existence
94 # because utf8_pva.pl was run by utf8_heavy.pl
96 *utf8::PropertyAlias = *utf8::PropertyAlias; # thwart a warning
98 no warnings 'utf8'; # we do not want warnings about surrogates etc
105 if (ord('A') == 193 && $h1 < 256) {
106 my $h3 = ($h2 || $h1) + 1;
107 if ($h3 - $h1 == 1) {
108 $str = join "", pack 'U*', $h1 .. $h3; # Using pack since chr doesn't generate Unicode chars for value < 256.
109 } elsif ($h3 - $h1 > 1) {
110 for (my $i = $h1; $i <= $h3; $i++) {
111 $str = join "", $str, pack 'U*', $i;
115 $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
121 # non-General Category and non-Script
122 while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) {
123 my $prop_name = $utf8::PropertyAlias{$abbrev};
124 next unless $prop_name;
125 next if $abbrev eq "gc_sc";
127 for (sort keys %$files) {
128 my $filename = File::Spec->catfile(
129 $updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl"
132 next unless -e $filename;
133 my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
135 my $str = char_range($h1, $h2);
137 for my $p ($prop_name, $abbrev) {
138 for my $c ($files->{$_}, $_) {
139 is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1));
140 is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1));
146 # General Category and Script
147 for my $p ('gc', 'sc') {
148 while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) {
149 my $filename = File::Spec->catfile(
150 $updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl"
153 next unless -e $filename;
154 my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
156 my $str = char_range($h1, $h2);
158 for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) {
159 for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) {
160 is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
161 is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
162 test_regexp ($str, $y);
168 # test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.)
171 skip "Can't reliably derive class names from file names", 592 if $^O eq 'VMS';
173 # On case tolerant filesystems, Cf.pl will cause a -e test for cf.pl to
174 # return true. Try to work around this by reading the filenames explicitly
175 # to get a case sensitive test. N.B. This will fail if filename case is
176 # not preserved because you might go looking for a class name of CF or cf
177 # when you really want Cf. Storing case sensitive data in filenames is
178 # simply not portable.
182 my $dirname = File::Spec->catdir($updir => lib => unicore => lib => 'gc_sc');
183 opendir D, $dirname or die $!;
184 @files{readdir(D)} = ();
187 for (keys %utf8::PA_reverse) {
188 my $leafname = "$utf8::PA_reverse{$_}.pl";
189 next unless exists $files{$leafname};
191 my $filename = File::Spec->catfile($dirname, $leafname);
193 my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
195 my $str = char_range($h1, $h2);
197 for my $x ('gc', 'General Category') {
198 print "# $filename $x $_, $utf8::PA_reverse{$_}\n";
199 for my $y ($_, $utf8::PA_reverse{$_}) {
200 is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
201 is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
202 test_regexp ($str, $y);
208 # test the blocks (InFoobar)
209 for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) {
210 my $filename = File::Spec->catfile(
211 $updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl"
214 next unless -e $filename;
216 print "# In$_ $filename\n";
218 my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
220 my $str = char_range($h1, $h2);
224 test_regexp ($str, $blk);
225 $blk =~ s/^In/Block:/;
226 test_regexp ($str, $blk);