make xsubpp generate well-formed code with CAPI && !PERL_OBJECT
[p5sagit/p5-mst-13.2.git] / lib / unicode / mktables.PL
CommitLineData
a0ed51b3 1#!../../miniperl
2
3# Note: we try to keep filenames unique within first 8 chars. Using
4# subdirectories for the following helps.
5mkdir "In", 0777;
6mkdir "Is", 0777;
7mkdir "To", 0777;
8
9@todo = (
10# typical
11
12 ['IsAlnum', '$cat =~ /^L[ulo]|^Nd/ or $code eq "005F"', ''],
13 ['IsAlpha', '$cat =~ /^L[ulo]/', ''],
14 ['IsSpace', '$cat =~ /^Z/ or $code lt "0020" and chr(hex $code) =~ /^\s/', ''],
15 ['IsDigit', '$cat =~ /^Nd$/', ''],
16 ['IsUpper', '$cat =~ /^Lu$/', ''],
17 ['IsLower', '$cat =~ /^Ll$/', ''],
18 ['IsPrint', '$cat =~ /^[^C]/', ''],
19 ['ToUpper', '$up', '$up'],
20 ['ToLower', '$down', '$down'],
21 ['ToTitle', '$title', '$title'],
22 ['ToDigit', '$dec ne ""', '$dec'],
23
24# Name
25
26 ['Name', '$name', '$name'],
27
28# Category
29
30 ['Category', '$cat', '$cat'],
31
32# Normative
33
34 ['IsM', '$cat =~ /^M/', ''], # Mark
35 ['IsMn', '$cat eq "Mn"', ''], # Mark, Non-Spacing
36 ['IsMc', '$cat eq "Mc"', ''], # Mark, Combining
37
38 ['IsN', '$cat =~ /^N/', ''], # Number
39 ['IsNd', '$cat eq "Nd"', ''], # Number, Decimal Digit
40 ['IsNo', '$cat eq "No"', ''], # Number, Other
41
42 ['IsZ', '$cat =~ /^Z/', ''], # Zeparator
43 ['IsZs', '$cat eq "Zs"', ''], # Separator, Space
44 ['IsZl', '$cat eq "Zl"', ''], # Separator, Line
45 ['IsZp', '$cat eq "Zp"', ''], # Separator, Paragraph
46
47 ['IsC', '$cat =~ /^C/', ''], # Crazy
48 ['IsCc', '$cat eq "Cc"', ''], # Other, Control or Format
49 ['IsCo', '$cat eq "Co"', ''], # Other, Private Use
50 ['IsCn', '$cat eq "Cn"', ''], # Other, Not Assigned
51
52# Informative
53
54 ['IsL', '$cat =~ /^L/', ''], # Letter
55 ['IsLu', '$cat eq "Lu"', ''], # Letter, Uppercase
56 ['IsLl', '$cat eq "Ll"', ''], # Letter, Lowercase
57 ['IsLt', '$cat eq "Lt"', ''], # Letter, Titlecase
58 ['IsLm', '$cat eq "Lm"', ''], # Letter, Modifier
59 ['IsLo', '$cat eq "Lo"', ''], # Letter, Other
60
61 ['IsP', '$cat =~ /^P/', ''], # Punctuation
62 ['IsPd', '$cat eq "Pd"', ''], # Punctuation, Dash
63 ['IsPs', '$cat eq "Ps"', ''], # Punctuation, Open
64 ['IsPe', '$cat eq "Pe"', ''], # Punctuation, Close
65 ['IsPo', '$cat eq "Po"', ''], # Punctuation, Other
66
67 ['IsS', '$cat =~ /^S/', ''], # Symbol
68 ['IsSm', '$cat eq "Sm"', ''], # Symbol, Math
69 ['IsSc', '$cat eq "Sc"', ''], # Symbol, Currency
70 ['IsSo', '$cat eq "So"', ''], # Symbol, Other
71
72# Combining class
73 ['CombiningClass', '$comb', '$comb'],
74
75# BIDIRECTIONAL PROPERTIES
76
77 ['Bidirectional', '$bid', '$bid'],
78
79# Strong types:
80
81 ['IsBidiL', '$bid eq "L"', ''], # Left-Right; Most alphabetic,
82 # syllabic, and logographic
83 # characters (e.g., CJK
84 # ideographs)
85 ['IsBidiR', '$bid eq "R"', ''], # Right-Left; Arabic, Hebrew,
86 # and punctuation specific to
87 # those scripts
88
89# Weak types:
90
91 ['IsBidiEN','$bid eq "EN"', ''], # European Number
92 ['IsBidiES','$bid eq "ES"', ''], # European Number Separator
93 ['IsBidiET','$bid eq "ET"', ''], # European Number Terminator
94 ['IsBidiAN','$bid eq "AN"', ''], # Arabic Number
95 ['IsBidiCS','$bid eq "CS"', ''], # Common Number Separator
96
97# Separators:
98
99 ['IsBidiB', '$bid eq "B"', ''], # Block Separator
100 ['IsBidiS', '$bid eq "S"', ''], # Segment Separator
101
102# Neutrals:
103
104 ['IsBidiWS','$bid eq "WS"', ''], # Whitespace
105 ['IsBidiON','$bid eq "ON"', ''], # Other Neutrals ; All other
106 # characters: punctuation,
107 # symbols
108
109# Decomposition
110
111 ['Decomposition', '$decomp', '$decomp'],
112 ['IsDecoCanon', '$decomp && $decomp !~ /^</', ''],
113 ['IsDecoCompat', '$decomp =~ /^</', ''],
114 ['IsDCfont', '$decomp =~ /^<font>/', ''],
115 ['IsDCnoBreak', '$decomp =~ /^<noBreak>/', ''],
116 ['IsDCinitial', '$decomp =~ /^<initial>/', ''],
117 ['IsDCinital', '$decomp =~ /^<medial>/', ''],
118 ['IsDCfinal', '$decomp =~ /^<final>/', ''],
119 ['IsDCisolated', '$decomp =~ /^<isolated>/', ''],
120 ['IsDCcircle', '$decomp =~ /^<circle>/', ''],
121 ['IsDCsuper', '$decomp =~ /^<super>/', ''],
122 ['IsDCsub', '$decomp =~ /^<sub>/', ''],
123 ['IsDCvertical', '$decomp =~ /^<vertical>/', ''],
124 ['IsDCwide', '$decomp =~ /^<wide>/', ''],
125 ['IsDCnarrow', '$decomp =~ /^<narrow>/', ''],
126 ['IsDCsmall', '$decomp =~ /^<small>/', ''],
127 ['IsDCsquare', '$decomp =~ /^<square>/', ''],
128 ['IsDCcompat', '$decomp =~ /^<compat>/', ''],
129
130# Number
131
132 ['Number', '$num', '$num'],
133
134# Mirrored
135
136 ['IsMirrored', '$mir eq "Y"', ''],
137
138# Arabic
139
140 ['ArabLink', '1', '$link'],
141 ['ArabLnkGrp', '1', '$linkgroup'],
142
143# Jamo
144
145 ['JamoShort', '1', '$short'],
146);
147
148# This is not written for speed...
149
150foreach $file (@todo) {
151 my ($table, $wanted, $val) = @$file;
152 next if @ARGV and not grep { $_ eq $table } @ARGV;
153 print $table,"\n";
154 if ($table =~ /^(Is|In|To)(.*)/) {
155 open(OUT, ">$1/$2.pl") or die "Can't create $1/$2.pl: $!\n";
156 }
157 else {
158 open(OUT, ">$table.pl") or die "Can't create $table.pl: $!\n";
159 }
160 print OUT <<"END";
161return <<'END';
162END
163 print OUT proplist($table, $wanted, $val);
164 print OUT "END\n";
165 close OUT;
166}
167
168# Must treat blocks specially.
169
170exit if @ARGV and not grep { $_ eq Block } @ARGV;
171print "Block\n";
172open(UD, 'blocks.txt') or die "Can't open blocks.txt: $!\n";
173open(OUT, ">Block.pl") or die "Can't create $table.pl: $!\n";
174print OUT <<"END";
175return <<'END';
176END
177
178while (<UD>) {
179 next if /^#/;
180 next if /^$/;
181 chomp;
182 ($code, $last, $name) = split(/; */);
183 if ($name) {
184 print OUT "$code $last $name\n";
185 $name =~ s/\s+//g;
186 open(BLOCK, ">In/$name.pl");
187 print BLOCK <<"END2";
188return <<'END';
189$code $last
190END
191END2
192 close BLOCK;
193 }
194}
195
196print OUT "END\n";
197close OUT;
198
199##################################################
200
201sub proplist {
202 my ($table, $wanted, $val) = @_;
203 my @wanted;
204 my $out;
205 my $split;
206
207 if ($table =~ /^Arab/) {
208 open(UD, "arabshp.txt") or warn "Can't open $table: $!";
209
210 $split = '($code, $name, $link, $linkgroup) = split(/; */);';
211 }
212 elsif ($table =~ /^Jamo/) {
213 open(UD, "jamo2.txt") or warn "Can't open $table: $!";
214
215 $split = '($code, $short, $name) = split(/; */); $code =~ s/^U\+//;';
216 }
217 else {
218 open(UD, "UnicodeData-Latest.txt") or warn "Can't open $table: $!";
219
220 $split = '($code, $name, $cat, $comb, $bid, $decomp, $dec, $dig, $num, $mir, $uni1,
221 $comment, $up, $down, $title) = split(/;/);';
222 }
223
224 if ($table =~ /^(?:To|Is)[A-Z]/) {
225 eval <<"END";
226 while (<UD>) {
227 next if /^#/;
228 next if /^\s/;
229 chop;
230 $split
231 if ($wanted) {
232 push(\@wanted, [hex \$code, hex $val, \$name =~ /, First>\$/]);
233 }
234 }
235END
236 die $@ if $@;
237
238 while (@wanted) {
239 $beg = shift @wanted;
240 $last = $beg;
241 while (@wanted and $wanted[0]->[0] == $last->[0] + 1 and
242 (not $val or $wanted[0]->[1] == $last->[1] + 1)) {
243 $last = shift @wanted;
244 }
245 $out .= sprintf "%04x", $beg->[0];
246 if ($beg->[2]) {
247 $last = shift @wanted;
248 }
249 if ($beg == $last) {
250 $out .= "\t";
251 }
252 else {
253 $out .= sprintf "\t%04x", $last->[0];
254 }
255 $out .= sprintf "\t%04x", $beg->[1] if $val;
256 $out .= "\n";
257 }
258 }
259 else {
260 eval <<"END";
261 while (<UD>) {
262 next if /^#/;
263 next if /^\s*\$/;
264 chop;
265 $split
266 if ($wanted) {
267 push(\@wanted, [hex \$code, $val, \$name =~ /, First>\$/]);
268 }
269 }
270END
271 die $@ if $@;
272
273 while (@wanted) {
274 $beg = shift @wanted;
275 $last = $beg;
276 while (@wanted and $wanted[0]->[0] == $last->[0] + 1 and
277 ($wanted[0]->[1] eq $last->[1])) {
278 $last = shift @wanted;
279 }
280 $out .= sprintf "%04x", $beg->[0];
281 if ($beg->[2]) {
282 $last = shift @wanted;
283 }
284 if ($beg == $last) {
285 $out .= "\t";
286 }
287 else {
288 $out .= sprintf "\t%04x", $last->[0];
289 }
290 $out .= sprintf "\t%s\n", $beg->[1];
291 }
292 }
293 $out;
294}