Add the 'Common' Unicode property (code points not
[p5sagit/p5-mst-13.2.git] / lib / unicore / mktables.PL
CommitLineData
a0ed51b3 1#!../../miniperl
2
7c6f5cd2 3use bytes;
4
190eec7c 5$UnicodeData = "Unicode.txt";
7c6f5cd2 6$SyllableData = "syllables.txt";
8836d2a5 7$PropData = "PropList.txt";
7c6f5cd2 8
11695a73 9
a0ed51b3 10# Note: we try to keep filenames unique within first 8 chars. Using
11# subdirectories for the following helps.
289d4f09 12mkdir "In", 0755;
13mkdir "Is", 0755;
14mkdir "To", 0755;
a0ed51b3 15
16@todo = (
17# typical
18
f59877d4 19 # 005F: SPACING UNDERSCROE
20 ['IsWord', '$cat =~ /^[LMN]/ or $code eq "005F"', ''],
21 ['IsAlnum', '$cat =~ /^[LMN]/', ''],
22 ['IsAlpha', '$cat =~ /^[LM]/', ''],
23 # 0009: HORIZONTAL TABULATION
24 # 000A: LINE FEED
25 # 000B: VERTICAL TABULATION
26 # 000C: FORM FEED
27 # 000D: CARRIAGE RETURN
3bec3564 28 # 0020: SPACE
f59877d4 29 ['IsSpace', '$cat =~ /^Z/ ||
30 $code =~ /^(0009|000A|000B|000C|000D)$/', ''],
3bec3564 31 ['IsSpacePerl',
32 '$cat =~ /^Z/ ||
33 $code =~ /^(0009|000A|000C|000D)$/', ''],
f5868911 34 ['IsBlank', '$code =~ /^(0020|0009)$/ ||
35 $cat =~ /^Z[^lp]$/', ''],
b8c5462f 36 ['IsDigit', '$cat =~ /^Nd$/', ''],
7c6f5cd2 37 ['IsUpper', '$cat =~ /^L[ut]$/', ''],
b8c5462f 38 ['IsLower', '$cat =~ /^Ll$/', ''],
f59877d4 39 ['IsASCII', '$code le "007f"', ''],
b8c5462f 40 ['IsCntrl', '$cat =~ /^C/', ''],
f59877d4 41 ['IsGraph', '$cat =~ /^([LMNPS]|Co)/', ''],
42 ['IsPrint', '$cat =~ /^([LMNPS]|Co|Zs)/', ''],
43 ['IsPunct', '$cat =~ /^P/', ''],
44 # 003[0-9]: DIGIT ZERO..NINE, 00[46][1-6]: A..F, a..f
b8c5462f 45 ['IsXDigit', '$code =~ /^00(3[0-9]|[46][1-6])$/', ''],
46 ['ToUpper', '$up', '$up'],
47 ['ToLower', '$down', '$down'],
48 ['ToTitle', '$title', '$title'],
49 ['ToDigit', '$dec ne ""', '$dec'],
a0ed51b3 50
51# Name
52
53 ['Name', '$name', '$name'],
54
55# Category
56
57 ['Category', '$cat', '$cat'],
58
59# Normative
60
61 ['IsM', '$cat =~ /^M/', ''], # Mark
62 ['IsMn', '$cat eq "Mn"', ''], # Mark, Non-Spacing
63 ['IsMc', '$cat eq "Mc"', ''], # Mark, Combining
7c6f5cd2 64 ['IsMe', '$cat eq "Me"', ''], # Mark, Enclosing
a0ed51b3 65
66 ['IsN', '$cat =~ /^N/', ''], # Number
67 ['IsNd', '$cat eq "Nd"', ''], # Number, Decimal Digit
68 ['IsNo', '$cat eq "No"', ''], # Number, Other
7c6f5cd2 69 ['IsNl', '$cat eq "Nl"', ''], # Number, Letter
a0ed51b3 70
7c6f5cd2 71 ['IsZ', '$cat =~ /^Z/', ''], # Separator
a0ed51b3 72 ['IsZs', '$cat eq "Zs"', ''], # Separator, Space
73 ['IsZl', '$cat eq "Zl"', ''], # Separator, Line
74 ['IsZp', '$cat eq "Zp"', ''], # Separator, Paragraph
75
76 ['IsC', '$cat =~ /^C/', ''], # Crazy
77 ['IsCc', '$cat eq "Cc"', ''], # Other, Control or Format
78 ['IsCo', '$cat eq "Co"', ''], # Other, Private Use
79 ['IsCn', '$cat eq "Cn"', ''], # Other, Not Assigned
7c6f5cd2 80 ['IsCf', '$cat eq "Cf"', ''], # Other, Format
81 ['IsCs', '$cat eq "Cs"', ''], # Other, Surrogate
82 ['IsCn', 'Unassigned Code Value',$PropData], # Other, Not Assigned
a0ed51b3 83
84# Informative
85
86 ['IsL', '$cat =~ /^L/', ''], # Letter
87 ['IsLu', '$cat eq "Lu"', ''], # Letter, Uppercase
88 ['IsLl', '$cat eq "Ll"', ''], # Letter, Lowercase
89 ['IsLt', '$cat eq "Lt"', ''], # Letter, Titlecase
90 ['IsLm', '$cat eq "Lm"', ''], # Letter, Modifier
91 ['IsLo', '$cat eq "Lo"', ''], # Letter, Other
92
93 ['IsP', '$cat =~ /^P/', ''], # Punctuation
94 ['IsPd', '$cat eq "Pd"', ''], # Punctuation, Dash
95 ['IsPs', '$cat eq "Ps"', ''], # Punctuation, Open
96 ['IsPe', '$cat eq "Pe"', ''], # Punctuation, Close
97 ['IsPo', '$cat eq "Po"', ''], # Punctuation, Other
7c6f5cd2 98 ['IsPc', '$cat eq "Pc"', ''], # Punctuation, Connector
99 ['IsPi', '$cat eq "Pi"', ''], # Punctuation, Initial quote
100 ['IsPf', '$cat eq "Pf"', ''], # Punctuation, Final quote
a0ed51b3 101
102 ['IsS', '$cat =~ /^S/', ''], # Symbol
103 ['IsSm', '$cat eq "Sm"', ''], # Symbol, Math
7c6f5cd2 104 ['IsSk', '$cat eq "Sk"', ''], # Symbol, Modifier
a0ed51b3 105 ['IsSc', '$cat eq "Sc"', ''], # Symbol, Currency
106 ['IsSo', '$cat eq "So"', ''], # Symbol, Other
107
108# Combining class
109 ['CombiningClass', '$comb', '$comb'],
110
111# BIDIRECTIONAL PROPERTIES
112
113 ['Bidirectional', '$bid', '$bid'],
114
115# Strong types:
116
117 ['IsBidiL', '$bid eq "L"', ''], # Left-Right; Most alphabetic,
118 # syllabic, and logographic
119 # characters (e.g., CJK
120 # ideographs)
121 ['IsBidiR', '$bid eq "R"', ''], # Right-Left; Arabic, Hebrew,
122 # and punctuation specific to
123 # those scripts
124
7c6f5cd2 125 ['IsBidiLRE', '$bid eq "LRE"', ''], # Left-to-Right Embedding
126 ['IsBidiLRO', '$bid eq "LRO"', ''], # Left-to-Right Override
127 ['IsBidiAL', '$bid eq "AL"', ''], # Right-to-Left Arabic
128 ['IsBidiRLE', '$bid eq "RLE"', ''], # Right-to-Left Embedding
129 ['IsBidiRLO', '$bid eq "RLO"', ''], # Right-to-Left Override
130 ['IsBidiPDF', '$bid eq "PDF"', ''], # Pop Directional Format
131 ['IsBidiNSM', '$bid eq "NSM"', ''], # Non-Spacing Mark
132 ['IsBidiBN', '$bid eq "BN"', ''], # Boundary Neutral
133
a0ed51b3 134# Weak types:
135
136 ['IsBidiEN','$bid eq "EN"', ''], # European Number
137 ['IsBidiES','$bid eq "ES"', ''], # European Number Separator
138 ['IsBidiET','$bid eq "ET"', ''], # European Number Terminator
139 ['IsBidiAN','$bid eq "AN"', ''], # Arabic Number
140 ['IsBidiCS','$bid eq "CS"', ''], # Common Number Separator
141
142# Separators:
143
144 ['IsBidiB', '$bid eq "B"', ''], # Block Separator
145 ['IsBidiS', '$bid eq "S"', ''], # Segment Separator
146
147# Neutrals:
148
149 ['IsBidiWS','$bid eq "WS"', ''], # Whitespace
150 ['IsBidiON','$bid eq "ON"', ''], # Other Neutrals ; All other
151 # characters: punctuation,
152 # symbols
153
154# Decomposition
155
156 ['Decomposition', '$decomp', '$decomp'],
157 ['IsDecoCanon', '$decomp && $decomp !~ /^</', ''],
158 ['IsDecoCompat', '$decomp =~ /^</', ''],
159 ['IsDCfont', '$decomp =~ /^<font>/', ''],
160 ['IsDCnoBreak', '$decomp =~ /^<noBreak>/', ''],
161 ['IsDCinitial', '$decomp =~ /^<initial>/', ''],
f59877d4 162 ['IsDCmedial', '$decomp =~ /^<medial>/', ''],
a0ed51b3 163 ['IsDCfinal', '$decomp =~ /^<final>/', ''],
164 ['IsDCisolated', '$decomp =~ /^<isolated>/', ''],
165 ['IsDCcircle', '$decomp =~ /^<circle>/', ''],
166 ['IsDCsuper', '$decomp =~ /^<super>/', ''],
167 ['IsDCsub', '$decomp =~ /^<sub>/', ''],
168 ['IsDCvertical', '$decomp =~ /^<vertical>/', ''],
169 ['IsDCwide', '$decomp =~ /^<wide>/', ''],
170 ['IsDCnarrow', '$decomp =~ /^<narrow>/', ''],
171 ['IsDCsmall', '$decomp =~ /^<small>/', ''],
172 ['IsDCsquare', '$decomp =~ /^<square>/', ''],
7c6f5cd2 173 ['IsDCfraction', '$decomp =~ /^<fraction>/', ''],
a0ed51b3 174 ['IsDCcompat', '$decomp =~ /^<compat>/', ''],
175
176# Number
177
e1b504a6 178 ['Number', '$num ne ""', '$num'],
a0ed51b3 179
180# Mirrored
181
182 ['IsMirrored', '$mir eq "Y"', ''],
183
184# Arabic
185
186 ['ArabLink', '1', '$link'],
187 ['ArabLnkGrp', '1', '$linkgroup'],
188
189# Jamo
190
191 ['JamoShort', '1', '$short'],
499bfa7a 192
193# Syllables
194
7c6f5cd2 195 syllable_defs(),
196
a77b4ae5 197# Line break properties - Normative
198
199 ['IsLbrkBK','$brk eq "BK"', ''], # Mandatory Break
200 ['IsLbrkCR','$brk eq "CR"', ''], # Carriage Return
201 ['IsLbrkLF','$brk eq "LF"', ''], # Line Feed
202 ['IsLbrkCM','$brk eq "CM"', ''], # Attached Characters and Combining Marks
203 ['IsLbrkSG','$brk eq "SG"', ''], # Surrogates
204 ['IsLbrkGL','$brk eq "GL"', ''], # Non-breaking (Glue)
205 ['IsLbrkCB','$brk eq "CB"', ''], # Contingent Break Opportunity
206 ['IsLbrkSP','$brk eq "SP"', ''], # Space
207 ['IsLbrkZW','$brk eq "ZW"', ''], # Zero Width Space
208
209# Line break properties - Informative
210 ['IsLbrkXX','$brk eq "XX"', ''], # Unknown
211 ['IsLbrkOP','$brk eq "OP"', ''], # Opening Punctuation
212 ['IsLbrkCL','$brk eq "CL"', ''], # Closing Punctuation
213 ['IsLbrkQU','$brk eq "QU"', ''], # Ambiguous Quotation
214 ['IsLbrkNS','$brk eq "NS"', ''], # Non Starter
215 ['IsLbrkEX','$brk eq "EX"', ''], # Exclamation/Interrogation
216 ['IsLbrkSY','$brk eq "SY"', ''], # Symbols Allowing Breaks
217 ['IsLbrkIS','$brk eq "IS"', ''], # Infix Separator (Numeric)
218 ['IsLbrkPR','$brk eq "PR"', ''], # Prefix (Numeric)
219 ['IsLbrkPO','$brk eq "PO"', ''], # Postfix (Numeric)
220 ['IsLbrkNU','$brk eq "NU"', ''], # Numeric
221 ['IsLbrkAL','$brk eq "AL"', ''], # Ordinary Alphabetic and Symbol Characters
222 ['IsLbrkID','$brk eq "ID"', ''], # Ideographic
223 ['IsLbrkIN','$brk eq "IN"', ''], # Inseparable
224 ['IsLbrkHY','$brk eq "HY"', ''], # Hyphen
225 ['IsLbrkBB','$brk eq "BB"', ''], # Break Opportunity Before
226 ['IsLbrkBA','$brk eq "BA"', ''], # Break Opportunity After
227 ['IsLbrkSA','$brk eq "SA"', ''], # Complex Context (South East Asian)
228 ['IsLbrkAI','$brk eq "AI"', ''], # Ambiguous (Alphabetic or Ideographic)
229 ['IsLbrkB2','$brk eq "B2"', ''], # Break Opportunity Before and After
a0ed51b3 230);
231
232# This is not written for speed...
233
d9efae67 234my %InIdScript;
235my %InIdBlock;
9fdf68be 236my $InId = 0;
237
a0ed51b3 238foreach $file (@todo) {
239 my ($table, $wanted, $val) = @$file;
240 next if @ARGV and not grep { $_ eq $table } @ARGV;
9fdf68be 241 print $table, "\n";
242 $table =~ s/\W+//g;
2796c109 243 if ($table =~ /^(Is|To)(.+)/) {
a0ed51b3 244 open(OUT, ">$1/$2.pl") or die "Can't create $1/$2.pl: $!\n";
245 }
246 else {
247 open(OUT, ">$table.pl") or die "Can't create $table.pl: $!\n";
248 }
14055466 249 print OUT <<EOH;
250# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
251# This file is built by $0 from e.g. $UnicodeData.
252# Any changes made here will be lost!
253EOH
a0ed51b3 254 print OUT <<"END";
255return <<'END';
256END
257 print OUT proplist($table, $wanted, $val);
258 print OUT "END\n";
259 close OUT;
260}
261
2796c109 262print "Scripts\n";
263open(UD, 'Scripts.txt') or die "Can't open Scripts.txt: $!\n";
264open(OUT, ">Scripts.pl") or die "Can't create Scripts.pl: $!\n";
265print OUT <<EOH;
266# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
267# This file is built by $0 from e.g. $UnicodeData.
268# Any changes made here will be lost!
269EOH
270print OUT <<"END";
271return <<'END';
272END
273
274my %Scripts;
c602af67 275my $ScriptsVec = '';
276my $lastlast = 0;
2796c109 277
278while (<UD>) {
279 next if /^#/;
280 next if /^$/;
281 chomp;
5f9563ea 282 ($code, $last, $name) = /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s+;\s+(.+)\s+\#/i;
2796c109 283 if ($name) {
d9efae67 284 my $InName = $name;
2796c109 285 my $id;
d9efae67 286 unless (exists $InIdScript{$InName}) {
2796c109 287 print "\t$InName\n";
d9efae67 288 $id = $Scripts{$InName} = $InIdScript{$InName} = $InId++;
2796c109 289 open(SCRIPT, ">In/$id.pl") or die "create In/$id.pl: $!\n";
290 print SCRIPT <<EOH;
291# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
292# This file is built by $0 from e.g. $UnicodeData.
293# Any changes made here will be lost!
294return <<'END';
295EOH
296 close(SCRIPT);
297 } else {
d9efae67 298 $id = $InIdScript{$InName};
2796c109 299 }
5f9563ea 300 $last = "" unless defined $last;
d9efae67 301 print OUT "$code\t$last\t$name\t# In/$id.pl\n";
2796c109 302 open(SCRIPT, ">>In/$id.pl");
303 print SCRIPT <<END;
304$code $last
305END
306 close SCRIPT;
307 }
c602af67 308 my $firsti = hex($code);
309 my $lasti = $last ? hex($last) : $firsti;
310 for my $i ($firsti..$lasti) {
311 vec($ScriptsVec, $i, 1) = 1;
312 }
313 $lastlast = $lasti if $lasti > $lastlast;
314 print "\t\t$code..$last\n";
2796c109 315}
316
d9efae67 317for my $id (values %InIdScript) {
2796c109 318 open(SCRIPT, ">>In/$id.pl");
319 print SCRIPT <<END2;
320END
321END2
322 close(SCRIPT);
323}
324
325print OUT "END\n";
326close OUT;
327
a0ed51b3 328# Must treat blocks specially.
329
330exit if @ARGV and not grep { $_ eq Block } @ARGV;
9fdf68be 331print "Blocks\n";
7c6f5cd2 332open(UD, 'Blocks.txt') or die "Can't open Blocks.txt: $!\n";
9fdf68be 333open(OUT, ">Blocks.pl") or die "Can't create Blocks.pl: $!\n";
14055466 334print OUT <<EOH;
335# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
336# This file is built by $0 from e.g. $UnicodeData.
337# Any changes made here will be lost!
338EOH
a0ed51b3 339print OUT <<"END";
340return <<'END';
341END
342
343while (<UD>) {
344 next if /^#/;
345 next if /^$/;
346 chomp;
d9efae67 347 ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+?)\s*$/i;
a0ed51b3 348 if ($name) {
2796c109 349 my $InName = $name;
2796c109 350 print "\t$InName\n";
9fdf68be 351 my $id;
2796c109 352 # TODO: only the first one of Private Use blocks qualifies
d9efae67 353 unless (exists $InIdBlock{$InName}) {
354 $InIdBlock{$InName} = $InId++;
9fdf68be 355 }
d9efae67 356 $id = $InIdBlock{$InName};
2796c109 357 open(BLOCK, ">In/$id.pl") or die "create In/$id.pl: $!\n";
d9efae67 358 print OUT "$code\t$last\t$name\t# In/$id.pl\n";
14055466 359 print BLOCK <<EOH;
360# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
361# This file is built by $0 from e.g. $UnicodeData.
362# Any changes made here will be lost!
363EOH
a0ed51b3 364 print BLOCK <<"END2";
365return <<'END';
366$code $last
367END
368END2
369 close BLOCK;
370 }
371}
372
373print OUT "END\n";
374close OUT;
375
c602af67 376print "\tCommon\n";
377my $CommonId = $Scripts{Common} = $InIdScript{Common} = $InId++;
378open(SCRIPT, ">In/$CommonId.pl") or die "create In/$CommonId.pl: $!\n";
379print SCRIPT <<EOH;
380# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
381# This file is built by $0 from e.g. $UnicodeData.
382# Any changes made here will be lost!
383return <<'END';
384EOH
385my $first;
386sub flushzerorange {
387 my $i = shift;
388 if (defined $first) {
389 my $last = $i - 1;
390 $last = $last == $first ? "" : sprintf("%04X", $last);
391 printf SCRIPT "%04X\t$last\tCommon\t# In/$CommonId.pl\n", $first;
392 printf "\t\t%04X..$last\n", $first;
393 undef $first;
394 }
395}
396for my $i (0..$lastlast) {
397 if (vec($ScriptsVec, $i, 1)) {
398 defined $first && flushzerorange($i);
399 } else {
400 $first = $i unless defined $first;
401 }
402}
403flushzerorange($lastlast+1);
404print SCRIPT "END\n";
405close(SCRIPT);
406
9fdf68be 407open(INID, ">In.pl");
408
409print INID <<EOH;
410# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
411# This file is built by $0 from e.g. $UnicodeData.
412# Any changes made here will be lost!
413%utf8::In = (
414EOH
415
d9efae67 416my %InIdScriptById = reverse %InIdScript;
417my %InIdBlockById = reverse %InIdBlock;
418
419my @InIdScriptById = sort { $a <=> $b } keys %InIdScriptById;
420my @InIdBlockById = sort { $a <=> $b } keys %InIdBlockById;
421
422my %InId;
423my %IdIdLcName;
424
425for my $id (@InIdScriptById) {
426 my $name = $InIdScriptById{$id};
427 my $lcname = lc($name);
428 $InId{$name} = $id;
429 $IdIdLcName{$lcname} = $id;
430}
431
432for my $id (@InIdBlockById) {
433 my $name = $InIdBlockById{$id};
434 my $lcname = lc($name);
435 if (exists $IdIdLcName{$lcname}) {
436 $InId{"$name Block"} = $id;
437 } else {
438 $InId{$name} = $id;
439 }
440 $IdIdLcName{$lcname} = $id;
441}
442
443my @InId = sort { $InId{$a} <=> $InId{$b} } keys %InId;
444
445my %InIdPrefix;
446
447foreach my $in (@InId) {
448 my $inpat = $in;
f173cd49 449 $inpat =~ s/([- _])/(?:[-_]|\\s+)?/g;
d9efae67 450 push @{$InIdPrefix{lc(substr($in, 0, 3))}}, [ $in, $inpat ];
451 printf INID "%-45s => %3d,\n", "'$in'", $InId{$in};
452}
453
454print INID ");\n";
455
456print INID <<EOH;
457%utf8::InPat = (
458EOH
459
460foreach my $prefix (sort keys %InIdPrefix) {
461 printf INID "'$prefix' => {\n";
462 foreach my $ininpat (@{$InIdPrefix{$prefix}}) {
463 my ($in, $inpat) = @$ininpat;
464 printf INID "\t'$inpat' => '$in',\n";
465 }
466 printf INID "},\n";
9fdf68be 467}
468
469print INID ");\n";
470
471close(INID);
472
a0ed51b3 473##################################################
474
475sub proplist {
476 my ($table, $wanted, $val) = @_;
477 my @wanted;
478 my $out;
479 my $split;
480
7c6f5cd2 481 return listFromPropFile($wanted) if $val eq $PropData;
482
a0ed51b3 483 if ($table =~ /^Arab/) {
d357d9fe 484 open(UD, "ArabShap.txt") or warn "Can't open $table: $!";
a0ed51b3 485
486 $split = '($code, $name, $link, $linkgroup) = split(/; */);';
487 }
488 elsif ($table =~ /^Jamo/) {
505afebf 489 open(UD, "Jamo.txt") or warn "Can't open $table: $!";
a0ed51b3 490
491 $split = '($code, $short, $name) = split(/; */); $code =~ s/^U\+//;';
492 }
499bfa7a 493 elsif ($table =~ /^IsSyl/) {
7c6f5cd2 494 open(UD, $SyllableData) or warn "Can't open $table: $!";
499bfa7a 495
496 $split = '($code, $short, $syl) = split(/; */); $code =~ s/^U\+//;';
497 }
a77b4ae5 498 elsif ($table =~ /^IsLbrk/) {
499 open(UD, "LineBrk.txt") or warn "Can't open $table: $!";
500
50fc4248 501 $split = '($code, $brk, $name) = /^([0-9a-f]+);(\w+) # (.+)/i;';
a77b4ae5 502 }
a0ed51b3 503 else {
11695a73 504 open(UD, $UnicodeData) or warn "Can't open $UnicodeData: $!";
a0ed51b3 505
506 $split = '($code, $name, $cat, $comb, $bid, $decomp, $dec, $dig, $num, $mir, $uni1,
507 $comment, $up, $down, $title) = split(/;/);';
508 }
509
510 if ($table =~ /^(?:To|Is)[A-Z]/) {
511 eval <<"END";
512 while (<UD>) {
513 next if /^#/;
7c6f5cd2 514 next if /^\\s/;
515 s/\\s+\$//;
a0ed51b3 516 $split
517 if ($wanted) {
518 push(\@wanted, [hex \$code, hex $val, \$name =~ /, First>\$/]);
519 }
520 }
521END
522 die $@ if $@;
523
524 while (@wanted) {
525 $beg = shift @wanted;
526 $last = $beg;
527 while (@wanted and $wanted[0]->[0] == $last->[0] + 1 and
528 (not $val or $wanted[0]->[1] == $last->[1] + 1)) {
529 $last = shift @wanted;
530 }
531 $out .= sprintf "%04x", $beg->[0];
532 if ($beg->[2]) {
533 $last = shift @wanted;
534 }
535 if ($beg == $last) {
536 $out .= "\t";
537 }
538 else {
539 $out .= sprintf "\t%04x", $last->[0];
540 }
541 $out .= sprintf "\t%04x", $beg->[1] if $val;
542 $out .= "\n";
543 }
544 }
545 else {
546 eval <<"END";
547 while (<UD>) {
548 next if /^#/;
7c6f5cd2 549 next if /^\\s*\$/;
a0ed51b3 550 chop;
551 $split
552 if ($wanted) {
553 push(\@wanted, [hex \$code, $val, \$name =~ /, First>\$/]);
554 }
555 }
556END
557 die $@ if $@;
558
559 while (@wanted) {
560 $beg = shift @wanted;
561 $last = $beg;
562 while (@wanted and $wanted[0]->[0] == $last->[0] + 1 and
563 ($wanted[0]->[1] eq $last->[1])) {
564 $last = shift @wanted;
565 }
566 $out .= sprintf "%04x", $beg->[0];
567 if ($beg->[2]) {
568 $last = shift @wanted;
569 }
570 if ($beg == $last) {
571 $out .= "\t";
572 }
573 else {
574 $out .= sprintf "\t%04x", $last->[0];
575 }
576 $out .= sprintf "\t%s\n", $beg->[1];
577 }
578 }
579 $out;
580}
11695a73 581
7c6f5cd2 582sub listFromPropFile {
583 my ($wanted) = @_;
584 my $out;
585
586 open (UD, $PropData) or die "Can't open $PropData: $!\n";
587 local($/) = "\n" . '*' x 43 . "\n\nProperty dump for:"; # not 42?
588
589 <UD>;
590 while (<UD>) {
591 chomp;
592 if (s/0x[\d\w]+\s+\((.*?)\)// and $wanted eq $1) {
593 s/\(\d+ chars\)//g;
594 s/^\s+//mg;
595 s/\s+$//mg;
596 s/\.\./\t/g;
597 $out = lc $_;
598 last;
599 }
600 }
601 close (UD);
602 "$out\n";
603}
604
605sub syllable_defs {
606 my @defs;
607 my %seen;
608
609 open (SD, $SyllableData) or die "Can't open $SyllableData: $!\n";
610 while (<SD>) {
611 next if /^\s*(#|$)/;
612 s/\s+$//;
613 ($code, $name, $syl) = split /; */;
614 next unless $syl;
615 push (@defs, ["IsSyl$syl", qq{\$syl eq "$syl"}, ''])
616 unless $seen{$syl}++;
617 }
618 close (SD);
619 return (@defs);
620}
621
6dd159d1 622# eof