# Names of the Unicode data files this script reads.
5 $UnicodeData = "Unicode.txt";
6 $SyllableData = "syllables.txt";
7 $PropData = "PropList.txt";
# Highest code point as a hex string; it is converted with hex() where used.
9 my $UnicodeLastHex = '10FFFF';
11 # Note: we try to keep filenames unique within first 8 chars. Using
12 # subdirectories for the following helps.
# Each entry is [table name, selection expression, value expression]; the
# main loop unpacks an entry as ($table, $wanted, $val).
20 # 005F: SPACING UNDERSCORE
21 ['IsWord', '$cat =~ /^[LMN]/ or $code eq "005F"', ''],
22 ['IsAlnum', '$cat =~ /^[LMN]/', ''],
23 ['IsAlpha', '$cat =~ /^[LM]/', ''],
# Control characters counted as space in addition to the Z categories:
24 # 0009: HORIZONTAL TABULATION
26 # 000B: VERTICAL TABULATION
28 # 000D: CARRIAGE RETURN
30 ['IsSpace', '$cat =~ /^Z/ ||
31 $code =~ /^(0009|000A|000B|000C|000D)$/', ''],
34 $code =~ /^(0009|000A|000C|000D)$/', ''],
35 ['IsBlank', '$code =~ /^(0020|0009)$/ ||
36 $cat =~ /^Z[^lp]$/', ''],
37 ['IsDigit', '$cat =~ /^Nd$/', ''],
38 ['IsUpper', '$cat =~ /^L[ut]$/', ''],
39 ['IsLower', '$cat =~ /^Ll$/', ''],
40 ['IsASCII', '$code le "007f"', ''],
41 ['IsCntrl', '$cat =~ /^C/', ''],
42 ['IsGraph', '$cat =~ /^([LMNPS]|Co)/', ''],
43 ['IsPrint', '$cat =~ /^([LMNPS]|Co|Zs)/', ''],
44 ['IsPunct', '$cat =~ /^P/', ''],
45 # 003[0-9]: DIGIT ZERO..NINE, 00[46][1-6]: A..F, a..f
46 ['IsXDigit', '$code =~ /^00(3[0-9]|[46][1-6])$/', ''],
47 ['ToUpper', '$up', '$up'],
48 ['ToLower', '$down', '$down'],
49 ['ToTitle', '$title', '$title'],
50 ['ToDigit', '$dec ne ""', '$dec'],
54 ['Name', '$name', '$name'],
58 ['Category', '$cat', '$cat'],
# General category tables: one per major class (first letter of $cat) and
# one per two-letter subcategory.
62 ['IsM', '$cat =~ /^M/', ''], # Mark
63 ['IsMn', '$cat eq "Mn"', ''], # Mark, Non-Spacing
64 ['IsMc', '$cat eq "Mc"', ''], # Mark, Spacing Combining
65 ['IsMe', '$cat eq "Me"', ''], # Mark, Enclosing
67 ['IsN', '$cat =~ /^N/', ''], # Number
68 ['IsNd', '$cat eq "Nd"', ''], # Number, Decimal Digit
69 ['IsNo', '$cat eq "No"', ''], # Number, Other
70 ['IsNl', '$cat eq "Nl"', ''], # Number, Letter
72 ['IsZ', '$cat =~ /^Z/', ''], # Separator
73 ['IsZs', '$cat eq "Zs"', ''], # Separator, Space
74 ['IsZl', '$cat eq "Zl"', ''], # Separator, Line
75 ['IsZp', '$cat eq "Zp"', ''], # Separator, Paragraph
77 ['IsC', '$cat =~ /^C/', ''], # Other
78 ['IsCc', '$cat eq "Cc"', ''], # Other, Control
79 ['IsCo', '$cat eq "Co"', ''], # Other, Private Use
80 ['IsCn', '$cat eq "Cn"', ''], # Other, Not Assigned
81 ['IsCf', '$cat eq "Cf"', ''], # Other, Format
82 ['IsCs', '$cat eq "Cs"', ''], # Other, Surrogate
# NOTE(review): second entry named IsCn. Its third field is $PropData, so
# proplist() answers it from listFromPropFile(); both entries are written to
# the same Is/Cn.pl, so the later one replaces the earlier output.
83 ['IsCn', 'Unassigned Code Value',$PropData], # Other, Not Assigned
87 ['IsL', '$cat =~ /^L/', ''], # Letter
88 ['IsLu', '$cat eq "Lu"', ''], # Letter, Uppercase
89 ['IsLl', '$cat eq "Ll"', ''], # Letter, Lowercase
90 ['IsLt', '$cat eq "Lt"', ''], # Letter, Titlecase
91 ['IsLm', '$cat eq "Lm"', ''], # Letter, Modifier
92 ['IsLo', '$cat eq "Lo"', ''], # Letter, Other
94 ['IsP', '$cat =~ /^P/', ''], # Punctuation
95 ['IsPd', '$cat eq "Pd"', ''], # Punctuation, Dash
96 ['IsPs', '$cat eq "Ps"', ''], # Punctuation, Open
97 ['IsPe', '$cat eq "Pe"', ''], # Punctuation, Close
98 ['IsPo', '$cat eq "Po"', ''], # Punctuation, Other
99 ['IsPc', '$cat eq "Pc"', ''], # Punctuation, Connector
100 ['IsPi', '$cat eq "Pi"', ''], # Punctuation, Initial quote
101 ['IsPf', '$cat eq "Pf"', ''], # Punctuation, Final quote
103 ['IsS', '$cat =~ /^S/', ''], # Symbol
104 ['IsSm', '$cat eq "Sm"', ''], # Symbol, Math
105 ['IsSk', '$cat eq "Sk"', ''], # Symbol, Modifier
106 ['IsSc', '$cat eq "Sc"', ''], # Symbol, Currency
107 ['IsSo', '$cat eq "So"', ''], # Symbol, Other
# CombiningClass and Bidirectional map each code point to its raw field
# value; the IsBidi* tables select a single bidirectional type each.
110 ['CombiningClass', '$comb', '$comb'],
112 # BIDIRECTIONAL PROPERTIES
114 ['Bidirectional', '$bid', '$bid'],
118 ['IsBidiL', '$bid eq "L"', ''], # Left-Right; Most alphabetic,
119 # syllabic, and logographic
120 # characters (e.g., CJK ideographs)
122 ['IsBidiR', '$bid eq "R"', ''], # Right-Left; Arabic, Hebrew,
123 # and punctuation specific to those scripts
126 ['IsBidiLRE', '$bid eq "LRE"', ''], # Left-to-Right Embedding
127 ['IsBidiLRO', '$bid eq "LRO"', ''], # Left-to-Right Override
128 ['IsBidiAL', '$bid eq "AL"', ''], # Right-to-Left Arabic
129 ['IsBidiRLE', '$bid eq "RLE"', ''], # Right-to-Left Embedding
130 ['IsBidiRLO', '$bid eq "RLO"', ''], # Right-to-Left Override
131 ['IsBidiPDF', '$bid eq "PDF"', ''], # Pop Directional Format
132 ['IsBidiNSM', '$bid eq "NSM"', ''], # Non-Spacing Mark
133 ['IsBidiBN', '$bid eq "BN"', ''], # Boundary Neutral
137 ['IsBidiEN','$bid eq "EN"', ''], # European Number
138 ['IsBidiES','$bid eq "ES"', ''], # European Number Separator
139 ['IsBidiET','$bid eq "ET"', ''], # European Number Terminator
140 ['IsBidiAN','$bid eq "AN"', ''], # Arabic Number
141 ['IsBidiCS','$bid eq "CS"', ''], # Common Number Separator
145 ['IsBidiB', '$bid eq "B"', ''], # Block Separator
146 ['IsBidiS', '$bid eq "S"', ''], # Segment Separator
150 ['IsBidiWS','$bid eq "WS"', ''], # Whitespace
151 ['IsBidiON','$bid eq "ON"', ''], # Other Neutrals ; All other
152 # characters: punctuation, symbols
# Decomposition tables. IsDecoCanon selects a non-empty mapping that does not
# start with a <tag>; IsDecoCompat and the IsDC* tables select mappings that
# start with a <tag>, one table per tag name.
157 ['Decomposition', '$decomp', '$decomp'],
158 ['IsDecoCanon', '$decomp && $decomp !~ /^</', ''],
159 ['IsDecoCompat', '$decomp =~ /^</', ''],
160 ['IsDCfont', '$decomp =~ /^<font>/', ''],
161 ['IsDCnoBreak', '$decomp =~ /^<noBreak>/', ''],
162 ['IsDCinitial', '$decomp =~ /^<initial>/', ''],
163 ['IsDCmedial', '$decomp =~ /^<medial>/', ''],
164 ['IsDCfinal', '$decomp =~ /^<final>/', ''],
165 ['IsDCisolated', '$decomp =~ /^<isolated>/', ''],
166 ['IsDCcircle', '$decomp =~ /^<circle>/', ''],
167 ['IsDCsuper', '$decomp =~ /^<super>/', ''],
168 ['IsDCsub', '$decomp =~ /^<sub>/', ''],
169 ['IsDCvertical', '$decomp =~ /^<vertical>/', ''],
170 ['IsDCwide', '$decomp =~ /^<wide>/', ''],
171 ['IsDCnarrow', '$decomp =~ /^<narrow>/', ''],
172 ['IsDCsmall', '$decomp =~ /^<small>/', ''],
173 ['IsDCsquare', '$decomp =~ /^<square>/', ''],
174 ['IsDCfraction', '$decomp =~ /^<fraction>/', ''],
175 ['IsDCcompat', '$decomp =~ /^<compat>/', ''],
179 ['Number', '$num ne ""', '$num'],
183 ['IsMirrored', '$mir eq "Y"', ''],
# The selection expression 1 keeps every record. Tables named Arab* are read
# from ArabShap.txt and tables named Jamo* from Jamo.txt (see the table-name
# tests before each open of UD).
187 ['ArabLink', '1', '$link'],
188 ['ArabLnkGrp', '1', '$linkgroup'],
192 ['JamoShort', '1', '$short'],
# The IsLbrk* tables test $brk, which is parsed from LineBrk.txt for every
# table whose name starts with IsLbrk.
198 # Line break properties - Normative
200 ['IsLbrkBK','$brk eq "BK"', ''], # Mandatory Break
201 ['IsLbrkCR','$brk eq "CR"', ''], # Carriage Return
202 ['IsLbrkLF','$brk eq "LF"', ''], # Line Feed
203 ['IsLbrkCM','$brk eq "CM"', ''], # Attached Characters and Combining Marks
204 ['IsLbrkSG','$brk eq "SG"', ''], # Surrogates
205 ['IsLbrkGL','$brk eq "GL"', ''], # Non-breaking (Glue)
206 ['IsLbrkCB','$brk eq "CB"', ''], # Contingent Break Opportunity
207 ['IsLbrkSP','$brk eq "SP"', ''], # Space
208 ['IsLbrkZW','$brk eq "ZW"', ''], # Zero Width Space
210 # Line break properties - Informative
211 ['IsLbrkXX','$brk eq "XX"', ''], # Unknown
212 ['IsLbrkOP','$brk eq "OP"', ''], # Opening Punctuation
213 ['IsLbrkCL','$brk eq "CL"', ''], # Closing Punctuation
214 ['IsLbrkQU','$brk eq "QU"', ''], # Ambiguous Quotation
215 ['IsLbrkNS','$brk eq "NS"', ''], # Non Starter
216 ['IsLbrkEX','$brk eq "EX"', ''], # Exclamation/Interrogation
217 ['IsLbrkSY','$brk eq "SY"', ''], # Symbols Allowing Breaks
218 ['IsLbrkIS','$brk eq "IS"', ''], # Infix Separator (Numeric)
219 ['IsLbrkPR','$brk eq "PR"', ''], # Prefix (Numeric)
220 ['IsLbrkPO','$brk eq "PO"', ''], # Postfix (Numeric)
221 ['IsLbrkNU','$brk eq "NU"', ''], # Numeric
222 ['IsLbrkAL','$brk eq "AL"', ''], # Ordinary Alphabetic and Symbol Characters
223 ['IsLbrkID','$brk eq "ID"', ''], # Ideographic
224 ['IsLbrkIN','$brk eq "IN"', ''], # Inseparable
225 ['IsLbrkHY','$brk eq "HY"', ''], # Hyphen
226 ['IsLbrkBB','$brk eq "BB"', ''], # Break Opportunity Before
227 ['IsLbrkBA','$brk eq "BA"', ''], # Break Opportunity After
228 ['IsLbrkSA','$brk eq "SA"', ''], # Complex Context (South East Asian)
229 ['IsLbrkAI','$brk eq "AI"', ''], # Ambiguous (Alphabetic or Ideographic)
230 ['IsLbrkB2','$brk eq "B2"', ''], # Break Opportunity Before and After
233 # This is not written for speed...
# Build one output file per @todo entry.
239 foreach $file (@todo) {
240 my ($table, $wanted, $val) = @$file;
# When table names are given on the command line, only those are rebuilt.
241 next if @ARGV and not grep { $_ eq $table } @ARGV;
# Is* and To* tables go to <prefix>/<rest>.pl. The second open below targets
# <table>.pl; the branch structure around it is not visible in this excerpt.
# NOTE(review): the three banner lines look like here-document text that is
# written to OUT, not comments -- leave their wording untouched.
244 if ($table =~ /^(Is|To)(.+)/) {
245 open(OUT, ">$1/$2.pl") or die "Can't create $1/$2.pl: $!\n";
248 open(OUT, ">$table.pl") or die "Can't create $table.pl: $!\n";
251 # !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
252 # This file is built by $0 from e.g. $UnicodeData.
253 # Any changes made here will be lost!
258 print OUT proplist($table, $wanted, $val);
# Scripts: read Scripts.txt and write the index file Scripts.pl.
264 open(UD, 'Scripts.txt') or die "Can't open Scripts.txt: $!\n";
265 open(OUT, ">Scripts.pl") or die "Can't create Scripts.pl: $!\n";
267 # !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
268 # This file is built by $0 from e.g. $UnicodeData.
269 # Any changes made here will be lost!
# Parse a record: first code point, optional ..last of a range, then the
# text between the semicolon and the trailing comment marker as the name.
283 ($code, $last, $name) = /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s+;\s+(.+)\s+\#/i;
# A name seen for the first time gets the next $InId and a fresh In/<id>.pl.
# $InName is assigned on a line that is not visible in this excerpt.
287 unless (exists $InIdScript{$InName}) {
289 $id = $Scripts{$InName} = $InIdScript{$InName} = $InId++;
290 open(SCRIPT, ">In/$id.pl") or die "create In/$id.pl: $!\n";
292 # !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
293 # This file is built by $0 from e.g. $UnicodeData.
294 # Any changes made here will be lost!
299 $id = $InIdScript{$InName};
301 $last = "" unless defined $last;
302 print OUT "$code\t$last\t$name\t# In/$id.pl\n";
# NOTE(review): this append-mode open is not checked for failure.
303 open(SCRIPT, ">>In/$id.pl");
# Set the bit of every code point in the range in $ScriptsVec and keep the
# highest code point seen so far in $lastlast.
309 my $firsti = hex($code);
310 my $lasti = $last ? hex($last) : $firsti;
311 for my $i ($firsti..$lasti) {
312 vec($ScriptsVec, $i, 1) = 1;
314 $lastlast = $lasti if $lasti > $lastlast;
315 print "\t\t$code..$last\n";
# Reopen every per-name file in append mode; what is written to it is not
# visible here. NOTE(review): this open is unchecked as well.
318 for my $id (values %InIdScript) {
319 open(SCRIPT, ">>In/$id.pl");
329 # Must treat blocks specially.
# With command-line arguments the whole run ends here unless Block was named.
# NOTE(review): Block is a bareword, so the comparison depends on strict subs
# being off.
331 exit if @ARGV and not grep { $_ eq Block } @ARGV;
# Read Blocks.txt and write the index file Blocks.pl.
333 open(UD, 'Blocks.txt') or die "Can't open Blocks.txt: $!\n";
334 open(OUT, ">Blocks.pl") or die "Can't create Blocks.pl: $!\n";
336 # !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
337 # This file is built by $0 from e.g. $UnicodeData.
338 # Any changes made here will be lost!
# A block record is first..last; name, with trailing whitespace dropped.
348 ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+?)\s*$/i;
353 # TODO: only the first one of Private Use blocks qualifies
# Allocate the next $InId for a block name not seen before. $InName is
# assigned on a line that is not visible in this excerpt.
354 unless (exists $InIdBlock{$InName}) {
355 $InIdBlock{$InName} = $InId++;
357 $id = $InIdBlock{$InName};
358 open(BLOCK, ">In/$id.pl") or die "create In/$id.pl: $!\n";
359 print OUT "$code\t$last\t$name\t# In/$id.pl\n";
361 # !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
362 # This file is built by $0 from e.g. $UnicodeData.
363 # Any changes made here will be lost!
365 print BLOCK <<"END2";
378 # \p{Common} is any code point not assigned to a script
# flush_zero_range: when a run is pending ($first is defined), print it to
# SCRIPT and to the default output handle as first<TAB>last in 4-digit hex.
# A run of one code point gets an empty second column. How $last is derived
# from the argument, and where $first is reset, is not visible here.
383 sub flush_zero_range {
385 if (defined $first) {
387 $last = $last == $first ? "" : sprintf("%04x", $last);
388 printf SCRIPT "%04x\t$last\n", $first;
389 printf "\t\t%04x..$last\n", $first;
# Register Common under the next free id and create its In/<id>.pl.
395 my $CommonId = $Scripts{Common} = $InIdScript{Common} = $InId++;
396 open(SCRIPT, ">In/$CommonId.pl") or die "create In/$CommonId.pl: $!\n";
398 # !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
399 # This file is built by $0 from e.g. $UnicodeData.
400 # Any changes made here will be lost!
# Walk 0..$lastlast. A set bit in $ScriptsVec (the code point has a script)
# flushes any pending run; the assignment to $first starts a run and is
# presumably in the else branch -- TODO confirm against the full file.
405 for my $i (0..$lastlast) {
406 if (vec($ScriptsVec, $i, 1)) {
407 defined $first && flush_zero_range($i);
409 $first = $i unless defined $first;
# Flush a run that is still pending after the last code point.
412 flush_zero_range($lastlast+1);
413 print SCRIPT "END\n";
417 # \p{Any} is 0..10FFFF (in Unicode 3.1.1)
# Register Any under the next free id and create its In/<id>.pl; the range
# written to that file is not visible in this excerpt.
421 my $AnyId = $Scripts{Any} = $InIdScript{Any} = $InId++;
422 open(SCRIPT, ">In/$AnyId.pl") or die "create In/$AnyId.pl: $!\n";
424 # !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
425 # This file is built by $0 from e.g. $UnicodeData.
426 # Any changes made here will be lost!
# Properties: read PropList.txt, one In/<id>.pl per property name.
# NOTE(review): the file name is spelled out here although $PropData holds
# the same name.
434 open(UD, 'PropList.txt') or die "Can't open PropList.txt: $!\n";
# Parse a record: first code point, optional ..last, then the property name.
441 ($code, $last, $name) = /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s+; (\w+)\s/i;
442 $last = "" unless defined $last;
# On a new property name, text is first printed to the PROP file of the
# previous property (if any), then the next $InId is taken and a new file is
# opened. $InName is assigned on a line not visible in this excerpt.
446 unless (exists $InIdScript{$InName}) {
448 print PROP <<EOH if defined $InIdProp;
451 $id = $InIdProp = $InIdScript{$InName} = $InId++;
452 open(PROP, ">In/$id.pl") or die "create In/$id.pl: $!\n";
454 # !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
455 # This file is built by $0 from e.g. $UnicodeData.
456 # Any changes made here will be lost!
460 $id = $InIdScript{$InName};
# The \L escapes lowercase the hex digits written to PROP.
461 print PROP "\L$code\t\L$last\n";
# Noncharacter code points are also recorded in the $CnVec bit vector, which
# the Assigned section scans.
462 if ($InName eq 'Noncharacter_Code_Point') {
463 my $firsti = hex($code);
464 my $lasti = $last ? hex($last) : $firsti;
465 for my $i ($firsti..$lasti) {
466 vec($CnVec, $i, 1) = 1;
# Assigned: runs of code points in 0..$UnicodeLastHex whose $CnVec bit is
# clear, written through flush_zero_range().
# NOTE(review): in the code visible here only Noncharacter_Code_Point ranges
# set bits in $CnVec -- confirm whether unassigned code points are excluded
# somewhere else.
473 print "\tAssigned\n";
474 my $AssignedId = $Scripts{Assigned} = $InIdScript{Assigned} = $InId++;
475 open(SCRIPT, ">In/$AssignedId.pl") or die "create In/$AssignedId.pl: $!\n";
477 # !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
478 # This file is built by $0 from e.g. $UnicodeData.
479 # Any changes made here will be lost!
# A set bit flushes any pending run; the assignment to $first starts a run
# and is presumably in the else branch -- TODO confirm.
484 for my $i (0..hex($UnicodeLastHex)) {
485 if (vec($CnVec, $i, 1)) {
486 defined $first && flush_zero_range($i);
488 $first = $i unless defined $first;
# Flush a run that is still pending after the last code point.
491 flush_zero_range(hex($UnicodeLastHex)+1);
492 print SCRIPT "END\n";
# Derived properties. Each one concatenates the lines of tables generated
# earlier: do FILE evaluates the file and the value it yields is split on
# newlines. The result is registered under the next $InId.
# NOTE(review): the result of do is not checked, so a table that is missing
# or fails to load contributes no lines.
# NOTE(review): the sort applies hex() to a whole line; presumably only the
# leading code point column matters -- TODO confirm. The loop bodies are not
# visible in this excerpt.
495 # \p{Alphabetic} is \pL and \p{Other_Alphabetic}
498 print "\tAlphabetic\n";
500 push @Alphabetic, split(/\n/, do "Is/L.pl");
501 push @Alphabetic, split(/\n/, do "In/$InIdScript{Other_Alphabetic}.pl");
502 $id = $InIdScript{Alphabetic} = $InId++;
503 open(PROP, ">In/$id.pl") or die "create In/$id.pl: $!\n";
505 # !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
506 # This file is built by $0 from e.g. $UnicodeData.
507 # Any changes made here will be lost!
510 for (sort { hex($a) <=> hex($b) } @Alphabetic) {
518 # \p{Lowercase} is \p{Ll} and \p{Other_Lowercase}
521 print "\tLowercase\n";
523 push @Lowercase, split(/\n/, do "Is/Ll.pl");
524 push @Lowercase, split(/\n/, do "In/$InIdScript{Other_Lowercase}.pl");
525 $id = $InIdScript{Lowercase} = $InId++;
526 open(PROP, ">In/$id.pl") or die "create In/$id.pl: $!\n";
528 # !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
529 # This file is built by $0 from e.g. $UnicodeData.
530 # Any changes made here will be lost!
533 for (sort { hex($a) <=> hex($b) } @Lowercase) {
541 # \p{Uppercase} is \p{Lu} and \p{Other_Uppercase}
544 print "\tUppercase\n";
546 push @Uppercase, split(/\n/, do "Is/Lu.pl");
547 push @Uppercase, split(/\n/, do "In/$InIdScript{Other_Uppercase}.pl");
548 $id = $InIdScript{Uppercase} = $InId++;
549 open(PROP, ">In/$id.pl") or die "create In/$id.pl: $!\n";
551 # !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
552 # This file is built by $0 from e.g. $UnicodeData.
553 # Any changes made here will be lost!
556 for (sort { hex($a) <=> hex($b) } @Uppercase) {
564 # \p{Math} is \p{Sm} and \p{Other_Math}
569 push @Math, split(/\n/, do "Is/Sm.pl");
570 push @Math, split(/\n/, do "In/$InIdScript{Other_Math}.pl");
571 $id = $InIdScript{Math} = $InId++;
572 open(PROP, ">In/$id.pl") or die "create In/$id.pl: $!\n";
574 # !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
575 # This file is built by $0 from e.g. $UnicodeData.
576 # Any changes made here will be lost!
579 for (sort { hex($a) <=> hex($b) } @Math) {
# More derived properties, built the same way: lines of earlier generated
# tables are loaded with do FILE, split on newlines and collected, then the
# property is registered under the next $InId.
# NOTE(review): the result of do is not checked. ID_Continue reads the
# In/<id>.pl file of ID_Start, so it relies on that file being complete on
# disk by then -- the close of PROP is not visible in this excerpt.
587 # \p{L&} is \p{Ll}, \p{Lu} and \p{Lt}
590 print "\tLampersand\n";
592 push @Lampersand, split(/\n/, do "Is/Ll.pl");
593 push @Lampersand, split(/\n/, do "Is/Lu.pl");
594 push @Lampersand, split(/\n/, do "Is/Lt.pl");
595 $id = $InIdScript{Lampersand} = $InId++;
596 open(PROP, ">In/$id.pl") or die "create In/$id.pl: $!\n";
598 # !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
599 # This file is built by $0 from e.g. $UnicodeData.
600 # Any changes made here will be lost!
603 for (sort { hex($a) <=> hex($b) } @Lampersand) {
611 # \p{ID_Start} is \p{Ll}, \p{Lu}, \p{Lt}, \p{Lm}, \p{Lo}, and \p{Nl}
614 print "\tID_Start\n";
616 push @ID_Start, split(/\n/, do "Is/Ll.pl");
617 push @ID_Start, split(/\n/, do "Is/Lu.pl");
618 push @ID_Start, split(/\n/, do "Is/Lt.pl");
619 push @ID_Start, split(/\n/, do "Is/Lm.pl");
620 push @ID_Start, split(/\n/, do "Is/Lo.pl");
621 push @ID_Start, split(/\n/, do "Is/Nl.pl");
622 $id = $InIdScript{ID_Start} = $InId++;
623 open(PROP, ">In/$id.pl") or die "create In/$id.pl: $!\n";
625 # !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
626 # This file is built by $0 from e.g. $UnicodeData.
627 # Any changes made here will be lost!
630 for (sort { hex($a) <=> hex($b) } @ID_Start) {
638 # \p{ID_Continue} is \p{ID_Start}, \p{Mn}, \p{Mc}, \p{Nd}, and \p{Pc}
641 print "\tID_Continue\n";
643 push @ID_Continue, split(/\n/, do "In/$InIdScript{ID_Start}.pl");
644 push @ID_Continue, split(/\n/, do "Is/Mn.pl");
645 push @ID_Continue, split(/\n/, do "Is/Mc.pl");
646 push @ID_Continue, split(/\n/, do "Is/Nd.pl");
647 push @ID_Continue, split(/\n/, do "Is/Pc.pl");
648 $id = $InIdScript{ID_Continue} = $InId++;
649 open(PROP, ">In/$id.pl") or die "create In/$id.pl: $!\n";
651 # !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
652 # This file is built by $0 from e.g. $UnicodeData.
653 # Any changes made here will be lost!
656 for (sort { hex($a) <=> hex($b) } @ID_Continue) {
# Write the index file In.pl.
# NOTE(review): this open is not checked for failure.
663 open(INID, ">In.pl");
666 # !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
667 # This file is built by $0 from e.g. $UnicodeData.
668 # Any changes made here will be lost!
# Invert the name => id hashes into id => name and list the ids in
# numeric order.
672 my %InIdScriptById = reverse %InIdScript;
673 my %InIdBlockById = reverse %InIdBlock;
675 my @InIdScriptById = sort { $a <=> $b } keys %InIdScriptById;
676 my @InIdBlockById = sort { $a <=> $b } keys %InIdBlockById;
# Record the lowercased name of every script/property id.
681 for my $id (@InIdScriptById) {
682 my $name = $InIdScriptById{$id};
683 my $lcname = lc($name);
685 $IdIdLcName{$lcname} = $id;
# A block whose lowercased name is already recorded is entered in %InId as
# <name> Block. What happens to other blocks is not visible in this excerpt.
688 for my $id (@InIdBlockById) {
689 my $name = $InIdBlockById{$id};
690 my $lcname = lc($name);
691 if (exists $IdIdLcName{$lcname}) {
692 $InId{"$name Block"} = $id;
696 $IdIdLcName{$lcname} = $id;
# Emit the names ordered by id.
699 my @InId = sort { $InId{$a} <=> $InId{$b} } keys %InId;
703 foreach my $in (@InId) {
# Every hyphen, space or underscore in the pattern becomes an optional
# separator (hyphen, underscore or whitespace).
705 $inpat =~ s/([- _])/(?:[-_]|\\s+)?/g;
# Names are grouped by their lowercased first two characters.
706 my $inprefix = lc(substr($in, 0, 2));
707 push @{$InIdPrefix{$inprefix}}, [ $in, $inpat ];
708 printf INID "%-45s => %3d,\n", "'$in'", $InId{$in};
# NOTE(review): the two printf calls below use an interpolated string as the
# format, so a percent sign in a prefix, name or pattern would be read as a
# conversion.
717 foreach my $prefix (sort keys %InIdPrefix) {
718 printf INID "'$prefix' => {\n";
719 foreach my $ininpat (@{$InIdPrefix{$prefix}}) {
720 my ($in, $inpat) = @$ininpat;
721 printf INID "\t'$inpat' => '$in',\n";
730 ##################################################
# Presumably the body of proplist(), which the main loop calls with the same
# three arguments; the sub line itself is not visible in this excerpt.
733 my ($table, $wanted, $val) = @_;
# Entries whose third field is $PropData are answered from the property dump.
738 return listFromPropFile($wanted) if $val eq $PropData;
# Pick the data file and the record-splitting code by table name.
# NOTE(review): when an open fails, the first four branches warn with the
# table name rather than the file name, and execution continues.
740 if ($table =~ /^Arab/) {
741 open(UD, "ArabShap.txt") or warn "Can't open $table: $!";
743 $split = '($code, $name, $link, $linkgroup) = split(/; */);';
745 elsif ($table =~ /^Jamo/) {
746 open(UD, "Jamo.txt") or warn "Can't open $table: $!";
748 $split = '($code, $short, $name) = split(/; */); $code =~ s/^U\+//;';
750 elsif ($table =~ /^IsSyl/) {
751 open(UD, $SyllableData) or warn "Can't open $table: $!";
753 $split = '($code, $short, $syl) = split(/; */); $code =~ s/^U\+//;';
755 elsif ($table =~ /^IsLbrk/) {
756 open(UD, "LineBrk.txt") or warn "Can't open $table: $!";
758 $split = '($code, $brk, $name) = /^([0-9a-f]+);(\w+) # (.+)/i;';
761 open(UD, $UnicodeData) or warn "Can't open $UnicodeData: $!";
763 $split = '($code, $name, $cat, $comb, $bid, $decomp, $dec, $dig, $num, $mir, $uni1,
764 $comment, $up, $down, $title) = split(/;/);';
# Tables named To<Upper...> or Is<Upper...>: each collected record is
# [code, value, first-of-range flag] with code and value converted by hex().
# Records are merged into a run while the code points are consecutive and,
# when $val is set, the values are consecutive too. The output has the first
# code, the last code and, when $val is set, the first value in 4-digit hex.
# NOTE(review): the escaped sigils in the push lines suggest they sit inside
# an interpolated string that is evaluated later -- TODO confirm.
767 if ($table =~ /^(?:To|Is)[A-Z]/) {
775 push(\@wanted, [hex \$code, hex $val, \$name =~ /, First>\$/]);
782 $beg = shift @wanted;
784 while (@wanted and $wanted[0]->[0] == $last->[0] + 1 and
785 (not $val or $wanted[0]->[1] == $last->[1] + 1)) {
786 $last = shift @wanted;
788 $out .= sprintf "%04x", $beg->[0];
790 $last = shift @wanted;
796 $out .= sprintf "\t%04x", $last->[0];
798 $out .= sprintf "\t%04x", $beg->[1] if $val;
810 push(\@wanted, [hex \$code, $val, \$name =~ /, First>\$/]);
817 $beg = shift @wanted;
# Second variant: the value is kept as a string, and a run continues only
# while the code points are consecutive and the value stays equal.
819 while (@wanted and $wanted[0]->[0] == $last->[0] + 1 and
820 ($wanted[0]->[1] eq $last->[1])) {
821 $last = shift @wanted;
823 $out .= sprintf "%04x", $beg->[0];
825 $last = shift @wanted;
831 $out .= sprintf "\t%04x", $last->[0];
833 $out .= sprintf "\t%s\n", $beg->[1];
# listFromPropFile: look up one property section in $PropData. Only part of
# the body is visible in this excerpt.
839 sub listFromPropFile {
843 open (UD, $PropData) or die "Can't open $PropData: $!\n";
# Records are separated by a line of 43 asterisks, a blank line and the
# Property dump for: heading.
844 local($/) = "\n" . '*' x 43 . "\n\nProperty dump for:"; # not 42?
# Strip the leading 0x... (name) header and go on only when the name in
# parentheses equals $wanted.
849 if (s/0x[\d\w]+\s+\((.*?)\)// and $wanted eq $1) {
# Read $SyllableData and push one [IsSyl<syl>, test on $syl, empty value]
# entry onto @defs for each distinct syllable value; %seen suppresses
# repeats. The surrounding sub and loop are not visible in this excerpt.
866 open (SD, $SyllableData) or die "Can't open $SyllableData: $!\n";
870 ($code, $name, $syl) = split /; */;
872 push (@defs, ["IsSyl$syl", qq{\$syl eq "$syl"}, ''])
873 unless $seen{$syl}++;