disable problematical 'uninitialized value' warning tests
[p5sagit/p5-mst-13.2.git] / lib / unicore / mktables
CommitLineData
d73e5302 1#!/usr/bin/perl -w
d73e5302 2use strict;
cf25bb62 3use Carp;
5beb625e 4
cf25bb62 5##
6## mktables -- create the runtime Perl Unicode files (lib/unicore/**/*.pl)
7## from the Unicode database files (lib/unicore/*.txt).
8##
d2d499f5 9
5beb625e 10mkdir("lib", 0755);
12ac2576 11mkdir("lib/gc_sc", 0755);
5beb625e 12mkdir("To", 0755);
d73e5302 13
cf25bb62 14##
15## Process any args.
16##
5beb625e 17my $Verbose = 0;
18my $MakeTestScript = 0;
cf25bb62 19
20while (@ARGV)
21{
22 my $arg = shift @ARGV;
23 if ($arg eq '-v') {
24 $Verbose = 1;
25 } elsif ($arg eq '-q') {
26 $Verbose = 0;
5beb625e 27 } elsif ($arg eq '-maketest') {
28 $MakeTestScript = 1;
d73e5302 29 } else {
5beb625e 30 die "usage: $0 [-v|-q] [-maketest]";
d73e5302 31 }
32}
33
cf25bb62 34my $LastUnicodeCodepoint = 0x10FFFF; # As of Unicode 3.1.1.
d73e5302 35
cf25bb62 36my $HEADER=<<"EOF";
d73e5302 37# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
551b6b6f 38# This file is built by $0 from e.g. UnicodeData.txt.
d73e5302 39# Any changes made here will be lost!
cf25bb62 40
41EOF
42
5beb625e 43
44##
45## Given a filename and a reference to an array of lines,
46## write the lines to the file only if the contents have not changed.
47##
48sub WriteIfChanged($\@)
49{
50 my $file = shift;
51 my $lines = shift;
52
53 my $TextToWrite = join '', @$lines;
54 if (open IN, $file) {
55 local($/) = undef;
56 my $PreviousText = <IN>;
57 close IN;
58 if ($PreviousText eq $TextToWrite) {
59 print "$file unchanged.\n" if $Verbose;
60 return;
61 }
62 }
63 if (not open OUT, ">$file") {
64 die "$0: can't open $file for output: $!\n";
65 }
66 print "$file written.\n" if $Verbose;
67
68 print OUT $TextToWrite;
69 close OUT;
70}
71
cf25bb62 72##
73## The main datastructure (a "Table") represents a set of code points that
74## are part of a particular quality (that are part of \pL, \p{InGreek},
75## etc.). They are kept as ranges of code points (starting and ending of
76## each range).
77##
78## For example, a range ASCII LETTERS would be represented as:
79## [ [ 0x41 => 0x5A, 'UPPER' ],
80## [ 0x61 => 0x7A, 'LOWER, ] ]
81##
82sub RANGE_START() { 0 } ## index into range element
83sub RANGE_END() { 1 } ## index into range element
84sub RANGE_NAME() { 2 } ## index into range element
85
99598c8c 86## Conceptually, these should really be folded into the 'Table' objects
cf25bb62 87my %TableInfo;
99598c8c 88my %TableDesc;
cf25bb62 89my %FuzzyNames;
90my %AliasInfo;
5beb625e 91my %CanonicalToOrig;
cf25bb62 92
93##
99598c8c 94## Turn something like
95## OLD-ITALIC
5beb625e 96## into
99598c8c 97## OldItalic
98##
99sub CanonicalName($)
100{
5beb625e 101 my $orig = shift;
102 my $name = lc $orig;
99598c8c 103 $name =~ s/(?<![a-z])(\w)/\u$1/g;
5beb625e 104 $name =~ s/[-_\s]+//g;
99598c8c 105
5beb625e 106 $CanonicalToOrig{$name} = $orig if not $CanonicalToOrig{$name};
99598c8c 107 return $name;
108}
109
12ac2576 110
111##
112## Store the alias definitions for later use.
113##
114my %PropertyAlias;
115my %PropValueAlias;
116
117my %PA_reverse;
118my %PVA_reverse;
119
120sub Build_Aliases()
121{
122 ##
123 ## Most of the work with aliases doesn't occur here,
124 ## but rather in utf8_heavy.pl, which uses utf8_pva.pl,
125 ## which contains just this function. However, this one
126 ##
127 ## -- japhy (2004/04/13)
128
129 open PA, "< PropertyAliases.txt"
130 or confess "Can't open PropertyAliases.txt: $!";
131 while (<PA>) {
132 s/#.*//;
133 s/\s+$//;
134 next if /^$/;
135
136 my ($abbrev, $name) = split /\s*;\s*/;
137 next if $abbrev eq "n/a";
138 $PropertyAlias{$abbrev} = $name;
139 $PA_reverse{$name} = $abbrev;
140 }
141 close PA;
142
143 open PVA, "< PropValueAliases.txt"
144 or confess "Can't open PropValueAliases.txt: $!";
145 while (<PVA>) {
146 s/#.*//;
147 s/\s+$//;
148 next if /^$/;
149
150 my ($prop, @data) = split /\s*;\s*/;
151
152 if ($prop eq 'ccc') {
153 $PropValueAlias{$prop}{$data[1]} = [ @data[0,2] ];
154 $PVA_reverse{$prop}{$data[2]} = [ @data[0,1] ];
155 }
156 else {
157 next if $data[0] eq "n/a";
158 $PropValueAlias{$prop}{$data[0]} = $data[1];
159 $PVA_reverse{$prop}{$data[1]} = $data[0];
160 }
161 }
162 close PVA;
163}
164
165
99598c8c 166##
cf25bb62 167## Associates a property ("Greek", "Lu", "Assigned",...) with a Table.
168##
169## Called like:
99598c8c 170## New_Prop(In => 'Greek', $Table, Desc => 'Greek Block', Fuzzy => 1);
cf25bb62 171##
172## Normally, these parameters are set when the Table is created (when the
173## Table->New constructor is called), but there are times when it needs to
174## be done after-the-fact...)
175##
176sub New_Prop($$$@)
177{
178 my $Type = shift; ## "Is" or "In";
179 my $Name = shift;
180 my $Table = shift;
181
182 ## remaining args are optional key/val
183 my %Args = @_;
184
99598c8c 185 my $Fuzzy = delete $Args{Fuzzy};
186 my $Desc = delete $Args{Desc}; # description
187
5beb625e 188 $Name = CanonicalName($Name) if $Fuzzy;
cf25bb62 189
190 ## sanity check a few args
191 if (%Args or ($Type ne 'Is' and $Type ne 'In') or not ref $Table) {
192 confess "$0: bad args to New_Prop"
193 }
194
195 if (not $TableInfo{$Type}->{$Name})
196 {
197 $TableInfo{$Type}->{$Name} = $Table;
99598c8c 198 $TableDesc{$Type}->{$Name} = $Desc;
199 if ($Fuzzy) {
cf25bb62 200 $FuzzyNames{$Type}->{$Name} = $Name;
201 }
202 }
d73e5302 203}
204
d73e5302 205
cf25bb62 206##
207## Creates a new Table object.
208##
209## Args are key/value pairs:
99598c8c 210## In => Name -- Name of "In" property to be associated with
211## Is => Name -- Name of "Is" property to be associated with
212## Fuzzy => Boolean -- True if name can be accessed "fuzzily"
213## Desc => String -- Description of the property
cf25bb62 214##
215## No args are required.
216##
217sub Table::New
218{
219 my $class = shift;
220 my %Args = @_;
221
222 my $Table = bless [], $class;
223
99598c8c 224 my $Fuzzy = delete $Args{Fuzzy};
225 my $Desc = delete $Args{Desc};
cf25bb62 226
227 for my $Type ('Is', 'In')
228 {
229 if (my $Name = delete $Args{$Type}) {
99598c8c 230 New_Prop($Type => $Name, $Table, Desc => $Desc, Fuzzy => $Fuzzy);
cf25bb62 231 }
232 }
233
234 ## shouldn't have any left over
235 if (%Args) {
236 confess "$0: bad args to Table->New"
237 }
238
239 return $Table;
d73e5302 240}
241
cf25bb62 242##
243## Returns true if the Table has no code points
244##
245sub Table::IsEmpty
246{
247 my $Table = shift; #self
248 return not @$Table;
d73e5302 249}
250
cf25bb62 251##
252## Returns true if the Table has code points
253##
254sub Table::NotEmpty
255{
256 my $Table = shift; #self
257 return @$Table;
d73e5302 258}
259
cf25bb62 260##
261## Returns the maximum code point currently in the table.
262##
263sub Table::Max
264{
265 my $Table = shift; #self
266 confess "oops" if $Table->IsEmpty; ## must have code points to have a max
267 return $Table->[-1]->[RANGE_END];
268}
d73e5302 269
cf25bb62 270##
271## Replaces the codepoints in the Table with those in the Table given
272## as an arg. (NOTE: this is not a "deep copy").
273##
274sub Table::Replace($$)
275{
276 my $Table = shift; #self
277 my $New = shift;
d73e5302 278
cf25bb62 279 @$Table = @$New;
280}
71d929cb 281
cf25bb62 282##
283## Given a new code point, make the last range of the Table extend to
284## include the new (and all intervening) code points.
285##
286sub Table::Extend
287{
288 my $Table = shift; #self
289 my $codepoint = shift;
d73e5302 290
cf25bb62 291 my $PrevMax = $Table->Max;
e904f995 292
cf25bb62 293 confess "oops ($codepoint <= $PrevMax)" if $codepoint <= $PrevMax;
e904f995 294
cf25bb62 295 $Table->[-1]->[RANGE_END] = $codepoint;
296}
c3a8a2b8 297
cf25bb62 298##
299## Given a code point range start and end (and optional name), blindly
300## append them to the list of ranges for the Table.
301##
302## NOTE: Code points must be added in strictly ascending numeric order.
303##
304sub Table::RawAppendRange
305{
306 my $Table = shift; #self
307 my $start = shift;
308 my $end = shift;
309 my $name = shift;
310 $name = "" if not defined $name; ## warning: $name can be "0"
311
312 push @$Table, [ $start, # RANGE_START
313 $end, # RANGE_END
314 $name ]; # RANGE_NAME
315}
c3a8a2b8 316
cf25bb62 317##
318## Given a code point (and optional name), add it to the Table.
319##
320## NOTE: Code points must be added in strictly ascending numeric order.
321##
322sub Table::Append
323{
324 my $Table = shift; #self
325 my $codepoint = shift;
326 my $name = shift;
327 $name = "" if not defined $name; ## warning: $name can be "0"
328
329 ##
330 ## If we've already got a range working, and this code point is the next
331 ## one in line, and if the name is the same, just extend the current range.
332 ##
333 if ($Table->NotEmpty
334 and
335 $Table->Max == $codepoint - 1
336 and
337 $Table->[-1]->[RANGE_NAME] eq $name)
338 {
339 $Table->Extend($codepoint);
c3a8a2b8 340 }
cf25bb62 341 else
342 {
343 $Table->RawAppendRange($codepoint, $codepoint, $name);
d73e5302 344 }
cf25bb62 345}
d73e5302 346
cf25bb62 347##
348## Given a code point range starting value and ending value (and name),
349## Add the range to teh Table.
350##
351## NOTE: Code points must be added in strictly ascending numeric order.
352##
353sub Table::AppendRange
354{
355 my $Table = shift; #self
356 my $start = shift;
357 my $end = shift;
358 my $name = shift;
359 $name = "" if not defined $name; ## warning: $name can be "0"
360
361 $Table->Append($start, $name);
362 $Table->Extend($end) if $end > $start;
363}
d73e5302 364
cf25bb62 365##
366## Return a new Table that represents all code points not in the Table.
367##
368sub Table::Invert
369{
370 my $Table = shift; #self
371
372 my $New = Table->New();
373 my $max = -1;
374 for my $range (@$Table)
375 {
376 my $start = $range->[RANGE_START];
377 my $end = $range->[RANGE_END];
378 if ($start-1 >= $max+1) {
379 $New->AppendRange($max+1, $start-1, "");
380 }
381 $max = $end;
d73e5302 382 }
cf25bb62 383 if ($max+1 < $LastUnicodeCodepoint) {
384 $New->AppendRange($max+1, $LastUnicodeCodepoint);
d73e5302 385 }
cf25bb62 386 return $New;
387}
d73e5302 388
cf25bb62 389##
390## Merges any number of other tables with $self, returning the new table.
391## (existing tables are not modified)
392##
a3a8c5f0 393##
394## Args may be Tables, or individual code points (as integers).
395##
cf25bb62 396## Can be called as either a constructor or a method.
397##
398sub Table::Merge
399{
400 shift(@_) if not ref $_[0]; ## if called as a constructor, lose the class
401 my @Tables = @_;
402
403 ## Accumulate all records from all tables
404 my @Records;
a3a8c5f0 405 for my $Arg (@Tables)
406 {
407 if (ref $Arg) {
408 ## arg is a table -- get its ranges
409 push @Records, @$Arg;
410 } else {
411 ## arg is a codepoint, make a range
412 push @Records, [ $Arg, $Arg ]
413 }
d73e5302 414 }
415
cf25bb62 416 ## sort by range start, with longer ranges coming first.
417 my ($first, @Rest) = sort {
418 ($a->[RANGE_START] <=> $b->[RANGE_START])
419 or
420 ($b->[RANGE_END] <=> $b->[RANGE_END])
421 } @Records;
422
423 my $New = Table->New();
424
425 ## Ensuring the first range is there makes the subsequent loop easier
426 $New->AppendRange($first->[RANGE_START],
427 $first->[RANGE_END]);
428
429 ## Fold in records so long as they add new information.
430 for my $set (@Rest)
431 {
432 my $start = $set->[RANGE_START];
433 my $end = $set->[RANGE_END];
434 if ($start > $New->Max) {
435 $New->AppendRange($start, $end);
436 } elsif ($end > $New->Max) {
437 $New->Extend($end);
438 }
d73e5302 439 }
d73e5302 440
cf25bb62 441 return $New;
d73e5302 442}
443
cf25bb62 444##
445## Given a filename, write a representation of the Table to a file.
99598c8c 446## May have an optional comment as a 2nd arg.
cf25bb62 447##
448sub Table::Write
449{
99598c8c 450 my $Table = shift; #self
cf25bb62 451 my $filename = shift;
99598c8c 452 my $comment = shift;
d73e5302 453
5beb625e 454 my @OUT = $HEADER;
99598c8c 455 if (defined $comment) {
456 $comment =~ s/\s+\Z//;
457 $comment =~ s/^/# /gm;
5beb625e 458 push @OUT, "#\n$comment\n#\n";
99598c8c 459 }
5beb625e 460 push @OUT, "return <<'END';\n";
d73e5302 461
cf25bb62 462 for my $set (@$Table)
463 {
464 my $start = $set->[RANGE_START];
465 my $end = $set->[RANGE_END];
466 my $name = $set->[RANGE_NAME];
d73e5302 467
cf25bb62 468 if ($start == $end) {
5beb625e 469 push @OUT, sprintf "%04X\t\t%s\n", $start, $name;
cf25bb62 470 } else {
5beb625e 471 push @OUT, sprintf "%04X\t%04X\t%s\n", $start, $end, $name;
cf25bb62 472 }
473 }
d73e5302 474
5beb625e 475 push @OUT, "END\n";
476
477 WriteIfChanged($filename, @OUT);
478}
479
480## This used only for making the test script.
481## helper function
482sub IsUsable($)
483{
484 my $code = shift;
485 return 0 if $code <= 0x0000; ## don't use null
486 return 0 if $code >= $LastUnicodeCodepoint; ## keep in range
487 return 0 if ($code >= 0xD800 and $code <= 0xDFFF); ## no surrogates
488 return 0 if ($code >= 0xFDD0 and $code <= 0xFDEF); ## utf8.c says no good
489 return 0 if (($code & 0xFFFF) == 0xFFFE); ## utf8.c says no good
490 return 0 if (($code & 0xFFFF) == 0xFFFF); ## utf8.c says no good
491 return 1;
492}
493
494## Return a code point that's part of the table.
495## Returns nothing if the table is empty (or covers only surrogates).
496## This used only for making the test script.
497sub Table::ValidCode
498{
499 my $Table = shift; #self
500 for my $set (@$Table) {
501 return $set->[RANGE_END] if IsUsable($set->[RANGE_END]);
502 }
503 return ();
504}
505
506## Return a code point that's not part of the table
507## Returns nothing if the table covers all code points.
508## This used only for making the test script.
509sub Table::InvalidCode
510{
511 my $Table = shift; #self
512
513 return 0x1234 if $Table->IsEmpty();
514
515 for my $set (@$Table)
516 {
517 if (IsUsable($set->[RANGE_END] + 1))
518 {
519 return $set->[RANGE_END] + 1;
520 }
521
522 if (IsUsable($set->[RANGE_START] - 1))
523 {
524 return $set->[RANGE_START] - 1;
525 }
526 }
527 return ();
cf25bb62 528}
529
530###########################################################################
531###########################################################################
532###########################################################################
533
534
535##
536## Called like:
99598c8c 537## New_Alias(Is => 'All', SameAs => 'Any', Fuzzy => 1);
cf25bb62 538##
99598c8c 539## The args must be in that order, although the Fuzzy pair may be omitted.
cf25bb62 540##
541## This creates 'IsAll' as an alias for 'IsAny'
542##
543sub New_Alias($$$@)
544{
545 my $Type = shift; ## "Is" or "In"
546 my $Alias = shift;
99598c8c 547 my $SameAs = shift; # expecting "SameAs" -- just ignored
cf25bb62 548 my $Name = shift;
549
550 ## remaining args are optional key/val
551 my %Args = @_;
552
99598c8c 553 my $Fuzzy = delete $Args{Fuzzy};
cf25bb62 554
555 ## sanity check a few args
556 if (%Args or ($Type ne 'Is' and $Type ne 'In') or $SameAs ne 'SameAs') {
557 confess "$0: bad args to New_Alias"
d73e5302 558 }
559
5beb625e 560 $Alias = CanonicalName($Alias) if $Fuzzy;
561
562 if (not $TableInfo{$Type}->{$Name})
563 {
564 my $CName = CanonicalName($Name);
565 if ($TableInfo{$Type}->{$CName}) {
566 confess "$0: Use canonical form '$CName' instead of '$Name' for alias.";
567 } else {
12ac2576 568 confess "$0: don't have original $Type => $Name to make alias\n";
5beb625e 569 }
cf25bb62 570 }
571 if ($TableInfo{$Alias}) {
572 confess "$0: already have original $Type => $Alias; can't make alias";
d73e5302 573 }
cf25bb62 574 $AliasInfo{$Type}->{$Name} = $Alias;
99598c8c 575 if ($Fuzzy) {
cf25bb62 576 $FuzzyNames{$Type}->{$Alias} = $Name;
577 }
578
d73e5302 579}
580
d73e5302 581
cf25bb62 582## All assigned code points
99598c8c 583my $Assigned = Table->New(Is => 'Assigned',
584 Desc => "All assigned code points",
5beb625e 585 Fuzzy => 0);
d2d499f5 586
cf25bb62 587my $Name = Table->New(); ## all characters, individually by name
588my $General = Table->New(); ## all characters, grouped by category
589my %General;
590my %Cat;
d73e5302 591
cf25bb62 592##
551b6b6f 593## Process UnicodeData.txt (Categories, etc.)
cf25bb62 594##
44da8cae 595sub UnicodeData_Txt()
cf25bb62 596{
597 my $Bidi = Table->New();
598 my $Deco = Table->New();
599 my $Comb = Table->New();
600 my $Number = Table->New();
12ac2576 601 my $Mirrored = Table->New();#Is => 'Mirrored',
602 #Desc => "Mirrored in bidirectional text",
603 #Fuzzy => 0);
d73e5302 604
cf25bb62 605 my %DC;
606 my %Bidi;
12ac2576 607 my %Number;
608 $DC{can} = Table->New();
609 $DC{com} = Table->New();
cf25bb62 610
611 ## Initialize Perl-generated categories
551b6b6f 612 ## (Categories from UnicodeData.txt are auto-initialized in gencat)
44da8cae 613 $Cat{Alnum} =
614 Table->New(Is => 'Alnum', Desc => "[[:Alnum:]]", Fuzzy => 0);
615 $Cat{Alpha} =
616 Table->New(Is => 'Alpha', Desc => "[[:Alpha:]]", Fuzzy => 0);
617 $Cat{ASCII} =
618 Table->New(Is => 'ASCII', Desc => "[[:ASCII:]]", Fuzzy => 0);
619 $Cat{Blank} =
620 Table->New(Is => 'Blank', Desc => "[[:Blank:]]", Fuzzy => 0);
621 $Cat{Cntrl} =
622 Table->New(Is => 'Cntrl', Desc => "[[:Cntrl:]]", Fuzzy => 0);
623 $Cat{Digit} =
624 Table->New(Is => 'Digit', Desc => "[[:Digit:]]", Fuzzy => 0);
625 $Cat{Graph} =
626 Table->New(Is => 'Graph', Desc => "[[:Graph:]]", Fuzzy => 0);
627 $Cat{Lower} =
628 Table->New(Is => 'Lower', Desc => "[[:Lower:]]", Fuzzy => 0);
629 $Cat{Print} =
630 Table->New(Is => 'Print', Desc => "[[:Print:]]", Fuzzy => 0);
631 $Cat{Punct} =
632 Table->New(Is => 'Punct', Desc => "[[:Punct:]]", Fuzzy => 0);
633 $Cat{Space} =
634 Table->New(Is => 'Space', Desc => "[[:Space:]]", Fuzzy => 0);
635 $Cat{Title} =
636 Table->New(Is => 'Title', Desc => "[[:Title:]]", Fuzzy => 0);
637 $Cat{Upper} =
638 Table->New(Is => 'Upper', Desc => "[[:Upper:]]", Fuzzy => 0);
639 $Cat{XDigit} =
640 Table->New(Is => 'XDigit', Desc => "[[:XDigit:]]", Fuzzy => 0);
641 $Cat{Word} =
642 Table->New(Is => 'Word', Desc => "[[:Word:]]", Fuzzy => 0);
643 $Cat{SpacePerl} =
644 Table->New(Is => 'SpacePerl', Desc => '\s', Fuzzy => 0);
d73e5302 645
cf25bb62 646 my %To;
647 $To{Upper} = Table->New();
648 $To{Lower} = Table->New();
649 $To{Title} = Table->New();
650 $To{Digit} = Table->New();
651
652 sub gencat($$$$)
653 {
654 my ($name, ## Name ("LATIN CAPITAL LETTER A")
655 $cat, ## Category ("Lu", "Zp", "Nd", etc.)
656 $code, ## Code point (as an integer)
657 $op) = @_;
658
659 my $MajorCat = substr($cat, 0, 1); ## L, M, Z, S, etc
660
661 $Assigned->$op($code);
662 $Name->$op($code, $name);
663 $General->$op($code, $cat);
664
665 ## add to the sub category (e.g. "Lu", "Nd", "Cf", ..)
99598c8c 666 $Cat{$cat} ||= Table->New(Is => $cat,
667 Desc => "General Category '$cat'",
668 Fuzzy => 0);
cf25bb62 669 $Cat{$cat}->$op($code);
670
671 ## add to the major category (e.g. "L", "N", "C", ...)
99598c8c 672 $Cat{$MajorCat} ||= Table->New(Is => $MajorCat,
673 Desc => "Major Category '$MajorCat'",
674 Fuzzy => 0);
cf25bb62 675 $Cat{$MajorCat}->$op($code);
676
677 ($General{$name} ||= Table->New)->$op($code, $name);
678
679 # 005F: SPACING UNDERSCORE
d75d706f 680 $Cat{Word}->$op($code) if $cat =~ /^[LMN]|Pc/;
c65e4d19 681 $Cat{Alnum}->$op($code) if $cat =~ /^[LM]|Nd/;
cf25bb62 682 $Cat{Alpha}->$op($code) if $cat =~ /^[LM]/;
683
d75d706f 684 my $isspace =
685 ($cat =~ /Zs|Zl|Zp/ &&
686 $code != 0x200B) # 200B is ZWSP which is for line break control
687 # and therefore it is not part of "space" even while it is "Zs".
cf25bb62 688 || $code == 0x0009 # 0009: HORIZONTAL TAB
689 || $code == 0x000A # 000A: LINE FEED
690 || $code == 0x000B # 000B: VERTICAL TAB
691 || $code == 0x000C # 000C: FORM FEED
e6f58734 692 || $code == 0x000D # 000D: CARRIAGE RETURN
d75d706f 693 || $code == 0x0085 # 0085: NEL
694
695 ;
cf25bb62 696
d75d706f 697 $Cat{Space}->$op($code) if $isspace;
cf25bb62 698
d75d706f 699 $Cat{SpacePerl}->$op($code) if $isspace
700 && $code != 0x000B; # Backward compat.
cf25bb62 701
d75d706f 702 $Cat{Blank}->$op($code) if $isspace
703 && !($code == 0x000A ||
704 $code == 0x000B ||
705 $code == 0x000C ||
706 $code == 0x000D ||
707 $code == 0x0085 ||
708 $cat =~ /^Z[lp]/);
cf25bb62 709
710 $Cat{Digit}->$op($code) if $cat eq "Nd";
711 $Cat{Upper}->$op($code) if $cat eq "Lu";
712 $Cat{Lower}->$op($code) if $cat eq "Ll";
713 $Cat{Title}->$op($code) if $cat eq "Lt";
714 $Cat{ASCII}->$op($code) if $code <= 0x007F;
715 $Cat{Cntrl}->$op($code) if $cat =~ /^C/;
d75d706f 716 my $isgraph = !$isspace && $cat !~ /Cc|Cs|Cn/;
717 $Cat{Graph}->$op($code) if $isgraph;
718 $Cat{Print}->$op($code) if $isgraph || $isspace;
cf25bb62 719 $Cat{Punct}->$op($code) if $cat =~ /^P/;
720
721 $Cat{XDigit}->$op($code) if ($code >= 0x30 && $code <= 0x39) ## 0..9
722 || ($code >= 0x41 && $code <= 0x46) ## A..F
723 || ($code >= 0x61 && $code <= 0x66); ## a..f
724 }
d73e5302 725
cf25bb62 726 ## open ane read file.....
551b6b6f 727 if (not open IN, "UnicodeData.txt") {
728 die "$0: UnicodeData.txt: $!\n";
cf25bb62 729 }
d73e5302 730
a3a8c5f0 731 ##
732 ## For building \p{_CombAbove} and \p{_CanonDCIJ}
733 ##
734 my %_Above_HexCodes; ## Hexcodes for chars with $comb == 230 ("ABOVE")
735
736 my %CodeToDeco; ## Maps code to decomp. list for chars with first
737 ## decomp. char an "i" or "j" (for \p{_CanonDCIJ})
738
739 ## This is filled in as we go....
99598c8c 740 my $CombAbove = Table->New(Is => '_CombAbove',
741 Desc => '(for internal casefolding use)',
742 Fuzzy => 0);
a3a8c5f0 743
cf25bb62 744 while (<IN>)
745 {
746 next unless /^[0-9A-Fa-f]+;/;
747 s/\s+$//;
748
749 my ($hexcode, ## code point in hex (e.g. "0041")
750 $name, ## character name (e.g. "LATIN CAPITAL LETTER A")
751 $cat, ## category (e.g. "Lu")
752 $comb, ## Canonical combining class (e.t. "230")
753 $bidi, ## directional category (e.g. "L")
754 $deco, ## decomposition mapping
755 $decimal, ## decimal digit value
756 $digit, ## digit value
757 $number, ## numeric value
758 $mirrored, ## mirrored
759 $unicode10, ## name in Unicode 1.0
760 $comment, ## comment field
761 $upper, ## uppercase mapping
762 $lower, ## lowercase mapping
763 $title, ## titlecase mapping
764 ) = split(/\s*;\s*/);
765
2eb5892f 766 # Note that in Unicode 3.2 there will be names like
767 # LINE FEED (LF), which probably means that \N{} needs
768 # to cope also with LINE FEED and LF.
769 $name = $unicode10 if $name eq '<control>' && $unicode10 ne '';
770
cf25bb62 771 my $code = hex($hexcode);
772
a3a8c5f0 773 if ($comb and $comb == 230) {
774 $CombAbove->Append($code);
775 $_Above_HexCodes{$hexcode} = 1;
776 }
777
778 ## Used in building \p{_CanonDCIJ}
779 if ($deco and $deco =~ m/^006[9A]\b/) {
780 $CodeToDeco{$code} = $deco;
781 }
782
cf25bb62 783 ##
784 ## There are a few pairs of lines like:
785 ## AC00;<Hangul Syllable, First>;Lo;0;L;;;;;N;;;;;
786 ## D7A3;<Hangul Syllable, Last>;Lo;0;L;;;;;N;;;;;
787 ## that define ranges.
788 ##
789 if ($name =~ /^<(.+), (First|Last)>$/)
790 {
791 $name = $1;
792 gencat($name, $cat, $code, $2 eq 'First' ? 'Append' : 'Extend');
99598c8c 793 #New_Prop(In => $name, $General{$name}, Fuzzy => 1);
cf25bb62 794 }
795 else
796 {
797 ## normal (single-character) lines
798 gencat($name, $cat, $code, 'Append');
799
800 # No Append() here since since several codes may map into one.
801 $To{Upper}->RawAppendRange($code, $code, $upper) if $upper;
802 $To{Lower}->RawAppendRange($code, $code, $lower) if $lower;
803 $To{Title}->RawAppendRange($code, $code, $title) if $title;
804 $To{Digit}->Append($code, $decimal) if length $decimal;
805
806 $Bidi->Append($code, $bidi);
807 $Comb->Append($code, $comb) if $comb;
808 $Number->Append($code, $number) if length $number;
809
12ac2576 810 length($decimal) and ($Number{De} ||= Table->New())->Append($code)
811 or
812 length($digit) and ($Number{Di} ||= Table->New())->Append($code)
813 or
814 length($number) and ($Number{Nu} ||= Table->New())->Append($code);
815
cf25bb62 816 $Mirrored->Append($code) if $mirrored eq "Y";
817
12ac2576 818 $Bidi{$bidi} ||= Table->New();#Is => "bt/$bidi",
819 #Desc => "Bi-directional category '$bidi'",
820 #Fuzzy => 0);
cf25bb62 821 $Bidi{$bidi}->Append($code);
822
823 if ($deco)
824 {
825 $Deco->Append($code, $deco);
826 if ($deco =~/^<(\w+)>/)
827 {
12ac2576 828 my $dshort = $PVA_reverse{dt}{ucfirst lc $1};
829 $DC{com}->Append($code);
cf25bb62 830
12ac2576 831 $DC{$dshort} ||= Table->New();
832 $DC{$dshort}->Append($code);
cf25bb62 833 }
834 else
835 {
12ac2576 836 $DC{can}->Append($code);
cf25bb62 837 }
838 }
839 }
840 }
841 close IN;
d2d499f5 842
cf25bb62 843 ##
844 ## Tidy up a few special cases....
845 ##
d73e5302 846
cf25bb62 847 $Cat{Cn} = $Assigned->Invert; ## Cn is everything that doesn't exist
99598c8c 848 New_Prop(Is => 'Cn',
849 $Cat{Cn},
850 Desc => "General Category 'Cn' [not functional in Perl]",
851 Fuzzy => 0);
d73e5302 852
cf25bb62 853 ## Unassigned is the same as 'Cn'
5beb625e 854 New_Alias(Is => 'Unassigned', SameAs => 'Cn', Fuzzy => 0);
d73e5302 855
cf25bb62 856 $Cat{C}->Replace($Cat{C}->Merge($Cat{Cn})); ## Now merge in Cn into C
d73e5302 857
d73e5302 858
12ac2576 859 # LC is Ll, Lu, and Lt.
860 # (used to be L& or L_, but PropValueAliases.txt defines it as LC)
861 New_Prop(Is => 'LC',
cf25bb62 862 Table->Merge(@Cat{qw[Ll Lu Lt]}),
99598c8c 863 Desc => '[\p{Ll}\p{Lu}\p{Lt}]',
864 Fuzzy => 0);
d73e5302 865
cf25bb62 866 ## Any and All are all code points.
99598c8c 867 my $Any = Table->New(Is => 'Any',
868 Desc => sprintf("[\\x{0000}-\\x{%X}]",
869 $LastUnicodeCodepoint),
5beb625e 870 Fuzzy => 0);
cf25bb62 871 $Any->RawAppendRange(0, $LastUnicodeCodepoint);
d73e5302 872
5beb625e 873 New_Alias(Is => 'All', SameAs => 'Any', Fuzzy => 0);
d73e5302 874
a3a8c5f0 875 ##
876 ## Build special properties for Perl's internal case-folding needs:
877 ## \p{_CaseIgnorable}
878 ## \p{_CanonDCIJ}
879 ## \p{_CombAbove}
880 ## _CombAbove was built above. Others are built here....
881 ##
882
883 ## \p{_CaseIgnorable} is [\p{Mn}\0x00AD\x2010]
884 New_Prop(Is => '_CaseIgnorable',
885 Table->Merge($Cat{Mn},
886 0x00AD, #SOFT HYPHEN
887 0x2010), #HYPHEN
99598c8c 888 Desc => '(for internal casefolding use)',
889 Fuzzy => 0);
a3a8c5f0 890
891
892 ## \p{_CanonDCIJ} is fairly complex...
99598c8c 893 my $CanonCDIJ = Table->New(Is => '_CanonDCIJ',
894 Desc => '(for internal casefolding use)',
895 Fuzzy => 0);
a3a8c5f0 896 ## It contains the ASCII 'i' and 'j'....
897 $CanonCDIJ->Append(0x0069); # ASCII ord("i")
898 $CanonCDIJ->Append(0x006A); # ASCII ord("j")
899 ## ...and any character with a decomposition that starts with either of
900 ## those code points, but only if the decomposition does not have any
901 ## combining character with the "ABOVE" canonical combining class.
902 for my $code (sort { $a <=> $b} keys %CodeToDeco)
903 {
904 ## Need to ensure that all decomposition characters do not have
905 ## a %HexCodeToComb in %AboveCombClasses.
906 my $want = 1;
907 for my $deco_hexcode (split / /, $CodeToDeco{$code})
908 {
909 if (exists $_Above_HexCodes{$deco_hexcode}) {
910 ## one of the decmposition chars has an ABOVE combination
911 ## class, so we're not interested in this one
912 $want = 0;
913 last;
914 }
915 }
916 if ($want) {
917 $CanonCDIJ->Append($code);
918 }
919 }
920
921
d73e5302 922
cf25bb62 923 ##
924 ## Now dump the files.
925 ##
926 $Name->Write("Name.pl");
12ac2576 927
928 # $Bidi->Write("Bidirectional.pl");
929 mkdir("lib/bc", 0755);
930 for (keys %Bidi) {
931 $Bidi{$_}->Write(
932 "lib/bc/$_.pl",
933 "BidiClass category '$PropValueAlias{bc}{$_}'"
934 );
935 }
936
cf25bb62 937 $Comb->Write("CombiningClass.pl");
12ac2576 938 mkdir("lib/ccc", 0755);
939 for (keys %{ $PropValueAlias{ccc} }) {
940 my ($code, $name) = @{ $PropValueAlias{ccc}{$_} };
941 (my $c = Table->New())->Append($code);
942 $c->Write(
943 "lib/ccc/$_.pl",
944 "CombiningClass category '$name'"
945 );
946 }
947
cf25bb62 948 $Deco->Write("Decomposition.pl");
12ac2576 949 mkdir("lib/dt", 0755);
950 for (keys %DC) {
951 $DC{$_}->Write(
952 "lib/dt/$_.pl",
953 "DecompositionType category '$PropValueAlias{dt}{$_}'"
954 );
955 }
956
957 # $Number->Write("Number.pl");
958 mkdir("lib/nt", 0755);
959 for (keys %Number) {
960 $Number{$_}->Write(
961 "lib/nt/$_.pl",
962 "NumericType category '$PropValueAlias{nt}{$_}'"
963 );
964 }
965
966 # $General->Write("Category.pl");
cf25bb62 967
968 for my $to (sort keys %To) {
969 $To{$to}->Write("To/$to.pl");
d73e5302 970 }
12ac2576 971
972 for (keys %{ $PropValueAlias{gc} }) {
973 New_Alias(Is => $PropValueAlias{gc}{$_}, SameAs => $_, Fuzzy => 1);
974 }
d73e5302 975}
976
cf25bb62 977##
551b6b6f 978## Process LineBreak.txt
cf25bb62 979##
551b6b6f 980sub LineBreak_Txt()
cf25bb62 981{
551b6b6f 982 if (not open IN, "LineBreak.txt") {
983 die "$0: LineBreak.txt: $!\n";
cf25bb62 984 }
d73e5302 985
cf25bb62 986 my $Lbrk = Table->New();
987 my %Lbrk;
d73e5302 988
cf25bb62 989 while (<IN>)
990 {
991 next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(\w+)/;
d73e5302 992
cf25bb62 993 my ($first, $last, $lbrk) = (hex($1), hex($2||""), $3);
d73e5302 994
cf25bb62 995 $Lbrk->Append($first, $lbrk);
d73e5302 996
12ac2576 997 $Lbrk{$lbrk} ||= Table->New();
cf25bb62 998 $Lbrk{$lbrk}->Append($first);
d73e5302 999
cf25bb62 1000 if ($last) {
1001 $Lbrk->Extend($last);
1002 $Lbrk{$lbrk}->Extend($last);
d73e5302 1003 }
1004 }
cf25bb62 1005 close IN;
d73e5302 1006
12ac2576 1007 # $Lbrk->Write("Lbrk.pl");
1008
1009 mkdir("lib/lb", 0755);
1010
1011 for (keys %Lbrk) {
1012 $Lbrk{$_}->Write(
1013 "lib/lb/$_.pl",
1014 "Linebreak category '$PropValueAlias{lb}{$_}'"
1015 );
1016 }
cf25bb62 1017}
d73e5302 1018
cf25bb62 1019##
551b6b6f 1020## Process ArabicShaping.txt.
cf25bb62 1021##
551b6b6f 1022sub ArabicShaping_txt()
cf25bb62 1023{
551b6b6f 1024 if (not open IN, "ArabicShaping.txt") {
1025 die "$0: ArabicShaping.txt: $!\n";
cf25bb62 1026 }
d73e5302 1027
cf25bb62 1028 my $ArabLink = Table->New();
1029 my $ArabLinkGroup = Table->New();
d73e5302 1030
12ac2576 1031 my %JoinType;
1032
cf25bb62 1033 while (<IN>)
1034 {
1035 next unless /^[0-9A-Fa-f]+;/;
1036 s/\s+$//;
d73e5302 1037
cf25bb62 1038 my ($hexcode, $name, $link, $linkgroup) = split(/\s*;\s*/);
1039 my $code = hex($hexcode);
1040 $ArabLink->Append($code, $link);
1041 $ArabLinkGroup->Append($code, $linkgroup);
12ac2576 1042
1043 $JoinType{$link} ||= Table->New(Is => "JoinType$link");
1044 $JoinType{$link}->Append($code);
d73e5302 1045 }
cf25bb62 1046 close IN;
1047
12ac2576 1048 # $ArabLink->Write("ArabLink.pl");
1049 # $ArabLinkGroup->Write("ArabLnkGrp.pl");
1050
1051 mkdir("lib/jt", 0755);
1052
1053 for (keys %JoinType) {
1054 $JoinType{$_}->Write(
1055 "lib/jt/$_.pl",
1056 "JoiningType category '$PropValueAlias{jt}{$_}'"
1057 );
1058 }
1059}
1060
1061##
1062## Process EastAsianWidth.txt.
1063##
1064sub EastAsianWidth_txt()
1065{
1066 if (not open IN, "EastAsianWidth.txt") {
1067 die "$0: EastAsianWidth.txt: $!\n";
1068 }
1069
1070 my %EAW;
1071
1072 while (<IN>)
1073 {
1074 next unless /^[0-9A-Fa-f]+;/;
1075 s/#.*//;
1076 s/\s+$//;
1077
1078 my ($hexcode, $pv) = split(/\s*;\s*/);
1079 my $code = hex($hexcode);
1080 $EAW{$pv} ||= Table->New(Is => "EastAsianWidth$pv");
1081 $EAW{$pv}->Append($code);
1082 }
1083 close IN;
1084
1085 mkdir("lib/ea", 0755);
1086
1087 for (keys %EAW) {
1088 $EAW{$_}->Write(
1089 "lib/ea/$_.pl",
1090 "EastAsianWidth category '$PropValueAlias{ea}{$_}'"
1091 );
1092 }
1093}
1094
1095##
1096## Process HangulSyllableType.txt.
1097##
1098sub HangulSyllableType_txt()
1099{
1100 if (not open IN, "HangulSyllableType.txt") {
1101 die "$0: HangulSyllableType.txt: $!\n";
1102 }
1103
1104 my %HST;
1105
1106 while (<IN>)
1107 {
1108 next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(\w+)/;
1109 my ($first, $last, $pv) = (hex($1), hex($2||""), $3);
1110
1111 $HST{$pv} ||= Table->New(Is => "HangulSyllableType$pv");
1112 $HST{$pv}->Append($first);
1113
1114 if ($last) { $HST{$pv}->Extend($last) }
1115 }
1116 close IN;
1117
1118 mkdir("lib/hst", 0755);
1119
1120 for (keys %HST) {
1121 $HST{$_}->Write(
1122 "lib/hst/$_.pl",
1123 "HangulSyllableType category '$PropValueAlias{hst}{$_}'"
1124 );
1125 }
d73e5302 1126}
1127
cf25bb62 1128##
1129## Process Jamo.txt.
1130##
1131sub Jamo_txt()
1132{
1133 if (not open IN, "Jamo.txt") {
1134 die "$0: Jamo.txt: $!\n";
1135 }
1136 my $Short = Table->New();
d73e5302 1137
cf25bb62 1138 while (<IN>)
1139 {
1140 next unless /^([0-9A-Fa-f]+)\s*;\s*(\w*)/;
1141 my ($code, $short) = (hex($1), $2);
d73e5302 1142
cf25bb62 1143 $Short->Append($code, $short);
d73e5302 1144 }
cf25bb62 1145 close IN;
12ac2576 1146 # $Short->Write("JamoShort.pl");
d73e5302 1147}
1148
cf25bb62 1149##
1150## Process Scripts.txt.
1151##
1152sub Scripts_txt()
1153{
1154 my @ScriptInfo;
d73e5302 1155
cf25bb62 1156 if (not open(IN, "Scripts.txt")) {
1157 die "$0: Scripts.txt: $!\n";
1158 }
1159 while (<IN>) {
1160 next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/;
d73e5302 1161
cf25bb62 1162 # Wait until all the scripts have been read since
1163 # they are not listed in numeric order.
1164 push @ScriptInfo, [ hex($1), hex($2||""), $3 ];
1165 }
1166 close IN;
d73e5302 1167
cf25bb62 1168 # Now append the scripts properties in their code point order.
d73e5302 1169
cf25bb62 1170 my %Script;
1171 my $Scripts = Table->New();
d73e5302 1172
cf25bb62 1173 for my $script (sort { $a->[0] <=> $b->[0] } @ScriptInfo)
1174 {
1175 my ($first, $last, $name) = @$script;
1176 $Scripts->Append($first, $name);
d73e5302 1177
99598c8c 1178 $Script{$name} ||= Table->New(Is => $name,
1179 Desc => "Script '$name'",
1180 Fuzzy => 1);
cf25bb62 1181 $Script{$name}->Append($first, $name);
d73e5302 1182
cf25bb62 1183 if ($last) {
1184 $Scripts->Extend($last);
1185 $Script{$name}->Extend($last);
1186 }
1187 }
d73e5302 1188
12ac2576 1189 # $Scripts->Write("Scripts.pl");
d73e5302 1190
cf25bb62 1191 ## Common is everything not explicitly assigned to a Script
1192 ##
1193 ## ***shouldn't this be intersected with \p{Assigned}? ******
1194 ##
99598c8c 1195 New_Prop(Is => 'Common',
1196 $Scripts->Invert,
1197 Desc => 'Pseudo-Script of codepoints not in other Unicode scripts',
1198 Fuzzy => 1);
cf25bb62 1199}
d73e5302 1200
cf25bb62 1201##
1202## Given a name like "Close Punctuation", return a regex (that when applied
1203## with /i) matches any valid form of that name (e.g. "ClosePunctuation",
1204## "Close-Punctuation", etc.)
1205##
1206## Accept any space, dash, or underbar where in the official name there is
1207## space or a dash (or underbar, but there never is).
1208##
1209##
1210sub NameToRegex($)
1211{
1212 my $Name = shift;
1213 $Name =~ s/[- _]/(?:[-_]|\\s+)?/g;
1214 return $Name;
1215}
d73e5302 1216
cf25bb62 1217##
1218## Process Blocks.txt.
1219##
1220sub Blocks_txt()
1221{
1222 my $Blocks = Table->New();
1223 my %Blocks;
d73e5302 1224
cf25bb62 1225 if (not open IN, "Blocks.txt") {
1226 die "$0: Blocks.txt: $!\n";
1227 }
d73e5302 1228
cf25bb62 1229 while (<IN>)
1230 {
1231 #next if not /Private Use$/;
1232 next if not /^([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.+?)\s*$/;
d73e5302 1233
cf25bb62 1234 my ($first, $last, $name) = (hex($1), hex($2), $3);
d73e5302 1235
cf25bb62 1236 $Blocks->Append($first, $name);
76ccdbe2 1237
99598c8c 1238 $Blocks{$name} ||= Table->New(In => $name,
1239 Desc => "Block '$name'",
1240 Fuzzy => 1);
cf25bb62 1241 $Blocks{$name}->Append($first, $name);
76ccdbe2 1242
cf25bb62 1243 if ($last and $last != $first) {
1244 $Blocks->Extend($last);
1245 $Blocks{$name}->Extend($last);
d73e5302 1246 }
d73e5302 1247 }
cf25bb62 1248 close IN;
1249
12ac2576 1250 # $Blocks->Write("Blocks.pl");
d73e5302 1251}
1252
cf25bb62 1253##
1254## Read in the PropList.txt. It contains extended properties not
551b6b6f 1255## listed in the UnicodeData.txt, such as 'Other_Alphabetic':
cf25bb62 1256## alphabetic but not of the general category L; many modifiers
1257## belong to this extended property category: while they are not
1258## alphabets, they are alphabetic in nature.
1259##
1260sub PropList_txt()
1261{
1262 my @PropInfo;
1263
1264 if (not open IN, "PropList.txt") {
1265 die "$0: PropList.txt: $!\n";
1266 }
d73e5302 1267
cf25bb62 1268 while (<IN>)
1269 {
1270 next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/;
d73e5302 1271
cf25bb62 1272 # Wait until all the extended properties have been read since
1273 # they are not listed in numeric order.
1274 push @PropInfo, [ hex($1), hex($2||""), $3 ];
1275 }
1276 close IN;
71d929cb 1277
cf25bb62 1278 # Now append the extended properties in their code point order.
1279 my $Props = Table->New();
1280 my %Prop;
71d929cb 1281
cf25bb62 1282 for my $prop (sort { $a->[0] <=> $b->[0] } @PropInfo)
1283 {
1284 my ($first, $last, $name) = @$prop;
1285 $Props->Append($first, $name);
71d929cb 1286
99598c8c 1287 $Prop{$name} ||= Table->New(Is => $name,
1288 Desc => "Extended property '$name'",
1289 Fuzzy => 1);
cf25bb62 1290 $Prop{$name}->Append($first, $name);
71d929cb 1291
cf25bb62 1292 if ($last) {
1293 $Props->Extend($last);
1294 $Prop{$name}->Extend($last);
1295 }
71d929cb 1296 }
d73e5302 1297
12ac2576 1298 for (keys %Prop) {
1299 (my $file = $PA_reverse{$_}) =~ tr/_//d;
1300 $Prop{$_}->Write(
1301 "lib/gc_sc/$file.pl",
1302 "Binary property '$_'"
1303 );
1304 }
1305
cf25bb62 1306 # Alphabetic is L and Other_Alphabetic.
99598c8c 1307 New_Prop(Is => 'Alphabetic',
cf25bb62 1308 Table->Merge($Cat{L}, $Prop{Other_Alphabetic}),
99598c8c 1309 Desc => '[\p{L}\p{OtherAlphabetic}]', # use canonical names here
1310 Fuzzy => 1);
cf25bb62 1311
1312 # Lowercase is Ll and Other_Lowercase.
99598c8c 1313 New_Prop(Is => 'Lowercase',
cf25bb62 1314 Table->Merge($Cat{Ll}, $Prop{Other_Lowercase}),
99598c8c 1315 Desc => '[\p{Ll}\p{OtherLowercase}]', # use canonical names here
1316 Fuzzy => 1);
cf25bb62 1317
1318 # Uppercase is Lu and Other_Uppercase.
1319 New_Prop(Is => 'Uppercase',
1320 Table->Merge($Cat{Lu}, $Prop{Other_Uppercase}),
99598c8c 1321 Desc => '[\p{Lu}\p{Other_Uppercase}]', # use canonical names here
1322 Fuzzy => 1);
cf25bb62 1323
1324 # Math is Sm and Other_Math.
1325 New_Prop(Is => 'Math',
1326 Table->Merge($Cat{Sm}, $Prop{Other_Math}),
99598c8c 1327 Desc => '[\p{Sm}\p{OtherMath}]', # use canonical names here
1328 Fuzzy => 1);
cf25bb62 1329
1330 # ID_Start is Ll, Lu, Lt, Lm, Lo, and Nl.
1331 New_Prop(Is => 'ID_Start',
1332 Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl]}),
99598c8c 1333 Desc => '[\p{Ll}\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{Nl}]',
1334 Fuzzy => 1);
cf25bb62 1335
1336 # ID_Continue is ID_Start, Mn, Mc, Nd, and Pc.
1337 New_Prop(Is => 'ID_Continue',
1338 Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl Mn Mc Nd Pc ]}),
99598c8c 1339 Desc => '[\p{ID_Start}\p{Mn}\p{Mc}\p{Nd}\p{Pc}]',
1340 Fuzzy => 1);
d73e5302 1341}
1342
5beb625e 1343
1344##
1345## These are used in:
1346## MakePropTestScript()
1347## WriteAllMappings()
1348## for making the test script.
1349##
1350my %FuzzyNameToTest;
1351my %ExactNameToTest;
1352
1353
1354## This used only for making the test script
1355sub GenTests($$$$)
1356{
1357 my $FH = shift;
1358 my $Prop = shift;
1359 my $MatchCode = shift;
1360 my $FailCode = shift;
1361
1362 if (defined $MatchCode) {
1363 printf $FH qq/Expect(1, "\\x{%04X}", '\\p{$Prop}' );\n/, $MatchCode;
1364 printf $FH qq/Expect(0, "\\x{%04X}", '\\p{^$Prop}');\n/, $MatchCode;
1365 printf $FH qq/Expect(0, "\\x{%04X}", '\\P{$Prop}' );\n/, $MatchCode;
1366 printf $FH qq/Expect(1, "\\x{%04X}", '\\P{^$Prop}');\n/, $MatchCode;
1367 }
1368 if (defined $FailCode) {
1369 printf $FH qq/Expect(0, "\\x{%04X}", '\\p{$Prop}' );\n/, $FailCode;
1370 printf $FH qq/Expect(1, "\\x{%04X}", '\\p{^$Prop}');\n/, $FailCode;
1371 printf $FH qq/Expect(1, "\\x{%04X}", '\\P{$Prop}' );\n/, $FailCode;
1372 printf $FH qq/Expect(0, "\\x{%04X}", '\\P{^$Prop}');\n/, $FailCode;
1373 }
1374}
1375
1376## This used only for making the test script
1377sub ExpectError($$)
1378{
1379 my $FH = shift;
1380 my $prop = shift;
1381
1382 print $FH qq/Error('\\p{$prop}');\n/;
1383 print $FH qq/Error('\\P{$prop}');\n/;
1384}
1385
1386## This used only for making the test script
1387my @GoodSeps = (
1388 " ",
1389 "-",
1390 " \t ",
1391 "",
1392 "",
1393 "_",
1394 );
1395my @BadSeps = (
1396 "--",
1397 "__",
1398 " _",
1399 "/"
1400 );
1401
1402## This used only for making the test script
1403sub RandomlyFuzzifyName($;$)
1404{
1405 my $Name = shift;
1406 my $WantError = shift; ## if true, make an error
1407
1408 my @parts;
1409 for my $part (split /[-\s_]+/, $Name)
1410 {
1411 if (@parts) {
1412 if ($WantError and rand() < 0.3) {
1413 push @parts, $BadSeps[rand(@BadSeps)];
1414 $WantError = 0;
1415 } else {
1416 push @parts, $GoodSeps[rand(@GoodSeps)];
1417 }
1418 }
1419 my $switch = int rand(4);
1420 if ($switch == 0) {
1421 push @parts, uc $part;
1422 } elsif ($switch == 1) {
1423 push @parts, lc $part;
1424 } elsif ($switch == 2) {
1425 push @parts, ucfirst $part;
1426 } else {
1427 push @parts, $part;
1428 }
1429 }
1430 my $new = join('', @parts);
1431
1432 if ($WantError) {
1433 if (rand() >= 0.5) {
1434 $new .= $BadSeps[rand(@BadSeps)];
1435 } else {
1436 $new = $BadSeps[rand(@BadSeps)] . $new;
1437 }
1438 }
1439 return $new;
1440}
1441
1442## This used only for making the test script
1443sub MakePropTestScript()
1444{
1445 ## this written directly -- it's huge.
1446 if (not open OUT, ">TestProp.pl") {
1447 die "$0: TestProp.pl: $!\n";
1448 }
1449 print OUT <DATA>;
1450
1451 while (my ($Name, $Table) = each %ExactNameToTest)
1452 {
1453 GenTests(*OUT, $Name, $Table->ValidCode, $Table->InvalidCode);
1454 ExpectError(*OUT, uc $Name) if uc $Name ne $Name;
1455 ExpectError(*OUT, lc $Name) if lc $Name ne $Name;
1456 }
1457
1458
1459 while (my ($Name, $Table) = each %FuzzyNameToTest)
1460 {
1461 my $Orig = $CanonicalToOrig{$Name};
1462 my %Names = (
1463 $Name => 1,
1464 $Orig => 1,
1465 RandomlyFuzzifyName($Orig) => 1
1466 );
1467
1468 for my $N (keys %Names) {
1469 GenTests(*OUT, $N, $Table->ValidCode, $Table->InvalidCode);
1470 }
1471
1472 ExpectError(*OUT, RandomlyFuzzifyName($Orig, 'ERROR'));
1473 }
1474
1475 print OUT "Finished();\n";
1476 close OUT;
1477}
1478
1479
1480##
1481## These are used only in:
1482## RegisterFileForName()
1483## WriteAllMappings()
1484##
1485my %Exact; ## will become %utf8::Exact;
1486my %Canonical; ## will become %utf8::Canonical;
1487my %CaComment; ## Comment for %Canonical entry of same key
1488
1489##
1490## Given info about a name and a datafile that it should be associated with,
1491## register that assocation in %Exact and %Canonical.
1492sub RegisterFileForName($$$$)
1493{
1494 my $Type = shift;
1495 my $Name = shift;
1496 my $IsFuzzy = shift;
1497 my $filename = shift;
1498
1499 ##
1500 ## Now in details for the mapping. $Type eq 'Is' has the
1501 ## Is removed, as it will be removed in utf8_heavy when this
1502 ## data is being checked. In keeps its "In", but a second
1503 ## sans-In record is written if it doesn't conflict with
1504 ## anything already there.
1505 ##
1506 if (not $IsFuzzy)
1507 {
1508 if ($Type eq 'Is') {
1509 die "oops[$Name]" if $Exact{$Name};
1510 $Exact{$Name} = $filename;
1511 } else {
1512 die "oops[$Type$Name]" if $Exact{"$Type$Name"};
1513 $Exact{"$Type$Name"} = $filename;
1514 $Exact{$Name} = $filename if not $Exact{$Name};
1515 }
1516 }
1517 else
1518 {
1519 my $CName = lc $Name;
1520 if ($Type eq 'Is') {
1521 die "oops[$CName]" if $Canonical{$CName};
1522 $Canonical{$CName} = $filename;
1523 $CaComment{$CName} = $Name if $Name =~ tr/A-Z// >= 2;
1524 } else {
1525 die "oops[$Type$CName]" if $Canonical{lc "$Type$CName"};
1526 $Canonical{lc "$Type$CName"} = $filename;
1527 $CaComment{lc "$Type$CName"} = "$Type$Name";
1528 if (not $Canonical{$CName}) {
1529 $Canonical{$CName} = $filename;
1530 $CaComment{$CName} = "$Type$Name";
1531 }
1532 }
1533 }
1534}
1535
cf25bb62 1536##
1537## Writes the info accumulated in
1538##
1539## %TableInfo;
1540## %FuzzyNames;
1541## %AliasInfo;
1542##
1543##
1544sub WriteAllMappings()
1545{
99598c8c 1546 my @MAP;
1547
5beb625e 1548 my %BaseNames; ## Base names already used (for avoiding 8.3 conflicts)
cf25bb62 1549
5beb625e 1550 ## 'Is' *MUST* come first, so its names have precidence over 'In's
1551 for my $Type ('Is', 'In')
1552 {
1553 my %RawNameToFile; ## a per-$Type cache
cf25bb62 1554
5beb625e 1555 for my $Name (sort {length $a <=> length $b} keys %{$TableInfo{$Type}})
cf25bb62 1556 {
5beb625e 1557 ## Note: $Name is already canonical
99598c8c 1558 my $Table = $TableInfo{$Type}->{$Name};
5beb625e 1559 my $IsFuzzy = $FuzzyNames{$Type}->{$Name};
99598c8c 1560
1561 ## Need an 8.3 safe filename (which means "an 8 safe" $filename)
5beb625e 1562 my $filename;
cf25bb62 1563 {
5beb625e 1564 ## 'Is' items lose 'Is' from the basename.
12ac2576 1565 $filename = $Type eq 'Is' ?
1566 ($PVA_reverse{sc}{$Name} || $Name) :
1567 "$Type$Name";
5beb625e 1568
1569 $filename =~ s/[^\w_]+/_/g; # "L&" -> "L_"
1570 substr($filename, 8) = '' if length($filename) > 8;
1571
1572 ##
1573 ## Make sure the basename doesn't conflict with something we
1574 ## might have already written. If we have, say,
1575 ## InGreekExtended1
1576 ## InGreekExtended2
1577 ## they become
1578 ## InGreekE
1579 ## InGreek2
1580 ##
1581 while (my $num = $BaseNames{lc $filename}++)
1582 {
1583 $num++; ## so basenames with numbers start with '2', which
1584 ## just looks more natural.
1585 ## Want to append $num, but if it'll make the basename longer
1586 ## than 8 characters, pre-truncate $filename so that the result
1587 ## is acceptable.
1588 my $delta = length($filename) + length($num) - 8;
1589 if ($delta > 0) {
1590 substr($filename, -$delta) = $num;
1591 } else {
1592 $filename .= $num;
1593 }
99598c8c 1594 }
5beb625e 1595 };
99598c8c 1596
1597 ##
1598 ## Construct a nice comment to add to the file, and build data
1599 ## for the "./Properties" file along the way.
1600 ##
1601 my $Comment;
1602 {
1603 my $Desc = $TableDesc{$Type}->{$Name} || "";
1604 ## get list of names this table is reference by
1605 my @Supported = $Name;
1606 while (my ($Orig, $Alias) = each %{ $AliasInfo{$Type} })
1607 {
1608 if ($Orig eq $Name) {
1609 push @Supported, $Alias;
1610 }
1611 }
1612
1613 my $TypeToShow = $Type eq 'Is' ? "" : $Type;
1614 my $OrigProp;
1615
1616 $Comment = "This file supports:\n";
1617 for my $N (@Supported)
1618 {
1619 my $IsFuzzy = $FuzzyNames{$Type}->{$N};
5beb625e 1620 my $Prop = "\\p{$TypeToShow$Name}";
99598c8c 1621 $OrigProp = $Prop if not $OrigProp; #cache for aliases
1622 if ($IsFuzzy) {
1623 $Comment .= "\t$Prop (and fuzzy permutations)\n";
1624 } else {
1625 $Comment .= "\t$Prop\n";
1626 }
1627 my $MyDesc = ($N eq $Name) ? $Desc : "Alias for $OrigProp ($Desc)";
1628
1629 push @MAP, sprintf("%s %-42s %s\n",
1630 $IsFuzzy ? '*' : ' ', $Prop, $MyDesc);
1631 }
1632 if ($Desc) {
1633 $Comment .= "\nMeaning: $Desc\n";
1634 }
1635
1636 }
cf25bb62 1637 ##
1638 ## Okay, write the file...
1639 ##
12ac2576 1640 $Table->Write("lib/gc_sc/$filename.pl", $Comment);
99598c8c 1641
5beb625e 1642 ## and register it
1643 $RawNameToFile{$Name} = $filename;
1644 RegisterFileForName($Type => $Name, $IsFuzzy, $filename);
cf25bb62 1645
5beb625e 1646 if ($IsFuzzy)
1647 {
1648 my $CName = CanonicalName($Type . '_'. $Name);
1649 $FuzzyNameToTest{$Name} = $Table if !$FuzzyNameToTest{$Name};
1650 $FuzzyNameToTest{$CName} = $Table if !$FuzzyNameToTest{$CName};
1651 } else {
1652 $ExactNameToTest{$Name} = $Table;
cf25bb62 1653 }
1654
cf25bb62 1655 }
1656
5beb625e 1657 ## Register aliase info
1658 for my $Name (sort {length $a <=> length $b} keys %{$AliasInfo{$Type}})
cf25bb62 1659 {
5beb625e 1660 my $Alias = $AliasInfo{$Type}->{$Name};
1661 my $IsFuzzy = $FuzzyNames{$Type}->{$Alias};
1662 my $filename = $RawNameToFile{$Name};
1663 die "oops [$Alias]->[$Name]" if not $filename;
1664 RegisterFileForName($Type => $Alias, $IsFuzzy, $filename);
1665
1666 my $Table = $TableInfo{$Type}->{$Name};
1667 die "oops" if not $Table;
1668 if ($IsFuzzy)
1669 {
1670 my $CName = CanonicalName($Type .'_'. $Alias);
1671 $FuzzyNameToTest{$Alias} = $Table if !$FuzzyNameToTest{$Alias};
1672 $FuzzyNameToTest{$CName} = $Table if !$FuzzyNameToTest{$CName};
1673 } else {
1674 $ExactNameToTest{$Alias} = $Table;
1675 }
cf25bb62 1676 }
5beb625e 1677 }
cf25bb62 1678
5beb625e 1679 ##
1680 ## Write out the property list
1681 ##
1682 {
1683 my @OUT = (
1684 "##\n",
1685 "## This file created by $0\n",
1686 "## List of built-in \\p{...}/\\P{...} properties.\n",
1687 "##\n",
1688 "## '*' means name may be 'fuzzy'\n",
1689 "##\n\n",
1690 sort { substr($a,2) cmp substr($b, 2) } @MAP,
1691 );
1692 WriteIfChanged('Properties', @OUT);
1693 }
cf25bb62 1694
5beb625e 1695 use Text::Tabs (); ## using this makes the files about half the size
1696
1697 ## Write Exact.pl
1698 {
1699 my @OUT = (
1700 $HEADER,
1701 "##\n",
1702 "## Data in this file used by ../utf8_heavy.pl\n",
1703 "##\n\n",
12ac2576 1704 "## Mapping from name to filename in ./lib/gc_sc\n",
5beb625e 1705 "%utf8::Exact = (\n",
1706 );
cf25bb62 1707
12ac2576 1708 $Exact{InGreek} = 'InGreekA'; # this is evil kludge
cf25bb62 1709 for my $Name (sort keys %Exact)
1710 {
1711 my $File = $Exact{$Name};
5beb625e 1712 $Name = $Name =~ m/\W/ ? qq/'$Name'/ : " $Name ";
1713 my $Text = sprintf("%-15s => %s,\n", $Name, qq/'$File'/);
1714 push @OUT, Text::Tabs::unexpand($Text);
cf25bb62 1715 }
5beb625e 1716 push @OUT, ");\n1;\n";
1717
1718 WriteIfChanged('Exact.pl', @OUT);
1719 }
cf25bb62 1720
5beb625e 1721 ## Write Canonical.pl
1722 {
1723 my @OUT = (
1724 $HEADER,
1725 "##\n",
1726 "## Data in this file used by ../utf8_heavy.pl\n",
1727 "##\n\n",
1728 "## Mapping from lc(canonical name) to filename in ./lib\n",
1729 "%utf8::Canonical = (\n",
1730 );
1731 my $Trail = ""; ## used just to keep the spacing pretty
1732 for my $Name (sort keys %Canonical)
cf25bb62 1733 {
5beb625e 1734 my $File = $Canonical{$Name};
1735 if ($CaComment{$Name}) {
1736 push @OUT, "\n" if not $Trail;
1737 push @OUT, " # $CaComment{$Name}\n";
1738 $Trail = "\n";
1739 } else {
1740 $Trail = "";
cf25bb62 1741 }
5beb625e 1742 $Name = $Name =~ m/\W/ ? qq/'$Name'/ : " $Name ";
1743 my $Text = sprintf(" %-41s => %s,\n$Trail", $Name, qq/'$File'/);
1744 push @OUT, Text::Tabs::unexpand($Text);
cf25bb62 1745 }
5beb625e 1746 push @OUT, ");\n1\n";
1747 WriteIfChanged('Canonical.pl', @OUT);
d2d499f5 1748 }
5beb625e 1749
1750 MakePropTestScript() if $MakeTestScript;
d2d499f5 1751}
1752
5beb625e 1753
551b6b6f 1754sub SpecialCasing_txt()
cf25bb62 1755{
1756 #
1757 # Read in the special cases.
1758 #
983ffd37 1759
cf25bb62 1760 my %CaseInfo;
1761
551b6b6f 1762 if (not open IN, "SpecialCasing.txt") {
1763 die "$0: SpecialCasing.txt: $!\n";
cf25bb62 1764 }
1765 while (<IN>) {
1766 next unless /^[0-9A-Fa-f]+;/;
1767 s/\#.*//;
1768 s/\s+$//;
1769
1770 my ($code, $lower, $title, $upper, $condition) = split(/\s*;\s*/);
1771
1772 if ($condition) { # not implemented yet
1773 print "# SKIPPING $_\n" if $Verbose;
1774 next;
1775 }
1776
1777 # Wait until all the special cases have been read since
1778 # they are not listed in numeric order.
1779 my $ix = hex($code);
a6da3000 1780 push @{$CaseInfo{Lower}}, [ $ix, $code, $lower ]
1781 unless $code eq $lower;
1782 push @{$CaseInfo{Title}}, [ $ix, $code, $title ]
1783 unless $code eq $title;
1784 push @{$CaseInfo{Upper}}, [ $ix, $code, $upper ]
1785 unless $code eq $upper;
cf25bb62 1786 }
1787 close IN;
1788
1789 # Now write out the special cases properties in their code point order.
1790 # Prepend them to the To/{Upper,Lower,Title}.pl.
1791
1792 for my $case (qw(Lower Title Upper))
1793 {
1794 my $NormalCase = do "To/$case.pl" || die "$0: $@\n";
cf25bb62 1795
b08cf34e 1796 my @OUT =
1797 (
1798 $HEADER, "\n",
1799 "# The key UTF-8 _bytes_, the value UTF-8 (speed hack)\n",
1800 "%utf8::ToSpec$case =\n(\n",
1801 );
cf25bb62 1802
1803 for my $prop (sort { $a->[0] <=> $b->[0] } @{$CaseInfo{$case}}) {
1804 my ($ix, $code, $to) = @$prop;
1805 my $tostr =
1806 join "", map { sprintf "\\x{%s}", $_ } split ' ', $to;
b08cf34e 1807 push @OUT, sprintf qq["%s" => "$tostr",\n], join("", map { sprintf "\\x%02X", $_ } unpack("U0C*", pack("U", $ix)));
5cb851a6 1808 # Remove any single-character mappings for
1809 # the same character since we are going for
1810 # the special casing rules.
1811 $NormalCase =~ s/^$code\t\t\w+\n//m;
cf25bb62 1812 }
5beb625e 1813 push @OUT, (
1814 ");\n\n",
1815 "return <<'END';\n",
1816 $NormalCase,
1817 "END\n"
1818 );
1819 WriteIfChanged("To/$case.pl", @OUT);
d2d499f5 1820 }
d2d499f5 1821}
1822
c4051cc5 1823#
1824# Read in the case foldings.
1825#
551b6b6f 1826# We will do full case folding, C + F + I (see CaseFolding.txt).
c4051cc5 1827#
551b6b6f 1828sub CaseFolding_txt()
cf25bb62 1829{
551b6b6f 1830 if (not open IN, "CaseFolding.txt") {
1831 die "$0: CaseFolding.txt: $!\n";
cf25bb62 1832 }
c4051cc5 1833
cf25bb62 1834 my $Fold = Table->New();
c4051cc5 1835 my %Fold;
1836
cf25bb62 1837 while (<IN>) {
254ba52a 1838 # Skip status 'S', simple case folding
c4051cc5 1839 next unless /^([0-9A-Fa-f]+)\s*;\s*([CFI])\s*;\s*([0-9A-Fa-f]+(?: [0-9A-Fa-f]+)*)\s*;/;
1840
cf25bb62 1841 my ($code, $status, $fold) = (hex($1), $2, $3);
c4051cc5 1842
1843 if ($status eq 'C') { # Common: one-to-one folding
254ba52a 1844 # No append() since several codes may fold into one.
cf25bb62 1845 $Fold->RawAppendRange($code, $code, $fold);
c4051cc5 1846 } else { # F: full, or I: dotted uppercase I -> dotless lowercase I
cf25bb62 1847 $Fold{$code} = $fold;
c4051cc5 1848 }
1849 }
cf25bb62 1850 close IN;
c4051cc5 1851
cf25bb62 1852 $Fold->Write("To/Fold.pl");
c4051cc5 1853
1854 #
1855 # Prepend the special foldings to the common foldings.
1856 #
c4051cc5 1857 my $CommonFold = do "To/Fold.pl" || die "$0: To/Fold.pl: $!\n";
5beb625e 1858
b08cf34e 1859 my @OUT =
1860 (
1861 $HEADER, "\n",
1862 "# The ke UTF-8 _bytes_, the value UTF-8 (speed hack)\n",
1863 "%utf8::ToSpecFold =\n(\n",
1864 );
cf25bb62 1865 for my $code (sort { $a <=> $b } keys %Fold) {
1866 my $foldstr =
1867 join "", map { sprintf "\\x{%s}", $_ } split ' ', $Fold{$code};
b08cf34e 1868 push @OUT, sprintf qq["%s" => "$foldstr",\n], join("", map { sprintf "\\x%02X", $_ } unpack("U0C*", pack("U", $code)));
c4051cc5 1869 }
5beb625e 1870 push @OUT, (
1871 ");\n\n",
1872 "return <<'END';\n",
1873 $CommonFold,
1874 "END\n",
1875 );
1876
1877 WriteIfChanged("To/Fold.pl", @OUT);
c4051cc5 1878}
1879
cf25bb62 1880## Do it....
1881
12ac2576 1882Build_Aliases();
44da8cae 1883UnicodeData_Txt();
cf25bb62 1884PropList_txt();
1885
1886Scripts_txt();
1887Blocks_txt();
1888
5beb625e 1889WriteAllMappings();
1890
551b6b6f 1891LineBreak_Txt();
1892ArabicShaping_txt();
12ac2576 1893EastAsianWidth_txt();
1894HangulSyllableType_txt();
cf25bb62 1895Jamo_txt();
551b6b6f 1896SpecialCasing_txt();
1897CaseFolding_txt();
cf25bb62 1898
5beb625e 1899exit(0);
cf25bb62 1900
5beb625e 1901## TRAILING CODE IS USED BY MakePropTestScript()
1902__DATA__
1903use strict;
1904use warnings;
1905
1906my $Tests = 0;
1907my $Fails = 0;
cf25bb62 1908
5beb625e 1909sub Expect($$$)
1910{
1911 my $Expect = shift;
1912 my $String = shift;
1913 my $Regex = shift;
1914 my $Line = (caller)[2];
1915
1916 $Tests++;
1917 my $RegObj;
1918 my $result = eval {
1919 $RegObj = qr/$Regex/;
1920 $String =~ $RegObj ? 1 : 0
1921 };
1922
1923 if (not defined $result) {
1924 print "couldn't compile /$Regex/ on $0 line $Line: $@\n";
1925 $Fails++;
1926 } elsif ($result ^ $Expect) {
1927 print "bad result (expected $Expect) on $0 line $Line: $@\n";
1928 $Fails++;
1929 }
1930}
d73e5302 1931
5beb625e 1932sub Error($)
1933{
1934 my $Regex = shift;
1935 $Tests++;
1936 if (eval { 'x' =~ qr/$Regex/; 1 }) {
1937 $Fails++;
1938 my $Line = (caller)[2];
1939 print "expected error for /$Regex/ on $0 line $Line: $@\n";
1940 }
1941}
1942
1943sub Finished()
1944{
1945 if ($Fails == 0) {
1946 print "All $Tests tests passed.\n";
1947 exit(0);
1948 } else {
1949 print "$Tests tests, $Fails failed!\n";
1950 exit(-1);
1951 }
1952}