First step to generating Unicode files for the regexp engine at build
[p5sagit/p5-mst-13.2.git] / lib / unicore / mktables
CommitLineData
d73e5302 1#!/usr/bin/perl -w
4392c1cd 2require 5.008; # Needs pack "U". Probably safest to run on 5.8.x
d73e5302 3use strict;
cf25bb62 4use Carp;
5beb625e 5
cf25bb62 6##
7## mktables -- create the runtime Perl Unicode files (lib/unicore/**/*.pl)
8## from the Unicode database files (lib/unicore/*.txt).
9##
d2d499f5 10
cb3d361a 11## "Fuzzy" means this section in Unicode TR18:
12##
13## The recommended names for UCD properties and property values are in
14## PropertyAliases.txt [Prop] and PropertyValueAliases.txt
15## [PropValue]. There are both abbreviated names and longer, more
16## descriptive names. It is strongly recommended that both names be
17## recognized, and that loose matching of property names be used,
18## whereby the case distinctions, whitespace, hyphens, and underbar
19## are ignored.
20
5beb625e 21mkdir("lib", 0755);
12ac2576 22mkdir("lib/gc_sc", 0755);
5beb625e 23mkdir("To", 0755);
d73e5302 24
8e4b7420 25## Base names already used in lib/gc_sc (for avoiding 8.3 conflicts)
26my %BaseNames;
27
28
cf25bb62 29##
30## Process any args.
31##
5beb625e 32my $Verbose = 0;
33my $MakeTestScript = 0;
cf25bb62 34
35while (@ARGV)
36{
37 my $arg = shift @ARGV;
38 if ($arg eq '-v') {
39 $Verbose = 1;
40 } elsif ($arg eq '-q') {
41 $Verbose = 0;
5beb625e 42 } elsif ($arg eq '-maketest') {
43 $MakeTestScript = 1;
d73e5302 44 } else {
5beb625e 45 die "usage: $0 [-v|-q] [-maketest]";
d73e5302 46 }
47}
48
cf25bb62 49my $LastUnicodeCodepoint = 0x10FFFF; # As of Unicode 3.1.1.
d73e5302 50
cf25bb62 51my $HEADER=<<"EOF";
d73e5302 52# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
551b6b6f 53# This file is built by $0 from e.g. UnicodeData.txt.
d73e5302 54# Any changes made here will be lost!
cf25bb62 55
56EOF
57
5beb625e 58
59##
60## Given a filename and a reference to an array of lines,
61## write the lines to the file only if the contents have not changed.
62##
63sub WriteIfChanged($\@)
64{
65 my $file = shift;
66 my $lines = shift;
67
68 my $TextToWrite = join '', @$lines;
69 if (open IN, $file) {
70 local($/) = undef;
71 my $PreviousText = <IN>;
72 close IN;
73 if ($PreviousText eq $TextToWrite) {
74 print "$file unchanged.\n" if $Verbose;
75 return;
76 }
77 }
78 if (not open OUT, ">$file") {
79 die "$0: can't open $file for output: $!\n";
80 }
81 print "$file written.\n" if $Verbose;
82
83 print OUT $TextToWrite;
84 close OUT;
85}
86
cf25bb62 87##
88## The main datastructure (a "Table") represents a set of code points that
89## are part of a particular quality (that are part of \pL, \p{InGreek},
90## etc.). They are kept as ranges of code points (starting and ending of
91## each range).
92##
93## For example, a range ASCII LETTERS would be represented as:
94## [ [ 0x41 => 0x5A, 'UPPER' ],
95## [ 0x61 => 0x7A, 'LOWER, ] ]
96##
97sub RANGE_START() { 0 } ## index into range element
98sub RANGE_END() { 1 } ## index into range element
99sub RANGE_NAME() { 2 } ## index into range element
100
99598c8c 101## Conceptually, these should really be folded into the 'Table' objects
cf25bb62 102my %TableInfo;
99598c8c 103my %TableDesc;
cf25bb62 104my %FuzzyNames;
105my %AliasInfo;
5beb625e 106my %CanonicalToOrig;
cf25bb62 107
108##
99598c8c 109## Turn something like
110## OLD-ITALIC
5beb625e 111## into
99598c8c 112## OldItalic
113##
114sub CanonicalName($)
115{
5beb625e 116 my $orig = shift;
117 my $name = lc $orig;
99598c8c 118 $name =~ s/(?<![a-z])(\w)/\u$1/g;
5beb625e 119 $name =~ s/[-_\s]+//g;
99598c8c 120
5beb625e 121 $CanonicalToOrig{$name} = $orig if not $CanonicalToOrig{$name};
99598c8c 122 return $name;
123}
124
12ac2576 125
126##
127## Store the alias definitions for later use.
128##
129my %PropertyAlias;
130my %PropValueAlias;
131
132my %PA_reverse;
133my %PVA_reverse;
134
135sub Build_Aliases()
136{
137 ##
138 ## Most of the work with aliases doesn't occur here,
139 ## but rather in utf8_heavy.pl, which uses utf8_pva.pl,
140 ## which contains just this function. However, this one
141 ##
142 ## -- japhy (2004/04/13)
143
144 open PA, "< PropertyAliases.txt"
145 or confess "Can't open PropertyAliases.txt: $!";
146 while (<PA>) {
147 s/#.*//;
148 s/\s+$//;
149 next if /^$/;
150
151 my ($abbrev, $name) = split /\s*;\s*/;
152 next if $abbrev eq "n/a";
153 $PropertyAlias{$abbrev} = $name;
154 $PA_reverse{$name} = $abbrev;
155 }
156 close PA;
157
158 open PVA, "< PropValueAliases.txt"
159 or confess "Can't open PropValueAliases.txt: $!";
160 while (<PVA>) {
161 s/#.*//;
162 s/\s+$//;
163 next if /^$/;
164
165 my ($prop, @data) = split /\s*;\s*/;
166
167 if ($prop eq 'ccc') {
168 $PropValueAlias{$prop}{$data[1]} = [ @data[0,2] ];
169 $PVA_reverse{$prop}{$data[2]} = [ @data[0,1] ];
170 }
171 else {
172 next if $data[0] eq "n/a";
173 $PropValueAlias{$prop}{$data[0]} = $data[1];
174 $PVA_reverse{$prop}{$data[1]} = $data[0];
175 }
176 }
177 close PVA;
178}
179
180
99598c8c 181##
cf25bb62 182## Associates a property ("Greek", "Lu", "Assigned",...) with a Table.
183##
184## Called like:
99598c8c 185## New_Prop(In => 'Greek', $Table, Desc => 'Greek Block', Fuzzy => 1);
cf25bb62 186##
187## Normally, these parameters are set when the Table is created (when the
188## Table->New constructor is called), but there are times when it needs to
189## be done after-the-fact...)
190##
191sub New_Prop($$$@)
192{
193 my $Type = shift; ## "Is" or "In";
194 my $Name = shift;
195 my $Table = shift;
196
197 ## remaining args are optional key/val
198 my %Args = @_;
199
99598c8c 200 my $Fuzzy = delete $Args{Fuzzy};
201 my $Desc = delete $Args{Desc}; # description
202
5beb625e 203 $Name = CanonicalName($Name) if $Fuzzy;
cf25bb62 204
205 ## sanity check a few args
206 if (%Args or ($Type ne 'Is' and $Type ne 'In') or not ref $Table) {
207 confess "$0: bad args to New_Prop"
208 }
209
210 if (not $TableInfo{$Type}->{$Name})
211 {
212 $TableInfo{$Type}->{$Name} = $Table;
99598c8c 213 $TableDesc{$Type}->{$Name} = $Desc;
214 if ($Fuzzy) {
cf25bb62 215 $FuzzyNames{$Type}->{$Name} = $Name;
216 }
217 }
d73e5302 218}
219
d73e5302 220
cf25bb62 221##
222## Creates a new Table object.
223##
224## Args are key/value pairs:
99598c8c 225## In => Name -- Name of "In" property to be associated with
226## Is => Name -- Name of "Is" property to be associated with
227## Fuzzy => Boolean -- True if name can be accessed "fuzzily"
228## Desc => String -- Description of the property
cf25bb62 229##
230## No args are required.
231##
232sub Table::New
233{
234 my $class = shift;
235 my %Args = @_;
236
237 my $Table = bless [], $class;
238
99598c8c 239 my $Fuzzy = delete $Args{Fuzzy};
240 my $Desc = delete $Args{Desc};
cf25bb62 241
242 for my $Type ('Is', 'In')
243 {
244 if (my $Name = delete $Args{$Type}) {
99598c8c 245 New_Prop($Type => $Name, $Table, Desc => $Desc, Fuzzy => $Fuzzy);
cf25bb62 246 }
247 }
248
249 ## shouldn't have any left over
250 if (%Args) {
251 confess "$0: bad args to Table->New"
252 }
253
254 return $Table;
d73e5302 255}
256
cf25bb62 257##
258## Returns true if the Table has no code points
259##
260sub Table::IsEmpty
261{
262 my $Table = shift; #self
263 return not @$Table;
d73e5302 264}
265
cf25bb62 266##
267## Returns true if the Table has code points
268##
269sub Table::NotEmpty
270{
271 my $Table = shift; #self
272 return @$Table;
d73e5302 273}
274
cf25bb62 275##
276## Returns the maximum code point currently in the table.
277##
278sub Table::Max
279{
280 my $Table = shift; #self
281 confess "oops" if $Table->IsEmpty; ## must have code points to have a max
282 return $Table->[-1]->[RANGE_END];
283}
d73e5302 284
cf25bb62 285##
286## Replaces the codepoints in the Table with those in the Table given
287## as an arg. (NOTE: this is not a "deep copy").
288##
289sub Table::Replace($$)
290{
291 my $Table = shift; #self
292 my $New = shift;
d73e5302 293
cf25bb62 294 @$Table = @$New;
295}
71d929cb 296
cf25bb62 297##
298## Given a new code point, make the last range of the Table extend to
299## include the new (and all intervening) code points.
300##
301sub Table::Extend
302{
303 my $Table = shift; #self
304 my $codepoint = shift;
d73e5302 305
cf25bb62 306 my $PrevMax = $Table->Max;
e904f995 307
cf25bb62 308 confess "oops ($codepoint <= $PrevMax)" if $codepoint <= $PrevMax;
e904f995 309
cf25bb62 310 $Table->[-1]->[RANGE_END] = $codepoint;
311}
c3a8a2b8 312
cf25bb62 313##
314## Given a code point range start and end (and optional name), blindly
315## append them to the list of ranges for the Table.
316##
317## NOTE: Code points must be added in strictly ascending numeric order.
318##
319sub Table::RawAppendRange
320{
321 my $Table = shift; #self
322 my $start = shift;
323 my $end = shift;
324 my $name = shift;
325 $name = "" if not defined $name; ## warning: $name can be "0"
326
327 push @$Table, [ $start, # RANGE_START
328 $end, # RANGE_END
329 $name ]; # RANGE_NAME
330}
c3a8a2b8 331
cf25bb62 332##
333## Given a code point (and optional name), add it to the Table.
334##
335## NOTE: Code points must be added in strictly ascending numeric order.
336##
337sub Table::Append
338{
339 my $Table = shift; #self
340 my $codepoint = shift;
341 my $name = shift;
342 $name = "" if not defined $name; ## warning: $name can be "0"
343
344 ##
345 ## If we've already got a range working, and this code point is the next
346 ## one in line, and if the name is the same, just extend the current range.
347 ##
348 if ($Table->NotEmpty
349 and
350 $Table->Max == $codepoint - 1
351 and
352 $Table->[-1]->[RANGE_NAME] eq $name)
353 {
354 $Table->Extend($codepoint);
c3a8a2b8 355 }
cf25bb62 356 else
357 {
358 $Table->RawAppendRange($codepoint, $codepoint, $name);
d73e5302 359 }
cf25bb62 360}
d73e5302 361
cf25bb62 362##
363## Given a code point range starting value and ending value (and name),
364## Add the range to teh Table.
365##
366## NOTE: Code points must be added in strictly ascending numeric order.
367##
368sub Table::AppendRange
369{
370 my $Table = shift; #self
371 my $start = shift;
372 my $end = shift;
373 my $name = shift;
374 $name = "" if not defined $name; ## warning: $name can be "0"
375
376 $Table->Append($start, $name);
377 $Table->Extend($end) if $end > $start;
378}
d73e5302 379
cf25bb62 380##
381## Return a new Table that represents all code points not in the Table.
382##
383sub Table::Invert
384{
385 my $Table = shift; #self
386
387 my $New = Table->New();
388 my $max = -1;
389 for my $range (@$Table)
390 {
391 my $start = $range->[RANGE_START];
392 my $end = $range->[RANGE_END];
393 if ($start-1 >= $max+1) {
394 $New->AppendRange($max+1, $start-1, "");
395 }
396 $max = $end;
d73e5302 397 }
cf25bb62 398 if ($max+1 < $LastUnicodeCodepoint) {
399 $New->AppendRange($max+1, $LastUnicodeCodepoint);
d73e5302 400 }
cf25bb62 401 return $New;
402}
d73e5302 403
cf25bb62 404##
405## Merges any number of other tables with $self, returning the new table.
406## (existing tables are not modified)
407##
a3a8c5f0 408##
409## Args may be Tables, or individual code points (as integers).
410##
cf25bb62 411## Can be called as either a constructor or a method.
412##
413sub Table::Merge
414{
415 shift(@_) if not ref $_[0]; ## if called as a constructor, lose the class
416 my @Tables = @_;
417
418 ## Accumulate all records from all tables
419 my @Records;
a3a8c5f0 420 for my $Arg (@Tables)
421 {
422 if (ref $Arg) {
423 ## arg is a table -- get its ranges
424 push @Records, @$Arg;
425 } else {
426 ## arg is a codepoint, make a range
427 push @Records, [ $Arg, $Arg ]
428 }
d73e5302 429 }
430
cf25bb62 431 ## sort by range start, with longer ranges coming first.
432 my ($first, @Rest) = sort {
433 ($a->[RANGE_START] <=> $b->[RANGE_START])
434 or
435 ($b->[RANGE_END] <=> $b->[RANGE_END])
436 } @Records;
437
438 my $New = Table->New();
439
440 ## Ensuring the first range is there makes the subsequent loop easier
441 $New->AppendRange($first->[RANGE_START],
442 $first->[RANGE_END]);
443
444 ## Fold in records so long as they add new information.
445 for my $set (@Rest)
446 {
447 my $start = $set->[RANGE_START];
448 my $end = $set->[RANGE_END];
449 if ($start > $New->Max) {
450 $New->AppendRange($start, $end);
451 } elsif ($end > $New->Max) {
452 $New->Extend($end);
453 }
d73e5302 454 }
d73e5302 455
cf25bb62 456 return $New;
d73e5302 457}
458
cf25bb62 459##
460## Given a filename, write a representation of the Table to a file.
99598c8c 461## May have an optional comment as a 2nd arg.
cf25bb62 462##
463sub Table::Write
464{
99598c8c 465 my $Table = shift; #self
cf25bb62 466 my $filename = shift;
99598c8c 467 my $comment = shift;
d73e5302 468
5beb625e 469 my @OUT = $HEADER;
99598c8c 470 if (defined $comment) {
471 $comment =~ s/\s+\Z//;
472 $comment =~ s/^/# /gm;
5beb625e 473 push @OUT, "#\n$comment\n#\n";
99598c8c 474 }
5beb625e 475 push @OUT, "return <<'END';\n";
d73e5302 476
cf25bb62 477 for my $set (@$Table)
478 {
479 my $start = $set->[RANGE_START];
480 my $end = $set->[RANGE_END];
481 my $name = $set->[RANGE_NAME];
d73e5302 482
cf25bb62 483 if ($start == $end) {
5beb625e 484 push @OUT, sprintf "%04X\t\t%s\n", $start, $name;
cf25bb62 485 } else {
5beb625e 486 push @OUT, sprintf "%04X\t%04X\t%s\n", $start, $end, $name;
cf25bb62 487 }
488 }
d73e5302 489
5beb625e 490 push @OUT, "END\n";
491
492 WriteIfChanged($filename, @OUT);
493}
494
495## This used only for making the test script.
496## helper function
497sub IsUsable($)
498{
499 my $code = shift;
500 return 0 if $code <= 0x0000; ## don't use null
501 return 0 if $code >= $LastUnicodeCodepoint; ## keep in range
502 return 0 if ($code >= 0xD800 and $code <= 0xDFFF); ## no surrogates
503 return 0 if ($code >= 0xFDD0 and $code <= 0xFDEF); ## utf8.c says no good
504 return 0 if (($code & 0xFFFF) == 0xFFFE); ## utf8.c says no good
505 return 0 if (($code & 0xFFFF) == 0xFFFF); ## utf8.c says no good
506 return 1;
507}
508
509## Return a code point that's part of the table.
510## Returns nothing if the table is empty (or covers only surrogates).
511## This used only for making the test script.
512sub Table::ValidCode
513{
514 my $Table = shift; #self
515 for my $set (@$Table) {
516 return $set->[RANGE_END] if IsUsable($set->[RANGE_END]);
517 }
518 return ();
519}
520
521## Return a code point that's not part of the table
522## Returns nothing if the table covers all code points.
523## This used only for making the test script.
524sub Table::InvalidCode
525{
526 my $Table = shift; #self
527
528 return 0x1234 if $Table->IsEmpty();
529
530 for my $set (@$Table)
531 {
532 if (IsUsable($set->[RANGE_END] + 1))
533 {
534 return $set->[RANGE_END] + 1;
535 }
536
537 if (IsUsable($set->[RANGE_START] - 1))
538 {
539 return $set->[RANGE_START] - 1;
540 }
541 }
542 return ();
cf25bb62 543}
544
545###########################################################################
546###########################################################################
547###########################################################################
548
549
550##
551## Called like:
99598c8c 552## New_Alias(Is => 'All', SameAs => 'Any', Fuzzy => 1);
cf25bb62 553##
99598c8c 554## The args must be in that order, although the Fuzzy pair may be omitted.
cf25bb62 555##
556## This creates 'IsAll' as an alias for 'IsAny'
557##
558sub New_Alias($$$@)
559{
560 my $Type = shift; ## "Is" or "In"
561 my $Alias = shift;
99598c8c 562 my $SameAs = shift; # expecting "SameAs" -- just ignored
cf25bb62 563 my $Name = shift;
564
565 ## remaining args are optional key/val
566 my %Args = @_;
567
99598c8c 568 my $Fuzzy = delete $Args{Fuzzy};
cf25bb62 569
570 ## sanity check a few args
571 if (%Args or ($Type ne 'Is' and $Type ne 'In') or $SameAs ne 'SameAs') {
572 confess "$0: bad args to New_Alias"
d73e5302 573 }
574
5beb625e 575 $Alias = CanonicalName($Alias) if $Fuzzy;
576
577 if (not $TableInfo{$Type}->{$Name})
578 {
579 my $CName = CanonicalName($Name);
580 if ($TableInfo{$Type}->{$CName}) {
581 confess "$0: Use canonical form '$CName' instead of '$Name' for alias.";
582 } else {
12ac2576 583 confess "$0: don't have original $Type => $Name to make alias\n";
5beb625e 584 }
cf25bb62 585 }
586 if ($TableInfo{$Alias}) {
587 confess "$0: already have original $Type => $Alias; can't make alias";
d73e5302 588 }
cf25bb62 589 $AliasInfo{$Type}->{$Name} = $Alias;
99598c8c 590 if ($Fuzzy) {
cf25bb62 591 $FuzzyNames{$Type}->{$Alias} = $Name;
592 }
593
d73e5302 594}
595
d73e5302 596
cf25bb62 597## All assigned code points
99598c8c 598my $Assigned = Table->New(Is => 'Assigned',
599 Desc => "All assigned code points",
5beb625e 600 Fuzzy => 0);
d2d499f5 601
cf25bb62 602my $Name = Table->New(); ## all characters, individually by name
603my $General = Table->New(); ## all characters, grouped by category
604my %General;
605my %Cat;
d73e5302 606
cf25bb62 607##
551b6b6f 608## Process UnicodeData.txt (Categories, etc.)
cf25bb62 609##
44da8cae 610sub UnicodeData_Txt()
cf25bb62 611{
612 my $Bidi = Table->New();
613 my $Deco = Table->New();
614 my $Comb = Table->New();
615 my $Number = Table->New();
12ac2576 616 my $Mirrored = Table->New();#Is => 'Mirrored',
617 #Desc => "Mirrored in bidirectional text",
618 #Fuzzy => 0);
d73e5302 619
cf25bb62 620 my %DC;
621 my %Bidi;
12ac2576 622 my %Number;
623 $DC{can} = Table->New();
624 $DC{com} = Table->New();
cf25bb62 625
626 ## Initialize Perl-generated categories
551b6b6f 627 ## (Categories from UnicodeData.txt are auto-initialized in gencat)
44da8cae 628 $Cat{Alnum} =
629 Table->New(Is => 'Alnum', Desc => "[[:Alnum:]]", Fuzzy => 0);
630 $Cat{Alpha} =
631 Table->New(Is => 'Alpha', Desc => "[[:Alpha:]]", Fuzzy => 0);
632 $Cat{ASCII} =
633 Table->New(Is => 'ASCII', Desc => "[[:ASCII:]]", Fuzzy => 0);
634 $Cat{Blank} =
635 Table->New(Is => 'Blank', Desc => "[[:Blank:]]", Fuzzy => 0);
636 $Cat{Cntrl} =
637 Table->New(Is => 'Cntrl', Desc => "[[:Cntrl:]]", Fuzzy => 0);
638 $Cat{Digit} =
639 Table->New(Is => 'Digit', Desc => "[[:Digit:]]", Fuzzy => 0);
640 $Cat{Graph} =
641 Table->New(Is => 'Graph', Desc => "[[:Graph:]]", Fuzzy => 0);
642 $Cat{Lower} =
643 Table->New(Is => 'Lower', Desc => "[[:Lower:]]", Fuzzy => 0);
644 $Cat{Print} =
645 Table->New(Is => 'Print', Desc => "[[:Print:]]", Fuzzy => 0);
646 $Cat{Punct} =
647 Table->New(Is => 'Punct', Desc => "[[:Punct:]]", Fuzzy => 0);
648 $Cat{Space} =
649 Table->New(Is => 'Space', Desc => "[[:Space:]]", Fuzzy => 0);
650 $Cat{Title} =
651 Table->New(Is => 'Title', Desc => "[[:Title:]]", Fuzzy => 0);
652 $Cat{Upper} =
653 Table->New(Is => 'Upper', Desc => "[[:Upper:]]", Fuzzy => 0);
654 $Cat{XDigit} =
655 Table->New(Is => 'XDigit', Desc => "[[:XDigit:]]", Fuzzy => 0);
656 $Cat{Word} =
657 Table->New(Is => 'Word', Desc => "[[:Word:]]", Fuzzy => 0);
658 $Cat{SpacePerl} =
659 Table->New(Is => 'SpacePerl', Desc => '\s', Fuzzy => 0);
d73e5302 660
cf25bb62 661 my %To;
662 $To{Upper} = Table->New();
663 $To{Lower} = Table->New();
664 $To{Title} = Table->New();
665 $To{Digit} = Table->New();
666
667 sub gencat($$$$)
668 {
669 my ($name, ## Name ("LATIN CAPITAL LETTER A")
670 $cat, ## Category ("Lu", "Zp", "Nd", etc.)
671 $code, ## Code point (as an integer)
672 $op) = @_;
673
674 my $MajorCat = substr($cat, 0, 1); ## L, M, Z, S, etc
675
676 $Assigned->$op($code);
677 $Name->$op($code, $name);
678 $General->$op($code, $cat);
679
680 ## add to the sub category (e.g. "Lu", "Nd", "Cf", ..)
99598c8c 681 $Cat{$cat} ||= Table->New(Is => $cat,
682 Desc => "General Category '$cat'",
683 Fuzzy => 0);
cf25bb62 684 $Cat{$cat}->$op($code);
685
686 ## add to the major category (e.g. "L", "N", "C", ...)
99598c8c 687 $Cat{$MajorCat} ||= Table->New(Is => $MajorCat,
688 Desc => "Major Category '$MajorCat'",
689 Fuzzy => 0);
cf25bb62 690 $Cat{$MajorCat}->$op($code);
691
692 ($General{$name} ||= Table->New)->$op($code, $name);
693
694 # 005F: SPACING UNDERSCORE
d75d706f 695 $Cat{Word}->$op($code) if $cat =~ /^[LMN]|Pc/;
c65e4d19 696 $Cat{Alnum}->$op($code) if $cat =~ /^[LM]|Nd/;
cf25bb62 697 $Cat{Alpha}->$op($code) if $cat =~ /^[LM]/;
698
d75d706f 699 my $isspace =
700 ($cat =~ /Zs|Zl|Zp/ &&
701 $code != 0x200B) # 200B is ZWSP which is for line break control
702 # and therefore it is not part of "space" even while it is "Zs".
cf25bb62 703 || $code == 0x0009 # 0009: HORIZONTAL TAB
704 || $code == 0x000A # 000A: LINE FEED
705 || $code == 0x000B # 000B: VERTICAL TAB
706 || $code == 0x000C # 000C: FORM FEED
e6f58734 707 || $code == 0x000D # 000D: CARRIAGE RETURN
d75d706f 708 || $code == 0x0085 # 0085: NEL
709
710 ;
cf25bb62 711
d75d706f 712 $Cat{Space}->$op($code) if $isspace;
cf25bb62 713
d75d706f 714 $Cat{SpacePerl}->$op($code) if $isspace
715 && $code != 0x000B; # Backward compat.
cf25bb62 716
d75d706f 717 $Cat{Blank}->$op($code) if $isspace
718 && !($code == 0x000A ||
719 $code == 0x000B ||
720 $code == 0x000C ||
721 $code == 0x000D ||
722 $code == 0x0085 ||
723 $cat =~ /^Z[lp]/);
cf25bb62 724
725 $Cat{Digit}->$op($code) if $cat eq "Nd";
726 $Cat{Upper}->$op($code) if $cat eq "Lu";
727 $Cat{Lower}->$op($code) if $cat eq "Ll";
728 $Cat{Title}->$op($code) if $cat eq "Lt";
729 $Cat{ASCII}->$op($code) if $code <= 0x007F;
730 $Cat{Cntrl}->$op($code) if $cat =~ /^C/;
d75d706f 731 my $isgraph = !$isspace && $cat !~ /Cc|Cs|Cn/;
732 $Cat{Graph}->$op($code) if $isgraph;
733 $Cat{Print}->$op($code) if $isgraph || $isspace;
cf25bb62 734 $Cat{Punct}->$op($code) if $cat =~ /^P/;
735
736 $Cat{XDigit}->$op($code) if ($code >= 0x30 && $code <= 0x39) ## 0..9
737 || ($code >= 0x41 && $code <= 0x46) ## A..F
738 || ($code >= 0x61 && $code <= 0x66); ## a..f
739 }
d73e5302 740
cf25bb62 741 ## open ane read file.....
551b6b6f 742 if (not open IN, "UnicodeData.txt") {
743 die "$0: UnicodeData.txt: $!\n";
cf25bb62 744 }
d73e5302 745
a3a8c5f0 746 ##
747 ## For building \p{_CombAbove} and \p{_CanonDCIJ}
748 ##
749 my %_Above_HexCodes; ## Hexcodes for chars with $comb == 230 ("ABOVE")
750
751 my %CodeToDeco; ## Maps code to decomp. list for chars with first
752 ## decomp. char an "i" or "j" (for \p{_CanonDCIJ})
753
754 ## This is filled in as we go....
99598c8c 755 my $CombAbove = Table->New(Is => '_CombAbove',
756 Desc => '(for internal casefolding use)',
757 Fuzzy => 0);
a3a8c5f0 758
cf25bb62 759 while (<IN>)
760 {
761 next unless /^[0-9A-Fa-f]+;/;
762 s/\s+$//;
763
764 my ($hexcode, ## code point in hex (e.g. "0041")
765 $name, ## character name (e.g. "LATIN CAPITAL LETTER A")
766 $cat, ## category (e.g. "Lu")
767 $comb, ## Canonical combining class (e.t. "230")
768 $bidi, ## directional category (e.g. "L")
769 $deco, ## decomposition mapping
770 $decimal, ## decimal digit value
771 $digit, ## digit value
772 $number, ## numeric value
773 $mirrored, ## mirrored
774 $unicode10, ## name in Unicode 1.0
775 $comment, ## comment field
776 $upper, ## uppercase mapping
777 $lower, ## lowercase mapping
778 $title, ## titlecase mapping
779 ) = split(/\s*;\s*/);
780
2eb5892f 781 # Note that in Unicode 3.2 there will be names like
782 # LINE FEED (LF), which probably means that \N{} needs
783 # to cope also with LINE FEED and LF.
784 $name = $unicode10 if $name eq '<control>' && $unicode10 ne '';
785
cf25bb62 786 my $code = hex($hexcode);
787
a3a8c5f0 788 if ($comb and $comb == 230) {
789 $CombAbove->Append($code);
790 $_Above_HexCodes{$hexcode} = 1;
791 }
792
793 ## Used in building \p{_CanonDCIJ}
794 if ($deco and $deco =~ m/^006[9A]\b/) {
795 $CodeToDeco{$code} = $deco;
796 }
797
cf25bb62 798 ##
799 ## There are a few pairs of lines like:
800 ## AC00;<Hangul Syllable, First>;Lo;0;L;;;;;N;;;;;
801 ## D7A3;<Hangul Syllable, Last>;Lo;0;L;;;;;N;;;;;
802 ## that define ranges.
803 ##
804 if ($name =~ /^<(.+), (First|Last)>$/)
805 {
806 $name = $1;
807 gencat($name, $cat, $code, $2 eq 'First' ? 'Append' : 'Extend');
99598c8c 808 #New_Prop(In => $name, $General{$name}, Fuzzy => 1);
cf25bb62 809 }
810 else
811 {
812 ## normal (single-character) lines
813 gencat($name, $cat, $code, 'Append');
814
815 # No Append() here since since several codes may map into one.
816 $To{Upper}->RawAppendRange($code, $code, $upper) if $upper;
817 $To{Lower}->RawAppendRange($code, $code, $lower) if $lower;
818 $To{Title}->RawAppendRange($code, $code, $title) if $title;
819 $To{Digit}->Append($code, $decimal) if length $decimal;
820
821 $Bidi->Append($code, $bidi);
822 $Comb->Append($code, $comb) if $comb;
823 $Number->Append($code, $number) if length $number;
824
12ac2576 825 length($decimal) and ($Number{De} ||= Table->New())->Append($code)
826 or
827 length($digit) and ($Number{Di} ||= Table->New())->Append($code)
828 or
829 length($number) and ($Number{Nu} ||= Table->New())->Append($code);
830
cf25bb62 831 $Mirrored->Append($code) if $mirrored eq "Y";
832
12ac2576 833 $Bidi{$bidi} ||= Table->New();#Is => "bt/$bidi",
834 #Desc => "Bi-directional category '$bidi'",
835 #Fuzzy => 0);
cf25bb62 836 $Bidi{$bidi}->Append($code);
837
838 if ($deco)
839 {
840 $Deco->Append($code, $deco);
841 if ($deco =~/^<(\w+)>/)
842 {
12ac2576 843 my $dshort = $PVA_reverse{dt}{ucfirst lc $1};
844 $DC{com}->Append($code);
cf25bb62 845
12ac2576 846 $DC{$dshort} ||= Table->New();
847 $DC{$dshort}->Append($code);
cf25bb62 848 }
849 else
850 {
12ac2576 851 $DC{can}->Append($code);
cf25bb62 852 }
853 }
854 }
855 }
856 close IN;
d2d499f5 857
cf25bb62 858 ##
859 ## Tidy up a few special cases....
860 ##
d73e5302 861
cf25bb62 862 $Cat{Cn} = $Assigned->Invert; ## Cn is everything that doesn't exist
99598c8c 863 New_Prop(Is => 'Cn',
864 $Cat{Cn},
865 Desc => "General Category 'Cn' [not functional in Perl]",
866 Fuzzy => 0);
d73e5302 867
cf25bb62 868 ## Unassigned is the same as 'Cn'
5beb625e 869 New_Alias(Is => 'Unassigned', SameAs => 'Cn', Fuzzy => 0);
d73e5302 870
cf25bb62 871 $Cat{C}->Replace($Cat{C}->Merge($Cat{Cn})); ## Now merge in Cn into C
d73e5302 872
d73e5302 873
12ac2576 874 # LC is Ll, Lu, and Lt.
875 # (used to be L& or L_, but PropValueAliases.txt defines it as LC)
876 New_Prop(Is => 'LC',
cf25bb62 877 Table->Merge(@Cat{qw[Ll Lu Lt]}),
99598c8c 878 Desc => '[\p{Ll}\p{Lu}\p{Lt}]',
879 Fuzzy => 0);
d73e5302 880
cf25bb62 881 ## Any and All are all code points.
99598c8c 882 my $Any = Table->New(Is => 'Any',
883 Desc => sprintf("[\\x{0000}-\\x{%X}]",
884 $LastUnicodeCodepoint),
5beb625e 885 Fuzzy => 0);
cf25bb62 886 $Any->RawAppendRange(0, $LastUnicodeCodepoint);
d73e5302 887
5beb625e 888 New_Alias(Is => 'All', SameAs => 'Any', Fuzzy => 0);
d73e5302 889
a3a8c5f0 890 ##
891 ## Build special properties for Perl's internal case-folding needs:
892 ## \p{_CaseIgnorable}
893 ## \p{_CanonDCIJ}
894 ## \p{_CombAbove}
895 ## _CombAbove was built above. Others are built here....
896 ##
897
898 ## \p{_CaseIgnorable} is [\p{Mn}\0x00AD\x2010]
899 New_Prop(Is => '_CaseIgnorable',
900 Table->Merge($Cat{Mn},
901 0x00AD, #SOFT HYPHEN
902 0x2010), #HYPHEN
99598c8c 903 Desc => '(for internal casefolding use)',
904 Fuzzy => 0);
a3a8c5f0 905
906
907 ## \p{_CanonDCIJ} is fairly complex...
99598c8c 908 my $CanonCDIJ = Table->New(Is => '_CanonDCIJ',
909 Desc => '(for internal casefolding use)',
910 Fuzzy => 0);
a3a8c5f0 911 ## It contains the ASCII 'i' and 'j'....
912 $CanonCDIJ->Append(0x0069); # ASCII ord("i")
913 $CanonCDIJ->Append(0x006A); # ASCII ord("j")
914 ## ...and any character with a decomposition that starts with either of
915 ## those code points, but only if the decomposition does not have any
916 ## combining character with the "ABOVE" canonical combining class.
917 for my $code (sort { $a <=> $b} keys %CodeToDeco)
918 {
919 ## Need to ensure that all decomposition characters do not have
920 ## a %HexCodeToComb in %AboveCombClasses.
921 my $want = 1;
922 for my $deco_hexcode (split / /, $CodeToDeco{$code})
923 {
924 if (exists $_Above_HexCodes{$deco_hexcode}) {
925 ## one of the decmposition chars has an ABOVE combination
926 ## class, so we're not interested in this one
927 $want = 0;
928 last;
929 }
930 }
931 if ($want) {
932 $CanonCDIJ->Append($code);
933 }
934 }
935
936
d73e5302 937
cf25bb62 938 ##
939 ## Now dump the files.
940 ##
941 $Name->Write("Name.pl");
12ac2576 942
943 # $Bidi->Write("Bidirectional.pl");
944 mkdir("lib/bc", 0755);
945 for (keys %Bidi) {
946 $Bidi{$_}->Write(
947 "lib/bc/$_.pl",
948 "BidiClass category '$PropValueAlias{bc}{$_}'"
949 );
950 }
951
cf25bb62 952 $Comb->Write("CombiningClass.pl");
12ac2576 953 mkdir("lib/ccc", 0755);
954 for (keys %{ $PropValueAlias{ccc} }) {
955 my ($code, $name) = @{ $PropValueAlias{ccc}{$_} };
956 (my $c = Table->New())->Append($code);
957 $c->Write(
958 "lib/ccc/$_.pl",
959 "CombiningClass category '$name'"
960 );
961 }
962
cf25bb62 963 $Deco->Write("Decomposition.pl");
12ac2576 964 mkdir("lib/dt", 0755);
965 for (keys %DC) {
966 $DC{$_}->Write(
967 "lib/dt/$_.pl",
968 "DecompositionType category '$PropValueAlias{dt}{$_}'"
969 );
970 }
971
972 # $Number->Write("Number.pl");
973 mkdir("lib/nt", 0755);
974 for (keys %Number) {
975 $Number{$_}->Write(
976 "lib/nt/$_.pl",
977 "NumericType category '$PropValueAlias{nt}{$_}'"
978 );
979 }
980
981 # $General->Write("Category.pl");
cf25bb62 982
983 for my $to (sort keys %To) {
984 $To{$to}->Write("To/$to.pl");
d73e5302 985 }
12ac2576 986
987 for (keys %{ $PropValueAlias{gc} }) {
988 New_Alias(Is => $PropValueAlias{gc}{$_}, SameAs => $_, Fuzzy => 1);
989 }
d73e5302 990}
991
cf25bb62 992##
551b6b6f 993## Process LineBreak.txt
cf25bb62 994##
551b6b6f 995sub LineBreak_Txt()
cf25bb62 996{
551b6b6f 997 if (not open IN, "LineBreak.txt") {
998 die "$0: LineBreak.txt: $!\n";
cf25bb62 999 }
d73e5302 1000
cf25bb62 1001 my $Lbrk = Table->New();
1002 my %Lbrk;
d73e5302 1003
cf25bb62 1004 while (<IN>)
1005 {
1006 next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(\w+)/;
d73e5302 1007
cf25bb62 1008 my ($first, $last, $lbrk) = (hex($1), hex($2||""), $3);
d73e5302 1009
cf25bb62 1010 $Lbrk->Append($first, $lbrk);
d73e5302 1011
12ac2576 1012 $Lbrk{$lbrk} ||= Table->New();
cf25bb62 1013 $Lbrk{$lbrk}->Append($first);
d73e5302 1014
cf25bb62 1015 if ($last) {
1016 $Lbrk->Extend($last);
1017 $Lbrk{$lbrk}->Extend($last);
d73e5302 1018 }
1019 }
cf25bb62 1020 close IN;
d73e5302 1021
12ac2576 1022 # $Lbrk->Write("Lbrk.pl");
1023
1024 mkdir("lib/lb", 0755);
1025
1026 for (keys %Lbrk) {
1027 $Lbrk{$_}->Write(
1028 "lib/lb/$_.pl",
1029 "Linebreak category '$PropValueAlias{lb}{$_}'"
1030 );
1031 }
cf25bb62 1032}
d73e5302 1033
cf25bb62 1034##
551b6b6f 1035## Process ArabicShaping.txt.
cf25bb62 1036##
551b6b6f 1037sub ArabicShaping_txt()
cf25bb62 1038{
551b6b6f 1039 if (not open IN, "ArabicShaping.txt") {
1040 die "$0: ArabicShaping.txt: $!\n";
cf25bb62 1041 }
d73e5302 1042
cf25bb62 1043 my $ArabLink = Table->New();
1044 my $ArabLinkGroup = Table->New();
d73e5302 1045
12ac2576 1046 my %JoinType;
1047
cf25bb62 1048 while (<IN>)
1049 {
1050 next unless /^[0-9A-Fa-f]+;/;
1051 s/\s+$//;
d73e5302 1052
cf25bb62 1053 my ($hexcode, $name, $link, $linkgroup) = split(/\s*;\s*/);
1054 my $code = hex($hexcode);
1055 $ArabLink->Append($code, $link);
1056 $ArabLinkGroup->Append($code, $linkgroup);
12ac2576 1057
1058 $JoinType{$link} ||= Table->New(Is => "JoinType$link");
1059 $JoinType{$link}->Append($code);
d73e5302 1060 }
cf25bb62 1061 close IN;
1062
12ac2576 1063 # $ArabLink->Write("ArabLink.pl");
1064 # $ArabLinkGroup->Write("ArabLnkGrp.pl");
1065
1066 mkdir("lib/jt", 0755);
1067
1068 for (keys %JoinType) {
1069 $JoinType{$_}->Write(
1070 "lib/jt/$_.pl",
1071 "JoiningType category '$PropValueAlias{jt}{$_}'"
1072 );
1073 }
1074}
1075
1076##
1077## Process EastAsianWidth.txt.
1078##
1079sub EastAsianWidth_txt()
1080{
1081 if (not open IN, "EastAsianWidth.txt") {
1082 die "$0: EastAsianWidth.txt: $!\n";
1083 }
1084
1085 my %EAW;
1086
1087 while (<IN>)
1088 {
1089 next unless /^[0-9A-Fa-f]+;/;
1090 s/#.*//;
1091 s/\s+$//;
1092
1093 my ($hexcode, $pv) = split(/\s*;\s*/);
1094 my $code = hex($hexcode);
1095 $EAW{$pv} ||= Table->New(Is => "EastAsianWidth$pv");
1096 $EAW{$pv}->Append($code);
1097 }
1098 close IN;
1099
1100 mkdir("lib/ea", 0755);
1101
1102 for (keys %EAW) {
1103 $EAW{$_}->Write(
1104 "lib/ea/$_.pl",
1105 "EastAsianWidth category '$PropValueAlias{ea}{$_}'"
1106 );
1107 }
1108}
1109
1110##
1111## Process HangulSyllableType.txt.
1112##
1113sub HangulSyllableType_txt()
1114{
1115 if (not open IN, "HangulSyllableType.txt") {
1116 die "$0: HangulSyllableType.txt: $!\n";
1117 }
1118
1119 my %HST;
1120
1121 while (<IN>)
1122 {
1123 next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(\w+)/;
1124 my ($first, $last, $pv) = (hex($1), hex($2||""), $3);
1125
1126 $HST{$pv} ||= Table->New(Is => "HangulSyllableType$pv");
1127 $HST{$pv}->Append($first);
1128
1129 if ($last) { $HST{$pv}->Extend($last) }
1130 }
1131 close IN;
1132
1133 mkdir("lib/hst", 0755);
1134
1135 for (keys %HST) {
1136 $HST{$_}->Write(
1137 "lib/hst/$_.pl",
1138 "HangulSyllableType category '$PropValueAlias{hst}{$_}'"
1139 );
1140 }
d73e5302 1141}
1142
cf25bb62 1143##
1144## Process Jamo.txt.
1145##
1146sub Jamo_txt()
1147{
1148 if (not open IN, "Jamo.txt") {
1149 die "$0: Jamo.txt: $!\n";
1150 }
1151 my $Short = Table->New();
d73e5302 1152
cf25bb62 1153 while (<IN>)
1154 {
1155 next unless /^([0-9A-Fa-f]+)\s*;\s*(\w*)/;
1156 my ($code, $short) = (hex($1), $2);
d73e5302 1157
cf25bb62 1158 $Short->Append($code, $short);
d73e5302 1159 }
cf25bb62 1160 close IN;
12ac2576 1161 # $Short->Write("JamoShort.pl");
d73e5302 1162}
1163
cf25bb62 1164##
1165## Process Scripts.txt.
1166##
1167sub Scripts_txt()
1168{
1169 my @ScriptInfo;
d73e5302 1170
cf25bb62 1171 if (not open(IN, "Scripts.txt")) {
1172 die "$0: Scripts.txt: $!\n";
1173 }
1174 while (<IN>) {
1175 next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/;
d73e5302 1176
cf25bb62 1177 # Wait until all the scripts have been read since
1178 # they are not listed in numeric order.
1179 push @ScriptInfo, [ hex($1), hex($2||""), $3 ];
1180 }
1181 close IN;
d73e5302 1182
cf25bb62 1183 # Now append the scripts properties in their code point order.
d73e5302 1184
cf25bb62 1185 my %Script;
1186 my $Scripts = Table->New();
d73e5302 1187
cf25bb62 1188 for my $script (sort { $a->[0] <=> $b->[0] } @ScriptInfo)
1189 {
1190 my ($first, $last, $name) = @$script;
1191 $Scripts->Append($first, $name);
d73e5302 1192
99598c8c 1193 $Script{$name} ||= Table->New(Is => $name,
1194 Desc => "Script '$name'",
1195 Fuzzy => 1);
cf25bb62 1196 $Script{$name}->Append($first, $name);
d73e5302 1197
cf25bb62 1198 if ($last) {
1199 $Scripts->Extend($last);
1200 $Script{$name}->Extend($last);
1201 }
1202 }
d73e5302 1203
12ac2576 1204 # $Scripts->Write("Scripts.pl");
d73e5302 1205
cf25bb62 1206 ## Common is everything not explicitly assigned to a Script
1207 ##
1208 ## ***shouldn't this be intersected with \p{Assigned}? ******
1209 ##
99598c8c 1210 New_Prop(Is => 'Common',
1211 $Scripts->Invert,
1212 Desc => 'Pseudo-Script of codepoints not in other Unicode scripts',
1213 Fuzzy => 1);
cf25bb62 1214}
d73e5302 1215
cf25bb62 1216##
1217## Given a name like "Close Punctuation", return a regex (that when applied
1218## with /i) matches any valid form of that name (e.g. "ClosePunctuation",
1219## "Close-Punctuation", etc.)
1220##
1221## Accept any space, dash, or underbar where in the official name there is
1222## space or a dash (or underbar, but there never is).
1223##
1224##
1225sub NameToRegex($)
1226{
1227 my $Name = shift;
1228 $Name =~ s/[- _]/(?:[-_]|\\s+)?/g;
1229 return $Name;
1230}
d73e5302 1231
cf25bb62 1232##
1233## Process Blocks.txt.
1234##
1235sub Blocks_txt()
1236{
1237 my $Blocks = Table->New();
1238 my %Blocks;
d73e5302 1239
cf25bb62 1240 if (not open IN, "Blocks.txt") {
1241 die "$0: Blocks.txt: $!\n";
1242 }
d73e5302 1243
cf25bb62 1244 while (<IN>)
1245 {
1246 #next if not /Private Use$/;
1247 next if not /^([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.+?)\s*$/;
d73e5302 1248
cf25bb62 1249 my ($first, $last, $name) = (hex($1), hex($2), $3);
d73e5302 1250
cf25bb62 1251 $Blocks->Append($first, $name);
76ccdbe2 1252
99598c8c 1253 $Blocks{$name} ||= Table->New(In => $name,
1254 Desc => "Block '$name'",
1255 Fuzzy => 1);
cf25bb62 1256 $Blocks{$name}->Append($first, $name);
76ccdbe2 1257
cf25bb62 1258 if ($last and $last != $first) {
1259 $Blocks->Extend($last);
1260 $Blocks{$name}->Extend($last);
d73e5302 1261 }
d73e5302 1262 }
cf25bb62 1263 close IN;
1264
12ac2576 1265 # $Blocks->Write("Blocks.pl");
d73e5302 1266}
1267
cf25bb62 1268##
1269## Read in the PropList.txt. It contains extended properties not
551b6b6f 1270## listed in the UnicodeData.txt, such as 'Other_Alphabetic':
cf25bb62 1271## alphabetic but not of the general category L; many modifiers
1272## belong to this extended property category: while they are not
1273## alphabets, they are alphabetic in nature.
1274##
1275sub PropList_txt()
1276{
1277 my @PropInfo;
1278
1279 if (not open IN, "PropList.txt") {
1280 die "$0: PropList.txt: $!\n";
1281 }
d73e5302 1282
cf25bb62 1283 while (<IN>)
1284 {
1285 next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/;
d73e5302 1286
cf25bb62 1287 # Wait until all the extended properties have been read since
1288 # they are not listed in numeric order.
1289 push @PropInfo, [ hex($1), hex($2||""), $3 ];
1290 }
1291 close IN;
71d929cb 1292
cf25bb62 1293 # Now append the extended properties in their code point order.
1294 my $Props = Table->New();
1295 my %Prop;
71d929cb 1296
cf25bb62 1297 for my $prop (sort { $a->[0] <=> $b->[0] } @PropInfo)
1298 {
1299 my ($first, $last, $name) = @$prop;
1300 $Props->Append($first, $name);
71d929cb 1301
99598c8c 1302 $Prop{$name} ||= Table->New(Is => $name,
1303 Desc => "Extended property '$name'",
1304 Fuzzy => 1);
cf25bb62 1305 $Prop{$name}->Append($first, $name);
71d929cb 1306
cf25bb62 1307 if ($last) {
1308 $Props->Extend($last);
1309 $Prop{$name}->Extend($last);
1310 }
71d929cb 1311 }
d73e5302 1312
12ac2576 1313 for (keys %Prop) {
1314 (my $file = $PA_reverse{$_}) =~ tr/_//d;
8e4b7420 1315 # XXX I'm assuming that the names from %Prop don't suffer 8.3 clashes.
1316 $BaseNames{lc $file}++;
12ac2576 1317 $Prop{$_}->Write(
1318 "lib/gc_sc/$file.pl",
1319 "Binary property '$_'"
1320 );
1321 }
1322
cf25bb62 1323 # Alphabetic is L and Other_Alphabetic.
99598c8c 1324 New_Prop(Is => 'Alphabetic',
cf25bb62 1325 Table->Merge($Cat{L}, $Prop{Other_Alphabetic}),
99598c8c 1326 Desc => '[\p{L}\p{OtherAlphabetic}]', # use canonical names here
1327 Fuzzy => 1);
cf25bb62 1328
1329 # Lowercase is Ll and Other_Lowercase.
99598c8c 1330 New_Prop(Is => 'Lowercase',
cf25bb62 1331 Table->Merge($Cat{Ll}, $Prop{Other_Lowercase}),
99598c8c 1332 Desc => '[\p{Ll}\p{OtherLowercase}]', # use canonical names here
1333 Fuzzy => 1);
cf25bb62 1334
1335 # Uppercase is Lu and Other_Uppercase.
1336 New_Prop(Is => 'Uppercase',
1337 Table->Merge($Cat{Lu}, $Prop{Other_Uppercase}),
99598c8c 1338 Desc => '[\p{Lu}\p{Other_Uppercase}]', # use canonical names here
1339 Fuzzy => 1);
cf25bb62 1340
1341 # Math is Sm and Other_Math.
1342 New_Prop(Is => 'Math',
1343 Table->Merge($Cat{Sm}, $Prop{Other_Math}),
99598c8c 1344 Desc => '[\p{Sm}\p{OtherMath}]', # use canonical names here
1345 Fuzzy => 1);
cf25bb62 1346
1347 # ID_Start is Ll, Lu, Lt, Lm, Lo, and Nl.
1348 New_Prop(Is => 'ID_Start',
1349 Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl]}),
99598c8c 1350 Desc => '[\p{Ll}\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{Nl}]',
1351 Fuzzy => 1);
cf25bb62 1352
1353 # ID_Continue is ID_Start, Mn, Mc, Nd, and Pc.
1354 New_Prop(Is => 'ID_Continue',
1355 Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl Mn Mc Nd Pc ]}),
99598c8c 1356 Desc => '[\p{ID_Start}\p{Mn}\p{Mc}\p{Nd}\p{Pc}]',
1357 Fuzzy => 1);
d73e5302 1358}
1359
5beb625e 1360
1361##
1362## These are used in:
1363## MakePropTestScript()
1364## WriteAllMappings()
1365## for making the test script.
1366##
1367my %FuzzyNameToTest;
1368my %ExactNameToTest;
1369
1370
1371## This used only for making the test script
1372sub GenTests($$$$)
1373{
1374 my $FH = shift;
1375 my $Prop = shift;
1376 my $MatchCode = shift;
1377 my $FailCode = shift;
1378
1379 if (defined $MatchCode) {
1380 printf $FH qq/Expect(1, "\\x{%04X}", '\\p{$Prop}' );\n/, $MatchCode;
1381 printf $FH qq/Expect(0, "\\x{%04X}", '\\p{^$Prop}');\n/, $MatchCode;
1382 printf $FH qq/Expect(0, "\\x{%04X}", '\\P{$Prop}' );\n/, $MatchCode;
1383 printf $FH qq/Expect(1, "\\x{%04X}", '\\P{^$Prop}');\n/, $MatchCode;
1384 }
1385 if (defined $FailCode) {
1386 printf $FH qq/Expect(0, "\\x{%04X}", '\\p{$Prop}' );\n/, $FailCode;
1387 printf $FH qq/Expect(1, "\\x{%04X}", '\\p{^$Prop}');\n/, $FailCode;
1388 printf $FH qq/Expect(1, "\\x{%04X}", '\\P{$Prop}' );\n/, $FailCode;
1389 printf $FH qq/Expect(0, "\\x{%04X}", '\\P{^$Prop}');\n/, $FailCode;
1390 }
1391}
1392
1393## This used only for making the test script
1394sub ExpectError($$)
1395{
1396 my $FH = shift;
1397 my $prop = shift;
1398
1399 print $FH qq/Error('\\p{$prop}');\n/;
1400 print $FH qq/Error('\\P{$prop}');\n/;
1401}
1402
1403## This used only for making the test script
1404my @GoodSeps = (
1405 " ",
1406 "-",
1407 " \t ",
1408 "",
1409 "",
1410 "_",
1411 );
1412my @BadSeps = (
1413 "--",
1414 "__",
1415 " _",
1416 "/"
1417 );
1418
1419## This used only for making the test script
1420sub RandomlyFuzzifyName($;$)
1421{
1422 my $Name = shift;
1423 my $WantError = shift; ## if true, make an error
1424
1425 my @parts;
1426 for my $part (split /[-\s_]+/, $Name)
1427 {
1428 if (@parts) {
1429 if ($WantError and rand() < 0.3) {
1430 push @parts, $BadSeps[rand(@BadSeps)];
1431 $WantError = 0;
1432 } else {
1433 push @parts, $GoodSeps[rand(@GoodSeps)];
1434 }
1435 }
1436 my $switch = int rand(4);
1437 if ($switch == 0) {
1438 push @parts, uc $part;
1439 } elsif ($switch == 1) {
1440 push @parts, lc $part;
1441 } elsif ($switch == 2) {
1442 push @parts, ucfirst $part;
1443 } else {
1444 push @parts, $part;
1445 }
1446 }
1447 my $new = join('', @parts);
1448
1449 if ($WantError) {
1450 if (rand() >= 0.5) {
1451 $new .= $BadSeps[rand(@BadSeps)];
1452 } else {
1453 $new = $BadSeps[rand(@BadSeps)] . $new;
1454 }
1455 }
1456 return $new;
1457}
1458
1459## This used only for making the test script
1460sub MakePropTestScript()
1461{
1462 ## this written directly -- it's huge.
1463 if (not open OUT, ">TestProp.pl") {
1464 die "$0: TestProp.pl: $!\n";
1465 }
1466 print OUT <DATA>;
1467
1468 while (my ($Name, $Table) = each %ExactNameToTest)
1469 {
1470 GenTests(*OUT, $Name, $Table->ValidCode, $Table->InvalidCode);
1471 ExpectError(*OUT, uc $Name) if uc $Name ne $Name;
1472 ExpectError(*OUT, lc $Name) if lc $Name ne $Name;
1473 }
1474
1475
1476 while (my ($Name, $Table) = each %FuzzyNameToTest)
1477 {
1478 my $Orig = $CanonicalToOrig{$Name};
1479 my %Names = (
1480 $Name => 1,
1481 $Orig => 1,
1482 RandomlyFuzzifyName($Orig) => 1
1483 );
1484
1485 for my $N (keys %Names) {
1486 GenTests(*OUT, $N, $Table->ValidCode, $Table->InvalidCode);
1487 }
1488
1489 ExpectError(*OUT, RandomlyFuzzifyName($Orig, 'ERROR'));
1490 }
1491
1492 print OUT "Finished();\n";
1493 close OUT;
1494}
1495
1496
1497##
1498## These are used only in:
1499## RegisterFileForName()
1500## WriteAllMappings()
1501##
1502my %Exact; ## will become %utf8::Exact;
1503my %Canonical; ## will become %utf8::Canonical;
1504my %CaComment; ## Comment for %Canonical entry of same key
1505
1506##
1507## Given info about a name and a datafile that it should be associated with,
1508## register that assocation in %Exact and %Canonical.
1509sub RegisterFileForName($$$$)
1510{
1511 my $Type = shift;
1512 my $Name = shift;
1513 my $IsFuzzy = shift;
1514 my $filename = shift;
1515
1516 ##
1517 ## Now in details for the mapping. $Type eq 'Is' has the
1518 ## Is removed, as it will be removed in utf8_heavy when this
1519 ## data is being checked. In keeps its "In", but a second
1520 ## sans-In record is written if it doesn't conflict with
1521 ## anything already there.
1522 ##
1523 if (not $IsFuzzy)
1524 {
1525 if ($Type eq 'Is') {
1526 die "oops[$Name]" if $Exact{$Name};
1527 $Exact{$Name} = $filename;
1528 } else {
1529 die "oops[$Type$Name]" if $Exact{"$Type$Name"};
1530 $Exact{"$Type$Name"} = $filename;
1531 $Exact{$Name} = $filename if not $Exact{$Name};
1532 }
1533 }
1534 else
1535 {
1536 my $CName = lc $Name;
1537 if ($Type eq 'Is') {
1538 die "oops[$CName]" if $Canonical{$CName};
1539 $Canonical{$CName} = $filename;
1540 $CaComment{$CName} = $Name if $Name =~ tr/A-Z// >= 2;
1541 } else {
1542 die "oops[$Type$CName]" if $Canonical{lc "$Type$CName"};
1543 $Canonical{lc "$Type$CName"} = $filename;
1544 $CaComment{lc "$Type$CName"} = "$Type$Name";
1545 if (not $Canonical{$CName}) {
1546 $Canonical{$CName} = $filename;
1547 $CaComment{$CName} = "$Type$Name";
1548 }
1549 }
1550 }
1551}
1552
cf25bb62 1553##
1554## Writes the info accumulated in
1555##
1556## %TableInfo;
1557## %FuzzyNames;
1558## %AliasInfo;
1559##
1560##
1561sub WriteAllMappings()
1562{
99598c8c 1563 my @MAP;
1564
5beb625e 1565 ## 'Is' *MUST* come first, so its names have precidence over 'In's
1566 for my $Type ('Is', 'In')
1567 {
1568 my %RawNameToFile; ## a per-$Type cache
cf25bb62 1569
5beb625e 1570 for my $Name (sort {length $a <=> length $b} keys %{$TableInfo{$Type}})
cf25bb62 1571 {
5beb625e 1572 ## Note: $Name is already canonical
99598c8c 1573 my $Table = $TableInfo{$Type}->{$Name};
5beb625e 1574 my $IsFuzzy = $FuzzyNames{$Type}->{$Name};
99598c8c 1575
1576 ## Need an 8.3 safe filename (which means "an 8 safe" $filename)
5beb625e 1577 my $filename;
cf25bb62 1578 {
5beb625e 1579 ## 'Is' items lose 'Is' from the basename.
12ac2576 1580 $filename = $Type eq 'Is' ?
1581 ($PVA_reverse{sc}{$Name} || $Name) :
1582 "$Type$Name";
5beb625e 1583
1584 $filename =~ s/[^\w_]+/_/g; # "L&" -> "L_"
1585 substr($filename, 8) = '' if length($filename) > 8;
1586
1587 ##
1588 ## Make sure the basename doesn't conflict with something we
1589 ## might have already written. If we have, say,
1590 ## InGreekExtended1
1591 ## InGreekExtended2
1592 ## they become
1593 ## InGreekE
1594 ## InGreek2
1595 ##
1596 while (my $num = $BaseNames{lc $filename}++)
1597 {
1598 $num++; ## so basenames with numbers start with '2', which
1599 ## just looks more natural.
1600 ## Want to append $num, but if it'll make the basename longer
1601 ## than 8 characters, pre-truncate $filename so that the result
1602 ## is acceptable.
1603 my $delta = length($filename) + length($num) - 8;
1604 if ($delta > 0) {
1605 substr($filename, -$delta) = $num;
1606 } else {
1607 $filename .= $num;
1608 }
99598c8c 1609 }
5beb625e 1610 };
99598c8c 1611
1612 ##
1613 ## Construct a nice comment to add to the file, and build data
1614 ## for the "./Properties" file along the way.
1615 ##
1616 my $Comment;
1617 {
1618 my $Desc = $TableDesc{$Type}->{$Name} || "";
1619 ## get list of names this table is reference by
1620 my @Supported = $Name;
1621 while (my ($Orig, $Alias) = each %{ $AliasInfo{$Type} })
1622 {
1623 if ($Orig eq $Name) {
1624 push @Supported, $Alias;
1625 }
1626 }
1627
1628 my $TypeToShow = $Type eq 'Is' ? "" : $Type;
1629 my $OrigProp;
1630
1631 $Comment = "This file supports:\n";
1632 for my $N (@Supported)
1633 {
1634 my $IsFuzzy = $FuzzyNames{$Type}->{$N};
5beb625e 1635 my $Prop = "\\p{$TypeToShow$Name}";
99598c8c 1636 $OrigProp = $Prop if not $OrigProp; #cache for aliases
1637 if ($IsFuzzy) {
1638 $Comment .= "\t$Prop (and fuzzy permutations)\n";
1639 } else {
1640 $Comment .= "\t$Prop\n";
1641 }
1642 my $MyDesc = ($N eq $Name) ? $Desc : "Alias for $OrigProp ($Desc)";
1643
1644 push @MAP, sprintf("%s %-42s %s\n",
1645 $IsFuzzy ? '*' : ' ', $Prop, $MyDesc);
1646 }
1647 if ($Desc) {
1648 $Comment .= "\nMeaning: $Desc\n";
1649 }
1650
1651 }
cf25bb62 1652 ##
1653 ## Okay, write the file...
1654 ##
12ac2576 1655 $Table->Write("lib/gc_sc/$filename.pl", $Comment);
99598c8c 1656
5beb625e 1657 ## and register it
1658 $RawNameToFile{$Name} = $filename;
1659 RegisterFileForName($Type => $Name, $IsFuzzy, $filename);
cf25bb62 1660
5beb625e 1661 if ($IsFuzzy)
1662 {
1663 my $CName = CanonicalName($Type . '_'. $Name);
1664 $FuzzyNameToTest{$Name} = $Table if !$FuzzyNameToTest{$Name};
1665 $FuzzyNameToTest{$CName} = $Table if !$FuzzyNameToTest{$CName};
1666 } else {
1667 $ExactNameToTest{$Name} = $Table;
cf25bb62 1668 }
1669
cf25bb62 1670 }
1671
5beb625e 1672 ## Register aliase info
1673 for my $Name (sort {length $a <=> length $b} keys %{$AliasInfo{$Type}})
cf25bb62 1674 {
5beb625e 1675 my $Alias = $AliasInfo{$Type}->{$Name};
1676 my $IsFuzzy = $FuzzyNames{$Type}->{$Alias};
1677 my $filename = $RawNameToFile{$Name};
1678 die "oops [$Alias]->[$Name]" if not $filename;
1679 RegisterFileForName($Type => $Alias, $IsFuzzy, $filename);
1680
1681 my $Table = $TableInfo{$Type}->{$Name};
1682 die "oops" if not $Table;
1683 if ($IsFuzzy)
1684 {
1685 my $CName = CanonicalName($Type .'_'. $Alias);
1686 $FuzzyNameToTest{$Alias} = $Table if !$FuzzyNameToTest{$Alias};
1687 $FuzzyNameToTest{$CName} = $Table if !$FuzzyNameToTest{$CName};
1688 } else {
1689 $ExactNameToTest{$Alias} = $Table;
1690 }
cf25bb62 1691 }
5beb625e 1692 }
cf25bb62 1693
5beb625e 1694 ##
1695 ## Write out the property list
1696 ##
1697 {
1698 my @OUT = (
1699 "##\n",
1700 "## This file created by $0\n",
1701 "## List of built-in \\p{...}/\\P{...} properties.\n",
1702 "##\n",
1703 "## '*' means name may be 'fuzzy'\n",
1704 "##\n\n",
1705 sort { substr($a,2) cmp substr($b, 2) } @MAP,
1706 );
1707 WriteIfChanged('Properties', @OUT);
1708 }
cf25bb62 1709
5beb625e 1710 use Text::Tabs (); ## using this makes the files about half the size
1711
1712 ## Write Exact.pl
1713 {
1714 my @OUT = (
1715 $HEADER,
1716 "##\n",
1717 "## Data in this file used by ../utf8_heavy.pl\n",
1718 "##\n\n",
12ac2576 1719 "## Mapping from name to filename in ./lib/gc_sc\n",
5beb625e 1720 "%utf8::Exact = (\n",
1721 );
cf25bb62 1722
12ac2576 1723 $Exact{InGreek} = 'InGreekA'; # this is evil kludge
cf25bb62 1724 for my $Name (sort keys %Exact)
1725 {
1726 my $File = $Exact{$Name};
5beb625e 1727 $Name = $Name =~ m/\W/ ? qq/'$Name'/ : " $Name ";
1728 my $Text = sprintf("%-15s => %s,\n", $Name, qq/'$File'/);
1729 push @OUT, Text::Tabs::unexpand($Text);
cf25bb62 1730 }
5beb625e 1731 push @OUT, ");\n1;\n";
1732
1733 WriteIfChanged('Exact.pl', @OUT);
1734 }
cf25bb62 1735
5beb625e 1736 ## Write Canonical.pl
1737 {
1738 my @OUT = (
1739 $HEADER,
1740 "##\n",
1741 "## Data in this file used by ../utf8_heavy.pl\n",
1742 "##\n\n",
1743 "## Mapping from lc(canonical name) to filename in ./lib\n",
1744 "%utf8::Canonical = (\n",
1745 );
1746 my $Trail = ""; ## used just to keep the spacing pretty
1747 for my $Name (sort keys %Canonical)
cf25bb62 1748 {
5beb625e 1749 my $File = $Canonical{$Name};
1750 if ($CaComment{$Name}) {
1751 push @OUT, "\n" if not $Trail;
1752 push @OUT, " # $CaComment{$Name}\n";
1753 $Trail = "\n";
1754 } else {
1755 $Trail = "";
cf25bb62 1756 }
5beb625e 1757 $Name = $Name =~ m/\W/ ? qq/'$Name'/ : " $Name ";
1758 my $Text = sprintf(" %-41s => %s,\n$Trail", $Name, qq/'$File'/);
1759 push @OUT, Text::Tabs::unexpand($Text);
cf25bb62 1760 }
5beb625e 1761 push @OUT, ");\n1\n";
1762 WriteIfChanged('Canonical.pl', @OUT);
d2d499f5 1763 }
5beb625e 1764
1765 MakePropTestScript() if $MakeTestScript;
d2d499f5 1766}
1767
5beb625e 1768
551b6b6f 1769sub SpecialCasing_txt()
cf25bb62 1770{
1771 #
1772 # Read in the special cases.
1773 #
983ffd37 1774
cf25bb62 1775 my %CaseInfo;
1776
551b6b6f 1777 if (not open IN, "SpecialCasing.txt") {
1778 die "$0: SpecialCasing.txt: $!\n";
cf25bb62 1779 }
1780 while (<IN>) {
1781 next unless /^[0-9A-Fa-f]+;/;
1782 s/\#.*//;
1783 s/\s+$//;
1784
1785 my ($code, $lower, $title, $upper, $condition) = split(/\s*;\s*/);
1786
1787 if ($condition) { # not implemented yet
1788 print "# SKIPPING $_\n" if $Verbose;
1789 next;
1790 }
1791
1792 # Wait until all the special cases have been read since
1793 # they are not listed in numeric order.
1794 my $ix = hex($code);
a6da3000 1795 push @{$CaseInfo{Lower}}, [ $ix, $code, $lower ]
1796 unless $code eq $lower;
1797 push @{$CaseInfo{Title}}, [ $ix, $code, $title ]
1798 unless $code eq $title;
1799 push @{$CaseInfo{Upper}}, [ $ix, $code, $upper ]
1800 unless $code eq $upper;
cf25bb62 1801 }
1802 close IN;
1803
1804 # Now write out the special cases properties in their code point order.
1805 # Prepend them to the To/{Upper,Lower,Title}.pl.
1806
1807 for my $case (qw(Lower Title Upper))
1808 {
1809 my $NormalCase = do "To/$case.pl" || die "$0: $@\n";
cf25bb62 1810
b08cf34e 1811 my @OUT =
1812 (
1813 $HEADER, "\n",
1814 "# The key UTF-8 _bytes_, the value UTF-8 (speed hack)\n",
1815 "%utf8::ToSpec$case =\n(\n",
1816 );
cf25bb62 1817
1818 for my $prop (sort { $a->[0] <=> $b->[0] } @{$CaseInfo{$case}}) {
1819 my ($ix, $code, $to) = @$prop;
1820 my $tostr =
1821 join "", map { sprintf "\\x{%s}", $_ } split ' ', $to;
b08cf34e 1822 push @OUT, sprintf qq["%s" => "$tostr",\n], join("", map { sprintf "\\x%02X", $_ } unpack("U0C*", pack("U", $ix)));
5cb851a6 1823 # Remove any single-character mappings for
1824 # the same character since we are going for
1825 # the special casing rules.
1826 $NormalCase =~ s/^$code\t\t\w+\n//m;
cf25bb62 1827 }
5beb625e 1828 push @OUT, (
1829 ");\n\n",
1830 "return <<'END';\n",
1831 $NormalCase,
1832 "END\n"
1833 );
1834 WriteIfChanged("To/$case.pl", @OUT);
d2d499f5 1835 }
d2d499f5 1836}
1837
c4051cc5 1838#
1839# Read in the case foldings.
1840#
551b6b6f 1841# We will do full case folding, C + F + I (see CaseFolding.txt).
c4051cc5 1842#
551b6b6f 1843sub CaseFolding_txt()
cf25bb62 1844{
551b6b6f 1845 if (not open IN, "CaseFolding.txt") {
1846 die "$0: CaseFolding.txt: $!\n";
cf25bb62 1847 }
c4051cc5 1848
cf25bb62 1849 my $Fold = Table->New();
c4051cc5 1850 my %Fold;
1851
cf25bb62 1852 while (<IN>) {
254ba52a 1853 # Skip status 'S', simple case folding
c4051cc5 1854 next unless /^([0-9A-Fa-f]+)\s*;\s*([CFI])\s*;\s*([0-9A-Fa-f]+(?: [0-9A-Fa-f]+)*)\s*;/;
1855
cf25bb62 1856 my ($code, $status, $fold) = (hex($1), $2, $3);
c4051cc5 1857
1858 if ($status eq 'C') { # Common: one-to-one folding
254ba52a 1859 # No append() since several codes may fold into one.
cf25bb62 1860 $Fold->RawAppendRange($code, $code, $fold);
c4051cc5 1861 } else { # F: full, or I: dotted uppercase I -> dotless lowercase I
cf25bb62 1862 $Fold{$code} = $fold;
c4051cc5 1863 }
1864 }
cf25bb62 1865 close IN;
c4051cc5 1866
cf25bb62 1867 $Fold->Write("To/Fold.pl");
c4051cc5 1868
1869 #
1870 # Prepend the special foldings to the common foldings.
1871 #
c4051cc5 1872 my $CommonFold = do "To/Fold.pl" || die "$0: To/Fold.pl: $!\n";
5beb625e 1873
b08cf34e 1874 my @OUT =
1875 (
1876 $HEADER, "\n",
1877 "# The ke UTF-8 _bytes_, the value UTF-8 (speed hack)\n",
1878 "%utf8::ToSpecFold =\n(\n",
1879 );
cf25bb62 1880 for my $code (sort { $a <=> $b } keys %Fold) {
1881 my $foldstr =
1882 join "", map { sprintf "\\x{%s}", $_ } split ' ', $Fold{$code};
b08cf34e 1883 push @OUT, sprintf qq["%s" => "$foldstr",\n], join("", map { sprintf "\\x%02X", $_ } unpack("U0C*", pack("U", $code)));
c4051cc5 1884 }
5beb625e 1885 push @OUT, (
1886 ");\n\n",
1887 "return <<'END';\n",
1888 $CommonFold,
1889 "END\n",
1890 );
1891
1892 WriteIfChanged("To/Fold.pl", @OUT);
c4051cc5 1893}
1894
cf25bb62 1895## Do it....
1896
12ac2576 1897Build_Aliases();
44da8cae 1898UnicodeData_Txt();
cf25bb62 1899PropList_txt();
1900
1901Scripts_txt();
1902Blocks_txt();
1903
5beb625e 1904WriteAllMappings();
1905
551b6b6f 1906LineBreak_Txt();
1907ArabicShaping_txt();
12ac2576 1908EastAsianWidth_txt();
1909HangulSyllableType_txt();
cf25bb62 1910Jamo_txt();
551b6b6f 1911SpecialCasing_txt();
1912CaseFolding_txt();
cf25bb62 1913
5beb625e 1914exit(0);
cf25bb62 1915
5beb625e 1916## TRAILING CODE IS USED BY MakePropTestScript()
1917__DATA__
1918use strict;
1919use warnings;
1920
1921my $Tests = 0;
1922my $Fails = 0;
cf25bb62 1923
5beb625e 1924sub Expect($$$)
1925{
1926 my $Expect = shift;
1927 my $String = shift;
1928 my $Regex = shift;
1929 my $Line = (caller)[2];
1930
1931 $Tests++;
1932 my $RegObj;
1933 my $result = eval {
1934 $RegObj = qr/$Regex/;
1935 $String =~ $RegObj ? 1 : 0
1936 };
1937
1938 if (not defined $result) {
1939 print "couldn't compile /$Regex/ on $0 line $Line: $@\n";
1940 $Fails++;
1941 } elsif ($result ^ $Expect) {
1942 print "bad result (expected $Expect) on $0 line $Line: $@\n";
1943 $Fails++;
1944 }
1945}
d73e5302 1946
5beb625e 1947sub Error($)
1948{
1949 my $Regex = shift;
1950 $Tests++;
1951 if (eval { 'x' =~ qr/$Regex/; 1 }) {
1952 $Fails++;
1953 my $Line = (caller)[2];
1954 print "expected error for /$Regex/ on $0 line $Line: $@\n";
1955 }
1956}
1957
1958sub Finished()
1959{
1960 if ($Fails == 0) {
1961 print "All $Tests tests passed.\n";
1962 exit(0);
1963 } else {
1964 print "$Tests tests, $Fails failed!\n";
1965 exit(-1);
1966 }
1967}