+## !!!!!!!!!!!!!! IF YOU MODIFY THIS FILE !!!!!!!!!!!!!!!!!!!!!!!!!
+## Any files created or read by this program should be listed in 'mktables.lst'
+
#!/usr/bin/perl -w
require 5.008; # Needs pack "U". Probably safest to run on 5.8.x
use strict;
my $Verbose = 0;
my $MakeTestScript = 0;
my $AlwaysWrite = 0;
+my $UseDir = "";
+my $FileList = "$0.lst";
+my $MakeList = 0;
while (@ARGV)
{
$Verbose = 0;
} elsif ($arg eq '-w') {
$AlwaysWrite = 1; # update the files even if they havent changed
+ $FileList = "";
+ } elsif ($arg eq '-check') {
+ my $this = shift @ARGV;
+ my $ok = shift @ARGV;
+ if ($this ne $ok) {
+ print "Skipping as check params are not the same.\n";
+ exit(0);
+ }
} elsif ($arg eq '-maketest') {
$MakeTestScript = 1;
- } elsif ($arg eq '-C' && defined (my $dir = shift)) {
- chdir $dir or die "chdir $_: $!";
+ } elsif ($arg eq '-makelist') {
+ $MakeList = 1;
+ } elsif ($arg eq '-C' && defined ($UseDir = shift)) {
+ -d $UseDir or die "Unknown directory '$UseDir'";
+ } elsif ($arg eq '-L' && defined ($FileList = shift)) {
+ -e $FileList or die "Filelist '$FileList' doesn't appear to exist!";
} else {
- die "usage: $0 [-v|-q|-C dir] [-maketest]";
+ die "usage: $0 [-v|-q|-w|-C dir|-L filelist] [-maketest] [-makelist]\n",
+ " -v : Verbose Mode\n",
+ " -q : Quiet Mode\n",
+ " -w : Write files regardless\n",
+ " -maketest : Make test script\n",
+ " -makelist : Rewrite the file list based on current setup\n",
+ " -L filelist : Use this file list, (defaults to $0)\n",
+ " -C dir : Change to this directory before proceeding\n",
+ " -check A B : Executes only if A and B are the same\n";
+ }
+}
+
+if ($FileList) {
+ print "Reading file list '$FileList'\n"
+ if $Verbose;
+ open my $fh,"<",$FileList or die "Failed to read '$FileList':$!";
+ my @input;
+ my @output;
+ for my $list ( \@input, \@output ) {
+ while (<$fh>) {
+ s/^ \s+ | \s+ $//xg;
+ next if /^ \s* (?: \# .* )? $/x;
+ last if /^ =+ $/x;
+ my ( $file ) = split /\t/, $_;
+ push @$list, $file;
+ }
+ my %dupe;
+ @$list = grep !$dupe{ $_ }++, @$list;
+ }
+ close $fh;
+ die "No input or output files in '$FileList'!"
+ if !@input or !@output;
+ if ( $MakeList ) {
+ foreach my $file (@output) {
+ unlink $file;
+ }
+ }
+ if ( $Verbose ) {
+ print "Expecting ".scalar( @input )." input files. ",
+ "Checking ".scalar( @output )." output files.\n";
+ }
+ # we set maxtime to be the youngest input file, including $0 itself.
+ my $maxtime = -M $0; # do this before the chdir!
+ if ($UseDir) {
+ chdir $UseDir or die "Failed to chdir to '$UseDir':$!";
+ }
+ foreach my $in (@input) {
+ my $time = -M $in;
+ die "Missing input file '$in'" unless defined $time;
+ $maxtime = $time if $maxtime < $time;
+ }
+
+ # now we check to see if any output files are older than maxtime, if
+ # they are we need to continue on, otherwise we can presumably bail.
+ my $ok = 1;
+ foreach my $out (@output) {
+ if ( ! -e $out ) {
+ print "'$out' is missing.\n"
+ if $Verbose;
+ $ok = 0;
+ last;
+ }
+ if ( -M $out > $maxtime ) {
+ print "'$out' is too old.\n"
+ if $Verbose;
+ $ok = 0;
+ last;
+ }
+ }
+ if ($ok) {
+ print "Files seem to be ok, not bothering to rebuild.\n";
+ exit(0);
+ }
+ print "Must rebuild tables.\n"
+ if $Verbose;
+} else {
+ if ($Verbose) {
+ print "Not checking filelist.\n";
+ }
+ if ($UseDir) {
+ chdir $UseDir or die "Failed to chdir to '$UseDir':$!";
}
}
return $Table;
}
-##
-## Returns true if the Table has no code points
-##
-sub Table::IsEmpty
-{
- my $Table = shift; #self
- return not @$Table;
-}
-
-##
-## Returns true if the Table has code points
-##
-sub Table::NotEmpty
-{
- my $Table = shift; #self
- return @$Table;
-}
##
## Returns the maximum code point currently in the table.
##
sub Table::Max
{
- my $Table = shift; #self
- confess "oops" if $Table->IsEmpty; ## must have code points to have a max
- return $Table->[-1]->[RANGE_END];
+ my $last = $_[0]->[-1]; ## last code point
+ confess "oops" unless $last; ## must have code points to have a max
+ return $last->[RANGE_END];
}
##
## Given a new code point, make the last range of the Table extend to
## include the new (and all intervening) code points.
##
+## Takes the time to make sure that the extension is valid.
+##
sub Table::Extend
{
my $Table = shift; #self
confess "oops ($codepoint <= $PrevMax)" if $codepoint <= $PrevMax;
- $Table->[-1]->[RANGE_END] = $codepoint;
+ $Table->ExtendNoCheck($codepoint);
+}
+
+
+##
+## Given a new code point, make the last range of the Table extend to
+## include the new (and all intervening) code points.
+##
+## Does NOT check that the extension is valid. Assumes that the caller
+## has already made this check.
+##
+sub Table::ExtendNoCheck
+{
+ ## Optmized adding: Assumes $Table and $codepoint as parms
+ $_[0]->[-1]->[RANGE_END] = $_[1];
}
##
## If we've already got a range working, and this code point is the next
## one in line, and if the name is the same, just extend the current range.
##
- if ($Table->NotEmpty
+ my $last = $Table->[-1];
+ if ($last
and
- $Table->Max == $codepoint - 1
+ $last->[RANGE_END] == $codepoint - 1
and
- $Table->[-1]->[RANGE_NAME] eq $name)
+ $last->[RANGE_NAME] eq $name)
{
- $Table->Extend($codepoint);
+ $Table->ExtendNoCheck($codepoint);
}
else
{
if ($start > $New->Max) {
$New->AppendRange($start, $end);
} elsif ($end > $New->Max) {
- $New->Extend($end);
+ $New->ExtendNoCheck($end);
}
}
{
my $Table = shift; #self
- return 0x1234 if $Table->IsEmpty();
+ return 0x1234 if not @$Table;
for my $set (@$Table)
{
Table->New(Is => 'Word', Desc => "[[:Word:]]", Fuzzy => 0);
$Cat{SpacePerl} =
Table->New(Is => 'SpacePerl', Desc => '\s', Fuzzy => 0);
-
+ $Cat{VertSpace} =
+ Table->New(Is => 'VertSpace', Desc => '\v', Fuzzy => 0);
+ $Cat{HorizSpace} =
+ Table->New(Is => 'HorizSpace', Desc => '\h', Fuzzy => 0);
my %To;
$To{Upper} = Table->New();
$To{Lower} = Table->New();
$Cat{SpacePerl}->$op($code) if $isspace
&& $code != 0x000B; # Backward compat.
+ $Cat{VertSpace}->$op($code) if grep {$code == $_}
+ ( 0x0A..0x0D,0x85,0x2028,0x2029 );
+
+ $Cat{HorizSpace}->$op($code) if grep {$code == $_} (
+ 0x09, 0x20, 0xa0, 0x1680, 0x180e, 0x2000, 0x2001, 0x2002,
+ 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
+ 0x202f, 0x205f, 0x3000
+ );
+
$Cat{Blank}->$op($code) if $isspace
&& !($code == 0x000A ||
$code == 0x000B ||
while (<IN>)
{
- next unless /^[0-9A-Fa-f]+;/;
+ next unless /^[0-9A-Fa-f]+(\.\.[0-9A-Fa-f]+)?;/;
s/#.*//;
s/\s+$//;
- my ($hexcode, $pv) = split(/\s*;\s*/);
- my $code = hex($hexcode);
+ my ($hexcodes, $pv) = split(/\s*;\s*/);
$EAW{$pv} ||= Table->New(Is => "EastAsianWidth$pv");
- $EAW{$pv}->Append($code);
+ my ($start, $end) = split(/\.\./, $hexcodes);
+ if (defined $end) {
+ $EAW{$pv}->AppendRange(hex($start), hex($end));
+ } else {
+ $EAW{$pv}->Append(hex($start));
+ }
}
close IN;
);
}
- # Alphabetic is L and Other_Alphabetic.
+ # Alphabetic is L, Nl, and Other_Alphabetic.
New_Prop(Is => 'Alphabetic',
- Table->Merge($Cat{L}, $Prop{Other_Alphabetic}),
- Desc => '[\p{L}\p{OtherAlphabetic}]', # use canonical names here
+ Table->Merge($Cat{L}, $Cat{Nl}, $Prop{Other_Alphabetic}),
+ Desc => '[\p{L}\p{Nl}\p{OtherAlphabetic}]', # canonical names
Fuzzy => 1);
# Lowercase is Ll and Other_Lowercase.
New_Prop(Is => 'Lowercase',
Table->Merge($Cat{Ll}, $Prop{Other_Lowercase}),
- Desc => '[\p{Ll}\p{OtherLowercase}]', # use canonical names here
+ Desc => '[\p{Ll}\p{OtherLowercase}]', # canonical names
Fuzzy => 1);
# Uppercase is Lu and Other_Uppercase.
New_Prop(Is => 'Uppercase',
Table->Merge($Cat{Lu}, $Prop{Other_Uppercase}),
- Desc => '[\p{Lu}\p{Other_Uppercase}]', # use canonical names here
+ Desc => '[\p{Lu}\p{OtherUppercase}]', # canonical names
Fuzzy => 1);
# Math is Sm and Other_Math.
New_Prop(Is => 'Math',
Table->Merge($Cat{Sm}, $Prop{Other_Math}),
- Desc => '[\p{Sm}\p{OtherMath}]', # use canonical names here
+ Desc => '[\p{Sm}\p{OtherMath}]', # canonical names
Fuzzy => 1);
- # ID_Start is Ll, Lu, Lt, Lm, Lo, and Nl.
+ # ID_Start is Ll, Lu, Lt, Lm, Lo, Nl, and Other_ID_Start.
New_Prop(Is => 'ID_Start',
- Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl]}),
- Desc => '[\p{Ll}\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{Nl}]',
+ Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl]}, $Prop{Other_ID_Start}),
+ Desc => '[\p{Ll}\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{Nl}\p{OtherIDStart}]',
Fuzzy => 1);
- # ID_Continue is ID_Start, Mn, Mc, Nd, and Pc.
+ # ID_Continue is ID_Start, Mn, Mc, Nd, Pc, and Other_ID_Continue.
New_Prop(Is => 'ID_Continue',
- Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl Mn Mc Nd Pc ]}),
- Desc => '[\p{ID_Start}\p{Mn}\p{Mc}\p{Nd}\p{Pc}]',
+ Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl Mn Mc Nd Pc ]},
+ @Prop{qw[Other_ID_Start Other_ID_Continue]}),
+ Desc => '[\p{ID_Start}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\p{OtherIDContinue}]',
Fuzzy => 1);
+
+ # Default_Ignorable_Code_Point = Other_Default_Ignorable_Code_Point
+ # + Cf + Cc + Cs + Noncharacter + Variation_Selector
+ # - WhiteSpace - FFF9..FFFB (Annotation Characters)
+
+ my $Annotation = Table->New();
+ $Annotation->RawAppendRange(0xFFF9, 0xFFFB);
+
+ New_Prop(Is => 'Default_Ignorable_Code_Point',
+ Table->Merge(@Cat{qw[Cf Cc Cs]},
+ $Prop{Noncharacter_Code_Point},
+ $Prop{Variation_Selector},
+ $Prop{Other_Default_Ignorable_Code_Point})
+ ->Invert
+ ->Merge($Prop{White_Space}, $Annotation)
+ ->Invert,
+ Desc => '(?![\p{WhiteSpace}\x{FFF9}-\x{FFFB}])[\p{Cf}\p{Cc}'.
+ '\p{Cs}\p{NoncharacterCodePoint}\p{VariationSelector}'.
+ '\p{OtherDefaultIgnorableCodePoint}]',
+ Fuzzy => 1);
+
}
SpecialCasing_txt();
CaseFolding_txt();
+if ( $FileList and $MakeList ) {
+
+ print "Updating '$FileList'\n"
+ if ($Verbose);
+
+ open my $ofh,">",$FileList
+ or die "Can't write to '$FileList':$!";
+ print $ofh <<"EOFHEADER";
+#
+# mktables.lst -- File list for mktables.
+#
+# Autogenerated on @{[scalar localtime]}
+#
+# - First section is input files
+# (mktables itself is automatically included)
+# - Section seperator is /^=+\$/
+# - Second section is a list of output files.
+# - Lines matching /^\\s*#/ are treated as comments
+# which along with blank lines are ignored.
+#
+
+# Input files:
+
+EOFHEADER
+ my @input=("version",glob('*.txt'));
+ print $ofh "$_\n" for
+ @input,
+ "\n=================================\n",
+ "# Output files:\n",
+ # special files
+ "Properties";
+
+
+ require File::Find;
+ my $count=0;
+ File::Find::find({
+ no_chdir=>1,
+ wanted=>sub {
+ if (/\.pl$/) {
+ s!^\./!!;
+ print $ofh "$_\n";
+ $count++;
+ }
+ },
+ },".");
+
+ print $ofh "\n# ",scalar(@input)," input files\n",
+ "# ",scalar($count+1)," output files\n\n",
+ "# End list\n";
+ close $ofh
+ or warn "Failed to close $ofh: $!";
+
+ print "Filelist has ",scalar(@input)," input files and ",
+ scalar($count+1)," output files\n"
+ if $Verbose;
+}
+print "All done\n" if $Verbose;
exit(0);
## TRAILING CODE IS USED BY MakePropTestScript()