--- /dev/null
+# NameAliases-5.1.0.txt
+# Date: 2008-02-11, 11:36:00 PST [KW]
+#
+# This file is a normative contributory data file in the
+# Unicode Character Database.
+#
+# Copyright (c) 2005-2008 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# This file defines the formal name aliases for Unicode characters.
+#
+# For informative aliases see NamesList.txt
+#
+# For documentation, see UCD.html and NamesList.html
+#
+# FORMAT
+#
+# Each line has two fields
+# First field: Code point
+# Second field: Alias
+#
+# In case multiple aliases are assigned, additional aliases
+# would be provided on separate lines
+#
+#-----------------------------------------------------------------
+01A2;LATIN CAPITAL LETTER GHA
+01A3;LATIN SMALL LETTER GHA
+0CDE;KANNADA LETTER LLLA
+0E9D;LAO LETTER FO FON
+0E9F;LAO LETTER FO FAY
+0EA3;LAO LETTER RO
+0EA5;LAO LETTER LO
+0FD0;TIBETAN MARK BKA- SHOG GI MGO RGYAN
+A015;YI SYLLABLE ITERATION MARK
+FE18;PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET
+1D0C5;BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS
+
+# Total code points: 11
+
+# EOF
mkdir $lib, 0755 or die "mkdir '$lib': $!";
}
-my $LastUnicodeCodepoint = 0x10FFFF; # As of Unicode 3.1.1.
+my $LastUnicodeCodepoint = 0x10FFFF; # As of Unicode 5.1.
my $HEADER=<<"EOF";
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
my %General;
my %Cat;
-## Simple Data::Dumper alike. Good enough for our needs. We can't use the real
+## A simple Data::Dumper work-alike. Good enough for our needs. We can't use the real
## thing as we have to run under miniperl
sub simple_dumper {
my @lines;
}
}
- ## open ane read file.....
+ ## open and read file.....
if (not open IN, "UnicodeData.txt") {
die "$0: UnicodeData.txt: $!\n";
}
}
close IN;
+ ## Read in NameAliases.txt. It contains additional normative names of code
+ ## points that are not listed in UnicodeData.txt. An alias exists when an
+ ## error in a name was found after the database was published: rather than
+ ## changing the name, which would break any code that came to rely on the
+ ## erroneous version, the correct name is added as an alias.
+
+ my $NameAliases = Table->New(); ## NOTE(review): not used in this loop (entries go to $Name) -- confirm
+
+ if (not open IN, "NameAliases.txt") {
+ die "$0: NameAliases.txt: $!\n";
+ }
+
+ while (<IN>)
+ {
+ next unless /^[0-9A-Fa-f]+;/; ## skip comment and blank lines
+ s/\s+$//; ## strip trailing whitespace, including the newline
+
+ my ($hexcode, ## code point in hex (e.g. "01A2")
+ $name, ## alias for that code point (e.g. "LATIN CAPITAL LETTER GHA")
+ ) = split(/\s*;\s*/);
+
+ my $code = hex($hexcode);
+
+ ## Tables are supposed to be filled in strictly increasing code point
+ ## order, but appending duplicate code points at the end of the table
+ ## does in fact work. The table is intended to be indexed by name
+ ## anyway.
+
+ $Name->RawAppendRange($code, $code, $name);
+ }
+ close IN;
+
+
##
## Tidy up a few special cases....
##
#
# Read in the case foldings.
#
-# We will do full case folding, C + F + I (see CaseFolding.txt).
+# We will do full case folding, C + F + I (see CaseFolding.txt). Note that
+# there are no I entries starting with Unicode 3.2, but leaving it in allows
+# for backward compatibility.
#
sub CaseFolding_txt()
{
EOFHEADER
my @input=("version",glob('*.txt'));
print $ofh "$_\n" for
- @input,
+ sort(@input),
"\n=================================\n",
"# Output files:\n",
# special files
require File::Find;
- my $count=0;
+ my @output_files;
File::Find::find({
no_chdir=>1,
wanted=>sub {
if (/\.pl$/) {
s!^\./!!;
- print $ofh "$_\n";
- $count++;
+ push @output_files, "$_\n";
}
},
},".");
+ print $ofh sort @output_files;
print $ofh "\n# ",scalar(@input)," input files\n",
- "# ",scalar($count+1)," output files\n\n",
+ "# ",scalar(@output_files)+1," output files\n\n",
"# End list\n";
close $ofh
or warn "Failed to close $ofh: $!";
print "Filelist has ",scalar(@input)," input files and ",
- scalar($count+1)," output files\n"
+ scalar(@output_files)+1," output files\n"
if $Verbose;
}
print "All done\n" if $Verbose;