#!/usr/bin/perl -w

#
# mktables -- create the runtime Perl Unicode files (lib/unicore/**/*.pl)
# from the Unicode database files (lib/unicore/*.txt).
#

use strict;

my $LastUnicodeCodepoint = 0x10FFFF; # As of Unicode 3.1.1.

mkdir("In", 0755);
mkdir("Is", 0755);
mkdir("To", 0755);

sub extend {
    my ($table, $last) = @_;

    $table->[-1]->[1] = $last;
}

sub append {
    my ($table, $code, $name) = @_;
    if (@$table &&
	hex($table->[-1]->[1]) == hex($code) - 1 &&
	(!defined $name || $table->[-1]->[2] eq $name)) {
	extend($table, $code);
    } else {
	push @$table, [$code, $code, $name];
    }
}

sub append_range {
    my ($table, $code_ini, $code_fin, $name) = @_;
    append($table, $code_ini, $name);
    extend($table, $code_fin);
}

sub inverse {
    my ($table) = @_;
    my $inverse = [];
    my ($first, $last);
    if ($table->[0]->[0]) {
	$last = hex($table->[0]->[0]);
	push @$inverse, [ "0000",
			  sprintf("%04X", $last - 1) ];
    }
    for my $i (0..$#$table-1) {
	$first = defined $table->[$i    ]->[1] ?
	             hex($table->[$i    ]->[1]) : 0;
	$last  = defined $table->[$i + 1]->[0] ?
	             hex($table->[$i + 1]->[0]) : $first;
	push @$inverse, [ sprintf("%04X", $first + 1),
			  sprintf("%04X", $last  - 1) ]
			      unless $first + 1 == $last;
    }
    return $inverse;
}

sub header {
    my $fh = shift;

    print $fh <<EOT;
# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
# This file is built by $0 from e.g. Unicode.txt.
# Any changes made here will be lost!
EOT
}

sub begin {
    my $fh = shift;

    print $fh <<EOT;
return <<'END';
EOT
}

sub end {
    my $fh = shift;
    
    print $fh <<EOT;
END
EOT
}

sub flush {
    my ($table, $file) = @_;
    print "$file\n";
    if (open(my $fh, ">$file")) {
	header($fh);
	begin($fh);
	for my $i (@$table) {
	    print $fh $i->[0], "\t",
                      $i->[1] ne $i->[0] ? $i->[1] : "", "\t",
                      defined $i->[2] ? $i->[2] : "", "\n";
	}
	end($fh);
	close($fh);
    } else {
	die "$0: $file: $!\n";
    }
}

#
# The %In contains the mapping of the script/block name into a number.
#

my %In;
my $InId = 0;
my %InIn;

my %InScript;
my %InBlock;

#
# Read in the Unicode.txt, the main Unicode database.
#

my %Cat;
my %General;
my @General;

sub gencat {
    my ($Name, $GeneralH, $GeneralA, $Cat,
	$name, $cat, $code, $op) = @_;

    $op->($Name,                     $code, $name);
    $op->($GeneralA,                 $code, $cat);

    $op->($GeneralH->{$name} ||= [], $code, $name);

    $op->($Cat->{$cat}       ||= [], $code);
    $op->($Cat->{substr($cat, 0, 1)}
      	                    ||= [],  $code);
    # 005F: SPACING UNDERSCORE
    $op->($Cat->{Word}       ||= [], $code)
	if $cat =~ /^[LMN]/ || $code eq "005F";
    $op->($Cat->{Alnum}      ||= [], $code)
	if $cat =~ /^[LMN]/;
    $op->($Cat->{Alpha}      ||= [], $code)
	if $cat =~ /^[LM]/;
    # 0009: HORIZONTAL TABULATION
    # 000A: LINE FEED
    # 000B: VERTICAL TABULATION
    # 000C: FORM FEED
    # 000D: CARRIAGE RETURN
    # 0020: SPACE
    $op->($Cat->{Space}      ||= [], $code)
	if $cat  =~ /^Z/ ||
	    $code =~ /^(0009|000A|000B|000C|000D)$/;
    $op->($Cat->{SpacePerl}  ||= [], $code)
	if $cat  =~ /^Z/ || $code =~ /^(?:0085|2028|2029)$/ ||
	    $code =~ /^(0009|000A|000C|000D)$/;
    $op->($Cat->{Blank}      ||= [], $code)
	if $code =~ /^(0020|0009)$/ ||
	    $cat  =~ /^Z[^lp]$/;
    $op->($Cat->{Digit}      ||= [], $code) if $cat eq "Nd";
    $op->($Cat->{Upper}      ||= [], $code) if $cat eq "Lu";
    $op->($Cat->{Lower}      ||= [], $code) if $cat eq "Ll";
    $op->($Cat->{Title}      ||= [], $code) if $cat eq "Lt";
    $op->($Cat->{ASCII}      ||= [], $code) if $code le "007F";
    $op->($Cat->{Cntrl}      ||= [], $code) if $cat =~ /^C/;
    $op->($Cat->{Graph}      ||= [], $code) if $cat =~ /^([LMNPS]|Co)/;
    $op->($Cat->{Print}      ||= [], $code) if $cat =~ /^([LMNPS]|Co|Zs)/;
    $op->($Cat->{Punct}      ||= [], $code) if $cat =~ /^P/;
    # 003[0-9]: DIGIT ZERO..NINE, 00[46][1-6]: A..F, a..f
    $op->($Cat->{XDigit}     ||= [], $code)
	if $code =~ /^00(3[0-9]|[46][1-6])$/;

}

if (open(my $Unicode, "Unicode.txt")) {
    my @Name;
    my @Bidi;
    my %Bidi;
    my @Comb;
    my @Deco;
    my %Deco;
    my %DC;
    my @Number;
    my @Mirrored;
    my %To;


    my $LastCodeInt = -1; # a numeric, not a hexadecimal string.

    # UnicodeData-3.1.0.html says 
    # no characters in the file have the property, Cn, Not Assigned.

    sub check_no_characters { # in the scope of my $LastCodeInt;
	my $code = shift;
	my $diff_from_last = hex($code) - $LastCodeInt;
	my $code_ini = sprintf("%04X", $LastCodeInt + 1);
	$LastCodeInt = hex($code);
	if ($diff_from_last == 1) {
	    return;
	} elsif ($diff_from_last == 2) {
	    append($Cat{Cn}		||= [], $code_ini);
	    append($Cat{C}		||= [], $code_ini);
	} else {
	    my $code_fin = sprintf("%04X", hex($code) - 1);
	    append_range($Cat{Cn}	||= [], $code_ini, $code_fin);
	    append_range($Cat{C}	||= [], $code_ini, $code_fin);
	}
    }

    while (<$Unicode>) {
	next unless /^[0-9A-Fa-f]+;/;
	s/\s+$//;

	my ($code, $name, $cat, $comb, $bidi, $deco,
	    $decimal, $digit, $number,
	    $mirrored, $unicode10, $comment,
	    $upper, $lower, $title) = split(/\s*;\s*/);

	if ($name =~ /^<(.+), (First|Last)>$/) {
	    if($2 eq 'First') {
		check_no_characters($code);
	    } else {
		$LastCodeInt = hex($code);
	    }

	    $name = $1;
	    gencat(\@Name, \%General, \@General, \%Cat,
		   $name, $cat, $code,
		   $2 eq 'First' ? \&append : \&extend);
	    unless (defined $In{$name}) {
		$In{$name}   = $InId++;
		$InIn{$name} = $General{$name};
	    }
	} else {
	    check_no_characters($code);

	    gencat(\@Name, \%General, \@General, \%Cat,
		   $name, $cat, $code, \&append);

	    append($To{Upper}       ||= [], $code, $upper)   if $upper;
	    append($To{Lower}       ||= [], $code, $lower)   if $lower;
	    append($To{Title}       ||= [], $code, $title)   if $title;
	    append($To{Digit}       ||= [], $code, $decimal) if $decimal;
	    
	    append(\@Bidi,                  $code, $bidi);
	    append($Bidi{$bidi}     ||= [], $code);
	    
	    append(\@Comb,                  $code, $comb) if $comb;
	    
	    if ($deco) {
		append(\@Deco,                  $code, $deco);
		if ($deco =~/^<(\w+)>/) {
		    append($Deco{Compat} ||= [], $code);
		    append($DC{$1}       ||= [], $code);
		} else {
		    append($Deco{Canon}  ||= [], $code);
		}
	    }
	    
	    append(\@Number,                     $code, $number) if $number;
	    
	    append(\@Mirrored,                   $code) if $mirrored eq "Y";
	}
    }

    check_no_characters(sprintf("%X", $LastUnicodeCodepoint + 1));

    flush(\@Name, "Name.pl");

    foreach my $cat (sort keys %Cat) {
	flush($Cat{$cat}, "Is/$cat.pl");
    }

    foreach my $to (sort keys %To) {
	flush($To{$to}, "To/$to.pl");
    }

    flush(\@Bidi, "Bidirectional.pl");
    foreach my $bidi (sort keys %Bidi) {
	flush($Bidi{$bidi}, "Is/Bidi$bidi.pl");
    }

    flush(\@Comb, "CombiningClass.pl");

    flush(\@Deco, "Decomposition.pl");
    foreach my $deco (sort keys %Deco) {
	flush($Deco{$deco}, "Is/Deco$deco.pl");
    }
    foreach my $dc (sort keys %DC) {
	flush($DC{$dc}, "Is/DC$dc.pl");
    }

    flush(\@Number, "Number.pl");

    flush(\@Mirrored, "Is/Mirrored.pl");
} else {
    die "$0: Unicode.txt: $!\n";
}

#  The general cateory can be written out already now.

flush(\@General, "Category.pl");

#
# Read in the LineBrk.txt.
#

if (open(my $LineBrk, "LineBrk.txt")) {
    my @Lbrk;
    my %Lbrk;

    while (<$LineBrk>) {
        next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(\w+)/;

	my ($first, $last, $lbrk) = ($1, $2, $3);

	append(\@Lbrk,              $first, $lbrk);
	append($Lbrk{$lbrk} ||= [], $first);
	if (defined $last) {
	    extend(\@Lbrk,          $last);
	    extend($Lbrk{$lbrk},    $last);
	}
    }

    flush(\@Lbrk, "Lbrk.pl");
    foreach my $lbrk (sort keys %Lbrk) {
	flush($Lbrk{$lbrk}, "Is/Lbrk$lbrk.pl");
    }
} else {
    die "$0: LineBrk.txt: $!\n";
}

#
# Read in the ArabShap.txt.
#

if (open(my $ArabShap, "ArabShap.txt")) {
    my @ArabLink;
    my @ArabLinkGroup;

    while (<$ArabShap>) {
	next unless /^[0-9A-Fa-f]+;/;
	s/\s+$//;

	my ($code, $name, $link, $linkgroup) = split(/\s*;\s*/);

	append(\@ArabLink,      $code, $link);
	append(\@ArabLinkGroup, $code, $linkgroup);
    }

    flush(\@ArabLink,      "ArabLink.pl");
    flush(\@ArabLinkGroup, "ArabLnkGrp.pl");
} else {
    die "$0: ArabShap.txt: $!\n";
}

#
# Read in the Jamo.txt.
#

if (open(my $Jamo, "Jamo.txt")) {
    my @Short;

    while (<$Jamo>) {
	next unless /^([0-9A-Fa-f]+)\s*;\s*(\w*)/;

	my ($code, $short) = ($1, $2);

	append(\@Short, $code, $short);
    }

    flush(\@Short, "JamoShort.pl");
} else {
    die "$0: Jamo.txt: $!\n";
}

#
# Read in the Scripts.txt.
#

my @Scripts;

if (open(my $Scripts, "Scripts.txt")) {
    while (<$Scripts>) {
	next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/;

	# Wait until all the scripts have been read since
	# they are not listed in numeric order.
	push @Scripts, [ hex($1), $1, $2, $3 ];
    }
} else {
    die "$0: Scripts.txt: $!\n";
}

# Now append the scripts properties in their code point order.

my %Script;
my $Scripts = [];

for my $script (sort { $a->[0] <=> $b->[0] } @Scripts) {
    my ($code, $first, $last, $name) = @$script;
    append($Scripts,              $first, $name);
    append($Script{$name} ||= [], $first, $name);
    if (defined $last) {
	extend($Scripts,       $last);
	extend($Script{$name}, $last);
    }
    unless (defined $In{$name}) {
	$InScript{$InId} = $name;
	$In{$name}       = $InId++;
	$InIn{$name}     = $Script{$name};
    }
}

# Scripts.pl can be written out already now.

flush(\@Scripts, "Scripts.pl");

# Common is everything not explicitly assigned to a Script

$In{Common} = $InId++;
my $Common = inverse($Scripts);
$InIn{Common} = $Common;

#
# Read in the Blocks.txt.
#

my @Blocks;
my %Blocks;

if (open(my $Blocks, "Blocks.txt")) {
    while (<$Blocks>) {
	next unless /^([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.+?)\s*$/;
	
	my ($first, $last, $name) = ($1, $2, $3);
	my $origname = $name;

	# If there's a naming conflict (the script names are
	# in uppercase), the name of the block has " Block"
	# appended to it.
	my $pat = $name;
	$pat =~ s/([- _])/(?:[-_]|\\s+)?/g;
	for my $i (values %InScript) {
	    if ($i =~ /^$pat$/i) {
		$name .= " Block";
		last;
	    }
	}

	append(\@Blocks,              $first, $name);
	append($Blocks{$name} ||= [], $first, $name);
	if (defined $last) {
	    extend(\@Blocks,       $last);
	    extend($Blocks{$name}, $last);
	}
	unless (defined $In{$name}) {
	    $InBlock{$InId} = $origname;
	    $In{$name}      = $InId++;
	    $InIn{$name}    = $Blocks{$name};
	}
    }
} else {
    die "$0: Blocks.txt: $!\n";
}

# Blocks.pl can be written out already now.

flush(\@Blocks, "Blocks.pl");

#
# Read in the PropList.txt.  It contains extended properties not
# listed in the Unicode.txt, such as 'Other_Alphabetic':
# alphabetic but not of the general category L; many modifiers
# belong to this extended property category: while they are not
# alphabets, they are alphabetic in nature.
#

my @Props;

if (open(my $Props, "PropList.txt")) {
    while (<$Props>) {
	next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/;

	# Wait until all the extended properties have been read since
	# they are not listed in numeric order.
	push @Props, [ hex($1), $1, $2, $3 ];
    }
} else {
    die "$0: PropList.txt: $!\n";
}

# Now append the extended properties in their code point order.

my %Prop;
my $Props = [];

for my $prop (sort { $a->[0] <=> $b->[0] } @Props) {
    my ($code, $first, $last, $name) = @$prop;
    append($Props,              $first, $name);
    append($Prop{$name} ||= [], $first, $name);
    if (defined $last) {
	extend($Props,       $last);
	extend($Prop{$name}, $last);
    }
    unless (defined $In{$name}) {
	$In{$name}   = $InId++;
	$InIn{$name} = $Prop{$name};
    }
}

# Assigned is everything not Cn

$In{Assigned} = $InId++;
my $Assigned = inverse($Cat{Cn});
$InIn{Assigned} = $Assigned;

# Unassigned is everything not Assigned

$In{Unassigned} = $InId++;
my $Unassigned = $Cat{Cn};
$InIn{Unassigned} = $Unassigned;

# Unassigned is everything not Assigned
sub merge_general_and_extended {
    my ($name, $general, $extended) = @_;
    my $merged;

    push @$merged,
         map { pop @{$_}; $_ }
             sort { $a->[2] <=> $b->[2] }
                  map { [ $_->[0], $_->[1], hex($_->[0]) ] }
                      ($general ?
		         map { ref $_ ? @$_ : $_ }
                             @Cat {ref $general  ? @$general  : $general } :
		         (),
                       $extended ?
		         map { ref $_ ? @$_ : $_ }
                             @Prop{ref $extended ? @$extended : $extended} :
		         ());

    $In{$name}   = $InId++;
    $InIn{$name} = $merged;
    
    return $merged;
}

# Alphabetic is L and Other_Alphabetic.

my $Alphabetic =
    merge_general_and_extended('Alphabetic', 'L', 'Other_Alphabetic');

# Lowercase is Ll and Other_Lowercase.

my $Lowercase =
    merge_general_and_extended('Lowercase', 'Ll', 'Other_Lowercase');

# Uppercase is Lu and Other_Uppercase.

my $Uppercase =
    merge_general_and_extended('Uppercase', 'Lu', 'Other_Uppercase');

# Math is Sm and Other_Math.

my $Math =
    merge_general_and_extended('Math', 'Sm', 'Other_Math');

# Lampersand is Ll, Lu, and Lt.

my $Lampersand =
    merge_general_and_extended('Lampersand', [ qw(Ll Lu Lt) ]);

# ID_Start is Ll, Lu, Lt, Lm, Lo, and Nl.

my $ID_Start =
    merge_general_and_extended('ID_Start', [ qw(Ll Lu Lt Lm Lo Nl) ]);

# ID_Continue is ID_Start, Mn, Mc, Nd, and Pc.

my $ID_Continue =
    merge_general_and_extended('ID_Continue', [ qw(Ll Lu Lt Lm Lo Nl
						   Mn Mc Nd Pc) ]);

#
# Any is any.
#

$In{Any} = $InId++;
my $Any = [ [ 0, sprintf("%04X", $LastUnicodeCodepoint) ] ];
$InIn{Any} = $Any;

#
# All is any, too.
#

$In{All} = $InId++;
$InIn{All} = $Any;

#
# mapping() will be used to write out the In and Is virtual mappings.
#

sub mapping {
    my ($map, $name) = @_;

    if (open(my $fh, ">$name.pl")) {
	print "$name.pl\n";
	header($fh);

	# The %pat will hold a hash that maps the first two
	# lowercased letters of a class to a 'fuzzified' regular
	# expression that points to the real mapping.

	my %pat;

	# But first write out the offical name to real name
	# (the filename) mapping.

	print $fh <<EOT;
%utf8::${name} =
(
EOT
        for my $i (sort { lc $a cmp lc $b } keys %$map) {
	    my $pat = $i;
	    # Here is the 'fuzzification': accept any space,
	    # dash, or underbar where in the official name
	    # there is space or a dash (or underbar, but
	    # there never is).
	    $pat =~ s/([- _])/(?:[-_]|\\s+)?/g;
	    # The prefix length of 2 is enough spread,
	    # and besides, we have 'Yi' as an In category.
	    push @{$pat{lc(substr($i, 0, 2))}}, [ $i, $pat ];
	    printf $fh "%-45s => '$map->{$i}',\n", "'$i'";
	}
        print $fh <<EOT;
);
EOT

	# Now write out the %pat mapping.

        print $fh <<EOT;
%utf8::${name}Pat =
(
EOT
        foreach my $prefix (sort keys %pat) {
	    print $fh "'$prefix' => {\n";
	    foreach my $ipat (@{$pat{$prefix}}) {
		my ($i, $pat) = @$ipat;
		print $fh "\t'$pat' => '$map->{$i}',\n";
	    }
	    print $fh "},\n";
	}
        print $fh <<EOT;
);
EOT

        close($fh);
    } else {
        die "$0: $name.pl: $!\n";
    }
}

#
# Write out the virtual In mappings.
#

mapping(\%In, "In");

#
# Append the InScript and InBlock mappings.
# These are needed only if Script= and Block= syntaxes are used.
#

if (open(my $In, ">>In.pl")) {
    print $In <<EOT;

%utf8::InScript =
(
EOT
    for my $i (sort { $a <=> $b } keys %InScript) {
	printf $In "%4d => '$InScript{$i}',\n", $i;
    }
    print $In <<EOT;
);
EOT

    print $In <<EOT;

%utf8::InBlock =
(
EOT
    for my $i (sort { $a <=> $b } keys %InBlock) {
	printf $In "%4d => '$InBlock{$i}',\n", $i;
    }
    print $In <<EOT;
);
EOT
} else {
    die "$0: In.pl: $!\n";
}

#
# Write out the real In mappings
# (the In.pl written out just above has the virtual In mappings)
#

foreach my $in (sort { $In{$a} <=> $In{$b} } keys %In) {
    flush($InIn{$in}, "In/$In{$in}.pl");
}

#
# The mapping from General Category long forms to short forms is
# currently hardwired here since no simple data file in the UCD
# seems to do that.  Unicode 3.2 will assumedly correct this.
#

my %Is = (
	'Letter'			=>	'L',
	'Uppercase_Letter'		=>	'Lu',
	'Lowercase_Letter'		=>	'Ll',
	'Titlecase_Letter'		=>	'Lt',
	'Modifier_Letter'		=>	'Lm',
	'Other_Letter'			=>	'Lo',

	'Mark'				=>	'M',
	'Non_Spacing_Mark'		=>	'Mn',
	'Spacing_Mark'			=>	'Mc',
	'Enclosing_Mark'		=>	'Me',

	'Separator'			=>	'Z',
	'Space_Separator'		=>	'Zs',
	'Line_Separator'		=>	'Zl',
	'Paragraph_Separator'		=>	'Zp',

	'Number'			=>	'N',
	'Decimal_Number'		=>	'Nd',
	'Letter_Number'			=>	'Nl',
	'Other_Number'			=>	'No',

	'Punctuation'			=>	'P',
	'Connector_Punctuation'		=>	'Pc',
	'Dash_Punctuation'		=>	'Pd',
	'Open_Punctuation'		=>	'Ps',
	'Close_Punctuation'		=>	'Pe',
	'Initial_Punctuation'		=>	'Pi',
	'Final_Punctuation'		=>	'Pf',
	'Other_Punctuation'		=>	'Po',

	'Symbol'			=>	'S',
	'Math_Symbol'			=>	'Sm',
	'Currency_Symbol'		=>	'Sc',
	'Modifier_Symbol'		=>	'Sk',
	'Other_Symbol'			=>	'So',

	'Other'				=>	'C',
	'Control'			=>	'Cc',
	'Format'			=>	'Cf',
	'Surrogate'			=>	'Cs',
	'Private Use'			=>	'Co',
	'Unassigned'			=>	'Cn',
);

#
# Write out the virtual Is mappings.
#

mapping(\%Is, "Is");

#
# Read in the special cases.
#

my %Case;

if (open(my $SpecCase, "SpecCase.txt")) {
    while (<$SpecCase>) {
	next unless /^[0-9A-Fa-f]+;/;
	s/\#.*//;
	s/\s+$//;

	my ($code, $lower, $title, $upper, $condition) = split(/\s*;\s*/);

	if ($condition) { # not implemented yet
	    print "# SKIPPING $_\n";
	    next;
	}

	# Wait until all the special cases have been read since
	# they are not listed in numeric order.
	my $ix = hex($code);
	push @{$Case{Lower}}, [ $ix, $code, $lower ];
	push @{$Case{Title}}, [ $ix, $code, $title ];
	push @{$Case{Upper}}, [ $ix, $code, $upper ];
    }
} else {
    die "$0: SpecCase.txt: $!\n";
}

# Now write out the special cases properties in their code point order.
# Prepend them to the To/{Upper,Lower,Title}.pl.

for my $case (qw(Lower Title Upper)) {
    my $NormalCase = do "To/$case.pl" || die "$0: To/$case.pl: $!\n";
    if (open(my $Case, ">To/$case.pl")) {
	header($Case);
	print $Case <<EOT;

%utf8::ToSpec$case = (
EOT
        for my $prop (sort { $a->[0] <=> $b->[0] } @{$Case{$case}}) {
	    my ($ix, $code, $to) = @$prop;
	    my $tostr =
		join "", map { sprintf "\\x{%s}", $_ } split ' ', $to;
	    printf $Case qq['%04X' => "$tostr",\n], $ix;
	}
	print $Case <<EOT;
);

EOT
	begin($Case);
	print $Case $NormalCase;
	end($Case);
    } else {
	die "$0: To/$case.txt: $!\n";
    }
}

#
# Read in the case foldings.
#
# We will do full case folding, C + F + I (see CaseFold.txt).
#

if (open(my $CaseFold, "CaseFold.txt")) {
    my @Fold;
    my %Fold;

    while (<$CaseFold>) {
	# Skip status 'S', simple case folding
	next unless /^([0-9A-Fa-f]+)\s*;\s*([CFI])\s*;\s*([0-9A-Fa-f]+(?: [0-9A-Fa-f]+)*)\s*;/;

	my ($code, $status, $fold) = ($1, $2, $3);

	if ($status eq 'C') { # Common: one-to-one folding
	    # No append() since several codes may fold into one.
	    push @Fold, [ $code, $code, $fold ];
	} else { # F: full, or I: dotted uppercase I -> dotless lowercase I
	    $Fold{hex($code)} = $fold;
	}
    }

    flush(\@Fold, "To/Fold.pl");

    #
    # Prepend the special foldings to the common foldings.
    #

    my $CommonFold = do "To/Fold.pl" || die "$0: To/Fold.pl: $!\n";
    if (open(my $Fold, ">To/Fold.pl")) {
	header($Fold);
	print $Fold <<EOT;

%utf8::ToSpecFold = (
EOT
        for my $code (sort { $a <=> $b } keys %Fold) {
	    my $foldstr =
		join "", map { sprintf "\\x{%s}", $_ } split ' ', $Fold{$code};
	    printf $Fold qq['%04X' => "$foldstr",\n], $code;
	}
	print $Fold <<EOT;
);

EOT
	begin($Fold);
	print $Fold $CommonFold;
	end($Fold);
    } else {
	die "$0: To/Fold.pl: $!\n";
    }
} else {
    die "$0: CaseFold.txt: $!\n";
}

# That's all, folks!