X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=ext%2FEncode%2Fcompile;h=d0611f719f2639529084ba4c83d24073771bbd69;hb=0a1f2d144e4463451f8627bd1c6ca420a59b01b0;hp=a688c2396263a4915c20bebc70a19eef90381e1e;hpb=508a30f1a485df95340190bcdb5973c1359247c7;p=p5sagit%2Fp5-mst-13.2.git diff --git a/ext/Encode/compile b/ext/Encode/compile index a688c23..d0611f7 100755 --- a/ext/Encode/compile +++ b/ext/Encode/compile @@ -8,23 +8,16 @@ use Getopt::Std; my @orig_ARGV = @ARGV; my $perforce = '$Id$'; - sub encode_U { # UTF-8 encode long hand - only covers part of perl's range my $uv = shift; - if ($uv < 0x80) - { - return chr($uv) - } - if ($uv < 0x800) - { - return chr(($uv >> 6) | 0xC0). - chr(($uv & 0x3F) | 0x80); - } - return chr(($uv >> 12) | 0xE0). - chr((($uv >> 6) & 0x3F) | 0x80). - chr(($uv & 0x3F) | 0x80); + # chr() works in native space so convert value from table + # into that space before using chr(). + my $ch = chr(utf8::unicode_to_native($uv)); + # Now get core perl to encode that the way it likes. + utf8::encode($ch); + return $ch; } sub encode_S @@ -143,7 +136,7 @@ sub cmp_name foreach my $enc (sort cmp_name @encfiles) { my ($name,$sfx) = $enc =~ /^.*?([\w-]+)\.(enc|ucm)$/; - $name = delete $opt{'n'} if exists $opt{'n'}; + $name = $opt{'n'} if exists $opt{'n'}; if (open(E,$enc)) { if ($sfx eq 'enc') @@ -241,7 +234,7 @@ sub compile_ucm } else { - # $name = lc($cs); + $name = $cs unless exists $opt{'n'}; } my $erep; my $urep; @@ -542,9 +535,28 @@ sub decode_U my $s = shift; } +my @uname; +sub char_names +{ + my $s = do "unicode/Name.pl"; + pos($s) = 0; + while ($s =~ /\G([0-9a-f]+)\t([0-9a-f]*)\t(.*?)\s*\n/igc) + { + my $name = $3; + my $s = hex($1); + last if $s >= 0x10000; + my $e = length($2) ? hex($2) : $s; + for (my $i = $s; $i <= $e; $i++) + { + $uname[$i] = $name; +# print sprintf("U%04X $name\n",$i); + } + } +} + sub output_ucm_page { - my ($fh,$a,$t,$pre) = @_; + my ($cmap,$a,$t,$pre) = @_; # warn sprintf("Page %x\n",$pre); foreach my $b (sort keys %$t) { @@ -553,7 +565,7 @@ sub output_ucm_page my $u = ord($s); if ($n != $a && $n != $t) { - output_ucm_page($fh,$a,$n,(($pre|($u &0x3F)) << 6)&0xFFFF); + output_ucm_page($cmap,$a,$n,(($pre|($u &0x3F)) << 6)&0xFFFF); } elsif (length($out)) { @@ -561,12 +573,13 @@ sub output_ucm_page { $u = $pre|($u &0x3f); } - printf $fh " ",$u; + my $s = sprintf " ",$u; foreach my $c (split(//,$out)) { - printf $fh "\\x%02X",ord($c); + $s .= sprintf "\\x%02X",ord($c); } - printf $fh " |%d\n",($fb ? 1 : 0); + $s .= sprintf " |%d # %s\n",($fb ? 1 : 0),$uname[$u]; + push(@$cmap,$s); } else { @@ -577,9 +590,10 @@ sub output_ucm_page sub output_ucm { - my ($fh,$name,$a,$rep,$min_el,$max_el) = @_; + my ($fh,$name,$h,$rep,$min_el,$max_el) = @_; print $fh "# Written $perforce\n# $0 @orig_ARGV\n" unless $opt{'q'}; print $fh " \"$name\"\n"; + char_names(); if (defined $min_el) { print $fh " $min_el\n"; @@ -597,8 +611,13 @@ sub output_ucm } print $fh "\n"; } + my @cmap; + output_ucm_page(\@cmap,$h,$h,0); print $fh "#\nCHARMAP\n"; - output_ucm_page($fh,$a,$a,0); + foreach my $line (sort { substr($a,8) cmp substr($b,8) } @cmap) + { + print $fh $line; + } print $fh "END CHARMAP\n"; }