From: Nick Ing-Simmons Date: Sun, 21 Jan 2001 16:25:32 +0000 (+0000) Subject: Encode cleanup. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=afdae191a418d1363d2ddc3dcefffb0785c0a155;p=p5sagit%2Fp5-mst-13.2.git Encode cleanup. Add command line control over "compile" so that Makefile.PL needs can be combined with other uses. Use command line options in Makefile.PL. Fix multi-byte parsing in .ucm files. p4raw-id: //depot/perlio@8498 --- diff --git a/ext/Encode/Makefile.PL b/ext/Encode/Makefile.PL index c86dacf..6a0e1c5 100644 --- a/ext/Encode/Makefile.PL +++ b/ext/Encode/Makefile.PL @@ -88,7 +88,7 @@ sub postamble $continuator = ''; } } - $str .= "\n\t\$(PERL) compile \$\@ $table.fnm\n\n"; + $str .= "\n\t\$(PERL) compile -o \$\@ -F $table.fnm\n\n"; open (FILELIST, ">$table.fnm") || die "Could not open $table.fnm: $!"; foreach my $file (@{$tables{$table}}) diff --git a/ext/Encode/compile b/ext/Encode/compile index 03cea19..4c8ac52 100755 --- a/ext/Encode/compile +++ b/ext/Encode/compile @@ -1,6 +1,8 @@ #!../../perl -w BEGIN { @INC = '../../lib' }; use strict; +use Getopt::Std; +my @orig_ARGV = @ARGV; sub encode_U { @@ -45,9 +47,13 @@ sub encode_M # Win32 does not expand globs on command line eval "\@ARGV = map(glob(\$_),\@ARGV)" if ($^O eq 'MSWin32'); -my $cname = shift(@ARGV); +my %opt; +getopts('qo:F:',\%opt); +my $cname = (exists $opt{'o'}) ? $opt{'o'} : shift(@ARGV); chmod(0666,$cname) if -f $cname && !-w $cname; open(C,">$cname") || die "Cannot open $cname:$!"; + + my $dname = $cname; $dname =~ s/(\.[^\.]*)?$/.def/; @@ -65,11 +71,11 @@ if ($cname =~ /\.(c|xs)$/) foreach my $fh (\*C,\*D,\*H) { - print $fh <<"END"; + print $fh <<"END" unless $opt{'q'}; /* !!!!!!! DO NOT EDIT THIS FILE !!!!!!! This file was autogenerated by: - $^X $0 $cname @ARGV + $^X $0 $cname @orig_ARGV */ END } @@ -92,11 +98,19 @@ elsif ($cname =~ /\.ucm$/) $doUcm = 1; } -# 2nd argument is file containing list of filenames -my $flist = shift(@ARGV); -open(FLIST,$flist) || die "Cannot open $flist:$!"; -chomp(my @encfiles = ); -close(FLIST); +my @encfiles; +if (exists $opt{'F'}) + { + # -F is followed by name of file containing list of filenames + my $flist = $opt{'F'}; + open(FLIST,$flist) || die "Cannot open $flist:$!"; + chomp(@encfiles = ); + close(FLIST); + } +else + { + @encfiles = @ARGV; + } my %encoding; my %strings; @@ -193,10 +207,14 @@ sub compile_ucm } my $erep; my $urep; + my $max_el; + my $min_el; if (exists $attr{'subchar'}) { - my @byte = $attr{'subchar'} =~ /^\s*(?:\\x([0-9a-f]+))+\s*$/; - $erep = join('',map(hex($_),@byte)); + my @byte; + $attr{'subchar'} =~ /^\s*/cg; + push(@byte,$1) while $attr{'subchar'} =~ /\G\\x([0-9a-f]+)/icg; + $erep = join('',map(chr(hex($_)),@byte)); } warn "Scanning $name ($cs)\n"; my $nfb = 0; @@ -206,12 +224,20 @@ sub compile_ucm s/#.*$//; last if /^\s*END\s+CHARMAP\s*$/i; next if /^\s*$/; - my ($u,@byte) = /^\s+(?:\\x([0-9a-f]+))+\s*(\|[0-3]|)\s*$/i; - my $fb = pop(@byte); + my ($u,@byte); + my $fb = ''; + $u = $1 if (/^\s+/igc); + push(@byte,$1) while /\G\\x([0-9a-f]+)/igc; + $fb = $1 if /\G\s*(\|[0-3])/gc; + # warn "$_: $u @byte | $fb\n"; + die "Bad line:$_" unless /\G\s*(#.*)?$/gc; if (defined($u)) { my $uch = encode_U(hex($u)); my $ech = join('',map(chr(hex($_)),@byte)); + my $el = length($ech); + $max_el = $el if (!defined($max_el) || $el > $max_el); + $min_el = $el if (!defined($min_el) || $el < $min_el); if (length($fb)) { $fb = substr($fb,1); @@ -234,7 +260,6 @@ sub compile_ucm { warn $_; } - } if ($nfb && $hfb) { @@ -253,7 +278,7 @@ sub compile_ucm } elsif ($doUcm) { - output_ucm($ch,$name,$u2e); + output_ucm($ch,$name,$u2e,$erep,$min_el,$max_el); } } @@ -273,6 +298,8 @@ sub compile_enc my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>)); warn "$type encoded $name\n"; my $rep = ''; + my $min_el; + my $max_el; { my $v = hex($def); no strict 'refs'; @@ -294,6 +321,9 @@ sub compile_enc my $val = hex(substr($line,0,4,'')); if ($val || (!$ch && !$page)) { + my $el = length($ech); + $max_el = $el if (!defined($max_el) || $el > $max_el); + $min_el = $el if (!defined($min_el) || $el < $min_el); my $uch = encode_U($val); enter($e2u,$ech,$uch,$e2u,0); enter($u2e,$uch,$ech,$u2e,0); @@ -320,7 +350,7 @@ sub compile_enc } elsif ($doUcm) { - output_ucm($ch,$name,$u2e); + output_ucm($ch,$name,$u2e,$rep,$min_el,$max_el); } } @@ -489,10 +519,8 @@ sub output_enc sub decode_U { my $s = shift; - } - sub output_ucm_page { my ($fh,$a,$t,$pre) = @_; @@ -528,8 +556,27 @@ sub output_ucm_page sub output_ucm { - my ($fh,$name,$a) = @_; - print $fh "CHARMAP\n"; + my ($fh,$name,$a,$rep,$min_el,$max_el) = @_; + print $fh "# Written by $0 @orig_ARGV\n" unless $opt{'q'}; + print $fh " \"$name\"\n"; + if (defined $min_el) + { + print $fh " $min_el\n"; + } + if (defined $max_el) + { + print $fh " $max_el\n"; + } + if (defined $rep) + { + print $fh " "; + foreach my $c (split(//,$rep)) + { + printf $fh "\\x%02X",ord($c); + } + print $fh "\n"; + } + print $fh "#\nCHARMAP\n"; output_ucm_page($fh,$a,$a,0); print $fh "END CHARMAP\n"; }