Test for #14795.
[p5sagit/p5-mst-13.2.git] / ext / Encode / compile
CommitLineData
017e2add 1#!../../perl -w
508a30f1 2BEGIN {
b0bf7e4c 3 unshift @INC, qw(../../lib ../../../lib);
4 $ENV{PATH} .= ';../..;../../..' if $^O eq 'MSWin32';
508a30f1 5}
017e2add 6use strict;
afdae191 7use Getopt::Std;
8my @orig_ARGV = @ARGV;
dcda1f94 9
eb560316 10# These may get re-ordered.
11# RAW is a do_now as inserted by &enter
12# AGG is an aggreagated do_now, as built up by &process
13use constant {
14 RAW_NEXT => 0,
15 RAW_IN_LEN => 1,
16 RAW_OUT_BYTES => 2,
17 RAW_FALLBACK => 3,
18
19 AGG_MIN_IN => 0,
20 AGG_MAX_IN => 1,
21 AGG_OUT_BYTES => 2,
22 AGG_NEXT => 3,
23 AGG_IN_LEN => 4,
24 AGG_OUT_LEN => 5,
25 AGG_FALLBACK => 6,
26};
27# (See the algorithm in encengine.c - we're building structures for it)
28
29# There are two sorts of structures.
30# "do_now" (an array, two variants of what needs storing) is whatever we need
31# to do now we've read an input byte.
32# It's housed in a "do_next" (which is how we got to it), and in turn points
33# to a "do_next" which contains all the "do_now"s for the next input byte.
34
35# There will be a "do_next" which is the start state.
36# For a single byte encoding it's the only "do_next" - each "do_now" points
37# back to it, and each "do_now" will cause bytes. There is no state.
38
39# For a multi-byte encoding where all characters in the input are the same
40# length, then there will be a tree of "do_now"->"do_next"->"do_now"
41# branching out from the start state, one step for each input byte.
42# The leaf "do_now"s will all be at the same distance from the start state,
43# only the leaf "do_now"s cause output bytes, and they in turn point back to
44# the start state.
45
46# For an encoding where there are varaible length input byte sequences, you
47# will encounter a leaf "do_now" sooner for the shorter input sequences, but
48# as before the leaves will point back to the start state.
49
50# The system will cope with escape encodings (imagine them as a mostly
51# self-contained tree for each escape state, and cross links between trees
52# at the state-switching characters) but so far no input format defines these.
53
54# The system will also cope with having output "leaves" in the middle of
55# the bifurcating branches, not just at the extremities, but again no
56# input format does this yet.
57
58# There are two variants of the "do_now" structure. The first, smaller variant
59# is generated by &enter as the input file is read. There is one structure
60# for each input byte. Say we are mapping a single byte encoding to a
61# single byte encoding, with "ABCD" going "abcd". There will be
62# 4 "do_now"s, {"A" => [...,"a",...], "B" => [...,"b",...], "C"=>..., "D"=>...}
63
64# &process then walks the tree, building aggregate "do_now" structres for
65# adjacent bytes where possible. The aggregate is for a contiguous range of
66# bytes which each produce the same length of output, each move to the
67# same next state, and each have the same fallback flag.
68# So our 4 RAW "do_now"s above become replaced by a single structure
69# containing:
70# ["A", "D", "abcd", 1, ...]
71# ie, for an input byte $_ in "A".."D", output 1 byte, found as
72# substr ("abcd", (ord $_ - ord "A") * 1, 1)
73# which maps very nicely into pointer arithmetic in C for encengine.c
74
017e2add 75sub encode_U
76{
9b37254d 77 # UTF-8 encode long hand - only covers part of perl's range
7ba7f87b 78 ## my $uv = shift;
1b026014 79 # chr() works in native space so convert value from table
80 # into that space before using chr().
7ba7f87b 81 my $ch = chr(utf8::unicode_to_native($_[0]));
1b026014 82 # Now get core perl to encode that the way it likes.
83 utf8::encode($ch);
84 return $ch;
017e2add 85}
86
87sub encode_S
88{
14a8264b 89 # encode single byte
7ba7f87b 90 ## my ($ch,$page) = @_; return chr($ch);
91 return chr $_[0];
017e2add 92}
93
94sub encode_D
95{
14a8264b 96 # encode double byte MS byte first
7ba7f87b 97 ## my ($ch,$page) = @_; return chr($page).chr($ch);
98 return chr ($_[1]) . chr $_[0];
017e2add 99}
100
101sub encode_M
102{
14a8264b 103 # encode Multi-byte - single for 0..255 otherwise double
7ba7f87b 104 ## my ($ch,$page) = @_;
105 ## return &encode_D if $page;
106 ## return &encode_S;
107 return chr ($_[1]) . chr $_[0] if $_[1];
108 return chr $_[0];
017e2add 109}
110
7ba7f87b 111my %encode_types = (U => \&encode_U,
112 S => \&encode_S,
113 D => \&encode_D,
114 M => \&encode_M,
115 );
116
14a8264b 117# Win32 does not expand globs on command line
252a8565 118eval "\@ARGV = map(glob(\$_),\@ARGV)" if ($^O eq 'MSWin32');
18b7339f 119
afdae191 120my %opt;
4cfc977c 121getopts('qOo:f:n:',\%opt);
afdae191 122my $cname = (exists $opt{'o'}) ? $opt{'o'} : shift(@ARGV);
2f2b4ff2 123chmod(0666,$cname) if -f $cname && !-w $cname;
017e2add 124open(C,">$cname") || die "Cannot open $cname:$!";
afdae191 125
126
2f2b4ff2 127my $dname = $cname;
023d8852 128$dname =~ s/(\.[^\.]*)?$/_def.h/;
e0c49a6b 129
dcda1f94 130my ($doC,$doEnc,$doUcm,$doPet);
e0c49a6b 131
132if ($cname =~ /\.(c|xs)$/)
133 {
134 $doC = 1;
135 chmod(0666,$dname) if -f $cname && !-w $dname;
136 open(D,">$dname") || die "Cannot open $dname:$!";
137 my $hname = $cname;
138 $hname =~ s/(\.[^\.]*)?$/.h/;
139 chmod(0666,$hname) if -f $cname && !-w $hname;
140 open(H,">$hname") || die "Cannot open $hname:$!";
141
142 foreach my $fh (\*C,\*D,\*H)
143 {
afdae191 144 print $fh <<"END" unless $opt{'q'};
14a8264b 145/*
146 !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
147 This file was autogenerated by:
023d8852 148 $^X $0 @orig_ARGV
14a8264b 149*/
150END
e0c49a6b 151 }
14a8264b 152
e0c49a6b 153 if ($cname =~ /(\w+)\.xs$/)
154 {
155 print C "#include <EXTERN.h>\n";
156 print C "#include <perl.h>\n";
157 print C "#include <XSUB.h>\n";
158 print C "#define U8 U8\n";
159 }
160 print C "#include \"encode.h\"\n";
d811239c 161
e0c49a6b 162 }
163elsif ($cname =~ /\.enc$/)
164 {
165 $doEnc = 1;
166 }
167elsif ($cname =~ /\.ucm$/)
2f2b4ff2 168 {
e0c49a6b 169 $doUcm = 1;
2f2b4ff2 170 }
dcda1f94 171elsif ($cname =~ /\.pet$/)
172 {
173 $doPet = 1;
174 }
017e2add 175
afdae191 176my @encfiles;
3865e8e1 177if (exists $opt{'f'})
afdae191 178 {
179 # -F is followed by name of file containing list of filenames
3865e8e1 180 my $flist = $opt{'f'};
afdae191 181 open(FLIST,$flist) || die "Cannot open $flist:$!";
182 chomp(@encfiles = <FLIST>);
183 close(FLIST);
184 }
185else
186 {
187 @encfiles = @ARGV;
188 }
c6fdb90a 189
017e2add 190my %encoding;
191my %strings;
b1e7e56f 192my $saved = 0;
193my $subsave = 0;
194my $strings = 0;
017e2add 195
2f2b4ff2 196sub cmp_name
197{
198 if ($a =~ /^.*-(\d+)/)
199 {
200 my $an = $1;
201 if ($b =~ /^.*-(\d+)/)
202 {
203 my $r = $an <=> $1;
204 return $r if $r;
205 }
206 }
207 return $a cmp $b;
208}
209
dcda1f94 210
c6fdb90a 211foreach my $enc (sort cmp_name @encfiles)
017e2add 212 {
9b37254d 213 my ($name,$sfx) = $enc =~ /^.*?([\w-]+)\.(enc|ucm)$/;
51ef4e11 214 $name = $opt{'n'} if exists $opt{'n'};
017e2add 215 if (open(E,$enc))
216 {
9b37254d 217 if ($sfx eq 'enc')
218 {
dcda1f94 219 compile_enc(\*E,lc($name));
9b37254d 220 }
221 else
222 {
dcda1f94 223 compile_ucm(\*E,lc($name));
9b37254d 224 }
017e2add 225 }
226 else
227 {
228 warn "Cannot open $enc for $name:$!";
229 }
230 }
231
e0c49a6b 232if ($doC)
2f2b4ff2 233 {
b1e7e56f 234 print STDERR "Writing compiled form\n";
dcda1f94 235 foreach my $name (sort cmp_name keys %encoding)
236 {
237 my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}};
238 output(\*C,$name.'_utf8',$e2u);
239 output(\*C,'utf8_'.$name,$u2e);
240 push(@{$encoding{$name}},outstring(\*C,$e2u->{Cname}.'_def',$erep));
241 }
e0c49a6b 242 foreach my $enc (sort cmp_name keys %encoding)
243 {
dcda1f94 244 my ($e2u,$u2e,$rep,$min_el,$max_el,$rsym) = @{$encoding{$enc}};
245 my @info = ($e2u->{Cname},$u2e->{Cname},$rsym,length($rep),$min_el,$max_el);
e0c49a6b 246 my $sym = "${enc}_encoding";
247 $sym =~ s/\W+/_/g;
248 print C "encode_t $sym = \n";
dcda1f94 249 print C " {",join(',',@info,"{\"$enc\",(const char *)0}"),"};\n\n";
e0c49a6b 250 }
2f2b4ff2 251
e0c49a6b 252 foreach my $enc (sort cmp_name keys %encoding)
253 {
254 my $sym = "${enc}_encoding";
255 $sym =~ s/\W+/_/g;
256 print H "extern encode_t $sym;\n";
d811239c 257 print D " Encode_XSEncoding(aTHX_ &$sym);\n";
e0c49a6b 258 }
017e2add 259
e0c49a6b 260 if ($cname =~ /(\w+)\.xs$/)
261 {
262 my $mod = $1;
d811239c 263 print C <<'END';
264
023d8852 265static void
d811239c 266Encode_XSEncoding(pTHX_ encode_t *enc)
267{
268 dSP;
269 HV *stash = gv_stashpv("Encode::XS", TRUE);
270 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
271 int i = 0;
272 PUSHMARK(sp);
273 XPUSHs(sv);
274 while (enc->name[i])
275 {
276 const char *name = enc->name[i++];
277 XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
278 }
279 PUTBACK;
280 call_pv("Encode::define_encoding",G_DISCARD);
281 SvREFCNT_dec(sv);
282}
283
284END
285
e0c49a6b 286 print C "\nMODULE = Encode::$mod\tPACKAGE = Encode::$mod\n\n";
287 print C "BOOT:\n{\n";
288 print C "#include \"$dname\"\n";
289 print C "}\n";
290 }
291 close(D);
292 close(H);
b1e7e56f 293 printf STDERR "%d bytes in string tables\n",$strings;
294 printf STDERR "%d bytes (%.3g%%) saved spotting duplicates\n",$saved,100*$saved/$strings if $saved;
295 printf STDERR "%d bytes (%.3g%%) saved using substrings\n",$subsave,100*$subsave/$strings if $subsave;
2f2b4ff2 296 }
dcda1f94 297elsif ($doEnc)
298 {
299 foreach my $name (sort cmp_name keys %encoding)
300 {
301 my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}};
302 output_enc(\*C,$name,$e2u);
303 }
304 }
305elsif ($doUcm)
306 {
307 foreach my $name (sort cmp_name keys %encoding)
308 {
309 my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}};
310 output_ucm(\*C,$name,$u2e,$erep,$min_el,$max_el);
311 }
312 }
313
017e2add 314close(C);
315
9b37254d 316
b1e7e56f 317
9b37254d 318sub compile_ucm
319{
dcda1f94 320 my ($fh,$name) = @_;
9b37254d 321 my $e2u = {};
322 my $u2e = {};
323 my $cs;
324 my %attr;
325 while (<$fh>)
326 {
327 s/#.*$//;
328 last if /^\s*CHARMAP\s*$/i;
7ba7f87b 329 if (/^\s*<(\w+)>\s+"?([^"]*)"?\s*$/i) # " # Grrr
9b37254d 330 {
331 $attr{$1} = $2;
332 }
333 }
334 if (!defined($cs = $attr{'code_set_name'}))
335 {
336 warn "No <code_set_name> in $name\n";
337 }
338 else
339 {
51ef4e11 340 $name = $cs unless exists $opt{'n'};
9b37254d 341 }
342 my $erep;
343 my $urep;
afdae191 344 my $max_el;
345 my $min_el;
9b37254d 346 if (exists $attr{'subchar'})
347 {
afdae191 348 my @byte;
349 $attr{'subchar'} =~ /^\s*/cg;
350 push(@byte,$1) while $attr{'subchar'} =~ /\G\\x([0-9a-f]+)/icg;
351 $erep = join('',map(chr(hex($_)),@byte));
9b37254d 352 }
b1e7e56f 353 print "Reading $name ($cs)\n";
9b37254d 354 my $nfb = 0;
355 my $hfb = 0;
356 while (<$fh>)
357 {
358 s/#.*$//;
359 last if /^\s*END\s+CHARMAP\s*$/i;
360 next if /^\s*$/;
afdae191 361 my ($u,@byte);
362 my $fb = '';
363 $u = $1 if (/^<U([0-9a-f]+)>\s+/igc);
364 push(@byte,$1) while /\G\\x([0-9a-f]+)/igc;
365 $fb = $1 if /\G\s*(\|[0-3])/gc;
366 # warn "$_: $u @byte | $fb\n";
367 die "Bad line:$_" unless /\G\s*(#.*)?$/gc;
9b37254d 368 if (defined($u))
369 {
370 my $uch = encode_U(hex($u));
e0c49a6b 371 my $ech = join('',map(chr(hex($_)),@byte));
afdae191 372 my $el = length($ech);
373 $max_el = $el if (!defined($max_el) || $el > $max_el);
374 $min_el = $el if (!defined($min_el) || $el < $min_el);
9b37254d 375 if (length($fb))
376 {
377 $fb = substr($fb,1);
378 $hfb++;
379 }
380 else
381 {
382 $nfb++;
383 $fb = '0';
384 }
385 # $fb is fallback flag
386 # 0 - round trip safe
387 # 1 - fallback for unicode -> enc
388 # 2 - skip sub-char mapping
389 # 3 - fallback enc -> unicode
390 enter($u2e,$uch,$ech,$u2e,$fb+0) if ($fb =~ /[01]/);
391 enter($e2u,$ech,$uch,$e2u,$fb+0) if ($fb =~ /[03]/);
392 }
393 else
394 {
395 warn $_;
396 }
9b37254d 397 }
398 if ($nfb && $hfb)
399 {
400 die "$nfb entries without fallback, $hfb entries with\n";
401 }
dcda1f94 402 $encoding{$name} = [$e2u,$u2e,$erep,$min_el,$max_el];
9b37254d 403}
404
14a8264b 405sub compile_enc
017e2add 406{
dcda1f94 407 my ($fh,$name) = @_;
017e2add 408 my $e2u = {};
409 my $u2e = {};
410
411 my $type;
412 while ($type = <$fh>)
413 {
414 last if $type !~ /^\s*#/;
415 }
416 chomp($type);
417 return if $type eq 'E';
418 my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>));
14a8264b 419 warn "$type encoded $name\n";
017e2add 420 my $rep = '';
7ba7f87b 421 # Save a defined test by setting these to defined values.
422 my $min_el = ~0; # A very big integer
423 my $max_el = 0; # Anything must be longer than 0
017e2add 424 {
425 my $v = hex($def);
7ba7f87b 426 $rep = &{$encode_types{$type}}($v & 0xFF, ($v >> 8) & 0xffe);
017e2add 427 }
e03ac092 428 my %seen;
017e2add 429 while ($pages--)
430 {
431 my $line = <$fh>;
432 chomp($line);
433 my $page = hex($line);
434 my $ch = 0;
7ba7f87b 435 for (0..15)
017e2add 436 {
437 my $line = <$fh>;
7ba7f87b 438 die "Line should be exactly 65 characters long including newline"
439 unless length ($line) == 65;
440 # Split line into groups of 4 hex digits, convert groups to ints
441 for my $val (map {hex $_} $line =~ /(....)/g)
017e2add 442 {
e03ac092 443 next if $val == 0xFFFD;
7ba7f87b 444 my $ech = &{$encode_types{$type}}($ch,$page);
017e2add 445 if ($val || (!$ch && !$page))
446 {
afdae191 447 my $el = length($ech);
7ba7f87b 448 $max_el = $el if $el > $max_el;
449 $min_el = $el if $el < $min_el;
017e2add 450 my $uch = encode_U($val);
e03ac092 451 if (exists $seen{$uch})
452 {
453 warn sprintf("U%04X is %02X%02X and %02X%02X\n",
454 $val,$page,$ch,@{$seen{$uch}});
455 }
456 else
457 {
458 $seen{$uch} = [$page,$ch];
459 }
eb560316 460 # Passing 2 extra args each time is 3.6% slower!
461 # Even with having to add $fallback ||= 0 in &process
462 enter($e2u,$ech,$uch);
463 enter($u2e,$uch,$ech);
017e2add 464 }
465 else
466 {
467 # No character at this position
468 # enter($e2u,$ech,undef,$e2u);
469 }
470 $ch++;
471 }
472 }
473 }
7ba7f87b 474 die "\$min_el=$min_el, \$max_el=$max_el - seems we read no lines"
475 if $min_el > $max_el;
dcda1f94 476 $encoding{$name} = [$e2u,$u2e,$rep,$min_el,$max_el];
017e2add 477}
478
eb560316 479# my ($a,$s,$d,$t,$fb) = @_;
480sub enter {
481 my ($current,$inbytes,$outbytes,$next,$fallback) = @_;
482 # state we shift to after this (multibyte) input character defaults to same
483 # as current state.
484 $next ||= $current;
485 # Making sure it is defined seems to be faster than {no warnings;} in
486 # &process, or passing it in as 0 explicity.
487 $fallback ||= 0;
488
489 # Start at the beginning and work forwards through the string to zero.
490 # effectively we are removing 1 character from the front each time
491 # but we don't actually edit the string. [this alone seems to be 14% speedup]
492 # Hence -$pos is the length of the remaining string.
493 my $pos = -length $inbytes;
494 while (1) {
495 my $byte = substr $inbytes, $pos, 1;
496 # RAW_NEXT => 0,
497 # RAW_IN_LEN => 1,
498 # RAW_OUT_BYTES => 2,
499 # RAW_FALLBACK => 3,
500 # to unicode an array would seem to be better, because the pages are dense.
501 # from unicode can be very sparse, favouring a hash.
502 # hash using the bytes (all length 1) as keys rather than ord value,
503 # as it's easier to sort these in &process.
504
505 # It's faster to always add $fallback even if it's undef, rather than
506 # choosing between 3 and 4 element array. (hence why we set it defined
507 # above)
508 my $do_now = $current->{Raw}{$byte} ||= [{},-$pos,'',$fallback];
509 # When $pos was -1 we were at the last input character.
510 unless (++$pos) {
511 $do_now->[RAW_OUT_BYTES] = $outbytes;
512 $do_now->[RAW_NEXT] = $next;
513 return;
514 }
515 # Tail recursion. The intermdiate state may not have a name yet.
516 $current = $do_now->[RAW_NEXT];
017e2add 517 }
518}
519
b1e7e56f 520
521
017e2add 522sub outstring
523{
524 my ($fh,$name,$s) = @_;
525 my $sym = $strings{$s};
b1e7e56f 526 if ($sym)
527 {
528 $saved += length($s);
529 }
530 else
017e2add 531 {
4cfc977c 532 if ($opt{'O'}) {
533 foreach my $o (keys %strings)
534 {
535 my $i = index($o,$s);
536 if ($i >= 0)
537 {
538 $sym = $strings{$o};
539 $sym .= sprintf("+0x%02x",$i) if ($i);
540 $subsave += length($s);
541 $strings{$s} = $sym;
542 return $sym;
543 }
544 }
545 }
017e2add 546 $strings{$s} = $sym = $name;
b1e7e56f 547 $strings += length($s);
de745a2e 548 my $definition = sprintf "static const U8 %s[%d] = { ",$name,length($s);
549 # Maybe we should assert that these are all <256.
550 $definition .= join(',',unpack "C*",$s);
551 # We have a single long line. Split it at convenient commas.
552 $definition =~ s/(.{74,77},)/$1\n/g;
553 print $fh "$definition };\n\n";
017e2add 554 }
555 return $sym;
556}
557
14a8264b 558sub process
017e2add 559{
eb560316 560 my ($name,$a) = @_;
561 $name =~ s/\W+/_/g;
562 $a->{Cname} = $name;
563 my @raw = sort keys %{$a->{Raw}};
564 my ($l, $agg_max_in, $agg_next, $agg_in_len, $agg_out_len, $agg_fallback);
565 my @ent;
566 foreach my $key (@raw) {
567 # RAW_NEXT => 0,
568 # RAW_IN_LEN => 1,
569 # RAW_OUT_BYTES => 2,
570 # RAW_FALLBACK => 3,
571 my ($next, $in_len, $out_bytes, $fallback) = @{$a->{Raw}{$key}};
572 # Now we are converting from raw to aggregate, switch from 1 byte strings
573 # to numbers
574 my $b = ord $key;
575 if ($l &&
576 # If this == fails, we're going to reset $agg_max_in below anyway.
577 $b == ++$agg_max_in &&
578 # References in numeric context give the pointer as an int.
579 $agg_next == $next &&
580 $agg_in_len == $in_len &&
581 $agg_out_len == length $out_bytes &&
582 $agg_fallback == $fallback
583 # && length($l->[AGG_OUT_BYTES]) < 16
584 ) {
585 # my $i = ord($b)-ord($l->[AGG_MIN_IN]);
586 # we can aggregate this byte onto the end.
587 $l->[AGG_MAX_IN] = $b;
588 $l->[AGG_OUT_BYTES] .= $out_bytes;
589 } else {
590 # AGG_MIN_IN => 0,
591 # AGG_MAX_IN => 1,
592 # AGG_OUT_BYTES => 2,
593 # AGG_NEXT => 3,
594 # AGG_IN_LEN => 4,
595 # AGG_OUT_LEN => 5,
596 # AGG_FALLBACK => 6,
597 # Reset the last thing we saw, plus set 5 lexicals to save some derefs.
598 # (only gains .6% on euc-jp -- is it worth it?)
599 push @ent, $l = [$b, $agg_max_in = $b, $out_bytes, $agg_next = $next,
600 $agg_in_len = $in_len, $agg_out_len = length $out_bytes,
601 $agg_fallback = $fallback];
017e2add 602 }
eb560316 603 if (exists $next->{Cname}) {
604 $next->{'Forward'} = 1 if $next != $a;
605 } else {
606 process(sprintf("%s_%02x",$name,$b),$next);
017e2add 607 }
608 }
eb560316 609 # encengine.c rules say that last entry must be for 255
610 if (ord $raw[-1] < 255) {
611 push @ent, [1+ord $raw[-1], 255,undef,$a,0,0];
017e2add 612 }
eb560316 613 $a->{'Entries'} = \@ent;
14a8264b 614}
615
616sub outtable
617{
618 my ($fh,$a) = @_;
619 my $name = $a->{'Cname'};
017e2add 620 # String tables
14a8264b 621 foreach my $b (@{$a->{'Entries'}})
017e2add 622 {
eb560316 623 next unless $b->[AGG_OUT_LEN];
624 my $s = $b->[AGG_MIN_IN];
625 my $e = $b->[AGG_MAX_IN];
626 outstring($fh,sprintf("%s__%02x_%02x",$name,$s,$e),$b->[AGG_OUT_BYTES]);
017e2add 627 }
14a8264b 628 if ($a->{'Forward'})
629 {
630 print $fh "\nstatic encpage_t $name\[",scalar(@{$a->{'Entries'}}),"];\n";
631 }
632 $a->{'Done'} = 1;
633 foreach my $b (@{$a->{'Entries'}})
634 {
eb560316 635 my ($s,$e,$out,$t,$end,$l) = @$b;
14a8264b 636 outtable($fh,$t) unless $t->{'Done'};
637 }
638 print $fh "\nstatic encpage_t $name\[",scalar(@{$a->{'Entries'}}),"] = {\n";
639 foreach my $b (@{$a->{'Entries'}})
017e2add 640 {
eb560316 641 my ($sc,$ec,$out,$t,$end,$l,$fb) = @$b;
e0c49a6b 642 $end |= 0x80 if $fb;
017e2add 643 print $fh "{";
644 if ($l)
645 {
646 printf $fh outstring($fh,'',$out);
647 }
648 else
649 {
650 print $fh "0";
651 }
652 print $fh ",",$t->{Cname};
2f2b4ff2 653 printf $fh ",0x%02x,0x%02x,$l,$end},\n",$sc,$ec;
017e2add 654 }
14a8264b 655 print $fh "};\n";
656}
657
658sub output
659{
660 my ($fh,$name,$a) = @_;
661 process($name,$a);
662 # Sub-tables
663 outtable($fh,$a);
017e2add 664}
665
e0c49a6b 666sub output_enc
667{
668 my ($fh,$name,$a) = @_;
eb560316 669 die "Changed - fix me for new structure";
e0c49a6b 670 foreach my $b (sort keys %$a)
671 {
672 my ($s,$e,$out,$t,$end,$l,$fb) = @{$a->{$b}};
673 }
674}
675
676sub decode_U
677{
678 my $s = shift;
e0c49a6b 679}
680
f39fb8cc 681my @uname;
682sub char_names
683{
9fb51cbc 684 my $s = do "unicore/Name.pl";
685 die "char_names: unicore/Name.pl: $!\n" unless defined $s;
f39fb8cc 686 pos($s) = 0;
687 while ($s =~ /\G([0-9a-f]+)\t([0-9a-f]*)\t(.*?)\s*\n/igc)
688 {
689 my $name = $3;
690 my $s = hex($1);
691 last if $s >= 0x10000;
692 my $e = length($2) ? hex($2) : $s;
693 for (my $i = $s; $i <= $e; $i++)
694 {
695 $uname[$i] = $name;
696# print sprintf("U%04X $name\n",$i);
697 }
698 }
699}
700
e0c49a6b 701sub output_ucm_page
702{
f39fb8cc 703 my ($cmap,$a,$t,$pre) = @_;
e0c49a6b 704 # warn sprintf("Page %x\n",$pre);
705 foreach my $b (sort keys %$t)
706 {
eb560316 707 die "Changed - fix me for new structure";
e0c49a6b 708 my ($s,$e,$out,$n,$end,$l,$fb) = @{$t->{$b}};
709 die "oops $s $e" unless $s eq $e;
710 my $u = ord($s);
711 if ($n != $a && $n != $t)
712 {
f39fb8cc 713 output_ucm_page($cmap,$a,$n,(($pre|($u &0x3F)) << 6)&0xFFFF);
e0c49a6b 714 }
715 elsif (length($out))
716 {
717 if ($pre)
718 {
719 $u = $pre|($u &0x3f);
720 }
f39fb8cc 721 my $s = sprintf "<U%04X> ",$u;
e0c49a6b 722 foreach my $c (split(//,$out))
723 {
f39fb8cc 724 $s .= sprintf "\\x%02X",ord($c);
e0c49a6b 725 }
f39fb8cc 726 $s .= sprintf " |%d # %s\n",($fb ? 1 : 0),$uname[$u];
727 push(@$cmap,$s);
e0c49a6b 728 }
729 else
730 {
731 warn join(',',@{$t->{$b}},$a,$t);
732 }
733 }
734}
735
736sub output_ucm
737{
f39fb8cc 738 my ($fh,$name,$h,$rep,$min_el,$max_el) = @_;
df1df145 739 print $fh "# $0 @orig_ARGV\n" unless $opt{'q'};
afdae191 740 print $fh "<code_set_name> \"$name\"\n";
f39fb8cc 741 char_names();
afdae191 742 if (defined $min_el)
743 {
744 print $fh "<mb_cur_min> $min_el\n";
745 }
746 if (defined $max_el)
747 {
748 print $fh "<mb_cur_max> $max_el\n";
749 }
750 if (defined $rep)
751 {
752 print $fh "<subchar> ";
753 foreach my $c (split(//,$rep))
754 {
755 printf $fh "\\x%02X",ord($c);
756 }
757 print $fh "\n";
758 }
f39fb8cc 759 my @cmap;
760 output_ucm_page(\@cmap,$h,$h,0);
afdae191 761 print $fh "#\nCHARMAP\n";
f39fb8cc 762 foreach my $line (sort { substr($a,8) cmp substr($b,8) } @cmap)
763 {
764 print $fh $line;
765 }
e0c49a6b 766 print $fh "END CHARMAP\n";
767}
017e2add 768