X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=ext%2FEncode%2Fbin%2Fenc2xs;h=57d256ecd727be04c817ac63941b583db2018a9b;hb=6e8dae779caa61392088d43ba93888dc57f1df7e;hp=f2287d467539ae27df0f18e54ae351e251a327c9;hpb=621b0f8dc4f329f12773e6204726adf3d394c032;p=p5sagit%2Fp5-mst-13.2.git diff --git a/ext/Encode/bin/enc2xs b/ext/Encode/bin/enc2xs index f2287d4..57d256e 100644 --- a/ext/Encode/bin/enc2xs +++ b/ext/Encode/bin/enc2xs @@ -6,9 +6,11 @@ BEGIN { require Config; import Config; } use strict; +use warnings; use Getopt::Std; +use Config; my @orig_ARGV = @ARGV; -our $VERSION = do { my @r = (q$Revision: 1.26 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 2.5 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; # These may get re-ordered. # RAW is a do_now as inserted by &enter @@ -158,7 +160,7 @@ my $hname = $cname; my ($doC,$doEnc,$doUcm,$doPet); -if ($cname =~ /\.(c|xs)$/) +if ($cname =~ /\.(c|xs)$/i) # VMS may have upcased filenames with DECC$ARGV_PARSE_STYLE defined { $doC = 1; $dname =~ s/(\.[^\.]*)?$/.exh/; @@ -175,6 +177,7 @@ if ($cname =~ /\.(c|xs)$/) !!!!!!! DO NOT EDIT THIS FILE !!!!!!! This file was autogenerated by: $^X $0 @orig_ARGV + enc2xs VERSION $VERSION */ END } @@ -186,7 +189,7 @@ END print C "#include \n"; print C "#define U8 U8\n"; } - print C "#include \"encode.h\"\n"; + print C "#include \"encode.h\"\n\n"; } elsif ($cname =~ /\.enc$/) @@ -204,6 +207,9 @@ elsif ($cname =~ /\.pet$/) my %encoding; my %strings; +my $string_acc; +my %strings_in_acc; + my $saved = 0; my $subsave = 0; my $strings = 0; @@ -250,10 +256,23 @@ if ($doC) foreach my $name (sort cmp_name keys %encoding) { my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}}; - output(\*C,$name.'_utf8',$e2u); - output(\*C,'utf8_'.$name,$u2e); + process($name.'_utf8',$e2u); + addstrings(\*C,$e2u); + + process('utf8_'.$name,$u2e); + addstrings(\*C,$u2e); + } + outbigstring(\*C,"enctable"); + foreach my $name (sort cmp_name keys %encoding) + { + my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}}; + outtable(\*C,$e2u, "enctable"); + outtable(\*C,$u2e, "enctable"); + # push(@{$encoding{$name}},outstring(\*C,$e2u->{Cname}.'_def',$erep)); } + my $cpp = ($Config{d_cplusplus} || '') eq 'define'; + my $ext_c = $cpp ? 'extern "C" ' : ""; foreach my $enc (sort cmp_name keys %encoding) { # my ($e2u,$u2e,$rep,$min_el,$max_el,$rsym) = @{$encoding{$enc}}; @@ -261,11 +280,19 @@ if ($doC) #my @info = ($e2u->{Cname},$u2e->{Cname},$rsym,length($rep),$min_el,$max_el); my $replen = 0; $replen++ while($rep =~ /\G\\x[0-9A-Fa-f]/g); - my @info = ($e2u->{Cname},$u2e->{Cname},qq((U8 *)"$rep"),$replen,$min_el,$max_el); my $sym = "${enc}_encoding"; $sym =~ s/\W+/_/g; - print C "encode_t $sym = \n"; - print C " {",join(',',@info,"{\"$enc\",(const char *)0}"),"};\n\n"; + my @info = ($e2u->{Cname},$u2e->{Cname},"${sym}_rep_character",$replen, + $min_el,$max_el); + print C "${ext_c}static const U8 ${sym}_rep_character[] = \"$rep\";\n"; + print C "${ext_c}static const char ${sym}_enc_name[] = \"$enc\";\n\n"; + print C "${ext_c}const encode_t $sym = \n"; + # This is to make null encoding work -- dankogai + for (my $i = (scalar @info) - 1; $i >= 0; --$i){ + $info[$i] ||= 1; + } + # end of null tweak -- dankogai + print C " {",join(',',@info,"{${sym}_enc_name,(const char *)0}"),"};\n\n"; } foreach my $enc (sort cmp_name keys %encoding) @@ -311,8 +338,8 @@ END close(D) or warn "Error closing '$dname': $!"; close(H) or warn "Error closing '$hname': $!"; - my $perc_saved = $strings/($strings + $saved) * 100; - my $perc_subsaved = $strings/($strings + $subsave) * 100; + my $perc_saved = $saved/($strings + $saved) * 100; + my $perc_subsaved = $subsave/($strings + $subsave) * 100; printf STDERR "%d bytes in string tables\n",$strings; printf STDERR "%d bytes (%.3g%%) saved spotting duplicates\n", $saved, $perc_saved if $saved; @@ -591,43 +618,6 @@ sub enter_fb0 { } } - -sub outstring -{ - my ($fh,$name,$s) = @_; - my $sym = $strings{$s}; - if ($sym) - { - $saved += length($s); - } - else - { - if ($opt{'O'}) { - foreach my $o (keys %strings) - { - next unless (my $i = index($o,$s)) >= 0; - $sym = $strings{$o}; - # gcc things that 0x0e+0x10 (anything with e+) starts to look like - # a hexadecimal floating point constant. Silly gcc. Only p - # introduces a floating point constant. Put the space in to stop it - # getting confused. - $sym .= sprintf(" +0x%02x",$i) if ($i); - $subsave += length($s); - return $strings{$s} = $sym; - } - } - $strings{$s} = $sym = $name; - $strings += length($s); - my $definition = sprintf "static const U8 %s[%d] = { ",$name,length($s); - # Maybe we should assert that these are all <256. - $definition .= join(',',unpack "C*",$s); - # We have a single long line. Split it at convenient commas. - $definition =~ s/(.{74,77},)/$1\n/g; - print $fh "$definition };\n\n"; - } - return $sym; -} - sub process { my ($name,$a) = @_; @@ -688,7 +678,8 @@ sub process $a->{'Entries'} = \@ent; } -sub outtable + +sub addstrings { my ($fh,$a) = @_; my $name = $a->{'Cname'}; @@ -696,29 +687,116 @@ sub outtable foreach my $b (@{$a->{'Entries'}}) { next unless $b->[AGG_OUT_LEN]; - my $s = $b->[AGG_MIN_IN]; - my $e = $b->[AGG_MAX_IN]; - outstring($fh,sprintf("%s__%02x_%02x",$name,$s,$e),$b->[AGG_OUT_BYTES]); + $strings{$b->[AGG_OUT_BYTES]} = undef; } if ($a->{'Forward'}) { - print $fh "\nstatic encpage_t $name\[",scalar(@{$a->{'Entries'}}),"];\n"; + my $cpp = ($Config{d_cplusplus} || '') eq 'define'; + my $var = $^O eq 'MacOS' || $cpp ? 'extern' : 'static'; + my $const = $cpp ? '' : 'const'; + print $fh "$var $const encpage_t $name\[",scalar(@{$a->{'Entries'}}),"];\n"; } + $a->{'DoneStrings'} = 1; + foreach my $b (@{$a->{'Entries'}}) + { + my ($s,$e,$out,$t,$end,$l) = @$b; + addstrings($fh,$t) unless $t->{'DoneStrings'}; + } +} + +sub outbigstring +{ + my ($fh,$name) = @_; + + $string_acc = ''; + + # Make the big string in the string accumulator. Longest first, on the hope + # that this makes it more likely that we find the short strings later on. + # Not sure if it helps sorting strings of the same length lexcically. + foreach my $s (sort {length $b <=> length $a || $a cmp $b} keys %strings) { + my $index = index $string_acc, $s; + if ($index >= 0) { + $saved += length($s); + $strings_in_acc{$s} = $index; + } else { + OPTIMISER: { + if ($opt{'O'}) { + my $sublength = length $s; + while (--$sublength > 0) { + # progressively lop characters off the end, to see if the start of + # the new string overlaps the end of the accumulator. + if (substr ($string_acc, -$sublength) + eq substr ($s, 0, $sublength)) { + $subsave += $sublength; + $strings_in_acc{$s} = length ($string_acc) - $sublength; + # append the last bit on the end. + $string_acc .= substr ($s, $sublength); + last OPTIMISER; + } + # or if the end of the new string overlaps the start of the + # accumulator + next unless substr ($string_acc, 0, $sublength) + eq substr ($s, -$sublength); + # well, the last $sublength characters of the accumulator match. + # so as we're prepending to the accumulator, need to shift all our + # existing offsets forwards + $_ += $sublength foreach values %strings_in_acc; + $subsave += $sublength; + $strings_in_acc{$s} = 0; + # append the first bit on the start. + $string_acc = substr ($s, 0, -$sublength) . $string_acc; + last OPTIMISER; + } + } + # Optimiser (if it ran) found nothing, so just going have to tack the + # whole thing on the end. + $strings_in_acc{$s} = length $string_acc; + $string_acc .= $s; + }; + } + } + + $strings = length $string_acc; + my $cpp = ($Config{d_cplusplus} || '') eq 'define'; + my $var = $cpp ? '' : 'static'; + my $definition = "\n$var const U8 $name\[$strings] = { " . + join(',',unpack "C*",$string_acc); + # We have a single long line. Split it at convenient commas. + print $fh $1, "\n" while $definition =~ /\G(.{74,77},)/gcs; + print $fh substr ($definition, pos $definition), " };\n"; +} + +sub findstring { + my ($name,$s) = @_; + my $offset = $strings_in_acc{$s}; + die "Can't find string " . join (',',unpack "C*",$s) . " in accumulator" + unless defined $offset; + "$name + $offset"; +} + +sub outtable +{ + my ($fh,$a,$bigname) = @_; + my $name = $a->{'Cname'}; $a->{'Done'} = 1; foreach my $b (@{$a->{'Entries'}}) { my ($s,$e,$out,$t,$end,$l) = @$b; - outtable($fh,$t) unless $t->{'Done'}; + outtable($fh,$t,$bigname) unless $t->{'Done'}; } - print $fh "\nstatic encpage_t $name\[",scalar(@{$a->{'Entries'}}),"] = {\n"; + my $cpp = ($Config{d_cplusplus} || '') eq 'define'; + my $var = $cpp ? '' : 'static'; + my $const = $cpp ? '' : 'const'; + print $fh "\n$var $const encpage_t $name\[", + scalar(@{$a->{'Entries'}}), "] = {\n"; foreach my $b (@{$a->{'Entries'}}) { my ($sc,$ec,$out,$t,$end,$l,$fb) = @$b; - $end |= 0x80 if $fb; + # $end |= 0x80 if $fb; # what the heck was on your mind, Nick? -- Dan print $fh "{"; if ($l) { - printf $fh outstring($fh,'',$out); + printf $fh findstring($bigname,$out); } else { @@ -730,14 +808,6 @@ sub outtable print $fh "};\n"; } -sub output -{ - my ($fh,$name,$a) = @_; - process($name,$a); - # Sub-tables - outtable($fh,$a); -} - sub output_enc { my ($fh,$name,$a) = @_; @@ -851,27 +921,28 @@ use vars qw( ); sub find_e2x{ - eval { require File::Find }; + eval { require File::Find; }; my (@inc, %e2x_dir); for my $inc (@INC){ - push @inc, $inc unless $inc eq '.'; #skip current dir + push @inc, $inc unless $inc eq '.'; #skip current dir } File::Find::find( - sub { - my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, - $atime,$mtime,$ctime,$blksize,$blocks) - = lstat($_) or return; - -f _ or return; - if (/^.*\.e2x$/o){ - $e2x_dir{$File::Find::dir} ||= $mtime; - } - return; - }, @inc); + sub { + my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, + $atime,$mtime,$ctime,$blksize,$blocks) + = lstat($_) or return; + -f _ or return; + if (/^.*\.e2x$/o){ + no warnings 'once'; + $e2x_dir{$File::Find::dir} ||= $mtime; + } + return; + }, @inc); warn join("\n", keys %e2x_dir), "\n"; for my $d (sort {$e2x_dir{$a} <=> $e2x_dir{$b}} keys %e2x_dir){ - $_E2X = $d; - # warn "$_E2X => ", scalar localtime($e2x_dir{$d}); - return $_E2X; + $_E2X = $d; + # warn "$_E2X => ", scalar localtime($e2x_dir{$d}); + return $_E2X; } } @@ -897,57 +968,64 @@ sub make_makefile_pl } use vars qw( - $_ModLines - $_LocalVer - ); + $_ModLines + $_LocalVer + ); -sub make_configlocal_pm -{ +sub make_configlocal_pm { eval { require Encode; }; $@ and die "Unable to require Encode: $@\n"; eval { require File::Spec; }; + # our used for variable expanstion - my %in_core = map {$_=>1}('ascii','iso-8859-1','utf8'); + my %in_core = map { $_ => 1 } ( + 'ascii', 'iso-8859-1', 'utf8', + 'ascii-ctrl', 'null', 'utf-8-strict' + ); my %LocalMod = (); - for my $d (@INC){ - my $inc = File::Spec->catfile($d, "Encode"); - -d $inc or next; - opendir my $dh, $inc or die "$inc:$!"; - warn "Checking $inc...\n"; - for my $f (grep /\.pm$/o, readdir($dh)){ - -f File::Spec->catfile($inc, "$f") or next; - $INC{"Encode/$f"} and next; - warn "require Encode/$f;\n"; - eval { require "Encode/$f"; }; - $@ and die "Can't require Encode/$f: $@\n"; - for my $enc (Encode->encodings()){ - $in_core{$enc} and next; - $Encode::Config::ExtModule{$enc} and next; - my $mod = "Encode/$f"; - $mod =~ s/\.pm$//o; $mod =~ s,/,::,og; - $LocalMod{$enc} ||= $mod; - } + # check @enc; + use File::Find (); + my $wanted = sub{ + -f $_ or return; + $File::Find::name =~ /\A\./ and return; + $File::Find::name =~ /\.pm\z/ or return; + $File::Find::name =~ m/\bEncode\b/ or return; + my $mod = $File::Find::name; + $mod =~ s/.*\bEncode\b/Encode/o; + $mod =~ s/\.pm\z//o; + $mod =~ s,/,::,og; + warn qq{ require $mod;\n}; + eval qq{ require $mod; }; + $@ and die "Can't require $mod: $@\n"; + for my $enc ( Encode->encodings() ) { + no warnings; + $in_core{$enc} and next; + $Encode::Config::ExtModule{$enc} and next; + $LocalMod{$enc} ||= $mod; } - } + }; + File::Find::find({wanted => $wanted}, @INC); $_ModLines = ""; - for my $enc (sort keys %LocalMod){ - $_ModLines .= - qq(\$Encode::ExtModule{'$enc'} =\t"$LocalMod{$enc}";\n); + for my $enc ( sort keys %LocalMod ) { + $_ModLines .= + qq(\$Encode::ExtModule{'$enc'} = "$LocalMod{$enc}";\n); } warn $_ModLines; $_LocalVer = _mkversion(); - $_E2X = find_e2x(); - $_Inc = $INC{"Encode.pm"}; $_Inc =~ s/\.pm$//o; - _print_expand(File::Spec->catfile($_E2X,"ConfigLocal_PM.e2x"), - File::Spec->catfile($_Inc,"ConfigLocal.pm"), - 1); + $_E2X = find_e2x(); + $_Inc = $INC{"Encode.pm"}; + $_Inc =~ s/\.pm$//o; + _print_expand( File::Spec->catfile( $_E2X, "ConfigLocal_PM.e2x" ), + File::Spec->catfile( $_Inc, "ConfigLocal.pm" ), 1 ); exit; } sub _mkversion{ - my ($ss,$mm,$hh,$dd,$mo,$yyyy) = localtime(); - $yyyy += 1900, $mo +=1; - return sprintf("v%04d.%04d.%04d", $yyyy, $mo*100+$dd, $hh*100+$mm); + # v-string is now depreciated; use time() instead; + #my ($ss,$mm,$hh,$dd,$mo,$yyyy) = localtime(); + #$yyyy += 1900, $mo +=1; + #return sprintf("v%04d.%04d.%04d", $yyyy, $mo*100+$dd, $hh*100+$mm); + return time(); } sub _print_expand{ @@ -956,22 +1034,22 @@ sub _print_expand{ File::Basename->import(); my ($src, $dst, $clobber) = @_; if (!$clobber and -e $dst){ - warn "$dst exists. skipping\n"; - return; + warn "$dst exists. skipping\n"; + return; } warn "Generating $dst...\n"; open my $in, $src or die "$src : $!"; if ((my $d = dirname($dst)) ne '.'){ - -d $d or mkdir $d, 0755 or die "mkdir $d : $!"; + -d $d or mkdir $d, 0755 or die "mkdir $d : $!"; } open my $out, ">$dst" or die "$!"; my $asis = 0; while (<$in>){ - if (/^#### END_OF_HEADER/){ - $asis = 1; next; - } - s/(\$_[A-Z][A-Za-z0-9]+)_/$1/gee unless $asis; - print $out $_; + if (/^#### END_OF_HEADER/){ + $asis = 1; next; + } + s/(\$_[A-Z][A-Za-z0-9]+)_/$1/gee unless $asis; + print $out $_; } } __END__ @@ -1055,7 +1133,7 @@ the pod and to add more tests. Now issue a command all Perl Mongers love: - $ perl5.7.3 Makefile.PL + $ perl Makefile.PL Writing Makefile for Encode::My =item 4. @@ -1069,8 +1147,8 @@ Now all you have to do is make. Reading myascii (myascii) Writing compiled form 128 bytes in string tables - 384 bytes (25%) saved spotting duplicates - 1 bytes (99.2%) saved using substrings + 384 bytes (75%) saved spotting duplicates + 1 bytes (0.775%) saved using substrings .... chmod 644 blib/arch/auto/Encode/My/My.bs $ @@ -1261,7 +1339,9 @@ down, here is what happens. The Encode package comes with F, a crude but sufficient utility to check the integrity of a UCM file. Check under the Encode/bin directory for this. - + +When in doubt, you can use F, yet another utility under +Encode/bin directory. =head1 Bookmarks