use strict;
use warnings;
use Getopt::Std;
+use Config;
my @orig_ARGV = @ARGV;
-our $VERSION = do { my @r = (q$Revision: 1.31 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.5 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
# These may get re-ordered.
# RAW is a do_now as inserted by &enter
my ($doC,$doEnc,$doUcm,$doPet);
-if ($cname =~ /\.(c|xs)$/)
+if ($cname =~ /\.(c|xs)$/i) # VMS may have upcased filenames with DECC$ARGV_PARSE_STYLE defined
{
$doC = 1;
$dname =~ s/(\.[^\.]*)?$/.exh/;
!!!!!!! DO NOT EDIT THIS FILE !!!!!!!
This file was autogenerated by:
$^X $0 @orig_ARGV
+ enc2xs VERSION $VERSION
*/
END
}
# push(@{$encoding{$name}},outstring(\*C,$e2u->{Cname}.'_def',$erep));
}
+ my $cpp = ($Config{d_cplusplus} || '') eq 'define';
+ my $ext_c = $cpp ? 'extern "C" ' : "";
foreach my $enc (sort cmp_name keys %encoding)
{
# my ($e2u,$u2e,$rep,$min_el,$max_el,$rsym) = @{$encoding{$enc}};
#my @info = ($e2u->{Cname},$u2e->{Cname},$rsym,length($rep),$min_el,$max_el);
my $replen = 0;
$replen++ while($rep =~ /\G\\x[0-9A-Fa-f]/g);
- my @info = ($e2u->{Cname},$u2e->{Cname},qq((U8 *)"$rep"),$replen,$min_el,$max_el);
my $sym = "${enc}_encoding";
$sym =~ s/\W+/_/g;
- print C "encode_t $sym = \n";
+ my @info = ($e2u->{Cname},$u2e->{Cname},"${sym}_rep_character",$replen,
+ $min_el,$max_el);
+ print C "${ext_c}static const U8 ${sym}_rep_character[] = \"$rep\";\n";
+ print C "${ext_c}static const char ${sym}_enc_name[] = \"$enc\";\n\n";
+ print C "${ext_c}const encode_t $sym = \n";
# This is to make null encoding work -- dankogai
for (my $i = (scalar @info) - 1; $i >= 0; --$i){
- $info[$i] ||= 1;
+ $info[$i] ||= 1;
}
# end of null tweak -- dankogai
- print C " {",join(',',@info,"{\"$enc\",(const char *)0}"),"};\n\n";
+ print C " {",join(',',@info,"{${sym}_enc_name,(const char *)0}"),"};\n\n";
}
foreach my $enc (sort cmp_name keys %encoding)
close(D) or warn "Error closing '$dname': $!";
close(H) or warn "Error closing '$hname': $!";
- my $perc_saved = $strings/($strings + $saved) * 100;
- my $perc_subsaved = $strings/($strings + $subsave) * 100;
+ my $perc_saved = $saved/($strings + $saved) * 100;
+ my $perc_subsaved = $subsave/($strings + $subsave) * 100;
printf STDERR "%d bytes in string tables\n",$strings;
printf STDERR "%d bytes (%.3g%%) saved spotting duplicates\n",
$saved, $perc_saved if $saved;
}
if ($a->{'Forward'})
{
- my $var = $^O eq 'MacOS' ? 'extern' : 'static';
- print $fh "$var encpage_t $name\[",scalar(@{$a->{'Entries'}}),"];\n";
+ my $cpp = ($Config{d_cplusplus} || '') eq 'define';
+ my $var = $^O eq 'MacOS' || $cpp ? 'extern' : 'static';
+ my $const = $cpp ? '' : 'const';
+ print $fh "$var $const encpage_t $name\[",scalar(@{$a->{'Entries'}}),"];\n";
}
$a->{'DoneStrings'} = 1;
foreach my $b (@{$a->{'Entries'}})
$strings_in_acc{$s} = $index;
} else {
OPTIMISER: {
- if ($opt{'O'}) {
- my $sublength = length $s;
- while (--$sublength > 0) {
- # progressively lop characters off the end, to see if the start of
- # the new string overlaps the end of the accumulator.
- if (substr ($string_acc, -$sublength)
- eq substr ($s, 0, $sublength)) {
- $subsave += $sublength;
- $strings_in_acc{$s} = length ($string_acc) - $sublength;
- # append the last bit on the end.
- $string_acc .= substr ($s, $sublength);
- last OPTIMISER;
- }
- # or if the end of the new string overlaps the start of the
- # accumulator
- next unless substr ($string_acc, 0, $sublength)
- eq substr ($s, -$sublength);
- # well, the last $sublength characters of the accumulator match.
- # so as we're prepending to the accumulator, need to shift all our
- # existing offsets forwards
- $_ += $sublength foreach values %strings_in_acc;
- $subsave += $sublength;
- $strings_in_acc{$s} = 0;
- # append the first bit on the start.
- $string_acc = substr ($s, 0, -$sublength) . $string_acc;
- last OPTIMISER;
- }
- }
- # Optimiser (if it ran) found nothing, so just going have to tack the
- # whole thing on the end.
- $strings_in_acc{$s} = length $string_acc;
- $string_acc .= $s;
+ if ($opt{'O'}) {
+ my $sublength = length $s;
+ while (--$sublength > 0) {
+ # progressively lop characters off the end, to see if the start of
+ # the new string overlaps the end of the accumulator.
+ if (substr ($string_acc, -$sublength)
+ eq substr ($s, 0, $sublength)) {
+ $subsave += $sublength;
+ $strings_in_acc{$s} = length ($string_acc) - $sublength;
+ # append the last bit on the end.
+ $string_acc .= substr ($s, $sublength);
+ last OPTIMISER;
+ }
+ # or if the end of the new string overlaps the start of the
+ # accumulator
+ next unless substr ($string_acc, 0, $sublength)
+ eq substr ($s, -$sublength);
+ # well, the last $sublength characters of the accumulator match.
+ # so as we're prepending to the accumulator, need to shift all our
+ # existing offsets forwards
+ $_ += $sublength foreach values %strings_in_acc;
+ $subsave += $sublength;
+ $strings_in_acc{$s} = 0;
+ # append the first bit on the start.
+ $string_acc = substr ($s, 0, -$sublength) . $string_acc;
+ last OPTIMISER;
+ }
+ }
+ # Optimiser (if it ran) found nothing, so just going have to tack the
+ # whole thing on the end.
+ $strings_in_acc{$s} = length $string_acc;
+ $string_acc .= $s;
};
}
}
$strings = length $string_acc;
- my $definition = "\nstatic const U8 $name\[$strings] = { " .
+ my $cpp = ($Config{d_cplusplus} || '') eq 'define';
+ my $var = $cpp ? '' : 'static';
+ my $definition = "\n$var const U8 $name\[$strings] = { " .
join(',',unpack "C*",$string_acc);
# We have a single long line. Split it at convenient commas.
print $fh $1, "\n" while $definition =~ /\G(.{74,77},)/gcs;
my ($s,$e,$out,$t,$end,$l) = @$b;
outtable($fh,$t,$bigname) unless $t->{'Done'};
}
- print $fh "\nstatic encpage_t $name\[",scalar(@{$a->{'Entries'}}),"] = {\n";
+ my $cpp = ($Config{d_cplusplus} || '') eq 'define';
+ my $var = $cpp ? '' : 'static';
+ my $const = $cpp ? '' : 'const';
+ print $fh "\n$var $const encpage_t $name\[",
+ scalar(@{$a->{'Entries'}}), "] = {\n";
foreach my $b (@{$a->{'Entries'}})
{
my ($sc,$ec,$out,$t,$end,$l,$fb) = @$b;
eval { require File::Find; };
my (@inc, %e2x_dir);
for my $inc (@INC){
- push @inc, $inc unless $inc eq '.'; #skip current dir
+ push @inc, $inc unless $inc eq '.'; #skip current dir
}
File::Find::find(
- sub {
- my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
- $atime,$mtime,$ctime,$blksize,$blocks)
- = lstat($_) or return;
- -f _ or return;
- if (/^.*\.e2x$/o){
- no warnings 'once';
- $e2x_dir{$File::Find::dir} ||= $mtime;
- }
- return;
- }, @inc);
+ sub {
+ my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
+ $atime,$mtime,$ctime,$blksize,$blocks)
+ = lstat($_) or return;
+ -f _ or return;
+ if (/^.*\.e2x$/o){
+ no warnings 'once';
+ $e2x_dir{$File::Find::dir} ||= $mtime;
+ }
+ return;
+ }, @inc);
warn join("\n", keys %e2x_dir), "\n";
for my $d (sort {$e2x_dir{$a} <=> $e2x_dir{$b}} keys %e2x_dir){
- $_E2X = $d;
- # warn "$_E2X => ", scalar localtime($e2x_dir{$d});
- return $_E2X;
+ $_E2X = $d;
+ # warn "$_E2X => ", scalar localtime($e2x_dir{$d});
+ return $_E2X;
}
}
}
use vars qw(
- $_ModLines
- $_LocalVer
- );
+ $_ModLines
+ $_LocalVer
+ );
-sub make_configlocal_pm
-{
+sub make_configlocal_pm {
eval { require Encode; };
$@ and die "Unable to require Encode: $@\n";
eval { require File::Spec; };
+
# our used for variable expanstion
- my %in_core = map {$_=>1}('ascii','iso-8859-1','utf8');
+ my %in_core = map { $_ => 1 } (
+ 'ascii', 'iso-8859-1', 'utf8',
+ 'ascii-ctrl', 'null', 'utf-8-strict'
+ );
my %LocalMod = ();
- for my $d (@INC){
- my $inc = File::Spec->catfile($d, "Encode");
- -d $inc or next;
- opendir my $dh, $inc or die "$inc:$!";
- warn "Checking $inc...\n";
- for my $f (grep /\.pm$/o, readdir($dh)){
- -f File::Spec->catfile($inc, "$f") or next;
- $INC{"Encode/$f"} and next;
- warn "require Encode/$f;\n";
- eval { require "Encode/$f"; };
- $@ and die "Can't require Encode/$f: $@\n";
- for my $enc (Encode->encodings()){
- no warnings 'once';
- $in_core{$enc} and next;
- $Encode::Config::ExtModule{$enc} and next;
- my $mod = "Encode/$f";
- $mod =~ s/\.pm$//o; $mod =~ s,/,::,og;
- $LocalMod{$enc} ||= $mod;
- }
+ # check @enc;
+ use File::Find ();
+ my $wanted = sub{
+ -f $_ or return;
+ $File::Find::name =~ /\A\./ and return;
+ $File::Find::name =~ /\.pm\z/ or return;
+ $File::Find::name =~ m/\bEncode\b/ or return;
+ my $mod = $File::Find::name;
+ $mod =~ s/.*\bEncode\b/Encode/o;
+ $mod =~ s/\.pm\z//o;
+ $mod =~ s,/,::,og;
+ warn qq{ require $mod;\n};
+ eval qq{ require $mod; };
+ $@ and die "Can't require $mod: $@\n";
+ for my $enc ( Encode->encodings() ) {
+ no warnings;
+ $in_core{$enc} and next;
+ $Encode::Config::ExtModule{$enc} and next;
+ $LocalMod{$enc} ||= $mod;
}
- }
+ };
+ File::Find::find({wanted => $wanted}, @INC);
$_ModLines = "";
- for my $enc (sort keys %LocalMod){
- $_ModLines .=
- qq(\$Encode::ExtModule{'$enc'} =\t"$LocalMod{$enc}";\n);
+ for my $enc ( sort keys %LocalMod ) {
+ $_ModLines .=
+ qq(\$Encode::ExtModule{'$enc'} = "$LocalMod{$enc}";\n);
}
warn $_ModLines;
$_LocalVer = _mkversion();
- $_E2X = find_e2x();
- $_Inc = $INC{"Encode.pm"}; $_Inc =~ s/\.pm$//o;
- _print_expand(File::Spec->catfile($_E2X,"ConfigLocal_PM.e2x"),
- File::Spec->catfile($_Inc,"ConfigLocal.pm"),
- 1);
+ $_E2X = find_e2x();
+ $_Inc = $INC{"Encode.pm"};
+ $_Inc =~ s/\.pm$//o;
+ _print_expand( File::Spec->catfile( $_E2X, "ConfigLocal_PM.e2x" ),
+ File::Spec->catfile( $_Inc, "ConfigLocal.pm" ), 1 );
exit;
}
sub _mkversion{
- my ($ss,$mm,$hh,$dd,$mo,$yyyy) = localtime();
- $yyyy += 1900, $mo +=1;
- return sprintf("v%04d.%04d.%04d", $yyyy, $mo*100+$dd, $hh*100+$mm);
+ # v-string is now depreciated; use time() instead;
+ #my ($ss,$mm,$hh,$dd,$mo,$yyyy) = localtime();
+ #$yyyy += 1900, $mo +=1;
+ #return sprintf("v%04d.%04d.%04d", $yyyy, $mo*100+$dd, $hh*100+$mm);
+ return time();
}
sub _print_expand{
File::Basename->import();
my ($src, $dst, $clobber) = @_;
if (!$clobber and -e $dst){
- warn "$dst exists. skipping\n";
- return;
+ warn "$dst exists. skipping\n";
+ return;
}
warn "Generating $dst...\n";
open my $in, $src or die "$src : $!";
if ((my $d = dirname($dst)) ne '.'){
- -d $d or mkdir $d, 0755 or die "mkdir $d : $!";
+ -d $d or mkdir $d, 0755 or die "mkdir $d : $!";
}
open my $out, ">$dst" or die "$!";
my $asis = 0;
while (<$in>){
- if (/^#### END_OF_HEADER/){
- $asis = 1; next;
- }
- s/(\$_[A-Z][A-Za-z0-9]+)_/$1/gee unless $asis;
- print $out $_;
+ if (/^#### END_OF_HEADER/){
+ $asis = 1; next;
+ }
+ s/(\$_[A-Z][A-Za-z0-9]+)_/$1/gee unless $asis;
+ print $out $_;
}
}
__END__
Reading myascii (myascii)
Writing compiled form
128 bytes in string tables
- 384 bytes (25%) saved spotting duplicates
- 1 bytes (99.2%) saved using substrings
+ 384 bytes (75%) saved spotting duplicates
+ 1 bytes (0.775%) saved using substrings
....
chmod 644 blib/arch/auto/Encode/My/My.bs
$
mappings. This format is used by IBM's ICU package and was adopted
by Nick Ing-Simmons for use with the Encode module. Since UCM is
more flexible than Tcl's Encoding Map and far more user-friendly,
-this is the recommended formet for Encode now.
+this is the recommended format for Encode now.
A UCM file looks like this.
The Encode package comes with F<ucmlint>, a crude but sufficient
utility to check the integrity of a UCM file. Check under the
Encode/bin directory for this.
-
+
+When in doubt, you can use F<ucmsort>, yet another utility under
+Encode/bin directory.
=head1 Bookmarks