From: Nicholas Clark Date: Mon, 31 May 2004 19:06:21 +0000 (+0000) Subject: replace the run time code in lib/utf8_pva.pl with data generated X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=7ebf06b31de77009e38bdcb8efee7397a3652311;p=p5sagit%2Fp5-mst-13.2.git replace the run time code in lib/utf8_pva.pl with data generated at build by mktables, stored in lib/unicore/PVA.pl p4raw-id: //depot/perl@22881 --- diff --git a/MANIFEST b/MANIFEST index a74806f..609919d 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1846,7 +1846,6 @@ lib/User/grent.t See if User::grwent works lib/User/pwent.pm By-name interface to Perl's builtin getpw* lib/User/pwent.t See if User::pwent works lib/utf8_heavy.pl Support routines for utf8 pragma -lib/utf8_pva.pl Support routines for utf8 pragma lib/utf8.pm Pragma to control Unicode support lib/utf8.t See if utf8 operations work lib/validate.pl Perl library supporting wholesale file mode validation diff --git a/Makefile.SH b/Makefile.SH index a72a372..b502fac 100644 --- a/Makefile.SH +++ b/Makefile.SH @@ -320,7 +320,7 @@ addedbyconf = UU $(shextract) $(plextract) lib/lib.pm pstruct # Unicode data files generated by mktables unidatafiles = lib/unicore/Canonical.pl lib/unicore/Exact.pl \ lib/unicore/Properties lib/unicore/Decomposition.pl \ - lib/unicore/CombiningClass.pl lib/unicore/Name.pl + lib/unicore/CombiningClass.pl lib/unicore/Name.pl lib/unicore/PVA.pl # Directories of Unicode data files generated by mktables unidatadirs = lib/unicore/To lib/unicore/lib diff --git a/lib/unicore/mktables b/lib/unicore/mktables index ae83de8..aa92643 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -22,7 +22,6 @@ use File::Spec; ## Base names already used in lib/gc_sc (for avoiding 8.3 conflicts) my %BaseNames; - ## ## Process any args. ## @@ -154,10 +153,15 @@ sub Build_Aliases() { ## ## Most of the work with aliases doesn't occur here, - ## but rather in utf8_heavy.pl, which uses utf8_pva.pl, - ## which contains just this function. However, this one - ## - ## -- japhy (2004/04/13) + ## but rather in utf8_heavy.pl, which uses PVA.pl, + + # Placate the warnings about used only once. (They are used again, but + # via a typeglob lookup) + %utf8::PropertyAlias = (); + %utf8::PA_reverse = (); + %utf8::PropValueAlias = (); + %utf8::PVA_reverse = (); + %utf8::PVA_abbr_map = (); open PA, "< PropertyAliases.txt" or confess "Can't open PropertyAliases.txt: $!"; @@ -170,6 +174,12 @@ sub Build_Aliases() next if $abbrev eq "n/a"; $PropertyAlias{$abbrev} = $name; $PA_reverse{$name} = $abbrev; + + # The %utf8::... versions use japhy's code originally from utf8_pva.pl + # However, it's moved here so that we build the tables at runtime. + tr/ _-//d for $abbrev, $name; + $utf8::PropertyAlias{lc $abbrev} = $name; + $utf8::PA_reverse{lc $name} = $abbrev; } close PA; @@ -191,8 +201,23 @@ sub Build_Aliases() $PropValueAlias{$prop}{$data[0]} = $data[1]; $PVA_reverse{$prop}{$data[1]} = $data[0]; } + + shift @data if $prop eq 'ccc'; + next if $data[0] eq "n/a"; + + $data[1] =~ tr/ _-//d; + $utf8::PropValueAlias{$prop}{lc $data[0]} = $data[1]; + $utf8::PVA_reverse{$prop}{lc $data[1]} = $data[0]; + + my $abbr_class = ($prop eq 'gc' or $prop eq 'sc') ? 'gc_sc' : $prop; + $utf8::PVA_abbr_map{$abbr_class}{lc $data[0]} = $data[0]; } close PVA; + + # backwards compatibility for L& -> LC + $utf8::PropValueAlias{gc}{'l&'} = $utf8::PropValueAlias{gc}{lc}; + $utf8::PVA_abbr_map{gc_sc}{'l&'} = $utf8::PVA_abbr_map{gc_sc}{lc}; + } @@ -623,6 +648,33 @@ my $General = Table->New(); ## all characters, grouped by category my %General; my %Cat; +## Simple Data::Dumper alike. Good enough for our needs. We can't use the real +## thing as we have to run under miniperl +sub simple_dumper { + my @lines; + my $item; + foreach $item (@_) { + if (ref $item) { + if (ref $item eq 'ARRAY') { + push @lines, "[\n", simple_dumper (@$item), "],\n"; + } elsif (ref $item eq 'HASH') { + push @lines, "{\n", simple_dumper (%$item), "},\n"; + } else { + die "Can't cope with $item"; + } + } else { + if (defined $item) { + my $copy = $item; + $copy =~ s/([\'\\])/\\$1/gs; + push @lines, "'$copy',\n"; + } else { + push @lines, "undef,\n"; + } + } + } + @lines; +} + ## ## Process UnicodeData.txt (Categories, etc.) ## @@ -959,6 +1011,18 @@ sub UnicodeData_Txt() ## $Name->Write("Name.pl"); + { + my @PVA = $HEADER; + foreach my $name (qw (PropertyAlias PA_reverse PropValueAlias + PVA_reverse PVA_abbr_map)) { + # Should I really jump through typeglob hoops just to avoid a + # symbolic reference? (%{"utf8::$name}) + push @PVA, "\n", "\%utf8::$name = (\n", + simple_dumper (%{$utf8::{$name}}), ");\n"; + } + WriteIfChanged("PVA.pl", @PVA); + } + # $Bidi->Write("Bidirectional.pl"); for (keys %Bidi) { $Bidi{$_}->Write( diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl index 309cbef..b6fdeb9 100644 --- a/lib/utf8_heavy.pl +++ b/lib/utf8_heavy.pl @@ -77,7 +77,7 @@ sub SWASHNEW { ## ## See if it's in some enumeration. ## - require "utf8_pva.pl"; + require "unicore/PVA.pl"; if ($type =~ /^([\w\s]+)[:=]\s*(.*)/) { my ($enum, $val) = (lc $1, lc $2); $enum =~ tr/ _-//d; diff --git a/lib/utf8_pva.pl b/lib/utf8_pva.pl deleted file mode 100644 index 6db881b..0000000 --- a/lib/utf8_pva.pl +++ /dev/null @@ -1,56 +0,0 @@ -package utf8; - -## -## Store the alias definitions for later use. -## - -my $dir; -for (@INC) { - $dir = $_, last if -e "$_/unicore/PropertyAliases.txt"; -} - -use Carp 'confess'; - -local *_; -local $.; # localizes Pl_last_in_gv - -open PA, "< $dir/unicore/PropertyAliases.txt" - or confess "Can't open PropertyAliases.txt: $!"; -while () { - s/#.*//; - s/\s+$//; - next if /^$/; - - my ($abbrev, $name) = split /\s*;\s*/; - next if $abbrev eq "n/a"; - tr/ _-//d for $abbrev, $name; - $PropertyAlias{lc $abbrev} = $name; - $PA_reverse{lc $name} = $abbrev; -} -close PA; - -open PVA, "< $dir/unicore/PropValueAliases.txt" - or confess "Can't open PropValueAliases.txt: $!"; -while () { - s/#.*//; - s/\s+$//; - next if /^$/; - - my ($prop, @data) = split /\s*;\s*/; - shift @data if $prop eq 'ccc'; - next if $data[0] eq "n/a"; - - $data[1] =~ tr/ _-//d; - $PropValueAlias{$prop}{lc $data[0]} = $data[1]; - $PVA_reverse{$prop}{lc $data[1]} = $data[0]; - - my $abbr_class = ($prop eq 'gc' or $prop eq 'sc') ? 'gc_sc' : $prop; - $PVA_abbr_map{$abbr_class}{lc $data[0]} = $data[0]; -} -close PVA; - -# backwards compatibility for L& -> LC -$PropValueAlias{gc}{'l&'} = $PropValueAlias{gc}{lc}; -$PVA_abbr_map{gc_sc}{'l&'} = $PVA_abbr_map{gc_sc}{lc}; - -1; diff --git a/vms/descrip_mms.template b/vms/descrip_mms.template index 8aa9e0d..3cd8827 100644 --- a/vms/descrip_mms.template +++ b/vms/descrip_mms.template @@ -332,7 +332,8 @@ CRTLOPTS =,$(CRTL)/Options unidatafiles1 = [.lib.unicore]Canonical.pl [.lib.unicore]Exact.pl unidatafiles2 = [.lib.unicore]Properties [.lib.unicore]Decomposition.pl unidatafiles3 = [.lib.unicore]CombiningClass.pl [.lib.unicore]Name.pl -unidatafiles = $(unidatafiles1) $(unidatafiles2) $(unidatafiles3) +unidatafiles4 = [.lib.unicore]PVA.pl +unidatafiles = $(unidatafiles1) $(unidatafiles2) $(unidatafiles3) $(unidatafiles4) # Directories of Unicode data files generated by mktables unidatadirs = lib/unicore/To lib/unicore/lib diff --git a/win32/Makefile b/win32/Makefile index 308099f..6f3e6ce 100644 --- a/win32/Makefile +++ b/win32/Makefile @@ -454,7 +454,8 @@ X2P = ..\x2p\a2p.exe # Unicode data files generated by mktables UNIDATAFILES = ..\lib\unicore\Canonical.pl ..\lib\unicore\Exact.pl \ ..\lib\unicore\Properties ..\lib\unicore\Decomposition.pl \ - ..\lib\unicore\CombiningClass.pl ..\lib\unicore\Name.pl + ..\lib\unicore\CombiningClass.pl ..\lib\unicore\Name.pl \ + ..\lib\unicore\PVA.pl # Directories of Unicode data files generated by mktables UNIDATADIRS = ..\lib\unicore\To ..\lib\unicore\lib diff --git a/win32/makefile.mk b/win32/makefile.mk index 8aca918..f61b7b5 100644 --- a/win32/makefile.mk +++ b/win32/makefile.mk @@ -577,7 +577,8 @@ X2P = ..\x2p\a2p.exe # Unicode data files generated by mktables UNIDATAFILES = ..\lib\unicore\Canonical.pl ..\lib\unicore\Exact.pl \ ..\lib\unicore\Properties ..\lib\unicore\Decomposition.pl \ - ..\lib\unicore\CombiningClass.pl ..\lib\unicore\Name.pl + ..\lib\unicore\CombiningClass.pl ..\lib\unicore\Name.pl \ + ..\lib\unicore\PVA.pl # Directories of Unicode data files generated by mktables UNIDATADIRS = ..\lib\unicore\To ..\lib\unicore\lib