X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2Futf8_heavy.pl;h=b6fdeb997b3407bb003b5d3d884cc86c4ed47270;hb=8af3c3489f0c9ca8d37ebae27991ac4f34dfdb2f;hp=d8c46b56424bfd7f02dbb77f025a4fd107a58ddc;hpb=3a2263fe90d1c0e6c8f9368f10e6672379a975a2;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl index d8c46b5..b6fdeb9 100644 --- a/lib/utf8_heavy.pl +++ b/lib/utf8_heavy.pl @@ -8,6 +8,8 @@ sub DESTROY {} my %Cache; +our (%PropertyAlias, %PA_reverse, %PropValueAlias, %PVA_reverse, %PVA_abbr_map); + sub croak { require Carp; Carp::croak(@_) } ## @@ -45,28 +47,67 @@ sub SWASHNEW { GETFILE: { - ## - ## 'Is' is always optional, so if it's there, remove it. - ## Same with 'Category=' and 'Script='. - ## - ## 'Block=' is replaced by 'In'. - ## + ## + ## It could be a user-defined property. + ## + + my $caller1 = $type =~ s/(.+)::// ? $1 : caller(1); + + if (defined $caller1 && $type =~ /^(?:\w+)$/) { + my $prop = "${caller1}::$type"; + if (exists &{$prop}) { + no strict 'refs'; + + $list = &{$prop}; + last GETFILE; + } + } + my $wasIs; ($wasIs = $type =~ s/^Is(?:\s+|[-_])?//i) or - $type =~ s/^Category\s*=\s*//i + $type =~ s/^(?:(?:General(?:\s+|_)?)?Category|gc)\s*[:=]\s*//i or - $type =~ s/^Script\s*=\s*//i + $type =~ s/^(?:Script|sc)\s*[:=]\s*//i or - $type =~ s/^Block\s*=\s*/In/i; + $type =~ s/^Block\s*[:=]\s*/In/i; + + + ## + ## See if it's in some enumeration. + ## + require "unicore/PVA.pl"; + if ($type =~ /^([\w\s]+)[:=]\s*(.*)/) { + my ($enum, $val) = (lc $1, lc $2); + $enum =~ tr/ _-//d; + $val =~ tr/ _-//d; + + my $pa = $PropertyAlias{$enum} ? $enum : $PA_reverse{$enum}; + my $f = $PropValueAlias{$pa}{$val} ? $val : $PVA_reverse{$pa}{lc $val}; + + if ($pa and $f) { + $pa = "gc_sc" if $pa eq "gc" or $pa eq "sc"; + $file = "unicore/lib/$pa/$PVA_abbr_map{$pa}{lc $f}.pl"; + last GETFILE; + } + } + else { + my $t = lc $type; + $t =~ tr/ _-//d; + + if ($PropValueAlias{gc}{$t} or $PropValueAlias{sc}{$t}) { + $file = "unicore/lib/gc_sc/$PVA_abbr_map{gc_sc}{$t}.pl"; + last GETFILE; + } + } ## ## See if it's in the direct mapping table. ## require "unicore/Exact.pl"; if (my $base = $utf8::Exact{$type}) { - $file = "unicore/lib/$base.pl"; + $file = "unicore/lib/gc_sc/$base.pl"; last GETFILE; } @@ -79,28 +120,12 @@ sub SWASHNEW { print "canonical = $canonical\n" if DEBUG; require "unicore/Canonical.pl"; - if (my $base = $utf8::Canonical{$canonical}) { - $file = "unicore/lib/$base.pl"; + if (my $base = ($utf8::Canonical{$canonical} || $utf8::Canonical{ lc $utf8::PropertyAlias{$canonical} })) { + $file = "unicore/lib/gc_sc/$base.pl"; last GETFILE; } ## - ## It could be a user-defined property. - ## - - my $caller1 = caller(1); - - if (defined $caller1 && $type =~ /^(?:\w+)$/) { - my $prop = $caller1 . "::" . ( $wasIs ? "Is" : "" ) . $type; - if (exists &{$prop}) { - no strict 'refs'; - - $list = &{$prop}; - last GETFILE; - } - } - - ## ## See if it's a user-level "To". ## @@ -108,6 +133,7 @@ sub SWASHNEW { if (defined $caller0 && $type =~ /^To(?:\w+)$/) { my $map = $caller0 . "::" . $type; + if (exists &{$map}) { no strict 'refs'; @@ -160,7 +186,7 @@ sub SWASHNEW { } my $extras; - my $bits; + my $bits = 0; my $ORIG = $list; if ($list) { @@ -203,11 +229,14 @@ sub SWASHNEW { my $char = $1; my $name = $2; print STDERR "$1 => $2\n" if DEBUG; - if ($char =~ /[-+!]/) { + if ($char =~ /[-+!&]/) { my ($c,$t) = split(/::/, $name, 2); # bogus use of ::, really my $subobj; if ($c eq 'utf8') { - $subobj = $c->SWASHNEW($t, "", 0, 0, 0); + $subobj = utf8->SWASHNEW($t, "", 0, 0, 0); + } + elsif (exists &$name) { + $subobj = utf8->SWASHNEW($name, "", 0, 0, 0); } elsif ($c =~ /^([0-9a-fA-F]+)/) { $subobj = utf8->SWASHNEW("", $c, 0, 0, 0); @@ -315,7 +344,7 @@ sub SWASHGET { } for my $x ($self->{EXTRAS}) { pos $x = 0; - while ($x =~ /^([-+!])(.*)/mg) { + while ($x =~ /^([-+!&])(.*)/mg) { my $char = $1; my $name = $2; print STDERR "INDIRECT $1 $2\n" if DEBUG; @@ -356,6 +385,18 @@ sub SWASHGET { } } } + elsif ($char eq '&') { + if ($bits == 1 and $otherbits == 1) { + $swatch &= $other; + } + else { + for ($key = 0; $key < $len; $key++) { + if (!vec($other, $key, $otherbits)) { + vec($swatch, $key, $bits) = 0; + } + } + } + } } } if (DEBUG) {