X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2Futf8_heavy.pl;h=4c5ef27a7fb7f45245857eaabb3f2285e212ec1e;hb=dd5ea221bb13f4bad856ba91d1a82acf3ec12919;hp=70bd018f81c14aa978369e31221d607a29c491f6;hpb=bc45ce41590a23464acf3b62efacccf60e155a7c;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl index 70bd018..4c5ef27 100644 --- a/lib/utf8_heavy.pl +++ b/lib/utf8_heavy.pl @@ -6,10 +6,10 @@ sub DEBUG () { 0 } sub DESTROY {} -sub croak { require Carp; Carp::croak(@_) } - my %Cache; +sub croak { require Carp; Carp::croak(@_) } + ## ## "SWASH" == "SWATCH HASH". A "swatch" is a swatch of the Unicode landscape ## @@ -28,7 +28,7 @@ sub SWASHNEW { ## ranges. ## ## To make the parsing of $type clear, this code takes the a rather - ## unorthadox approach of last'ing out of the block once we have the + ## unorthodox approach of last'ing out of the block once we have the ## info we need. Were this to be a subroutine, the 'last' would just ## be a 'return'. ## @@ -50,7 +50,9 @@ sub SWASHNEW { ## ## 'Block=' is replaced by 'In'. ## - $type =~ s/^Is(?:\s+|[-_])?//i + my $wasIs; + + ($wasIs = $type =~ s/^Is(?:\s+|[-_])?//i) or $type =~ s/^Category\s*=\s*//i or @@ -81,6 +83,22 @@ sub SWASHNEW { last GETFILE; } + ## + ## It could be a user-defined property. + ## + + my $caller = caller(1); + + if (defined $caller && $type =~ /^(?:\w+)$/) { + my $prop = $caller . "::" . ( $wasIs ? "Is" : "" ) . $type; + if (exists &{$prop}) { + no strict 'refs'; + + $list = &{$prop}; + last GETFILE; + } + } + ## ## Last attempt -- see if it's a "To" name (e.g. "ToLower") ## @@ -99,21 +117,25 @@ sub SWASHNEW { return $type; } - print "found it (file='$file')\n" if DEBUG; + if (defined $file) { + print "found it (file='$file')\n" if DEBUG; + + ## + ## If we reach here, it was due to a 'last GETFILE' above + ## (exception: user-defined properties), so we + ## have a filename, so now we load it if we haven't already. + ## If we have, return the cached results. The cache key is the + ## file to load. + ## + if ($Cache{$file} and ref($Cache{$file}) eq $class) + { + print "Returning cached '$file' for \\p{$type}\n" if DEBUG; + return $Cache{$class, $file}; + } - ## - ## If we reach here, it was due to a 'last GETFILE' above, so we - ## have a filename, so now we load it if we haven't already. - ## If we have, return the cached results. The cache key is the - ## file to load. - ## - if ($Cache{$file} and ref($Cache{$file}) eq $class) - { - print "Returning cached '$file' for \\p{$type}\n" if DEBUG; - return $Cache{$class, $file}; - } + $list = do $file; + } - $list = do $file; $ListSorted = 1; ## we know that these lists are sorted } @@ -127,8 +149,10 @@ sub SWASHNEW { no warnings; $extras = join '', grep /^[^0-9a-fA-F]/, @tmp; $list = join '', - sort { hex $a <=> hex $b } - grep {/^([0-9a-fA-F]+)/ and not $seen{$1}++} @tmp; # XXX doesn't do ranges right + map { $_->[1] } + sort { $a->[0] <=> $b->[0] } + map { /^([0-9a-fA-F]+)/; [ hex($1), $_ ] } + grep { /^([0-9a-fA-F]+)/ and not $seen{$1}++ } @tmp; # XXX doesn't do ranges right } if ($none) { @@ -161,7 +185,13 @@ sub SWASHNEW { print STDERR "$1 => $2\n" if DEBUG; if ($char =~ /[-+!]/) { my ($c,$t) = split(/::/, $name, 2); # bogus use of ::, really - my $subobj = $c->SWASHNEW($t, "", 0, 0, 0); + my $subobj; + if ($c eq 'utf8') { + $subobj = $c->SWASHNEW($t, "", 0, 0, 0); + } + elsif ($c =~ /^([0-9a-fA-F]+)/) { + $subobj = utf8->SWASHNEW("", $c, 0, 0, 0); + } return $subobj unless ref $subobj; push @extras, $name => $subobj; $bits = $subobj->{BITS} if $bits < $subobj->{BITS}; @@ -242,7 +272,7 @@ sub SWASHGET { } else { LINE: - while (/^([0-9a-fA-F]+)(?:\t([0-9a-fA-F]+))?/mg) { + while (/^([0-9a-fA-F]+)(?:[ \t]+([0-9a-fA-F]+))?/mg) { my $min = hex $1; my $max = (defined $2 ? hex $2 : $min); next if $max < $start;