X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2Fcharnames.pm;h=00faa6902c0a2b3787de19e6dec04d84b5d14198;hb=f22a20695eae00da6d6892456727b75f7267e9ea;hp=6037ea8af1ac563c176f0fffa9fdfff3608c79d4;hpb=35c0985d87e203a100f5c5fc6518bee6a2e2fd9d;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/charnames.pm b/lib/charnames.pm index 6037ea8..00faa69 100644 --- a/lib/charnames.pm +++ b/lib/charnames.pm @@ -1,11 +1,10 @@ package charnames; use strict; use warnings; -use Carp; -our $VERSION = '1.02'; +use File::Spec; +our $VERSION = '1.05'; use bytes (); # for $bytes::hint_bits -$charnames::hint_bits = 0x20000; my %alias1 = ( # Icky 3.2 names with parentheses. @@ -43,6 +42,16 @@ my %alias3 = ( ); my $txt; +sub croak +{ + require Carp; goto &Carp::croak; +} # croak + +sub carp +{ + require Carp; goto &Carp::carp; +} # carp + sub alias (@) { @_ or return %alias3; @@ -52,9 +61,21 @@ sub alias (@) sub alias_file ($) { - my $arg = shift; - my $file = -f $arg ? $arg : "unicore/${arg}_alias.pl"; + my ($arg, $file) = @_; + if (-f $arg && File::Spec->file_name_is_absolute ($arg)) { + $file = $arg; + } + elsif ($arg =~ m/^\w+$/) { + $file = "unicore/${arg}_alias.pl"; + } + else { + croak "Charnames alias files can only have identifier characters"; + } if (my @alias = do $file) { + @alias == 1 && !defined $alias[0] and + croak "$file cannot be used as alias file for charnames"; + @alias % 2 and + croak "$file did not return a (valid) list of alias pairs"; alias (@alias); return (1); } @@ -94,7 +115,7 @@ sub charnames ## @off will hold the index into the code/name string of the start and ## end of the name as we find it. - ## If :full, look for the the name exactly + ## If :full, look for the name exactly if ($^H{charnames_full} and $txt =~ /\t\t\Q$name\E$/m) { @off = ($-[0], $+[0]); } @@ -170,28 +191,37 @@ sub import if (not @_) { carp("`use charnames' needs explicit imports list"); } - $^H |= $charnames::hint_bits; $^H{charnames} = \&charnames ; ## ## fill %h keys with our @_ args. ## my ($promote, %h, @args) = (0); - while (@_ and $_ = shift) { - if ($_ eq ":alias" && @_) { + while (my $arg = shift) { + if ($arg eq ":alias") { + @_ or + croak ":alias needs an argument in charnames"; my $alias = shift; if (ref $alias) { ref $alias eq "HASH" or - die "Only HASH reference supported as argument to :alias"; + croak "Only HASH reference supported as argument to :alias"; alias ($alias); next; } - if ($alias =~ m{:(\w+)$} and $1 ne "full" && $1 ne "short") { - alias_file ($1) and $promote = 1, next; + if ($alias =~ m{:(\w+)$}) { + $1 eq "full" || $1 eq "short" and + croak ":alias cannot use existing pragma :$1 (reversed order?)"; + alias_file ($1) and $promote = 1; + next; } - alias_file ($alias) and next; + alias_file ($alias); + next; } - push @args, $_; + if (substr($arg, 0, 1) eq ':' and ! ($arg eq ":full" || $arg eq ":short")) { + warn "unsupported special '$arg' in charnames"; + next; + } + push @args, $arg; } @args == 0 && $promote and @args = (":full"); @h{@args} = (1) x @args; @@ -215,30 +245,31 @@ sub import } } # import -require Unicode::UCD; # for Unicode::UCD::_getcode() - my %viacode; sub viacode { if (@_ != 1) { carp "charnames::viacode() expects one argument"; - return () + return; } my $arg = shift; - my $code = Unicode::UCD::_getcode($arg); + # this comes actually from Unicode::UCD, where it is the named + # function _getcode (), but it avoids the overhead of loading it my $hex; - - if (defined $code) { + if ($arg =~ /^[1-9]\d*$/) { $hex = sprintf "%04X", $arg; + } elsif ($arg =~ /^(?:[Uu]\+|0[xX])?([[:xdigit:]]+)$/) { + $hex = $1; } else { carp("unexpected arg \"$arg\" to charnames::viacode()"); return; } - if ($code > 0x10FFFF) { + # checking the length first is slightly faster + if (length($hex) > 5 && hex($hex) > 0x10FFFF) { carp sprintf "Unicode characters only allocated up to U+10FFFF (you asked for U+%X)", $hex; return; } @@ -247,11 +278,9 @@ sub viacode $txt = do "unicore/Name.pl" unless $txt; - if ($txt =~ m/^$hex\t\t(.+)/m) { - return $viacode{$hex} = $1; - } else { - return; - } + return unless $txt =~ m/^$hex\t\t(.+)/m; + + $viacode{$hex} = $1; } # viacode my %vianame; @@ -310,9 +339,10 @@ charnames - define character names for C<\N{named}> string literal escapes use charnames ":full", ":alias" => { e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE", - }; + }; print "\N{e_ACUTE} is a small letter e with an acute.\n"; + use charnames (); print charnames::viacode(0x1234); # prints "ETHIOPIC SYLLABLE SEE" printf "%04X", charnames::vianame("GOTHIC LETTER AHSA"); # prints "10330" @@ -320,8 +350,8 @@ charnames - define character names for C<\N{named}> string literal escapes Pragma C supports arguments C<:full>, C<:short>, script names and customized aliases. If C<:full> is present, for expansion of -C<\N{CHARNAME}> string C is first looked in the list of -standard Unicode names of chars. If C<:short> is present, and +C<\N{CHARNAME}>, the string C is first looked up in the list of +standard Unicode character names. If C<:short> is present, and C has the form C, then C is looked up as a letter in script C