package charnames;
use strict;
use warnings;
-use Carp;
-our $VERSION = '1.02';
+use File::Spec;
+our $VERSION = '1.05';
use bytes (); # for $bytes::hint_bits
-$charnames::hint_bits = 0x20000;
+$charnames::hint_bits = 0x20000; # HINT_LOCALIZE_HH
my %alias1 = (
# Icky 3.2 names with parentheses.
);
my $txt;
+sub croak
+{
+ require Carp; goto &Carp::croak;
+} # croak
+
+sub carp
+{
+ require Carp; goto &Carp::carp;
+} # carp
+
sub alias (@)
{
@_ or return %alias3;
sub alias_file ($)
{
- my $arg = shift;
- my $file = -f $arg ? $arg : "unicore/${arg}_alias.pl";
+ my ($arg, $file) = @_;
+ if (-f $arg && File::Spec->file_name_is_absolute ($arg)) {
+ $file = $arg;
+ }
+ elsif ($arg =~ m/^\w+$/) {
+ $file = "unicore/${arg}_alias.pl";
+ }
+ else {
+ croak "Charnames alias files can only have identifier characters";
+ }
if (my @alias = do $file) {
+ @alias == 1 && !defined $alias[0] and
+ croak "$file cannot be used as alias file for charnames";
+ @alias % 2 and
+ croak "$file did not return a (valid) list of alias pairs";
alias (@alias);
return (1);
}
## @off will hold the index into the code/name string of the start and
## end of the name as we find it.
- ## If :full, look for the the name exactly
+ ## If :full, look for the name exactly
if ($^H{charnames_full} and $txt =~ /\t\t\Q$name\E$/m) {
@off = ($-[0], $+[0]);
}
## fill %h keys with our @_ args.
##
my ($promote, %h, @args) = (0);
- while (@_ and $_ = shift) {
- if ($_ eq ":alias" && @_) {
+ while (my $arg = shift) {
+ if ($arg eq ":alias") {
+ @_ or
+ croak ":alias needs an argument in charnames";
my $alias = shift;
if (ref $alias) {
ref $alias eq "HASH" or
- die "Only HASH reference supported as argument to :alias";
+ croak "Only HASH reference supported as argument to :alias";
alias ($alias);
next;
}
- if ($alias =~ m{:(\w+)$} and $1 ne "full" && $1 ne "short") {
- alias_file ($1) and $promote = 1, next;
+ if ($alias =~ m{:(\w+)$}) {
+ $1 eq "full" || $1 eq "short" and
+ croak ":alias cannot use existing pragma :$1 (reversed order?)";
+ alias_file ($1) and $promote = 1;
+ next;
}
- alias_file ($alias) and next;
+ alias_file ($alias);
+ next;
}
- push @args, $_;
+ if (substr($arg, 0, 1) eq ':' and ! ($arg eq ":full" || $arg eq ":short")) {
+ warn "unsupported special '$arg' in charnames";
+ next;
+ }
+ push @args, $arg;
}
@args == 0 && $promote and @args = (":full");
@h{@args} = (1) x @args;
}
} # import
-require Unicode::UCD; # for Unicode::UCD::_getcode()
-
my %viacode;
sub viacode
{
if (@_ != 1) {
carp "charnames::viacode() expects one argument";
- return ()
+ return;
}
my $arg = shift;
- my $code = Unicode::UCD::_getcode($arg);
+ # this comes actually from Unicode::UCD, where it is the named
+ # function _getcode (), but it avoids the overhead of loading it
my $hex;
-
- if (defined $code) {
+ if ($arg =~ /^[1-9]\d*$/) {
$hex = sprintf "%04X", $arg;
+ } elsif ($arg =~ /^(?:[Uu]\+|0[xX])?([[:xdigit:]]+)$/) {
+ $hex = $1;
} else {
carp("unexpected arg \"$arg\" to charnames::viacode()");
return;
}
- if ($code > 0x10FFFF) {
+ # checking the length first is slightly faster
+ if (length($hex) > 5 && hex($hex) > 0x10FFFF) {
carp sprintf "Unicode characters only allocated up to U+10FFFF (you asked for U+%X)", $hex;
return;
}
$txt = do "unicore/Name.pl" unless $txt;
- if ($txt =~ m/^$hex\t\t(.+)/m) {
- return $viacode{$hex} = $1;
- } else {
- return;
- }
+ return unless $txt =~ m/^$hex\t\t(.+)/m;
+
+ $viacode{$hex} = $1;
} # viacode
my %vianame;
use charnames ":full", ":alias" => {
e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE",
- };
+ };
print "\N{e_ACUTE} is a small letter e with an acute.\n";
+ use charnames ();
print charnames::viacode(0x1234); # prints "ETHIOPIC SYLLABLE SEE"
printf "%04X", charnames::vianame("GOTHIC LETTER AHSA"); # prints "10330"
Pragma C<use charnames> supports arguments C<:full>, C<:short>, script
names and customized aliases. If C<:full> is present, for expansion of
-C<\N{CHARNAME}> string C<CHARNAME> is first looked in the list of
-standard Unicode names of chars. If C<:short> is present, and
+C<\N{CHARNAME}>, the string C<CHARNAME> is first looked up in the list of
+standard Unicode character names. If C<:short> is present, and
C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up
as a letter in script C<SCRIPT>. If pragma C<use charnames> is used
with script name arguments, then for C<\N{CHARNAME}> the name