}
## If we don't have it by now, give up.
- die "Unknown charname '$name'" unless @off;
+ unless (@off) {
+ carp "Unknown charname '$name'";
+ return "\x{FFFD}";
+ }
##
## Now know where in the string the name starts.
if ($^H & $bytes::hint_bits) { # "use bytes" in effect?
use bytes;
return chr $ord if $ord <= 255;
- my $hex = sprintf '%X=0%o', $ord, $ord;
+ my $hex = sprintf "%04x", $ord;
my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
- die "Character 0x$hex with name '$fname' is above 0xFF";
+ croak "Character 0x$hex with name '$fname' is above 0xFF";
}
+
return pack "U", $ord;
}
}
}
+require Unicode::UCD; # for Unicode::UCD::_getcode()
+
+my %viacode;
+
sub viacode
{
if (@_ != 1) {
- carp "charnames::viacode() expects one numeric value";
+ carp "charnames::viacode() expects one numeric argument";
return ()
}
+
my $arg = shift;
+ my $code = Unicode::UCD::_getcode($arg);
my $hex;
- if ($arg =~ m/^[0-9]+$/) {
+
+ if (defined $code) {
$hex = sprintf "%04X", $arg;
} else {
carp("unexpected arg \"$arg\" to charnames::viacode()");
- return ();
+ return;
}
+ if ($code > 0x10FFFF) {
+ carp "Unicode characters only allocated up to 0x10FFFF (you asked for $hex)";
+ return "\x{FFFD}";
+ }
+
+ return $viacode{$hex} if exists $viacode{$hex};
+
$txt = do "unicore/Name.pl" unless $txt;
if ($txt =~ m/^$hex\t\t(.+)/m) {
- return $1;
+ return $viacode{$hex} = $1;
+ } else {
+ return;
+ }
+}
+
+my %vianame;
+
+sub vianame
+{
+ if (@_ != 1) {
+ carp "charnames::vianame() expects one name argument";
+ return ()
+ }
+
+ my $arg = shift;
+
+ return $vianame{$arg} if exists $vianame{$arg};
+
+ $txt = do "unicore/Name.pl" unless $txt;
+
+ if ($txt =~ m/^([0-9A-F]+)\t\t($arg)/m) {
+ return $vianame{$arg} = hex $1;
} else {
- return ();
+ return;
}
}
print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n";
print charname::viacode(0x1234); # prints "ETHIOPIC SYLLABLE SEE"
+ printf "%04X", charname::vianame("GOTHIC LETTER AHSA"); # prints "10330"
=head1 DESCRIPTION
SCRIPTNAME LETTER CHARNAME
in the table of standard Unicode names. If C<CHARNAME> is lowercase,
-then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant is
-ignored.
+then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant
+is ignored.
+
+Note that C<\N{...}> is compile-time, it's a special form of string
+constant used inside double-quoted strings: in other words, you cannot
+use variables inside the C<\N{...}>. If you want similar run-time
+functionality, use charnames::vianame().
+
+For the C0 and C1 control characters (U+0000..U+001F, U+0080..U+009F)
+as of Unicode 3.1, there are no official Unicode names but you can
+use instead the ISO 6429 names (LINE FEED, ESCAPE, and so forth).
+In Unicode 3.2 some naming changes will happen since ISO 6429 has been
+updated. Also note that the U+UU80, U+0081, U+0084, and U+0099
+do not have names even in ISO 6429.
=head1 CUSTOM TRANSLATORS
prints "FOUR TEARDROP-SPOKED ASTERISK".
-Returns nothing if no name is known for the code.
+Returns undef if no name is known for the code.
This works only for the standard names, and does not yet aply
to custom translators.
+=head1 charnames::vianame(code)
+
+Returns the code point indicated by the name.
+The example
+
+ printf "%04X", charnames::vianame("FOUR TEARDROP-SPOKED ASTERISK");
+
+prints "2722".
+
+Returns undef if no name is known for the name.
+
+This works only for the standard names, and does not yet aply
+to custom translators.
+
+=head1 ILLEGAL CHARACTERS
+
+If you ask for a character that does not exist, a warning is given
+and the special Unicode I<replacement character> "\x{FFFD}" is returned.
+
=head1 BUGS
Since evaluation of the translation function happens in a middle of