7 use bytes (); # for $bytes::hint_bits
8 $charnames::hint_bits = 0x20000;
12 # This is not optimized in any way yet
17 ## Suck in the code/name list as a big string.
19 ## "0052\t\tLATIN CAPITAL LETTER R\n"
20 $txt = do "unicore/Name.pl" unless $txt;
22 ## @off will hold the index into the code/name string of the start and
23 ## end of the name as we find it.
26 ## If :full, look for the the name exactly
27 if ($^H{charnames_full} and $txt =~ /\t\t$name$/m) {
28 @off = ($-[0], $+[0]);
31 ## If we didn't get above, and :short allowed, look for the short name.
32 ## The short name is like "greek:Sigma"
34 if ($^H{charnames_short} and $name =~ /^(.+?):(.+)/s) {
35 my ($script, $cname) = ($1,$2);
36 my $case = ( $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
37 if ($txt =~ m/\t\t\U$script\E (?:$case )?LETTER \U$cname$/m) {
38 @off = ($-[0], $+[0]);
43 ## If we still don't have it, check for the name among the loaded
47 my $case = ( $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
48 for my $script ( @{$^H{charnames_scripts}} )
50 if ($txt =~ m/\t\t$script (?:$case )?LETTER \U$name$/m) {
51 @off = ($-[0], $+[0]);
57 ## If we don't have it by now, give up.
59 carp "Unknown charname '$name'";
64 ## Now know where in the string the name starts.
65 ## The code, in hex, is befor that.
67 ## The code can be 4-6 characters long, so we've got to sort of
68 ## go look for it, just after the newline that comes before $off[0].
70 ## This would be much easier if unicore/Name.pl had info in
71 ## a name/code order, instead of code/name order.
73 ## The +1 after the rindex() is to skip past the newline we're finding,
74 ## or, if the rindex() fails, to put us to an offset of zero.
76 my $hexstart = rindex($txt, "\n", $off[0]) + 1;
78 ## we know where it starts, so turn into number - the ordinal for the char.
79 my $ord = hex substr($txt, $hexstart, $off[0] - $hexstart);
81 if ($^H & $bytes::hint_bits) { # "use bytes" in effect?
83 return chr $ord if $ord <= 255;
84 my $hex = sprintf "%04x", $ord;
85 my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
86 croak "Character 0x$hex with name '$fname' is above 0xFF";
89 return pack "U", $ord;
94 shift; ## ignore class name
98 carp("`use charnames' needs explicit imports list");
100 $^H |= $charnames::hint_bits;
101 $^H{charnames} = \&charnames ;
104 ## fill %h keys with our @_ args.
109 $^H{charnames_full} = delete $h{':full'};
110 $^H{charnames_short} = delete $h{':short'};
111 $^H{charnames_scripts} = [map uc, keys %h];
114 ## If utf8? warnings are enabled, and some scripts were given,
115 ## see if at least we can find one letter of each script.
117 if (warnings::enabled('utf8') && @{$^H{charnames_scripts}})
119 $txt = do "unicore/Name.pl" unless $txt;
121 for my $script (@{$^H{charnames_scripts}})
123 if (not $txt =~ m/\t\t$script (?:CAPITAL |SMALL )?LETTER /) {
124 warnings::warn('utf8', "No such script: '$script'");
130 require Unicode::UCD; # for Unicode::UCD::_getcode()
137 carp "charnames::viacode() expects one numeric argument";
142 my $code = Unicode::UCD::_getcode($arg);
147 $hex = sprintf "%04X", $arg;
149 carp("unexpected arg \"$arg\" to charnames::viacode()");
153 if ($code > 0x10FFFF) {
154 carp "Unicode characters only allocated up to 0x10FFFF (you asked for $hex)";
158 return $viacode{$hex} if exists $viacode{$hex};
160 $txt = do "unicore/Name.pl" unless $txt;
162 if ($txt =~ m/^$hex\t\t(.+)/m) {
163 return $viacode{$hex} = $1;
174 carp "charnames::vianame() expects one name argument";
180 return $vianame{$arg} if exists $vianame{$arg};
182 $txt = do "unicore/Name.pl" unless $txt;
184 if ($txt =~ m/^([0-9A-F]+)\t\t($arg)/m) {
185 return $vianame{$arg} = hex $1;
197 charnames - define character names for C<\N{named}> string literal escapes.
201 use charnames ':full';
202 print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n";
204 use charnames ':short';
205 print "\N{greek:Sigma} is an upper-case sigma.\n";
207 use charnames qw(cyrillic greek);
208 print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n";
210 print charname::viacode(0x1234); # prints "ETHIOPIC SYLLABLE SEE"
211 printf "%04X", charname::vianame("GOTHIC LETTER AHSA"); # prints "10330"
215 Pragma C<use charnames> supports arguments C<:full>, C<:short> and
216 script names. If C<:full> is present, for expansion of
217 C<\N{CHARNAME}}> string C<CHARNAME> is first looked in the list of
218 standard Unicode names of chars. If C<:short> is present, and
219 C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up
220 as a letter in script C<SCRIPT>. If pragma C<use charnames> is used
221 with script name arguments, then for C<\N{CHARNAME}}> the name
222 C<CHARNAME> is looked up as a letter in the given scripts (in the
225 For lookup of C<CHARNAME> inside a given script C<SCRIPTNAME>
226 this pragma looks for the names
228 SCRIPTNAME CAPITAL LETTER CHARNAME
229 SCRIPTNAME SMALL LETTER CHARNAME
230 SCRIPTNAME LETTER CHARNAME
232 in the table of standard Unicode names. If C<CHARNAME> is lowercase,
233 then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant
236 Note that C<\N{...}> is compile-time, it's a special form of string
237 constant used inside double-quoted strings: in other words, you cannot
238 use variables inside the C<\N{...}>. If you want similar run-time
239 functionality, use charnames::vianame().
241 =head1 CUSTOM TRANSLATORS
243 The mechanism of translation of C<\N{...}> escapes is general and not
244 hardwired into F<charnames.pm>. A module can install custom
245 translations (inside the scope which C<use>s the module) with the
246 following magic incantation:
248 use charnames (); # for $charnames::hint_bits
251 $^H |= $charnames::hint_bits;
252 $^H{charnames} = \&translator;
255 Here translator() is a subroutine which takes C<CHARNAME> as an
256 argument, and returns text to insert into the string instead of the
257 C<\N{CHARNAME}> escape. Since the text to insert should be different
258 in C<bytes> mode and out of it, the function should check the current
259 state of C<bytes>-flag as in:
261 use bytes (); # for $bytes::hint_bits
263 if ($^H & $bytes::hint_bits) {
264 return bytes_translator(@_);
267 return utf8_translator(@_);
271 =head1 charnames::viacode(code)
273 Returns the full name of the character indicated by the numeric code.
276 print charnames::viacode(0x2722);
278 prints "FOUR TEARDROP-SPOKED ASTERISK".
280 Returns undef if no name is known for the code.
282 This works only for the standard names, and does not yet aply
283 to custom translators.
285 =head1 charnames::vianame(code)
287 Returns the code point indicated by the name.
290 printf "%04X", charnames::vianame("FOUR TEARDROP-SPOKED ASTERISK");
294 Returns undef if no name is known for the name.
296 This works only for the standard names, and does not yet aply
297 to custom translators.
299 =head1 ILLEGAL CHARACTERS
301 If you ask for a character that does not exist, a warning is given
302 and the special Unicode I<replacement character> "\x{FFFD}" is returned.
306 Since evaluation of the translation function happens in a middle of
307 compilation (of a string literal), the translation function should not
308 do any C<eval>s or C<require>s. This restriction should be lifted in
309 a future version of Perl.