7 use bytes (); # for $bytes::hint_bits
8 $charnames::hint_bits = 0x20000;
12 # This is not optimized in any way yet
17 ## Suck in the code/name list as a big string.
19 ## "0052\t\tLATIN CAPITAL LETTER R\n"
20 $txt = do "unicore/Name.pl" unless $txt;
22 ## @off will hold the index into the code/name string of the start and
23 ## end of the name as we find it.
26 ## If :full, look for the the name exactly
27 if ($^H{charnames_full} and $txt =~ /\t\t$name$/m) {
28 @off = ($-[0], $+[0]);
31 ## If we didn't get above, and :short allowed, look for the short name.
32 ## The short name is like "greek:Sigma"
34 if ($^H{charnames_short} and $name =~ /^(.+?):(.+)/s) {
35 my ($script, $cname) = ($1,$2);
36 my $case = ( $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
37 if ($txt =~ m/\t\t\U$script\E (?:$case )?LETTER \U$cname$/m) {
38 @off = ($-[0], $+[0]);
43 ## If we still don't have it, check for the name among the loaded
47 my $case = ( $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
48 for my $script ( @{$^H{charnames_scripts}} )
50 if ($txt =~ m/\t\t$script (?:$case )?LETTER \U$name$/m) {
51 @off = ($-[0], $+[0]);
57 ## If we don't have it by now, give up.
58 die "Unknown charname '$name'" unless @off;
61 ## Now know where in the string the name starts.
62 ## The code, in hex, is befor that.
64 ## The code can be 4-6 characters long, so we've got to sort of
65 ## go look for it, just after the newline that comes before $off[0].
67 ## This would be much easier if unicore/Name.pl had info in
68 ## a name/code order, instead of code/name order.
70 ## The +1 after the rindex() is to skip past the newline we're finding,
71 ## or, if the rindex() fails, to put us to an offset of zero.
73 my $hexstart = rindex($txt, "\n", $off[0]) + 1;
75 ## we know where it starts, so turn into number - the ordinal for the char.
76 my $ord = hex substr($txt, $hexstart, $off[0] - $hexstart);
78 if ($^H & $bytes::hint_bits) { # "use bytes" in effect?
80 return chr $ord if $ord <= 255;
81 my $hex = sprintf '%X=0%o', $ord, $ord;
82 my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
83 die "Character 0x$hex with name '$fname' is above 0xFF";
85 return pack "U", $ord;
90 shift; ## ignore class name
94 carp("`use charnames' needs explicit imports list");
96 $^H |= $charnames::hint_bits;
97 $^H{charnames} = \&charnames ;
100 ## fill %h keys with our @_ args.
105 $^H{charnames_full} = delete $h{':full'};
106 $^H{charnames_short} = delete $h{':short'};
107 $^H{charnames_scripts} = [map uc, keys %h];
110 ## If utf8? warnings are enabled, and some scripts were given,
111 ## see if at least we can find one letter of each script.
113 if (warnings::enabled('utf8') && @{$^H{charnames_scripts}})
115 $txt = do "unicore/Name.pl" unless $txt;
117 for my $script (@{$^H{charnames_scripts}})
119 if (not $txt =~ m/\t\t$script (?:CAPITAL |SMALL )?LETTER /) {
120 warnings::warn('utf8', "No such script: '$script'");
131 carp "charnames::viacode() expects one numeric argument";
137 if ($arg =~ m/^[0-9]+$/) {
138 $hex = sprintf "%04X", $arg;
140 carp("unexpected arg \"$arg\" to charnames::viacode()");
144 return $viacode{$hex} if exists $viacode{$hex};
146 $txt = do "unicore/Name.pl" unless $txt;
148 if ($txt =~ m/^$hex\t\t(.+)/m) {
149 return $viacode{$hex} = $1;
160 carp "charnames::vianame() expects one name argument";
166 return $vianame{$arg} if exists $vianame{$arg};
168 $txt = do "unicore/Name.pl" unless $txt;
170 if ($txt =~ m/^([0-9A-F]+)\t\t($arg)/m) {
171 return $vianame{$arg} = hex $1;
183 charnames - define character names for C<\N{named}> string literal escapes.
187 use charnames ':full';
188 print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n";
190 use charnames ':short';
191 print "\N{greek:Sigma} is an upper-case sigma.\n";
193 use charnames qw(cyrillic greek);
194 print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n";
196 print charname::viacode(0x1234); # prints "ETHIOPIC SYLLABLE SEE"
197 printf "%04X", charname::vianame("GOTHIC LETTER AHSA"); # prints "10330"
201 Pragma C<use charnames> supports arguments C<:full>, C<:short> and
202 script names. If C<:full> is present, for expansion of
203 C<\N{CHARNAME}}> string C<CHARNAME> is first looked in the list of
204 standard Unicode names of chars. If C<:short> is present, and
205 C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up
206 as a letter in script C<SCRIPT>. If pragma C<use charnames> is used
207 with script name arguments, then for C<\N{CHARNAME}}> the name
208 C<CHARNAME> is looked up as a letter in the given scripts (in the
211 For lookup of C<CHARNAME> inside a given script C<SCRIPTNAME>
212 this pragma looks for the names
214 SCRIPTNAME CAPITAL LETTER CHARNAME
215 SCRIPTNAME SMALL LETTER CHARNAME
216 SCRIPTNAME LETTER CHARNAME
218 in the table of standard Unicode names. If C<CHARNAME> is lowercase,
219 then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant
222 Note that C<\N{...}> is compile-time, it's a special form of string
223 constant used inside double-quoted strings: in other words, you cannot
224 use variables inside the C<\N{...}>. If you want similar run-time
225 functionality, use charnames::vianame().
227 =head1 CUSTOM TRANSLATORS
229 The mechanism of translation of C<\N{...}> escapes is general and not
230 hardwired into F<charnames.pm>. A module can install custom
231 translations (inside the scope which C<use>s the module) with the
232 following magic incantation:
234 use charnames (); # for $charnames::hint_bits
237 $^H |= $charnames::hint_bits;
238 $^H{charnames} = \&translator;
241 Here translator() is a subroutine which takes C<CHARNAME> as an
242 argument, and returns text to insert into the string instead of the
243 C<\N{CHARNAME}> escape. Since the text to insert should be different
244 in C<bytes> mode and out of it, the function should check the current
245 state of C<bytes>-flag as in:
247 use bytes (); # for $bytes::hint_bits
249 if ($^H & $bytes::hint_bits) {
250 return bytes_translator(@_);
253 return utf8_translator(@_);
257 =head1 charnames::viacode(code)
259 Returns the full name of the character indicated by the numeric code.
262 print charnames::viacode(0x2722);
264 prints "FOUR TEARDROP-SPOKED ASTERISK".
266 Returns undef if no name is known for the code.
268 This works only for the standard names, and does not yet aply
269 to custom translators.
271 =head1 charnames::vianame(code)
273 Returns the code point indicated by the name.
276 printf "%04X", charnames::vianame("FOUR TEARDROP-SPOKED ASTERISK");
280 Returns undef if no name is known for the name.
282 This works only for the standard names, and does not yet aply
283 to custom translators.
287 Since evaluation of the translation function happens in a middle of
288 compilation (of a string literal), the translation function should not
289 do any C<eval>s or C<require>s. This restriction should be lifted in
290 a future version of Perl.