7 use bytes (); # for $bytes::hint_bits
8 $charnames::hint_bits = 0x20000;
12 # This is not optimized in any way yet
17 ## Suck in the code/name list as a big string.
19 ## "0052\t\tLATIN CAPITAL LETTER R\n"
20 $txt = do "unicore/Name.pl" unless $txt;
22 ## @off will hold the index into the code/name string of the start and
23 ## end of the name as we find it.
26 ## If :full, look for the the name exactly
27 if ($^H{charnames_full} and $txt =~ /\t\t$name$/m) {
28 @off = ($-[0], $+[0]);
31 ## If we didn't get above, and :short allowed, look for the short name.
32 ## The short name is like "greek:Sigma"
34 if ($^H{charnames_short} and $name =~ /^(.+?):(.+)/s) {
35 my ($script, $cname) = ($1,$2);
36 my $case = ( $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
37 if ($txt =~ m/\t\t\U$script\E (?:$case )?LETTER \U$cname$/m) {
38 @off = ($-[0], $+[0]);
43 ## If we still don't have it, check for the name among the loaded
47 my $case = ( $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
48 for my $script ( @{$^H{charnames_scripts}} )
50 if ($txt =~ m/\t\t$script (?:$case )?LETTER \U$name$/m) {
51 @off = ($-[0], $+[0]);
57 ## If we don't have it by now, give up.
58 die "Unknown charname '$name'" unless @off;
61 ## Now know where in the string the name starts.
62 ## The code, in hex, is befor that.
64 ## The code can be 4-6 characters long, so we've got to sort of
65 ## go look for it, just after the newline that comes before $off[0].
67 ## This would be much easier if unicore/Name.pl had info in
68 ## a name/code order, instead of code/name order.
70 ## The +1 after the rindex() is to skip past the newline we're finding,
71 ## or, if the rindex() fails, to put us to an offset of zero.
73 my $hexstart = rindex($txt, "\n", $off[0]) + 1;
75 ## we know where it starts, so turn into number - the ordinal for the char.
76 my $ord = hex substr($txt, $hexstart, $off[0] - $hexstart);
78 if ($^H & $bytes::hint_bits) { # "use bytes" in effect?
80 return chr $ord if $ord <= 255;
81 my $hex = sprintf '%X=0%o', $ord, $ord;
82 my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
83 die "Character 0x$hex with name '$fname' is above 0xFF";
85 return pack "U", $ord;
90 shift; ## ignore class name
94 carp("`use charnames' needs explicit imports list");
96 $^H |= $charnames::hint_bits;
97 $^H{charnames} = \&charnames ;
100 ## fill %h keys with our @_ args.
105 $^H{charnames_full} = delete $h{':full'};
106 $^H{charnames_short} = delete $h{':short'};
107 $^H{charnames_scripts} = [map uc, keys %h];
110 ## If utf8? warnings are enabled, and some scripts were given,
111 ## see if at least we can find one letter of each script.
113 if (warnings::enabled('utf8') && @{$^H{charnames_scripts}})
115 $txt = do "unicore/Name.pl" unless $txt;
117 for my $script (@{$^H{charnames_scripts}})
119 if (not $txt =~ m/\t\t$script (?:CAPITAL |SMALL )?LETTER /) {
120 warnings::warn('utf8', "No such script: '$script'");
129 carp "charnames::viacode() expects one numeric value";
135 if ($arg =~ m/^[0-9]+$/) {
136 $hex = sprintf "%04X", $arg;
138 carp("unexpected arg \"$arg\" to charnames::viacode()");
142 $txt = do "unicore/Name.pl" unless $txt;
144 if ($txt =~ m/^$hex\t\t(.+)/m) {
157 charnames - define character names for C<\N{named}> string literal escapes.
161 use charnames ':full';
162 print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n";
164 use charnames ':short';
165 print "\N{greek:Sigma} is an upper-case sigma.\n";
167 use charnames qw(cyrillic greek);
168 print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n";
170 print charname::viacode(0x1234); # prints "ETHIOPIC SYLLABLE SEE"
174 Pragma C<use charnames> supports arguments C<:full>, C<:short> and
175 script names. If C<:full> is present, for expansion of
176 C<\N{CHARNAME}}> string C<CHARNAME> is first looked in the list of
177 standard Unicode names of chars. If C<:short> is present, and
178 C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up
179 as a letter in script C<SCRIPT>. If pragma C<use charnames> is used
180 with script name arguments, then for C<\N{CHARNAME}}> the name
181 C<CHARNAME> is looked up as a letter in the given scripts (in the
184 For lookup of C<CHARNAME> inside a given script C<SCRIPTNAME>
185 this pragma looks for the names
187 SCRIPTNAME CAPITAL LETTER CHARNAME
188 SCRIPTNAME SMALL LETTER CHARNAME
189 SCRIPTNAME LETTER CHARNAME
191 in the table of standard Unicode names. If C<CHARNAME> is lowercase,
192 then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant is
195 =head1 CUSTOM TRANSLATORS
197 The mechanism of translation of C<\N{...}> escapes is general and not
198 hardwired into F<charnames.pm>. A module can install custom
199 translations (inside the scope which C<use>s the module) with the
200 following magic incantation:
202 use charnames (); # for $charnames::hint_bits
205 $^H |= $charnames::hint_bits;
206 $^H{charnames} = \&translator;
209 Here translator() is a subroutine which takes C<CHARNAME> as an
210 argument, and returns text to insert into the string instead of the
211 C<\N{CHARNAME}> escape. Since the text to insert should be different
212 in C<bytes> mode and out of it, the function should check the current
213 state of C<bytes>-flag as in:
215 use bytes (); # for $bytes::hint_bits
217 if ($^H & $bytes::hint_bits) {
218 return bytes_translator(@_);
221 return utf8_translator(@_);
225 =head1 charnames::viacode(code)
227 Returns the full name of the character indicated by the numeric code.
230 print charnames::viacode(0x2722);
232 prints "FOUR TEARDROP-SPOKED ASTERISK".
234 Returns nothing if no name is known for the code.
236 This works only for the standard names, and does not yet aply
237 to custom translators.
241 Since evaluation of the translation function happens in a middle of
242 compilation (of a string literal), the translation function should not
243 do any C<eval>s or C<require>s. This restriction should be lifted in
244 a future version of Perl.