Change #7160 had a nasty typo.
[p5sagit/p5-mst-13.2.git] / lib / charnames.pm
CommitLineData
423cee85 1package charnames;
d5448623 2use bytes (); # for $bytes::hint_bits
3$charnames::hint_bits = 0x20000;
423cee85 4
423cee85 5my $txt;
6
7# This is not optimized in any way yet
8sub charnames {
9 $name = shift;
10 $txt = do "unicode/Name.pl" unless $txt;
11 my @off;
12 if ($^H{charnames_full} and $txt =~ /\t\t$name$/m) {
13 @off = ($-[0], $+[0]);
14 }
15 unless (@off) {
16 if ($^H{charnames_short} and $name =~ /^(.*?):(.*)/s) {
17 my ($script, $cname) = ($1,$2);
18 my $case = ( $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
19 if ($txt =~ m/\t\t\U$script\E (?:$case )?LETTER \U$cname$/m) {
20 @off = ($-[0], $+[0]);
21 }
22 }
23 }
24 unless (@off) {
25 my $case = ( $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
26 for ( @{$^H{charnames_scripts}} ) {
27 (@off = ($-[0], $+[0])), last
28 if $txt =~ m/\t\t$_ (?:$case )?LETTER \U$name$/m;
29 }
30 }
31 die "Unknown charname '$name'" unless @off;
32
423cee85 33 my $ord = hex substr $txt, $off[0] - 4, 4;
d5448623 34 if ($^H & $bytes::hint_bits) { # "use bytes" in effect?
8058d7ab 35 use bytes;
d41ff1b8 36 return chr $ord if $ord <= 255;
37 my $hex = sprintf '%X=0%o', $ord, $ord;
38 my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
39 die "Character 0x$hex with name '$fname' is above 0xFF";
423cee85 40 }
d41ff1b8 41 return chr $ord;
423cee85 42}
43
44sub import {
45 shift;
d5448623 46 die "`use charnames' needs explicit imports list" unless @_;
47 $^H |= $charnames::hint_bits;
423cee85 48 $^H{charnames} = \&charnames ;
49 my %h;
50 @h{@_} = (1) x @_;
51 $^H{charnames_full} = delete $h{':full'};
52 $^H{charnames_short} = delete $h{':short'};
53 $^H{charnames_scripts} = [map uc, keys %h];
54}
55
56
571;
58__END__
59
60=head1 NAME
61
4a2d328f 62charnames - define character names for C<\N{named}> string literal escape.
423cee85 63
64=head1 SYNOPSIS
65
66 use charnames ':full';
4a2d328f 67 print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n";
423cee85 68
69 use charnames ':short';
4a2d328f 70 print "\N{greek:Sigma} is an upper-case sigma.\n";
423cee85 71
72 use charnames qw(cyrillic greek);
4a2d328f 73 print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n";
423cee85 74
75=head1 DESCRIPTION
76
77Pragma C<use charnames> supports arguments C<:full>, C<:short> and
78script names. If C<:full> is present, for expansion of
4a2d328f 79C<\N{CHARNAME}}> string C<CHARNAME> is first looked in the list of
423cee85 80standard Unicode names of chars. If C<:short> is present, and
81C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up
82as a letter in script C<SCRIPT>. If pragma C<use charnames> is used
4a2d328f 83with script name arguments, then for C<\N{CHARNAME}}> the name
423cee85 84C<CHARNAME> is looked up as a letter in the given scripts (in the
85specified order).
86
87For lookup of C<CHARNAME> inside a given script C<SCRIPTNAME>
d5448623 88this pragma looks for the names
423cee85 89
90 SCRIPTNAME CAPITAL LETTER CHARNAME
91 SCRIPTNAME SMALL LETTER CHARNAME
92 SCRIPTNAME LETTER CHARNAME
93
94in the table of standard Unicode names. If C<CHARNAME> is lowercase,
d5448623 95then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant is
423cee85 96ignored.
97
98=head1 CUSTOM TRANSLATORS
99
d5448623 100The mechanism of translation of C<\N{...}> escapes is general and not
423cee85 101hardwired into F<charnames.pm>. A module can install custom
d5448623 102translations (inside the scope which C<use>s the module) with the
423cee85 103following magic incantation:
104
d5448623 105 use charnames (); # for $charnames::hint_bits
106 sub import {
107 shift;
108 $^H |= $charnames::hint_bits;
109 $^H{charnames} = \&translator;
110 }
423cee85 111
112Here translator() is a subroutine which takes C<CHARNAME> as an
113argument, and returns text to insert into the string instead of the
4a2d328f 114C<\N{CHARNAME}> escape. Since the text to insert should be different
d5448623 115in C<bytes> mode and out of it, the function should check the current
116state of C<bytes>-flag as in:
117
118 use bytes (); # for $bytes::hint_bits
119 sub translator {
120 if ($^H & $bytes::hint_bits) {
121 return bytes_translator(@_);
122 }
123 else {
124 return utf8_translator(@_);
125 }
423cee85 126 }
423cee85 127
128=head1 BUGS
129
130Since evaluation of the translation function happens in a middle of
131compilation (of a string literal), the translation function should not
132do any C<eval>s or C<require>s. This restriction should be lifted in
133a future version of Perl.
134
135=cut