update Changes
[p5sagit/p5-mst-13.2.git] / lib / charnames.pm
CommitLineData
423cee85 1package charnames;
d5448623 2use bytes (); # for $bytes::hint_bits
3$charnames::hint_bits = 0x20000;
423cee85 4
5my $fname = 'unicode/UnicodeData-Latest.txt';
6my $txt;
7
8# This is not optimized in any way yet
9sub charnames {
10 $name = shift;
11 $txt = do "unicode/Name.pl" unless $txt;
12 my @off;
13 if ($^H{charnames_full} and $txt =~ /\t\t$name$/m) {
14 @off = ($-[0], $+[0]);
15 }
16 unless (@off) {
17 if ($^H{charnames_short} and $name =~ /^(.*?):(.*)/s) {
18 my ($script, $cname) = ($1,$2);
19 my $case = ( $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
20 if ($txt =~ m/\t\t\U$script\E (?:$case )?LETTER \U$cname$/m) {
21 @off = ($-[0], $+[0]);
22 }
23 }
24 }
25 unless (@off) {
26 my $case = ( $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
27 for ( @{$^H{charnames_scripts}} ) {
28 (@off = ($-[0], $+[0])), last
29 if $txt =~ m/\t\t$_ (?:$case )?LETTER \U$name$/m;
30 }
31 }
32 die "Unknown charname '$name'" unless @off;
33
423cee85 34 my $ord = hex substr $txt, $off[0] - 4, 4;
d5448623 35 if ($^H & $bytes::hint_bits) { # "use bytes" in effect?
8058d7ab 36 use bytes;
d41ff1b8 37 return chr $ord if $ord <= 255;
38 my $hex = sprintf '%X=0%o', $ord, $ord;
39 my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
40 die "Character 0x$hex with name '$fname' is above 0xFF";
423cee85 41 }
d41ff1b8 42 return chr $ord;
423cee85 43}
44
45sub import {
46 shift;
d5448623 47 die "`use charnames' needs explicit imports list" unless @_;
48 $^H |= $charnames::hint_bits;
423cee85 49 $^H{charnames} = \&charnames ;
50 my %h;
51 @h{@_} = (1) x @_;
52 $^H{charnames_full} = delete $h{':full'};
53 $^H{charnames_short} = delete $h{':short'};
54 $^H{charnames_scripts} = [map uc, keys %h];
55}
56
57
581;
59__END__
60
61=head1 NAME
62
4a2d328f 63charnames - define character names for C<\N{named}> string literal escape.
423cee85 64
65=head1 SYNOPSIS
66
67 use charnames ':full';
4a2d328f 68 print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n";
423cee85 69
70 use charnames ':short';
4a2d328f 71 print "\N{greek:Sigma} is an upper-case sigma.\n";
423cee85 72
73 use charnames qw(cyrillic greek);
4a2d328f 74 print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n";
423cee85 75
76=head1 DESCRIPTION
77
78Pragma C<use charnames> supports arguments C<:full>, C<:short> and
79script names. If C<:full> is present, for expansion of
4a2d328f 80C<\N{CHARNAME}}> string C<CHARNAME> is first looked in the list of
423cee85 81standard Unicode names of chars. If C<:short> is present, and
82C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up
83as a letter in script C<SCRIPT>. If pragma C<use charnames> is used
4a2d328f 84with script name arguments, then for C<\N{CHARNAME}}> the name
423cee85 85C<CHARNAME> is looked up as a letter in the given scripts (in the
86specified order).
87
88For lookup of C<CHARNAME> inside a given script C<SCRIPTNAME>
d5448623 89this pragma looks for the names
423cee85 90
91 SCRIPTNAME CAPITAL LETTER CHARNAME
92 SCRIPTNAME SMALL LETTER CHARNAME
93 SCRIPTNAME LETTER CHARNAME
94
95in the table of standard Unicode names. If C<CHARNAME> is lowercase,
d5448623 96then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant is
423cee85 97ignored.
98
99=head1 CUSTOM TRANSLATORS
100
d5448623 101The mechanism of translation of C<\N{...}> escapes is general and not
423cee85 102hardwired into F<charnames.pm>. A module can install custom
d5448623 103translations (inside the scope which C<use>s the module) with the
423cee85 104following magic incantation:
105
d5448623 106 use charnames (); # for $charnames::hint_bits
107 sub import {
108 shift;
109 $^H |= $charnames::hint_bits;
110 $^H{charnames} = \&translator;
111 }
423cee85 112
113Here translator() is a subroutine which takes C<CHARNAME> as an
114argument, and returns text to insert into the string instead of the
4a2d328f 115C<\N{CHARNAME}> escape. Since the text to insert should be different
d5448623 116in C<bytes> mode and out of it, the function should check the current
117state of C<bytes>-flag as in:
118
119 use bytes (); # for $bytes::hint_bits
120 sub translator {
121 if ($^H & $bytes::hint_bits) {
122 return bytes_translator(@_);
123 }
124 else {
125 return utf8_translator(@_);
126 }
423cee85 127 }
423cee85 128
129=head1 BUGS
130
131Since evaluation of the translation function happens in a middle of
132compilation (of a string literal), the translation function should not
133do any C<eval>s or C<require>s. This restriction should be lifted in
134a future version of Perl.
135
136=cut