Abolish USE_WIN32_RTL_ENV.
[p5sagit/p5-mst-13.2.git] / lib / charnames.pm
CommitLineData
423cee85 1package charnames;
b75c8c73 2
3our $VERSION = '1.00';
4
d5448623 5use bytes (); # for $bytes::hint_bits
bd62941a 6use warnings();
d5448623 7$charnames::hint_bits = 0x20000;
423cee85 8
423cee85 9my $txt;
10
11# This is not optimized in any way yet
12sub charnames {
13 $name = shift;
14 $txt = do "unicode/Name.pl" unless $txt;
15 my @off;
16 if ($^H{charnames_full} and $txt =~ /\t\t$name$/m) {
17 @off = ($-[0], $+[0]);
18 }
19 unless (@off) {
20 if ($^H{charnames_short} and $name =~ /^(.*?):(.*)/s) {
21 my ($script, $cname) = ($1,$2);
22 my $case = ( $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
23 if ($txt =~ m/\t\t\U$script\E (?:$case )?LETTER \U$cname$/m) {
24 @off = ($-[0], $+[0]);
25 }
26 }
27 }
28 unless (@off) {
29 my $case = ( $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
30 for ( @{$^H{charnames_scripts}} ) {
31 (@off = ($-[0], $+[0])), last
32 if $txt =~ m/\t\t$_ (?:$case )?LETTER \U$name$/m;
33 }
34 }
35 die "Unknown charname '$name'" unless @off;
b896c7a5 36
37 my $hexlen = 0;
38 $hexlen++ while substr($txt, $off[0] - $hexlen - 1, 1) =~ /[0-9a-f]/;
39 my $ord = hex substr $txt, $off[0] - $hexlen, $hexlen;
d5448623 40 if ($^H & $bytes::hint_bits) { # "use bytes" in effect?
8058d7ab 41 use bytes;
d41ff1b8 42 return chr $ord if $ord <= 255;
43 my $hex = sprintf '%X=0%o', $ord, $ord;
44 my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
45 die "Character 0x$hex with name '$fname' is above 0xFF";
423cee85 46 }
d41ff1b8 47 return chr $ord;
423cee85 48}
49
50sub import {
51 shift;
d5448623 52 die "`use charnames' needs explicit imports list" unless @_;
53 $^H |= $charnames::hint_bits;
423cee85 54 $^H{charnames} = \&charnames ;
55 my %h;
56 @h{@_} = (1) x @_;
57 $^H{charnames_full} = delete $h{':full'};
58 $^H{charnames_short} = delete $h{':short'};
59 $^H{charnames_scripts} = [map uc, keys %h];
bd62941a 60 if (warnings::enabled('utf8') && @{$^H{charnames_scripts}}) {
61 $txt = do "unicode/Name.pl" unless $txt;
62 for (@{$^H{charnames_scripts}}) {
63 warnings::warn('utf8', "No such script: '$_'") unless
64 $txt =~ m/\t\t$_ (?:CAPITAL |SMALL )?LETTER /;
65 }
66 }
423cee85 67}
68
69
701;
71__END__
72
73=head1 NAME
74
4a2d328f 75charnames - define character names for C<\N{named}> string literal escape.
423cee85 76
77=head1 SYNOPSIS
78
79 use charnames ':full';
4a2d328f 80 print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n";
423cee85 81
82 use charnames ':short';
4a2d328f 83 print "\N{greek:Sigma} is an upper-case sigma.\n";
423cee85 84
85 use charnames qw(cyrillic greek);
4a2d328f 86 print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n";
423cee85 87
88=head1 DESCRIPTION
89
90Pragma C<use charnames> supports arguments C<:full>, C<:short> and
91script names. If C<:full> is present, for expansion of
4a2d328f 92C<\N{CHARNAME}}> string C<CHARNAME> is first looked in the list of
423cee85 93standard Unicode names of chars. If C<:short> is present, and
94C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up
95as a letter in script C<SCRIPT>. If pragma C<use charnames> is used
4a2d328f 96with script name arguments, then for C<\N{CHARNAME}}> the name
423cee85 97C<CHARNAME> is looked up as a letter in the given scripts (in the
98specified order).
99
100For lookup of C<CHARNAME> inside a given script C<SCRIPTNAME>
d5448623 101this pragma looks for the names
423cee85 102
103 SCRIPTNAME CAPITAL LETTER CHARNAME
104 SCRIPTNAME SMALL LETTER CHARNAME
105 SCRIPTNAME LETTER CHARNAME
106
107in the table of standard Unicode names. If C<CHARNAME> is lowercase,
d5448623 108then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant is
423cee85 109ignored.
110
111=head1 CUSTOM TRANSLATORS
112
d5448623 113The mechanism of translation of C<\N{...}> escapes is general and not
423cee85 114hardwired into F<charnames.pm>. A module can install custom
d5448623 115translations (inside the scope which C<use>s the module) with the
423cee85 116following magic incantation:
117
d5448623 118 use charnames (); # for $charnames::hint_bits
119 sub import {
120 shift;
121 $^H |= $charnames::hint_bits;
122 $^H{charnames} = \&translator;
123 }
423cee85 124
125Here translator() is a subroutine which takes C<CHARNAME> as an
126argument, and returns text to insert into the string instead of the
4a2d328f 127C<\N{CHARNAME}> escape. Since the text to insert should be different
d5448623 128in C<bytes> mode and out of it, the function should check the current
129state of C<bytes>-flag as in:
130
131 use bytes (); # for $bytes::hint_bits
132 sub translator {
133 if ($^H & $bytes::hint_bits) {
134 return bytes_translator(@_);
135 }
136 else {
137 return utf8_translator(@_);
138 }
423cee85 139 }
423cee85 140
141=head1 BUGS
142
143Since evaluation of the translation function happens in a middle of
144compilation (of a string literal), the translation function should not
145do any C<eval>s or C<require>s. This restriction should be lifted in
146a future version of Perl.
147
148=cut