Once again syncing after too long an absence
[p5sagit/p5-mst-13.2.git] / lib / charnames.pm
CommitLineData
423cee85 1package charnames;
0e06870b 2
3our $VERSION = '1.00';
4
d5448623 5use bytes (); # for $bytes::hint_bits
22d4bb9c 6use warnings();
d5448623 7$charnames::hint_bits = 0x20000;
423cee85 8
423cee85 9my $txt;
10
11# This is not optimized in any way yet
12sub charnames {
13 $name = shift;
14 $txt = do "unicode/Name.pl" unless $txt;
15 my @off;
16 if ($^H{charnames_full} and $txt =~ /\t\t$name$/m) {
17 @off = ($-[0], $+[0]);
18 }
19 unless (@off) {
20 if ($^H{charnames_short} and $name =~ /^(.*?):(.*)/s) {
21 my ($script, $cname) = ($1,$2);
22 my $case = ( $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
23 if ($txt =~ m/\t\t\U$script\E (?:$case )?LETTER \U$cname$/m) {
24 @off = ($-[0], $+[0]);
25 }
26 }
27 }
28 unless (@off) {
29 my $case = ( $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
30 for ( @{$^H{charnames_scripts}} ) {
31 (@off = ($-[0], $+[0])), last
32 if $txt =~ m/\t\t$_ (?:$case )?LETTER \U$name$/m;
33 }
34 }
35 die "Unknown charname '$name'" unless @off;
36
423cee85 37 my $ord = hex substr $txt, $off[0] - 4, 4;
d5448623 38 if ($^H & $bytes::hint_bits) { # "use bytes" in effect?
8058d7ab 39 use bytes;
d41ff1b8 40 return chr $ord if $ord <= 255;
41 my $hex = sprintf '%X=0%o', $ord, $ord;
42 my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
43 die "Character 0x$hex with name '$fname' is above 0xFF";
423cee85 44 }
d41ff1b8 45 return chr $ord;
423cee85 46}
47
48sub import {
49 shift;
d5448623 50 die "`use charnames' needs explicit imports list" unless @_;
51 $^H |= $charnames::hint_bits;
423cee85 52 $^H{charnames} = \&charnames ;
53 my %h;
54 @h{@_} = (1) x @_;
55 $^H{charnames_full} = delete $h{':full'};
56 $^H{charnames_short} = delete $h{':short'};
57 $^H{charnames_scripts} = [map uc, keys %h];
22d4bb9c 58 if (warnings::enabled('utf8') && @{$^H{charnames_scripts}}) {
59 $txt = do "unicode/Name.pl" unless $txt;
60 for (@{$^H{charnames_scripts}}) {
61 warnings::warn('utf8', "No such script: '$_'") unless
62 $txt =~ m/\t\t$_ (?:CAPITAL |SMALL )?LETTER /;
63 }
64 }
423cee85 65}
66
67
681;
69__END__
70
71=head1 NAME
72
4a2d328f 73charnames - define character names for C<\N{named}> string literal escape.
423cee85 74
75=head1 SYNOPSIS
76
77 use charnames ':full';
4a2d328f 78 print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n";
423cee85 79
80 use charnames ':short';
4a2d328f 81 print "\N{greek:Sigma} is an upper-case sigma.\n";
423cee85 82
83 use charnames qw(cyrillic greek);
4a2d328f 84 print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n";
423cee85 85
86=head1 DESCRIPTION
87
88Pragma C<use charnames> supports arguments C<:full>, C<:short> and
89script names. If C<:full> is present, for expansion of
4a2d328f 90C<\N{CHARNAME}}> string C<CHARNAME> is first looked in the list of
423cee85 91standard Unicode names of chars. If C<:short> is present, and
92C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up
93as a letter in script C<SCRIPT>. If pragma C<use charnames> is used
4a2d328f 94with script name arguments, then for C<\N{CHARNAME}}> the name
423cee85 95C<CHARNAME> is looked up as a letter in the given scripts (in the
96specified order).
97
98For lookup of C<CHARNAME> inside a given script C<SCRIPTNAME>
d5448623 99this pragma looks for the names
423cee85 100
101 SCRIPTNAME CAPITAL LETTER CHARNAME
102 SCRIPTNAME SMALL LETTER CHARNAME
103 SCRIPTNAME LETTER CHARNAME
104
105in the table of standard Unicode names. If C<CHARNAME> is lowercase,
d5448623 106then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant is
423cee85 107ignored.
108
109=head1 CUSTOM TRANSLATORS
110
d5448623 111The mechanism of translation of C<\N{...}> escapes is general and not
423cee85 112hardwired into F<charnames.pm>. A module can install custom
d5448623 113translations (inside the scope which C<use>s the module) with the
423cee85 114following magic incantation:
115
d5448623 116 use charnames (); # for $charnames::hint_bits
117 sub import {
118 shift;
119 $^H |= $charnames::hint_bits;
120 $^H{charnames} = \&translator;
121 }
423cee85 122
123Here translator() is a subroutine which takes C<CHARNAME> as an
124argument, and returns text to insert into the string instead of the
4a2d328f 125C<\N{CHARNAME}> escape. Since the text to insert should be different
d5448623 126in C<bytes> mode and out of it, the function should check the current
127state of C<bytes>-flag as in:
128
129 use bytes (); # for $bytes::hint_bits
130 sub translator {
131 if ($^H & $bytes::hint_bits) {
132 return bytes_translator(@_);
133 }
134 else {
135 return utf8_translator(@_);
136 }
423cee85 137 }
423cee85 138
139=head1 BUGS
140
141Since evaluation of the translation function happens in a middle of
142compilation (of a string literal), the translation function should not
143do any C<eval>s or C<require>s. This restriction should be lifted in
144a future version of Perl.
145
146=cut