Re: New harness is failing things
[p5sagit/p5-mst-13.2.git] / lib / charnames.pm
CommitLineData
423cee85 1package charnames;
d5448623 2use bytes (); # for $bytes::hint_bits
bd62941a 3use warnings();
d5448623 4$charnames::hint_bits = 0x20000;
423cee85 5
423cee85 6my $txt;
7
8# This is not optimized in any way yet
9sub charnames {
10 $name = shift;
11 $txt = do "unicode/Name.pl" unless $txt;
12 my @off;
13 if ($^H{charnames_full} and $txt =~ /\t\t$name$/m) {
14 @off = ($-[0], $+[0]);
15 }
16 unless (@off) {
17 if ($^H{charnames_short} and $name =~ /^(.*?):(.*)/s) {
18 my ($script, $cname) = ($1,$2);
19 my $case = ( $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
20 if ($txt =~ m/\t\t\U$script\E (?:$case )?LETTER \U$cname$/m) {
21 @off = ($-[0], $+[0]);
22 }
23 }
24 }
25 unless (@off) {
26 my $case = ( $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
27 for ( @{$^H{charnames_scripts}} ) {
28 (@off = ($-[0], $+[0])), last
29 if $txt =~ m/\t\t$_ (?:$case )?LETTER \U$name$/m;
30 }
31 }
32 die "Unknown charname '$name'" unless @off;
33
423cee85 34 my $ord = hex substr $txt, $off[0] - 4, 4;
d5448623 35 if ($^H & $bytes::hint_bits) { # "use bytes" in effect?
8058d7ab 36 use bytes;
d41ff1b8 37 return chr $ord if $ord <= 255;
38 my $hex = sprintf '%X=0%o', $ord, $ord;
39 my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
40 die "Character 0x$hex with name '$fname' is above 0xFF";
423cee85 41 }
d41ff1b8 42 return chr $ord;
423cee85 43}
44
45sub import {
46 shift;
d5448623 47 die "`use charnames' needs explicit imports list" unless @_;
48 $^H |= $charnames::hint_bits;
423cee85 49 $^H{charnames} = \&charnames ;
50 my %h;
51 @h{@_} = (1) x @_;
52 $^H{charnames_full} = delete $h{':full'};
53 $^H{charnames_short} = delete $h{':short'};
54 $^H{charnames_scripts} = [map uc, keys %h];
bd62941a 55 if (warnings::enabled('utf8') && @{$^H{charnames_scripts}}) {
56 $txt = do "unicode/Name.pl" unless $txt;
57 for (@{$^H{charnames_scripts}}) {
58 warnings::warn('utf8', "No such script: '$_'") unless
59 $txt =~ m/\t\t$_ (?:CAPITAL |SMALL )?LETTER /;
60 }
61 }
423cee85 62}
63
64
651;
66__END__
67
68=head1 NAME
69
4a2d328f 70charnames - define character names for C<\N{named}> string literal escape.
423cee85 71
72=head1 SYNOPSIS
73
74 use charnames ':full';
4a2d328f 75 print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n";
423cee85 76
77 use charnames ':short';
4a2d328f 78 print "\N{greek:Sigma} is an upper-case sigma.\n";
423cee85 79
80 use charnames qw(cyrillic greek);
4a2d328f 81 print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n";
423cee85 82
83=head1 DESCRIPTION
84
85Pragma C<use charnames> supports arguments C<:full>, C<:short> and
86script names. If C<:full> is present, for expansion of
4a2d328f 87C<\N{CHARNAME}}> string C<CHARNAME> is first looked in the list of
423cee85 88standard Unicode names of chars. If C<:short> is present, and
89C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up
90as a letter in script C<SCRIPT>. If pragma C<use charnames> is used
4a2d328f 91with script name arguments, then for C<\N{CHARNAME}}> the name
423cee85 92C<CHARNAME> is looked up as a letter in the given scripts (in the
93specified order).
94
95For lookup of C<CHARNAME> inside a given script C<SCRIPTNAME>
d5448623 96this pragma looks for the names
423cee85 97
98 SCRIPTNAME CAPITAL LETTER CHARNAME
99 SCRIPTNAME SMALL LETTER CHARNAME
100 SCRIPTNAME LETTER CHARNAME
101
102in the table of standard Unicode names. If C<CHARNAME> is lowercase,
d5448623 103then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant is
423cee85 104ignored.
105
106=head1 CUSTOM TRANSLATORS
107
d5448623 108The mechanism of translation of C<\N{...}> escapes is general and not
423cee85 109hardwired into F<charnames.pm>. A module can install custom
d5448623 110translations (inside the scope which C<use>s the module) with the
423cee85 111following magic incantation:
112
d5448623 113 use charnames (); # for $charnames::hint_bits
114 sub import {
115 shift;
116 $^H |= $charnames::hint_bits;
117 $^H{charnames} = \&translator;
118 }
423cee85 119
120Here translator() is a subroutine which takes C<CHARNAME> as an
121argument, and returns text to insert into the string instead of the
4a2d328f 122C<\N{CHARNAME}> escape. Since the text to insert should be different
d5448623 123in C<bytes> mode and out of it, the function should check the current
124state of C<bytes>-flag as in:
125
126 use bytes (); # for $bytes::hint_bits
127 sub translator {
128 if ($^H & $bytes::hint_bits) {
129 return bytes_translator(@_);
130 }
131 else {
132 return utf8_translator(@_);
133 }
423cee85 134 }
423cee85 135
136=head1 BUGS
137
138Since evaluation of the translation function happens in a middle of
139compilation (of a string literal), the translation function should not
140do any C<eval>s or C<require>s. This restriction should be lifted in
141a future version of Perl.
142
143=cut