lib/charnames.pm

   1 package charnames;
   2 use bytes ();           # for $bytes::hint_bits
   3 $charnames::hint_bits = 0x20000;
   4
   5 my $txt;
   6
   7 # This is not optimized in any way yet
   8 sub charnames {
   9   $name = shift;
  10   $txt = do "unicode/Name.pl" unless $txt;
  11   my @off;
  12   if ($^H{charnames_full} and $txt =~ /\t\t$name$/m) {
  13     @off = ($-[0], $+[0]);
  14   }
  15   unless (@off) {
  16     if ($^H{charnames_short} and $name =~ /^(.*?):(.*)/s) {
  17       my ($script, $cname) = ($1,$2);
  18       my $case = ( $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
  19       if ($txt =~ m/\t\t\U$script\E (?:$case )?LETTER \U$cname$/m) {
  20         @off = ($-[0], $+[0]);
  21       }
  22     }
  23   }
  24   unless (@off) {
  25     my $case = ( $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
  26     for ( @{$^H{charnames_scripts}} ) {
  27       (@off = ($-[0], $+[0])), last
  28         if $txt =~ m/\t\t$_ (?:$case )?LETTER \U$name$/m;
  29     }
  30   }
  31   die "Unknown charname '$name'" unless @off;
  32
  33   my $ord = hex substr $txt, $off[0] - 4, 4;
  34   if ($^H & $bytes::hint_bits) {        # "use bytes" in effect?
  35     use bytes;
  36     return chr $ord if $ord <= 255;
  37     my $hex = sprintf '%X=0%o', $ord, $ord;
  38     my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
  39     die "Character 0x$hex with name '$fname' is above 0xFF";
  40   }
  41   return chr $ord;
  42 }
  43
  44 sub import {
  45   shift;
  46   die "`use charnames' needs explicit imports list" unless @_;
  47   $^H |= $charnames::hint_bits;
  48   $^H{charnames} = \&charnames ;
  49   my %h;
  50   @h{@_} = (1) x @_;
  51   $^H{charnames_full} = delete $h{':full'};
  52   $^H{charnames_short} = delete $h{':short'};
  53   $^H{charnames_scripts} = [map uc, keys %h];
  54 }
  55
  56
  57 1;
  58 __END__
  59
  60 =head1 NAME
  61
  62 charnames - define character names for C<\N{named}> string literal escape.
  63
  64 =head1 SYNOPSIS
  65
  66   use charnames ':full';
  67   print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n";
  68
  69   use charnames ':short';
  70   print "\N{greek:Sigma} is an upper-case sigma.\n";
  71
  72   use charnames qw(cyrillic greek);
  73   print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n";
  74
  75 =head1 DESCRIPTION
  76
  77 Pragma C<use charnames> supports arguments C<:full>, C<:short> and
  78 script names.  If C<:full> is present, for expansion of
  79 C<\N{CHARNAME}}> string C<CHARNAME> is first looked in the list of
  80 standard Unicode names of chars.  If C<:short> is present, and
  81 C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up
  82 as a letter in script C<SCRIPT>.  If pragma C<use charnames> is used
  83 with script name arguments, then for C<\N{CHARNAME}}> the name
  84 C<CHARNAME> is looked up as a letter in the given scripts (in the
  85 specified order).
  86
  87 For lookup of C<CHARNAME> inside a given script C<SCRIPTNAME>
  88 this pragma looks for the names
  89
  90   SCRIPTNAME CAPITAL LETTER CHARNAME
  91   SCRIPTNAME SMALL LETTER CHARNAME
  92   SCRIPTNAME LETTER CHARNAME
  93
  94 in the table of standard Unicode names.  If C<CHARNAME> is lowercase,
  95 then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant is
  96 ignored.
  97
  98 =head1 CUSTOM TRANSLATORS
  99
 100 The mechanism of translation of C<\N{...}> escapes is general and not
 101 hardwired into F<charnames.pm>.  A module can install custom
 102 translations (inside the scope which C<use>s the module) with the
 103 following magic incantation:
 104
 105     use charnames ();           # for $charnames::hint_bits
 106     sub import {
 107         shift;
 108         $^H |= $charnames::hint_bits;
 109         $^H{charnames} = \&translator;
 110     }
 111
 112 Here translator() is a subroutine which takes C<CHARNAME> as an
 113 argument, and returns text to insert into the string instead of the
 114 C<\N{CHARNAME}> escape.  Since the text to insert should be different
 115 in C<bytes> mode and out of it, the function should check the current
 116 state of C<bytes>-flag as in:
 117
 118     use bytes ();                       # for $bytes::hint_bits
 119     sub translator {
 120         if ($^H & $bytes::hint_bits) {
 121             return bytes_translator(@_);
 122         }
 123         else {
 124             return utf8_translator(@_);
 125         }
 126     }
 127
 128 =head1 BUGS
 129
 130 Since evaluation of the translation function happens in a middle of
 131 compilation (of a string literal), the translation function should not
 132 do any C<eval>s or C<require>s.  This restriction should be lifted in
 133 a future version of Perl.
 134
 135 =cut