use charnames qw(cyrillic greek);
print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n";
+ use charnames ...; # either :full or :short will do
+ print "This is Unicode code point \N{U+263A}\n"; # explicit code point
+
=head1 DESCRIPTION
Pragma C<use charnames> supports arguments C<:full>, C<:short> and
your logic, or you need to put a conditional in to guard against
meaningless input.
+=item Illegal hexadecimal code on \N{U+...}
+
+(F) You must specify a hexadecimal code for the Unicode code point
+after the "U+" inside your "\N{}", for example "\N{U+263A}".
+
=item Illegal hexadecimal digit %s ignored
(W digit) You may have tried to use a character other than 0 - 9 or
much fun as programming in machine code. So another way to specify
Unicode characters is to use the S<B<named character> > escape
sequence C<\N{name}>. C<name> is a name for the Unicode character, as
-specified in the Unicode standard. For instance, if we wanted to
-represent or match the astrological sign for the planet Mercury, we
-could use
+specified in the Unicode standard, or C<U+> followed by the hexadecimal
+code point of the character. For instance, if we wanted to represent or
+match the astrological sign for the planet Mercury, we could use
use utf8; # We will be doing Unicode processing
use charnames ":full"; # use named chars with Unicode full names
}
$| = 1;
-print "1..15\n";
+print "1..16\n";
use charnames ':full';
{
use charnames ':full';
+
print "not " unless "\x{263a}" eq "\N{WHITE SMILING FACE}";
print "ok 6\n";
print "not " unless length("\x{263a}") == 1;
{
use charnames qw(:full);
- use utf8;
my $x = "\x{221b}";
my $named = "\N{CUBE ROOT}";
{
use charnames qw(:full);
- use utf8;
+
print "not " unless "\x{100}\N{CENT SIGN}" eq "\x{100}"."\N{CENT SIGN}";
print "ok 14\n";
}
}
+
+{
+    # Test 16: an explicit \N{U+XXXX} code point must produce the same
+    # string as the character's full Unicode name.
+    use charnames ':full';
+
+    my $smiley_by_code = "\N{U+263A}";
+    my $smiley_by_name = "\N{WHITE SMILING FACE}";
+    print "not " if $smiley_by_code ne $smiley_by_name;
+    print "ok 16\n";
+}
+
/* \N{latin small letter a} is a named character */
case 'N':
- ++s;
+ s++;
if (*s == '{') {
char* e = strchr(s, '}');
SV *res;
e = s - 1;
goto cont_scan;
}
- res = newSVpvn(s + 1, e - s - 1);
- res = new_constant( Nullch, 0, "charnames",
- res, Nullsv, "\\N{...}" );
+ if (s[1] == 'U' && s[2] == '+') { /* \N{U+HHHH} */
+     /* Explicit code point: s points at '{' and e at '}', so the
+      * hexadecimal digits occupy s+3 .. e-1. */
+     STRLEN alen = e - s - 3;    /* bytes between "U+" and '}' */
+     STRLEN blen;                /* bytes scan_hex() consumed */
+     UV uv = (UV)scan_hex(s + 3, alen, &blen);
+
+     /* Accept only a non-empty run made up entirely of hex digits;
+      * "\N{U+}" is rejected rather than silently read as U+0000. */
+     if (alen > 0 && blen == alen) {
+         /* Allocate room for the longest possible encoding plus the
+          * trailing NUL.  Sizing the buffer as (uv >> 8) + 1 bytes
+          * copied from s was too small for U+0080..U+00FF (two bytes
+          * plus NUL written into a two-byte buffer) and, for large
+          * code points, copied far more bytes from the source than
+          * the escape contains. */
+         res = NEWSV(0, UTF8_MAXLEN + 1);
+         SvPOK_only(res);
+         str = (char *)uv_to_utf8((U8*)SvPVX(res), uv);
+         SvCUR_set(res, str - SvPVX(res));
+         *str = 0;
+         /* NOTE(review): res now holds UTF-8 bytes but is not flagged
+          * SvUTF8; confirm the sv_utf8_upgrade(res) performed after
+          * this if/else when has_utf8 is set does not re-encode it. */
+         if (uv > 0x7f)
+             has_utf8 = TRUE;
+     }
+     else {
+         yyerror("Illegal hexadecimal code on \\N{U+...}");
+         e = s - 1;
+         goto cont_scan;
+     }
+ }
+ else {
+     /* Named character: hand the text between the braces to the
+      * "charnames" constant handler to obtain its replacement. */
+     res = newSVpvn(s + 1, e - s - 1);
+     res = new_constant( Nullch, 0, "charnames",
+                         res, Nullsv, "\\N{...}" );
+ }
if (has_utf8)
sv_utf8_upgrade(res);
str = SvPV(res,len);