From: Jarkko Hietaniemi Date: Sun, 4 Mar 2001 17:24:49 +0000 (+0000) Subject: Add the \N{U+HHHH} syntax. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e0a47bd2cb14247750ebc5ca933bb1ba015aa70a;p=p5sagit%2Fp5-mst-13.2.git Add the \N{U+HHHH} syntax. p4raw-id: //depot/perl@9014 --- diff --git a/lib/charnames.pm b/lib/charnames.pm index 875c0a5..1bb89b4 100644 --- a/lib/charnames.pm +++ b/lib/charnames.pm @@ -86,6 +86,9 @@ charnames - define character names for C<\N{named}> string literal escape. use charnames qw(cyrillic greek); print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n"; + use charnames ...; # either :full or :short will do + print "This is Unicode code point \N{U+263A}\n"; # explicit code point + =head1 DESCRIPTION Pragma C<use charnames> supports arguments C<:full>, C<:short> and diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 122f5ea..5adf241 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -1564,6 +1564,11 @@ to your Perl administrator. your logic, or you need to put a conditional in to guard against meaningless input. +=item Illegal hexadecimal code on \N{U+...} + +(F) You must specify a hexadecimal code for the Unicode codepoint +after the "U+" inside your "\N{}". + =item Illegal hexadecimal digit %s ignored (W digit) You may have tried to use a character other than 0 - 9 or diff --git a/pod/perlretut.pod b/pod/perlretut.pod index a77b87e..2647076 100644 --- a/pod/perlretut.pod +++ b/pod/perlretut.pod @@ -1657,9 +1657,9 @@ or deciphering someone else's hexadecimal Unicode regexp is about as much fun as programming in machine code. So another way to specify Unicode characters is to use the S<I<named character>> escape sequence C<\N{name}>. C<name> is a name for the Unicode character, as -specified in the Unicode standard. For instance, if we wanted to -represent or match the astrological sign for the planet Mercury, we -could use +specified in the Unicode standard, or "U+" followed by the hexadecimal +code of the character. 
For instance, if we wanted to represent or +match the astrological sign for the planet Mercury, we could use use utf8; # We will be doing Unicode processing use charnames ":full"; # use named chars with Unicode full names diff --git a/t/lib/charnames.t b/t/lib/charnames.t index 6a8a8be..8ad098e 100644 --- a/t/lib/charnames.t +++ b/t/lib/charnames.t @@ -8,7 +8,7 @@ BEGIN { } $| = 1; -print "1..15\n"; +print "1..16\n"; use charnames ':full'; @@ -63,6 +63,7 @@ sub to_bytes { { use charnames ':full'; + print "not " unless "\x{263a}" eq "\N{WHITE SMILING FACE}"; print "ok 6\n"; print "not " unless length("\x{263a}") == 1; @@ -81,7 +82,6 @@ sub to_bytes { { use charnames qw(:full); - use utf8; my $x = "\x{221b}"; my $named = "\N{CUBE ROOT}"; @@ -92,7 +92,7 @@ sub to_bytes { { use charnames qw(:full); - use utf8; + print "not " unless "\x{100}\N{CENT SIGN}" eq "\x{100}"."\N{CENT SIGN}"; print "ok 14\n"; } @@ -106,3 +106,12 @@ sub to_bytes { } + +{ + use charnames ':full'; + + print "not " + unless "\N{U+263A}" eq "\N{WHITE SMILING FACE}"; + print "ok 16\n"; +} + diff --git a/toke.c b/toke.c index daa0d52..f68eac8 100644 --- a/toke.c +++ b/toke.c @@ -1518,7 +1518,7 @@ S_scan_const(pTHX_ char *start) /* \N{latin small letter a} is a named character */ case 'N': - ++s; + s++; if (*s == '{') { char* e = strchr(s, '}'); SV *res; @@ -1530,9 +1530,30 @@ S_scan_const(pTHX_ char *start) e = s - 1; goto cont_scan; } - res = newSVpvn(s + 1, e - s - 1); - res = new_constant( Nullch, 0, "charnames", - res, Nullsv, "\\N{...}" ); + if (s[1] == 'U' && s[2] == '+') { /* \N{U+HHHH} */ + STRLEN alen = e - s - 3; + STRLEN blen; + UV uv = (UV)scan_hex(s + 3, alen, &blen); + + if (blen == alen) { + res = newSVpvn(s, (uv >> 8) + 1); /* filler */ + str = (char *)uv_to_utf8((U8*)SvPVX(res), uv); + SvCUR_set(res, str - SvPVX(res)); + *str = 0; + if (uv > 0x7f) + has_utf8 = TRUE; + } + else { + yyerror("Illegal hexadecimal code on \\N{U+...}"); + e = s - 1; + goto cont_scan; + } + } + else { + res 
= newSVpvn(s + 1, e - s - 1); + res = new_constant( Nullch, 0, "charnames", + res, Nullsv, "\\N{...}" ); + } if (has_utf8) sv_utf8_upgrade(res); str = SvPV(res,len);