From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Sun, 4 Mar 2001 17:24:49 +0000 (+0000)
Subject: Add the \N{U+HHHH} syntax.
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e0a47bd2cb14247750ebc5ca933bb1ba015aa70a;p=p5sagit%2Fp5-mst-13.2.git

Add the \N{U+HHHH} syntax.

p4raw-id: //depot/perl@9014
---

diff --git a/lib/charnames.pm b/lib/charnames.pm
index 875c0a5..1bb89b4 100644
--- a/lib/charnames.pm
+++ b/lib/charnames.pm
@@ -86,6 +86,9 @@ charnames - define character names for C<\N{named}> string literal escape.
   use charnames qw(cyrillic greek);
   print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n";
 
+  use charnames ...; # either :full or :short will do
+  print "This is Unicode code point \N{U+263A}\n"; # explicit code point
+
 =head1 DESCRIPTION
 
 Pragma C<use charnames> supports arguments C<:full>, C<:short> and
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 122f5ea..5adf241 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -1564,6 +1564,11 @@ to your Perl administrator.
 your logic, or you need to put a conditional in to guard against
 meaningless input.
 
+=item Illegal hexadecimal code on \N{U+...}
+
+(F) The "U+" inside your "\N{}" must be followed by hexadecimal digits
+only, giving the number of the Unicode code point you want.
+
 =item Illegal hexadecimal digit %s ignored
 
 (W digit) You may have tried to use a character other than 0 - 9 or
diff --git a/pod/perlretut.pod b/pod/perlretut.pod
index a77b87e..2647076 100644
--- a/pod/perlretut.pod
+++ b/pod/perlretut.pod
@@ -1657,9 +1657,9 @@ or deciphering someone else's hexadecimal Unicode regexp is about as
 much fun as programming in machine code.  So another way to specify
 Unicode characters is to use the S<B<named character> > escape
 sequence C<\N{name}>.  C<name> is a name for the Unicode character, as
-specified in the Unicode standard.  For instance, if we wanted to
-represent or match the astrological sign for the planet Mercury, we
-could use
+specified in the Unicode standard, or "U+" followed by the hexadecimal
+code of the character.  For instance, if we wanted to represent or
+match the astrological sign for the planet Mercury, we could use
 
     use utf8;              # We will be doing Unicode processing
     use charnames ":full"; # use named chars with Unicode full names
diff --git a/t/lib/charnames.t b/t/lib/charnames.t
index 6a8a8be..8ad098e 100644
--- a/t/lib/charnames.t
+++ b/t/lib/charnames.t
@@ -8,7 +8,7 @@ BEGIN {
 }
 
 $| = 1;
-print "1..15\n";
+print "1..16\n";
 
 use charnames ':full';
 
@@ -63,6 +63,7 @@ sub to_bytes {
 
 {
     use charnames ':full';
+
     print "not " unless "\x{263a}" eq "\N{WHITE SMILING FACE}";
     print "ok 6\n";
     print "not " unless length("\x{263a}") == 1;
@@ -81,7 +82,6 @@ sub to_bytes {
 
 {
    use charnames qw(:full);
-   use utf8;
    
     my $x = "\x{221b}";
     my $named = "\N{CUBE ROOT}";
@@ -92,7 +92,7 @@ sub to_bytes {
 
 {
    use charnames qw(:full);
-   use utf8;
+
    print "not " unless "\x{100}\N{CENT SIGN}" eq "\x{100}"."\N{CENT SIGN}";
    print "ok 14\n";
 }
@@ -106,3 +106,12 @@ sub to_bytes {
 
 }
 
+
+{
+  use charnames ':full';
+
+  print "not "
+      unless "\N{U+263A}" eq "\N{WHITE SMILING FACE}";
+  print "ok 16\n";
+}
+
diff --git a/toke.c b/toke.c
index daa0d52..f68eac8 100644
--- a/toke.c
+++ b/toke.c
@@ -1518,7 +1518,7 @@ S_scan_const(pTHX_ char *start)
 
  	    /* \N{latin small letter a} is a named character */
  	    case 'N':
- 		++s;
+ 		s++;
  		if (*s == '{') {
  		    char* e = strchr(s, '}');
  		    SV *res;
@@ -1530,9 +1530,40 @@ S_scan_const(pTHX_ char *start)
 			e = s - 1;
 			goto cont_scan;
 		    }
-		    res = newSVpvn(s + 1, e - s - 1);
-		    res = new_constant( Nullch, 0, "charnames",
-					res, Nullsv, "\\N{...}" );
+		    if (s[1] == 'U' && s[2] == '+') { /* \N{U+HHHH} */
+			/* Explicit code point: the text between "U+" and
+			 * '}' must consist of one or more hex digits. */
+			STRLEN alen = e - s - 3; /* chars after "{U+" */
+			STRLEN blen = 0;         /* chars scan_hex used */
+			UV uv = 0;
+			U8 tmpbuf[UTF8_MAXLEN + 1];
+
+			if (alen > 0)
+			    uv = (UV)scan_hex(s + 3, alen, &blen);
+
+			if (alen > 0 && blen == alen) {
+			    /* Encode into a scratch buffer sized for the
+			     * longest UTF-8 sequence, then copy exactly the
+			     * bytes produced (never sized by uv's value). */
+			    U8 *tmpend = uv_to_utf8(tmpbuf, uv);
+
+			    res = newSVpvn((char *)tmpbuf, tmpend - tmpbuf);
+			    if (uv > 0x7f)
+				has_utf8 = TRUE;
+			}
+			else {
+			    /* Empty, or contains a non-hex character. */
+			    yyerror("Illegal hexadecimal code on \\N{U+...}");
+			    e = s - 1;
+			    goto cont_scan;
+			}
+		    }
+		    else {
+			/* Anything else is a name for charnames to resolve. */
+			res = newSVpvn(s + 1, e - s - 1);
+			res = new_constant( Nullch, 0, "charnames",
+					    res, Nullsv, "\\N{...}" );
+		    }
 		    if (has_utf8)
 			sv_utf8_upgrade(res);
 		    str = SvPV(res,len);