Use the "Unicode 1.0" name field for control characters,
Jarkko Hietaniemi [Sun, 10 Feb 2002 02:56:54 +0000 (02:56 +0000)]
as suggested by Mark Davis.  Note that Unicode 3.2 will
require some more legwork on this because it gives some
control characters alternative names, e.g. LINE FEED (LF).

p4raw-id: //depot/perl@14617

lib/unicore/Name.pl
lib/unicore/mktables

index 27e37eb..3a753dd 100644 (file)
@@ -3,7 +3,38 @@
 # Any changes made here will be lost!
 
 return <<'END';
-0000   001F    <control>
+0000           NULL
+0001           START OF HEADING
+0002           START OF TEXT
+0003           END OF TEXT
+0004           END OF TRANSMISSION
+0005           ENQUIRY
+0006           ACKNOWLEDGE
+0007           BELL
+0008           BACKSPACE
+0009           HORIZONTAL TABULATION
+000A           LINE FEED
+000B           VERTICAL TABULATION
+000C           FORM FEED
+000D           CARRIAGE RETURN
+000E           SHIFT OUT
+000F           SHIFT IN
+0010           DATA LINK ESCAPE
+0011           DEVICE CONTROL ONE
+0012           DEVICE CONTROL TWO
+0013           DEVICE CONTROL THREE
+0014           DEVICE CONTROL FOUR
+0015           NEGATIVE ACKNOWLEDGE
+0016           SYNCHRONOUS IDLE
+0017           END OF TRANSMISSION BLOCK
+0018           CANCEL
+0019           END OF MEDIUM
+001A           SUBSTITUTE
+001B           ESCAPE
+001C           FILE SEPARATOR
+001D           GROUP SEPARATOR
+001E           RECORD SEPARATOR
+001F           UNIT SEPARATOR
 0020           SPACE
 0021           EXCLAMATION MARK
 0022           QUOTATION MARK
@@ -99,7 +130,38 @@ return <<'END';
 007C           VERTICAL LINE
 007D           RIGHT CURLY BRACKET
 007E           TILDE
-007F   009F    <control>
+007F           DELETE
+0080   0081    <control>
+0082           BREAK PERMITTED HERE
+0083           NO BREAK HERE
+0084           <control>
+0085           NEXT LINE
+0086           START OF SELECTED AREA
+0087           END OF SELECTED AREA
+0088           CHARACTER TABULATION SET
+0089           CHARACTER TABULATION WITH JUSTIFICATION
+008A           LINE TABULATION SET
+008B           PARTIAL LINE DOWN
+008C           PARTIAL LINE UP
+008D           REVERSE LINE FEED
+008E           SINGLE SHIFT TWO
+008F           SINGLE SHIFT THREE
+0090           DEVICE CONTROL STRING
+0091           PRIVATE USE ONE
+0092           PRIVATE USE TWO
+0093           SET TRANSMIT STATE
+0094           CANCEL CHARACTER
+0095           MESSAGE WAITING
+0096           START OF GUARDED AREA
+0097           END OF GUARDED AREA
+0098           START OF STRING
+0099           <control>
+009A           SINGLE CHARACTER INTRODUCER
+009B           CONTROL SEQUENCE INTRODUCER
+009C           STRING TERMINATOR
+009D           OPERATING SYSTEM COMMAND
+009E           PRIVACY MESSAGE
+009F           APPLICATION PROGRAM COMMAND
 00A0           NO-BREAK SPACE
 00A1           INVERTED EXCLAMATION MARK
 00A2           CENT SIGN
index 5f2cc82..0ca6dcd 100644 (file)
@@ -706,6 +706,11 @@ sub UnicodeData_Txt()
             $title,     ## titlecase mapping
               ) = split(/\s*;\s*/);
 
+       # Note that in Unicode 3.2 there will be names like
+       # LINE FEED (LF), which probably means that \N{} will also
+       # need to accept each part on its own: LINE FEED and LF.
+       $name = $unicode10 if $name eq '<control>' && $unicode10 ne '';
+
         my $code = hex($hexcode);
 
         if ($comb and $comb == 230) {