From: Jarkko Hietaniemi Date: Sun, 10 Feb 2002 02:56:54 +0000 (+0000) Subject: Use the "Unicode 1.0" name field for control characters, X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=2eb5892fff31d60c2828ced5e374168be3df4a62;p=p5sagit%2Fp5-mst-13.2.git Use the "Unicode 1.0" name field for control characters, as suggested by Mark Davis. Note that Unicode 3.2 will require some more legwork on this because of certain alternative names. p4raw-id: //depot/perl@14617 --- diff --git a/lib/unicore/Name.pl b/lib/unicore/Name.pl index 27e37eb..3a753dd 100644 --- a/lib/unicore/Name.pl +++ b/lib/unicore/Name.pl @@ -3,7 +3,38 @@ # Any changes made here will be lost! return <<'END'; -0000 001F <control> +0000 NULL +0001 START OF HEADING +0002 START OF TEXT +0003 END OF TEXT +0004 END OF TRANSMISSION +0005 ENQUIRY +0006 ACKNOWLEDGE +0007 BELL +0008 BACKSPACE +0009 HORIZONTAL TABULATION +000A LINE FEED +000B VERTICAL TABULATION +000C FORM FEED +000D CARRIAGE RETURN +000E SHIFT OUT +000F SHIFT IN +0010 DATA LINK ESCAPE +0011 DEVICE CONTROL ONE +0012 DEVICE CONTROL TWO +0013 DEVICE CONTROL THREE +0014 DEVICE CONTROL FOUR +0015 NEGATIVE ACKNOWLEDGE +0016 SYNCHRONOUS IDLE +0017 END OF TRANSMISSION BLOCK +0018 CANCEL +0019 END OF MEDIUM +001A SUBSTITUTE +001B ESCAPE +001C FILE SEPARATOR +001D GROUP SEPARATOR +001E RECORD SEPARATOR +001F UNIT SEPARATOR 0020 SPACE 0021 EXCLAMATION MARK 0022 QUOTATION MARK @@ -99,7 +130,38 @@ return <<'END'; 007C VERTICAL LINE 007D RIGHT CURLY BRACKET 007E TILDE -007F 009F <control> +007F DELETE +0080 0081 <control> +0082 BREAK PERMITTED HERE +0083 NO BREAK HERE +0084 <control> +0085 NEXT LINE +0086 START OF SELECTED AREA +0087 END OF SELECTED AREA +0088 CHARACTER TABULATION SET +0089 CHARACTER TABULATION WITH JUSTIFICATION +008A LINE TABULATION SET +008B PARTIAL LINE DOWN +008C PARTIAL LINE UP +008D REVERSE LINE FEED +008E SINGLE SHIFT TWO +008F SINGLE SHIFT THREE +0090 DEVICE CONTROL STRING +0091 PRIVATE USE ONE +0092 PRIVATE USE TWO +0093 SET TRANSMIT STATE +0094 
CANCEL CHARACTER +0095 MESSAGE WAITING +0096 START OF GUARDED AREA +0097 END OF GUARDED AREA +0098 START OF STRING +0099 <control> +009A SINGLE CHARACTER INTRODUCER +009B CONTROL SEQUENCE INTRODUCER +009C STRING TERMINATOR +009D OPERATING SYSTEM COMMAND +009E PRIVACY MESSAGE +009F APPLICATION PROGRAM COMMAND 00A0 NO-BREAK SPACE 00A1 INVERTED EXCLAMATION MARK 00A2 CENT SIGN diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 5f2cc82..0ca6dcd 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -706,6 +706,11 @@ sub UnicodeData_Txt() $title, ## titlecase mapping ) = split(/\s*;\s*/); + # Note that in Unicode 3.2 there will be names like + # LINE FEED (LF), which probably means that \N{} needs + # to cope also with LINE FEED and LF. + $name = $unicode10 if $name eq '<control>' && $unicode10 ne ''; + my $code = hex($hexcode); if ($comb and $comb == 230) {