From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Sat, 27 Oct 2001 16:47:07 +0000 (+0000)
Subject: Unicode: property alias naming cleanup.
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e150c829b97ab41f324abbc8734f50bd5f4a4838;p=p5sagit%2Fp5-mst-13.2.git

Unicode: property alias naming cleanup.

p4raw-id: //depot/perl@12707
---

diff --git a/lib/unicore/Is.pl b/lib/unicore/Is.pl
index 6ee87e3..86e5926 100644
--- a/lib/unicore/Is.pl
+++ b/lib/unicore/Is.pl
@@ -3,48 +3,43 @@
 # Any changes made here will be lost!
 %utf8::Is =
 (
-'Close Punctuation'                           => 'Pe',
-'Connector Punctuation'                       => 'Pc',
+'Close_Punctuation'                           => 'Pe',
+'Connector_Punctuation'                       => 'Pc',
 'Control'                                     => 'Cc',
-'Currency Symbol'                             => 'Sc',
-'Dash Punctuation'                            => 'Pd',
-'Decimal Digit Number'                        => 'Nd',
-'Enclosing Mark'                              => 'Me',
-'Final Punctuation'                           => 'Pf',
+'Currency_Symbol'                             => 'Sc',
+'Dash_Punctuation'                            => 'Pd',
+'Decimal_Number'                              => 'Nd',
+'Enclosing_Mark'                              => 'Me',
+'Final_Punctuation'                           => 'Pf',
 'Format'                                      => 'Cf',
-'Initial Punctuation'                         => 'Pi',
+'Initial_Punctuation'                         => 'Pi',
 'Letter'                                      => 'L',
-'Letter Number'                               => 'Nl',
-'Line Separator'                              => 'Zl',
-'Lowercase Letter'                            => 'Ll',
+'Letter_Number'                               => 'Nl',
+'Line_Separator'                              => 'Zl',
+'Lowercase_Letter'                            => 'Ll',
 'Mark'                                        => 'M',
-'Math Symbol'                                 => 'Sm',
-'Modifier Letter'                             => 'Lm',
-'Modifier Symbol'                             => 'Sk',
-'Non-Spacing Mark'                            => 'Mn',
-'Not Assigned'                                => 'Cn',
+'Math_Symbol'                                 => 'Sm',
+'Modifier_Letter'                             => 'Lm',
+'Modifier_Symbol'                             => 'Sk',
+'Non_Spacing_Mark'                            => 'Mn',
 'Number'                                      => 'N',
-'Open Punctuation'                            => 'Ps',
+'Open_Punctuation'                            => 'Ps',
 'Other'                                       => 'C',
-'Other Control'                               => 'Cc',
-'Other Format'                                => 'Cf',
-'Other Letter'                                => 'Lo',
-'Other Not Assigned'                          => 'Cn',
-'Other Number'                                => 'No',
-'Other Private Use'                           => 'Co',
-'Other Punctuation'                           => 'Po',
-'Other Surrogate'                             => 'Cs',
-'Other Symbol'                                => 'So',
-'Paragraph Separator'                         => 'Zp',
+'Other_Letter'                                => 'Lo',
+'Other_Number'                                => 'No',
+'Other_Punctuation'                           => 'Po',
+'Other_Symbol'                                => 'So',
+'Paragraph_Separator'                         => 'Zp',
 'Private Use'                                 => 'Co',
 'Punctuation'                                 => 'P',
 'Separator'                                   => 'Z',
-'Space Separator'                             => 'Zs',
-'Spacing Combining Mark'                      => 'Mc',
+'Space_Separator'                             => 'Zs',
+'Spacing_Mark'                                => 'Mc',
 'Surrogate'                                   => 'Cs',
 'Symbol'                                      => 'S',
-'Titlecase Letter'                            => 'Lt',
-'Uppercase Letter'                            => 'Lu',
+'Titlecase_Letter'                            => 'Lt',
+'Unassigned'                                  => 'Cn',
+'Uppercase_Letter'                            => 'Lu',
 );
 %utf8::IsPat =
 (
@@ -62,7 +57,7 @@
 	'Dash(?:[-_]|\s+)?Punctuation' => 'Pd',
 },
 'de' => {
-	'Decimal(?:[-_]|\s+)?Digit(?:[-_]|\s+)?Number' => 'Nd',
+	'Decimal(?:[-_]|\s+)?Number' => 'Nd',
 },
 'en' => {
 	'Enclosing(?:[-_]|\s+)?Mark' => 'Me',
@@ -96,7 +91,6 @@
 },
 'no' => {
 	'Non(?:[-_]|\s+)?Spacing(?:[-_]|\s+)?Mark' => 'Mn',
-	'Not(?:[-_]|\s+)?Assigned' => 'Cn',
 },
 'nu' => {
 	'Number' => 'N',
@@ -106,14 +100,9 @@
 },
 'ot' => {
 	'Other' => 'C',
-	'Other(?:[-_]|\s+)?Control' => 'Cc',
-	'Other(?:[-_]|\s+)?Format' => 'Cf',
 	'Other(?:[-_]|\s+)?Letter' => 'Lo',
-	'Other(?:[-_]|\s+)?Not(?:[-_]|\s+)?Assigned' => 'Cn',
 	'Other(?:[-_]|\s+)?Number' => 'No',
-	'Other(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use' => 'Co',
 	'Other(?:[-_]|\s+)?Punctuation' => 'Po',
-	'Other(?:[-_]|\s+)?Surrogate' => 'Cs',
 	'Other(?:[-_]|\s+)?Symbol' => 'So',
 },
 'pa' => {
@@ -130,7 +119,7 @@
 },
 'sp' => {
 	'Space(?:[-_]|\s+)?Separator' => 'Zs',
-	'Spacing(?:[-_]|\s+)?Combining(?:[-_]|\s+)?Mark' => 'Mc',
+	'Spacing(?:[-_]|\s+)?Mark' => 'Mc',
 },
 'su' => {
 	'Surrogate' => 'Cs',
@@ -141,6 +130,9 @@
 'ti' => {
 	'Titlecase(?:[-_]|\s+)?Letter' => 'Lt',
 },
+'un' => {
+	'Unassigned' => 'Cn',
+},
 'up' => {
 	'Uppercase(?:[-_]|\s+)?Letter' => 'Lu',
 },
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 66027a5..676e189 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -658,54 +658,48 @@ foreach my $in (sort { $In{$a} <=> $In{$b} } keys %In) {
 
 my %Is = (
 	'Letter'			=>	'L',
-	'Uppercase Letter'		=>	'Lu',
-	'Lowercase Letter'		=>	'Ll',
-	'Titlecase Letter'		=>	'Lt',
-	'Modifier Letter'		=>	'Lm',
-	'Other Letter'			=>	'Lo',
+	'Uppercase_Letter'		=>	'Lu',
+	'Lowercase_Letter'		=>	'Ll',
+	'Titlecase_Letter'		=>	'Lt',
+	'Modifier_Letter'		=>	'Lm',
+	'Other_Letter'			=>	'Lo',
 
 	'Mark'				=>	'M',
-	'Non-Spacing Mark'		=>	'Mn',
-	'Spacing Combining Mark'	=>	'Mc',
-	'Enclosing Mark'		=>	'Me',
+	'Non_Spacing_Mark'		=>	'Mn',
+	'Spacing_Mark'			=>	'Mc',
+	'Enclosing_Mark'		=>	'Me',
 
 	'Separator'			=>	'Z',
-	'Space Separator'		=>	'Zs',
-	'Line Separator'		=>	'Zl',
-	'Paragraph Separator'		=>	'Zp',
+	'Space_Separator'		=>	'Zs',
+	'Line_Separator'		=>	'Zl',
+	'Paragraph_Separator'		=>	'Zp',
 
 	'Number'			=>	'N',
-	'Decimal Digit Number'		=>	'Nd',
-	'Letter Number'			=>	'Nl',
-	'Other Number'			=>	'No',
+	'Decimal_Number'		=>	'Nd',
+	'Letter_Number'			=>	'Nl',
+	'Other_Number'			=>	'No',
 
 	'Punctuation'			=>	'P',
-	'Connector Punctuation'		=>	'Pc',
-	'Dash Punctuation'		=>	'Pd',
-	'Open Punctuation'		=>	'Ps',
-	'Close Punctuation'		=>	'Pe',
-	'Initial Punctuation'		=>	'Pi',
-	'Final Punctuation'		=>	'Pf',
-	'Other Punctuation'		=>	'Po',
+	'Connector_Punctuation'		=>	'Pc',
+	'Dash_Punctuation'		=>	'Pd',
+	'Open_Punctuation'		=>	'Ps',
+	'Close_Punctuation'		=>	'Pe',
+	'Initial_Punctuation'		=>	'Pi',
+	'Final_Punctuation'		=>	'Pf',
+	'Other_Punctuation'		=>	'Po',
 
 	'Symbol'			=>	'S',
-	'Math Symbol'			=>	'Sm',
-	'Currency Symbol'		=>	'Sc',
-	'Modifier Symbol'		=>	'Sk',
-	'Other Symbol'			=>	'So',
+	'Math_Symbol'			=>	'Sm',
+	'Currency_Symbol'		=>	'Sc',
+	'Modifier_Symbol'		=>	'Sk',
+	'Other_Symbol'			=>	'So',
 
 	'Other'				=>	'C',
 	'Control'			=>	'Cc',
 	'Format'			=>	'Cf',
 	'Surrogate'			=>	'Cs',
 	'Private Use'			=>	'Co',
-	'Not Assigned'			=>	'Cn',
-	# 'Other' aliases
-	'Other Control'			=>	'Cc',
-	'Other Format'			=>	'Cf',
-	'Other Surrogate'		=>	'Cs',
-	'Other Private Use'		=>	'Co',
-	'Other Not Assigned'		=>	'Cn',
+	'Unassigned'			=>	'Cn',
 );
 
 #
diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod
index 37e2f22..0b52afa 100644
--- a/pod/perlunicode.pod
+++ b/pod/perlunicode.pod
@@ -173,85 +173,85 @@ are available, such as C<\p{IsMirrored}> and C<\p{InTibetan}>.
 The C<\p{Is...}> test for "general properties" such as "letter",
 "digit", while the C<\p{In...}> test for Unicode scripts and blocks.
 
-The official Unicode script and block names have spaces and
-dashes and separators, but for convenience you can have
-dashes, spaces, and underbars at every word division, and
-you need not care about correct casing.  It is recommended,
-however, that for consistency you use the following naming:
-the official Unicode script or block name (see below for
-the additional rules that apply to block names), with the whitespace
-and dashes removed, and the words "uppercase-first-lowercase-otherwise".
-That is, "Latin-1 Supplement" becomes "Latin1Supplement".
+The official Unicode script and block names have spaces and dashes as
+separators, but for convenience you can have dashes, spaces, and
+underbars at every word division, and you need not care about correct
+casing.  It is recommended, however, that for consistency you use the
+following naming: the official Unicode script, block, or property name
+(see below for the additional rules that apply to block names),
+with whitespace and dashes replaced with underbar, and the words
+"uppercase-first-lowercase-rest".  That is, "Latin-1 Supplement"
+becomes "Latin_1_Supplement".
 
 You can also negate both C<\p{}> and C<\P{}> by introducing a caret
-(^) between the first curly and the property name: C<\p{^InTamil}> is
-equal to C<\P{InTamil}>.
+(^) between the first curly and the property name: C<\p{^In_Tamil}> is
+equal to C<\P{In_Tamil}>.
 
 The C<In> and C<Is> can be left out: C<\p{Greek}> is equal to
-C<\p{InGreek}>, C<\P{Pd}> is equal to C<\P{Pd}>.
+C<\p{In_Greek}>, C<\P{Pd}> is equal to C<\P{IsPd}>.
 
     Short       Long
 
     L           Letter
-    Lu          Uppercase Letter
-    Ll          Lowercase Letter
-    Lt          Titlecase Letter
-    Lm          Modifier Letter
-    Lo          Other Letter
+    Lu          Uppercase_Letter
+    Ll          Lowercase_Letter
+    Lt          Titlecase_Letter
+    Lm          Modifier_Letter
+    Lo          Other_Letter
 
     M           Mark
-    Mn          Non-Spacing Mark
-    Mc          Spacing Combining Mark
-    Me          Enclosing Mark
+    Mn          Non_Spacing_Mark
+    Mc          Spacing_Mark
+    Me          Enclosing_Mark
 
     N           Number
-    Nd          Decimal Digit Number
-    Nl          Letter Number
-    No          Other Number
+    Nd          Decimal_Number
+    Nl          Letter_Number
+    No          Other_Number
 
     P           Punctuation
-    Pc          Connector Punctuation
-    Pd          Dash Punctuation
-    Ps          Open Punctuation
-    Pe          Close Punctuation
-    Pi          Initial Punctuation
+    Pc          Connector_Punctuation
+    Pd          Dash_Punctuation
+    Ps          Open_Punctuation
+    Pe          Close_Punctuation
+    Pi          Initial_Punctuation
                 (may behave like Ps or Pe depending on usage)
-    Pf          Final Punctuation
+    Pf          Final_Punctuation
                 (may behave like Ps or Pe depending on usage)
-    Po          Other Punctuation
+    Po          Other_Punctuation
 
     S           Symbol
-    Sm          Math Symbol
-    Sc          Currency Symbol
-    Sk          Modifier Symbol
-    So          Other Symbol
+    Sm          Math_Symbol
+    Sc          Currency_Symbol
+    Sk          Modifier_Symbol
+    So          Other_Symbol
 
     Z           Separator
-    Zs          Space Separator
-    Zl          Line Separator
-    Zp          Paragraph Separator
+    Zs          Space_Separator
+    Zl          Line_Separator
+    Zp          Paragraph_Separator
 
     C           Other
-    Cc          (Other) Control
-    Cf          (Other) Format
-    Cs          (Other) Surrogate
-    Co          (Other) Private Use
-    Cn          (Other) Not Assigned
+    Cc          Control
+    Cf          Format
+    Cs          Surrogate
+    Co          Private_Use
+    Cn          Unassigned
 
 There's also C<L&> which is an alias for C<Ll>, C<Lu>, and C<Lt>.
 
 The following reserved ranges have C<In> tests:
 
-    CJK Ideograph Extension A
-    CJK Ideograph
-    Hangul Syllable
-    Non Private Use High Surrogate
-    Private Use High Surrogate
-    Low Surrogate
-    Private Surrogate
-    CJK Ideograph Extension B
-    Plane 15 Private Use
-    Plane 16 Private Use
+    CJK_Ideograph_Extension_A
+    CJK_Ideograph
+    Hangul_Syllable
+    Non_Private_Use_High_Surrogate
+    Private_Use_High_Surrogate
+    Low_Surrogate
+    Private_Surrogate
+    CJK_Ideograph_Extension_B
+    Plane_15_Private_Use
+    Plane_16_Private_Use
 
 For example C<"\x{AC00}" =~ \p{HangulSyllable}> will test true.
 (Handling of surrogates is not implemented yet, because Perl
@@ -345,7 +345,7 @@ properties, defined by the F<PropList> Unicode database:
     Other_Math
     Other_Uppercase
     Quotation_Mark
-    White_space
+    White_Space
 
 and further derived properties:
 
@@ -360,7 +360,7 @@ and further derived properties:
     Any             Any character
     Assigned        Any non-Cn character
     Common          Any character (or unassigned code point)
-                    not explicitly assigned to a script.
+                    not explicitly assigned to a script
 
 =head2 Blocks
 
@@ -385,7 +385,7 @@ a script called C<Katakana> and a block called C<Katakana>, the block
 version has C<Block> appended to its name, C<\p{InKatakanaBlock}>.
 
 Notice that this definition was introduced in Perl 5.8.0: in Perl
-5.6.0 only the blocks were used; in Perl 5.8.0 scripts became the
+5.6 only the blocks were used; in Perl 5.8.0 scripts became the
 preferential Unicode character class definition; this meant that
 the definitions of some character classes changed (the ones in the
 below list that have the C<Block> appended).