From: Jarkko Hietaniemi Date: Thu, 18 Oct 2001 02:24:52 +0000 (+0000) Subject: Prettyprinting. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=67765ba60fe5f14f3000bcc6923b10c40ab8b513;p=p5sagit%2Fp5-mst-13.2.git Prettyprinting. p4raw-id: //depot/perl@12489 --- diff --git a/lib/unicore/In.pl b/lib/unicore/In.pl index cd872fa..8d59516 100644 --- a/lib/unicore/In.pl +++ b/lib/unicore/In.pl @@ -3,178 +3,178 @@ # Any changes made here will be lost! %utf8::In = ( -'ARABIC' => '16', -'ARMENIAN' => '14', -'ASCII_Hex_Digit' => '152', -'Alphabetic' => '164', -'Alphabetic Presentation Forms' => '129', -'Any' => '171', -'Arabic Block' => '62', -'Arabic Presentation Forms-A' => '130', -'Arabic Presentation Forms-B' => '134', -'Armenian Block' => '60', -'Arrows' => '96', -'Assigned' => '163', -'BENGALI' => '20', -'BOPOMOFO' => '45', -'Basic Latin' => '51', -'Bengali Block' => '66', -'Bidi_Control' => '159', -'Block Elements' => '103', -'Bopomofo Block' => '114', -'Bopomofo Extended' => '117', -'Box Drawing' => '102', -'Braille Patterns' => '107', -'Byzantine Musical Symbols' => '140', -'CANADIAN-ABORIGINAL' => '37', -'CHEROKEE' => '36', -'CJK Compatibility' => '119', -'CJK Compatibility Forms' => '132', -'CJK Compatibility Ideographs' => '128', -'CJK Compatibility Ideographs Supplement' => '144', -'CJK Ideograph' => '1', -'CJK Ideograph Extension A' => '0', -'CJK Ideograph Extension B' => '7', -'CJK Radicals Supplement' => '108', -'CJK Symbols and Punctuation' => '111', -'CJK Unified Ideographs' => '121', -'CJK Unified Ideographs Extension A' => '120', -'CJK Unified Ideographs Extension B' => '143', -'CYRILLIC' => '13', -'Cherokee Block' => '82', -'Combining Diacritical Marks' => '57', -'Combining Half Marks' => '131', -'Combining Marks for Symbols' => '93', -'Common' => '50', -'Control Pictures' => '99', -'Currency Symbols' => '92', -'Cyrillic Block' => '59', -'DESERET' => '49', -'DEVANAGARI' => '19', -'Dash' => '151', -'Deseret Block' => '139', -'Devanagari Block' => '65', -'Diacritic' => '154', -'Dingbats' => '106', -'ETHIOPIC' => '35', -'Enclosed Alphanumerics' => '101', -'Enclosed CJK Letters and Months' => '118', -'Ethiopic Block' => '81', -'Extender' => '155', -'GEORGIAN' => '33', -'GOTHIC' => '48', -'GREEK' => '11', -'GUJARATI' => '22', -'GURMUKHI' => '21', -'General Punctuation' => '90', -'Geometric Shapes' => '104', -'Georgian Block' => '79', -'Gothic Block' => '138', -'Greek Block' => '58', -'Greek Extended' => '89', -'Gujarati Block' => '68', -'Gurmukhi Block' => '67', -'HAN' => '42', -'HANGUL' => '34', -'HEBREW' => '15', -'HIRAGANA' => '43', -'Halfwidth and Fullwidth Forms' => '136', -'Hangul Compatibility Jamo' => '115', -'Hangul Jamo' => '80', -'Hangul Syllable' => '2', -'Hangul Syllables' => '124', -'Hebrew Block' => '61', -'Hex_Digit' => '153', -'High Private Use Surrogates' => '126', -'High Surrogates' => '125', -'Hiragana Block' => '112', -'Hyphen' => '150', -'ID_Continue' => '170', -'ID_Start' => '169', -'INHERITED' => '12', -'IPA Extensions' => '55', -'Ideographic' => '161', -'Ideographic Description Characters' => '110', -'Join_Control' => '158', -'KANNADA' => '26', -'KATAKANA' => '44', -'KHMER' => '40', -'Kanbun' => '116', -'Kangxi Radicals' => '109', -'Kannada Block' => '72', -'Katakana Block' => '113', -'Khmer Block' => '86', -'LAO' => '30', -'LATIN' => '10', -'Lampersand' => '168', -'Lao Block' => '76', -'Latin Extended Additional' => '88', -'Latin Extended-A' => '53', -'Latin Extended-B' => '54', -'Latin-1 Supplement' => '52', -'Letterlike Symbols' => '94', -'Low Surrogate' => '5', -'Low Surrogates' => '127', -'Lowercase' => '165', -'MALAYALAM' => '27', -'MONGOLIAN' => '41', -'MYANMAR' => '32', -'Malayalam Block' => '73', -'Math' => '167', -'Mathematical Alphanumeric Symbols' => '142', -'Mathematical Operators' => '97', -'Miscellaneous Symbols' => '105', -'Miscellaneous Technical' => '98', -'Mongolian Block' => '87', -'Musical Symbols' => '141', -'Myanmar Block' => '78', -'Non Private Use High Surrogate' => '3', -'Noncharacter_Code_Point' => '162', -'Number Forms' => '95', -'OGHAM' => '38', -'OLD-ITALIC' => '47', -'ORIYA' => '23', -'Ogham Block' => '84', -'Old Italic' => '137', -'Optical Character Recognition' => '100', -'Oriya Block' => '69', -'Other_Alphabetic' => '157', -'Other_Lowercase' => '156', -'Other_Math' => '149', -'Other_Uppercase' => '160', -'Plane 15 Private Use' => '8', -'Plane 16 Private Use' => '9', -'Private Use' => '6', -'Private Use High Surrogate' => '4', -'Quotation_Mark' => '148', -'RUNIC' => '39', -'Runic Block' => '85', -'SINHALA' => '28', -'SYRIAC' => '17', -'Sinhala Block' => '74', -'Small Form Variants' => '133', -'Spacing Modifier Letters' => '56', -'Specials' => '135', -'Superscripts and Subscripts' => '91', -'Syriac Block' => '63', -'TAMIL' => '24', -'TELUGU' => '25', -'THAANA' => '18', -'THAI' => '29', -'TIBETAN' => '31', -'Tags' => '145', -'Tamil Block' => '70', -'Telugu Block' => '71', -'Terminal_Punctuation' => '147', -'Thaana Block' => '64', -'Thai Block' => '75', -'Tibetan Block' => '77', -'Unified Canadian Aboriginal Syllabics' => '83', -'Uppercase' => '166', -'White_space' => '146', -'YI' => '46', -'Yi Radicals' => '123', -'Yi Syllables' => '122', +'Alphabetic' => '164', +'Alphabetic Presentation Forms' => '129', +'Any' => '171', +'ARABIC' => '16', +'Arabic Block' => '62', +'Arabic Presentation Forms-A' => '130', +'Arabic Presentation Forms-B' => '134', +'ARMENIAN' => '14', +'Armenian Block' => '60', +'Arrows' => '96', +'ASCII_Hex_Digit' => '152', +'Assigned' => '163', +'Basic Latin' => '51', +'BENGALI' => '20', +'Bengali Block' => '66', +'Bidi_Control' => '159', +'Block Elements' => '103', +'BOPOMOFO' => '45', +'Bopomofo Block' => '114', +'Bopomofo Extended' => '117', +'Box Drawing' => '102', +'Braille Patterns' => '107', +'Byzantine Musical Symbols' => '140', +'CANADIAN-ABORIGINAL' => '37', +'CHEROKEE' => '36', +'Cherokee Block' => '82', +'CJK Compatibility' => '119', +'CJK Compatibility Forms' => '132', +'CJK Compatibility Ideographs' => '128', +'CJK Compatibility Ideographs Supplement' => '144', +'CJK Ideograph' => '1', +'CJK Ideograph Extension A' => '0', +'CJK Ideograph Extension B' => '7', +'CJK Radicals Supplement' => '108', +'CJK Symbols and Punctuation' => '111', +'CJK Unified Ideographs' => '121', +'CJK Unified Ideographs Extension A' => '120', +'CJK Unified Ideographs Extension B' => '143', +'Combining Diacritical Marks' => '57', +'Combining Half Marks' => '131', +'Combining Marks for Symbols' => '93', +'Common' => '50', +'Control Pictures' => '99', +'Currency Symbols' => '92', +'CYRILLIC' => '13', +'Cyrillic Block' => '59', +'Dash' => '151', +'DESERET' => '49', +'Deseret Block' => '139', +'DEVANAGARI' => '19', +'Devanagari Block' => '65', +'Diacritic' => '154', +'Dingbats' => '106', +'Enclosed Alphanumerics' => '101', +'Enclosed CJK Letters and Months' => '118', +'ETHIOPIC' => '35', +'Ethiopic Block' => '81', +'Extender' => '155', +'General Punctuation' => '90', +'Geometric Shapes' => '104', +'GEORGIAN' => '33', +'Georgian Block' => '79', +'GOTHIC' => '48', +'Gothic Block' => '138', +'GREEK' => '11', +'Greek Block' => '58', +'Greek Extended' => '89', +'GUJARATI' => '22', +'Gujarati Block' => '68', +'GURMUKHI' => '21', +'Gurmukhi Block' => '67', +'Halfwidth and Fullwidth Forms' => '136', +'HAN' => '42', +'HANGUL' => '34', +'Hangul Compatibility Jamo' => '115', +'Hangul Jamo' => '80', +'Hangul Syllable' => '2', +'Hangul Syllables' => '124', +'HEBREW' => '15', +'Hebrew Block' => '61', +'Hex_Digit' => '153', +'High Private Use Surrogates' => '126', +'High Surrogates' => '125', +'HIRAGANA' => '43', +'Hiragana Block' => '112', +'Hyphen' => '150', +'ID_Continue' => '170', +'ID_Start' => '169', +'Ideographic' => '161', +'Ideographic Description Characters' => '110', +'INHERITED' => '12', +'IPA Extensions' => '55', +'Join_Control' => '158', +'Kanbun' => '116', +'Kangxi Radicals' => '109', +'KANNADA' => '26', +'Kannada Block' => '72', +'KATAKANA' => '44', +'Katakana Block' => '113', +'KHMER' => '40', +'Khmer Block' => '86', +'Lampersand' => '168', +'LAO' => '30', +'Lao Block' => '76', +'LATIN' => '10', +'Latin Extended Additional' => '88', +'Latin Extended-A' => '53', +'Latin Extended-B' => '54', +'Latin-1 Supplement' => '52', +'Letterlike Symbols' => '94', +'Low Surrogate' => '5', +'Low Surrogates' => '127', +'Lowercase' => '165', +'MALAYALAM' => '27', +'Malayalam Block' => '73', +'Math' => '167', +'Mathematical Alphanumeric Symbols' => '142', +'Mathematical Operators' => '97', +'Miscellaneous Symbols' => '105', +'Miscellaneous Technical' => '98', +'MONGOLIAN' => '41', +'Mongolian Block' => '87', +'Musical Symbols' => '141', +'MYANMAR' => '32', +'Myanmar Block' => '78', +'Non Private Use High Surrogate' => '3', +'Noncharacter_Code_Point' => '162', +'Number Forms' => '95', +'OGHAM' => '38', +'Ogham Block' => '84', +'Old Italic' => '137', +'OLD-ITALIC' => '47', +'Optical Character Recognition' => '100', +'ORIYA' => '23', +'Oriya Block' => '69', +'Other_Alphabetic' => '157', +'Other_Lowercase' => '156', +'Other_Math' => '149', +'Other_Uppercase' => '160', +'Plane 15 Private Use' => '8', +'Plane 16 Private Use' => '9', +'Private Use' => '6', +'Private Use High Surrogate' => '4', +'Quotation_Mark' => '148', +'RUNIC' => '39', +'Runic Block' => '85', +'SINHALA' => '28', +'Sinhala Block' => '74', +'Small Form Variants' => '133', +'Spacing Modifier Letters' => '56', +'Specials' => '135', +'Superscripts and Subscripts' => '91', +'SYRIAC' => '17', +'Syriac Block' => '63', +'Tags' => '145', +'TAMIL' => '24', +'Tamil Block' => '70', +'TELUGU' => '25', +'Telugu Block' => '71', +'Terminal_Punctuation' => '147', +'THAANA' => '18', +'Thaana Block' => '64', +'THAI' => '29', +'Thai Block' => '75', +'TIBETAN' => '31', +'Tibetan Block' => '77', +'Unified Canadian Aboriginal Syllabics' => '83', +'Uppercase' => '166', +'White_space' => '146', +'YI' => '46', +'Yi Radicals' => '123', +'Yi Syllables' => '122', ); %utf8::InPat = ( @@ -187,10 +187,10 @@ }, 'ar' => { 'ARABIC' => '16', - 'ARMENIAN' => '14', 'Arabic(?:[-_]|\s+)?Block' => '62', 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => '130', 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => '134', + 'ARMENIAN' => '14', 'Armenian(?:[-_]|\s+)?Block' => '60', 'Arrows' => '96', }, @@ -263,8 +263,8 @@ }, 'de' => { 'DESERET' => '49', - 'DEVANAGARI' => '19', 'Deseret(?:[-_]|\s+)?Block' => '139', + 'DEVANAGARI' => '19', 'Devanagari(?:[-_]|\s+)?Block' => '65', }, 'di' => { @@ -283,9 +283,9 @@ 'Extender' => '155', }, 'ge' => { - 'GEORGIAN' => '33', 'General(?:[-_]|\s+)?Punctuation' => '90', 'Geometric(?:[-_]|\s+)?Shapes' => '104', + 'GEORGIAN' => '33', 'Georgian(?:[-_]|\s+)?Block' => '79', }, 'go' => { @@ -299,14 +299,14 @@ }, 'gu' => { 'GUJARATI' => '22', - 'GURMUKHI' => '21', 'Gujarati(?:[-_]|\s+)?Block' => '68', + 'GURMUKHI' => '21', 'Gurmukhi(?:[-_]|\s+)?Block' => '67', }, 'ha' => { + 'Halfwidth(?:[-_]|\s+)?and(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => '136', 'HAN' => '42', 'HANGUL' => '34', - 'Halfwidth(?:[-_]|\s+)?and(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => '136', 'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => '115', 'Hangul(?:[-_]|\s+)?Jamo' => '80', 'Hangul(?:[-_]|\s+)?Syllable' => '2', @@ -318,9 +318,9 @@ 'Hex(?:[-_]|\s+)?Digit' => '153', }, 'hi' => { - 'HIRAGANA' => '43', 'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => '126', 'High(?:[-_]|\s+)?Surrogates' => '125', + 'HIRAGANA' => '43', 'Hiragana(?:[-_]|\s+)?Block' => '112', }, 'hy' => { @@ -342,11 +342,11 @@ 'Join(?:[-_]|\s+)?Control' => '158', }, 'ka' => { - 'KANNADA' => '26', - 'KATAKANA' => '44', 'Kanbun' => '116', 'Kangxi(?:[-_]|\s+)?Radicals' => '109', + 'KANNADA' => '26', 'Kannada(?:[-_]|\s+)?Block' => '72', + 'KATAKANA' => '44', 'Katakana(?:[-_]|\s+)?Block' => '113', }, 'kh' => { @@ -354,10 +354,10 @@ 'Khmer(?:[-_]|\s+)?Block' => '86', }, 'la' => { - 'LAO' => '30', - 'LATIN' => '10', 'Lampersand' => '168', + 'LAO' => '30', 'Lao(?:[-_]|\s+)?Block' => '76', + 'LATIN' => '10', 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => '88', 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => '53', 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => '54', @@ -405,8 +405,8 @@ 'Ogham(?:[-_]|\s+)?Block' => '84', }, 'ol' => { - 'OLD(?:[-_]|\s+)?ITALIC' => '47', 'Old(?:[-_]|\s+)?Italic' => '137', + 'OLD(?:[-_]|\s+)?ITALIC' => '47', }, 'op' => { 'Optical(?:[-_]|\s+)?Character(?:[-_]|\s+)?Recognition' => '100', @@ -455,8 +455,8 @@ 'Syriac(?:[-_]|\s+)?Block' => '63', }, 'ta' => { - 'TAMIL' => '24', 'Tags' => '145', + 'TAMIL' => '24', 'Tamil(?:[-_]|\s+)?Block' => '70', }, 'te' => { @@ -466,8 +466,8 @@ }, 'th' => { 'THAANA' => '18', - 'THAI' => '29', 'Thaana(?:[-_]|\s+)?Block' => '64', + 'THAI' => '29', 'Thai(?:[-_]|\s+)?Block' => '75', }, 'ti' => { diff --git a/lib/unicore/Is.pl b/lib/unicore/Is.pl index ef99c29..6ee87e3 100644 --- a/lib/unicore/Is.pl +++ b/lib/unicore/Is.pl @@ -3,48 +3,48 @@ # Any changes made here will be lost! %utf8::Is = ( -'Close Punctuation' => 'Pe', -'Connector Punctuation' => 'Pc', -'Control' => 'Cc', -'Currency Symbol' => 'Sc', -'Dash Punctuation' => 'Pd', -'Decimal Digit Number' => 'Nd', -'Enclosing Mark' => 'Me', -'Final Punctuation' => 'Pf', -'Format' => 'Cf', -'Initial Punctuation' => 'Pi', -'Letter' => 'L', -'Letter Number' => 'Nl', -'Line Separator' => 'Zl', -'Lowercase Letter' => 'Ll', -'Mark' => 'M', -'Math Symbol' => 'Sm', -'Modifier Letter' => 'Lm', -'Modifier Symbol' => 'Sk', -'Non-Spacing Mark' => 'Mn', -'Not Assigned' => 'Cn', -'Number' => 'N', -'Open Punctuation' => 'Ps', -'Other' => 'C', -'Other Control' => 'Cc', -'Other Format' => 'Cf', -'Other Letter' => 'Lo', -'Other Not Assigned' => 'Cn', -'Other Number' => 'No', -'Other Private Use' => 'Co', -'Other Punctuation' => 'Po', -'Other Surrogate' => 'Cs', -'Other Symbol' => 'So', -'Paragraph Separator' => 'Zp', -'Private Use' => 'Co', -'Punctuation' => 'P', -'Separator' => 'Z', -'Space Separator' => 'Zs', -'Spacing Combining Mark' => 'Mc', -'Surrogate' => 'Cs', -'Symbol' => 'S', -'Titlecase Letter' => 'Lt', -'Uppercase Letter' => 'Lu', +'Close Punctuation' => 'Pe', +'Connector Punctuation' => 'Pc', +'Control' => 'Cc', +'Currency Symbol' => 'Sc', +'Dash Punctuation' => 'Pd', +'Decimal Digit Number' => 'Nd', +'Enclosing Mark' => 'Me', +'Final Punctuation' => 'Pf', +'Format' => 'Cf', +'Initial Punctuation' => 'Pi', +'Letter' => 'L', +'Letter Number' => 'Nl', +'Line Separator' => 'Zl', +'Lowercase Letter' => 'Ll', +'Mark' => 'M', +'Math Symbol' => 'Sm', +'Modifier Letter' => 'Lm', +'Modifier Symbol' => 'Sk', +'Non-Spacing Mark' => 'Mn', +'Not Assigned' => 'Cn', +'Number' => 'N', +'Open Punctuation' => 'Ps', +'Other' => 'C', +'Other Control' => 'Cc', +'Other Format' => 'Cf', +'Other Letter' => 'Lo', +'Other Not Assigned' => 'Cn', +'Other Number' => 'No', +'Other Private Use' => 'Co', +'Other Punctuation' => 'Po', +'Other Surrogate' => 'Cs', +'Other Symbol' => 'So', +'Paragraph Separator' => 'Zp', +'Private Use' => 'Co', +'Punctuation' => 'P', +'Separator' => 'Z', +'Space Separator' => 'Zs', +'Spacing Combining Mark' => 'Mc', +'Surrogate' => 'Cs', +'Symbol' => 'S', +'Titlecase Letter' => 'Lt', +'Uppercase Letter' => 'Lu', ); %utf8::IsPat = ( diff --git a/lib/unicore/mktables b/lib/unicore/mktables index f851302..060a0e6 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -545,7 +545,7 @@ sub mapping { %utf8::${name} = ( EOT - for my $i (sort keys %$map) { + for my $i (sort { lc $a cmp lc $b } keys %$map) { my $pat = $i; # Here is the 'fuzzification': accept any space, # dash, or underbar where in the official name @@ -555,7 +555,7 @@ EOT # The prefix length of 2 is enough spread, # and besides, we have 'Yi' as an In category. push @{$pat{lc(substr($i, 0, 2))}}, [ $i, $pat ]; - print $fh "'$i' => '$map->{$i}',\n"; + printf $fh "%-45s => '$map->{$i}',\n", "'$i'"; } print $fh <