From: Jarkko Hietaniemi Date: Wed, 16 Apr 2003 09:21:25 +0000 (+0000) Subject: Synchronize the specifications of the POSIX character X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=c65e4d19dcce98b8a7e895060b6fe141cfaef8a6;p=p5sagit%2Fp5-mst-13.2.git Synchronize the specifications of the POSIX character classes alnum, graph, and print closer to the planned Unicode proposal. p4raw-id: //depot/perl@19231 --- diff --git a/lib/unicore/lib/Alnum.pl b/lib/unicore/lib/Alnum.pl index 0ec13c5..5f2b1f0 100644 --- a/lib/unicore/lib/Alnum.pl +++ b/lib/unicore/lib/Alnum.pl @@ -13,10 +13,8 @@ return <<'END'; 0041 005A 0061 007A 00AA -00B2 00B3 00B5 -00B9 00BA -00BC 00BE +00BA 00C0 00D6 00D8 00F6 00F8 0220 @@ -85,7 +83,6 @@ return <<'END'; 09DC 09DD 09DF 09E3 09E6 09F1 -09F4 09F9 0A02 0A05 0A0A 0A0F 0A10 @@ -144,7 +141,7 @@ return <<'END'; 0BC6 0BC8 0BCA 0BCD 0BD7 -0BE7 0BF2 +0BE7 0BEF 0C01 0C03 0C05 0C0C 0C0E 0C10 @@ -215,7 +212,7 @@ return <<'END'; 0EDC 0EDD 0F00 0F18 0F19 -0F20 0F33 +0F20 0F29 0F35 0F37 0F39 @@ -263,13 +260,12 @@ return <<'END'; 1318 131E 1320 1346 1348 135A -1369 137C +1369 1371 13A0 13F4 1401 166C 166F 1676 1681 169A 16A0 16EA -16EE 16F0 1700 170C 170E 1714 1720 1734 @@ -306,9 +302,8 @@ return <<'END'; 1FE0 1FEC 1FF2 1FF4 1FF6 1FFC -2070 2071 -2074 2079 -207F 2089 +2071 +207F 20D0 20EA 2102 2107 @@ -323,14 +318,10 @@ return <<'END'; 2133 2139 213D 213F 2145 2149 -2153 2183 -2460 249B -24EA 24FE -2776 2793 -3005 3007 -3021 302F +3005 3006 +302A 302F 3031 3035 -3038 303C +303B 303C 3041 3096 3099 309A 309D 309F @@ -338,13 +329,8 @@ return <<'END'; 30FC 30FF 3105 312C 3131 318E -3192 3195 31A0 31B7 31F0 31FF -3220 3229 -3251 325F -3280 3289 -32B1 32BF 3400 4DB5 4E00 9FA5 A000 A48C @@ -377,8 +363,7 @@ FFCA FFCF FFD2 FFD7 FFDA FFDC 10300 1031E -10320 10323 -10330 1034A +10330 10349 10400 10425 10428 1044D 1D165 1D169 diff --git a/lib/unicore/lib/Graph.pl b/lib/unicore/lib/Graph.pl index 212c4c6..1eff89d 100644 --- a/lib/unicore/lib/Graph.pl +++ b/lib/unicore/lib/Graph.pl @@ -277,7 +277,7 @@ return <<'END'; 1FDD 1FEF 1FF2 1FF4 1FF6 1FFE -2010 2027 +2010 2029 2030 2052 2057 2070 2071 @@ -331,7 +331,7 @@ return <<'END'; A000 A48C A490 A4C6 AC00 D7A3 -E000 FA2D +F900 FA2D FA30 FA6A FB00 FB06 FB13 FB17 @@ -394,6 +394,4 @@ FFFC FFFD 1D7CE 1D7FF 20000 2A6D6 2F800 2FA1D -F0000 FFFFD -100000 10FFFD END diff --git a/lib/unicore/lib/Print.pl b/lib/unicore/lib/Print.pl index d1eb1e2..ea9c553 100644 --- a/lib/unicore/lib/Print.pl +++ b/lib/unicore/lib/Print.pl @@ -278,7 +278,7 @@ return <<'END'; 1FF2 1FF4 1FF6 1FFE 2000 200B -2010 2027 +2010 2029 202F 2052 2057 205F @@ -333,7 +333,7 @@ return <<'END'; A000 A48C A490 A4C6 AC00 D7A3 -E000 FA2D +F900 FA2D FA30 FA6A FB00 FB06 FB13 FB17 @@ -396,6 +396,4 @@ FFFC FFFD 1D7CE 1D7FF 20000 2A6D6 2F800 2FA1D -F0000 FFFFD -100000 10FFFD END diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 654301e..fa07346 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -2,6 +2,9 @@ use strict; use Carp; +die "$0: Please run me as ./mktables to avoid unnecessary differences\n" + unless $0 eq "./mktables"; + ## ## mktables -- create the runtime Perl Unicode files (lib/unicore/**/*.pl) ## from the Unicode database files (lib/unicore/*.txt). @@ -625,7 +628,7 @@ sub UnicodeData_Txt() # 005F: SPACING UNDERSCORE $Cat{Word}->$op($code) if $cat =~ /^[LMN]/ || $code == 0x005F; - $Cat{Alnum}->$op($code) if $cat =~ /^[LMN]/; + $Cat{Alnum}->$op($code) if $cat =~ /^[LM]|Nd/; $Cat{Alpha}->$op($code) if $cat =~ /^[LM]/; @@ -647,7 +650,7 @@ sub UnicodeData_Txt() || $code == 0x2028 # 2028: LINE SEPARATOR || $code == 0x2029;# 2029: PARAGRAPH SEP. - $Cat{Blank}->$op($code) if $cat =~ /^Z[^lp]$/ + $Cat{Blank}->$op($code) if $cat eq "Zs" || $code == 0x0009 # 0009: HORIZONTAL TAB || $code == 0x0020; # 0020: SPACE @@ -657,8 +660,9 @@ sub UnicodeData_Txt() $Cat{Title}->$op($code) if $cat eq "Lt"; $Cat{ASCII}->$op($code) if $code <= 0x007F; $Cat{Cntrl}->$op($code) if $cat =~ /^C/; - $Cat{Graph}->$op($code) if $cat =~ /^([LMNPS]|Co)/; - $Cat{Print}->$op($code) if $cat =~ /^([LMNPS]|Co|Zs)/; + $Cat{Graph}->$op($code) if $cat =~ /^[^C]/ + && $cat ne "Zs"; + $Cat{Print}->$op($code) if $cat =~ /^[^C]/; $Cat{Punct}->$op($code) if $cat =~ /^P/; $Cat{XDigit}->$op($code) if ($code >= 0x30 && $code <= 0x39) ## 0..9 diff --git a/utf8.c b/utf8.c index b112d4c..0dd9ad8 100644 --- a/utf8.c +++ b/utf8.c @@ -1235,7 +1235,7 @@ Perl_is_utf8_upper(pTHX_ U8 *p) if (!is_utf8_char(p)) return FALSE; if (!PL_utf8_upper) - PL_utf8_upper = swash_init("utf8", "IsUpper", &PL_sv_undef, 0, 0); + PL_utf8_upper = swash_init("utf8", "IsUppercase", &PL_sv_undef, 0, 0); return swash_fetch(PL_utf8_upper, p, TRUE) != 0; } @@ -1245,7 +1245,7 @@ Perl_is_utf8_lower(pTHX_ U8 *p) if (!is_utf8_char(p)) return FALSE; if (!PL_utf8_lower) - PL_utf8_lower = swash_init("utf8", "IsLower", &PL_sv_undef, 0, 0); + PL_utf8_lower = swash_init("utf8", "IsLowercase", &PL_sv_undef, 0, 0); return swash_fetch(PL_utf8_lower, p, TRUE) != 0; }