From: Jarkko Hietaniemi Date: Wed, 4 Jun 2003 11:02:54 +0000 (+0000) Subject: On closer reading the proposed UTS#18 update required X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=d75d706fd8fb3b6eda1b75fcf6c854883a424bcd;p=p5sagit%2Fp5-mst-13.2.git On closer reading the proposed UTS#18 update required even more changes. p4raw-id: //depot/perl@19686 --- diff --git a/lib/unicore/lib/Blank.pl b/lib/unicore/lib/Blank.pl index 5de7d7d..639ccf4 100644 --- a/lib/unicore/lib/Blank.pl +++ b/lib/unicore/lib/Blank.pl @@ -14,7 +14,7 @@ return <<'END'; 00A0 1680 180E -2000 200B +2000 200A 202F 205F 3000 diff --git a/lib/unicore/lib/Graph.pl b/lib/unicore/lib/Graph.pl index 943056c..79ef3db 100644 --- a/lib/unicore/lib/Graph.pl +++ b/lib/unicore/lib/Graph.pl @@ -278,7 +278,8 @@ return <<'END'; 1FDD 1FEF 1FF2 1FF4 1FF6 1FFE -200C 202E +200B 2027 +202A 202E 2030 2054 2057 2060 2063 diff --git a/lib/unicore/lib/Print.pl b/lib/unicore/lib/Print.pl index 54954e7..a5bae46 100644 --- a/lib/unicore/lib/Print.pl +++ b/lib/unicore/lib/Print.pl @@ -9,9 +9,10 @@ # Meaning: [[:Print:]] # return <<'END'; +0009 000D 0020 007E -00A0 00AC -00AE 0236 +0085 +00A0 0236 0250 0357 035D 036F 0374 0375 @@ -36,14 +37,14 @@ return <<'END'; 05BB 05C4 05D0 05EA 05F0 05F4 +0600 0603 060C 0615 061B 061F 0621 063A 0640 0658 -0660 06DC -06DE 070D -0710 074A +0660 070D +070F 074A 074D 074F 0780 07B1 0901 0939 @@ -246,8 +247,7 @@ return <<'END'; 1760 176C 176E 1770 1772 1773 -1780 17B3 -17B6 17DD +1780 17DD 17E0 17E9 17F0 17F9 1800 180E @@ -280,12 +280,10 @@ return <<'END'; 1FDD 1FEF 1FF2 1FF4 1FF6 1FFE -2000 200B -2010 2027 -202F 2054 +2000 2054 2057 -205F -2070 2071 +205F 2063 +206A 2071 2074 208E 20A0 20B1 20D0 20EA @@ -331,7 +329,7 @@ return <<'END'; A000 A48C A490 A4C6 AC00 D7A3 -F900 FA2D +E000 FA2D FA30 FA6A FB00 FB06 FB13 FB17 @@ -352,6 +350,7 @@ FE54 FE66 FE68 FE6B FE70 FE74 FE76 FEFC +FEFF FF01 FFBE FFC2 FFC7 FFCA FFCF @@ -359,7 +358,7 @@ FFD2 FFD7 FFDA FFDC FFE0 FFE6 FFE8 FFEE -FFFC FFFD +FFF9 FFFD 10000 1000B 1000D 10026 10028 1003A @@ -385,8 +384,7 @@ FFFC FFFD 1083F 1D000 1D0F5 1D100 1D126 -1D12A 1D172 -1D17B 1D1DD +1D12A 1D1DD 1D300 1D356 1D400 1D454 1D456 1D49C @@ -411,5 +409,9 @@ FFFC FFFD 1D7CE 1D7FF 20000 2A6D6 2F800 2FA1D +E0001 +E0020 E007F E0100 E01EF +F0000 FFFFD +100000 10FFFD END diff --git a/lib/unicore/lib/Space.pl b/lib/unicore/lib/Space.pl index 9aa12c8..6c1cc2e 100644 --- a/lib/unicore/lib/Space.pl +++ b/lib/unicore/lib/Space.pl @@ -15,7 +15,7 @@ return <<'END'; 00A0 1680 180E -2000 200B +2000 200A 2028 2029 202F 205F diff --git a/lib/unicore/lib/SpacePer.pl b/lib/unicore/lib/SpacePer.pl index 18911cf..cc12e07 100644 --- a/lib/unicore/lib/SpacePer.pl +++ b/lib/unicore/lib/SpacePer.pl @@ -16,7 +16,7 @@ return <<'END'; 00A0 1680 180E -2000 200B +2000 200A 2028 2029 202F 205F diff --git a/lib/unicore/lib/Word.pl b/lib/unicore/lib/Word.pl index 0bf2ea4..20936b1 100644 --- a/lib/unicore/lib/Word.pl +++ b/lib/unicore/lib/Word.pl @@ -312,6 +312,8 @@ return <<'END'; 1FE0 1FEC 1FF2 1FF4 1FF6 1FFC +203F 2040 +2054 2070 2071 2074 2079 207F 2089 @@ -340,8 +342,7 @@ return <<'END'; 3041 3096 3099 309A 309D 309F -30A1 30FA -30FC 30FF +30A1 30FF 3105 312C 3131 318E 3192 3195 @@ -372,12 +373,15 @@ FD92 FDC7 FDF0 FDFB FE00 FE0F FE20 FE23 +FE33 FE34 +FE4D FE4F FE70 FE74 FE76 FEFC FF10 FF19 FF21 FF3A +FF3F FF41 FF5A -FF66 FFBE +FF65 FFBE FFC2 FFC7 FFCA FFCF FFD2 FFD7 diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 083bc58..5fdac52 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -627,32 +627,35 @@ sub UnicodeData_Txt() ($General{$name} ||= Table->New)->$op($code, $name); # 005F: SPACING UNDERSCORE - $Cat{Word}->$op($code) if $cat =~ /^[LMN]/ || $code == 0x005F; + $Cat{Word}->$op($code) if $cat =~ /^[LMN]|Pc/; $Cat{Alnum}->$op($code) if $cat =~ /^[LM]|Nd/; $Cat{Alpha}->$op($code) if $cat =~ /^[LM]/; - - - $Cat{Space}->$op($code) if $cat =~ /^Z/ + my $isspace = + ($cat =~ /Zs|Zl|Zp/ && + $code != 0x200B) # 200B is ZWSP which is for line break control + # and therefore it is not part of "space" even while it is "Zs". || $code == 0x0009 # 0009: HORIZONTAL TAB || $code == 0x000A # 000A: LINE FEED || $code == 0x000B # 000B: VERTICAL TAB || $code == 0x000C # 000C: FORM FEED || $code == 0x000D # 000D: CARRIAGE RETURN - || $code == 0x0085; # 0085: NEL + || $code == 0x0085 # 0085: NEL + + ; + $Cat{Space}->$op($code) if $isspace; - $Cat{SpacePerl}->$op($code) if $cat =~ /^Z/ - || $code == 0x0009 # 0009: HORIZONTAL TAB - || $code == 0x000A # 000A: LINE FEED - || $code == 0x000C # 000C: FORM FEED - || $code == 0x000D # 000D: CARRIAGE RETURN - || $code == 0x0085 # 0085: - || $code == 0x2028 # 2028: LINE SEPARATOR - || $code == 0x2029;# 2029: PARAGRAPH SEP. + $Cat{SpacePerl}->$op($code) if $isspace + && $code != 0x000B; # Backward compat. - $Cat{Blank}->$op($code) if $cat eq "Zs" - || $code == 0x0009; # 0009: HORIZONTAL TAB + $Cat{Blank}->$op($code) if $isspace + && !($code == 0x000A || + $code == 0x000B || + $code == 0x000C || + $code == 0x000D || + $code == 0x0085 || + $cat =~ /^Z[lp]/); $Cat{Digit}->$op($code) if $cat eq "Nd"; $Cat{Upper}->$op($code) if $cat eq "Lu"; @@ -660,9 +663,9 @@ sub UnicodeData_Txt() $Cat{Title}->$op($code) if $cat eq "Lt"; $Cat{ASCII}->$op($code) if $code <= 0x007F; $Cat{Cntrl}->$op($code) if $cat =~ /^C/; - $Cat{Graph}->$op($code) if $cat !~ /Zs|Cc|Cs|Cn/; - $Cat{Print}->$op($code) if $cat =~ /^[LMNPS]/ - || $cat eq "Zs"; + my $isgraph = !$isspace && $cat !~ /Cc|Cs|Cn/; + $Cat{Graph}->$op($code) if $isgraph; + $Cat{Print}->$op($code) if $isgraph || $isspace; $Cat{Punct}->$op($code) if $cat =~ /^P/; $Cat{XDigit}->$op($code) if ($code >= 0x30 && $code <= 0x39) ## 0..9