From: Karl Williamson Date: Tue, 29 Dec 2009 23:38:38 +0000 (-0700) Subject: Correct \p{print} to not match LINE SEPARATOR nor PARAGRAPH SEPARATOR X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=ae5b72c8252f1f9074e08de2e76de013c8021084;p=p5sagit%2Fp5-mst-13.2.git Correct \p{print} to not match LINE SEPARATOR nor PARAGRAPH SEPARATOR The Unicode Standard defines (as a recommendation) that Print be based on graphical characters and blank characters (minus controls). Perl's definition has been based on space rather than blank. The only practical effect this has is that Perl erroneously matches the LINE SEPARATOR and PARAGRAPH SEPARATOR, which clearly are not printable characters. Signed-off-by: Abigail --- diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 12f6659..fbc3fab 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -10781,7 +10781,7 @@ sub compile_perl() { my $Print = $perl->add_match_table('Print', Description => "[[:Print:]] extended beyond ASCII", - Initialize => $Space + $Graph - $gc->table('Control'), + Initialize => $Blank + $Graph - $gc->table('Control'), ); $posix_equivalent{'Print'} = $Print; @@ -10801,6 +10801,7 @@ sub compile_perl() { $posix_equivalent{'Digit'} = $Digit; # AHex was not present in early releases + # XXX TUS recommends Hex_Digit, not ASCII_Hex_Digit. my $Xdigit = $perl->add_match_table('XDigit', Description => '[0-9A-Fa-f]'); my $AHex = property_ref('ASCII_Hex_Digit'); @@ -14168,3 +14169,5 @@ sub Finished() { Error('\p{Script=InGreek}'); # Bug #69018 Test_X("1100 $nobreak 1161"); # Bug #70940 +Expect(0, 0x2028, '\p{Print}', ""); # Bug # 71722 +Expect(0, 0x2029, '\p{Print}', ""); # Bug # 71722 diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod index 1ebed0d..50a2843 100644 --- a/pod/perlunicode.pod +++ b/pod/perlunicode.pod @@ -721,7 +721,7 @@ matches as well. =item B> -This matches any character that is graphical or is space, but not a control. 
+This matches any character that is graphical or blank, except controls. =item B>