From: Jarkko Hietaniemi Date: Mon, 17 Dec 2001 19:11:29 +0000 (+0000) Subject: Demote the surrogate and non-character errors to warnings. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=507b98002a6c136d628abe928988647938c55cff;p=p5sagit%2Fp5-mst-13.2.git Demote the surrogate and non-character errors to warnings. p4raw-id: //depot/perl@13740 --- diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 83baed2..1935550 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -3605,8 +3605,9 @@ representative, who probably put it there in the first place. =item Unicode character %s is illegal -Certain Unicode characters have been designated off-limits by the -Unicode standard and should not be generated. +(W utf8) Certain Unicode characters have been designated off-limits by +the Unicode standard and should not be generated. If you really know +what you are doing you can turn off this warning by C. =item Unknown BYTEORDER @@ -4004,11 +4005,13 @@ removed in a future version. =item UTF-16 surrogate %s -(F) You tried to generate half of an UTF-16 surrogate by requesting -a Unicode character between the code points 0xD800 and 0xDFFF (inclusive). -That range is reserved exclusively for the use of UTF-16 encoding -(by having two 16-bit UCS-2 characters); but Perl encodes its characters -in UTF-8, so what you got is a very illegal character. +(W utf8) You tried to generate half of an UTF-16 surrogate by +requesting a Unicode character between the code points 0xD800 and +0xDFFF (inclusive). That range is reserved exclusively for the use of +UTF-16 encoding (by having two 16-bit UCS-2 characters); but Perl +encodes its characters in UTF-8, so what you got is a very illegal +character. If you really know what you are doing you can turn off +this warning by C. =item Value of %s can be "0"; test with defined() diff --git a/t/lib/warnings/utf8 b/t/lib/warnings/utf8 index 9a7dbaf..fa7041b 100644 --- a/t/lib/warnings/utf8 +++ b/t/lib/warnings/utf8 @@ -33,3 +33,16 @@ EXPECT Malformed UTF-8 character (unexpected non-continuation byte 0x73 after start byte 0xf8) at - line 9. Malformed UTF-8 character (unexpected non-continuation byte 0x73 after start byte 0xf8) at - line 14. ######## +use warnings 'utf8'; +my $surr = chr(0xD800); +my $fff3 = chr(0xFFFE); +my $ffff = chr(0xFFFF); +no warnings 'utf8'; +$surr = chr(0xD800); +$fffe = chr(0xFFFE); +$ffff = chr(0xFFFF); +EXPECT +UTF-16 surrogate 0xd800 at - line 2. +Unicode character 0xfffe is illegal at - line 3. +Unicode character 0xffff is illegal at - line 4. +######## diff --git a/t/op/ord.t b/t/op/ord.t index f746055..ff51c18 100755 --- a/t/op/ord.t +++ b/t/op/ord.t @@ -2,11 +2,11 @@ BEGIN { chdir 't' if -d 't'; - @INC = '.'; + @INC = qw(.); require "test.pl"; } -print "1..8\n"; +plan tests => 7; # compile time evaluation @@ -33,10 +33,3 @@ is(ord("\x{1234}"), 0x1234, 'compile time ord \x{....}'); $x = "\x{1234}"; is(ord($x), 0x1234, 'runtime ord \x{....}'); -{ - eval 'my $surrogate = chr(0xD800)'; - - like($@, qr/^UTF-16 surrogate 0xd800 /, "surrogates bad"); -} - - diff --git a/utf8.c b/utf8.c index 0979506..af36592 100644 --- a/utf8.c +++ b/utf8.c @@ -46,11 +46,14 @@ is the recommended Unicode-aware way of saying U8 * Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv) { - if (UNICODE_IS_SURROGATE(uv)) - Perl_croak(aTHX_ "UTF-16 surrogate 0x%04"UVxf, uv); - else if ((uv >= 0xFDD0 && uv <= 0xFDEF) || - (uv == 0xFFFE || uv == 0xFFFF)) - Perl_croak(aTHX_ "Unicode character 0x%04"UVxf" is illegal", uv); + if (ckWARN_d(WARN_UTF8)) { + if (UNICODE_IS_SURROGATE(uv)) + Perl_warner(aTHX_ WARN_UTF8, "UTF-16 surrogate 0x%04"UVxf, uv); + else if ((uv >= 0xFDD0 && uv <= 0xFDEF) || + (uv == 0xFFFE || uv == 0xFFFF)) + Perl_warner(aTHX_ WARN_UTF8, + "Unicode character 0x%04"UVxf" is illegal", uv); + } if (UNI_IS_INVARIANT(uv)) { *d++ = UTF_TO_NATIVE(uv); return d;