=item Unicode character %s is illegal
-Certain Unicode characters have been designated off-limits by the
-Unicode standard and should not be generated.
+(W utf8) Certain Unicode characters have been designated off-limits by
+the Unicode standard and should not be generated. If you really know
+what you are doing you can turn off this warning by C<no warnings 'utf8';>.
=item Unknown BYTEORDER
=item UTF-16 surrogate %s
-(F) You tried to generate half of an UTF-16 surrogate by requesting
-a Unicode character between the code points 0xD800 and 0xDFFF (inclusive).
-That range is reserved exclusively for the use of UTF-16 encoding
-(by having two 16-bit UCS-2 characters); but Perl encodes its characters
-in UTF-8, so what you got is a very illegal character.
+(W utf8) You tried to generate half of an UTF-16 surrogate by
+requesting a Unicode character between the code points 0xD800 and
+0xDFFF (inclusive). That range is reserved exclusively for the use of
+UTF-16 encoding (by having two 16-bit UCS-2 characters); but Perl
+encodes its characters in UTF-8, so what you got is a very illegal
+character. If you really know what you are doing you can turn off
+this warning by C<no warnings 'utf8';>.
=item Value of %s can be "0"; test with defined()
Malformed UTF-8 character (unexpected non-continuation byte 0x73 after start byte 0xf8) at - line 9.
Malformed UTF-8 character (unexpected non-continuation byte 0x73 after start byte 0xf8) at - line 14.
########
+use warnings 'utf8';
+my $surr = chr(0xD800);
+my $fff3 = chr(0xFFFE);
+my $ffff = chr(0xFFFF);
+no warnings 'utf8';
+$surr = chr(0xD800);
+$fffe = chr(0xFFFE);
+$ffff = chr(0xFFFF);
+EXPECT
+UTF-16 surrogate 0xd800 at - line 2.
+Unicode character 0xfffe is illegal at - line 3.
+Unicode character 0xffff is illegal at - line 4.
+########
BEGIN {
chdir 't' if -d 't';
- @INC = '.';
+ @INC = qw(.);
require "test.pl";
}
-print "1..8\n";
+plan tests => 7;
# compile time evaluation
$x = "\x{1234}";
is(ord($x), 0x1234, 'runtime ord \x{....}');
-{
- eval 'my $surrogate = chr(0xD800)';
-
- like($@, qr/^UTF-16 surrogate 0xd800 /, "surrogates bad");
-}
-
-
U8 *
Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
{
- if (UNICODE_IS_SURROGATE(uv))
- Perl_croak(aTHX_ "UTF-16 surrogate 0x%04"UVxf, uv);
- else if ((uv >= 0xFDD0 && uv <= 0xFDEF) ||
- (uv == 0xFFFE || uv == 0xFFFF))
- Perl_croak(aTHX_ "Unicode character 0x%04"UVxf" is illegal", uv);
+ if (ckWARN_d(WARN_UTF8)) {
+ if (UNICODE_IS_SURROGATE(uv))
+ Perl_warner(aTHX_ WARN_UTF8, "UTF-16 surrogate 0x%04"UVxf, uv);
+ else if ((uv >= 0xFDD0 && uv <= 0xFDEF) ||
+ (uv == 0xFFFE || uv == 0xFFFF))
+ Perl_warner(aTHX_ WARN_UTF8,
+ "Unicode character 0x%04"UVxf" is illegal", uv);
+ }
if (UNI_IS_INVARIANT(uv)) {
*d++ = UTF_TO_NATIVE(uv);
return d;