From: Jarkko Hietaniemi Date: Thu, 31 Jan 2002 15:26:41 +0000 (+0000) Subject: Turn the I/O Unicode error by default on, but the X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=62961d2e50d22e7ae5a679eac7bf6d593193e108;p=p5sagit%2Fp5-mst-13.2.git Turn the I/O Unicode error by default on, but the character-generating Unicode error by default off, as Larry suggested. p4raw-id: //depot/perl@14505 --- diff --git a/doio.c b/doio.c index ab74d4a..3c06585 100644 --- a/doio.c +++ b/doio.c @@ -1223,7 +1223,7 @@ Perl_do_print(pTHX_ register SV *sv, PerlIO *fp) } else if (DO_UTF8(sv)) { if (!sv_utf8_downgrade((sv = sv_mortalcopy(sv)), TRUE) - && ckWARN(WARN_UTF8)) + && ckWARN_d(WARN_UTF8)) { Perl_warner(aTHX_ WARN_UTF8, "Wide character in print"); } diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 2a3f5d0..56b6950 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -4184,7 +4184,10 @@ So put in parentheses to say what you really mean. =item Wide character in %s -(W utf8) Perl met a wide character (>255) when it wasn't expecting one. +(W utf8) Perl met a wide character (>255) when it wasn't expecting +one. This warning is by default on for I/O (like print) but can be +turned off by C. You are supposed to explicitly +mark the filehandle with an encoding, see L and L. =item write() on closed filehandle %s diff --git a/pp.c b/pp.c index 2d462c4..51facc0 100644 --- a/pp.c +++ b/pp.c @@ -3168,8 +3168,7 @@ PP(pp_chr) if (value > 255 && !IN_BYTES) { SvGROW(TARG, UNISKIP(value)+1); - tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, - UNICODE_ALLOW_SUPER); + tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0); SvCUR_set(TARG, tmps - SvPVX(TARG)); *tmps = '\0'; (void)SvPOK_only(TARG); diff --git a/t/io/utf8.t b/t/io/utf8.t index e8caf72..d0fe0f1 100755 --- a/t/io/utf8.t +++ b/t/io/utf8.t @@ -12,7 +12,7 @@ BEGIN { no utf8; # needed for use utf8 not griping about the raw octets $| = 1; -print "1..26\n"; +print "1..31\n"; open(F,"+>:utf8",'a'); print F chr(0x100).'£'; @@ -186,7 +186,7 @@ if (ord('A') == 193) { close F; unlink('a'); -open F, ">a"; +open F, ">:utf8", "a"; @a = map { chr(1 << ($_ << 2)) } 0..5; # 0x1, 0x10, .., 0x100000 unshift @a, chr(0); # ... and a null byte in front just for fun print F @a; @@ -216,6 +216,52 @@ for (@a) { close F; print "ok 26\n"; +{ + # Check that warnings are on on I/O, and that they can be muffled. + + local $SIG{__WARN__} = sub { $@ = shift }; + + undef $@; + open F, ">a"; + print F chr(0x100); + close(F); + + print $@ =~ /Wide character in print/ ? "ok 27\n" : "not ok 27\n"; + + undef $@; + open F, ">:utf8", ">a"; + print F chr(0x100); + close(F); + + print defined $@ ? "not ok 28\n" : "ok 28\n"; + + undef $@; + open F, ">a"; + binmode(F, ":utf8"); + print F chr(0x100); + close(F); + + print defined $@ ? "not ok 29\n" : "ok 29\n"; + + no warnings 'utf8'; + + undef $@; + open F, ">a"; + print F chr(0x100); + close(F); + + print defined $@ ? "not ok 30\n" : "ok 30\n"; + + use warnings 'utf8'; + + undef $@; + open F, ">a"; + print F chr(0x100); + close(F); + + print $@ =~ /Wide character in print/ ? "ok 31\n" : "not ok 31\n"; +} + # sysread() and syswrite() tested in lib/open.t since Fnctl is used END { @@ -223,4 +269,3 @@ END { 1 while unlink "b"; } - diff --git a/t/lib/warnings/utf8 b/t/lib/warnings/utf8 index d2ac06f..747436a 100644 --- a/t/lib/warnings/utf8 +++ b/t/lib/warnings/utf8 @@ -34,15 +34,88 @@ Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately af Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14. ######## use warnings 'utf8'; -my $surr = chr(0xD800); -my $fff3 = chr(0xFFFE); -my $ffff = chr(0xFFFF); +my $d7ff = chr(0xD7FF); +my $d800 = chr(0xD800); +my $dfff = chr(0xDFFF); +my $e000 = chr(0xE000); +my $fffd = chr(0xFFFD); +my $fffe = chr(0xFFFE); +my $ffff = chr(0xFFFF); +my $hex4 = chr(0x10000); +my $hex5 = chr(0x100000); +my $max = chr(0x10FFFF); no warnings 'utf8'; -$surr = chr(0xD800); -$fffe = chr(0xFFFE); -$ffff = chr(0xFFFF); +my $d7ff = chr(0xD7FF); +my $d800 = chr(0xD800); +my $dfff = chr(0xDFFF); +my $e000 = chr(0xE000); +my $fffd = chr(0xFFFD); +my $fffe = chr(0xFFFE); +my $ffff = chr(0xFFFF); +my $hex4 = chr(0x10000); +my $hex5 = chr(0x100000); +my $max = chr(0x10FFFF); EXPECT -UTF-16 surrogate 0xd800 at - line 2. -Unicode character 0xfffe is illegal at - line 3. -Unicode character 0xffff is illegal at - line 4. +UTF-16 surrogate 0xd800 at - line 3. +UTF-16 surrogate 0xdfff at - line 4. +Unicode character 0xfffe is illegal at - line 7. +Unicode character 0xffff is illegal at - line 8. +Unicode character 0x10ffff is illegal at - line 11. ######## +use warnings 'utf8'; +my $d7ff = pack("U", 0xD7FF); +my $d800 = pack("U", 0xD800); +my $dfff = pack("U", 0xDFFF); +my $e000 = pack("U", 0xE000); +my $fffd = pack("U", 0xFFFD); +my $fffe = pack("U", 0xFFFE); +my $ffff = pack("U", 0xFFFF); +my $hex4 = pack("U", 0x10000); +my $hex5 = pack("U", 0x100000); +my $max = pack("U", 0x10FFFF); +no warnings 'utf8'; +my $d7ff = pack("U", 0xD7FF); +my $d800 = pack("U", 0xD800); +my $dfff = pack("U", 0xDFFF); +my $e000 = pack("U", 0xE000); +my $fffd = pack("U", 0xFFFD); +my $fffe = pack("U", 0xFFFE); +my $ffff = pack("U", 0xFFFF); +my $hex4 = pack("U", 0x10000); +my $hex5 = pack("U", 0x100000); +my $max = pack("U", 0x10FFFF); +EXPECT +UTF-16 surrogate 0xd800 at - line 3. +UTF-16 surrogate 0xdfff at - line 4. +Unicode character 0xfffe is illegal at - line 7. +Unicode character 0xffff is illegal at - line 8. +Unicode character 0x10ffff is illegal at - line 11. +######## +use warnings 'utf8'; +my $d7ff = "\x{D7FF}"; +my $d800 = "\x{D800}"; +my $dfff = "\x{DFFF}"; +my $e000 = "\x{E000}"; +my $fffd = "\x{FFFD}"; +my $fffe = "\x{FFFE}"; +my $ffff = "\x{FFFF}"; +my $hex4 = "\x{10000}"; +my $hex5 = "\x{100000}"; +my $max = "\x{10FFFF}"; +no warnings 'utf8'; +my $d7ff = "\x{D7FF}"; +my $d800 = "\x{D800}"; +my $dfff = "\x{DFFF}"; +my $e000 = "\x{E000}"; +my $fffd = "\x{FFFD}"; +my $fffe = "\x{FFFE}"; +my $ffff = "\x{FFFF}"; +my $hex4 = "\x{10000}"; +my $hex5 = "\x{100000}"; +my $max = "\x{10FFFF}"; +EXPECT +UTF-16 surrogate 0xd800 at - line 3. +UTF-16 surrogate 0xdfff at - line 4. +Unicode character 0xfffe is illegal at - line 7. +Unicode character 0xffff is illegal at - line 8. +Unicode character 0x10ffff is illegal at - line 11. diff --git a/utf8.c b/utf8.c index 5d124ba..cf3f48d 100644 --- a/utf8.c +++ b/utf8.c @@ -54,7 +54,7 @@ is the recommended Unicode-aware way of saying U8 * Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags) { - if (ckWARN_d(WARN_UTF8)) { + if (ckWARN(WARN_UTF8)) { if (UNICODE_IS_SURROGATE(uv) && !(flags & UNICODE_ALLOW_SURROGATE)) Perl_warner(aTHX_ WARN_UTF8, "UTF-16 surrogate 0x%04"UVxf, uv);