From: Eric Brine Date: Fri, 27 Nov 2009 00:16:16 +0000 (-0500) Subject: Fix -DPERL_NO_UTF16_FILTER X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=ee6ba15dedda3e88eb66891eaf387c00a4c0a2fb;p=p5sagit%2Fp5-mst-13.2.git Fix -DPERL_NO_UTF16_FILTER --- diff --git a/t/comp/require.t b/t/comp/require.t index baf4887..988a102 100644 --- a/t/comp/require.t +++ b/t/comp/require.t @@ -266,9 +266,9 @@ EOT if ($Is_EBCDIC || $Is_UTF8) { exit; } my %templates = ( - utf8 => 'C0U', - utf16be => 'n', - utf16le => 'v', + 'UTF-8' => 'C0U', + 'UTF-16BE' => 'n', + 'UTF-16LE' => 'v', ); sub bytes_to_utf { @@ -280,6 +280,9 @@ sub bytes_to_utf { foreach (sort keys %templates) { $i++; do_require(bytes_to_utf($_, qq(print "ok $i # $_\\n"; 1;\n), 1)); + if ($@ =~ /^(Unsupported script encoding \Q$_\E)/) { + print "ok $i # skip $1\n"; + } } END { diff --git a/t/comp/utf.t b/t/comp/utf.t index 1e0e68a..f5190f9 100644 --- a/t/comp/utf.t +++ b/t/comp/utf.t @@ -4,9 +4,9 @@ print "1..4016\n"; my $test = 0; my %templates = ( - utf8 => 'C0U', - utf16be => 'n', - utf16le => 'v', + 'UTF-8' => 'C0U', + 'UTF-16BE' => 'n', + 'UTF-16LE' => 'v', ); sub bytes_to_utf { @@ -14,7 +14,7 @@ sub bytes_to_utf { my $template = $templates{$enc}; die "Unsupported encoding $enc" unless $template; my @chars = unpack "U*", $content; - if ($enc ne 'utf8') { + if ($enc ne 'UTF-8') { # Make surrogate pairs my @remember_that_utf_16_is_variable_length; foreach my $ord (@chars) { @@ -41,7 +41,11 @@ sub test { my $got = do "./utf$$.pl"; $test = $test + 1; if (!defined $got) { - print "not ok $test # $enc $bom $nl $name; got undef\n"; + if ($@ =~ /^(Unsupported script encoding \Q$enc\E)/) { + print "ok $test # skip $1\n"; + } else { + print "not ok $test # $enc $bom $nl $name; got undef\n"; + } } elsif ($got ne $expect) { print "not ok $test # $enc $bom $nl $name; got '$got'\n"; } else { @@ -50,7 +54,7 @@ sub test { } for my $bom (0, 1) { - for my $enc (qw(utf16le utf16be utf8)) { + for my $enc (qw(UTF-16LE UTF-16BE UTF-8)) { for my $nl (1, 0) { for my $value (123, 1234, 12345) { test($enc, $value, $value, $bom, $nl, $value); @@ -58,7 +62,7 @@ for my $bom (0, 1) { # loop without the bug fix it corresponds to: test($enc, "($value)", $value, $bom, $nl, "($value)"); } - next if $enc eq 'utf8'; + next if $enc eq 'UTF-8'; # Arguably a bug that currently string literals from UTF-8 file # handles are not implicitly "use utf8", but don't FIXME that # right now, as here we're testing the input filter itself. diff --git a/t/porting/diag.t b/t/porting/diag.t index 66e5a21..14c2f84 100644 --- a/t/porting/diag.t +++ b/t/porting/diag.t @@ -358,10 +358,10 @@ Unknown PerlIO layer "scalar" Unknown Unicode option letter '%c' unrecognised control character '%c' Unstable directory path, current directory changed unexpectedly -Unsupported script encoding UTF16-BE -Unsupported script encoding UTF16-LE -Unsupported script encoding UTF32-BE -Unsupported script encoding UTF32-LE +Unsupported script encoding UTF-16BE +Unsupported script encoding UTF-16LE +Unsupported script encoding UTF-32BE +Unsupported script encoding UTF-32LE Unterminated compressed integer in unpack Usage: CODE(0x%x)(%s) Usage: %s(%s) diff --git a/toke.c b/toke.c index dfcb034..784ed7a 100644 --- a/toke.c +++ b/toke.c @@ -13287,17 +13287,17 @@ S_swallow_bom(pTHX_ U8 *s) switch (s[0]) { case 0xFF: if (s[1] == 0xFE) { - /* UTF-16 little-endian? (or UTF32-LE?) */ + /* UTF-16 little-endian? (or UTF-32LE?) */ if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ - Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE"); + Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE"); #ifndef PERL_NO_UTF16_FILTER - if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n"); + if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n"); s += 2; if (PL_bufend > (char*)s) { s = add_utf16_textfilter(s, TRUE); } #else - Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE"); + Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); #endif } break; @@ -13310,7 +13310,7 @@ S_swallow_bom(pTHX_ U8 *s) s = add_utf16_textfilter(s, FALSE); } #else - Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE"); + Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); #endif } break; @@ -13325,15 +13325,19 @@ S_swallow_bom(pTHX_ U8 *s) if (s[1] == 0) { if (s[2] == 0xFE && s[3] == 0xFF) { /* UTF-32 big-endian */ - Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE"); + Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE"); } } else if (s[2] == 0 && s[3] != 0) { /* Leading bytes * 00 xx 00 xx * are a good indicator of UTF-16BE. */ +#ifndef PERL_NO_UTF16_FILTER if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); - s = add_utf16_textfilter(s, FALSE); + s = add_utf16_textfilter(s, FALSE); +#else + Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); +#endif } } #ifdef EBCDIC @@ -13350,8 +13354,12 @@ S_swallow_bom(pTHX_ U8 *s) /* Leading bytes * xx 00 xx 00 * are a good indicator of UTF-16LE. */ +#ifndef PERL_NO_UTF16_FILTER if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); s = add_utf16_textfilter(s, TRUE); +#else + Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); +#endif } } return (char*)s;