From: Simon Cozens Date: Sat, 17 Jun 2000 11:49:57 +0000 (+0000) Subject: Re: [PATCH cfgperl] BOMs away! X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=27d76ecff97d0a9449f569d789504cc8b69a6d01;p=p5sagit%2Fp5-mst-13.2.git Re: [PATCH cfgperl] BOMs away! Message-ID: p4raw-id: //depot/cfgperl@6261 --- diff --git a/pod/perldiag.pod b/pod/perldiag.pod index bba1320..a754daa 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -3401,6 +3401,11 @@ Note that under some systems, like OS/2, there may be different flavors of Perl executables, some of which may support fork, some not. Try changing the name you call Perl by to C, C, and so on. +=item Unsupported script encoding + +(F) Your program file begins with a Unicode Byte Order Mark (BOM) which +declares it to be in a Unicode encoding that Perl cannot yet read. + =item Unsupported socket function "%s" called (F) Your machine doesn't support the Berkeley socket mechanism, or at diff --git a/t/comp/require.t b/t/comp/require.t index 1d92687..48e3e00 100755 --- a/t/comp/require.t +++ b/t/comp/require.t @@ -7,7 +7,7 @@ BEGIN { # don't make this lexical $i = 1; -print "1..20\n"; +print "1..23\n"; sub do_require { %INC = (); @@ -124,6 +124,16 @@ sub dofile { do "bleah.do"; }; print $x; $i++; +# UTF-encoded things +my $utf8 = chr(0xFEFF); +my $utf16 = chr(255).chr(254); +do_require("${utf8}print \"ok $i\n\"; 1;\n"); +$i++; +do_require("$utf8\nprint \"ok $i\n\"; 1;\n"); +$i++; +do_require("$utf16\n1;"); +print "ok $i\n" if $@ =~ /Unsupported script encoding/; + END { 1 while unlink 'bleah.pm'; 1 while unlink 'bleah.do'; } # ***interaction with pod (don't put any thing after here)*** diff --git a/toke.c b/toke.c index 4a54f72..d6bb6d9 100644 --- a/toke.c +++ b/toke.c @@ -326,7 +326,7 @@ S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen) } #endif -#if 0 +#ifdef PERL_UTF16_FILTER STATIC I32 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) { @@ -2490,6 +2490,8 @@ Perl_yylex(pTHX) goto retry; } do { + bool bof; + bof = PL_rsfp && (PerlIO_tell(PL_rsfp)==0); /* *Before* read! */ if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) { fake_eof: if (PL_rsfp) { @@ -2525,7 +2527,9 @@ Perl_yylex(pTHX) PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); PL_doextract = FALSE; } - } + } + if (bof) + s = swallow_bom(s); incline(s); } while (PL_doextract); PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; @@ -7407,3 +7411,55 @@ restore_rsfp(pTHXo_ void *f) PerlIO_close(PL_rsfp); PL_rsfp = fp; } + +STATIC char* +S_swallow_bom(pTHX_ char *s) { + STRLEN slen; + slen = SvCUR(PL_linestr); + switch (*s) { + case -1: + if ((s[1] & 255) == 254) { + /* UTF-16 little-endian */ +#ifdef PERL_UTF16_FILTER + U8 *news; +#endif + s+=2; + if (*s == 0 && s[1] == 0) /* UTF-32 little-endian */ + Perl_croak(aTHX_ "Unsupported script encoding"); +#ifdef PERL_UTF16_FILTER + filter_add(S_utf16rev_textfilter, NULL); + New(898, news, (PL_bufend - s) * 3 / 2 + 1, U8); + PL_bufend = utf16_to_utf8((U16*)s, news, PL_bufend - s); + s = news; +#else + Perl_croak(aTHX_ "Unsupported script encoding"); +#endif + } + break; + + case -2: + if ((s[1] & 255) == 255) { /* UTF-16 big-endian */ +#ifdef PERL_UTF16_FILTER + U8 *news; + filter_add(S_utf16_textfilter, NULL); + New(898, news, (PL_bufend - s) * 3 / 2 + 1, U8); + PL_bufend = utf16_to_utf8((U16*)s, news, PL_bufend - s); + s = news; +#else + Perl_croak(aTHX_ "Unsupported script encoding"); +#endif + } + break; + + case -17: + if ( slen>2 && (s[1] & 255) == 187 && (s[2] & 255) == 191) { + s+=3; /* UTF-8 */ + } + break; + case 0: + if (slen > 3 && s[1] == 0 && /* UTF-32 big-endian */ + s[2] & 255 == 254 && s[3] & 255 == 255) + Perl_croak(aTHX_ "Unsupported script encoding"); +} +return s; +}