do_require("$utf8\nprint \"ok $i\n\"; 1;\n");
$i++;
do_require("$utf16\n1;");
-print "ok $i\n" if $@ =~ /Unsupported script encoding/;
+print "not " unless $@ =~ /^Unrecognized character /;
+print "ok $i\n";
END { 1 while unlink 'bleah.pm'; 1 while unlink 'bleah.do'; }
}
}
if (bof)
+ {
+ PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ /* Shouldn't this swallow_bom() be earlier, e.g.
+ * immediately after where bof is set? Currently you can't
+ * have e.g. a UTF16 sharpbang line. --Mike Guy */
s = swallow_bom((U8*)s);
+ }
incline(s);
} while (PL_doextract);
PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
return dst;
}
-/* XXX NOTHING CALLS THE FOLLOWING TWO ROUTINES YET!!! */
/*
* Convert native or reversed UTF-16 to UTF-8.
*
* Destination must be pre-extended to 3/2 source. Do not use in-place.
* We optimize for native, for obvious reasons. */
+/* There are several problems with utf16_to_utf8().
+ * (1) U16 is not necessarily *exactly* two bytes.
+ * (2) No check is made for odd length.
+ * (3) The "Malformed UTF-16 surrogate" should probably be
+ * a hard error (and it should be listed in perldiag).
+ * (4) The tests (in t/comp/require.t) are a joke: the UTF16 BOM
+ * really ought to be followed by valid UTF16 characters.
+ * --Mike Guy */
U8*
Perl_utf16_to_utf8(pTHX_ U16* p, U8* d, I32 bytelen)
{