From: Nicholas Clark Date: Sun, 18 Oct 2009 20:06:06 +0000 (+0100) Subject: Expose utf16_to_utf8{,reversed} via XS::APItest, and provide some basic tests. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=30685b5659009a95642202219acc6ded18f74dbc;p=p5sagit%2Fp5-mst-13.2.git Expose utf16_to_utf8{,reversed} via XS::APItest, and provide some basic tests. --- diff --git a/MANIFEST b/MANIFEST index 8b0f5d8..7d247df 100644 --- a/MANIFEST +++ b/MANIFEST @@ -3204,6 +3204,7 @@ ext/XS-APItest/t/push.t XS::APItest extension ext/XS-APItest/t/rmagical.t XS::APItest extension ext/XS-APItest/t/svpeek.t XS::APItest extension ext/XS-APItest/t/svsetsv.t Test behaviour of sv_setsv with/without PERL_CORE +ext/XS-APItest/t/utf16_to_utf8.t Test behaviour of utf16_to_utf8{,reversed} ext/XS-APItest/t/xs_special_subs_require.t for require too ext/XS-APItest/t/xs_special_subs.t Test that XS BEGIN/CHECK/INIT/END work ext/XS-Typemap/Makefile.PL XS::Typemap extension diff --git a/ext/XS-APItest/APItest.pm b/ext/XS-APItest/APItest.pm index 12d0a03..c40e4b8 100644 --- a/ext/XS-APItest/APItest.pm +++ b/ext/XS-APItest/APItest.pm @@ -23,10 +23,10 @@ our @EXPORT = qw( print_double print_int print_long my_cxt_getint my_cxt_getsv my_cxt_setint my_cxt_setsv sv_setsv_cow_hashkey_core sv_setsv_cow_hashkey_notcore rmagical_cast rmagical_flags - DPeek + DPeek utf16_to_utf8 utf16_to_utf8_reversed ); -our $VERSION = '0.15'; +our $VERSION = '0.16'; use vars '$WARNINGS_ON_BOOTSTRAP'; use vars map "\$${_}_called_PP", qw(BEGIN UNITCHECK CHECK INIT END); diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs index 7e7f78b..4eac4a6 100644 --- a/ext/XS-APItest/APItest.xs +++ b/ext/XS-APItest/APItest.xs @@ -892,3 +892,32 @@ void END() CODE: sv_inc(get_sv("XS::APItest::END_called", GV_ADD|GV_ADDMULTI)); + +void +utf16_to_utf8 (sv, ...) + SV* sv + ALIAS: + utf16_to_utf8_reversed = 1 + PREINIT: + STRLEN len; + U8 *source; + SV *dest; + I32 got; /* Gah, badly thought out APIs */ + CODE: + source = (U8 *)SvPVbyte(sv, len); + /* Optionally only convert part of the buffer. */ + if (items > 1) { + len = SvUV(ST(1)); + } + /* Mortalise this right now, as we'll be testing croak()s */ + dest = sv_2mortal(newSV(len * 3 / 2 + 1)); + if (ix) { + utf16_to_utf8_reversed(source, SvPVX(dest), len, &got); + } else { + utf16_to_utf8(source, SvPVX(dest), len, &got); + } + SvCUR_set(dest, got); + SvPVX(dest)[got] = '\0'; + SvPOK_on(dest); + ST(0) = dest; + XSRETURN(1); diff --git a/ext/XS-APItest/t/utf16_to_utf8.t b/ext/XS-APItest/t/utf16_to_utf8.t new file mode 100644 index 0000000..3da3d7d --- /dev/null +++ b/ext/XS-APItest/t/utf16_to_utf8.t @@ -0,0 +1,49 @@ +#!perl -w + +use strict; +use Test::More 'no_plan'; +use Encode; + +use XS::APItest qw(utf16_to_utf8 utf16_to_utf8_reversed); + +for my $ord (0, 10, 13, 78, 255, 256, 0xD7FF, 0xE000, 0x10000) { + my $chr = chr $ord; + for my $prefix ('', "\0", 'Perl rules') { + for my $suffix ('', "\0", "Moo!") { + my $string = $prefix . $chr . $suffix; + my $name = sprintf "for chr $ord prefix %d, suffix %d", + length $prefix, length $suffix; + my $as_utf8 = encode('UTF-8', $string); + is(utf16_to_utf8(encode('UTF-16BE', $string)), $as_utf8, + "utf16_to_utf8 $name"); + is(utf16_to_utf8_reversed(encode('UTF-16LE', $string)), $as_utf8, + "utf16_to_utf8_reversed $name"); + } + } +} + +# Currently this is special-cased to work. Should it? + +is(utf16_to_utf8("\0"), "\0", 'Short string to utf16_to_utf8'); + +# But anything else is fatal + +my $got = eval {utf16_to_utf8('N')}; +like($@, qr/^panic: utf16_to_utf8: odd bytelen 1 at/, 'Odd byte length panics'); +is($got, undef, 'hence eval returns undef'); + +for (["\xD8\0\0\0", 'NULs'], + ["\xD8\0\xD8\0", '2 Lows'], + ) { + my ($malformed, $name) = @$_; + $got = eval {utf16_to_utf8($malformed)}; + like($@, qr/^Malformed UTF-16 surrogate at/, + "Malformed surrogate $name croaks for utf16_to_utf8"); + is($got, undef, 'hence eval returns undef'); + + $malformed =~ s/(.)(.)/$2$1/gs; + $got = eval {utf16_to_utf8_reversed($malformed)}; + like($@, qr/^Malformed UTF-16 surrogate at/, + "Malformed surrogate $name croaks for utf16_to_utf8_reversed"); + is($got, undef, 'hence eval returns undef'); +}