Expose utf16_to_utf8{,reversed} via XS::APItest, and provide some basic tests.
Nicholas Clark [Sun, 18 Oct 2009 20:06:06 +0000 (21:06 +0100)]
MANIFEST
ext/XS-APItest/APItest.pm
ext/XS-APItest/APItest.xs
ext/XS-APItest/t/utf16_to_utf8.t [new file with mode: 0644]

index 8b0f5d8..7d247df 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -3204,6 +3204,7 @@ ext/XS-APItest/t/push.t           XS::APItest extension
 ext/XS-APItest/t/rmagical.t    XS::APItest extension
 ext/XS-APItest/t/svpeek.t      XS::APItest extension
 ext/XS-APItest/t/svsetsv.t     Test behaviour of sv_setsv with/without PERL_CORE
+ext/XS-APItest/t/utf16_to_utf8.t       Test behaviour of utf16_to_utf8{,reversed}
 ext/XS-APItest/t/xs_special_subs_require.t     for require too
 ext/XS-APItest/t/xs_special_subs.t     Test that XS BEGIN/CHECK/INIT/END work
 ext/XS-Typemap/Makefile.PL     XS::Typemap extension
index 12d0a03..c40e4b8 100644 (file)
@@ -23,10 +23,10 @@ our @EXPORT = qw( print_double print_int print_long
                  my_cxt_getint my_cxt_getsv my_cxt_setint my_cxt_setsv
                  sv_setsv_cow_hashkey_core sv_setsv_cow_hashkey_notcore
                  rmagical_cast rmagical_flags
-                 DPeek
+                 DPeek utf16_to_utf8 utf16_to_utf8_reversed
 );
 
-our $VERSION = '0.15';
+our $VERSION = '0.16';
 
 use vars '$WARNINGS_ON_BOOTSTRAP';
 use vars map "\$${_}_called_PP", qw(BEGIN UNITCHECK CHECK INIT END);
index 7e7f78b..4eac4a6 100644 (file)
@@ -892,3 +892,32 @@ void
 END()
     CODE:
        sv_inc(get_sv("XS::APItest::END_called", GV_ADD|GV_ADDMULTI));
+
+void
+utf16_to_utf8 (sv, ...)
+    SV* sv
+       ALIAS:
+           utf16_to_utf8_reversed = 1
+    PREINIT:
+        STRLEN len;
+       U8 *source;
+       SV *dest;
+       I32 got; /* Gah, badly thought out APIs */
+    CODE:
+       source = (U8 *)SvPVbyte(sv, len);
+       /* Optionally only convert part of the buffer.  */      
+       if (items > 1) {
+           len = SvUV(ST(1));
+       }
+       /* Mortalise this right now, as we'll be testing croak()s  */
+       dest = sv_2mortal(newSV(len * 3 / 2 + 1));
+       if (ix) {
+           utf16_to_utf8_reversed(source, SvPVX(dest), len, &got);
+       } else {
+           utf16_to_utf8(source, SvPVX(dest), len, &got);
+       }
+       SvCUR_set(dest, got);
+       SvPVX(dest)[got] = '\0';
+       SvPOK_on(dest);
+       ST(0) = dest;
+       XSRETURN(1);
diff --git a/ext/XS-APItest/t/utf16_to_utf8.t b/ext/XS-APItest/t/utf16_to_utf8.t
new file mode 100644 (file)
index 0000000..3da3d7d
--- /dev/null
@@ -0,0 +1,49 @@
+#!perl -w
+
+use strict;
+use Test::More 'no_plan';
+use Encode;
+
+use XS::APItest qw(utf16_to_utf8 utf16_to_utf8_reversed);
+
+for my $ord (0, 10, 13, 78, 255, 256, 0xD7FF, 0xE000, 0x10000) {
+    my $chr = chr $ord;
+    for my $prefix ('', "\0", 'Perl rules') {
+       for my $suffix ('', "\0", "Moo!") {
+           my $string = $prefix . $chr . $suffix;
+           my $name = sprintf "for chr $ord prefix %d, suffix %d",
+               length $prefix, length $suffix;
+           my $as_utf8 = encode('UTF-8', $string);
+           is(utf16_to_utf8(encode('UTF-16BE', $string)), $as_utf8,
+              "utf16_to_utf8 $name");
+           is(utf16_to_utf8_reversed(encode('UTF-16LE', $string)), $as_utf8,
+              "utf16_to_utf8_reversed $name");
+       }
+    }
+}
+
+# Currently this is special-cased to work. Should it?
+
+is(utf16_to_utf8("\0"), "\0", 'Short string to utf16_to_utf8');
+
+# But anything else is fatal
+
+my $got = eval {utf16_to_utf8('N')};
+like($@, qr/^panic: utf16_to_utf8: odd bytelen 1 at/, 'Odd byte length panics');
+is($got, undef, 'hence eval returns undef');
+
+for (["\xD8\0\0\0", 'NULs'],
+     ["\xD8\0\xD8\0", '2 Lows'],
+    ) {
+    my ($malformed, $name) = @$_;
+    $got = eval {utf16_to_utf8($malformed)};
+    like($@, qr/^Malformed UTF-16 surrogate at/,
+        "Malformed surrogate $name croaks for utf16_to_utf8");
+    is($got, undef, 'hence eval returns undef');
+
+    $malformed =~ s/(.)(.)/$2$1/gs;
+    $got = eval {utf16_to_utf8_reversed($malformed)};
+    like($@, qr/^Malformed UTF-16 surrogate at/,
+        "Malformed surrogate $name croaks for utf16_to_utf8_reversed");
+    is($got, undef, 'hence eval returns undef');
+}