Re-write S_utf16_textfilter() to correctly handle partial reads of UTF-16.
[p5sagit/p5-mst-13.2.git] / ext / XS-APItest / t / utf16_to_utf8.t
CommitLineData
30685b56 1#!perl -w
2
3use strict;
4use Test::More 'no_plan';
5use Encode;
6
7use XS::APItest qw(utf16_to_utf8 utf16_to_utf8_reversed);
8
52b9aa85 9for my $ord (0, 10, 13, 78, 255, 256, 0xD7FF, 0xE000, 0xFFFD,
10 0x10000, 0x10FC00, 0x103FF, 0x10FFFD) {
30685b56 11 my $chr = chr $ord;
12 for my $prefix ('', "\0", 'Perl rules') {
13 for my $suffix ('', "\0", "Moo!") {
14 my $string = $prefix . $chr . $suffix;
15 my $name = sprintf "for chr $ord prefix %d, suffix %d",
16 length $prefix, length $suffix;
17 my $as_utf8 = encode('UTF-8', $string);
18 is(utf16_to_utf8(encode('UTF-16BE', $string)), $as_utf8,
19 "utf16_to_utf8 $name");
20 is(utf16_to_utf8_reversed(encode('UTF-16LE', $string)), $as_utf8,
21 "utf16_to_utf8_reversed $name");
22 }
23 }
24}
25
26# Currently this is special-cased to work. Should it?
27
28is(utf16_to_utf8("\0"), "\0", 'Short string to utf16_to_utf8');
29
30# But anything else is fatal
31
32my $got = eval {utf16_to_utf8('N')};
33like($@, qr/^panic: utf16_to_utf8: odd bytelen 1 at/, 'Odd byte length panics');
34is($got, undef, 'hence eval returns undef');
35
36for (["\xD8\0\0\0", 'NULs'],
37 ["\xD8\0\xD8\0", '2 Lows'],
dbde1951 38 ["\xDC\0\0\0", 'High NUL'],
39 ["\xDC\0\xD8\0", 'High Low'],
40 ["\xDC\0\xDC\0", 'High High'],
30685b56 41 ) {
42 my ($malformed, $name) = @$_;
43 $got = eval {utf16_to_utf8($malformed)};
44 like($@, qr/^Malformed UTF-16 surrogate at/,
45 "Malformed surrogate $name croaks for utf16_to_utf8");
46 is($got, undef, 'hence eval returns undef');
47
48 $malformed =~ s/(.)(.)/$2$1/gs;
49 $got = eval {utf16_to_utf8_reversed($malformed)};
50 like($@, qr/^Malformed UTF-16 surrogate at/,
51 "Malformed surrogate $name croaks for utf16_to_utf8_reversed");
52 is($got, undef, 'hence eval returns undef');
53}
e0ea5e2d 54
55my $in = "NA";
56$got = eval {utf16_to_utf8_reversed($in, 1)};
57like($@, qr/^panic: utf16_to_utf8_reversed: odd bytelen 1 at/,
58 'Odd byte length panics');
59is($got, undef, 'hence eval returns undef');
60is($in, "NA", 'and input unchanged');
01ea242b 61
62$in = "\xD8\0\xDC\0";
63$got = eval {utf16_to_utf8($in, 2)};
64like($@, qr/^Malformed UTF-16 surrogate at/, 'Lone surrogate croaks');
65(ok(!defined $got, 'hence eval returns undef')) or
66 diag(join ', ', map {ord $_} split //, $got);
67