Commit | Line | Data |
30685b56 |
1 | #!perl -w |
2 | |
3 | use strict; |
4 | use Test::More 'no_plan'; |
5 | use Encode; |
6 | |
7 | use XS::APItest qw(utf16_to_utf8 utf16_to_utf8_reversed); |
8 | |
52b9aa85 |
9 | for my $ord (0, 10, 13, 78, 255, 256, 0xD7FF, 0xE000, 0xFFFD, |
10 | 0x10000, 0x10FC00, 0x103FF, 0x10FFFD) { |
30685b56 |
11 | my $chr = chr $ord; |
12 | for my $prefix ('', "\0", 'Perl rules') { |
13 | for my $suffix ('', "\0", "Moo!") { |
14 | my $string = $prefix . $chr . $suffix; |
15 | my $name = sprintf "for chr $ord prefix %d, suffix %d", |
16 | length $prefix, length $suffix; |
17 | my $as_utf8 = encode('UTF-8', $string); |
18 | is(utf16_to_utf8(encode('UTF-16BE', $string)), $as_utf8, |
19 | "utf16_to_utf8 $name"); |
20 | is(utf16_to_utf8_reversed(encode('UTF-16LE', $string)), $as_utf8, |
21 | "utf16_to_utf8_reversed $name"); |
22 | } |
23 | } |
24 | } |
25 | |
26 | # Currently this is special-cased to work. Should it? |
27 | |
28 | is(utf16_to_utf8("\0"), "\0", 'Short string to utf16_to_utf8'); |
29 | |
30 | # But anything else is fatal |
31 | |
32 | my $got = eval {utf16_to_utf8('N')}; |
33 | like($@, qr/^panic: utf16_to_utf8: odd bytelen 1 at/, 'Odd byte length panics'); |
34 | is($got, undef, 'hence eval returns undef'); |
35 | |
36 | for (["\xD8\0\0\0", 'NULs'], |
37 | ["\xD8\0\xD8\0", '2 Lows'], |
dbde1951 |
38 | ["\xDC\0\0\0", 'High NUL'], |
39 | ["\xDC\0\xD8\0", 'High Low'], |
40 | ["\xDC\0\xDC\0", 'High High'], |
30685b56 |
41 | ) { |
42 | my ($malformed, $name) = @$_; |
43 | $got = eval {utf16_to_utf8($malformed)}; |
44 | like($@, qr/^Malformed UTF-16 surrogate at/, |
45 | "Malformed surrogate $name croaks for utf16_to_utf8"); |
46 | is($got, undef, 'hence eval returns undef'); |
47 | |
48 | $malformed =~ s/(.)(.)/$2$1/gs; |
49 | $got = eval {utf16_to_utf8_reversed($malformed)}; |
50 | like($@, qr/^Malformed UTF-16 surrogate at/, |
51 | "Malformed surrogate $name croaks for utf16_to_utf8_reversed"); |
52 | is($got, undef, 'hence eval returns undef'); |
53 | } |
e0ea5e2d |
54 | |
55 | my $in = "NA"; |
56 | $got = eval {utf16_to_utf8_reversed($in, 1)}; |
57 | like($@, qr/^panic: utf16_to_utf8_reversed: odd bytelen 1 at/, |
58 | 'Odd byte length panics'); |
59 | is($got, undef, 'hence eval returns undef'); |
60 | is($in, "NA", 'and input unchanged'); |
01ea242b |
61 | |
62 | $in = "\xD8\0\xDC\0"; |
63 | $got = eval {utf16_to_utf8($in, 2)}; |
64 | like($@, qr/^Malformed UTF-16 surrogate at/, 'Lone surrogate croaks'); |
65 | (ok(!defined $got, 'hence eval returns undef')) or |
66 | diag(join ', ', map {ord $_} split //, $got); |
67 | |