Commit | Line | Data |
30685b56 |
1 | #!perl -w |
2 | |
3 | use strict; |
4 | use Test::More 'no_plan'; |
5 | use Encode; |
6 | |
7 | use XS::APItest qw(utf16_to_utf8 utf16_to_utf8_reversed); |
8 | |
9 | for my $ord (0, 10, 13, 78, 255, 256, 0xD7FF, 0xE000, 0x10000) { |
10 | my $chr = chr $ord; |
11 | for my $prefix ('', "\0", 'Perl rules') { |
12 | for my $suffix ('', "\0", "Moo!") { |
13 | my $string = $prefix . $chr . $suffix; |
14 | my $name = sprintf "for chr $ord prefix %d, suffix %d", |
15 | length $prefix, length $suffix; |
16 | my $as_utf8 = encode('UTF-8', $string); |
17 | is(utf16_to_utf8(encode('UTF-16BE', $string)), $as_utf8, |
18 | "utf16_to_utf8 $name"); |
19 | is(utf16_to_utf8_reversed(encode('UTF-16LE', $string)), $as_utf8, |
20 | "utf16_to_utf8_reversed $name"); |
21 | } |
22 | } |
23 | } |
24 | |
25 | # Currently this is special-cased to work. Should it? |
26 | |
27 | is(utf16_to_utf8("\0"), "\0", 'Short string to utf16_to_utf8'); |
28 | |
29 | # But anything else is fatal |
30 | |
31 | my $got = eval {utf16_to_utf8('N')}; |
32 | like($@, qr/^panic: utf16_to_utf8: odd bytelen 1 at/, 'Odd byte length panics'); |
33 | is($got, undef, 'hence eval returns undef'); |
34 | |
35 | for (["\xD8\0\0\0", 'NULs'], |
36 | ["\xD8\0\xD8\0", '2 Lows'], |
dbde1951 |
37 | ["\xDC\0\0\0", 'High NUL'], |
38 | ["\xDC\0\xD8\0", 'High Low'], |
39 | ["\xDC\0\xDC\0", 'High High'], |
30685b56 |
40 | ) { |
41 | my ($malformed, $name) = @$_; |
42 | $got = eval {utf16_to_utf8($malformed)}; |
43 | like($@, qr/^Malformed UTF-16 surrogate at/, |
44 | "Malformed surrogate $name croaks for utf16_to_utf8"); |
45 | is($got, undef, 'hence eval returns undef'); |
46 | |
47 | $malformed =~ s/(.)(.)/$2$1/gs; |
48 | $got = eval {utf16_to_utf8_reversed($malformed)}; |
49 | like($@, qr/^Malformed UTF-16 surrogate at/, |
50 | "Malformed surrogate $name croaks for utf16_to_utf8_reversed"); |
51 | is($got, undef, 'hence eval returns undef'); |
52 | } |
e0ea5e2d |
53 | |
54 | my $in = "NA"; |
55 | $got = eval {utf16_to_utf8_reversed($in, 1)}; |
56 | like($@, qr/^panic: utf16_to_utf8_reversed: odd bytelen 1 at/, |
57 | 'Odd byte length panics'); |
58 | is($got, undef, 'hence eval returns undef'); |
59 | is($in, "NA", 'and input unchanged'); |
01ea242b |
60 | |
61 | $in = "\xD8\0\xDC\0"; |
62 | $got = eval {utf16_to_utf8($in, 2)}; |
63 | like($@, qr/^Malformed UTF-16 surrogate at/, 'Lone surrogate croaks'); |
64 | (ok(!defined $got, 'hence eval returns undef')) or |
65 | diag(join ', ', map {ord $_} split //, $got); |
66 | |