Commit | Line | Data |
30685b56 |
1 | #!perl -w |
2 | |
3 | use strict; |
4 | use Test::More 'no_plan'; |
5 | use Encode; |
6 | |
7 | use XS::APItest qw(utf16_to_utf8 utf16_to_utf8_reversed); |
8 | |
9 | for my $ord (0, 10, 13, 78, 255, 256, 0xD7FF, 0xE000, 0x10000) { |
10 | my $chr = chr $ord; |
11 | for my $prefix ('', "\0", 'Perl rules') { |
12 | for my $suffix ('', "\0", "Moo!") { |
13 | my $string = $prefix . $chr . $suffix; |
14 | my $name = sprintf "for chr $ord prefix %d, suffix %d", |
15 | length $prefix, length $suffix; |
16 | my $as_utf8 = encode('UTF-8', $string); |
17 | is(utf16_to_utf8(encode('UTF-16BE', $string)), $as_utf8, |
18 | "utf16_to_utf8 $name"); |
19 | is(utf16_to_utf8_reversed(encode('UTF-16LE', $string)), $as_utf8, |
20 | "utf16_to_utf8_reversed $name"); |
21 | } |
22 | } |
23 | } |
24 | |
25 | # Currently this is special-cased to work. Should it? |
26 | |
27 | is(utf16_to_utf8("\0"), "\0", 'Short string to utf16_to_utf8'); |
28 | |
29 | # But anything else is fatal |
30 | |
31 | my $got = eval {utf16_to_utf8('N')}; |
32 | like($@, qr/^panic: utf16_to_utf8: odd bytelen 1 at/, 'Odd byte length panics'); |
33 | is($got, undef, 'hence eval returns undef'); |
34 | |
35 | for (["\xD8\0\0\0", 'NULs'], |
36 | ["\xD8\0\xD8\0", '2 Lows'], |
37 | ) { |
38 | my ($malformed, $name) = @$_; |
39 | $got = eval {utf16_to_utf8($malformed)}; |
40 | like($@, qr/^Malformed UTF-16 surrogate at/, |
41 | "Malformed surrogate $name croaks for utf16_to_utf8"); |
42 | is($got, undef, 'hence eval returns undef'); |
43 | |
44 | $malformed =~ s/(.)(.)/$2$1/gs; |
45 | $got = eval {utf16_to_utf8_reversed($malformed)}; |
46 | like($@, qr/^Malformed UTF-16 surrogate at/, |
47 | "Malformed surrogate $name croaks for utf16_to_utf8_reversed"); |
48 | is($got, undef, 'hence eval returns undef'); |
49 | } |
e0ea5e2d |
50 | |
51 | my $in = "NA"; |
52 | $got = eval {utf16_to_utf8_reversed($in, 1)}; |
53 | like($@, qr/^panic: utf16_to_utf8_reversed: odd bytelen 1 at/, |
54 | 'Odd byte length panics'); |
55 | is($got, undef, 'hence eval returns undef'); |
56 | is($in, "NA", 'and input unchanged'); |
01ea242b |
57 | |
58 | $in = "\xD8\0\xDC\0"; |
59 | $got = eval {utf16_to_utf8($in, 2)}; |
60 | like($@, qr/^Malformed UTF-16 surrogate at/, 'Lone surrogate croaks'); |
61 | (ok(!defined $got, 'hence eval returns undef')) or |
62 | diag(join ', ', map {ord $_} split //, $got); |
63 | |