Commit | Line | Data |
26b787bb |
1 | # |
b2704119 |
2 | # $Id: Unicode.t,v 1.8 2002/04/16 23:35:00 dankogai Exp $ |
26b787bb |
3 | # |
85d9270e |
4 | # This script is written entirely in ASCII, even though quoted literals |
5 | # do include non-BMP unicode characters -- Are you happy, jhi? |
26b787bb |
6 | # |
7 | |
aae85ceb |
8 | our $ON_EBCDIC; |
26b787bb |
9 | BEGIN { |
c731e18e |
10 | require Config; import Config; |
11 | if ($Config{'extensions'} !~ /\bEncode\b/) { |
12 | print "1..0 # Skip: Encode was not built\n"; |
13 | exit 0; |
14 | } |
aae85ceb |
15 | $ON_EBCDIC = (ord("A") == 193) || $ARGV[0]; |
c731e18e |
16 | $| = 1; |
26b787bb |
17 | } |
18 | |
19 | use strict; |
20 | #use Test::More 'no_plan'; |
aae85ceb |
21 | use Test::More tests => 30; |
26b787bb |
22 | use Encode qw(encode decode); |
26b787bb |
23 | |
24 | # |
25 | # see |
26 | # http://www.unicode.org/unicode/reports/tr19/ |
27 | # |
28 | |
77ea6967 |
29 | my $dankogai = "\x{5c0f}\x{98fc}\x{3000}\x{5f3e}"; |
30 | my $nasty = "$dankogai\x{1abcd}"; |
31 | my $fallback = "$dankogai\x{fffd}"; |
26b787bb |
32 | |
33 | #hi: (0x1abcd - 0x10000) / 0x400 + 0xD800 = 0xd82a |
34 | #lo: (0x1abcd - 0x10000) % 0x400 + 0xDC00 = 0xdfcd |
35 | |
c731e18e |
36 | my $n_16be = |
77ea6967 |
37 | pack("C*", map {hex($_)} qw<5c 0f 98 fc 30 00 5f 3e d8 2a df cd>); |
26b787bb |
38 | my $n_16le = |
77ea6967 |
39 | pack("C*", map {hex($_)} qw<0f 5c fc 98 00 30 3e 5f 2a d8 cd df>); |
c731e18e |
40 | my $f_16be = |
77ea6967 |
41 | pack("C*", map {hex($_)} qw<5c 0f 98 fc 30 00 5f 3e ff fd>); |
26b787bb |
42 | my $f_16le = |
77ea6967 |
43 | pack("C*", map {hex($_)} qw<0f 5c fc 98 00 30 3e 5f fd ff>); |
44 | my $n_32be = |
45 | pack("C*", map {hex($_)} |
46 | qw<00 00 5c 0f 00 00 98 fc 00 00 30 00 00 00 5f 3e 00 01 ab cd>); |
c731e18e |
47 | my $n_32le = |
77ea6967 |
48 | pack("C*", map {hex($_)} |
49 | qw<0f 5c 00 00 fc 98 00 00 00 30 00 00 3e 5f 00 00 cd ab 01 00>); |
26b787bb |
50 | |
fdd579e2 |
51 | my $n_16bb = pack('n', 0xFeFF) . $n_16be; |
52 | my $n_16lb = pack('v', 0xFeFF) . $n_16le; |
53 | my $n_32bb = pack('N', 0xFeFF) . $n_32be; |
54 | my $n_32lb = pack('V', 0xFeFF) . $n_32le; |
26b787bb |
55 | |
56 | is($n_16be, encode('UTF-16BE', $nasty), qq{encode UTF-16BE}); |
57 | is($n_16le, encode('UTF-16LE', $nasty), qq{encode UTF-16LE}); |
58 | is($n_32be, encode('UTF-32BE', $nasty), qq{encode UTF-32BE}); |
59 | is($n_32le, encode('UTF-32LE', $nasty), qq{encode UTF-16LE}); |
60 | |
61 | is($nasty, decode('UTF-16BE', $n_16be), qq{decode UTF-16BE}); |
62 | is($nasty, decode('UTF-16LE', $n_16le), qq{decode UTF-16LE}); |
63 | is($nasty, decode('UTF-32BE', $n_32be), qq{decode UTF-32BE}); |
64 | is($nasty, decode('UTF-32LE', $n_32le), qq{decode UTF-32LE}); |
65 | |
66 | is($n_16bb, encode('UTF-16', $nasty), qq{encode UTF-16}); |
67 | is($n_32bb, encode('UTF-32', $nasty), qq{encode UTF-32}); |
68 | is($nasty, decode('UTF-16', $n_16bb), qq{decode UTF-16, bom=be}); |
69 | is($nasty, decode('UTF-16', $n_16lb), qq{decode UTF-16, bom=le}); |
70 | is($nasty, decode('UTF-32', $n_32bb), qq{decode UTF-32, bom=be}); |
71 | is($nasty, decode('UTF-32', $n_32lb), qq{decode UTF-32, bom=le}); |
72 | |
73 | is(decode('UCS-2BE', $n_16be), $fallback, "decode UCS-2BE: fallback"); |
74 | is(decode('UCS-2LE', $n_16le), $fallback, "decode UCS-2LE: fallback"); |
c731e18e |
75 | eval { decode('UCS-2BE', $n_16be, 1) }; |
aae85ceb |
76 | is (index($@,'UCS-2BE:'), 0, "decode UCS-2BE: exception"); |
77 | eval { decode('UCS-2LE', $n_16le, 1) }; |
78 | is (index($@,'UCS-2LE:'), 0, "decode UCS-2LE: exception"); |
26b787bb |
79 | is(encode('UCS-2BE', $nasty), $f_16be, "encode UCS-2BE: fallback"); |
80 | is(encode('UCS-2LE', $nasty), $f_16le, "encode UCS-2LE: fallback"); |
c731e18e |
81 | eval { encode('UCS-2BE', $nasty, 1) }; |
aae85ceb |
82 | is(index($@, 'UCS-2BE'), 0, "encode UCS-2BE: exception"); |
c731e18e |
83 | eval { encode('UCS-2LE', $nasty, 1) }; |
aae85ceb |
84 | is(index($@, 'UCS-2LE'), 0, "encode UCS-2LE: exception"); |
85 | |
86 | # |
87 | # SvGROW test for (en|de)code_xs |
88 | # |
89 | SKIP: { |
90 | skip "Not on EBCDIC", 8 if $ON_EBCDIC; |
91 | my $utf8 = ''; |
92 | for my $j (0,0x10){ |
93 | for my $i (0..0xffff){ |
94 | $j == 0 and (0xD800 <= $i && $i <= 0xDFFF) and next; |
95 | $utf8 .= ord($j+$i); |
96 | } |
97 | my $len = length($utf8); |
98 | for my $major ('UTF-16', 'UTF-32'){ |
99 | for my $minor ('BE', 'LE'){ |
100 | my $enc = $major.$minor; |
101 | is(decode($enc, encode($enc, $utf8)), $utf8, "$enc RT ($len)"); |
102 | } |
103 | } |
104 | } |
105 | }; |
106 | |
26b787bb |
107 | |
108 | 1; |
109 | __END__ |