Commit | Line | Data |
26b787bb |
1 | # |
aae85ceb |
2 | # $Id: Unicode.t,v 1.7 2002/04/14 22:05:20 dankogai Exp $ |
26b787bb |
3 | # |
85d9270e |
4 | # This script is written entirely in ASCII, even though quoted literals |
5 | # do include non-BMP unicode characters -- Are you happy, jhi? |
26b787bb |
6 | # |
7 | |
aae85ceb |
8 | our $ON_EBCDIC; |
26b787bb |
9 | BEGIN { |
c731e18e |
10 | require Config; import Config; |
11 | if ($Config{'extensions'} !~ /\bEncode\b/) { |
12 | print "1..0 # Skip: Encode was not built\n"; |
13 | exit 0; |
14 | } |
26b787bb |
15 | # should work without perlio |
16 | # unless (find PerlIO::Layer 'perlio') { |
c731e18e |
17 | # print "1..0 # Skip: PerlIO was not built\n"; |
18 | # exit 0; |
26b787bb |
19 | # } |
aae85ceb |
20 | |
77ea6967 |
21 | # should work on EBCDIC |
22 | # if (ord("A") == 193) { |
23 | # print "1..0 # Skip: EBCDIC\n"; |
24 | # exit 0; |
25 | # } |
aae85ceb |
26 | $ON_EBCDIC = (ord("A") == 193) || $ARGV[0]; |
c731e18e |
27 | $| = 1; |
26b787bb |
28 | } |
29 | |
30 | use strict; |
31 | #use Test::More 'no_plan'; |
aae85ceb |
32 | use Test::More tests => 30; |
26b787bb |
33 | use Encode qw(encode decode); |
26b787bb |
34 | |
35 | # |
36 | # see |
37 | # http://www.unicode.org/unicode/reports/tr19/ |
38 | # |
39 | |
77ea6967 |
40 | my $dankogai = "\x{5c0f}\x{98fc}\x{3000}\x{5f3e}"; |
41 | my $nasty = "$dankogai\x{1abcd}"; |
42 | my $fallback = "$dankogai\x{fffd}"; |
26b787bb |
43 | |
44 | #hi: (0x1abcd - 0x10000) / 0x400 + 0xD800 = 0xd82a |
45 | #lo: (0x1abcd - 0x10000) % 0x400 + 0xDC00 = 0xdfcd |
46 | |
c731e18e |
47 | my $n_16be = |
77ea6967 |
48 | pack("C*", map {hex($_)} qw<5c 0f 98 fc 30 00 5f 3e d8 2a df cd>); |
26b787bb |
49 | my $n_16le = |
77ea6967 |
50 | pack("C*", map {hex($_)} qw<0f 5c fc 98 00 30 3e 5f 2a d8 cd df>); |
c731e18e |
51 | my $f_16be = |
77ea6967 |
52 | pack("C*", map {hex($_)} qw<5c 0f 98 fc 30 00 5f 3e ff fd>); |
26b787bb |
53 | my $f_16le = |
77ea6967 |
54 | pack("C*", map {hex($_)} qw<0f 5c fc 98 00 30 3e 5f fd ff>); |
55 | my $n_32be = |
56 | pack("C*", map {hex($_)} |
57 | qw<00 00 5c 0f 00 00 98 fc 00 00 30 00 00 00 5f 3e 00 01 ab cd>); |
c731e18e |
58 | my $n_32le = |
77ea6967 |
59 | pack("C*", map {hex($_)} |
60 | qw<0f 5c 00 00 fc 98 00 00 00 30 00 00 3e 5f 00 00 cd ab 01 00>); |
26b787bb |
61 | |
fdd579e2 |
62 | my $n_16bb = pack('n', 0xFeFF) . $n_16be; |
63 | my $n_16lb = pack('v', 0xFeFF) . $n_16le; |
64 | my $n_32bb = pack('N', 0xFeFF) . $n_32be; |
65 | my $n_32lb = pack('V', 0xFeFF) . $n_32le; |
26b787bb |
66 | |
67 | is($n_16be, encode('UTF-16BE', $nasty), qq{encode UTF-16BE}); |
68 | is($n_16le, encode('UTF-16LE', $nasty), qq{encode UTF-16LE}); |
69 | is($n_32be, encode('UTF-32BE', $nasty), qq{encode UTF-32BE}); |
70 | is($n_32le, encode('UTF-32LE', $nasty), qq{encode UTF-16LE}); |
71 | |
72 | is($nasty, decode('UTF-16BE', $n_16be), qq{decode UTF-16BE}); |
73 | is($nasty, decode('UTF-16LE', $n_16le), qq{decode UTF-16LE}); |
74 | is($nasty, decode('UTF-32BE', $n_32be), qq{decode UTF-32BE}); |
75 | is($nasty, decode('UTF-32LE', $n_32le), qq{decode UTF-32LE}); |
76 | |
77 | is($n_16bb, encode('UTF-16', $nasty), qq{encode UTF-16}); |
78 | is($n_32bb, encode('UTF-32', $nasty), qq{encode UTF-32}); |
79 | is($nasty, decode('UTF-16', $n_16bb), qq{decode UTF-16, bom=be}); |
80 | is($nasty, decode('UTF-16', $n_16lb), qq{decode UTF-16, bom=le}); |
81 | is($nasty, decode('UTF-32', $n_32bb), qq{decode UTF-32, bom=be}); |
82 | is($nasty, decode('UTF-32', $n_32lb), qq{decode UTF-32, bom=le}); |
83 | |
84 | is(decode('UCS-2BE', $n_16be), $fallback, "decode UCS-2BE: fallback"); |
85 | is(decode('UCS-2LE', $n_16le), $fallback, "decode UCS-2LE: fallback"); |
c731e18e |
86 | eval { decode('UCS-2BE', $n_16be, 1) }; |
aae85ceb |
87 | is (index($@,'UCS-2BE:'), 0, "decode UCS-2BE: exception"); |
88 | eval { decode('UCS-2LE', $n_16le, 1) }; |
89 | is (index($@,'UCS-2LE:'), 0, "decode UCS-2LE: exception"); |
26b787bb |
90 | is(encode('UCS-2BE', $nasty), $f_16be, "encode UCS-2BE: fallback"); |
91 | is(encode('UCS-2LE', $nasty), $f_16le, "encode UCS-2LE: fallback"); |
c731e18e |
92 | eval { encode('UCS-2BE', $nasty, 1) }; |
aae85ceb |
93 | is(index($@, 'UCS-2BE'), 0, "encode UCS-2BE: exception"); |
c731e18e |
94 | eval { encode('UCS-2LE', $nasty, 1) }; |
aae85ceb |
95 | is(index($@, 'UCS-2LE'), 0, "encode UCS-2LE: exception"); |
96 | |
97 | # |
98 | # SvGROW test for (en|de)code_xs |
99 | # |
100 | SKIP: { |
101 | skip "Not on EBCDIC", 8 if $ON_EBCDIC; |
102 | my $utf8 = ''; |
103 | for my $j (0,0x10){ |
104 | for my $i (0..0xffff){ |
105 | $j == 0 and (0xD800 <= $i && $i <= 0xDFFF) and next; |
106 | $utf8 .= ord($j+$i); |
107 | } |
108 | my $len = length($utf8); |
109 | for my $major ('UTF-16', 'UTF-32'){ |
110 | for my $minor ('BE', 'LE'){ |
111 | my $enc = $major.$minor; |
112 | is(decode($enc, encode($enc, $utf8)), $utf8, "$enc RT ($len)"); |
113 | } |
114 | } |
115 | } |
116 | }; |
117 | |
26b787bb |
118 | |
119 | 1; |
120 | __END__ |