Commit | Line | Data |
26b787bb |
1 | # |
7237418a |
2 | # $Id: Unicode.t,v 2.0 2004/05/16 20:55:17 dankogai Exp $ |
26b787bb |
3 | # |
85d9270e |
4 | # This script is written entirely in ASCII, even though quoted literals |
5 | # do include non-BMP unicode characters -- Are you happy, jhi? |
26b787bb |
6 | # |
7 | |
26b787bb |
8 | BEGIN { |
c731e18e |
9 | require Config; import Config; |
10 | if ($Config{'extensions'} !~ /\bEncode\b/) { |
11 | print "1..0 # Skip: Encode was not built\n"; |
12 | exit 0; |
13 | } |
cff1be36 |
14 | if (ord("A") == 193) { |
15 | print "1..0 # Skip: EBCDIC\n"; |
16 | exit 0; |
17 | } |
c731e18e |
18 | $| = 1; |
26b787bb |
19 | } |
20 | |
21 | use strict; |
22 | #use Test::More 'no_plan'; |
1485817e |
23 | use Test::More tests => 37; |
26b787bb |
24 | use Encode qw(encode decode); |
26b787bb |
25 | |
26 | # |
27 | # see |
28 | # http://www.unicode.org/unicode/reports/tr19/ |
29 | # |
30 | |
77ea6967 |
31 | my $dankogai = "\x{5c0f}\x{98fc}\x{3000}\x{5f3e}"; |
32 | my $nasty = "$dankogai\x{1abcd}"; |
33 | my $fallback = "$dankogai\x{fffd}"; |
26b787bb |
34 | |
35 | #hi: (0x1abcd - 0x10000) / 0x400 + 0xD800 = 0xd82a |
36 | #lo: (0x1abcd - 0x10000) % 0x400 + 0xDC00 = 0xdfcd |
37 | |
c731e18e |
38 | my $n_16be = |
77ea6967 |
39 | pack("C*", map {hex($_)} qw<5c 0f 98 fc 30 00 5f 3e d8 2a df cd>); |
26b787bb |
40 | my $n_16le = |
77ea6967 |
41 | pack("C*", map {hex($_)} qw<0f 5c fc 98 00 30 3e 5f 2a d8 cd df>); |
c731e18e |
42 | my $f_16be = |
77ea6967 |
43 | pack("C*", map {hex($_)} qw<5c 0f 98 fc 30 00 5f 3e ff fd>); |
26b787bb |
44 | my $f_16le = |
77ea6967 |
45 | pack("C*", map {hex($_)} qw<0f 5c fc 98 00 30 3e 5f fd ff>); |
46 | my $n_32be = |
47 | pack("C*", map {hex($_)} |
48 | qw<00 00 5c 0f 00 00 98 fc 00 00 30 00 00 00 5f 3e 00 01 ab cd>); |
c731e18e |
49 | my $n_32le = |
77ea6967 |
50 | pack("C*", map {hex($_)} |
51 | qw<0f 5c 00 00 fc 98 00 00 00 30 00 00 3e 5f 00 00 cd ab 01 00>); |
26b787bb |
52 | |
fdd579e2 |
53 | my $n_16bb = pack('n', 0xFeFF) . $n_16be; |
54 | my $n_16lb = pack('v', 0xFeFF) . $n_16le; |
55 | my $n_32bb = pack('N', 0xFeFF) . $n_32be; |
56 | my $n_32lb = pack('V', 0xFeFF) . $n_32le; |
26b787bb |
57 | |
58 | is($n_16be, encode('UTF-16BE', $nasty), qq{encode UTF-16BE}); |
59 | is($n_16le, encode('UTF-16LE', $nasty), qq{encode UTF-16LE}); |
60 | is($n_32be, encode('UTF-32BE', $nasty), qq{encode UTF-32BE}); |
61 | is($n_32le, encode('UTF-32LE', $nasty), qq{encode UTF-16LE}); |
62 | |
63 | is($nasty, decode('UTF-16BE', $n_16be), qq{decode UTF-16BE}); |
64 | is($nasty, decode('UTF-16LE', $n_16le), qq{decode UTF-16LE}); |
65 | is($nasty, decode('UTF-32BE', $n_32be), qq{decode UTF-32BE}); |
66 | is($nasty, decode('UTF-32LE', $n_32le), qq{decode UTF-32LE}); |
67 | |
68 | is($n_16bb, encode('UTF-16', $nasty), qq{encode UTF-16}); |
69 | is($n_32bb, encode('UTF-32', $nasty), qq{encode UTF-32}); |
70 | is($nasty, decode('UTF-16', $n_16bb), qq{decode UTF-16, bom=be}); |
71 | is($nasty, decode('UTF-16', $n_16lb), qq{decode UTF-16, bom=le}); |
72 | is($nasty, decode('UTF-32', $n_32bb), qq{decode UTF-32, bom=be}); |
73 | is($nasty, decode('UTF-32', $n_32lb), qq{decode UTF-32, bom=le}); |
74 | |
75 | is(decode('UCS-2BE', $n_16be), $fallback, "decode UCS-2BE: fallback"); |
76 | is(decode('UCS-2LE', $n_16le), $fallback, "decode UCS-2LE: fallback"); |
c731e18e |
77 | eval { decode('UCS-2BE', $n_16be, 1) }; |
aae85ceb |
78 | is (index($@,'UCS-2BE:'), 0, "decode UCS-2BE: exception"); |
79 | eval { decode('UCS-2LE', $n_16le, 1) }; |
80 | is (index($@,'UCS-2LE:'), 0, "decode UCS-2LE: exception"); |
26b787bb |
81 | is(encode('UCS-2BE', $nasty), $f_16be, "encode UCS-2BE: fallback"); |
82 | is(encode('UCS-2LE', $nasty), $f_16le, "encode UCS-2LE: fallback"); |
c731e18e |
83 | eval { encode('UCS-2BE', $nasty, 1) }; |
aae85ceb |
84 | is(index($@, 'UCS-2BE'), 0, "encode UCS-2BE: exception"); |
c731e18e |
85 | eval { encode('UCS-2LE', $nasty, 1) }; |
aae85ceb |
86 | is(index($@, 'UCS-2LE'), 0, "encode UCS-2LE: exception"); |
87 | |
88 | # |
89 | # SvGROW test for (en|de)code_xs |
90 | # |
91 | SKIP: { |
aae85ceb |
92 | my $utf8 = ''; |
93 | for my $j (0,0x10){ |
94 | for my $i (0..0xffff){ |
95 | $j == 0 and (0xD800 <= $i && $i <= 0xDFFF) and next; |
96 | $utf8 .= ord($j+$i); |
97 | } |
aae85ceb |
98 | for my $major ('UTF-16', 'UTF-32'){ |
99 | for my $minor ('BE', 'LE'){ |
100 | my $enc = $major.$minor; |
621b0f8d |
101 | is(decode($enc, encode($enc, $utf8)), $utf8, "$enc RT"); |
aae85ceb |
102 | } |
103 | } |
104 | } |
105 | }; |
106 | |
1485817e |
107 | # |
108 | # CJKT vs. UTF-7 |
109 | # |
26b787bb |
110 | |
1485817e |
111 | use File::Spec; |
112 | use File::Basename; |
113 | |
114 | my $dir = dirname(__FILE__); |
115 | opendir my $dh, $dir or die "$dir:$!"; |
116 | my @file = sort grep {/\.utf$/o} readdir $dh; |
117 | closedir $dh; |
118 | for my $file (@file){ |
119 | my $path = File::Spec->catfile($dir, $file); |
67e51b56 |
120 | open my $fh, '<', $path or die "$path:$!"; |
121 | my $content; |
fcc05d01 |
122 | if (PerlIO::Layer->find('perlio')){ |
03871ea6 |
123 | binmode $fh => ':utf8'; |
124 | $content = join('' => <$fh>); |
67e51b56 |
125 | }else{ # ugh! |
03871ea6 |
126 | binmode $fh; |
127 | $content = join('' => <$fh>); |
128 | Encode::_utf8_on($content) |
fcc05d01 |
129 | } |
1485817e |
130 | close $fh; |
131 | is(decode("UTF-7", encode("UTF-7", $content)), $content, |
132 | "UTF-7 RT:$file"); |
133 | } |
26b787bb |
134 | 1; |
135 | __END__ |