Commit | Line | Data |
71a18b0f |
1 | BEGIN { |
2 | chdir 't' if -d 't'; |
3 | @INC = '../lib'; |
4 | require Config; import Config; |
5 | if ($Config{'extensions'} !~ /\bEncode\b/) { |
6 | print "1..0 # Skip: Encode was not built\n"; |
7 | exit 0; |
8 | } |
9 | } |
10 | use Test; |
11 | use Encode qw(encode decode); |
12 | use Encode::Tcl; |
13 | |
14 | my @encodings = qw(euc-cn euc-jp euc-kr big5 shiftjis); # CJK |
15 | my $n = 2; |
16 | |
17 | my %greek = ( |
18 | 'euc-cn' => [0xA6A1..0xA6B8,0xA6C1..0xA6D8], |
19 | 'euc-jp' => [0xA6A1..0xA6B8,0xA6C1..0xA6D8], |
20 | 'euc-kr' => [0xA5C1..0xA5D8,0xA5E1..0xA5F8], |
21 | 'big5' => [0xA344..0xA35B,0xA35C..0xA373], |
22 | 'shiftjis' => [0x839F..0x83B6,0x83BF..0x83D6], |
23 | 'utf8' => [0x0391..0x03A1,0x03A3..0x03A9,0x03B1..0x03C1,0x03C3..0x03C9], |
24 | ); |
25 | my @greek = qw( |
26 | ALPHA BETA GAMMA DELTA EPSILON ZETA ETA |
27 | THETA IOTA KAPPA LAMBDA MU NU XI OMICRON |
28 | PI RHO SIGMA TAU UPSILON PHI CHI PSI OMEGA |
29 | alpha beta gamma delta epsilon zeta eta |
30 | theta iota kappa lambda mu nu xi omicron |
31 | pi rho sigma tau upsilon phi chi psi omega |
32 | ); |
33 | |
34 | my %ideodigit = ( # cjk ideograph 'one' to 'ten' |
35 | 'euc-cn' => [qw(d2bb b6fe c8fd cbc4 cee5 c1f9 c6df b0cb bec5 caae)], |
36 | 'euc-jp' => [qw(b0ec c6f3 bbb0 bbcd b8de cfbb bcb7 c8ac b6e5 bdbd)], |
37 | 'euc-kr' => [qw(ece9 eca3 dfb2 decc e7e9 d7bf f6d2 f8a2 cefa e4a8)], |
38 | 'big5' => [qw(a440 a447 a454 a57c a4ad a4bb a443 a44b a445 a451)], |
39 | 'shiftjis' => [qw(88ea 93f1 8e4f 8e6c 8cdc 985a 8eb5 94aa 8be3 8f5c)], |
40 | 'utf8' => [qw(4e00 4e8c 4e09 56db 4e94 516d 4e03 516b 4e5d 5341)], |
41 | ); |
42 | my @ideodigit = qw(one two three four five six seven eight nine ten); |
43 | |
33f2f539 |
44 | my $jis = '7bit-jis'; |
466d6cd3 |
45 | my $kr = 'iso2022-kr'; |
33f2f539 |
46 | my %esc_str; |
47 | |
48 | $esc_str{$jis} = {qw( |
49 | 1b24422422242424262428242a1b2842 |
50 | 3042304430463048304a |
51 | 1b284931323334355d1b2842 |
52 | ff71ff72ff73ff74ff75ff9d |
53 | 1b2442467c4b5c1b2842 |
54 | 65e5672c |
55 | 3132331b244234413b7a1b28425065726c |
56 | 0031003200336f225b57005000650072006c |
57 | 546573740a1b24422546253925481b28420a |
58 | 0054006500730074000a30c630b930c8000a |
59 | )}; |
60 | |
61 | $esc_str{$kr} = {qw( |
62 | 1b2429430e2a22213e0f410d0a |
63 | 304200b10041000d000a |
64 | 1b2429430e3021332a34593673383639593b673e46405a0f0d0a |
65 | ac00b098b2e4b77cb9c8bc14c0acc544c790000d000a |
66 | 1b2429434142430d0a |
67 | 004100420043000d000a |
68 | )}; |
69 | |
70 | my $num_esc = $n * keys(%esc_str); |
71 | foreach (values %esc_str){ $num_esc += $n * keys %$_ } |
72 | |
c72d1b38 |
73 | my $FS_preserves_case = 1; # Unix e.g. |
74 | if ($^O eq 'VMS') { # || $^O eq ... |
75 | $FS_preserves_case = 0; |
76 | } |
466d6cd3 |
77 | my $hz = 'HZ'; # HanZi |
c72d1b38 |
78 | if (!$FS_preserves_case) { |
79 | $hz = 'hz'; # HanZi |
80 | } |
466d6cd3 |
81 | |
82 | my @hz_txt = ( |
83 | "~~in GB.~{<:Ky2;S{#,NpJ)l6HK!#~}Bye.~~", |
84 | "~~in GB.~{<:Ky2;S{#,~}~\cJ~{NpJ)l6HK!#~}Bye.~~", |
85 | "~~in GB.~\cJ~{<:Ky2;S{#,NpJ)l6HK!#~}~\cJBye.~~", |
86 | ); |
87 | |
88 | my $hz_exp = '007e0069006e002000470042002e5df162404e0d6b32' |
89 | . 'ff0c52ff65bd65bc4eba3002004200790065002e007e'; |
90 | |
33f2f539 |
91 | plan test => $n*@encodings + $n*@encodings*@greek |
466d6cd3 |
92 | + $n*@encodings*@ideodigit + $num_esc + $n + @hz_txt; |
71a18b0f |
93 | |
94 | foreach my $enc (@encodings) |
95 | { |
96 | my $tab = Encode->getEncoding($enc); |
97 | ok(1,defined($tab),"Could not load $enc"); |
98 | my $str = join('',map(chr($_),0x20..0x7E)); |
99 | my $uni = $tab->decode($str); |
100 | my $cpy = $tab->encode($uni); |
101 | ok($cpy,$str,"$enc mangled translating to Unicode and back"); |
102 | } |
103 | |
104 | foreach my $enc (@encodings) |
105 | { |
106 | my $tab = Encode->getEncoding($enc); |
107 | foreach my $gk (0..$#greek) |
108 | { |
109 | my $uni = unpack 'U', $tab->decode(pack 'n', $greek{$enc}[$gk]); |
110 | ok($uni,$greek{'utf8'}[$gk], |
111 | "$enc mangled translating to Unicode GREEK $greek[$gk]"); |
112 | my $cpy = unpack 'n',$tab->encode(pack 'U',$uni); |
113 | ok($cpy,$greek{$enc}[$gk], |
114 | "$enc mangled translating from Unicode GREEK $greek[$gk]"); |
115 | } |
116 | } |
117 | |
118 | foreach my $enc (@encodings) |
119 | { |
120 | my $tab = Encode->getEncoding($enc); |
121 | foreach my $id (0..$#ideodigit) |
122 | { |
123 | my $uni = unpack 'U',$tab->decode(pack 'H*', $ideodigit{$enc}[$id]); |
124 | ok($uni,hex($ideodigit{'utf8'}[$id]), |
125 | "$enc mangled translating to Unicode CJK IDEOGRAPH $ideodigit[$id]"); |
126 | my $cpy = lc unpack 'H*', $tab->encode(pack 'U',$uni); |
127 | ok($cpy,$ideodigit{$enc}[$id], |
128 | "$enc mangled translating from Unicode CJK IDEOGRAPH $ideodigit[$id]"); |
129 | } |
130 | } |
131 | |
33f2f539 |
132 | { |
133 | sub to_unicode |
134 | { |
135 | my $enc = shift; |
136 | return unpack('H*', pack 'n*', unpack 'U*', |
137 | decode $enc, pack 'H*', join '', @_); |
138 | } |
139 | |
140 | sub from_unicode |
141 | { |
142 | my $enc = shift; |
143 | return unpack('H*', encode $enc, |
144 | pack 'U*', unpack 'n*', pack 'H*', join '', @_); |
145 | } |
146 | |
147 | foreach my $enc (sort keys %esc_str) |
148 | { |
149 | my $tab = Encode->getEncoding($enc); |
150 | ok(1,defined($tab),"Could not load $enc"); |
151 | my %strings = %{ $esc_str{$enc} }; |
152 | foreach my $estr (sort keys %strings) |
153 | { |
154 | my $ustr = to_unicode($enc, $estr); |
155 | ok($ustr, $strings{$estr}, |
156 | "$enc mangled translating to Unicode"); |
157 | ok(from_unicode($enc, $ustr), $estr, |
158 | "$enc mangled translating from Unicode"); |
159 | } |
160 | ok(to_unicode($enc, keys %strings), join('', values %strings), |
161 | "$enc mangled translating to Unicode"); |
162 | } |
163 | } |
466d6cd3 |
164 | |
165 | |
166 | { |
167 | my $hz_to_unicode = sub |
168 | { |
169 | return unpack('H*', pack 'n*', unpack 'U*', decode $hz, shift); |
170 | }; |
171 | |
172 | my $hz_from_unicode = sub |
173 | { |
174 | return encode($hz, pack 'U*', unpack 'n*', pack 'H*', shift); |
175 | }; |
176 | |
177 | foreach my $enc ($hz) |
178 | { |
179 | my $tab = Encode->getEncoding($enc); |
180 | ok(1,defined($tab),"Could not load $enc"); |
181 | |
182 | ok(&$hz_from_unicode($hz_exp), $hz_txt[0], |
183 | "$enc mangled translating from Unicode"); |
184 | |
185 | foreach my $str (@hz_txt) |
186 | { |
187 | ok(&$hz_to_unicode($str), $hz_exp, |
188 | "$enc mangled translating to Unicode"); |
189 | } |
190 | } |
191 | } |