DOC PATCH 5.7.3 (perldelta)
[p5sagit/p5-mst-13.2.git] / ext / Encode / t / Tcl.t
CommitLineData
71a18b0f 1BEGIN {
2 chdir 't' if -d 't';
96d6357c 3# @INC = '../lib';
71a18b0f 4 require Config; import Config;
5 if ($Config{'extensions'} !~ /\bEncode\b/) {
6 print "1..0 # Skip: Encode was not built\n";
7 exit 0;
8 }
72f0eb71 9 if (ord("A") == 193) {
10 print "1..0 # Skip: EBCDIC\n";
11 exit 0;
12 }
71a18b0f 13}
14use Test;
15use Encode qw(encode decode);
16use Encode::Tcl;
17
18my @encodings = qw(euc-cn euc-jp euc-kr big5 shiftjis); # CJK
19my $n = 2;
20
21my %greek = (
22 'euc-cn' => [0xA6A1..0xA6B8,0xA6C1..0xA6D8],
23 'euc-jp' => [0xA6A1..0xA6B8,0xA6C1..0xA6D8],
24 'euc-kr' => [0xA5C1..0xA5D8,0xA5E1..0xA5F8],
25 'big5' => [0xA344..0xA35B,0xA35C..0xA373],
26 'shiftjis' => [0x839F..0x83B6,0x83BF..0x83D6],
27 'utf8' => [0x0391..0x03A1,0x03A3..0x03A9,0x03B1..0x03C1,0x03C3..0x03C9],
28);
29my @greek = qw(
30 ALPHA BETA GAMMA DELTA EPSILON ZETA ETA
31 THETA IOTA KAPPA LAMBDA MU NU XI OMICRON
32 PI RHO SIGMA TAU UPSILON PHI CHI PSI OMEGA
33 alpha beta gamma delta epsilon zeta eta
34 theta iota kappa lambda mu nu xi omicron
35 pi rho sigma tau upsilon phi chi psi omega
36);
37
38my %ideodigit = ( # cjk ideograph 'one' to 'ten'
39 'euc-cn' => [qw(d2bb b6fe c8fd cbc4 cee5 c1f9 c6df b0cb bec5 caae)],
40 'euc-jp' => [qw(b0ec c6f3 bbb0 bbcd b8de cfbb bcb7 c8ac b6e5 bdbd)],
41 'euc-kr' => [qw(ece9 eca3 dfb2 decc e7e9 d7bf f6d2 f8a2 cefa e4a8)],
42 'big5' => [qw(a440 a447 a454 a57c a4ad a4bb a443 a44b a445 a451)],
43 'shiftjis' => [qw(88ea 93f1 8e4f 8e6c 8cdc 985a 8eb5 94aa 8be3 8f5c)],
44 'utf8' => [qw(4e00 4e8c 4e09 56db 4e94 516d 4e03 516b 4e5d 5341)],
45);
46my @ideodigit = qw(one two three four five six seven eight nine ten);
47
33f2f539 48my $jis = '7bit-jis';
62756987 49my $kr = '2022-kr';
33f2f539 50my %esc_str;
51
52$esc_str{$jis} = {qw(
53 1b24422422242424262428242a1b2842
54 3042304430463048304a
55 1b284931323334355d1b2842
56 ff71ff72ff73ff74ff75ff9d
57 1b2442467c4b5c1b2842
58 65e5672c
59 3132331b244234413b7a1b28425065726c
60 0031003200336f225b57005000650072006c
61 546573740a1b24422546253925481b28420a
62 0054006500730074000a30c630b930c8000a
63)};
64
65$esc_str{$kr} = {qw(
66 1b2429430e2a22213e0f410d0a
67 304200b10041000d000a
68 1b2429430e3021332a34593673383639593b673e46405a0f0d0a
69 ac00b098b2e4b77cb9c8bc14c0acc544c790000d000a
70 1b2429434142430d0a
71 004100420043000d000a
72)};
73
74my $num_esc = $n * keys(%esc_str);
75foreach (values %esc_str){ $num_esc += $n * keys %$_ }
76
c72d1b38 77my $FS_preserves_case = 1; # Unix e.g.
78if ($^O eq 'VMS') { # || $^O eq ...
79 $FS_preserves_case = 0;
80}
466d6cd3 81my $hz = 'HZ'; # HanZi
c72d1b38 82if (!$FS_preserves_case) {
83 $hz = 'hz'; # HanZi
84}
466d6cd3 85
86my @hz_txt = (
87 "~~in GB.~{<:Ky2;S{#,NpJ)l6HK!#~}Bye.~~",
88 "~~in GB.~{<:Ky2;S{#,~}~\cJ~{NpJ)l6HK!#~}Bye.~~",
89 "~~in GB.~\cJ~{<:Ky2;S{#,NpJ)l6HK!#~}~\cJBye.~~",
90);
91
92my $hz_exp = '007e0069006e002000470042002e5df162404e0d6b32'
93 . 'ff0c52ff65bd65bc4eba3002004200790065002e007e';
94
96d6357c 95use constant BUFSIZ => 64; # for test
96use constant hiragana => "\x{3042}\x{3044}\x{3046}\x{3048}\x{304A}";
97use constant han_kana => "\x{FF71}\x{FF72}\x{FF73}\x{FF74}\x{FF75}";
98use constant macron => "\x{0100}\x{0112}\x{012a}\x{014c}\x{016a}";
99use constant TAIL => 'bbb';
100use constant YES => 1;
101
102my @ary_buff = ( # [ encoding, decoded, encoded ]
103# type-M
104 ["euc-cn", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
105 ["euc-jp", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
106 ["euc-jp", han_kana, "\x8E\xB1\x8E\xB2\x8E\xB3\x8E\xB4\x8E\xB5" ],
107 ["euc-kr", hiragana, "\xAA\xA2\xAA\xA4\xAA\xA6\xAA\xA8\xAA\xAA" ],
108 ["shiftjis", hiragana, "\x82\xA0\x82\xA2\x82\xA4\x82\xA6\x82\xA8" ],
109 ["shiftjis", han_kana, "\xB1\xB2\xB3\xB4\xB5" ],
110# type-E
111 ["2022-cn", hiragana, "\e\$)A\cN". '$"$$$&$($*' . "\cO" ],
112 ["2022-jp", hiragana, "\e\$B".'$"$$$&$($*'."\e(B" ],
113 ["2022-kr", hiragana, "\e\$)C\cN". '*"*$*&*(**' . "\cO" ],
114 [ $jis, han_kana, "\e\(I".'12345'."\e(B" ],
115 ["2022-jp1", macron, "\e\$(D\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B"],
116 ["2022-jp2", "\x{C0}" . macron . "\x{C1}",
117 "\e\$(D\e.A\eN\x40\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B\eN\x41"],
118# type-X
119 ["euc-jp-0212", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
120 ["euc-jp-0212", han_kana, "\x8E\xB1\x8E\xB2\x8E\xB3\x8E\xB4\x8E\xB5" ],
121 ["euc-jp-0212", macron,
122 "\x8F\xAA\xA7\x8F\xAA\xB7\x8F\xAA\xC5\x8F\xAA\xD7\x8F\xAA\xE9" ],
123# type-H
124 [ $hz, hiragana, "~{". '$"$$$&$($*' . "~}" ],
125 [ $hz, hiragana, "~{". '$"$$' ."~\cJ". '$&$($*' . "~}" ],
126);
127
33f2f539 128plan test => $n*@encodings + $n*@encodings*@greek
6008b2cd 129 + $n*@encodings*@ideodigit + $num_esc +
d7187ca8 130# + $n + @hz_txt # no HZ for now
131 + @ary_buff
132 - 2; # no HZ for now
71a18b0f 133
134foreach my $enc (@encodings)
135 {
136 my $tab = Encode->getEncoding($enc);
137 ok(1,defined($tab),"Could not load $enc");
138 my $str = join('',map(chr($_),0x20..0x7E));
139 my $uni = $tab->decode($str);
140 my $cpy = $tab->encode($uni);
141 ok($cpy,$str,"$enc mangled translating to Unicode and back");
142 }
143
144foreach my $enc (@encodings)
145 {
146 my $tab = Encode->getEncoding($enc);
147 foreach my $gk (0..$#greek)
148 {
149 my $uni = unpack 'U', $tab->decode(pack 'n', $greek{$enc}[$gk]);
150 ok($uni,$greek{'utf8'}[$gk],
151 "$enc mangled translating to Unicode GREEK $greek[$gk]");
152 my $cpy = unpack 'n',$tab->encode(pack 'U',$uni);
153 ok($cpy,$greek{$enc}[$gk],
154 "$enc mangled translating from Unicode GREEK $greek[$gk]");
155 }
156 }
157
158foreach my $enc (@encodings)
159 {
160 my $tab = Encode->getEncoding($enc);
161 foreach my $id (0..$#ideodigit)
162 {
163 my $uni = unpack 'U',$tab->decode(pack 'H*', $ideodigit{$enc}[$id]);
164 ok($uni,hex($ideodigit{'utf8'}[$id]),
165 "$enc mangled translating to Unicode CJK IDEOGRAPH $ideodigit[$id]");
166 my $cpy = lc unpack 'H*', $tab->encode(pack 'U',$uni);
167 ok($cpy,$ideodigit{$enc}[$id],
168 "$enc mangled translating from Unicode CJK IDEOGRAPH $ideodigit[$id]");
169 }
170 }
171
33f2f539 172{
173 sub to_unicode
174 {
175 my $enc = shift;
176 return unpack('H*', pack 'n*', unpack 'U*',
177 decode $enc, pack 'H*', join '', @_);
178 }
179
180 sub from_unicode
181 {
182 my $enc = shift;
183 return unpack('H*', encode $enc,
184 pack 'U*', unpack 'n*', pack 'H*', join '', @_);
185 }
186
187 foreach my $enc (sort keys %esc_str)
188 {
189 my $tab = Encode->getEncoding($enc);
190 ok(1,defined($tab),"Could not load $enc");
191 my %strings = %{ $esc_str{$enc} };
192 foreach my $estr (sort keys %strings)
193 {
194 my $ustr = to_unicode($enc, $estr);
195 ok($ustr, $strings{$estr},
196 "$enc mangled translating to Unicode");
197 ok(from_unicode($enc, $ustr), $estr,
198 "$enc mangled translating from Unicode");
199 }
200 ok(to_unicode($enc, keys %strings), join('', values %strings),
201 "$enc mangled translating to Unicode");
202 }
203}
466d6cd3 204
205
206{
207 my $hz_to_unicode = sub
208 {
209 return unpack('H*', pack 'n*', unpack 'U*', decode $hz, shift);
210 };
211
212 my $hz_from_unicode = sub
213 {
214 return encode($hz, pack 'U*', unpack 'n*', pack 'H*', shift);
215 };
216
6008b2cd 217 if(0){
466d6cd3 218 foreach my $enc ($hz)
219 {
220 my $tab = Encode->getEncoding($enc);
221 ok(1,defined($tab),"Could not load $enc");
222
223 ok(&$hz_from_unicode($hz_exp), $hz_txt[0],
224 "$enc mangled translating from Unicode");
225
226 foreach my $str (@hz_txt)
227 {
228 ok(&$hz_to_unicode($str), $hz_exp,
229 "$enc mangled translating to Unicode");
230 }
231 }
6008b2cd 232 }
466d6cd3 233}
96d6357c 234
235for my $ary (@ary_buff) {
236 my $NG = 0;
237 my $enc = $ary->[0];
d7187ca8 238 next if $enc eq 'HZ';
96d6357c 239 for my $n ( int(BUFSIZ/2) .. 2*BUFSIZ+4 ){
240 my $dst = "a"x$n. $ary->[1] . TAIL;
241 my $src = "a"x$n. $ary->[2] . TAIL;
242 my $utf = buff_decode($enc, $src);
243 $NG++ unless $dst eq $utf;
244 }
245 ok($NG, 0, "$enc mangled translating to Unicode");
246}
247
248sub buff_decode {
249 my($enc, $str) = @_;
250 my $utf8 = '';
251 my $inconv = '';
252 while(length $str){
253 my $buff = $inconv.substr($str,0,BUFSIZ - length $inconv,'');
254 my $decoded = decode($enc, $buff, YES);
255 if(length $decoded){
256 $utf8 .= $decoded;
257 $inconv = $buff;
258 } else {
259 last; # malformed?
260 }
261 }
262 return $utf8;
263}
264