4 require Config; import Config;
5 if ($Config{'extensions'} !~ /\bEncode\b/) {
6 print "1..0 # Skip: Encode was not built\n";
10 print "1..0 # Skip: EBCDIC\n";
15 use Encode qw(encode decode);
18 my @encodings = qw(euc-cn euc-jp euc-kr big5 shiftjis); # CJK
22 'euc-cn' => [0xA6A1..0xA6B8,0xA6C1..0xA6D8],
23 'euc-jp' => [0xA6A1..0xA6B8,0xA6C1..0xA6D8],
24 'euc-kr' => [0xA5C1..0xA5D8,0xA5E1..0xA5F8],
25 'big5' => [0xA344..0xA35B,0xA35C..0xA373],
26 'shiftjis' => [0x839F..0x83B6,0x83BF..0x83D6],
27 'utf8' => [0x0391..0x03A1,0x03A3..0x03A9,0x03B1..0x03C1,0x03C3..0x03C9],
30 ALPHA BETA GAMMA DELTA EPSILON ZETA ETA
31 THETA IOTA KAPPA LAMBDA MU NU XI OMICRON
32 PI RHO SIGMA TAU UPSILON PHI CHI PSI OMEGA
33 alpha beta gamma delta epsilon zeta eta
34 theta iota kappa lambda mu nu xi omicron
35 pi rho sigma tau upsilon phi chi psi omega
38 my %ideodigit = ( # cjk ideograph 'one' to 'ten'
39 'euc-cn' => [qw(d2bb b6fe c8fd cbc4 cee5 c1f9 c6df b0cb bec5 caae)],
40 'euc-jp' => [qw(b0ec c6f3 bbb0 bbcd b8de cfbb bcb7 c8ac b6e5 bdbd)],
41 'euc-kr' => [qw(ece9 eca3 dfb2 decc e7e9 d7bf f6d2 f8a2 cefa e4a8)],
42 'big5' => [qw(a440 a447 a454 a57c a4ad a4bb a443 a44b a445 a451)],
43 'shiftjis' => [qw(88ea 93f1 8e4f 8e6c 8cdc 985a 8eb5 94aa 8be3 8f5c)],
44 'utf8' => [qw(4e00 4e8c 4e09 56db 4e94 516d 4e03 516b 4e5d 5341)],
46 my @ideodigit = qw(one two three four five six seven eight nine ten);
53 1b24422422242424262428242a1b2842
55 1b284931323334355d1b2842
56 ff71ff72ff73ff74ff75ff9d
59 3132331b244234413b7a1b28425065726c
60 0031003200336f225b57005000650072006c
61 546573740a1b24422546253925481b28420a
62 0054006500730074000a30c630b930c8000a
66 1b2429430e2a22213e0f410d0a
68 1b2429430e3021332a34593673383639593b673e46405a0f0d0a
69 ac00b098b2e4b77cb9c8bc14c0acc544c790000d000a
74 my $num_esc = $n * keys(%esc_str);
75 foreach (values %esc_str){ $num_esc += $n * keys %$_ }
77 my $FS_preserves_case = 1; # Unix e.g.
78 if ($^O eq 'VMS') { # || $^O eq ...
79 $FS_preserves_case = 0;
81 my $hz = 'HZ'; # HanZi
82 if (!$FS_preserves_case) {
87 "~~in GB.~{<:Ky2;S{#,NpJ)l6HK!#~}Bye.~~",
88 "~~in GB.~{<:Ky2;S{#,~}~\cJ~{NpJ)l6HK!#~}Bye.~~",
89 "~~in GB.~\cJ~{<:Ky2;S{#,NpJ)l6HK!#~}~\cJBye.~~",
92 my $hz_exp = '007e0069006e002000470042002e5df162404e0d6b32'
93 . 'ff0c52ff65bd65bc4eba3002004200790065002e007e';
95 use constant BUFSIZ => 64; # for test
96 use constant hiragana => "\x{3042}\x{3044}\x{3046}\x{3048}\x{304A}";
97 use constant han_kana => "\x{FF71}\x{FF72}\x{FF73}\x{FF74}\x{FF75}";
98 use constant macron => "\x{0100}\x{0112}\x{012a}\x{014c}\x{016a}";
99 use constant TAIL => 'bbb';
100 use constant YES => 1;
102 my @ary_buff = ( # [ encoding, decoded, encoded ]
104 ["euc-cn", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
105 ["euc-jp", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
106 ["euc-jp", han_kana, "\x8E\xB1\x8E\xB2\x8E\xB3\x8E\xB4\x8E\xB5" ],
107 ["euc-kr", hiragana, "\xAA\xA2\xAA\xA4\xAA\xA6\xAA\xA8\xAA\xAA" ],
108 ["shiftjis", hiragana, "\x82\xA0\x82\xA2\x82\xA4\x82\xA6\x82\xA8" ],
109 ["shiftjis", han_kana, "\xB1\xB2\xB3\xB4\xB5" ],
111 ["2022-cn", hiragana, "\e\$)A\cN". '$"$$$&$($*' . "\cO" ],
112 ["2022-jp", hiragana, "\e\$B".'$"$$$&$($*'."\e(B" ],
113 ["2022-kr", hiragana, "\e\$)C\cN". '*"*$*&*(**' . "\cO" ],
114 [ $jis, han_kana, "\e\(I".'12345'."\e(B" ],
115 ["2022-jp1", macron, "\e\$(D\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B"],
116 ["2022-jp2", "\x{C0}" . macron . "\x{C1}",
117 "\e\$(D\e.A\eN\x40\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B\eN\x41"],
119 ["euc-jp-0212", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
120 ["euc-jp-0212", han_kana, "\x8E\xB1\x8E\xB2\x8E\xB3\x8E\xB4\x8E\xB5" ],
121 ["euc-jp-0212", macron,
122 "\x8F\xAA\xA7\x8F\xAA\xB7\x8F\xAA\xC5\x8F\xAA\xD7\x8F\xAA\xE9" ],
124 [ $hz, hiragana, "~{". '$"$$$&$($*' . "~}" ],
125 [ $hz, hiragana, "~{". '$"$$' ."~\cJ". '$&$($*' . "~}" ],
128 plan test => $n*@encodings + $n*@encodings*@greek
129 + $n*@encodings*@ideodigit + $num_esc +
133 foreach my $enc (@encodings)
135 my $tab = Encode->getEncoding($enc);
136 ok(1,defined($tab),"Could not load $enc");
137 my $str = join('',map(chr($_),0x20..0x7E));
138 my $uni = $tab->decode($str);
139 my $cpy = $tab->encode($uni);
140 ok($cpy,$str,"$enc mangled translating to Unicode and back");
143 foreach my $enc (@encodings)
145 my $tab = Encode->getEncoding($enc);
146 foreach my $gk (0..$#greek)
148 my $uni = unpack 'U', $tab->decode(pack 'n', $greek{$enc}[$gk]);
149 ok($uni,$greek{'utf8'}[$gk],
150 "$enc mangled translating to Unicode GREEK $greek[$gk]");
151 my $cpy = unpack 'n',$tab->encode(pack 'U',$uni);
152 ok($cpy,$greek{$enc}[$gk],
153 "$enc mangled translating from Unicode GREEK $greek[$gk]");
157 foreach my $enc (@encodings)
159 my $tab = Encode->getEncoding($enc);
160 foreach my $id (0..$#ideodigit)
162 my $uni = unpack 'U',$tab->decode(pack 'H*', $ideodigit{$enc}[$id]);
163 ok($uni,hex($ideodigit{'utf8'}[$id]),
164 "$enc mangled translating to Unicode CJK IDEOGRAPH $ideodigit[$id]");
165 my $cpy = lc unpack 'H*', $tab->encode(pack 'U',$uni);
166 ok($cpy,$ideodigit{$enc}[$id],
167 "$enc mangled translating from Unicode CJK IDEOGRAPH $ideodigit[$id]");
175 return unpack('H*', pack 'n*', unpack 'U*',
176 decode $enc, pack 'H*', join '', @_);
182 return unpack('H*', encode $enc,
183 pack 'U*', unpack 'n*', pack 'H*', join '', @_);
186 foreach my $enc (sort keys %esc_str)
188 my $tab = Encode->getEncoding($enc);
189 ok(1,defined($tab),"Could not load $enc");
190 my %strings = %{ $esc_str{$enc} };
191 foreach my $estr (sort keys %strings)
193 my $ustr = to_unicode($enc, $estr);
194 ok($ustr, $strings{$estr},
195 "$enc mangled translating to Unicode");
196 ok(from_unicode($enc, $ustr), $estr,
197 "$enc mangled translating from Unicode");
199 ok(to_unicode($enc, keys %strings), join('', values %strings),
200 "$enc mangled translating to Unicode");
206 my $hz_to_unicode = sub
208 return unpack('H*', pack 'n*', unpack 'U*', decode $hz, shift);
211 my $hz_from_unicode = sub
213 return encode($hz, pack 'U*', unpack 'n*', pack 'H*', shift);
217 foreach my $enc ($hz)
219 my $tab = Encode->getEncoding($enc);
220 ok(1,defined($tab),"Could not load $enc");
222 ok(&$hz_from_unicode($hz_exp), $hz_txt[0],
223 "$enc mangled translating from Unicode");
225 foreach my $str (@hz_txt)
227 ok(&$hz_to_unicode($str), $hz_exp,
228 "$enc mangled translating to Unicode");
234 for my $ary (@ary_buff) {
237 for my $n ( int(BUFSIZ/2) .. 2*BUFSIZ+4 ){
238 my $dst = "a"x$n. $ary->[1] . TAIL;
239 my $src = "a"x$n. $ary->[2] . TAIL;
240 my $utf = buff_decode($enc, $src);
241 $NG++ unless $dst eq $utf;
243 ok($NG, 0, "$enc mangled translating to Unicode");
251 my $buff = $inconv.substr($str,0,BUFSIZ - length $inconv,'');
252 my $decoded = decode($enc, $buff, YES);