4 require Config; import Config;
5 if ($Config{'extensions'} !~ /\bEncode\b/) {
6 print "1..0 # Skip: Encode was not built\n";
10 print "1..0 # Skip: EBCDIC\n";
15 use Encode qw(encode decode);
18 my @encodings = qw(euc-cn euc-jp euc-kr big5 shiftjis); # CJK
22 'euc-cn' => [0xA6A1..0xA6B8,0xA6C1..0xA6D8],
23 'euc-jp' => [0xA6A1..0xA6B8,0xA6C1..0xA6D8],
24 'euc-kr' => [0xA5C1..0xA5D8,0xA5E1..0xA5F8],
25 'big5' => [0xA344..0xA35B,0xA35C..0xA373],
26 'shiftjis' => [0x839F..0x83B6,0x83BF..0x83D6],
27 'utf8' => [0x0391..0x03A1,0x03A3..0x03A9,0x03B1..0x03C1,0x03C3..0x03C9],
30 ALPHA BETA GAMMA DELTA EPSILON ZETA ETA
31 THETA IOTA KAPPA LAMBDA MU NU XI OMICRON
32 PI RHO SIGMA TAU UPSILON PHI CHI PSI OMEGA
33 alpha beta gamma delta epsilon zeta eta
34 theta iota kappa lambda mu nu xi omicron
35 pi rho sigma tau upsilon phi chi psi omega
38 my %ideodigit = ( # cjk ideograph 'one' to 'ten'
39 'euc-cn' => [qw(d2bb b6fe c8fd cbc4 cee5 c1f9 c6df b0cb bec5 caae)],
40 'euc-jp' => [qw(b0ec c6f3 bbb0 bbcd b8de cfbb bcb7 c8ac b6e5 bdbd)],
41 'euc-kr' => [qw(ece9 eca3 dfb2 decc e7e9 d7bf f6d2 f8a2 cefa e4a8)],
42 'big5' => [qw(a440 a447 a454 a57c a4ad a4bb a443 a44b a445 a451)],
43 'shiftjis' => [qw(88ea 93f1 8e4f 8e6c 8cdc 985a 8eb5 94aa 8be3 8f5c)],
44 'utf8' => [qw(4e00 4e8c 4e09 56db 4e94 516d 4e03 516b 4e5d 5341)],
46 my @ideodigit = qw(one two three four five six seven eight nine ten);
53 1b24422422242424262428242a1b2842
55 1b284931323334355d1b2842
56 ff71ff72ff73ff74ff75ff9d
59 3132331b244234413b7a1b28425065726c
60 0031003200336f225b57005000650072006c
61 546573740a1b24422546253925481b28420a
62 0054006500730074000a30c630b930c8000a
66 1b2429430e2a22213e0f410d0a
68 1b2429430e3021332a34593673383639593b673e46405a0f0d0a
69 ac00b098b2e4b77cb9c8bc14c0acc544c790000d000a
74 my $num_esc = $n * keys(%esc_str);
75 foreach (values %esc_str){ $num_esc += $n * keys %$_ }
77 use constant BUFSIZ => 64; # for test
78 use constant hiragana => "\x{3042}\x{3044}\x{3046}\x{3048}\x{304A}";
79 use constant han_kana => "\x{FF71}\x{FF72}\x{FF73}\x{FF74}\x{FF75}";
80 use constant macron => "\x{0100}\x{0112}\x{012a}\x{014c}\x{016a}";
81 use constant TAIL => 'bbb';
82 use constant YES => 1;
84 my @ary_buff = ( # [ encoding, decoded, encoded ]
86 ["euc-cn", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
87 ["euc-jp", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
88 ["euc-jp", han_kana, "\x8E\xB1\x8E\xB2\x8E\xB3\x8E\xB4\x8E\xB5" ],
89 ["euc-kr", hiragana, "\xAA\xA2\xAA\xA4\xAA\xA6\xAA\xA8\xAA\xAA" ],
90 ["shiftjis", hiragana, "\x82\xA0\x82\xA2\x82\xA4\x82\xA6\x82\xA8" ],
91 ["shiftjis", han_kana, "\xB1\xB2\xB3\xB4\xB5" ],
93 ["2022-cn", hiragana, "\e\$)A\cN". '$"$$$&$($*' . "\cO" ],
94 ["2022-jp", hiragana, "\e\$B".'$"$$$&$($*'."\e(B" ],
95 ["2022-kr", hiragana, "\e\$)C\cN". '*"*$*&*(**' . "\cO" ],
96 [ $jis, han_kana, "\e\(I".'12345'."\e(B" ],
97 ["2022-jp1", macron, "\e\$(D\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B"],
98 ["2022-jp2", "\x{C0}" . macron . "\x{C1}",
99 "\e\$(D\e.A\eN\x40\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B\eN\x41"],
101 ["euc-jp-0212", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
102 ["euc-jp-0212", han_kana, "\x8E\xB1\x8E\xB2\x8E\xB3\x8E\xB4\x8E\xB5" ],
103 ["euc-jp-0212", macron,
104 "\x8F\xAA\xA7\x8F\xAA\xB7\x8F\xAA\xC5\x8F\xAA\xD7\x8F\xAA\xE9" ],
107 plan test => $n*@encodings + $n*@encodings*@greek
108 + $n*@encodings*@ideodigit + $num_esc + @ary_buff;
110 foreach my $enc (@encodings)
112 my $tab = Encode->getEncoding($enc);
113 ok(1,defined($tab),"Could not load $enc");
114 my $str = join('',map(chr($_),0x20..0x7E));
115 my $uni = $tab->decode($str);
116 my $cpy = $tab->encode($uni);
117 ok($cpy,$str,"$enc mangled translating to Unicode and back");
120 foreach my $enc (@encodings)
122 my $tab = Encode->getEncoding($enc);
123 foreach my $gk (0..$#greek)
125 my $uni = unpack 'U', $tab->decode(pack 'n', $greek{$enc}[$gk]);
126 ok($uni,$greek{'utf8'}[$gk],
127 "$enc mangled translating to Unicode GREEK $greek[$gk]");
128 my $cpy = unpack 'n',$tab->encode(pack 'U',$uni);
129 ok($cpy,$greek{$enc}[$gk],
130 "$enc mangled translating from Unicode GREEK $greek[$gk]");
134 foreach my $enc (@encodings)
136 my $tab = Encode->getEncoding($enc);
137 foreach my $id (0..$#ideodigit)
139 my $uni = unpack 'U',$tab->decode(pack 'H*', $ideodigit{$enc}[$id]);
140 ok($uni,hex($ideodigit{'utf8'}[$id]),
141 "$enc mangled translating to Unicode CJK IDEOGRAPH $ideodigit[$id]");
142 my $cpy = lc unpack 'H*', $tab->encode(pack 'U',$uni);
143 ok($cpy,$ideodigit{$enc}[$id],
144 "$enc mangled translating from Unicode CJK IDEOGRAPH $ideodigit[$id]");
152 return unpack('H*', pack 'n*', unpack 'U*',
153 decode $enc, pack 'H*', join '', @_);
159 return unpack('H*', encode $enc,
160 pack 'U*', unpack 'n*', pack 'H*', join '', @_);
163 foreach my $enc (sort keys %esc_str)
165 my $tab = Encode->getEncoding($enc);
166 ok(1,defined($tab),"Could not load $enc");
167 my %strings = %{ $esc_str{$enc} };
168 foreach my $estr (sort keys %strings)
170 my $ustr = to_unicode($enc, $estr);
171 ok($ustr, $strings{$estr},
172 "$enc mangled translating to Unicode");
173 ok(from_unicode($enc, $ustr), $estr,
174 "$enc mangled translating from Unicode");
176 ok(to_unicode($enc, keys %strings), join('', values %strings),
177 "$enc mangled translating to Unicode");
181 for my $ary (@ary_buff) {
184 for my $n ( int(BUFSIZ/2) .. 2*BUFSIZ+4 ){
185 my $dst = "a"x$n. $ary->[1] . TAIL;
186 my $src = "a"x$n. $ary->[2] . TAIL;
187 my $utf = buff_decode($enc, $src);
188 $NG++ unless $dst eq $utf;
190 ok($NG, 0, "$enc mangled translating to Unicode");
198 my $buff = $inconv.substr($str,0,BUFSIZ - length $inconv,'');
199 my $decoded = decode($enc, $buff, YES);