3 unshift @INC, '../lib';
4 require Config; import Config;
5 if ($Config{'extensions'} !~ /\bEncode\b/) {
6 print "1..0 # Skip: Encode was not built\n";
10 print "1..0 # Skip: EBCDIC\n";
15 use Encode qw(encode decode);
18 my @encodings = qw(euc-cn euc-kr big5 shiftjis); # CJK
22 'euc-cn' => [0xA6A1..0xA6B8,0xA6C1..0xA6D8],
23 'euc-kr' => [0xA5C1..0xA5D8,0xA5E1..0xA5F8],
24 'big5' => [0xA344..0xA35B,0xA35C..0xA373],
25 'shiftjis' => [0x839F..0x83B6,0x83BF..0x83D6],
26 'utf8' => [0x0391..0x03A1,0x03A3..0x03A9,0x03B1..0x03C1,0x03C3..0x03C9],
29 ALPHA BETA GAMMA DELTA EPSILON ZETA ETA
30 THETA IOTA KAPPA LAMBDA MU NU XI OMICRON
31 PI RHO SIGMA TAU UPSILON PHI CHI PSI OMEGA
32 alpha beta gamma delta epsilon zeta eta
33 theta iota kappa lambda mu nu xi omicron
34 pi rho sigma tau upsilon phi chi psi omega
37 my %ideodigit = ( # cjk ideograph 'one' to 'ten'
38 'euc-cn' => [qw(d2bb b6fe c8fd cbc4 cee5 c1f9 c6df b0cb bec5 caae)],
39 'euc-kr' => [qw(ece9 eca3 dfb2 decc e7e9 d7bf f6d2 f8a2 cefa e4a8)],
40 'big5' => [qw(a440 a447 a454 a57c a4ad a4bb a443 a44b a445 a451)],
41 'shiftjis' => [qw(88ea 93f1 8e4f 8e6c 8cdc 985a 8eb5 94aa 8be3 8f5c)],
42 'utf8' => [qw(4e00 4e8c 4e09 56db 4e94 516d 4e03 516b 4e5d 5341)],
44 my @ideodigit = qw(one two three four five six seven eight nine ten);
50 1b2429430e2a22213e0f410d0a
52 1b2429430e3021332a34593673383639593b673e46405a0f0d0a
53 ac00b098b2e4b77cb9c8bc14c0acc544c790000d000a
58 my $num_esc = $n * keys(%esc_str);
59 foreach (values %esc_str){ $num_esc += $n * keys %$_ }
61 use constant BUFSIZ => 64; # for test
62 use constant hiragana => "\x{3042}\x{3044}\x{3046}\x{3048}\x{304A}";
63 use constant han_kana => "\x{FF71}\x{FF72}\x{FF73}\x{FF74}\x{FF75}";
64 use constant macron => "\x{0100}\x{0112}\x{012a}\x{014c}\x{016a}";
65 use constant TAIL => 'bbb';
66 use constant YES => 1;
68 my @ary_buff = ( # [ encoding, decoded, encoded ]
70 ["euc-cn", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
71 ["euc-kr", hiragana, "\xAA\xA2\xAA\xA4\xAA\xA6\xAA\xA8\xAA\xAA" ],
72 ["shiftjis", hiragana, "\x82\xA0\x82\xA2\x82\xA4\x82\xA6\x82\xA8" ],
73 ["shiftjis", han_kana, "\xB1\xB2\xB3\xB4\xB5" ],
75 ["2022-cn", hiragana, "\e\$)A\cN". '$"$$$&$($*' . "\cO" ],
76 ["2022-kr", hiragana, "\e\$)C\cN". '*"*$*&*(**' . "\cO" ],
77 ["2022-jp1", macron, "\e\$(D\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B"],
78 ["2022-jp2", "\x{C0}" . macron . "\x{C1}",
79 "\e\$(D\e.A\eN\x40\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B\eN\x41"],
82 plan test => $n*@encodings + $n*@encodings*@greek
83 + $n*@encodings*@ideodigit + $num_esc + @ary_buff;
85 foreach my $enc (@encodings)
87 my $tab = Encode->getEncoding($enc);
88 ok(1,defined($tab),"Could not load $enc");
89 my $str = join('',map(chr($_),0x20..0x7E));
90 my $uni = $tab->decode($str);
91 my $cpy = $tab->encode($uni);
92 ok($cpy,$str,"$enc mangled translating to Unicode and back");
95 foreach my $enc (@encodings)
97 my $tab = Encode->getEncoding($enc);
98 foreach my $gk (0..$#greek)
100 my $uni = unpack 'U', $tab->decode(pack 'n', $greek{$enc}[$gk]);
101 ok($uni,$greek{'utf8'}[$gk],
102 "$enc mangled translating to Unicode GREEK $greek[$gk]");
103 my $cpy = unpack 'n',$tab->encode(pack 'U',$uni);
104 ok($cpy,$greek{$enc}[$gk],
105 "$enc mangled translating from Unicode GREEK $greek[$gk]");
109 foreach my $enc (@encodings)
111 my $tab = Encode->getEncoding($enc);
112 foreach my $id (0..$#ideodigit)
114 my $uni = unpack 'U',$tab->decode(pack 'H*', $ideodigit{$enc}[$id]);
115 ok($uni,hex($ideodigit{'utf8'}[$id]),
116 "$enc mangled translating to Unicode CJK IDEOGRAPH $ideodigit[$id]");
117 my $cpy = lc unpack 'H*', $tab->encode(pack 'U',$uni);
118 ok($cpy,$ideodigit{$enc}[$id],
119 "$enc mangled translating from Unicode CJK IDEOGRAPH $ideodigit[$id]");
127 return unpack('H*', pack 'n*', unpack 'U*',
128 decode $enc, pack 'H*', join '', @_);
134 return unpack('H*', encode $enc,
135 pack 'U*', unpack 'n*', pack 'H*', join '', @_);
138 foreach my $enc (sort keys %esc_str)
140 my $tab = Encode->getEncoding($enc);
141 ok(1,defined($tab),"Could not load $enc");
142 my %strings = %{ $esc_str{$enc} };
143 foreach my $estr (sort keys %strings)
145 my $ustr = to_unicode($enc, $estr);
146 ok($ustr, $strings{$estr},
147 "$enc mangled translating to Unicode");
148 ok(from_unicode($enc, $ustr), $estr,
149 "$enc mangled translating from Unicode");
151 ok(to_unicode($enc, keys %strings), join('', values %strings),
152 "$enc mangled translating to Unicode");
156 for my $ary (@ary_buff) {
159 for my $n ( int(BUFSIZ/2) .. 2*BUFSIZ+4 ){
160 my $dst = "a"x$n. $ary->[1] . TAIL;
161 my $src = "a"x$n. $ary->[2] . TAIL;
162 my $utf = buff_decode($enc, $src);
163 $NG++ unless $dst eq $utf;
165 ok($NG, 0, "$enc mangled translating to Unicode");
173 my $buff = $inconv.substr($str,0,BUFSIZ - length $inconv,'');
174 my $decoded = decode($enc, $buff, YES);