Upgrade to Encode 0.97, from Dan Kogai.
[p5sagit/p5-mst-13.2.git] / ext / Encode / t / Tcl.t
CommitLineData
71a18b0f 1BEGIN {
071db25d 2 if (! -d 'blib' and -d 't'){ chdir 't' };
ee981de6 3 unshift @INC, '../lib';
71a18b0f 4 require Config; import Config;
5 if ($Config{'extensions'} !~ /\bEncode\b/) {
6 print "1..0 # Skip: Encode was not built\n";
7 exit 0;
8 }
72f0eb71 9 if (ord("A") == 193) {
10 print "1..0 # Skip: EBCDIC\n";
11 exit 0;
12 }
71a18b0f 13}
14use Test;
15use Encode qw(encode decode);
16use Encode::Tcl;
17
38a64d23 18my @encodings = qw(euc-cn euc-kr big5 shiftjis); # CJK
71a18b0f 19my $n = 2;
20
21my %greek = (
22 'euc-cn' => [0xA6A1..0xA6B8,0xA6C1..0xA6D8],
71a18b0f 23 'euc-kr' => [0xA5C1..0xA5D8,0xA5E1..0xA5F8],
24 'big5' => [0xA344..0xA35B,0xA35C..0xA373],
25 'shiftjis' => [0x839F..0x83B6,0x83BF..0x83D6],
26 'utf8' => [0x0391..0x03A1,0x03A3..0x03A9,0x03B1..0x03C1,0x03C3..0x03C9],
27);
28my @greek = qw(
29 ALPHA BETA GAMMA DELTA EPSILON ZETA ETA
30 THETA IOTA KAPPA LAMBDA MU NU XI OMICRON
31 PI RHO SIGMA TAU UPSILON PHI CHI PSI OMEGA
32 alpha beta gamma delta epsilon zeta eta
33 theta iota kappa lambda mu nu xi omicron
34 pi rho sigma tau upsilon phi chi psi omega
35);
36
37my %ideodigit = ( # cjk ideograph 'one' to 'ten'
38 'euc-cn' => [qw(d2bb b6fe c8fd cbc4 cee5 c1f9 c6df b0cb bec5 caae)],
71a18b0f 39 'euc-kr' => [qw(ece9 eca3 dfb2 decc e7e9 d7bf f6d2 f8a2 cefa e4a8)],
40 'big5' => [qw(a440 a447 a454 a57c a4ad a4bb a443 a44b a445 a451)],
41 'shiftjis' => [qw(88ea 93f1 8e4f 8e6c 8cdc 985a 8eb5 94aa 8be3 8f5c)],
42 'utf8' => [qw(4e00 4e8c 4e09 56db 4e94 516d 4e03 516b 4e5d 5341)],
43);
44my @ideodigit = qw(one two three four five six seven eight nine ten);
45
62756987 46my $kr = '2022-kr';
33f2f539 47my %esc_str;
48
33f2f539 49$esc_str{$kr} = {qw(
50 1b2429430e2a22213e0f410d0a
51 304200b10041000d000a
52 1b2429430e3021332a34593673383639593b673e46405a0f0d0a
53 ac00b098b2e4b77cb9c8bc14c0acc544c790000d000a
54 1b2429434142430d0a
55 004100420043000d000a
56)};
57
58my $num_esc = $n * keys(%esc_str);
59foreach (values %esc_str){ $num_esc += $n * keys %$_ }
60
96d6357c 61use constant BUFSIZ => 64; # for test
62use constant hiragana => "\x{3042}\x{3044}\x{3046}\x{3048}\x{304A}";
63use constant han_kana => "\x{FF71}\x{FF72}\x{FF73}\x{FF74}\x{FF75}";
64use constant macron => "\x{0100}\x{0112}\x{012a}\x{014c}\x{016a}";
65use constant TAIL => 'bbb';
66use constant YES => 1;
67
68my @ary_buff = ( # [ encoding, decoded, encoded ]
69# type-M
70 ["euc-cn", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
96d6357c 71 ["euc-kr", hiragana, "\xAA\xA2\xAA\xA4\xAA\xA6\xAA\xA8\xAA\xAA" ],
72 ["shiftjis", hiragana, "\x82\xA0\x82\xA2\x82\xA4\x82\xA6\x82\xA8" ],
73 ["shiftjis", han_kana, "\xB1\xB2\xB3\xB4\xB5" ],
74# type-E
75 ["2022-cn", hiragana, "\e\$)A\cN". '$"$$$&$($*' . "\cO" ],
96d6357c 76 ["2022-kr", hiragana, "\e\$)C\cN". '*"*$*&*(**' . "\cO" ],
96d6357c 77 ["2022-jp1", macron, "\e\$(D\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B"],
78 ["2022-jp2", "\x{C0}" . macron . "\x{C1}",
79 "\e\$(D\e.A\eN\x40\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B\eN\x41"],
96d6357c 80);
81
33f2f539 82plan test => $n*@encodings + $n*@encodings*@greek
fb4330c5 83 + $n*@encodings*@ideodigit + $num_esc + @ary_buff;
71a18b0f 84
85foreach my $enc (@encodings)
86 {
87 my $tab = Encode->getEncoding($enc);
88 ok(1,defined($tab),"Could not load $enc");
89 my $str = join('',map(chr($_),0x20..0x7E));
90 my $uni = $tab->decode($str);
91 my $cpy = $tab->encode($uni);
92 ok($cpy,$str,"$enc mangled translating to Unicode and back");
93 }
94
95foreach my $enc (@encodings)
96 {
97 my $tab = Encode->getEncoding($enc);
98 foreach my $gk (0..$#greek)
99 {
100 my $uni = unpack 'U', $tab->decode(pack 'n', $greek{$enc}[$gk]);
101 ok($uni,$greek{'utf8'}[$gk],
102 "$enc mangled translating to Unicode GREEK $greek[$gk]");
103 my $cpy = unpack 'n',$tab->encode(pack 'U',$uni);
104 ok($cpy,$greek{$enc}[$gk],
105 "$enc mangled translating from Unicode GREEK $greek[$gk]");
106 }
107 }
108
109foreach my $enc (@encodings)
110 {
111 my $tab = Encode->getEncoding($enc);
112 foreach my $id (0..$#ideodigit)
113 {
114 my $uni = unpack 'U',$tab->decode(pack 'H*', $ideodigit{$enc}[$id]);
115 ok($uni,hex($ideodigit{'utf8'}[$id]),
116 "$enc mangled translating to Unicode CJK IDEOGRAPH $ideodigit[$id]");
117 my $cpy = lc unpack 'H*', $tab->encode(pack 'U',$uni);
118 ok($cpy,$ideodigit{$enc}[$id],
119 "$enc mangled translating from Unicode CJK IDEOGRAPH $ideodigit[$id]");
120 }
121 }
122
33f2f539 123{
124 sub to_unicode
125 {
126 my $enc = shift;
127 return unpack('H*', pack 'n*', unpack 'U*',
128 decode $enc, pack 'H*', join '', @_);
129 }
130
131 sub from_unicode
132 {
133 my $enc = shift;
134 return unpack('H*', encode $enc,
135 pack 'U*', unpack 'n*', pack 'H*', join '', @_);
136 }
137
138 foreach my $enc (sort keys %esc_str)
139 {
140 my $tab = Encode->getEncoding($enc);
141 ok(1,defined($tab),"Could not load $enc");
142 my %strings = %{ $esc_str{$enc} };
143 foreach my $estr (sort keys %strings)
144 {
145 my $ustr = to_unicode($enc, $estr);
146 ok($ustr, $strings{$estr},
147 "$enc mangled translating to Unicode");
148 ok(from_unicode($enc, $ustr), $estr,
149 "$enc mangled translating from Unicode");
150 }
151 ok(to_unicode($enc, keys %strings), join('', values %strings),
152 "$enc mangled translating to Unicode");
153 }
154}
466d6cd3 155
96d6357c 156for my $ary (@ary_buff) {
157 my $NG = 0;
158 my $enc = $ary->[0];
159 for my $n ( int(BUFSIZ/2) .. 2*BUFSIZ+4 ){
160 my $dst = "a"x$n. $ary->[1] . TAIL;
161 my $src = "a"x$n. $ary->[2] . TAIL;
162 my $utf = buff_decode($enc, $src);
163 $NG++ unless $dst eq $utf;
164 }
165 ok($NG, 0, "$enc mangled translating to Unicode");
166}
167
168sub buff_decode {
169 my($enc, $str) = @_;
170 my $utf8 = '';
171 my $inconv = '';
172 while(length $str){
173 my $buff = $inconv.substr($str,0,BUFSIZ - length $inconv,'');
174 my $decoded = decode($enc, $buff, YES);
175 if(length $decoded){
176 $utf8 .= $decoded;
177 $inconv = $buff;
178 } else {
179 last; # malformed?
180 }
181 }
182 return $utf8;
183}
184