Commit | Line | Data |
71a18b0f |
1 | BEGIN { |
071db25d |
2 | if (! -d 'blib' and -d 't'){ chdir 't' }; |
ee981de6 |
3 | unshift @INC, '../lib'; |
71a18b0f |
4 | require Config; import Config; |
5 | if ($Config{'extensions'} !~ /\bEncode\b/) { |
6 | print "1..0 # Skip: Encode was not built\n"; |
7 | exit 0; |
8 | } |
72f0eb71 |
9 | if (ord("A") == 193) { |
10 | print "1..0 # Skip: EBCDIC\n"; |
11 | exit 0; |
12 | } |
71a18b0f |
13 | } |
14 | use Test; |
15 | use Encode qw(encode decode); |
16 | use Encode::Tcl; |
17 | |
38a64d23 |
18 | my @encodings = qw(euc-cn euc-kr big5 shiftjis); # CJK |
71a18b0f |
19 | my $n = 2; |
20 | |
21 | my %greek = ( |
22 | 'euc-cn' => [0xA6A1..0xA6B8,0xA6C1..0xA6D8], |
71a18b0f |
23 | 'euc-kr' => [0xA5C1..0xA5D8,0xA5E1..0xA5F8], |
24 | 'big5' => [0xA344..0xA35B,0xA35C..0xA373], |
25 | 'shiftjis' => [0x839F..0x83B6,0x83BF..0x83D6], |
26 | 'utf8' => [0x0391..0x03A1,0x03A3..0x03A9,0x03B1..0x03C1,0x03C3..0x03C9], |
27 | ); |
28 | my @greek = qw( |
29 | ALPHA BETA GAMMA DELTA EPSILON ZETA ETA |
30 | THETA IOTA KAPPA LAMBDA MU NU XI OMICRON |
31 | PI RHO SIGMA TAU UPSILON PHI CHI PSI OMEGA |
32 | alpha beta gamma delta epsilon zeta eta |
33 | theta iota kappa lambda mu nu xi omicron |
34 | pi rho sigma tau upsilon phi chi psi omega |
35 | ); |
36 | |
37 | my %ideodigit = ( # cjk ideograph 'one' to 'ten' |
38 | 'euc-cn' => [qw(d2bb b6fe c8fd cbc4 cee5 c1f9 c6df b0cb bec5 caae)], |
71a18b0f |
39 | 'euc-kr' => [qw(ece9 eca3 dfb2 decc e7e9 d7bf f6d2 f8a2 cefa e4a8)], |
40 | 'big5' => [qw(a440 a447 a454 a57c a4ad a4bb a443 a44b a445 a451)], |
41 | 'shiftjis' => [qw(88ea 93f1 8e4f 8e6c 8cdc 985a 8eb5 94aa 8be3 8f5c)], |
42 | 'utf8' => [qw(4e00 4e8c 4e09 56db 4e94 516d 4e03 516b 4e5d 5341)], |
43 | ); |
44 | my @ideodigit = qw(one two three four five six seven eight nine ten); |
45 | |
62756987 |
46 | my $kr = '2022-kr'; |
33f2f539 |
47 | my %esc_str; |
48 | |
33f2f539 |
49 | $esc_str{$kr} = {qw( |
50 | 1b2429430e2a22213e0f410d0a |
51 | 304200b10041000d000a |
52 | 1b2429430e3021332a34593673383639593b673e46405a0f0d0a |
53 | ac00b098b2e4b77cb9c8bc14c0acc544c790000d000a |
54 | 1b2429434142430d0a |
55 | 004100420043000d000a |
56 | )}; |
57 | |
58 | my $num_esc = $n * keys(%esc_str); |
59 | foreach (values %esc_str){ $num_esc += $n * keys %$_ } |
60 | |
96d6357c |
61 | use constant BUFSIZ => 64; # for test |
62 | use constant hiragana => "\x{3042}\x{3044}\x{3046}\x{3048}\x{304A}"; |
63 | use constant han_kana => "\x{FF71}\x{FF72}\x{FF73}\x{FF74}\x{FF75}"; |
64 | use constant macron => "\x{0100}\x{0112}\x{012a}\x{014c}\x{016a}"; |
65 | use constant TAIL => 'bbb'; |
66 | use constant YES => 1; |
67 | |
68 | my @ary_buff = ( # [ encoding, decoded, encoded ] |
69 | # type-M |
70 | ["euc-cn", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ], |
96d6357c |
71 | ["euc-kr", hiragana, "\xAA\xA2\xAA\xA4\xAA\xA6\xAA\xA8\xAA\xAA" ], |
72 | ["shiftjis", hiragana, "\x82\xA0\x82\xA2\x82\xA4\x82\xA6\x82\xA8" ], |
73 | ["shiftjis", han_kana, "\xB1\xB2\xB3\xB4\xB5" ], |
74 | # type-E |
75 | ["2022-cn", hiragana, "\e\$)A\cN". '$"$$$&$($*' . "\cO" ], |
96d6357c |
76 | ["2022-kr", hiragana, "\e\$)C\cN". '*"*$*&*(**' . "\cO" ], |
96d6357c |
77 | ["2022-jp1", macron, "\e\$(D\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B"], |
78 | ["2022-jp2", "\x{C0}" . macron . "\x{C1}", |
79 | "\e\$(D\e.A\eN\x40\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B\eN\x41"], |
96d6357c |
80 | ); |
81 | |
33f2f539 |
82 | plan test => $n*@encodings + $n*@encodings*@greek |
fb4330c5 |
83 | + $n*@encodings*@ideodigit + $num_esc + @ary_buff; |
71a18b0f |
84 | |
85 | foreach my $enc (@encodings) |
86 | { |
87 | my $tab = Encode->getEncoding($enc); |
88 | ok(1,defined($tab),"Could not load $enc"); |
89 | my $str = join('',map(chr($_),0x20..0x7E)); |
90 | my $uni = $tab->decode($str); |
91 | my $cpy = $tab->encode($uni); |
92 | ok($cpy,$str,"$enc mangled translating to Unicode and back"); |
93 | } |
94 | |
95 | foreach my $enc (@encodings) |
96 | { |
97 | my $tab = Encode->getEncoding($enc); |
98 | foreach my $gk (0..$#greek) |
99 | { |
100 | my $uni = unpack 'U', $tab->decode(pack 'n', $greek{$enc}[$gk]); |
101 | ok($uni,$greek{'utf8'}[$gk], |
102 | "$enc mangled translating to Unicode GREEK $greek[$gk]"); |
103 | my $cpy = unpack 'n',$tab->encode(pack 'U',$uni); |
104 | ok($cpy,$greek{$enc}[$gk], |
105 | "$enc mangled translating from Unicode GREEK $greek[$gk]"); |
106 | } |
107 | } |
108 | |
109 | foreach my $enc (@encodings) |
110 | { |
111 | my $tab = Encode->getEncoding($enc); |
112 | foreach my $id (0..$#ideodigit) |
113 | { |
114 | my $uni = unpack 'U',$tab->decode(pack 'H*', $ideodigit{$enc}[$id]); |
115 | ok($uni,hex($ideodigit{'utf8'}[$id]), |
116 | "$enc mangled translating to Unicode CJK IDEOGRAPH $ideodigit[$id]"); |
117 | my $cpy = lc unpack 'H*', $tab->encode(pack 'U',$uni); |
118 | ok($cpy,$ideodigit{$enc}[$id], |
119 | "$enc mangled translating from Unicode CJK IDEOGRAPH $ideodigit[$id]"); |
120 | } |
121 | } |
122 | |
33f2f539 |
123 | { |
124 | sub to_unicode |
125 | { |
126 | my $enc = shift; |
127 | return unpack('H*', pack 'n*', unpack 'U*', |
128 | decode $enc, pack 'H*', join '', @_); |
129 | } |
130 | |
131 | sub from_unicode |
132 | { |
133 | my $enc = shift; |
134 | return unpack('H*', encode $enc, |
135 | pack 'U*', unpack 'n*', pack 'H*', join '', @_); |
136 | } |
137 | |
138 | foreach my $enc (sort keys %esc_str) |
139 | { |
140 | my $tab = Encode->getEncoding($enc); |
141 | ok(1,defined($tab),"Could not load $enc"); |
142 | my %strings = %{ $esc_str{$enc} }; |
143 | foreach my $estr (sort keys %strings) |
144 | { |
145 | my $ustr = to_unicode($enc, $estr); |
146 | ok($ustr, $strings{$estr}, |
147 | "$enc mangled translating to Unicode"); |
148 | ok(from_unicode($enc, $ustr), $estr, |
149 | "$enc mangled translating from Unicode"); |
150 | } |
151 | ok(to_unicode($enc, keys %strings), join('', values %strings), |
152 | "$enc mangled translating to Unicode"); |
153 | } |
154 | } |
466d6cd3 |
155 | |
96d6357c |
156 | for my $ary (@ary_buff) { |
157 | my $NG = 0; |
158 | my $enc = $ary->[0]; |
159 | for my $n ( int(BUFSIZ/2) .. 2*BUFSIZ+4 ){ |
160 | my $dst = "a"x$n. $ary->[1] . TAIL; |
161 | my $src = "a"x$n. $ary->[2] . TAIL; |
162 | my $utf = buff_decode($enc, $src); |
163 | $NG++ unless $dst eq $utf; |
164 | } |
165 | ok($NG, 0, "$enc mangled translating to Unicode"); |
166 | } |
167 | |
168 | sub buff_decode { |
169 | my($enc, $str) = @_; |
170 | my $utf8 = ''; |
171 | my $inconv = ''; |
172 | while(length $str){ |
173 | my $buff = $inconv.substr($str,0,BUFSIZ - length $inconv,''); |
174 | my $decoded = decode($enc, $buff, YES); |
175 | if(length $decoded){ |
176 | $utf8 .= $decoded; |
177 | $inconv = $buff; |
178 | } else { |
179 | last; # malformed? |
180 | } |
181 | } |
182 | return $utf8; |
183 | } |
184 | |