Commit | Line | Data |
00a464f7 |
1 | BEGIN { # compile-time bootstrap: adjust @INC and emit a TAP skip line before the use lines below are processed |
071db25d |
2 | if (! -d 'blib' and -d 't'){ chdir 't' }; # no blib/ here but a t/ directory exists: run from inside t/ |
ee981de6 |
3 | unshift @INC, '../lib'; # put ../lib at the front of the module search path |
00a464f7 |
4 | require Config; Config->import; # explicit class-method call rather than indirect-object "import Config" |
5 | if ($Config{'extensions'} !~ /\bEncode\b/) { |
6 | print "1..0 # Skip: Encode was not built\n"; |
7 | exit 0; |
8 | } |
9 | unless (PerlIO::Layer->find('perlio')) { # explicit class-method call rather than indirect-object "find PerlIO::Layer" |
10 | print "1..0 # Skip: PerlIO was not built\n"; |
11 | exit 0; |
12 | } |
13 | if (ord("A") == 193) { # "A" has code 193 on the platforms this skip is meant for (EBCDIC) |
14 | print "1..0 # Skip: EBCDIC\n"; |
15 | exit 0; |
16 | } |
17 | $| = 1; # turn on autoflush for the currently selected output handle |
18 | } |
19 | |
20 | use strict; |
21 | use Test::More tests => 29; |
22 | use Encode; |
23 | |
24 | use_ok('Encode::CN'); |
25 | |
26 | # Since JP.t already tests basic file I/O, we just focus on the |
27 | # internal encode / decode tests here. Unfortunately, testing |
28 | # against all the UniHan characters would take a huge amount of disk space, |
29 | # not to mention the time it would take, and the fact that Perl |
30 | # does not bundle UniHan.txt anyway. |
31 | |
32 | # So, here we just test a typical snippet spanning multiple Unicode |
33 | # blocks, and hope it can point out obvious errors. |
34 | |
35 | run_tests('Simplified Chinese only', { |
36 | 'utf' => ( |
37 | 12298.26131.32463.12299.31532.19968.21350. |
38 | 24406.26352.65306. |
39 | 22823.21705.20094.20803.65292.19975.29289.36164.22987.65292. |
40 | 20035.32479.22825.12290. |
41 | 20113.34892.38632.26045.65292.21697.29289.27969.24418.12290. |
42 | 22823.26126.22987.32456.65292.20845.20301.26102.25104.65292. |
43 | 26102.20056.20845.40857.20197.24481.22825.12290. |
44 | 20094.36947.21464.21270.65292.21508.27491.24615.21629.65292. |
45 | 20445.21512.22823.21644.65292.20035.21033.36126.12290. |
46 | 39318.20986.24246.29289.65292.19975.22269.21688.23425.12290 |
47 | ), |
48 | |
49 | 'euc-cn' => join('', |
50 | '¡¶Ò×¾¡·µÚÒ»ØÔ', |
51 | 'åèÔ»£º', |
52 | '´óÔÕǬԪ£¬ÍòÎï×Êʼ£¬', |
53 | 'ÄËͳÌì¡£', |
54 | 'ÔÆÐÐÓêÊ©£¬Æ·ÎïÁ÷ÐΡ£', |
55 | '´óÃ÷ʼÖÕ£¬Áùλʱ³É£¬', |
56 | 'ʱ³ËÁùÁúÒÔÓùÌì¡£', |
57 | 'ǬµÀ±ä»¯£¬¸÷ÕýÐÔÃü£¬', |
58 | '±£ºÏ´óºÍ£¬ÄËÀûÕê¡£', |
59 | 'Ê׳öÊüÎÍò¹úÏÌÄþ¡£', |
60 | ), |
61 | |
62 | 'gb2312' => join('', |
63 | '!6RW>-!75ZR;XT', |
64 | 'ehT;#:', |
65 | '4sTUG,T*#,MrNoWJJ<#,', |
66 | 'DKM3Ll!#', |
67 | 'TFPPSjJ)#,F7NoAwPN!#', |
68 | '4sCwJ<VU#,AyN;J13I#,', |
69 | 'J13KAyAzRTSyLl!#', |
70 | 'G,5@1d;/#,8wU}PTC|#,', |
71 | '1#:O4s:M#,DK@{Uj!#', |
72 | 'JW3vJ|No#,Mr9zOLD~!#' |
73 | ), |
74 | |
75 | 'iso-ir-165'=> join('', |
76 | '!6RW>-!75ZR;XT', |
77 | 'ehT;#:', |
78 | '4sTUG,T*#,MrNoWJJ<#,', |
79 | 'DKM3Ll!#', |
80 | 'TFPPSjJ)#,F7NoAwPN!#', |
81 | '4sCwJ<VU#,AyN;J13I#,', |
82 | 'J13KAyAzRTSyLl!#', |
83 | 'G,5@1d;/#,8wU}PTC|#,', |
84 | '1#:O4s:M#,DK@{Uj!#', |
85 | 'JW3vJ|No#,Mr9zOLD~!#' |
86 | ), |
87 | }); |
88 | |
89 | run_tests('Simplified Chinese + ASCII', { |
90 | 'utf' => ( |
91 | 35937.26352.65306.10. |
92 | 22825.34892.20581.65292.21531.23376.20197.33258.24378.19981.24687.12290.10. |
93 | 28508.40857.21247.29992.65292.38451.22312.19979.20063.12290.32. |
94 | 35265.40857.22312.30000.65292.24503.26045.26222.20063.12290.32. |
95 | 32456.26085.20094.20094.65292.21453.22797.36947.20063.12290.10. |
96 | 25110.36291.22312.28170.65292.36827.26080.21646.20063.12290.39134. |
97 | 40857.22312.22825.65292.22823.20154.36896.20063.12290.32. |
98 | 20130.40857.26377.24724.65292.30408.19981.21487.20037.20063.12290.10. |
99 | 29992.20061.65292.22825.24503.19981.21487.20026.39318.20063.12290 |
100 | ), |
101 | |
102 | 'cp936' => join(chr(10), |
103 | 'ÏóÔ»£º', |
104 | 'ÌìÐн¡£¬¾ý×ÓÒÔ×ÔÇ¿²»Ï¢¡£', |
105 | 'DZÁúÎðÓã¬ÑôÔÚÏÂÒ²¡£ ¼ûÁúÔÚÌµÂÊ©ÆÕÒ²¡£ ÖÕÈÕǬǬ£¬·´¸´µÀÒ²¡£', |
106 | '»òÔ¾ÔÚÔ¨£¬½øÎÞ¾ÌÒ²¡£·ÉÁúÔÚÌ죬´óÈËÔìÒ²¡£ ¿ºÁúÓлڣ¬Ó¯²»¿É¾ÃÒ²¡£', |
107 | 'Óþţ¬ÌìµÂ²»¿ÉΪÊ×Ò²¡£', |
108 | ), |
109 | |
110 | 'hz' => join(chr(10), |
111 | '~{OsT;#:~}', |
112 | '~{LlPP=!#,>}WSRTWTG?2;O"!#~}', |
113 | '~{G1AzNpSC#,QtTZOBR2!#~} ~{<{AzTZLo#,5BJ)FUR2!#~} ~{VUHUG,G,#,74845@R2!#~}', |
114 | '~{;rT>TZT(#,=xN^>LR2!#7IAzTZLl#,4sHKTlR2!#~} ~{?:AzSP;Z#,S/2;?I>CR2!#~}', |
115 | '~{SC>E#,Ll5B2;?IN*JWR2!#~}', |
116 | ), |
117 | }); |
118 | |
119 | run_tests('Traditional Chinese', { |
120 | 'utf', => 20094.65306.20803.12289.20136.12289.21033.12289.35998, |
121 | 'gb12345' => 'G,#:T*!":`!"@{!"Uj', |
122 | 'gbk' => 'Ǭ£ºÔª¡¢ºà¡¢Àû¡¢Ø', |
123 | }); |
124 | |
125 | # run_tests($title, $tests) |
126 | # |
127 | # $title is appended to the name of every check. $tests is a hash |
128 | # reference: its 'utf' entry is the expected character string and every |
129 | # other key is an encoding name whose value is that text in encoded form. |
130 | # The 'utf' entry is deleted from the caller's hash. Encodings are visited |
131 | # in sorted order and four is() checks are run for each of them. |
132 | sub run_tests { |
133 | my $title = shift; |
134 | my $tests = shift; |
135 | my $expected = delete $tests->{'utf'}; |
136 | |
137 | my @encodings = sort keys %$tests; |
138 | |
139 | for my $enc (@encodings) { |
140 | my $encoded = $tests->{$enc}; |
141 | |
142 | # Character-level API, both directions. |
143 | is(Encode::decode($enc, $encoded), $expected, "[$enc] decode - $title"); |
144 | is(Encode::encode($enc, $expected), $encoded, "[$enc] encode - $title"); |
145 | |
146 | # In-place byte conversion: $enc octets into UTF-8 octets. |
147 | my $as_utf8 = Encode::encode('utf-8', $expected); |
148 | my $converted = $encoded; |
149 | Encode::from_to($converted, $enc, 'utf-8'); |
150 | is($converted, $as_utf8, "[$enc] from_to => utf8 - $title"); |
151 | |
152 | # And back again: the UTF-8 octets are rewritten in place as $enc octets. |
153 | Encode::from_to($as_utf8, 'utf-8', $enc); |
154 | is($as_utf8, $encoded, "[$enc] utf8 => from_to - $title"); |
155 | } |
156 | } |