Commit | Line | Data |
c0d88b76 |
1 | package Encode::CN::HZ; |
2 | |
3 | use Encode::CN; |
4 | use Encode qw|encode decode|; |
5 | use base 'Encode::Encoding'; |
6 | |
7 | use strict; |
8 | |
9 | # HZ is but escaped GB, so we implement it with the |
10 | # GB2312(raw) encoding here. Cf. RFC 1842 & 1843. |
11 | |
12 | my $canon = 'hz'; |
13 | my $obj = bless {name => $canon}, __PACKAGE__; |
14 | $obj->Define($canon); |
15 | |
16 | sub decode |
17 | { |
18 | my ($obj,$str,$chk) = @_; |
19 | my $gb = Encode::find_encoding('gb2312'); |
20 | |
21 | $str =~ s{~(?:(~)|\n|{([^~]*)~}|)} |
22 | {$1 ? '~' : defined $2 ? $gb->decode($2, $chk) : ''}eg; |
23 | |
24 | return $str; |
25 | } |
26 | |
27 | sub encode |
28 | { |
29 | my ($obj,$str,$chk) = @_; |
30 | my $gb = Encode::find_encoding('gb2312'); |
31 | |
32 | $str =~ s/~/~~/g; |
33 | $str =~ s/((?: |
34 | \p{InCJKCompatibility}| |
35 | \p{InCJKCompatibilityForms}| |
36 | \p{InCJKCompatibilityIdeographs}| |
37 | \p{InCJKCompatibilityIdeographsSupplement}| |
38 | \p{InCJKRadicalsSupplement}| |
39 | \p{InCJKSymbolsAndPunctuation}| |
40 | \p{InCJKUnifiedIdeographsExtensionA}| |
41 | \p{InCJKUnifiedIdeographs}| |
42 | \p{InCJKUnifiedIdeographsExtensionB}| |
43 | \p{InEnclosedCJKLettersAndMonths} |
44 | )+)/'~{'.$gb->encode($1, $chk).'~}'/egx; |
45 | |
46 | return $str; |
47 | } |
48 | |
49 | 1; |
50 | __END__ |