Commit | Line | Data |
c0d88b76 |
1 | package Encode::CN::HZ; |
2 | |
00a464f7 |
3 | use strict; |
4 | no warnings 'redefine'; # to quell the "use Encode" below |
5 | |
c0d88b76 |
6 | use Encode::CN; |
7 | use Encode qw|encode decode|; |
8 | use base 'Encode::Encoding'; |
9 | |
c0d88b76 |
10 | # HZ is but escaped GB, so we implement it with the |
11 | # GB2312(raw) encoding here. Cf. RFC 1842 & 1843. |
12 | |
13 | my $canon = 'hz'; |
14 | my $obj = bless {name => $canon}, __PACKAGE__; |
15 | $obj->Define($canon); |
16 | |
17 | sub decode |
18 | { |
19 | my ($obj,$str,$chk) = @_; |
20 | my $gb = Encode::find_encoding('gb2312'); |
21 | |
00a464f7 |
22 | $str =~ s{~ # starting tilde |
23 | (?: |
24 | (~) # another tilde - escaped (set $1) |
25 | | # or |
26 | \n # \n - output nothing |
27 | | # or |
28 | \{ # opening brace of GB data |
29 | ( # set $2 to any number of... |
30 | (?: |
31 | [^~] # non-tilde GB character |
32 | | # or |
33 | ~(?!\}) # tilde not followed by a closing brace |
34 | )* |
35 | ) |
36 | ~\} # closing brace of GB data |
37 | | # XXX: invalid escape - maybe die on $chk? |
38 | ) |
39 | }{ |
40 | (defined $1) ? '~' # two tildes make one tilde |
41 | : |
42 | (defined $2) ? $gb->decode($2, $chk) # decode the characters |
43 | : |
44 | '' # '' on ~\n and invalid escape |
45 | }egx; |
c0d88b76 |
46 | |
47 | return $str; |
48 | } |
49 | |
50 | sub encode |
51 | { |
52 | my ($obj,$str,$chk) = @_; |
00a464f7 |
53 | my ($out, $in_gb); |
c0d88b76 |
54 | my $gb = Encode::find_encoding('gb2312'); |
55 | |
56 | $str =~ s/~/~~/g; |
c0d88b76 |
57 | |
00a464f7 |
58 | # XXX: Since CHECK and partial decoding has not been implemented yet, |
59 | # we'll use a very crude way to test for GB2312ness. |
60 | |
61 | for my $index (0 .. length($str) - 1) { |
62 | no warnings 'utf8'; |
63 | |
64 | my $char = substr($str, $index, 1); |
65 | my $try = $gb->encode($char); # try encode this char |
66 | |
67 | if (defined($try)) { # is a GB character |
68 | if ($in_gb) { |
69 | $out .= $try; # in GB mode - just append it |
70 | } |
71 | else { |
72 | $out .= "~{$try"; # enter GB mode, then append it |
73 | $in_gb = 1; |
74 | } |
75 | } |
76 | elsif ($in_gb) { |
77 | $out .= "~}$char"; # leave GB mode, then append it |
78 | $in_gb = 0; |
79 | } |
80 | else { |
81 | $out .= $char; # not in GB mode - just append it |
82 | } |
83 | } |
84 | |
85 | $out .= '~}' if $in_gb; # add closing brace as needed |
86 | |
87 | return $out; |
c0d88b76 |
88 | } |
89 | |
90 | 1; |
91 | __END__ |