Commit | Line | Data |
c0d88b76 |
1 | package Encode::CN::HZ; |
2 | |
00a464f7 |
3 | use strict; |
00a464f7 |
4 | |
eb042f38 |
5 | use vars qw($VERSION); |
0ab8f81e |
6 | $VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; |
eb042f38 |
7 | |
67d7b5ef |
8 | use Encode (); |
c0d88b76 |
9 | use Encode::CN; |
c0d88b76 |
10 | use base 'Encode::Encoding'; |
11 | |
0ab8f81e |
12 | # HZ is only escaped GB, so we implement it with the |
13 | # GB2312(raw) encoding here. Cf. RFCs 1842 & 1843. |
c0d88b76 |
14 | |
15 | my $canon = 'hz'; |
16 | my $obj = bless {name => $canon}, __PACKAGE__; |
17 | $obj->Define($canon); |
18 | |
0ab8f81e |
19 | sub needs_lines { 1 } |
20 | |
21 | sub perlio_ok { |
22 | # exists $INC{"PerlIO/encoding.pm"} or return 0; |
23 | # PerlIO::encoding->VERSION >= 0.03 and return 1; |
24 | return 0; # for the time being |
25 | } |
26 | |
c0d88b76 |
27 | sub decode |
28 | { |
29 | my ($obj,$str,$chk) = @_; |
67d7b5ef |
30 | my $gb = Encode::find_encoding('gb2312-raw'); |
c0d88b76 |
31 | |
00a464f7 |
32 | $str =~ s{~ # starting tilde |
33 | (?: |
34 | (~) # another tilde - escaped (set $1) |
35 | | # or |
36 | \n # \n - output nothing |
37 | | # or |
38 | \{ # opening brace of GB data |
39 | ( # set $2 to any number of... |
40 | (?: |
41 | [^~] # non-tilde GB character |
42 | | # or |
43 | ~(?!\}) # tilde not followed by a closing brace |
44 | )* |
45 | ) |
46 | ~\} # closing brace of GB data |
47 | | # XXX: invalid escape - maybe die on $chk? |
48 | ) |
49 | }{ |
50 | (defined $1) ? '~' # two tildes make one tilde |
51 | : |
52 | (defined $2) ? $gb->decode($2, $chk) # decode the characters |
53 | : |
67d7b5ef |
54 | '' # ~\n and invalid escape = '' |
00a464f7 |
55 | }egx; |
c0d88b76 |
56 | |
57 | return $str; |
58 | } |
59 | |
60 | sub encode |
61 | { |
62 | my ($obj,$str,$chk) = @_; |
00a464f7 |
63 | my ($out, $in_gb); |
67d7b5ef |
64 | my $gb = Encode::find_encoding('gb2312-raw'); |
c0d88b76 |
65 | |
66 | $str =~ s/~/~~/g; |
c0d88b76 |
67 | |
67d7b5ef |
68 | # XXX: Since CHECK and partial decoding has not been implemented yet, |
00a464f7 |
69 | # we'll use a very crude way to test for GB2312ness. |
70 | |
71 | for my $index (0 .. length($str) - 1) { |
72 | no warnings 'utf8'; |
73 | |
74 | my $char = substr($str, $index, 1); |
b2704119 |
75 | # try to encode this character |
76 | # with CHECK on so it stops at proper place. |
77 | # also note that the assignement was braced in eval |
78 | # -- dankogai |
79 | my $try; |
80 | eval{ $try = $gb->encode($char, 1) }; |
81 | |
67d7b5ef |
82 | if (defined($try)) { # is a GB character: |
00a464f7 |
83 | if ($in_gb) { |
67d7b5ef |
84 | $out .= $try; # in GB mode - just append it |
00a464f7 |
85 | } |
86 | else { |
67d7b5ef |
87 | $in_gb = 1; # enter GB mode, then append it |
88 | $out .= "~{$try"; |
00a464f7 |
89 | } |
67d7b5ef |
90 | } # not a GB character: |
00a464f7 |
91 | elsif ($in_gb) { |
67d7b5ef |
92 | $in_gb = 0; # leave GB mode, then append it |
93 | $out .= "~}$char"; |
00a464f7 |
94 | } |
95 | else { |
67d7b5ef |
96 | $out .= $char; # not in GB mode - just append it |
00a464f7 |
97 | } |
98 | } |
99 | |
67d7b5ef |
100 | $out .= '~}' if $in_gb; # add closing brace if needed |
00a464f7 |
101 | |
102 | return $out; |
c0d88b76 |
103 | } |
104 | |
105 | 1; |
106 | __END__ |
67d7b5ef |
107 | |
108 | |
109 | =head1 NAME |
110 | |
111 | Encode::CN::HZ -- internally used by Encode::CN |
112 | |
113 | =cut |