small corrections
[p5sagit/p5-mst-13.2.git] / ext / Encode / lib / Encode / CN / HZ.pm
CommitLineData
c0d88b76 1package Encode::CN::HZ;
2
00a464f7 3use strict;
00a464f7 4
eb042f38 5use vars qw($VERSION);
0ab8f81e 6$VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
eb042f38 7
67d7b5ef 8use Encode ();
c0d88b76 9use Encode::CN;
c0d88b76 10use base 'Encode::Encoding';
11
0ab8f81e 12# HZ is only escaped GB, so we implement it with the
13# GB2312(raw) encoding here. Cf. RFCs 1842 & 1843.
c0d88b76 14
15my $canon = 'hz';
16my $obj = bless {name => $canon}, __PACKAGE__;
17$obj->Define($canon);
18
0ab8f81e 19sub needs_lines { 1 }
20
21sub perlio_ok {
22 # exists $INC{"PerlIO/encoding.pm"} or return 0;
23 # PerlIO::encoding->VERSION >= 0.03 and return 1;
24 return 0; # for the time being
25}
26
c0d88b76 27sub decode
28{
29 my ($obj,$str,$chk) = @_;
67d7b5ef 30 my $gb = Encode::find_encoding('gb2312-raw');
c0d88b76 31
00a464f7 32 $str =~ s{~ # starting tilde
33 (?:
34 (~) # another tilde - escaped (set $1)
35 | # or
36 \n # \n - output nothing
37 | # or
38 \{ # opening brace of GB data
39 ( # set $2 to any number of...
40 (?:
41 [^~] # non-tilde GB character
42 | # or
43 ~(?!\}) # tilde not followed by a closing brace
44 )*
45 )
46 ~\} # closing brace of GB data
47 | # XXX: invalid escape - maybe die on $chk?
48 )
49 }{
50 (defined $1) ? '~' # two tildes make one tilde
51 :
52 (defined $2) ? $gb->decode($2, $chk) # decode the characters
53 :
67d7b5ef 54 '' # ~\n and invalid escape = ''
00a464f7 55 }egx;
c0d88b76 56
57 return $str;
58}
59
60sub encode
61{
62 my ($obj,$str,$chk) = @_;
00a464f7 63 my ($out, $in_gb);
67d7b5ef 64 my $gb = Encode::find_encoding('gb2312-raw');
c0d88b76 65
66 $str =~ s/~/~~/g;
c0d88b76 67
67d7b5ef 68 # XXX: Since CHECK and partial decoding has not been implemented yet,
00a464f7 69 # we'll use a very crude way to test for GB2312ness.
70
71 for my $index (0 .. length($str) - 1) {
72 no warnings 'utf8';
73
74 my $char = substr($str, $index, 1);
b2704119 75 # try to encode this character
76 # with CHECK on so it stops at proper place.
77 # also note that the assignement was braced in eval
78 # -- dankogai
79 my $try;
80 eval{ $try = $gb->encode($char, 1) };
81
67d7b5ef 82 if (defined($try)) { # is a GB character:
00a464f7 83 if ($in_gb) {
67d7b5ef 84 $out .= $try; # in GB mode - just append it
00a464f7 85 }
86 else {
67d7b5ef 87 $in_gb = 1; # enter GB mode, then append it
88 $out .= "~{$try";
00a464f7 89 }
67d7b5ef 90 } # not a GB character:
00a464f7 91 elsif ($in_gb) {
67d7b5ef 92 $in_gb = 0; # leave GB mode, then append it
93 $out .= "~}$char";
00a464f7 94 }
95 else {
67d7b5ef 96 $out .= $char; # not in GB mode - just append it
00a464f7 97 }
98 }
99
67d7b5ef 100 $out .= '~}' if $in_gb; # add closing brace if needed
00a464f7 101
102 return $out;
c0d88b76 103}
104
1051;
106__END__
67d7b5ef 107
108
109=head1 NAME
110
111Encode::CN::HZ -- internally used by Encode::CN
112
113=cut