perl datastructure conversion shortcuts for DDD
[p5sagit/p5-mst-13.2.git] / ext / Encode / lib / Encode / CN / HZ.pm
CommitLineData
c0d88b76 1package Encode::CN::HZ;
2
00a464f7 3use strict;
4no warnings 'redefine'; # to quell the "use Encode" below
5
c0d88b76 6use Encode::CN;
7use Encode qw|encode decode|;
8use base 'Encode::Encoding';
9
c0d88b76 10# HZ is but escaped GB, so we implement it with the
11# GB2312(raw) encoding here. Cf. RFC 1842 & 1843.
12
13my $canon = 'hz';
14my $obj = bless {name => $canon}, __PACKAGE__;
15$obj->Define($canon);
16
17sub decode
18{
19 my ($obj,$str,$chk) = @_;
20 my $gb = Encode::find_encoding('gb2312');
21
00a464f7 22 $str =~ s{~ # starting tilde
23 (?:
24 (~) # another tilde - escaped (set $1)
25 | # or
26 \n # \n - output nothing
27 | # or
28 \{ # opening brace of GB data
29 ( # set $2 to any number of...
30 (?:
31 [^~] # non-tilde GB character
32 | # or
33 ~(?!\}) # tilde not followed by a closing brace
34 )*
35 )
36 ~\} # closing brace of GB data
37 | # XXX: invalid escape - maybe die on $chk?
38 )
39 }{
40 (defined $1) ? '~' # two tildes make one tilde
41 :
42 (defined $2) ? $gb->decode($2, $chk) # decode the characters
43 :
44 '' # '' on ~\n and invalid escape
45 }egx;
c0d88b76 46
47 return $str;
48}
49
50sub encode
51{
52 my ($obj,$str,$chk) = @_;
00a464f7 53 my ($out, $in_gb);
c0d88b76 54 my $gb = Encode::find_encoding('gb2312');
55
56 $str =~ s/~/~~/g;
c0d88b76 57
00a464f7 58 # XXX: Since CHECK and partial decoding has not been implemented yet,
59 # we'll use a very crude way to test for GB2312ness.
60
61 for my $index (0 .. length($str) - 1) {
62 no warnings 'utf8';
63
64 my $char = substr($str, $index, 1);
65 my $try = $gb->encode($char); # try encode this char
66
67 if (defined($try)) { # is a GB character
68 if ($in_gb) {
69 $out .= $try; # in GB mode - just append it
70 }
71 else {
72 $out .= "~{$try"; # enter GB mode, then append it
73 $in_gb = 1;
74 }
75 }
76 elsif ($in_gb) {
77 $out .= "~}$char"; # leave GB mode, then append it
78 $in_gb = 0;
79 }
80 else {
81 $out .= $char; # not in GB mode - just append it
82 }
83 }
84
85 $out .= '~}' if $in_gb; # add closing brace as needed
86
87 return $out;
c0d88b76 88}
89
901;
91__END__