Integrate mainline (Win2k/MinGW all ok except threads/t/end.t)
[p5sagit/p5-mst-13.2.git] / ext / Encode / lib / Encode / CN / HZ.pm
CommitLineData
c0d88b76 1package Encode::CN::HZ;
2
00a464f7 3use strict;
4no warnings 'redefine'; # to quell the "use Encode" below
5
eb042f38 6use vars qw($VERSION);
d6b7ef86 7$VERSION = do { my @r = (q$Revision: 0.92 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
eb042f38 8
c0d88b76 9use Encode::CN;
10use Encode qw|encode decode|;
11use base 'Encode::Encoding';
12
c0d88b76 13# HZ is but escaped GB, so we implement it with the
14# GB2312(raw) encoding here. Cf. RFC 1842 & 1843.
15
16my $canon = 'hz';
17my $obj = bless {name => $canon}, __PACKAGE__;
18$obj->Define($canon);
19
20sub decode
21{
22 my ($obj,$str,$chk) = @_;
23 my $gb = Encode::find_encoding('gb2312');
24
00a464f7 25 $str =~ s{~ # starting tilde
26 (?:
27 (~) # another tilde - escaped (set $1)
28 | # or
29 \n # \n - output nothing
30 | # or
31 \{ # opening brace of GB data
32 ( # set $2 to any number of...
33 (?:
34 [^~] # non-tilde GB character
35 | # or
36 ~(?!\}) # tilde not followed by a closing brace
37 )*
38 )
39 ~\} # closing brace of GB data
40 | # XXX: invalid escape - maybe die on $chk?
41 )
42 }{
43 (defined $1) ? '~' # two tildes make one tilde
44 :
45 (defined $2) ? $gb->decode($2, $chk) # decode the characters
46 :
47 '' # '' on ~\n and invalid escape
48 }egx;
c0d88b76 49
50 return $str;
51}
52
53sub encode
54{
55 my ($obj,$str,$chk) = @_;
00a464f7 56 my ($out, $in_gb);
c0d88b76 57 my $gb = Encode::find_encoding('gb2312');
58
59 $str =~ s/~/~~/g;
c0d88b76 60
00a464f7 61 # XXX: Since CHECK and partial decoding has not been implemented yet,
62 # we'll use a very crude way to test for GB2312ness.
63
64 for my $index (0 .. length($str) - 1) {
65 no warnings 'utf8';
66
67 my $char = substr($str, $index, 1);
68 my $try = $gb->encode($char); # try encode this char
69
70 if (defined($try)) { # is a GB character
71 if ($in_gb) {
72 $out .= $try; # in GB mode - just append it
73 }
74 else {
75 $out .= "~{$try"; # enter GB mode, then append it
76 $in_gb = 1;
77 }
78 }
79 elsif ($in_gb) {
80 $out .= "~}$char"; # leave GB mode, then append it
81 $in_gb = 0;
82 }
83 else {
84 $out .= $char; # not in GB mode - just append it
85 }
86 }
87
88 $out .= '~}' if $in_gb; # add closing brace as needed
89
90 return $out;
c0d88b76 91}
92
931;
94__END__