(Was Re: [ID 20020129.003] Trouble building perl 5.6.1)
[p5sagit/p5-mst-13.2.git] / ext / Encode / lib / Encode / Tcl / Escape.pm
CommitLineData
df1df145 1package Encode::Tcl::Escape;
2use strict;
3our $VERSION = do {my @r=(q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r};
4use base 'Encode::Encoding';
5
6use Carp;
7
8use constant SI => "\cO";
9use constant SO => "\cN";
10use constant SS2 => "\eN";
11use constant SS3 => "\eO";
12
13sub read
14{
15 my ($obj,$fh,$name) = @_;
16 my(%tbl, @seq, $enc, @esc, %grp);
17 while (<$fh>)
18 {
19 next unless /^(\S+)\s+(.*)$/;
20 my ($key,$val) = ($1,$2);
21 $val =~ s/^\{(.*?)\}/$1/g;
22 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
23
24 if($enc = Encode->getEncoding($key))
25 {
26 $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
27 push @seq, $val;
28 $grp{$val} =
29 $val =~ m|[(]| ? 0 : # G0 : SI eq "\cO"
30 $val =~ m|[)-]| ? 1 : # G1 : SO eq "\cN"
31 $val =~ m|[*.]| ? 2 : # G2 : SS2 eq "\eN"
32 $val =~ m|[+/]| ? 3 : # G3 : SS3 eq "\eO"
33 0; # G0
34 }
35 else
36 {
37 $obj->{$key} = $val;
38 }
39 if($val =~ /^\e(.*)/)
40 {
41 push(@esc, quotemeta $1);
42 }
43 }
44 $obj->{'Grp'} = \%grp; # graphic chars
45 $obj->{'Seq'} = \@seq; # escape sequences
46 $obj->{'Tbl'} = \%tbl; # encoding tables
47 $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC
48 return $obj;
49}
50
51sub decode
52{
53 my ($obj,$str,$chk) = @_;
54 my $name = $obj->{'Name'};
55 my $tbl = $obj->{'Tbl'};
56 my $seq = $obj->{'Seq'};
57 my $grp = $obj->{'Grp'};
58 my $esc = $obj->{'Esc'};
59 my $ini = $obj->{'init'};
60 my $fin = $obj->{'final'};
61 my $std = $seq->[0];
62 my $cur = $std;
63 my @sta = ($std, undef, undef, undef); # G0 .. G3 state
64 my $s = 0; # state of SO-SI. 0 (G0) or 1 (G1);
65 my $ss = 0; # state of SS2,SS3. 0 (G0), 2 (G2) or 3 (G3);
66 my $uni;
67 while (length($str))
68 {
69 my $cc = substr($str,0,1,'');
70 if($cc eq "\e")
71 {
72 if($str =~ s/^($esc)//)
73 {
74 my $e = "\e$1";
75 $sta[ $grp->{$e} ] = $e if $tbl->{$e};
76 }
77 # appearance of "\eN\eO" or "\eO\eN" isn't supposed.
78 # but in that case, the former will be ignored.
79 elsif($str =~ s/^N//)
80 {
81 $ss = 2;
82 }
83 elsif($str =~ s/^O//)
84 {
85 $ss = 3;
86 }
87 else
88 {
89 # strictly, ([\x20-\x2F]*[\x30-\x7E]). '?' for chopped.
90 $str =~ s/^([\x20-\x2F]*[\x30-\x7E]?)//;
91 if($chk && ! length $str)
92 {
93 $str = "\e$1"; # split sequence
94 last;
95 }
96 croak "unknown escape sequence: ESC $1";
97 }
98 next;
99 }
100 if($cc eq SO)
101 {
102 $s = 1; next;
103 }
104 if($cc eq SI)
105 {
106 $s = 0; next;
107 }
108
109 $cur = $ss ? $sta[$ss] : $sta[$s];
110
111 if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table')
112 {
113 $uni .= $tbl->{$cur}->decode($cc);
114 $ss = 0;
115 next;
116 }
117 my $ch = ord($cc);
118 my $rep = $tbl->{$cur}->{'Rep'};
119 my $touni = $tbl->{$cur}->{'ToUni'};
120 my $x;
121 if (&$rep($ch) eq 'C')
122 {
123 $x = $touni->[0][$ch];
124 }
125 else
126 {
127 if(! length $str)
128 {
129 $str = $cc; # split leading byte
130 last;
131 }
132 my $c2 = substr($str,0,1,'');
133 $cc .= $c2;
134 $x = $touni->[$ch][ord($c2)];
135 }
136 unless (defined $x)
137 {
138 Encode::Tcl::no_map_in_decode($name, $cc.$str);
139 }
140 $uni .= $x;
141 $ss = 0;
142 }
143 if($chk)
144 {
145 my $back = join('', grep defined($_) && $_ ne $std, @sta);
146 $back .= SO if $s;
147 $back .= $ss == 2 ? SS2 : SS3 if $ss;
148 $_[1] = $back.$str;
149 }
150 return $uni;
151}
152
153sub encode
154{
155 my ($obj,$uni,$chk) = @_;
156 my $name = $obj->{'Name'};
157 my $tbl = $obj->{'Tbl'};
158 my $seq = $obj->{'Seq'};
159 my $grp = $obj->{'Grp'};
160 my $ini = $obj->{'init'};
161 my $fin = $obj->{'final'};
162 my $std = $seq->[0];
163 my $str = $ini;
164 my @sta = ($std,undef,undef,undef); # G0 .. G3 state
165 my $cur = $std;
166 my $pG = 0; # previous G: 0 or 1.
167 my $cG = 0; # current G: 0,1,2,3.
168
169 if($ini && defined $grp->{$ini})
170 {
171 $sta[ $grp->{$ini} ] = $ini;
172 }
173
174 while (length($uni))
175 {
176 my $ch = substr($uni,0,1,'');
177 my $x;
178 foreach my $e_seq (@$seq)
179 {
180 $x = ref($tbl->{$e_seq}) eq 'Encode::Tcl::Table'
181 ? $tbl->{$e_seq}->{FmUni}->{$ch}
182 : $tbl->{$e_seq}->encode($ch,1);
183 $cur = $e_seq, last if defined $x;
184 }
185 unless (defined $x)
186 {
187 unless($chk)
188 {
189 Encode::Tcl::no_map_in_encode(ord($ch), $name)
190 }
191 return undef;
192 }
193 if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table')
194 {
195 my $def = $tbl->{$cur}->{'Def'};
196 my $rep = $tbl->{$cur}->{'Rep'};
197 $x = pack(&$rep($x),$x);
198 }
199 $cG = $grp->{$cur};
200 $str .= $sta[$cG] = $cur unless $cG < 2 && $cur eq $sta[$cG];
201
202 $str .= $cG == 0 && $pG == 1 ? SI :
203 $cG == 1 && $pG == 0 ? SO :
204 $cG == 2 ? SS2 :
205 $cG == 3 ? SS3 : "";
206 $str .= $x;
207 $pG = $cG if $cG < 2;
208 }
209 $str .= SI if $pG == 1; # back to G0
210 $str .= $std unless $std eq $sta[0]; # GO to ASCII
211 $str .= $fin; # necessary?
212 $_[1] = $uni if $chk;
213 return $str;
214}
215
2161;
217__END__