Commit | Line | Data |
df1df145 |
1 | package Encode::Tcl::Escape; |
2 | use strict; |
d6b7ef86 |
3 | our $VERSION = do { my @r = (q$Revision: 0.92 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; |
4 | |
df1df145 |
5 | use base 'Encode::Encoding'; |
6 | |
7 | use Carp; |
8 | |
9 | use constant SI => "\cO"; |
10 | use constant SO => "\cN"; |
28e59c41 |
11 | use constant SS2 => "\e\x4E"; # ESC N |
12 | use constant SS3 => "\e\x4F"; # ESC O |
df1df145 |
13 | |
14 | sub read |
15 | { |
16 | my ($obj,$fh,$name) = @_; |
28e59c41 |
17 | my(%tbl, @seq, $enc, @esc, %grp, %mbc); |
df1df145 |
18 | while (<$fh>) |
19 | { |
20 | next unless /^(\S+)\s+(.*)$/; |
21 | my ($key,$val) = ($1,$2); |
22 | $val =~ s/^\{(.*?)\}/$1/g; |
23 | $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; |
24 | |
28e59c41 |
25 | if ($enc = Encode->getEncoding($key)) |
df1df145 |
26 | { |
28e59c41 |
27 | $tbl{$val} = |
28 | ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; |
29 | |
30 | $mbc{$val} = |
31 | $val !~ /\e\x24/ ? 1 : # single-byte |
32 | $val =~ /[\x30-\x3F]$/ ? 2 : # (only 2 is supported) |
33 | $val =~ /[\x40-\x5F]$/ ? 2 : # double byte |
34 | $val =~ /[\x60-\x6F]$/ ? 3 : # triple byte |
d6b7ef86 |
35 | $val =~ /[\x70-\x7E]$/ ? 4 : |
28e59c41 |
36 | # 4 or more (only 4 is supported) |
37 | croak("odd sequence is defined"); |
38 | |
df1df145 |
39 | push @seq, $val; |
28e59c41 |
40 | |
df1df145 |
41 | $grp{$val} = |
28e59c41 |
42 | $val =~ /\e\x24?[\x28]/ ? 0 : # G0 : SI |
43 | $val =~ /\e\x24?[\x29\x2D]/ ? 1 : # G1 : SO |
44 | $val =~ /\e\x24?[\x2A\x2E]/ ? 2 : # G2 : SS2 |
45 | $val =~ /\e\x24?[\x2B\x2F]/ ? 3 : # G3 : SS3 |
46 | 0; # G0 (ESC 02/04 F, etc.) |
df1df145 |
47 | } |
48 | else |
49 | { |
50 | $obj->{$key} = $val; |
51 | } |
28e59c41 |
52 | if ($val =~ /^\e(.*)/) |
df1df145 |
53 | { |
54 | push(@esc, quotemeta $1); |
55 | } |
56 | } |
57 | $obj->{'Grp'} = \%grp; # graphic chars |
28e59c41 |
58 | $obj->{'Mbc'} = \%mbc; # bytes per char |
df1df145 |
59 | $obj->{'Seq'} = \@seq; # escape sequences |
60 | $obj->{'Tbl'} = \%tbl; # encoding tables |
61 | $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC |
62 | return $obj; |
63 | } |
64 | |
65 | sub decode |
66 | { |
67 | my ($obj,$str,$chk) = @_; |
df1df145 |
68 | my $tbl = $obj->{'Tbl'}; |
69 | my $seq = $obj->{'Seq'}; |
28e59c41 |
70 | my $mbc = $obj->{'Mbc'}; |
df1df145 |
71 | my $grp = $obj->{'Grp'}; |
72 | my $esc = $obj->{'Esc'}; |
df1df145 |
73 | my $std = $seq->[0]; |
74 | my $cur = $std; |
75 | my @sta = ($std, undef, undef, undef); # G0 .. G3 state |
76 | my $s = 0; # state of SO-SI. 0 (G0) or 1 (G1); |
77 | my $ss = 0; # state of SS2,SS3. 0 (G0), 2 (G2) or 3 (G3); |
78 | my $uni; |
79 | while (length($str)) |
80 | { |
28e59c41 |
81 | if ($str =~ s/^\e//) |
df1df145 |
82 | { |
28e59c41 |
83 | if ($str =~ s/^($esc)//) |
df1df145 |
84 | { |
85 | my $e = "\e$1"; |
86 | $sta[ $grp->{$e} ] = $e if $tbl->{$e}; |
87 | } |
88 | # appearance of "\eN\eO" or "\eO\eN" isn't supposed. |
89 | # but in that case, the former will be ignored. |
28e59c41 |
90 | elsif ($str =~ s/^\x4E//) |
df1df145 |
91 | { |
92 | $ss = 2; |
93 | } |
28e59c41 |
94 | elsif ($str =~ s/^\x4F//) |
df1df145 |
95 | { |
96 | $ss = 3; |
97 | } |
98 | else |
99 | { |
d6b7ef86 |
100 | # strictly, ([\x21-\x2F]*[\x30-\x7E]). '?' for chopped. |
101 | $str =~ s/^([\x21-\x2F]*[\x30-\x7E]?)//; |
28e59c41 |
102 | if ($chk && ! length $str) |
df1df145 |
103 | { |
104 | $str = "\e$1"; # split sequence |
105 | last; |
106 | } |
107 | croak "unknown escape sequence: ESC $1"; |
108 | } |
109 | next; |
110 | } |
28e59c41 |
111 | if ($str =~ s/^\cN//) # SO |
df1df145 |
112 | { |
113 | $s = 1; next; |
114 | } |
28e59c41 |
115 | if ($str =~ s/^\cO//) # SI |
df1df145 |
116 | { |
117 | $s = 0; next; |
118 | } |
119 | |
120 | $cur = $ss ? $sta[$ss] : $sta[$s]; |
121 | |
28e59c41 |
122 | length($str) < $mbc->{$cur} and last; # split leading byte |
123 | |
124 | my $cc = substr($str, 0, $mbc->{$cur}, ''); |
125 | |
126 | my $x = $tbl->{$cur}->decode($cc); |
127 | defined $x or Encode::Tcl::no_map_in_decode($obj->{'Name'}, $cc); |
df1df145 |
128 | $uni .= $x; |
129 | $ss = 0; |
130 | } |
28e59c41 |
131 | if ($chk) |
df1df145 |
132 | { |
133 | my $back = join('', grep defined($_) && $_ ne $std, @sta); |
134 | $back .= SO if $s; |
28e59c41 |
135 | $back .= $ss == 2 ? SS2 : $ss == 3 ? SS3 : ''; |
df1df145 |
136 | $_[1] = $back.$str; |
137 | } |
138 | return $uni; |
139 | } |
140 | |
141 | sub encode |
142 | { |
143 | my ($obj,$uni,$chk) = @_; |
df1df145 |
144 | my $tbl = $obj->{'Tbl'}; |
145 | my $seq = $obj->{'Seq'}; |
146 | my $grp = $obj->{'Grp'}; |
147 | my $ini = $obj->{'init'}; |
df1df145 |
148 | my $std = $seq->[0]; |
149 | my $str = $ini; |
150 | my @sta = ($std,undef,undef,undef); # G0 .. G3 state |
151 | my $cur = $std; |
152 | my $pG = 0; # previous G: 0 or 1. |
153 | my $cG = 0; # current G: 0,1,2,3. |
154 | |
28e59c41 |
155 | if ($ini && defined $grp->{$ini}) |
df1df145 |
156 | { |
157 | $sta[ $grp->{$ini} ] = $ini; |
158 | } |
159 | |
160 | while (length($uni)) |
161 | { |
162 | my $ch = substr($uni,0,1,''); |
163 | my $x; |
164 | foreach my $e_seq (@$seq) |
165 | { |
28e59c41 |
166 | $x = $tbl->{$e_seq}->encode($ch, 1); |
df1df145 |
167 | $cur = $e_seq, last if defined $x; |
168 | } |
169 | unless (defined $x) |
170 | { |
28e59c41 |
171 | $chk or Encode::Tcl::no_map_in_encode(ord($ch), $obj->{'Name'}); |
df1df145 |
172 | return undef; |
173 | } |
df1df145 |
174 | $cG = $grp->{$cur}; |
175 | $str .= $sta[$cG] = $cur unless $cG < 2 && $cur eq $sta[$cG]; |
176 | |
177 | $str .= $cG == 0 && $pG == 1 ? SI : |
178 | $cG == 1 && $pG == 0 ? SO : |
179 | $cG == 2 ? SS2 : |
180 | $cG == 3 ? SS3 : ""; |
181 | $str .= $x; |
182 | $pG = $cG if $cG < 2; |
183 | } |
184 | $str .= SI if $pG == 1; # back to G0 |
28e59c41 |
185 | $str .= $std unless $std eq $sta[0]; # G0 to ASCII |
186 | $str .= $obj->{'final'}; # necessary? I don't know what is this for. |
df1df145 |
187 | $_[1] = $uni if $chk; |
188 | return $str; |
189 | } |
190 | |
191 | 1; |
192 | __END__ |
6b6c03af |
193 | |
194 | =head1 NAME |
195 | |
196 | Encode::Tcl::Escape - Tcl Escape encodings |
197 | |
198 | =head1 SYNOPSIS |
199 | |
200 | none |
201 | |
202 | =head1 DESCRIPTION |
203 | |
204 | This module is used internally by Encode::Tcl |
205 | and handles type E of Tcl encodings (7-bit code only). |
206 | |
207 | Control sequences supported by this module are |
208 | ESCAPE SEQUENCEs to designate graphic character sets |
209 | and the following: |
210 | |
211 | name (abbr.) bit combination |
212 | |
213 | ESCAPE (ESC) 01/11 |
214 | SHIFT-IN (SI) 00/15 |
215 | SHIFT-OUT (SO) 00/14 |
216 | SINGLE SHIFT TWO (SS2) ESC 04/14 |
217 | SINGLE SHIFT THREE (SS3) ESC 04/15 |
218 | |
d6b7ef86 |
219 | Designation of control character sets is not supported. |
6b6c03af |
220 | |
221 | =head1 SEE ALSO |
222 | |
223 | L<Encode> |
224 | |
225 | L<Encode::Tcl> |
226 | |
227 | L<http://www.itscj.ipsj.or.jp/ISO-IR/> [ISOREG] |
228 | |
229 | =cut |