Commit | Line | Data |
df1df145 |
1 | package Encode::Tcl::Escape; |
2 | use strict; |
3 | our $VERSION = do {my @r=(q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r}; |
4 | use base 'Encode::Encoding'; |
5 | |
6 | use Carp; |
7 | |
8 | use constant SI => "\cO"; |
9 | use constant SO => "\cN"; |
28e59c41 |
10 | use constant SS2 => "\e\x4E"; # ESC N |
11 | use constant SS3 => "\e\x4F"; # ESC O |
df1df145 |
12 | |
13 | sub read |
14 | { |
15 | my ($obj,$fh,$name) = @_; |
28e59c41 |
16 | my(%tbl, @seq, $enc, @esc, %grp, %mbc); |
df1df145 |
17 | while (<$fh>) |
18 | { |
19 | next unless /^(\S+)\s+(.*)$/; |
20 | my ($key,$val) = ($1,$2); |
21 | $val =~ s/^\{(.*?)\}/$1/g; |
22 | $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; |
23 | |
28e59c41 |
24 | if ($enc = Encode->getEncoding($key)) |
df1df145 |
25 | { |
28e59c41 |
26 | $tbl{$val} = |
27 | ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; |
28 | |
29 | $mbc{$val} = |
30 | $val !~ /\e\x24/ ? 1 : # single-byte |
31 | $val =~ /[\x30-\x3F]$/ ? 2 : # (only 2 is supported) |
32 | $val =~ /[\x40-\x5F]$/ ? 2 : # double byte |
33 | $val =~ /[\x60-\x6F]$/ ? 3 : # triple byte |
34 | $val =~ /[\x70-\x7F]$/ ? 4 : |
35 | # 4 or more (only 4 is supported) |
36 | croak("odd sequence is defined"); |
37 | |
df1df145 |
38 | push @seq, $val; |
28e59c41 |
39 | |
df1df145 |
40 | $grp{$val} = |
28e59c41 |
41 | $val =~ /\e\x24?[\x28]/ ? 0 : # G0 : SI |
42 | $val =~ /\e\x24?[\x29\x2D]/ ? 1 : # G1 : SO |
43 | $val =~ /\e\x24?[\x2A\x2E]/ ? 2 : # G2 : SS2 |
44 | $val =~ /\e\x24?[\x2B\x2F]/ ? 3 : # G3 : SS3 |
45 | 0; # G0 (ESC 02/04 F, etc.) |
df1df145 |
46 | } |
47 | else |
48 | { |
49 | $obj->{$key} = $val; |
50 | } |
28e59c41 |
51 | if ($val =~ /^\e(.*)/) |
df1df145 |
52 | { |
53 | push(@esc, quotemeta $1); |
54 | } |
55 | } |
56 | $obj->{'Grp'} = \%grp; # graphic chars |
28e59c41 |
57 | $obj->{'Mbc'} = \%mbc; # bytes per char |
df1df145 |
58 | $obj->{'Seq'} = \@seq; # escape sequences |
59 | $obj->{'Tbl'} = \%tbl; # encoding tables |
60 | $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC |
61 | return $obj; |
62 | } |
63 | |
64 | sub decode |
65 | { |
66 | my ($obj,$str,$chk) = @_; |
df1df145 |
67 | my $tbl = $obj->{'Tbl'}; |
68 | my $seq = $obj->{'Seq'}; |
28e59c41 |
69 | my $mbc = $obj->{'Mbc'}; |
df1df145 |
70 | my $grp = $obj->{'Grp'}; |
71 | my $esc = $obj->{'Esc'}; |
df1df145 |
72 | my $std = $seq->[0]; |
73 | my $cur = $std; |
74 | my @sta = ($std, undef, undef, undef); # G0 .. G3 state |
75 | my $s = 0; # state of SO-SI. 0 (G0) or 1 (G1); |
76 | my $ss = 0; # state of SS2,SS3. 0 (G0), 2 (G2) or 3 (G3); |
77 | my $uni; |
78 | while (length($str)) |
79 | { |
28e59c41 |
80 | if ($str =~ s/^\e//) |
df1df145 |
81 | { |
28e59c41 |
82 | if ($str =~ s/^($esc)//) |
df1df145 |
83 | { |
84 | my $e = "\e$1"; |
85 | $sta[ $grp->{$e} ] = $e if $tbl->{$e}; |
86 | } |
87 | # appearance of "\eN\eO" or "\eO\eN" isn't supposed. |
88 | # but in that case, the former will be ignored. |
28e59c41 |
89 | elsif ($str =~ s/^\x4E//) |
df1df145 |
90 | { |
91 | $ss = 2; |
92 | } |
28e59c41 |
93 | elsif ($str =~ s/^\x4F//) |
df1df145 |
94 | { |
95 | $ss = 3; |
96 | } |
97 | else |
98 | { |
99 | # strictly, ([\x20-\x2F]*[\x30-\x7E]). '?' for chopped. |
100 | $str =~ s/^([\x20-\x2F]*[\x30-\x7E]?)//; |
28e59c41 |
101 | if ($chk && ! length $str) |
df1df145 |
102 | { |
103 | $str = "\e$1"; # split sequence |
104 | last; |
105 | } |
106 | croak "unknown escape sequence: ESC $1"; |
107 | } |
108 | next; |
109 | } |
28e59c41 |
110 | if ($str =~ s/^\cN//) # SO |
df1df145 |
111 | { |
112 | $s = 1; next; |
113 | } |
28e59c41 |
114 | if ($str =~ s/^\cO//) # SI |
df1df145 |
115 | { |
116 | $s = 0; next; |
117 | } |
118 | |
119 | $cur = $ss ? $sta[$ss] : $sta[$s]; |
120 | |
28e59c41 |
121 | length($str) < $mbc->{$cur} and last; # split leading byte |
122 | |
123 | my $cc = substr($str, 0, $mbc->{$cur}, ''); |
124 | |
125 | my $x = $tbl->{$cur}->decode($cc); |
126 | defined $x or Encode::Tcl::no_map_in_decode($obj->{'Name'}, $cc); |
df1df145 |
127 | $uni .= $x; |
128 | $ss = 0; |
129 | } |
28e59c41 |
130 | if ($chk) |
df1df145 |
131 | { |
132 | my $back = join('', grep defined($_) && $_ ne $std, @sta); |
133 | $back .= SO if $s; |
28e59c41 |
134 | $back .= $ss == 2 ? SS2 : $ss == 3 ? SS3 : ''; |
df1df145 |
135 | $_[1] = $back.$str; |
136 | } |
137 | return $uni; |
138 | } |
139 | |
140 | sub encode |
141 | { |
142 | my ($obj,$uni,$chk) = @_; |
df1df145 |
143 | my $tbl = $obj->{'Tbl'}; |
144 | my $seq = $obj->{'Seq'}; |
145 | my $grp = $obj->{'Grp'}; |
146 | my $ini = $obj->{'init'}; |
df1df145 |
147 | my $std = $seq->[0]; |
148 | my $str = $ini; |
149 | my @sta = ($std,undef,undef,undef); # G0 .. G3 state |
150 | my $cur = $std; |
151 | my $pG = 0; # previous G: 0 or 1. |
152 | my $cG = 0; # current G: 0,1,2,3. |
153 | |
28e59c41 |
154 | if ($ini && defined $grp->{$ini}) |
df1df145 |
155 | { |
156 | $sta[ $grp->{$ini} ] = $ini; |
157 | } |
158 | |
159 | while (length($uni)) |
160 | { |
161 | my $ch = substr($uni,0,1,''); |
162 | my $x; |
163 | foreach my $e_seq (@$seq) |
164 | { |
28e59c41 |
165 | $x = $tbl->{$e_seq}->encode($ch, 1); |
df1df145 |
166 | $cur = $e_seq, last if defined $x; |
167 | } |
168 | unless (defined $x) |
169 | { |
28e59c41 |
170 | $chk or Encode::Tcl::no_map_in_encode(ord($ch), $obj->{'Name'}); |
df1df145 |
171 | return undef; |
172 | } |
df1df145 |
173 | $cG = $grp->{$cur}; |
174 | $str .= $sta[$cG] = $cur unless $cG < 2 && $cur eq $sta[$cG]; |
175 | |
176 | $str .= $cG == 0 && $pG == 1 ? SI : |
177 | $cG == 1 && $pG == 0 ? SO : |
178 | $cG == 2 ? SS2 : |
179 | $cG == 3 ? SS3 : ""; |
180 | $str .= $x; |
181 | $pG = $cG if $cG < 2; |
182 | } |
183 | $str .= SI if $pG == 1; # back to G0 |
28e59c41 |
184 | $str .= $std unless $std eq $sta[0]; # G0 to ASCII |
185 | $str .= $obj->{'final'}; # necessary? I don't know what is this for. |
df1df145 |
186 | $_[1] = $uni if $chk; |
187 | return $str; |
188 | } |
189 | |
190 | 1; |
191 | __END__ |
6b6c03af |
192 | |
193 | =head1 NAME |
194 | |
195 | Encode::Tcl::Escape - Tcl Escape encodings |
196 | |
197 | =head1 SYNOPSIS |
198 | |
199 | none |
200 | |
201 | =head1 DESCRIPTION |
202 | |
203 | This module is used internally by Encode::Tcl |
204 | and handles type E of Tcl encodings (7-bit code only). |
205 | |
206 | Control sequences supported by this module are |
207 | ESCAPE SEQUENCEs to designate graphic character sets |
208 | and the following: |
209 | |
210 | name (abbr.) bit combination |
211 | |
212 | ESCAPE (ESC) 01/11 |
213 | SHIFT-IN (SI) 00/15 |
214 | SHIFT-OUT (SO) 00/14 |
215 | SINGLE SHIFT TWO (SS2) ESC 04/14 |
216 | SINGLE SHIFT THREE (SS3) ESC 04/15 |
217 | |
218 | Designation of control character sets are not supported. |
219 | |
220 | =head1 SEE ALSO |
221 | |
222 | L<Encode> |
223 | |
224 | L<Encode::Tcl> |
225 | |
226 | L<http://www.itscj.ipsj.or.jp/ISO-IR/> [ISOREG] |
227 | |
228 | =cut |