Commit | Line | Data |
df1df145 |
1 | package Encode::Tcl::Escape; |
2 | use strict; |
3 | our $VERSION = do {my @r=(q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r}; |
4 | use base 'Encode::Encoding'; |
5 | |
6 | use Carp; |
7 | |
8 | use constant SI => "\cO"; |
9 | use constant SO => "\cN"; |
10 | use constant SS2 => "\eN"; |
11 | use constant SS3 => "\eO"; |
12 | |
13 | sub read |
14 | { |
15 | my ($obj,$fh,$name) = @_; |
16 | my(%tbl, @seq, $enc, @esc, %grp); |
17 | while (<$fh>) |
18 | { |
19 | next unless /^(\S+)\s+(.*)$/; |
20 | my ($key,$val) = ($1,$2); |
21 | $val =~ s/^\{(.*?)\}/$1/g; |
22 | $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; |
23 | |
24 | if($enc = Encode->getEncoding($key)) |
25 | { |
26 | $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; |
27 | push @seq, $val; |
28 | $grp{$val} = |
29 | $val =~ m|[(]| ? 0 : # G0 : SI eq "\cO" |
30 | $val =~ m|[)-]| ? 1 : # G1 : SO eq "\cN" |
31 | $val =~ m|[*.]| ? 2 : # G2 : SS2 eq "\eN" |
32 | $val =~ m|[+/]| ? 3 : # G3 : SS3 eq "\eO" |
33 | 0; # G0 |
34 | } |
35 | else |
36 | { |
37 | $obj->{$key} = $val; |
38 | } |
39 | if($val =~ /^\e(.*)/) |
40 | { |
41 | push(@esc, quotemeta $1); |
42 | } |
43 | } |
44 | $obj->{'Grp'} = \%grp; # graphic chars |
45 | $obj->{'Seq'} = \@seq; # escape sequences |
46 | $obj->{'Tbl'} = \%tbl; # encoding tables |
47 | $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC |
48 | return $obj; |
49 | } |
50 | |
51 | sub decode |
52 | { |
53 | my ($obj,$str,$chk) = @_; |
54 | my $name = $obj->{'Name'}; |
55 | my $tbl = $obj->{'Tbl'}; |
56 | my $seq = $obj->{'Seq'}; |
57 | my $grp = $obj->{'Grp'}; |
58 | my $esc = $obj->{'Esc'}; |
59 | my $ini = $obj->{'init'}; |
60 | my $fin = $obj->{'final'}; |
61 | my $std = $seq->[0]; |
62 | my $cur = $std; |
63 | my @sta = ($std, undef, undef, undef); # G0 .. G3 state |
64 | my $s = 0; # state of SO-SI. 0 (G0) or 1 (G1); |
65 | my $ss = 0; # state of SS2,SS3. 0 (G0), 2 (G2) or 3 (G3); |
66 | my $uni; |
67 | while (length($str)) |
68 | { |
69 | my $cc = substr($str,0,1,''); |
70 | if($cc eq "\e") |
71 | { |
72 | if($str =~ s/^($esc)//) |
73 | { |
74 | my $e = "\e$1"; |
75 | $sta[ $grp->{$e} ] = $e if $tbl->{$e}; |
76 | } |
77 | # appearance of "\eN\eO" or "\eO\eN" isn't supposed. |
78 | # but in that case, the former will be ignored. |
79 | elsif($str =~ s/^N//) |
80 | { |
81 | $ss = 2; |
82 | } |
83 | elsif($str =~ s/^O//) |
84 | { |
85 | $ss = 3; |
86 | } |
87 | else |
88 | { |
89 | # strictly, ([\x20-\x2F]*[\x30-\x7E]). '?' for chopped. |
90 | $str =~ s/^([\x20-\x2F]*[\x30-\x7E]?)//; |
91 | if($chk && ! length $str) |
92 | { |
93 | $str = "\e$1"; # split sequence |
94 | last; |
95 | } |
96 | croak "unknown escape sequence: ESC $1"; |
97 | } |
98 | next; |
99 | } |
100 | if($cc eq SO) |
101 | { |
102 | $s = 1; next; |
103 | } |
104 | if($cc eq SI) |
105 | { |
106 | $s = 0; next; |
107 | } |
108 | |
109 | $cur = $ss ? $sta[$ss] : $sta[$s]; |
110 | |
111 | if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table') |
112 | { |
113 | $uni .= $tbl->{$cur}->decode($cc); |
114 | $ss = 0; |
115 | next; |
116 | } |
117 | my $ch = ord($cc); |
118 | my $rep = $tbl->{$cur}->{'Rep'}; |
119 | my $touni = $tbl->{$cur}->{'ToUni'}; |
120 | my $x; |
121 | if (&$rep($ch) eq 'C') |
122 | { |
123 | $x = $touni->[0][$ch]; |
124 | } |
125 | else |
126 | { |
127 | if(! length $str) |
128 | { |
129 | $str = $cc; # split leading byte |
130 | last; |
131 | } |
132 | my $c2 = substr($str,0,1,''); |
133 | $cc .= $c2; |
134 | $x = $touni->[$ch][ord($c2)]; |
135 | } |
136 | unless (defined $x) |
137 | { |
138 | Encode::Tcl::no_map_in_decode($name, $cc.$str); |
139 | } |
140 | $uni .= $x; |
141 | $ss = 0; |
142 | } |
143 | if($chk) |
144 | { |
145 | my $back = join('', grep defined($_) && $_ ne $std, @sta); |
146 | $back .= SO if $s; |
147 | $back .= $ss == 2 ? SS2 : SS3 if $ss; |
148 | $_[1] = $back.$str; |
149 | } |
150 | return $uni; |
151 | } |
152 | |
153 | sub encode |
154 | { |
155 | my ($obj,$uni,$chk) = @_; |
156 | my $name = $obj->{'Name'}; |
157 | my $tbl = $obj->{'Tbl'}; |
158 | my $seq = $obj->{'Seq'}; |
159 | my $grp = $obj->{'Grp'}; |
160 | my $ini = $obj->{'init'}; |
161 | my $fin = $obj->{'final'}; |
162 | my $std = $seq->[0]; |
163 | my $str = $ini; |
164 | my @sta = ($std,undef,undef,undef); # G0 .. G3 state |
165 | my $cur = $std; |
166 | my $pG = 0; # previous G: 0 or 1. |
167 | my $cG = 0; # current G: 0,1,2,3. |
168 | |
169 | if($ini && defined $grp->{$ini}) |
170 | { |
171 | $sta[ $grp->{$ini} ] = $ini; |
172 | } |
173 | |
174 | while (length($uni)) |
175 | { |
176 | my $ch = substr($uni,0,1,''); |
177 | my $x; |
178 | foreach my $e_seq (@$seq) |
179 | { |
180 | $x = ref($tbl->{$e_seq}) eq 'Encode::Tcl::Table' |
181 | ? $tbl->{$e_seq}->{FmUni}->{$ch} |
182 | : $tbl->{$e_seq}->encode($ch,1); |
183 | $cur = $e_seq, last if defined $x; |
184 | } |
185 | unless (defined $x) |
186 | { |
187 | unless($chk) |
188 | { |
189 | Encode::Tcl::no_map_in_encode(ord($ch), $name) |
190 | } |
191 | return undef; |
192 | } |
193 | if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table') |
194 | { |
195 | my $def = $tbl->{$cur}->{'Def'}; |
196 | my $rep = $tbl->{$cur}->{'Rep'}; |
197 | $x = pack(&$rep($x),$x); |
198 | } |
199 | $cG = $grp->{$cur}; |
200 | $str .= $sta[$cG] = $cur unless $cG < 2 && $cur eq $sta[$cG]; |
201 | |
202 | $str .= $cG == 0 && $pG == 1 ? SI : |
203 | $cG == 1 && $pG == 0 ? SO : |
204 | $cG == 2 ? SS2 : |
205 | $cG == 3 ? SS3 : ""; |
206 | $str .= $x; |
207 | $pG = $cG if $cG < 2; |
208 | } |
209 | $str .= SI if $pG == 1; # back to G0 |
210 | $str .= $std unless $std eq $sta[0]; # GO to ASCII |
211 | $str .= $fin; # necessary? |
212 | $_[1] = $uni if $chk; |
213 | return $str; |
214 | } |
215 | |
216 | 1; |
217 | __END__ |