Noise with -w.
[p5sagit/p5-mst-13.2.git] / ext / Encode / lib / Encode / Tcl / Escape.pm
CommitLineData
df1df145 1package Encode::Tcl::Escape;
2use strict;
fab31126 3our $VERSION = do { my @r = (q$Revision: 0.92 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
ee981de6 4
df1df145 5use base 'Encode::Encoding';
6
7use Carp;
8
9use constant SI => "\cO";
10use constant SO => "\cN";
28e59c41 11use constant SS2 => "\e\x4E"; # ESC N
12use constant SS3 => "\e\x4F"; # ESC O
df1df145 13
14sub read
15{
16 my ($obj,$fh,$name) = @_;
28e59c41 17 my(%tbl, @seq, $enc, @esc, %grp, %mbc);
df1df145 18 while (<$fh>)
19 {
20 next unless /^(\S+)\s+(.*)$/;
21 my ($key,$val) = ($1,$2);
22 $val =~ s/^\{(.*?)\}/$1/g;
23 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
24
28e59c41 25 if ($enc = Encode->getEncoding($key))
df1df145 26 {
28e59c41 27 $tbl{$val} =
28 ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
29
30 $mbc{$val} =
31 $val !~ /\e\x24/ ? 1 : # single-byte
32 $val =~ /[\x30-\x3F]$/ ? 2 : # (only 2 is supported)
33 $val =~ /[\x40-\x5F]$/ ? 2 : # double byte
34 $val =~ /[\x60-\x6F]$/ ? 3 : # triple byte
38a64d23 35 $val =~ /[\x70-\x7E]$/ ? 4 :
28e59c41 36 # 4 or more (only 4 is supported)
37 croak("odd sequence is defined");
38
df1df145 39 push @seq, $val;
28e59c41 40
df1df145 41 $grp{$val} =
28e59c41 42 $val =~ /\e\x24?[\x28]/ ? 0 : # G0 : SI
43 $val =~ /\e\x24?[\x29\x2D]/ ? 1 : # G1 : SO
44 $val =~ /\e\x24?[\x2A\x2E]/ ? 2 : # G2 : SS2
45 $val =~ /\e\x24?[\x2B\x2F]/ ? 3 : # G3 : SS3
46 0; # G0 (ESC 02/04 F, etc.)
df1df145 47 }
48 else
49 {
50 $obj->{$key} = $val;
51 }
28e59c41 52 if ($val =~ /^\e(.*)/)
df1df145 53 {
54 push(@esc, quotemeta $1);
55 }
56 }
57 $obj->{'Grp'} = \%grp; # graphic chars
28e59c41 58 $obj->{'Mbc'} = \%mbc; # bytes per char
df1df145 59 $obj->{'Seq'} = \@seq; # escape sequences
60 $obj->{'Tbl'} = \%tbl; # encoding tables
61 $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC
62 return $obj;
63}
64
65sub decode
66{
67 my ($obj,$str,$chk) = @_;
df1df145 68 my $tbl = $obj->{'Tbl'};
69 my $seq = $obj->{'Seq'};
28e59c41 70 my $mbc = $obj->{'Mbc'};
df1df145 71 my $grp = $obj->{'Grp'};
72 my $esc = $obj->{'Esc'};
df1df145 73 my $std = $seq->[0];
74 my $cur = $std;
75 my @sta = ($std, undef, undef, undef); # G0 .. G3 state
76 my $s = 0; # state of SO-SI. 0 (G0) or 1 (G1);
77 my $ss = 0; # state of SS2,SS3. 0 (G0), 2 (G2) or 3 (G3);
78 my $uni;
79 while (length($str))
80 {
28e59c41 81 if ($str =~ s/^\e//)
df1df145 82 {
28e59c41 83 if ($str =~ s/^($esc)//)
df1df145 84 {
85 my $e = "\e$1";
86 $sta[ $grp->{$e} ] = $e if $tbl->{$e};
87 }
88 # appearance of "\eN\eO" or "\eO\eN" isn't supposed.
89 # but in that case, the former will be ignored.
28e59c41 90 elsif ($str =~ s/^\x4E//)
df1df145 91 {
92 $ss = 2;
93 }
28e59c41 94 elsif ($str =~ s/^\x4F//)
df1df145 95 {
96 $ss = 3;
97 }
98 else
99 {
38a64d23 100 # strictly, ([\x21-\x2F]*[\x30-\x7E]). '?' for chopped.
101 $str =~ s/^([\x21-\x2F]*[\x30-\x7E]?)//;
28e59c41 102 if ($chk && ! length $str)
df1df145 103 {
104 $str = "\e$1"; # split sequence
105 last;
106 }
107 croak "unknown escape sequence: ESC $1";
108 }
109 next;
110 }
28e59c41 111 if ($str =~ s/^\cN//) # SO
df1df145 112 {
113 $s = 1; next;
114 }
28e59c41 115 if ($str =~ s/^\cO//) # SI
df1df145 116 {
117 $s = 0; next;
118 }
119
120 $cur = $ss ? $sta[$ss] : $sta[$s];
121
28e59c41 122 length($str) < $mbc->{$cur} and last; # split leading byte
123
124 my $cc = substr($str, 0, $mbc->{$cur}, '');
125
126 my $x = $tbl->{$cur}->decode($cc);
127 defined $x or Encode::Tcl::no_map_in_decode($obj->{'Name'}, $cc);
df1df145 128 $uni .= $x;
129 $ss = 0;
130 }
28e59c41 131 if ($chk)
df1df145 132 {
133 my $back = join('', grep defined($_) && $_ ne $std, @sta);
134 $back .= SO if $s;
28e59c41 135 $back .= $ss == 2 ? SS2 : $ss == 3 ? SS3 : '';
df1df145 136 $_[1] = $back.$str;
137 }
138 return $uni;
139}
140
141sub encode
142{
143 my ($obj,$uni,$chk) = @_;
df1df145 144 my $tbl = $obj->{'Tbl'};
145 my $seq = $obj->{'Seq'};
146 my $grp = $obj->{'Grp'};
147 my $ini = $obj->{'init'};
df1df145 148 my $std = $seq->[0];
149 my $str = $ini;
150 my @sta = ($std,undef,undef,undef); # G0 .. G3 state
151 my $cur = $std;
152 my $pG = 0; # previous G: 0 or 1.
153 my $cG = 0; # current G: 0,1,2,3.
154
28e59c41 155 if ($ini && defined $grp->{$ini})
df1df145 156 {
157 $sta[ $grp->{$ini} ] = $ini;
158 }
159
160 while (length($uni))
161 {
162 my $ch = substr($uni,0,1,'');
163 my $x;
164 foreach my $e_seq (@$seq)
165 {
28e59c41 166 $x = $tbl->{$e_seq}->encode($ch, 1);
df1df145 167 $cur = $e_seq, last if defined $x;
168 }
169 unless (defined $x)
170 {
28e59c41 171 $chk or Encode::Tcl::no_map_in_encode(ord($ch), $obj->{'Name'});
df1df145 172 return undef;
173 }
df1df145 174 $cG = $grp->{$cur};
175 $str .= $sta[$cG] = $cur unless $cG < 2 && $cur eq $sta[$cG];
176
177 $str .= $cG == 0 && $pG == 1 ? SI :
178 $cG == 1 && $pG == 0 ? SO :
179 $cG == 2 ? SS2 :
180 $cG == 3 ? SS3 : "";
181 $str .= $x;
182 $pG = $cG if $cG < 2;
183 }
184 $str .= SI if $pG == 1; # back to G0
28e59c41 185 $str .= $std unless $std eq $sta[0]; # G0 to ASCII
186 $str .= $obj->{'final'}; # necessary? I don't know what is this for.
df1df145 187 $_[1] = $uni if $chk;
188 return $str;
189}
190
1911;
192__END__
6b6c03af 193
194=head1 NAME
195
196Encode::Tcl::Escape - Tcl Escape encodings
197
198=head1 SYNOPSIS
199
200none
201
202=head1 DESCRIPTION
203
204This module is used internally by Encode::Tcl
205and handles type E of Tcl encodings (7-bit code only).
206
207Control sequences supported by this module are
208ESCAPE SEQUENCEs to designate graphic character sets
209and the following:
210
211 name (abbr.) bit combination
212
213 ESCAPE (ESC) 01/11
214 SHIFT-IN (SI) 00/15
215 SHIFT-OUT (SO) 00/14
216 SINGLE SHIFT TWO (SS2) ESC 04/14
217 SINGLE SHIFT THREE (SS3) ESC 04/15
218
38a64d23 219Designation of control character sets is not supported.
6b6c03af 220
221=head1 SEE ALSO
222
223L<Encode>
224
225L<Encode::Tcl>
226
227L<http://www.itscj.ipsj.or.jp/ISO-IR/> [ISOREG]
228
229=cut