1 package Encode::Tcl::Escape;
3 our $VERSION = do {my @r=(q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r};
4 use base 'Encode::Encoding';
8 use constant SI => "\cO";
9 use constant SO => "\cN";
10 use constant SS2 => "\e\x4E"; # ESC N
11 use constant SS3 => "\e\x4F"; # ESC O
15 my ($obj,$fh,$name) = @_;
16 my(%tbl, @seq, $enc, @esc, %grp, %mbc);
19 next unless /^(\S+)\s+(.*)$/;
20 my ($key,$val) = ($1,$2);
21 $val =~ s/^\{(.*?)\}/$1/g;
22 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
24 if ($enc = Encode->getEncoding($key))
27 ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
30 $val !~ /\e\x24/ ? 1 : # single-byte
31 $val =~ /[\x30-\x3F]$/ ? 2 : # (only 2 is supported)
32 $val =~ /[\x40-\x5F]$/ ? 2 : # double byte
33 $val =~ /[\x60-\x6F]$/ ? 3 : # triple byte
34 $val =~ /[\x70-\x7F]$/ ? 4 :
35 # 4 or more (only 4 is supported)
36 croak("odd sequence is defined");
41 $val =~ /\e\x24?[\x28]/ ? 0 : # G0 : SI
42 $val =~ /\e\x24?[\x29\x2D]/ ? 1 : # G1 : SO
43 $val =~ /\e\x24?[\x2A\x2E]/ ? 2 : # G2 : SS2
44 $val =~ /\e\x24?[\x2B\x2F]/ ? 3 : # G3 : SS3
45 0; # G0 (ESC 02/04 F, etc.)
51 if ($val =~ /^\e(.*)/)
53 push(@esc, quotemeta $1);
56 $obj->{'Grp'} = \%grp; # graphic chars
57 $obj->{'Mbc'} = \%mbc; # bytes per char
58 $obj->{'Seq'} = \@seq; # escape sequences
59 $obj->{'Tbl'} = \%tbl; # encoding tables
60 $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC
66 my ($obj,$str,$chk) = @_;
67 my $tbl = $obj->{'Tbl'};
68 my $seq = $obj->{'Seq'};
69 my $mbc = $obj->{'Mbc'};
70 my $grp = $obj->{'Grp'};
71 my $esc = $obj->{'Esc'};
74 my @sta = ($std, undef, undef, undef); # G0 .. G3 state
75 my $s = 0; # state of SO-SI. 0 (G0) or 1 (G1);
76 my $ss = 0; # state of SS2,SS3. 0 (G0), 2 (G2) or 3 (G3);
82 if ($str =~ s/^($esc)//)
85 $sta[ $grp->{$e} ] = $e if $tbl->{$e};
87 # appearance of "\eN\eO" or "\eO\eN" isn't supposed.
88 # but in that case, the former will be ignored.
89 elsif ($str =~ s/^\x4E//)
93 elsif ($str =~ s/^\x4F//)
99 # strictly, ([\x20-\x2F]*[\x30-\x7E]). '?' for chopped.
100 $str =~ s/^([\x20-\x2F]*[\x30-\x7E]?)//;
101 if ($chk && ! length $str)
103 $str = "\e$1"; # split sequence
106 croak "unknown escape sequence: ESC $1";
110 if ($str =~ s/^\cN//) # SO
114 if ($str =~ s/^\cO//) # SI
119 $cur = $ss ? $sta[$ss] : $sta[$s];
121 length($str) < $mbc->{$cur} and last; # split leading byte
123 my $cc = substr($str, 0, $mbc->{$cur}, '');
125 my $x = $tbl->{$cur}->decode($cc);
126 defined $x or Encode::Tcl::no_map_in_decode($obj->{'Name'}, $cc);
132 my $back = join('', grep defined($_) && $_ ne $std, @sta);
134 $back .= $ss == 2 ? SS2 : $ss == 3 ? SS3 : '';
142 my ($obj,$uni,$chk) = @_;
143 my $tbl = $obj->{'Tbl'};
144 my $seq = $obj->{'Seq'};
145 my $grp = $obj->{'Grp'};
146 my $ini = $obj->{'init'};
149 my @sta = ($std,undef,undef,undef); # G0 .. G3 state
151 my $pG = 0; # previous G: 0 or 1.
152 my $cG = 0; # current G: 0,1,2,3.
154 if ($ini && defined $grp->{$ini})
156 $sta[ $grp->{$ini} ] = $ini;
161 my $ch = substr($uni,0,1,'');
163 foreach my $e_seq (@$seq)
165 $x = $tbl->{$e_seq}->encode($ch, 1);
166 $cur = $e_seq, last if defined $x;
170 $chk or Encode::Tcl::no_map_in_encode(ord($ch), $obj->{'Name'});
174 $str .= $sta[$cG] = $cur unless $cG < 2 && $cur eq $sta[$cG];
176 $str .= $cG == 0 && $pG == 1 ? SI :
177 $cG == 1 && $pG == 0 ? SO :
181 $pG = $cG if $cG < 2;
183 $str .= SI if $pG == 1; # back to G0
184 $str .= $std unless $std eq $sta[0]; # G0 to ASCII
185 $str .= $obj->{'final'}; # necessary? I don't know what is this for.
186 $_[1] = $uni if $chk;
195 Encode::Tcl::Escape - Tcl Escape encodings
203 This module is used internally by Encode::Tcl
204 and handles type E of Tcl encodings (7-bit code only).
206 Control sequences supported by this module are
207 ESCAPE SEQUENCEs to designate graphic character sets
210 name (abbr.) bit combination
215 SINGLE SHIFT TWO (SS2) ESC 04/14
216 SINGLE SHIFT THREE (SS3) ESC 04/15
218 Designation of control character sets are not supported.
226 L<http://www.itscj.ipsj.or.jp/ISO-IR/> [ISOREG]