1 package Encode::Tcl::Escape;
3 our $VERSION = do { my @r = (q$Revision: 0.92 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
5 use base 'Encode::Encoding';
9 use constant SI => "\cO";
10 use constant SO => "\cN";
11 use constant SS2 => "\e\x4E"; # ESC N
12 use constant SS3 => "\e\x4F"; # ESC O
16 my ($obj,$fh,$name) = @_;
17 my(%tbl, @seq, $enc, @esc, %grp, %mbc);
20 next unless /^(\S+)\s+(.*)$/;
21 my ($key,$val) = ($1,$2);
22 $val =~ s/^\{(.*?)\}/$1/g;
23 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
25 if ($enc = Encode->getEncoding($key))
28 ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
31 $val !~ /\e\x24/ ? 1 : # single-byte
32 $val =~ /[\x30-\x3F]$/ ? 2 : # (only 2 is supported)
33 $val =~ /[\x40-\x5F]$/ ? 2 : # double byte
34 $val =~ /[\x60-\x6F]$/ ? 3 : # triple byte
35 $val =~ /[\x70-\x7E]$/ ? 4 :
36 # 4 or more (only 4 is supported)
37 croak("odd sequence is defined");
42 $val =~ /\e\x24?[\x28]/ ? 0 : # G0 : SI
43 $val =~ /\e\x24?[\x29\x2D]/ ? 1 : # G1 : SO
44 $val =~ /\e\x24?[\x2A\x2E]/ ? 2 : # G2 : SS2
45 $val =~ /\e\x24?[\x2B\x2F]/ ? 3 : # G3 : SS3
46 0; # G0 (ESC 02/04 F, etc.)
52 if ($val =~ /^\e(.*)/)
54 push(@esc, quotemeta $1);
57 $obj->{'Grp'} = \%grp; # graphic chars
58 $obj->{'Mbc'} = \%mbc; # bytes per char
59 $obj->{'Seq'} = \@seq; # escape sequences
60 $obj->{'Tbl'} = \%tbl; # encoding tables
61 $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC
67 my ($obj,$str,$chk) = @_;
68 my $tbl = $obj->{'Tbl'};
69 my $seq = $obj->{'Seq'};
70 my $mbc = $obj->{'Mbc'};
71 my $grp = $obj->{'Grp'};
72 my $esc = $obj->{'Esc'};
75 my @sta = ($std, undef, undef, undef); # G0 .. G3 state
76 my $s = 0; # state of SO-SI. 0 (G0) or 1 (G1);
77 my $ss = 0; # state of SS2,SS3. 0 (G0), 2 (G2) or 3 (G3);
83 if ($str =~ s/^($esc)//)
86 $sta[ $grp->{$e} ] = $e if $tbl->{$e};
88 # appearance of "\eN\eO" or "\eO\eN" isn't supposed.
89 # but in that case, the former will be ignored.
90 elsif ($str =~ s/^\x4E//)
94 elsif ($str =~ s/^\x4F//)
100 # strictly, ([\x21-\x2F]*[\x30-\x7E]). '?' for chopped.
101 $str =~ s/^([\x21-\x2F]*[\x30-\x7E]?)//;
102 if ($chk && ! length $str)
104 $str = "\e$1"; # split sequence
107 croak "unknown escape sequence: ESC $1";
111 if ($str =~ s/^\cN//) # SO
115 if ($str =~ s/^\cO//) # SI
120 $cur = $ss ? $sta[$ss] : $sta[$s];
122 length($str) < $mbc->{$cur} and last; # split leading byte
124 my $cc = substr($str, 0, $mbc->{$cur}, '');
126 my $x = $tbl->{$cur}->decode($cc);
127 defined $x or Encode::Tcl::no_map_in_decode($obj->{'Name'}, $cc);
133 my $back = join('', grep defined($_) && $_ ne $std, @sta);
135 $back .= $ss == 2 ? SS2 : $ss == 3 ? SS3 : '';
143 my ($obj,$uni,$chk) = @_;
144 my $tbl = $obj->{'Tbl'};
145 my $seq = $obj->{'Seq'};
146 my $grp = $obj->{'Grp'};
147 my $ini = $obj->{'init'};
150 my @sta = ($std,undef,undef,undef); # G0 .. G3 state
152 my $pG = 0; # previous G: 0 or 1.
153 my $cG = 0; # current G: 0,1,2,3.
155 if ($ini && defined $grp->{$ini})
157 $sta[ $grp->{$ini} ] = $ini;
162 my $ch = substr($uni,0,1,'');
164 foreach my $e_seq (@$seq)
166 $x = $tbl->{$e_seq}->encode($ch, 1);
167 $cur = $e_seq, last if defined $x;
171 $chk or Encode::Tcl::no_map_in_encode(ord($ch), $obj->{'Name'});
175 $str .= $sta[$cG] = $cur unless $cG < 2 && $cur eq $sta[$cG];
177 $str .= $cG == 0 && $pG == 1 ? SI :
178 $cG == 1 && $pG == 0 ? SO :
182 $pG = $cG if $cG < 2;
184 $str .= SI if $pG == 1; # back to G0
185 $str .= $std unless $std eq $sta[0]; # G0 to ASCII
186 $str .= $obj->{'final'}; # necessary? I don't know what is this for.
187 $_[1] = $uni if $chk;
196 Encode::Tcl::Escape - Tcl Escape encodings
204 This module is used internally by Encode::Tcl
205 and handles type E of Tcl encodings (7-bit code only).
207 Control sequences supported by this module are
208 ESCAPE SEQUENCEs to designate graphic character sets
211 name (abbr.) bit combination
216 SINGLE SHIFT TWO (SS2) ESC 04/14
217 SINGLE SHIFT THREE (SS3) ESC 04/15
219 Designation of control character sets is not supported.
227 L<http://www.itscj.ipsj.or.jp/ISO-IR/> [ISOREG]