1 package Encode::Tcl::Escape;
3 our $VERSION = do {my @r=(q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r};
4 use base 'Encode::Encoding';
8 use constant SI => "\cO";
9 use constant SO => "\cN";
10 use constant SS2 => "\e\x4E"; # ESC N
11 use constant SS3 => "\e\x4F"; # ESC O
15 my ($obj,$fh,$name) = @_;
16 my(%tbl, @seq, $enc, @esc, %grp, %mbc);
19 next unless /^(\S+)\s+(.*)$/;
20 my ($key,$val) = ($1,$2);
21 $val =~ s/^\{(.*?)\}/$1/g;
22 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
24 if ($enc = Encode->getEncoding($key))
27 ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
30 $val !~ /\e\x24/ ? 1 : # single-byte
31 $val =~ /[\x30-\x3F]$/ ? 2 : # (only 2 is supported)
32 $val =~ /[\x40-\x5F]$/ ? 2 : # double byte
33 $val =~ /[\x60-\x6F]$/ ? 3 : # triple byte
34 $val =~ /[\x70-\x7F]$/ ? 4 :
35 # 4 or more (only 4 is supported)
36 croak("odd sequence is defined");
41 $val =~ /\e\x24?[\x28]/ ? 0 : # G0 : SI
42 $val =~ /\e\x24?[\x29\x2D]/ ? 1 : # G1 : SO
43 $val =~ /\e\x24?[\x2A\x2E]/ ? 2 : # G2 : SS2
44 $val =~ /\e\x24?[\x2B\x2F]/ ? 3 : # G3 : SS3
45 0; # G0 (ESC 02/04 F, etc.)
51 if ($val =~ /^\e(.*)/)
53 push(@esc, quotemeta $1);
56 $obj->{'Grp'} = \%grp; # graphic chars
57 $obj->{'Mbc'} = \%mbc; # bytes per char
58 $obj->{'Seq'} = \@seq; # escape sequences
59 $obj->{'Tbl'} = \%tbl; # encoding tables
60 $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC
66 my ($obj,$str,$chk) = @_;
67 my $tbl = $obj->{'Tbl'};
68 my $seq = $obj->{'Seq'};
69 my $mbc = $obj->{'Mbc'};
70 my $grp = $obj->{'Grp'};
71 my $esc = $obj->{'Esc'};
74 my @sta = ($std, undef, undef, undef); # G0 .. G3 state
75 my $s = 0; # state of SO-SI. 0 (G0) or 1 (G1);
76 my $ss = 0; # state of SS2,SS3. 0 (G0), 2 (G2) or 3 (G3);
82 if ($str =~ s/^($esc)//)
85 $sta[ $grp->{$e} ] = $e if $tbl->{$e};
87 # appearance of "\eN\eO" or "\eO\eN" isn't supposed.
88 # but in that case, the former will be ignored.
89 elsif ($str =~ s/^\x4E//)
93 elsif ($str =~ s/^\x4F//)
99 # strictly, ([\x20-\x2F]*[\x30-\x7E]). '?' for chopped.
100 $str =~ s/^([\x20-\x2F]*[\x30-\x7E]?)//;
101 if ($chk && ! length $str)
103 $str = "\e$1"; # split sequence
106 croak "unknown escape sequence: ESC $1";
110 if ($str =~ s/^\cN//) # SO
114 if ($str =~ s/^\cO//) # SI
119 $cur = $ss ? $sta[$ss] : $sta[$s];
121 length($str) < $mbc->{$cur} and last; # split leading byte
123 my $cc = substr($str, 0, $mbc->{$cur}, '');
125 my $x = $tbl->{$cur}->decode($cc);
126 defined $x or Encode::Tcl::no_map_in_decode($obj->{'Name'}, $cc);
132 my $back = join('', grep defined($_) && $_ ne $std, @sta);
134 $back .= $ss == 2 ? SS2 : $ss == 3 ? SS3 : '';
142 my ($obj,$uni,$chk) = @_;
143 my $tbl = $obj->{'Tbl'};
144 my $seq = $obj->{'Seq'};
145 my $grp = $obj->{'Grp'};
146 my $ini = $obj->{'init'};
149 my @sta = ($std,undef,undef,undef); # G0 .. G3 state
151 my $pG = 0; # previous G: 0 or 1.
152 my $cG = 0; # current G: 0,1,2,3.
154 if ($ini && defined $grp->{$ini})
156 $sta[ $grp->{$ini} ] = $ini;
161 my $ch = substr($uni,0,1,'');
163 foreach my $e_seq (@$seq)
165 $x = $tbl->{$e_seq}->encode($ch, 1);
166 $cur = $e_seq, last if defined $x;
170 $chk or Encode::Tcl::no_map_in_encode(ord($ch), $obj->{'Name'});
174 $str .= $sta[$cG] = $cur unless $cG < 2 && $cur eq $sta[$cG];
176 $str .= $cG == 0 && $pG == 1 ? SI :
177 $cG == 1 && $pG == 0 ? SO :
181 $pG = $cG if $cG < 2;
183 $str .= SI if $pG == 1; # back to G0
184 $str .= $std unless $std eq $sta[0]; # G0 to ASCII
185 $str .= $obj->{'final'}; # necessary? I don't know what is this for.
186 $_[1] = $uni if $chk;