From: SADAHIRO Tomoyuki Date: Fri, 1 Feb 2002 23:51:03 +0000 (+0900) Subject: [Patch] Encode::Tcl::Escape, more ISO2022 like X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=28e59c410a339daf5892ca9867eaff1ef0079c0e;p=p5sagit%2Fp5-mst-13.2.git [Patch] Encode::Tcl::Escape, more ISO2022 like Message-Id: <20020201234727.15ED.BQW10602@nifty.com> p4raw-id: //depot/perl@14523 --- diff --git a/ext/Encode/lib/Encode/Tcl/Escape.pm b/ext/Encode/lib/Encode/Tcl/Escape.pm index 572e2bf..d3f55d7 100644 --- a/ext/Encode/lib/Encode/Tcl/Escape.pm +++ b/ext/Encode/lib/Encode/Tcl/Escape.pm @@ -7,13 +7,13 @@ use Carp; use constant SI => "\cO"; use constant SO => "\cN"; -use constant SS2 => "\eN"; -use constant SS3 => "\eO"; +use constant SS2 => "\e\x4E"; # ESC N +use constant SS3 => "\e\x4F"; # ESC O sub read { my ($obj,$fh,$name) = @_; - my(%tbl, @seq, $enc, @esc, %grp); + my(%tbl, @seq, $enc, @esc, %grp, %mbc); while (<$fh>) { next unless /^(\S+)\s+(.*)$/; @@ -21,27 +21,40 @@ sub read $val =~ s/^\{(.*?)\}/$1/g; $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; - if($enc = Encode->getEncoding($key)) + if ($enc = Encode->getEncoding($key)) { - $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; + $tbl{$val} = + ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; + + $mbc{$val} = + $val !~ /\e\x24/ ? 1 : # single-byte + $val =~ /[\x30-\x3F]$/ ? 2 : # (only 2 is supported) + $val =~ /[\x40-\x5F]$/ ? 2 : # double byte + $val =~ /[\x60-\x6F]$/ ? 3 : # triple byte + $val =~ /[\x70-\x7F]$/ ? 4 : + # 4 or more (only 4 is supported) + croak("odd sequence is defined"); + push @seq, $val; + $grp{$val} = - $val =~ m|[(]| ? 0 : # G0 : SI eq "\cO" - $val =~ m|[)-]| ? 1 : # G1 : SO eq "\cN" - $val =~ m|[*.]| ? 2 : # G2 : SS2 eq "\eN" - $val =~ m|[+/]| ? 3 : # G3 : SS3 eq "\eO" - 0; # G0 + $val =~ /\e\x24?[\x28]/ ? 0 : # G0 : SI + $val =~ /\e\x24?[\x29\x2D]/ ? 1 : # G1 : SO + $val =~ /\e\x24?[\x2A\x2E]/ ? 2 : # G2 : SS2 + $val =~ /\e\x24?[\x2B\x2F]/ ? 3 : # G3 : SS3 + 0; # G0 (ESC 02/04 F, etc.) } else { $obj->{$key} = $val; } - if($val =~ /^\e(.*)/) + if ($val =~ /^\e(.*)/) { push(@esc, quotemeta $1); } } $obj->{'Grp'} = \%grp; # graphic chars + $obj->{'Mbc'} = \%mbc; # bytes per char $obj->{'Seq'} = \@seq; # escape sequences $obj->{'Tbl'} = \%tbl; # encoding tables $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC @@ -51,13 +64,11 @@ sub read sub decode { my ($obj,$str,$chk) = @_; - my $name = $obj->{'Name'}; my $tbl = $obj->{'Tbl'}; my $seq = $obj->{'Seq'}; + my $mbc = $obj->{'Mbc'}; my $grp = $obj->{'Grp'}; my $esc = $obj->{'Esc'}; - my $ini = $obj->{'init'}; - my $fin = $obj->{'final'}; my $std = $seq->[0]; my $cur = $std; my @sta = ($std, undef, undef, undef); # G0 .. G3 state @@ -66,21 +77,20 @@ sub decode my $uni; while (length($str)) { - my $cc = substr($str,0,1,''); - if($cc eq "\e") + if ($str =~ s/^\e//) { - if($str =~ s/^($esc)//) + if ($str =~ s/^($esc)//) { my $e = "\e$1"; $sta[ $grp->{$e} ] = $e if $tbl->{$e}; } # appearance of "\eN\eO" or "\eO\eN" isn't supposed. # but in that case, the former will be ignored. - elsif($str =~ s/^N//) + elsif ($str =~ s/^\x4E//) { $ss = 2; } - elsif($str =~ s/^O//) + elsif ($str =~ s/^\x4F//) { $ss = 3; } @@ -88,7 +98,7 @@ sub decode { # strictly, ([\x20-\x2F]*[\x30-\x7E]). '?' for chopped. $str =~ s/^([\x20-\x2F]*[\x30-\x7E]?)//; - if($chk && ! length $str) + if ($chk && ! length $str) { $str = "\e$1"; # split sequence last; @@ -97,54 +107,31 @@ sub decode } next; } - if($cc eq SO) + if ($str =~ s/^\cN//) # SO { $s = 1; next; } - if($cc eq SI) + if ($str =~ s/^\cO//) # SI { $s = 0; next; } $cur = $ss ? $sta[$ss] : $sta[$s]; - if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table') - { - $uni .= $tbl->{$cur}->decode($cc); - $ss = 0; - next; - } - my $ch = ord($cc); - my $rep = $tbl->{$cur}->{'Rep'}; - my $touni = $tbl->{$cur}->{'ToUni'}; - my $x; - if (&$rep($ch) eq 'C') - { - $x = $touni->[0][$ch]; - } - else - { - if(! length $str) - { - $str = $cc; # split leading byte - last; - } - my $c2 = substr($str,0,1,''); - $cc .= $c2; - $x = $touni->[$ch][ord($c2)]; - } - unless (defined $x) - { - Encode::Tcl::no_map_in_decode($name, $cc.$str); - } + length($str) < $mbc->{$cur} and last; # split leading byte + + my $cc = substr($str, 0, $mbc->{$cur}, ''); + + my $x = $tbl->{$cur}->decode($cc); + defined $x or Encode::Tcl::no_map_in_decode($obj->{'Name'}, $cc); $uni .= $x; $ss = 0; } - if($chk) + if ($chk) { my $back = join('', grep defined($_) && $_ ne $std, @sta); $back .= SO if $s; - $back .= $ss == 2 ? SS2 : SS3 if $ss; + $back .= $ss == 2 ? SS2 : $ss == 3 ? SS3 : ''; $_[1] = $back.$str; } return $uni; @@ -153,12 +140,10 @@ sub decode sub encode { my ($obj,$uni,$chk) = @_; - my $name = $obj->{'Name'}; my $tbl = $obj->{'Tbl'}; my $seq = $obj->{'Seq'}; my $grp = $obj->{'Grp'}; my $ini = $obj->{'init'}; - my $fin = $obj->{'final'}; my $std = $seq->[0]; my $str = $ini; my @sta = ($std,undef,undef,undef); # G0 .. G3 state @@ -166,7 +151,7 @@ sub encode my $pG = 0; # previous G: 0 or 1. my $cG = 0; # current G: 0,1,2,3. - if($ini && defined $grp->{$ini}) + if ($ini && defined $grp->{$ini}) { $sta[ $grp->{$ini} ] = $ini; } @@ -177,25 +162,14 @@ sub encode my $x; foreach my $e_seq (@$seq) { - $x = ref($tbl->{$e_seq}) eq 'Encode::Tcl::Table' - ? $tbl->{$e_seq}->{FmUni}->{$ch} - : $tbl->{$e_seq}->encode($ch,1); + $x = $tbl->{$e_seq}->encode($ch, 1); $cur = $e_seq, last if defined $x; } unless (defined $x) { - unless($chk) - { - Encode::Tcl::no_map_in_encode(ord($ch), $name) - } + $chk or Encode::Tcl::no_map_in_encode(ord($ch), $obj->{'Name'}); return undef; } - if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table') - { - my $def = $tbl->{$cur}->{'Def'}; - my $rep = $tbl->{$cur}->{'Rep'}; - $x = pack(&$rep($x),$x); - } $cG = $grp->{$cur}; $str .= $sta[$cG] = $cur unless $cG < 2 && $cur eq $sta[$cG]; @@ -207,8 +181,8 @@ sub encode $pG = $cG if $cG < 2; } $str .= SI if $pG == 1; # back to G0 - $str .= $std unless $std eq $sta[0]; # GO to ASCII - $str .= $fin; # necessary? + $str .= $std unless $std eq $sta[0]; # G0 to ASCII + $str .= $obj->{'final'}; # necessary? I don't know what is this for. $_[1] = $uni if $chk; return $str; }