SADAHIRO Tomoyuki.
p4raw-id: //depot/perl@15321
ext/Encode/Encode/dingbats.enc Encode table
ext/Encode/Encode/dingbats.ucm Encode table
ext/Encode/Encode/euc-cn.enc Encode table
-ext/Encode/Encode/euc-jp+0212.ucm Encode extension
-ext/Encode/Encode/euc-jp-0212.enc Encode table
+ext/Encode/Encode/euc-jp+0212.ucm Encode table
ext/Encode/Encode/euc-jp.enc Encode table
ext/Encode/Encode/euc-jp.ucm Encode table
ext/Encode/Encode/euc-kr.enc Encode table
ext/Encode/lib/Encode/JP/JIS.pm Encode extension
ext/Encode/lib/Encode/Tcl.pm Encode extension
ext/Encode/lib/Encode/Tcl/Escape.pm Encode extension
-ext/Encode/lib/Encode/Tcl/Extended.pm Encode extension
ext/Encode/lib/Encode/Tcl/Table.pm Encode extension
ext/Encode/lib/Encode/ucs2_le.pm Encode extension
ext/Encode/lib/Encode/Unicode.pm Encode extension
+++ /dev/null
-# Encoding file: euc-jp-0212, extended
-X
-name euc-jp-0212
-ascii {}
-jis0208 >{}
-7bit-kana >\x8e
-jis0212 >\x8f
Encode/dingbats.enc
Encode/dingbats.ucm
Encode/euc-cn.enc
-Encode/euc-jp-0212.enc
Encode/euc-jp.enc
Encode/euc-jp.ucm
Encode/euc-jp+0212.ucm
lib/Encode/JP/JIS.pm
lib/Encode/Tcl.pm
lib/Encode/Tcl/Escape.pm
-lib/Encode/Tcl/Extended.pm
lib/Encode/Tcl/Table.pm
lib/Encode/Unicode.pm
lib/Encode/XS.pm
$val =~ /[\x30-\x3F]$/ ? 2 : # (only 2 is supported)
$val =~ /[\x40-\x5F]$/ ? 2 : # double byte
$val =~ /[\x60-\x6F]$/ ? 3 : # triple byte
- $val =~ /[\x70-\x7F]$/ ? 4 :
+ $val =~ /[\x70-\x7E]$/ ? 4 :
# 4 or more (only 4 is supported)
croak("odd sequence is defined");
}
else
{
- # strictly, ([\x20-\x2F]*[\x30-\x7E]). '?' for chopped.
- $str =~ s/^([\x20-\x2F]*[\x30-\x7E]?)//;
+ # strictly, ([\x21-\x2F]*[\x30-\x7E]). '?' for chopped.
+ $str =~ s/^([\x21-\x2F]*[\x30-\x7E]?)//;
if ($chk && ! length $str)
{
$str = "\e$1"; # split sequence
SINGLE SHIFT TWO (SS2) ESC 04/14
SINGLE SHIFT THREE (SS3) ESC 04/15
-Designation of control character sets are not supported.
+Designation of control character sets is not supported.
=head1 SEE ALSO
+++ /dev/null
-package Encode::Tcl::Extended;
-use strict;
-our $VERSION = do { my @r = (q$Revision: 0.90 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
-
-use base 'Encode::Encoding';
-
-use Carp;
-
-sub read
-{
- my ($obj,$fh,$name) = @_;
- my(%tbl, $enc, %ssc, @key);
- while (<$fh>)
- {
- next unless /^(\S+)\s+(.*)$/;
- my ($key,$val) = ($1,$2);
- $val =~ s/\{(.*?)\}/$1/;
- $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
-
- if($enc = Encode->getEncoding($key))
- {
- push @key, $val;
- $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
- $ssc{$val} = substr($val,1) if $val =~ /^>/;
- }
- else
- {
- $obj->{$key} = $val;
- }
- }
- $obj->{'SSC'} = \%ssc; # single shift char
- $obj->{'Tbl'} = \%tbl; # encoding tables
- $obj->{'Key'} = \@key; # keys of table hash
- return $obj;
-}
-
-sub decode
-{
- my ($obj,$str,$chk) = @_;
- my $name = $obj->{'Name'};
- my $tbl = $obj->{'Tbl'};
- my $ssc = $obj->{'SSC'};
- my $cur = ''; # current state
- my $uni;
- while (length($str))
- {
- my $cc = substr($str,0,1,'');
- my $ch = ord($cc);
- if(!$cur && $ch > 0x7F)
- {
- $cur = '>';
- $cur .= $cc, next if $ssc->{$cur.$cc};
- }
- $ch ^= 0x80 if $cur;
-
- if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table')
- {
- $uni .= $tbl->{$cur}->decode($cc);
- $cur = '';
- next;
- }
- my $rep = $tbl->{$cur}->{'Rep'};
- my $touni = $tbl->{$cur}->{'ToUni'};
- my $x;
- if (&$rep($ch) eq 'C')
- {
- $x = $touni->[0][$ch];
- }
- else
- {
- if(! length $str)
- {
- $str = $cc; # split leading byte
- last;
- }
- my $c2 = substr($str,0,1,'');
- $cc .= $c2;
- $x = $touni->[$ch][0x80 ^ ord($c2)];
- }
- unless (defined $x)
- {
- Encode::Tcl::no_map_in_decode($name, $cc.$str);
- }
- $uni .= $x;
- $cur = '';
- }
- if($chk)
- {
- $cur =~ s/>//;
- $_[1] = $cur ne '' ? $cur.$str : $str;
- }
- return $uni;
-}
-
-sub encode
-{
- my ($obj,$uni,$chk) = @_;
- my $name = $obj->{'Name'};
- my $tbl = $obj->{'Tbl'};
- my $ssc = $obj->{'SSC'};
- my $key = $obj->{'Key'};
- my $str;
- my $cur;
-
- while (length($uni))
- {
- my $ch = substr($uni,0,1,'');
- my $x;
- foreach my $k (@$key)
- {
- $x = ref($tbl->{$k}) ne 'Encode::Tcl::Table'
- ? $k =~ /^>/
- ? $tbl->{$k}->encode(chr(0x80 ^ ord $ch),1)
- : $tbl->{$k}->encode($ch,1)
- : $tbl->{$k}->{FmUni}->{$ch};
- $cur = $k, last if defined $x;
- }
- unless (defined $x)
- {
- unless($chk)
- {
- Encode::Tcl::no_map_in_encode(ord($ch), $name)
- }
- return undef;
- }
- if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table')
- {
- my $def = $tbl->{$cur}->{'Def'};
- my $rep = $tbl->{$cur}->{'Rep'};
- my $r = &$rep($x);
- $x = pack($r,
- $cur =~ /^>/
- ? $r eq 'C' ? 0x80 ^ $x : 0x8080 ^ $x
- : $x);
- }
- $str .= $ssc->{$cur} if defined $ssc->{$cur};
- $str .= $x;
- }
- $_[1] = $uni if $chk;
- return $str;
-}
-1;
-__END__
-
-=head1 NAME
-
-Encode::Tcl::Extended - Tcl EUC encodings
-
-=head1 SYNOPSIS
-
-none
-
-=head1 DESCRIPTION
-
-This module is used internally by Encode::Tcl
-and handles type X of Tcl encodings (a Perl extenstion).
-
-Only F<euc-jp-0212.enc> belongs to type X.
-This is a variant of EUC-JP with JIS X 0212 in G3.
-If another Encode:: module would support the above encoding,
-this module should be removed.
-
-=head1 SEE ALSO
-
-L<Encode>
-
-L<Encode::Tcl>
-
-L<Encode::JP>
-
-=cut
and handles types S, D, and M of Tcl encodings.
Implementation for type M is restricted to encodings
-in which bytes per a character is up to 2.
+in which the number of bytes per a character is up to 2.
=head1 SEE ALSO
use Encode qw(encode decode);
use Encode::Tcl;
-my @encodings = qw(euc-cn euc-jp euc-kr big5 shiftjis); # CJK
+my @encodings = qw(euc-cn euc-kr big5 shiftjis); # CJK
my $n = 2;
my %greek = (
'euc-cn' => [0xA6A1..0xA6B8,0xA6C1..0xA6D8],
- 'euc-jp' => [0xA6A1..0xA6B8,0xA6C1..0xA6D8],
'euc-kr' => [0xA5C1..0xA5D8,0xA5E1..0xA5F8],
'big5' => [0xA344..0xA35B,0xA35C..0xA373],
'shiftjis' => [0x839F..0x83B6,0x83BF..0x83D6],
my %ideodigit = ( # cjk ideograph 'one' to 'ten'
'euc-cn' => [qw(d2bb b6fe c8fd cbc4 cee5 c1f9 c6df b0cb bec5 caae)],
- 'euc-jp' => [qw(b0ec c6f3 bbb0 bbcd b8de cfbb bcb7 c8ac b6e5 bdbd)],
'euc-kr' => [qw(ece9 eca3 dfb2 decc e7e9 d7bf f6d2 f8a2 cefa e4a8)],
'big5' => [qw(a440 a447 a454 a57c a4ad a4bb a443 a44b a445 a451)],
'shiftjis' => [qw(88ea 93f1 8e4f 8e6c 8cdc 985a 8eb5 94aa 8be3 8f5c)],
);
my @ideodigit = qw(one two three four five six seven eight nine ten);
-my $jis = '7bit-jis';
my $kr = '2022-kr';
my %esc_str;
-$esc_str{$jis} = {qw(
- 1b24422422242424262428242a1b2842
- 3042304430463048304a
- 1b284931323334355d1b2842
- ff71ff72ff73ff74ff75ff9d
- 1b2442467c4b5c1b2842
- 65e5672c
- 3132331b244234413b7a1b28425065726c
- 0031003200336f225b57005000650072006c
- 546573740a1b24422546253925481b28420a
- 0054006500730074000a30c630b930c8000a
-)};
-
$esc_str{$kr} = {qw(
1b2429430e2a22213e0f410d0a
304200b10041000d000a
my @ary_buff = ( # [ encoding, decoded, encoded ]
# type-M
["euc-cn", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
- ["euc-jp", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
- ["euc-jp", han_kana, "\x8E\xB1\x8E\xB2\x8E\xB3\x8E\xB4\x8E\xB5" ],
["euc-kr", hiragana, "\xAA\xA2\xAA\xA4\xAA\xA6\xAA\xA8\xAA\xAA" ],
["shiftjis", hiragana, "\x82\xA0\x82\xA2\x82\xA4\x82\xA6\x82\xA8" ],
["shiftjis", han_kana, "\xB1\xB2\xB3\xB4\xB5" ],
# type-E
["2022-cn", hiragana, "\e\$)A\cN". '$"$$$&$($*' . "\cO" ],
- ["2022-jp", hiragana, "\e\$B".'$"$$$&$($*'."\e(B" ],
["2022-kr", hiragana, "\e\$)C\cN". '*"*$*&*(**' . "\cO" ],
-# [ $jis, han_kana, "\e\(I".'12345'."\e(B" ],
["2022-jp1", macron, "\e\$(D\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B"],
["2022-jp2", "\x{C0}" . macron . "\x{C1}",
"\e\$(D\e.A\eN\x40\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B\eN\x41"],
-# type-X
- ["euc-jp-0212", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
- ["euc-jp-0212", han_kana, "\x8E\xB1\x8E\xB2\x8E\xB3\x8E\xB4\x8E\xB5" ],
- ["euc-jp-0212", macron,
- "\x8F\xAA\xA7\x8F\xAA\xB7\x8F\xAA\xC5\x8F\xAA\xD7\x8F\xAA\xE9" ],
);
plan test => $n*@encodings + $n*@encodings*@greek