From: SADAHIRO Tomoyuki Date: Sat, 30 Jun 2001 07:33:37 +0000 (+0900) Subject: [PATCH] Encode.pm to use escape-sequence encoding X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e91cad5bebe0a5356f113f66ad716f23981f3fb5;p=p5sagit%2Fp5-mst-13.2.git [PATCH] Encode.pm to use escape-sequence encoding Date: Sat, 30 Jun 2001 07:33:37 +0900 Message-Id: <20010630073226.7C79.BQW10602@nifty.com> Subject: Re: [PATCH] Encode.pm to use escape-sequence encoding From: SADAHIRO Tomoyuki Date: Sat, 30 Jun 2001 21:38:14 +0900 Message-Id: <20010630213554.F67A.BQW10602@nifty.com> p4raw-id: //depot/perl@11036 --- diff --git a/MANIFEST b/MANIFEST index 658e08c..fe882a2 100644 --- a/MANIFEST +++ b/MANIFEST @@ -174,6 +174,9 @@ ext/Encode/encengine.c Encode extension ext/Encode/encode.h Encode extension ext/Encode/Encode.pm Encode extension ext/Encode/Encode.xs Encode extension +ext/Encode/Encode/7bit-jis.enc Encoding tables +ext/Encode/Encode/7bit-kana.enc Encoding tables +ext/Encode/Encode/7bit-kr.enc Encoding tables ext/Encode/Encode/ascii.enc Encoding tables ext/Encode/Encode/ascii.ucm Encoding tables ext/Encode/Encode/big5.enc Encoding tables @@ -716,9 +719,8 @@ lib/abbrev.pl An abbreviation table builder lib/AnyDBM_File.pm Perl module to emulate dbmopen lib/AnyDBM_File.t See if AnyDBM_File works lib/assert.pl assertion and panic with stack trace -lib/Attribute/Handlers/Changes Attribute::Handlers -lib/Attribute/Handlers/README Attribute::Handlers lib/Attribute/Handlers.pm Attribute::Handlers +lib/Attribute/Handlers/Changes Attribute::Handlers lib/Attribute/Handlers/demo/demo.pl Attribute::Handlers demo lib/Attribute/Handlers/demo/Demo.pm Attribute::Handlers demo lib/Attribute/Handlers/demo/demo2.pl Attribute::Handlers demo @@ -733,6 +735,7 @@ lib/Attribute/Handlers/demo/demo_range.pl Attribute::Handlers demo lib/Attribute/Handlers/demo/demo_rawdata.pl Attribute::Handlers demo lib/Attribute/Handlers/demo/Descriptions.pm Attribute::Handlers demo lib/Attribute/Handlers/demo/MyClass.pm Attribute::Handlers demo +lib/Attribute/Handlers/README Attribute::Handlers lib/Attribute/Handlers/test.pl See if Attribute::Handlers works lib/attributes.pm For "sub foo : attrlist" lib/AutoLoader.pm Autoloader base class @@ -1095,9 +1098,9 @@ lib/Test/Harness.pm A test harness lib/Test/Harness.t See if Test::Harness works lib/Test/More.pm More utilities for writing tests lib/Test/More/Changes Test::More changes -lib/Test/More/t/More.t Test::More test, basic operation lib/Test/More/t/fail-like.t Test::More test, like() and qr// bug lib/Test/More/t/fail.t Test::More test, failing tests +lib/Test/More/t/More.t Test::More test, basic operation lib/Test/More/t/plan_is_noplan.t Test::More test, noplan lib/Test/More/t/skipall.t Test::More test, skipping all tests lib/Test/Simple.pm Basic utility for writing tests @@ -1519,10 +1522,10 @@ NetWare/Nwpipe.c Netware port NetWare/nwpipe.h Netware port NetWare/nwplglob.c Netware port NetWare/nwplglob.h Netware port +NetWare/nwstdio.h Netware port NetWare/NWTInfo.c Netware port NetWare/nwtinfo.h Netware port NetWare/NWUtil.c Netware port -NetWare/nwstdio.h Netware port NetWare/nwutil.h Netware port NetWare/perlsdio.h Netware port NetWare/t/NWModify.pl Netware port @@ -1546,7 +1549,6 @@ os2/dl_os2.c Addon for dl_open os2/Makefile.SHs Shared library generation for OS/2 os2/os2.c Additional code for OS/2 os2/os2.sym Additional symbols to export -os2/os2_base.t Additional tests for builtin methods os2/OS2/ExtAttr/Changes EA access module os2/OS2/ExtAttr/ExtAttr.pm EA access module os2/OS2/ExtAttr/ExtAttr.xs EA access module @@ -1589,6 +1591,7 @@ os2/OS2/REXX/t/rx_vrexx.t DLL access module os2/os2add.sym Overriding symbols to export os2/os2ish.h Header for OS/2 os2/os2thread.h pthread-like typedefs +os2/os2_base.t Additional tests for builtin methods os2/perl2cmd.pl Corrects installed binaries under OS/2 patchlevel.h The current patch level of perl perl.c main() diff --git a/ext/Encode/Encode/7bit-jis.enc b/ext/Encode/Encode/7bit-jis.enc new file mode 100644 index 0000000..eae9e31 --- /dev/null +++ b/ext/Encode/Encode/7bit-jis.enc @@ -0,0 +1,12 @@ +# Encoding file: 7bit-jis, escape-driven +E +name 7bit-jis +init {} +final {} +ascii \x1b(B +ascii \x1b(J +7bit-kana \x1b(I +jis0208 \x1b$B +jis0208 \x1b$@ +jis0208 \x1b&@\x1b$B +jis0212 \x1b$(D diff --git a/ext/Encode/Encode/7bit-kana.enc b/ext/Encode/Encode/7bit-kana.enc new file mode 100644 index 0000000..871dbf6 --- /dev/null +++ b/ext/Encode/Encode/7bit-kana.enc @@ -0,0 +1,20 @@ +# Encoding file: 7bit-kana, single-byte +S +0025 0 1 +00 +0000000100020003000400050006000700080009000A000B000C000D00000000 +0010001100120013001400150016001700180019001A0000001C001D001E001F +0000FF61FF62FF63FF64FF65FF66FF67FF68FF69FF6AFF6BFF6CFF6DFF6EFF6F +FF70FF71FF72FF73FF74FF75FF76FF77FF78FF79FF7AFF7BFF7CFF7DFF7EFF7F +FF80FF81FF82FF83FF84FF85FF86FF87FF88FF89FF8AFF8BFF8CFF8DFF8EFF8F +FF90FF91FF92FF93FF94FF95FF96FF97FF98FF99FF9AFF9BFF9CFF9DFF9EFF9F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 diff --git a/ext/Encode/Encode/7bit-kr.enc b/ext/Encode/Encode/7bit-kr.enc new file mode 100644 index 0000000..30c5395 --- /dev/null +++ b/ext/Encode/Encode/7bit-kr.enc @@ -0,0 +1,7 @@ +# Encoding file: 7bit-kr, escape-driven +E +name 7bit-kr +init \x1b$)C +final {} +ascii \x0f +ksc5601 \x0e diff --git a/ext/Encode/Encode/Tcl.pm b/ext/Encode/Encode/Tcl.pm index dc6455d..f862eef 100644 --- a/ext/Encode/Encode/Tcl.pm +++ b/ext/Encode/Encode/Tcl.pm @@ -174,7 +174,7 @@ sub decode my ($obj,$str,$chk) = @_; my $rep = $obj->{'Rep'}; my $touni = $obj->{'ToUni'}; - my $uni = ''; + my $uni; while (length($str)) { my $ch = ord(substr($str,0,1,'')); @@ -204,9 +204,9 @@ sub encode { my ($obj,$uni,$chk) = @_; my $fmuni = $obj->{'FmUni'}; - my $str = ''; my $def = $obj->{'Def'}; my $rep = $obj->{'Rep'}; + my $str; while (length($uni)) { my $ch = substr($uni,0,1,''); @@ -229,27 +229,130 @@ use Carp; sub read { - my ($class,$fh,$name) = @_; - my %self = (Name => $name, Num => 0); + my ($obj,$fh,$name) = @_; + my(%tbl, @esc, $enc); while (<$fh>) { my ($key,$val) = /^(\S+)\s+(.*)$/; $val =~ s/^\{(.*?)\}/$1/g; $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; - $self{$key} = $val; + if($enc = Encode->getEncoding($key)){ + $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; + push @esc, $val; + }else{ + $obj->{$key} = $val; + } } - return bless \%self,$class; + $obj->{'Ctl'} = \@esc; + $obj->{'Tbl'} = \%tbl; + return $obj; } sub decode { - croak("Not implemented yet"); + my ($obj,$str,$chk) = @_; + my $tbl = $obj->{'Tbl'}; + my $ctl = $obj->{'Ctl'}; + my $ini = $obj->{'init'}; + my $fin = $obj->{'final'}; + my $std = $ctl->[0]; + my $cur = $std; + my $uni; + while (length($str)){ + my $uch = substr($str,0,1,''); + if($uch eq "\e"){ + $str =~ s/^([\x20-\x2F]*[\x30-\x7E](?:\x1b[\x20-\x2F]*[\x30-\x7E])*)//; + my $esc = "\e$1"; + if($tbl->{$esc}){ $cur = $esc } + elsif($esc eq $ini || $esc eq $fin){ $cur = $std } + else{carp "unknown escape sequence" } + next; + } + if($uch eq "\x0e" || $uch eq "\x0f"){ + $cur = $uch and next; + } + my $x; + if(ref($tbl->{$cur}) eq 'Encode::XS'){ + $uni .= $tbl->{$cur}->decode($uch); + next; + } + my $ch = ord($uch); + my $rep = $tbl->{$cur}->{'Rep'}; + my $touni = $tbl->{$cur}->{'ToUni'}; + if (&$rep($ch) eq 'C') + { + $x = $touni->[0][$ch]; + } + else + { + $x = $touni->[$ch][ord(substr($str,0,1,''))]; + } + unless (defined $x) + { + last if $chk; + # What do we do here ? + $x = ''; + } + $uni .= $x; + } + $_[1] = $str if $chk; + return $uni; } sub encode { - croak("Not implemented yet"); -} + my ($obj,$uni,$chk) = @_; + my $tbl = $obj->{'Tbl'}; + my $ctl = $obj->{'Ctl'}; + my $ini = $obj->{'init'}; + my $fin = $obj->{'final'}; + my $std = $ctl->[0]; + my $str = $ini; + my $pre = $std; + my $cur = $pre; + while (length($uni)){ + my $ch = chr(ord(substr($uni,0,1,''))); + my $x = ref($tbl->{$pre}) eq 'Encode::XS' + ? $tbl->{$pre}->encode($ch,1) + : $tbl->{$pre}->{FmUni}->{$ch}; + + unless(defined $x){ + foreach my $esc (@$ctl){ + $x = ref($tbl->{$esc}) eq 'Encode::XS' + ? $tbl->{$esc}->encode($ch,1) + : $tbl->{$esc}->{FmUni}->{$ch}; + $cur = $esc and last if defined $x; + } + } + if($x == 0x0d && !($ini eq '' && $fin eq '') && substr($uni,0,1) eq "\x0a") + { + $str .= $cur unless $cur eq $pre; + $str .= $fin."\x0d\x0a".$ini; + substr($uni,0,1,''); + $pre = $std; + next; + } + if(ref($tbl->{$cur}) eq 'Encode::XS'){ + $str .= $cur unless $cur eq $pre; + $str .= $x; # "DEF" is lost + $pre = $cur; + next; + } + my $def = $tbl->{$cur}->{'Def'}; + my $rep = $tbl->{$cur}->{'Rep'}; + unless (defined $x){ + last if ($chk); + $x = $def; + } + $str .= $cur unless $cur eq $pre; + $str .= pack(&$rep($x),$x); + $pre = $cur; + } + $str .= $std unless $cur eq $std; + $str .= $fin; + $_[1] = $uni if $chk; + return $str; +} 1; __END__