[PATCH] Encode.pm to use escape-sequence encoding
SADAHIRO Tomoyuki [Sat, 30 Jun 2001 07:33:37 +0000 (16:33 +0900)]
Date: Sat, 30 Jun 2001 07:33:37 +0900
Message-Id: <20010630073226.7C79.BQW10602@nifty.com>

Subject: Re: [PATCH] Encode.pm to use escape-sequence encoding
From: SADAHIRO Tomoyuki <BQW10602@nifty.com>
Date: Sat, 30 Jun 2001 21:38:14 +0900
Message-Id: <20010630213554.F67A.BQW10602@nifty.com>

p4raw-id: //depot/perl@11036

MANIFEST
ext/Encode/Encode/7bit-jis.enc [new file with mode: 0644]
ext/Encode/Encode/7bit-kana.enc [new file with mode: 0644]
ext/Encode/Encode/7bit-kr.enc [new file with mode: 0644]
ext/Encode/Encode/Tcl.pm

index 658e08c..fe882a2 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -174,6 +174,9 @@ ext/Encode/encengine.c      Encode extension
 ext/Encode/encode.h    Encode extension
 ext/Encode/Encode.pm           Encode extension
 ext/Encode/Encode.xs           Encode extension
+ext/Encode/Encode/7bit-jis.enc Encoding tables
+ext/Encode/Encode/7bit-kana.enc        Encoding tables
+ext/Encode/Encode/7bit-kr.enc  Encoding tables
 ext/Encode/Encode/ascii.enc    Encoding tables
 ext/Encode/Encode/ascii.ucm    Encoding tables
 ext/Encode/Encode/big5.enc     Encoding tables
@@ -716,9 +719,8 @@ lib/abbrev.pl                       An abbreviation table builder
 lib/AnyDBM_File.pm             Perl module to emulate dbmopen
 lib/AnyDBM_File.t              See if AnyDBM_File works
 lib/assert.pl                  assertion and panic with stack trace
-lib/Attribute/Handlers/Changes Attribute::Handlers
-lib/Attribute/Handlers/README  Attribute::Handlers
 lib/Attribute/Handlers.pm              Attribute::Handlers
+lib/Attribute/Handlers/Changes Attribute::Handlers
 lib/Attribute/Handlers/demo/demo.pl    Attribute::Handlers demo
 lib/Attribute/Handlers/demo/Demo.pm    Attribute::Handlers demo
 lib/Attribute/Handlers/demo/demo2.pl   Attribute::Handlers demo
@@ -733,6 +735,7 @@ lib/Attribute/Handlers/demo/demo_range.pl   Attribute::Handlers demo
 lib/Attribute/Handlers/demo/demo_rawdata.pl    Attribute::Handlers demo
 lib/Attribute/Handlers/demo/Descriptions.pm    Attribute::Handlers demo
 lib/Attribute/Handlers/demo/MyClass.pm Attribute::Handlers demo
+lib/Attribute/Handlers/README  Attribute::Handlers
 lib/Attribute/Handlers/test.pl         See if Attribute::Handlers works
 lib/attributes.pm              For "sub foo : attrlist"
 lib/AutoLoader.pm              Autoloader base class
@@ -1095,9 +1098,9 @@ lib/Test/Harness.pm               A test harness
 lib/Test/Harness.t             See if Test::Harness works
 lib/Test/More.pm                More utilities for writing tests
 lib/Test/More/Changes          Test::More changes
-lib/Test/More/t/More.t          Test::More test, basic operation
 lib/Test/More/t/fail-like.t     Test::More test, like() and qr// bug
 lib/Test/More/t/fail.t          Test::More test, failing tests
+lib/Test/More/t/More.t          Test::More test, basic operation
 lib/Test/More/t/plan_is_noplan.t        Test::More test, noplan
 lib/Test/More/t/skipall.t       Test::More test, skipping all tests
 lib/Test/Simple.pm              Basic utility for writing tests
@@ -1519,10 +1522,10 @@ NetWare/Nwpipe.c                Netware port
 NetWare/nwpipe.h               Netware port
 NetWare/nwplglob.c             Netware port
 NetWare/nwplglob.h             Netware port
+NetWare/nwstdio.h              Netware port
 NetWare/NWTInfo.c              Netware port
 NetWare/nwtinfo.h              Netware port
 NetWare/NWUtil.c               Netware port
-NetWare/nwstdio.h              Netware port
 NetWare/nwutil.h               Netware port
 NetWare/perlsdio.h             Netware port
 NetWare/t/NWModify.pl          Netware port
@@ -1546,7 +1549,6 @@ os2/dl_os2.c                      Addon for dl_open
 os2/Makefile.SHs               Shared library generation for OS/2
 os2/os2.c                      Additional code for OS/2
 os2/os2.sym                    Additional symbols to export
-os2/os2_base.t                 Additional tests for builtin methods
 os2/OS2/ExtAttr/Changes                EA access module
 os2/OS2/ExtAttr/ExtAttr.pm     EA access module
 os2/OS2/ExtAttr/ExtAttr.xs     EA access module
@@ -1589,6 +1591,7 @@ os2/OS2/REXX/t/rx_vrexx.t DLL access module
 os2/os2add.sym                 Overriding symbols to export
 os2/os2ish.h                   Header for OS/2
 os2/os2thread.h                        pthread-like typedefs
+os2/os2_base.t                 Additional tests for builtin methods
 os2/perl2cmd.pl                        Corrects installed binaries under OS/2
 patchlevel.h                   The current patch level of perl
 perl.c                         main()
diff --git a/ext/Encode/Encode/7bit-jis.enc b/ext/Encode/Encode/7bit-jis.enc
new file mode 100644 (file)
index 0000000..eae9e31
--- /dev/null
@@ -0,0 +1,12 @@
+# Encoding file: 7bit-jis, escape-driven
+E
+name           7bit-jis
+init           {}
+final          {}
+ascii          \x1b(B
+ascii          \x1b(J
+7bit-kana      \x1b(I
+jis0208                \x1b$B
+jis0208                \x1b$@
+jis0208                \x1b&@\x1b$B
+jis0212                \x1b$(D
diff --git a/ext/Encode/Encode/7bit-kana.enc b/ext/Encode/Encode/7bit-kana.enc
new file mode 100644 (file)
index 0000000..871dbf6
--- /dev/null
@@ -0,0 +1,20 @@
+# Encoding file: 7bit-kana, single-byte
+S
+0025 0 1
+00
+0000000100020003000400050006000700080009000A000B000C000D00000000
+0010001100120013001400150016001700180019001A0000001C001D001E001F
+0000FF61FF62FF63FF64FF65FF66FF67FF68FF69FF6AFF6BFF6CFF6DFF6EFF6F
+FF70FF71FF72FF73FF74FF75FF76FF77FF78FF79FF7AFF7BFF7CFF7DFF7EFF7F
+FF80FF81FF82FF83FF84FF85FF86FF87FF88FF89FF8AFF8BFF8CFF8DFF8EFF8F
+FF90FF91FF92FF93FF94FF95FF96FF97FF98FF99FF9AFF9BFF9CFF9DFF9EFF9F
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
diff --git a/ext/Encode/Encode/7bit-kr.enc b/ext/Encode/Encode/7bit-kr.enc
new file mode 100644 (file)
index 0000000..30c5395
--- /dev/null
@@ -0,0 +1,7 @@
+# Encoding file: 7bit-kr, escape-driven
+E
+name           7bit-kr
+init           \x1b$)C
+final          {}
+ascii          \x0f
+ksc5601                \x0e
index dc6455d..f862eef 100644 (file)
@@ -174,7 +174,7 @@ sub decode
  my ($obj,$str,$chk) = @_;
  my $rep   = $obj->{'Rep'};
  my $touni = $obj->{'ToUni'};
- my $uni   = '';
+ my $uni;
  while (length($str))
   {
    my $ch = ord(substr($str,0,1,''));
@@ -204,9 +204,9 @@ sub encode
 {
  my ($obj,$uni,$chk) = @_;
  my $fmuni = $obj->{'FmUni'};
- my $str   = '';
  my $def   = $obj->{'Def'};
  my $rep   = $obj->{'Rep'};
+ my $str;
  while (length($uni))
   {
    my $ch = substr($uni,0,1,'');
@@ -229,27 +229,130 @@ use Carp;
 
 sub read
 {
- my ($class,$fh,$name) = @_;
- my %self = (Name => $name, Num => 0);
+ my ($obj,$fh,$name) = @_;
+ my(%tbl, @esc, $enc);
  while (<$fh>)
   {
    my ($key,$val) = /^(\S+)\s+(.*)$/;
    $val =~ s/^\{(.*?)\}/$1/g;
    $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
-   $self{$key} = $val;
+   if($enc = Encode->getEncoding($key)){
+     $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
+     push @esc, $val;
+   }else{
+     $obj->{$key} = $val;
+   }
   }
- return bless \%self,$class;
+ $obj->{'Ctl'} = \@esc;
+ $obj->{'Tbl'} = \%tbl;
+ return $obj;
 }
 
 sub decode
 {
- croak("Not implemented yet");
+ my ($obj,$str,$chk) = @_;
+ my $tbl = $obj->{'Tbl'};
+ my $ctl = $obj->{'Ctl'};
+ my $ini = $obj->{'init'};
+ my $fin = $obj->{'final'};
+ my $std = $ctl->[0];
+ my $cur = $std;
+ my $uni;
+ while (length($str)){
+   my $uch = substr($str,0,1,'');
+   if($uch eq "\e"){
+    $str =~ s/^([\x20-\x2F]*[\x30-\x7E](?:\x1b[\x20-\x2F]*[\x30-\x7E])*)//;
+    my $esc = "\e$1";
+    if($tbl->{$esc}){ $cur = $esc }
+    elsif($esc eq $ini || $esc eq $fin){ $cur = $std }
+    else{carp "unknown escape sequence" }
+    next;
+   }
+   if($uch eq "\x0e" || $uch eq "\x0f"){
+    $cur = $uch and next;
+   }
+   my $x;
+   if(ref($tbl->{$cur}) eq 'Encode::XS'){
+     $uni .= $tbl->{$cur}->decode($uch);
+     next;
+   }
+   my $ch = ord($uch);
+   my $rep   = $tbl->{$cur}->{'Rep'};
+   my $touni = $tbl->{$cur}->{'ToUni'};
+   if (&$rep($ch) eq 'C')
+    {
+     $x = $touni->[0][$ch];
+    }
+   else
+    {
+     $x = $touni->[$ch][ord(substr($str,0,1,''))];
+    }
+   unless (defined $x)
+    {
+     last if $chk;
+     # What do we do here ?
+     $x = '';
+    }
+   $uni .= $x;
+  }
+ $_[1] = $str if $chk;
+ return $uni;
 }
 
 sub encode
 {
- croak("Not implemented yet");
-}
+ my ($obj,$uni,$chk) = @_;
+ my $tbl = $obj->{'Tbl'};
+ my $ctl = $obj->{'Ctl'};
+ my $ini = $obj->{'init'};
+ my $fin = $obj->{'final'};
+ my $std = $ctl->[0];
+ my $str = $ini;
+ my $pre = $std;
+ my $cur = $pre;
 
+ while (length($uni)){
+  my $ch = chr(ord(substr($uni,0,1,'')));
+  my $x  = ref($tbl->{$pre}) eq 'Encode::XS'
+       ? $tbl->{$pre}->encode($ch,1)
+       : $tbl->{$pre}->{FmUni}->{$ch};
+
+  unless(defined $x){
+   foreach my $esc (@$ctl){
+    $x = ref($tbl->{$esc}) eq 'Encode::XS'
+       ? $tbl->{$esc}->encode($ch,1)
+       : $tbl->{$esc}->{FmUni}->{$ch};
+    $cur = $esc and last if defined $x;
+   }
+  }
+  if($x == 0x0d && !($ini eq '' && $fin eq '') && substr($uni,0,1) eq "\x0a")
+   {
+    $str .= $cur unless $cur eq $pre;
+    $str .= $fin."\x0d\x0a".$ini;
+    substr($uni,0,1,'');
+    $pre = $std;
+    next;
+   }
+  if(ref($tbl->{$cur}) eq 'Encode::XS'){
+   $str .= $cur unless $cur eq $pre;
+   $str .= $x; # "DEF" is lost
+   $pre = $cur;
+   next;
+  }
+  my $def = $tbl->{$cur}->{'Def'};
+  my $rep = $tbl->{$cur}->{'Rep'};
+  unless (defined $x){
+   last if ($chk);
+   $x = $def;
+  }
+  $str .= $cur unless $cur eq $pre;
+  $str .= pack(&$rep($x),$x);
+  $pre = $cur;
+ }
+ $str .= $std unless $cur eq $std;
+ $str .= $fin;
+ $_[1] = $uni if $chk;
+ return $str;
+}
 1;
 __END__