[Encode] 1.40 released!

diff --git a/MANIFEST b/MANIFEST

index 0ea3148..9a94b69 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -213,6 +213,7 @@ ext/Encode/encengine.c              Encode extension
 ext/Encode/Encode.pm          Mother of all Encode extensions
 ext/Encode/Encode.xs           Encode extension
 ext/Encode/Encode/Changes.e2x          Skeleton file for enc2xs
+ext/Encode/Encode/ConfigLocal_PM.e2x   Skeleton file for enc2xs
 ext/Encode/Encode/encode.h             Encode extension header file
 ext/Encode/Encode/Makefile_PL.e2x      Skeleton file for enc2xs
 ext/Encode/Encode/README.e2x           Skeleton file for enc2xs
@@ -229,10 +230,8 @@ ext/Encode/lib/Encode/CN/HZ.pm             Encode extension
 ext/Encode/lib/Encode/Config.pm                Encode configuration module
 ext/Encode/lib/Encode/Encoder.pm              OO Encoder
 ext/Encode/lib/Encode/Encoding.pm      Encode extension
-ext/Encode/lib/Encode/JP/2022_JP.pm    Encode extension
-ext/Encode/lib/Encode/JP/2022_JP1.pm   Encode extension
 ext/Encode/lib/Encode/JP/H2Z.pm                Encode extension
-ext/Encode/lib/Encode/JP/JIS.pm        Encode extension
+ext/Encode/lib/Encode/JP/JIS7.pm       Encode extension
 ext/Encode/lib/Encode/KR/2022_KR.pm     Encode extension
 ext/Encode/lib/Encode/Supported.pod    Documents supported encodings
 ext/Encode/lib/Encode/Unicode.pm       Encode extension
diff --git a/ext/Encode/AUTHORS b/ext/Encode/AUTHORS

index b7097a3..6c8cc9b 100644 (file)
--- a/ext/Encode/AUTHORS
+++ b/ext/Encode/AUTHORS
@@ -12,6 +12,7 @@
 Andreas J. Koenig              <andreas.koenig@anima.de> 
 Anton Tagunov                  <tagunov@motor.ru>
 Autrijus Tang                  <autrijus@autrijus.org>
+Benjamin Goldberg               <goldbb2@earthlink.net>
 Craig A. Berry                 <craigberry@mac.com>
 Dan Kogai                      <dankogai@dan.co.jp>
 Gerrit P. Haase                        <gp@familiehaase.de>
diff --git a/ext/Encode/CN/CN.pm b/ext/Encode/CN/CN.pm

index 39ad6d7..2cdf969 100644 (file)
--- a/ext/Encode/CN/CN.pm
+++ b/ext/Encode/CN/CN.pm
@@ -4,7 +4,7 @@ BEGIN {
        die "Encode::CN not supported on EBCDIC\n";
     }
 }
-our $VERSION = do { my @r = (q$Revision: 1.20 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
 
 use Encode;
 use Encode::CN::HZ;
@@ -13,6 +13,7 @@ XSLoader::load('Encode::CN',$VERSION);
 
 # Relocated from Encode.pm
 
+use Encode::CN::HZ;
 # use Encode::CN::2022_CN;
 
 1;
diff --git a/ext/Encode/Changes b/ext/Encode/Changes

index 181f277..88023bf 100644 (file)
--- a/ext/Encode/Changes
+++ b/ext/Encode/Changes
@@ -1,9 +1,48 @@
 # Revision history for Perl extension Encode.
 #
-# $Id: Changes,v 1.34 2002/04/12 20:23:05 dankogai Exp dankogai $
+# $Id: Changes,v 1.40 2002/04/14 22:27:14 dankogai Exp $
 #
 
-1.34 $Date: 2002/04/12 20:23:05 $ (Unreleased)
+1.40 $Date: 2002/04/14 22:27:14 $
++ Encode/ConfigLocal_PM.e2x
+! lib/Encode/Config.pm
+! bin/enc2xs
+  "enc2xs -C" now generates/updates Encode::ConfigLocal. 
+  ConfigLocal_PM.e2x is a skelton thereof.
+! lib/Encode/Config.pm
+! CN/CN.pm
+  "use  Encode::CN::HZ;" was missing.
+! t/Unicode.t
+! t/unibench.t
+  More rigorous tests added to test XS, especially on memory allocation.
+! Encode.xs
+! lib/Encode/Unicode.pm
+  NI-S implemented an XS version -- merged
+  Message-Id: <20020414154857.2066.4@bactrian.ni-s.u-net.com>
+! encoding.pm
+! t/jperl.t
+  Source filter option added.  With this option on, you can write
+  perl 5.8-savvy scripts (such as UTF-8 identifiers) in legacy
+  encodings.  t/jperl.t enhanced to test this feature.
+! t/Unicode.t
+  ok() gotcha addressed by Benjamin fixed.  Though I didn't exactly
+  apply his suggestion, this degree of nitting is enough to add him
+  to AUTHORS list. 
+  Message-Id: <3CB93223.291E5E2E@earthlink.net>
+! JP/JP.pm
++ lib/Encode/JP/JIS7.pm
+- lib/Encode/JP/JIS.pm
+- lib/Encode/JP/2022_JP.pm
+- lib/Encode/JP/2022_JP1.pm
+  7bit-jis, iso-2022-jp and iso-2022-jp1 are all aggregated to
+  JIS7.pm for better maintainability and performance
+! encoding.pm
+  Added caveat for non-ascii identifiers.
+! encoding.pm
+  fixes by jhi, the original author of this pragramtic module.
+  Message-Id: <20020413231527.V1826@alpha.hut.fi>
+
+1.34 2002/04/12 20:23:05 (Unreleased)
 ! Encode.pm
 ! t/Unicode.t
   EBCDIC fixes addressed by jhi.
@@ -18,7 +57,7 @@
 ! AUTHORS
 ! t/Encoder.t
 ! lib/Encode/Encoder.pm
-  s/= shift;/= @_/g # trivial but a common idiomatic typo :)
+  s/ = shift;/ = @_;/ # trivial but a common idiomatic typo :)
   This adds Miyagawa-kun to AUTHORS. 
   * encoding() no longer exported by default but on demand
   * t/Encoder.t updated to test all these
@@ -257,7 +296,7 @@
   Typo fixes and improvements by jhi
   Message-Id: <200204010201.FAA03564@alpha.hut.fi>, et al.
 
-1.11  $Date: 2002/04/12 20:23:05 $
+1.11  $Date: 2002/04/14 22:27:14 $
 + t/encoding.t
 + t/jperl.t
 ! MANIFEST
diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm

index 3ebe7b9..e6a2048 100644 (file)
--- a/ext/Encode/Encode.pm
+++ b/ext/Encode/Encode.pm
@@ -1,6 +1,6 @@
 package Encode;
 use strict;
-our $VERSION = do { my @r = (q$Revision: 1.34 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.40 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
 our $DEBUG = 0;
 
 require DynaLoader;
@@ -44,7 +44,9 @@ use Encode::Alias;
 
 # Make a %Encoding package variable to allow a certain amount of cheating
 our %Encoding;
-use Encode::Config;
+our %ExtModule;
+require Encode::Config;
+eval { require Encode::ConfigLocal };
 
 sub encodings
 {
@@ -244,15 +246,6 @@ sub predefine_encodings{
        $Encode::Encoding{utf8} = 
            bless {Name => "utf8"} => "Encode::utf8";
     }
-    # do externals if necessary 
-    require File::Basename;
-    require File::Spec;
-    for my $ext (qw()){
-       my $pm =
-           File::Spec->catfile(File::Basename::dirname($INC{'Encode.pm'}),
-                               "Encode", "$ext.pm");
-       do $pm;
-    }
 }
 
 require Encode::Encoding;
@@ -599,4 +592,11 @@ L<perlunicode>,
 L<utf8>, 
 the Perl Unicode Mailing List E<lt>perl-unicode@perl.orgE<gt>
 
+head2 MAINTAINER
+
+This project was originated by Nick Ing-Simmons and later maintained
+by Dan Kogai E<lt>dankogai@dan.co.jpE<gt>.  See AUTHORS for full list
+of people involved.  For any questions, use
+E<lt>perl-unicode@perl.orgE<gt> so others can share.
+
 =cut
diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs

index 470f14e..229359e 100644 (file)
--- a/ext/Encode/Encode.xs
+++ b/ext/Encode/Encode.xs
@@ -6,6 +6,79 @@
 #include "encode.h"
 #include "def_t.h"
 
+#define FBCHAR                 0xFFFd
+#define BOM_BE                 0xFeFF
+#define BOM16LE                        0xFFFe
+#define BOM32LE                        0xFFFe0000
+
+#define valid_ucs2(x)          ((0 <= (x) && (x) < 0xD800) || (0xDFFF < (x) && (x) <= 0xFFFF))
+
+#define issurrogate(x)         (0xD800 <= (x)  && (x) <= 0xDFFF )
+#define isHiSurrogate(x)       (0xD800 <= (x)  && (x) <  0xDC00 )
+#define isLoSurrogate(x)       (0xDC00 <= (x)  && (x) <= 0xDFFF )
+
+static UV
+enc_unpack(pTHX_ U8 **sp,U8 *e,STRLEN size,U8 endian)
+{
+    U8 *s = *sp;
+    UV v = 0;
+    if (s+size > e) {
+       croak("Partial character %c",(char) endian);
+    }
+    switch(endian) {
+       case 'N':
+           v = *s++;
+           v = (v << 8) | *s++;
+       case 'n':
+           v = (v << 8) | *s++;
+           v = (v << 8) | *s++;
+           break;
+       case 'V':
+       case 'v':
+           v |= *s++;
+           v |= (*s++ << 8);
+           if (endian == 'v')
+               break;
+           v |= (*s++ << 16);
+           v |= (*s++ << 24);
+           break;
+       default:
+           croak("Unknown endian %c",(char) endian);
+           break;
+    }
+    *sp = s;
+    return v;
+}
+
+void
+enc_pack(pTHX_ SV *result,STRLEN size,U8 endian,UV value)
+{
+    U8 *d = SvGROW(result,SvCUR(result)+size);
+    switch(endian) {
+       case 'v':
+       case 'V':
+           d += SvCUR(result);
+           SvCUR_set(result,SvCUR(result)+size);
+           while (size--) {
+               *d++ = value & 0xFF;
+               value >>= 8;
+           }
+           break;
+       case 'n':
+       case 'N':
+           SvCUR_set(result,SvCUR(result)+size);
+           d += SvCUR(result);
+           while (size--) {
+               *--d = value & 0xFF;
+               value >>= 8;
+           }
+           break;
+       default:
+           croak("Unknown endian %c",(char) endian);
+           break;
+    }
+}
+
 #define ENCODE_XS_PROFILE 0 /* set 1 or more to profile.
                               t/encoding.t dumps core because of
                               Perl_warner and PerlIO don't work well */
@@ -674,6 +747,164 @@ CODE:
   XSRETURN(1);
  }
 
+MODULE = Encode                PACKAGE = Encode::Unicode
+
+void
+decode_xs(obj, str, chk = &PL_sv_undef)
+SV *   obj
+SV *   str
+SV *   chk
+CODE:
+{
+    int size    = SvIV(*hv_fetch((HV *)SvRV(obj),"size",4,0));
+    U8 endian   = *((U8 *)SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"endian",6,0)));
+    int ucs2    = SvTRUE(*hv_fetch((HV *)SvRV(obj),"ucs2",4,0));
+    SV *result = newSVpvn("",0);
+    STRLEN ulen;
+    U8 *s = SvPVbyte(str,ulen);
+    U8 *e = SvEND(str);
+    ST(0) = sv_2mortal(result);
+    SvUTF8_on(result);
+
+    if (!endian && s+size <= e) {
+       UV bom;
+       endian = (size == 4) ? 'N' : 'n';
+       bom = enc_unpack(aTHX_ &s,e,size,endian);
+        if (bom != BOM_BE) {
+           if (bom == BOM16LE) {
+               endian = 'v';
+           }
+           else if (bom == BOM32LE) {
+               endian = 'V';
+           }
+           else {
+               croak("%s:Unregognised BOM %"UVxf,
+                      SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)),bom);
+           }
+       }
+#if 0
+       /* Update endian for this sequence */
+       hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
+#endif
+    }
+    while (s < e && s+size <= e) {
+       UV ord = enc_unpack(aTHX_ &s,e,size,endian);
+       U8 *d;
+       if (size != 4 && !valid_ucs2(ord)) {
+           if (ucs2) {
+               if (SvTRUE(chk)) {
+                   croak("%s:no surrogates allowed %"UVxf,
+                         SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)),ord);
+               }
+               if (s+size <= e) {
+                    enc_unpack(aTHX_ &s,e,size,endian); /* skip the next one as well */
+               }
+               ord = FBCHAR;
+           }
+           else {
+               UV lo;
+               if (!isHiSurrogate(ord)) {
+                   croak("%s:Malformed HI surrogate %"UVxf,
+                         SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)),ord);
+               }
+               if (s+size > e) {
+                   /* Partial character */
+                   s -= size;   /* back up to 1st half */
+                   break;       /* And exit loop */
+               }
+               lo = enc_unpack(aTHX_ &s,e,size,endian);
+               if (!isLoSurrogate(lo)){
+                   croak("%s:Malformed LO surrogate %"UVxf,
+                         SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)),ord);
+               }
+               ord = 0x10000 + ((ord - 0xD800) << 10) + (lo - 0xDC00);
+           }
+       }
+       d = (U8 *) SvGROW(result,SvCUR(result)+UTF8_MAXLEN+1);
+       d = uvuni_to_utf8_flags(d+SvCUR(result), ord, 0);
+       SvCUR_set(result,d - (U8 *)SvPVX(result));
+    }
+    if (SvTRUE(chk)) {
+       if (s < e) {
+            Perl_warner(aTHX_ packWARN(WARN_UTF8),"%s:Partial character",
+                         SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)));
+            Move(s,SvPVX(str),e-s,U8);
+            SvCUR_set(str,(e-s));
+       }
+       else {
+           SvCUR_set(str,0);
+       }
+       *SvEND(str) = '\0';
+    }
+    XSRETURN(1);
+}
+
+void
+encode_xs(obj, utf8, chk = &PL_sv_undef)
+SV *   obj
+SV *   utf8
+SV *   chk
+CODE:
+{
+    int size   = SvIV(*hv_fetch((HV *)SvRV(obj),"size",4,0));
+    U8 endian = *((U8 *)SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"endian",6,0)));
+    int ucs2   = SvTRUE(*hv_fetch((HV *)SvRV(obj),"ucs2",4,0));
+    SV *result = newSVpvn("",0);
+    STRLEN ulen;
+    U8 *s = SvPVutf8(utf8,ulen);
+    U8 *e = SvEND(utf8);
+    ST(0) = sv_2mortal(result);
+    if (!endian) {
+       endian = (size == 4) ? 'N' : 'n';
+       enc_pack(aTHX_ result,size,endian,BOM_BE);
+#if 0
+       /* Update endian for this sequence */
+       hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
+#endif
+    }
+    while (s < e && s+UTF8SKIP(s) <= e) {
+       STRLEN len;
+       UV ord = utf8n_to_uvuni(s, e-s, &len, 0);
+        s += len;
+       if (size != 4 && !valid_ucs2(ord)) {
+           if (!issurrogate(ord)){
+               if (ucs2) {
+                   if (SvTRUE(chk)) {
+                       croak("%s:code point \"\\x{"UVxf"}\" too high",
+                         SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)),ord);
+                   }
+                   enc_pack(aTHX_ result,size,endian,FBCHAR);
+               }else{
+                   UV hi = ((ord - 0x10000) >> 10)   + 0xD800;
+                   UV lo = ((ord - 0x10000) & 0x3FF) + 0xDC00;
+                   enc_pack(aTHX_ result,size,endian,hi);
+                   enc_pack(aTHX_ result,size,endian,lo);
+               }
+           }
+           else {
+               /* not supposed to happen */
+               enc_pack(aTHX_ result,size,endian,FBCHAR);
+           }
+       }
+       else {
+           enc_pack(aTHX_ result,size,endian,ord);
+       }
+    }
+    if (SvTRUE(chk)) {
+       if (s < e) {
+            Perl_warner(aTHX_ packWARN(WARN_UTF8),"%s:Partial character",
+                         SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)));
+            Move(s,SvPVX(utf8),e-s,U8);
+            SvCUR_set(utf8,(e-s));
+       }
+       else {
+           SvCUR_set(utf8,0);
+       }
+       *SvEND(utf8) = '\0';
+    }
+    XSRETURN(1);
+}
+
 MODULE = Encode         PACKAGE = Encode
 
 PROTOTYPES: ENABLE
diff --git a/ext/Encode/Encode/ConfigLocal_PM.e2x b/ext/Encode/Encode/ConfigLocal_PM.e2x

new file mode 100644 (file)

index 0000000..e203dfd
--- /dev/null
+++ b/ext/Encode/Encode/ConfigLocal_PM.e2x
@@ -0,0 +1,13 @@
+#
+# Local demand-load module list
+#
+# You should not edit this file by hand!  use "enc2xs -C"
+# 
+package Encode::ConfigLocal;
+our $VERSION = $_LocalVer_;
+
+use strict;
+
+$_ModLines_
+
+1;
diff --git a/ext/Encode/JP/JP.pm b/ext/Encode/JP/JP.pm

index 931d4e3..10eb59b 100644 (file)
--- a/ext/Encode/JP/JP.pm
+++ b/ext/Encode/JP/JP.pm
@@ -5,14 +5,12 @@ BEGIN {
     }
 }
 use Encode;
-our $VERSION = do { my @r = (q$Revision: 1.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.22 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
 
 use XSLoader;
 XSLoader::load('Encode::JP',$VERSION);
 
-use Encode::JP::JIS;
-use Encode::JP::2022_JP;
-use Encode::JP::2022_JP1;
+use Encode::JP::JIS7;
 
 1;
 __END__
diff --git a/ext/Encode/MANIFEST b/ext/Encode/MANIFEST

index 3c4a187..22f12c6 100644 (file)
--- a/ext/Encode/MANIFEST
+++ b/ext/Encode/MANIFEST
@@ -9,6 +9,7 @@ EBCDIC/Makefile.PL     Encode extension
 Encode.pm             Mother of all Encode extensions
 Encode.xs              Encode extension
 Encode/Changes.e2x             Skeleton file for enc2xs
+Encode/ConfigLocal_PM.e2x      Skeleton file for enc2xs
 Encode/Makefile_PL.e2x Skeleton file for enc2xs
 Encode/README.e2x              Skeleton file for enc2xs
 Encode/_PM.e2x         Skeleton file for enc2xs
@@ -38,10 +39,8 @@ lib/Encode/CN/HZ.pm          Encode extension
 lib/Encode/Config.pm           Encode configuration module
 lib/Encode/Encoder.pm         OO Encoder
 lib/Encode/Encoding.pm Encode extension
-lib/Encode/JP/2022_JP.pm       Encode extension
-lib/Encode/JP/2022_JP1.pm      Encode extension
 lib/Encode/JP/H2Z.pm           Encode extension
-lib/Encode/JP/JIS.pm   Encode extension
+lib/Encode/JP/JIS7.pm  Encode extension
 lib/Encode/KR/2022_KR.pm        Encode extension
 lib/Encode/Supported.pod       Documents supported encodings
 lib/Encode/Unicode.pm  Encode extension
diff --git a/ext/Encode/README b/ext/Encode/README

index 4ff4622..b4078d8 100644 (file)
--- a/ext/Encode/README
+++ b/ext/Encode/README
@@ -38,6 +38,12 @@ DEPENDENCIES
 
 This module requires perl5.7.3 or later.
 
+MAINTAINER
+
+This project was originated by Nick Ing-Simmons and later maintained by
+Dan Kogai <dankogai@dan.co.jp>.  See AUTHORS for full list of people
+involved.
+
 QUESTIONS?
 
 If you have any questions "perldoc Encode" does not answer, please
diff --git a/ext/Encode/bin/enc2xs b/ext/Encode/bin/enc2xs

index dff18cc..bc03b82 100644 (file)
--- a/ext/Encode/bin/enc2xs
+++ b/ext/Encode/bin/enc2xs
@@ -8,7 +8,7 @@ BEGIN {
 use strict;
 use Getopt::Std;
 my @orig_ARGV = @ARGV;
-our $VERSION  = do { my @r = (q$Revision: 1.20 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION  = do { my @r = (q$Revision: 1.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
 
 # These may get re-ordered.
 # RAW is a do_now as inserted by &enter
@@ -131,9 +131,10 @@ my %opt;
 # -o <output> to specify the output file name (else it's the first arg)
 # -f <inlist> to give a file with a list of input files (else use the args)
 # -n <name> to name the encoding (else use the basename of the input file.
-getopts('M:SQqOo:f:n:',\%opt);
+getopts('CM:SQqOo:f:n:',\%opt);
 
 $opt{M} and make_makefile_pl($opt{M}, @ARGV);
+$opt{C} and make_configlocal_pm($opt{C}, @ARGV);
 
 # This really should go first, else the die here causes empty (non-erroneous)
 # output files to be written.
@@ -853,19 +854,73 @@ sub make_makefile_pl
     $_Name = shift;
     $_TableFiles = join(",", map {qq('$_')} @_);
     $_Now = scalar localtime();
+    eval { require File::Spec; };
     warn "Generating Makefile.PL\n";
-    _print_expand("$_Inc/Makefile_PL.e2x", "Makefile.PL");
+    _print_expand(File::Spec->catfile($_Inc,"Makefile_PL.e2x"),"Makefile.PL");
     warn "Generating $_Name.pm\n";
-    _print_expand("$_Inc/_PM.e2x",         "$_Name.pm");
+    _print_expand(File::Spec->catfile($_Inc,"_PM.e2x"),        "$_Name.pm");
     warn "Generating t/$_Name.t\n";
-    _print_expand("$_Inc/_T.e2x",          "t/$_Name.t");
+    _print_expand(File::Spec->catfile($_Inc,"_T.e2x"),         "t/$_Name.t");
     warn "Generating README\n";
-    _print_expand("$_Inc/README.e2x",      "README");
+    _print_expand(File::Spec->catfile($_Inc,"README.e2x"),     "README");
     warn "Generating t/$_Name.t\n";
-    _print_expand("$_Inc/Changes.e2x",     "Changes");
+    _print_expand(File::Spec->catfile($_Inc,"Changes.e2x"),    "Changes");
     exit;
 }
 
+use vars qw(
+           $_ModLines
+           $_LocalVer
+           );
+
+sub make_configlocal_pm
+{
+    eval { require Encode; };
+    $@ and die "Unable to require Encode: $@\n";
+    eval { require File::Spec; };
+    # our used for variable expanstion
+    my %in_core = map {$_=>1}('ascii','iso-8859-1','utf8');
+    my %LocalMod = ();
+    for my $d (@INC){
+       my $inc = File::Spec->catfile($d, "Encode");
+       -d $inc or next;
+       opendir my $dh, $inc or die "$inc:$!";
+       warn "Checking $inc...\n";
+       for my $f (grep /\.pm$/o, readdir($dh)){
+           -f File::Spec->catfile($inc, "$f") or next;
+           $INC{"Encode/$f"} and next;
+           warn "require Encode/$f;\n";
+           eval { require "Encode/$f"; };
+           $@ and die "Can't require Encode/$f: $@\n";
+           for my $enc (Encode->encodings()){
+               $in_core{$enc} and next;
+               $Encode::Config::ExtModule{$enc} and next;
+               my $mod = "Encode/$f"; 
+               $mod =~ s/\.pm$//o; $mod =~ s,/,::,og;
+               warn "$enc => $mod\n";
+               $LocalMod{$enc} = $mod;
+           }
+       }
+    }
+    $_ModLines = "";
+    for my $enc (sort keys %LocalMod){
+       $_ModLines .= 
+           qq(\$Encode::ExtModule{'$enc'} =\t"$LocalMod{$enc}";\n);
+    }
+    $_LocalVer = _mkversion();
+    $_Inc = $INC{"Encode.pm"}; $_Inc =~ s/\.pm$//o;    
+    warn "Writing Encode::ConfigLocal\n";
+    _print_expand(File::Spec->catfile($_Inc,"ConfigLocal_PM.e2x"),    
+                 File::Spec->catfile($_Inc,"ConfigLocal.pm"));
+    exit;
+}
+
+sub _mkversion{
+    my ($ss,$mm,$hh,$dd,$mo,$yyyy) = localtime();
+    $yyyy += 1900, $mo +=1;
+    return sprintf("v%04d.%04d.%04d", $yyyy, $mo*100+$dd, $hh*100+$mm);
+}
+
 sub _print_expand{
     eval { require File::Basename; };
     $@ and die "File::Basename needed.  Are you on miniperl?;\nerror: $@\n";
@@ -893,8 +948,9 @@ enc2xs -- Perl Encode Module Generator
 
 =head1 SYNOPSIS
 
-  enc2xs -M ModName mapfiles...
   enc2xs -[options]
+  enc2xs -M ModName mapfiles...
+  enc2xs -C
 
 =head1 DESCRIPTION
 
@@ -1002,6 +1058,16 @@ You can "make install" already but you should test first.
 
 If you are content with the test result, just "make install"
 
+=item 7.
+
+If you want to add your encoding to Encode demand-loading list
+(so you don't have to "use Encode::YourEncoding"), run
+
+  enc2xs -C
+
+to update Encode::ConfigLocal, a module that controls local settings.
+After that, "use Encode;" is enough to load your encodings on demand.
+
 =back
 
 =head1 The Unicode Character Map
diff --git a/ext/Encode/encoding.pm b/ext/Encode/encoding.pm

index 16dee96..d5b32c7 100644 (file)
--- a/ext/Encode/encoding.pm
+++ b/ext/Encode/encoding.pm
@@ -1,5 +1,5 @@
 package encoding;
-our $VERSION = do { my @r = (q$Revision: 1.25 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.26 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
 
 use Encode;
 use strict;
@@ -22,23 +22,42 @@ sub import {
        require Carp;
        Carp::croak "Unknown encoding '$name'";
     }
-    ${^ENCODING} = $enc; # this is all you need, actually.
-
-    # $_OPEN_ORIG = ${^OPEN};
-    for my $h (qw(STDIN STDOUT STDERR)){
-       if ($arg{$h}){
-           unless (defined find_encoding($name)) {
+    unless ($arg{Filter}){
+       ${^ENCODING} = $enc; # this is all you need, actually.
+       for my $h (qw(STDIN STDOUT)){
+           if ($arg{$h}){
+               unless (defined find_encoding($arg{h})) {
+                   require Carp;
+                   Carp::croak "Unknown encoding for $h, '$arg{$h}'";
+               }
+               eval qq{ binmode($h, ":encoding($arg{$h})") };
+           }else{
+               unless (exists $arg{$h}){
+                   eval qq{ binmode($h, ":encoding($name)") };
+               }
+           }
+           if ($@){
                require Carp;
-               Carp::croak "Unknown encoding for $h, '$arg{$h}'";
+               Carp::croak($@);
            }
-           eval qq{ binmode($h, ":encoding($arg{$h})") };
-       }else{
-           eval qq{ binmode($h, ":encoding($name)") };
-       }
-       if ($@){
-           require Carp;
-           Carp::croak($@);
        }
+    }else{
+       defined(${^ENCODING}) and undef ${^ENCODING};
+       eval {
+           require Filter::Util::Call ;
+           Filter::Util::Call->import ;
+           binmode(STDIN,  ":raw");
+           binmode(STDOUT, ":raw");
+           filter_add(sub{
+                          my $status;
+                           if (($status = filter_read()) > 0){
+                              $_ = $enc->decode($_, 1);
+                              # warn $_;
+                          }
+                          $status ;
+                      });
+       };
+       # warn "Filter installed";
     }
     return 1; # I doubt if we need it, though
 }
@@ -48,8 +67,9 @@ sub unimport{
     undef ${^ENCODING};
     binmode(STDIN,  ":raw");
     binmode(STDOUT, ":raw");
-    # Leaves STDERR alone.
-    # binmode(STDERR, ":raw");
+    if ($INC{"Filter/Util/Call.pm"}){
+       eval { filter_del() };
+    }
 }
 
 1;
@@ -80,6 +100,11 @@ encoding -  allows you to write your script in non-asii or non-utf8
   # "no encoding;" supported (but not scoped!)
   no encoding;
 
+  # an alternate way, Filter
+  use encoding "euc-jp", Filter=>1;
+  use utf8;
+  # now you can use kanji identifiers -- in euc-jp!
+
 =head1 ABSTRACT
 
 Perl 5.6.0 has introduced Unicode support.  You could apply
@@ -133,11 +158,12 @@ error will be thrown.
 Note that non-STD file handles remain unaffected.  Use C<use open> or
 C<binmode> to change disciplines of those.
 
-=item use encoding I<ENCNAME> [ STDIN => I<ENCNAME_IN> ...] ;
+=item use encoding I<ENCNAME> [ STDIN =E<gt> I<ENCNAME_IN> ...] ;
 
-You can also individually set encodings of STDIN, STDOUT, and STDERR
-via STDI<FH> => I<ENCNAME_FH> form.  In this case, you cannot omit the
-first I<ENCNAME>.
+You can also individually set encodings of STDIN and STDOUT via
+STDI<FH> =E<gt> I<ENCNAME_FH> form.  In this case, you cannot omit the
+first I<ENCNAME>.  C<STDI<FH> =E<gt> undef> turns IO transcoding
+completely off.
 
 =item no encoding;
 
@@ -187,6 +213,41 @@ After all, the best thing about this pragma is that you don't have to
 resort to \x... just to spell your name in native encoding.  So feel
 free to put your strings in your encoding in quotes and regexes.
 
+=head1 NON-ASCII Identifiers and Filter option
+
+The magic of C<use encoding> is not applied to the names of identifiers.
+In order to make C<${"4eba"}++> ($man++, where man is a single ideograph)
+work, you still need to write your script in UTF-8 or use a source filter.
+
+In other words, the same restriction as Jperl applies.
+
+If you dare experiment, however, you can try Fitlter option.
+
+=over 4
+
+=item use encoding I<ENCNAME> Filter=E<gt>1;
+
+This turns encoding pragma into source filter.  While the default
+approach just decodes interpolated literals (in qq() and qr()), this
+will apply source filter to entire source code.  In this case, STDIN
+and STDOUT remain untouched.
+
+=back
+
+What does this mean?  Your source code behaves as if it is written 
+in UTF-8.  So even if your editor only supports Shift_JIS, for 
+example.  You can still try examples in Chapter 15 of 
+C<Programming Perl, 3rd Ed.>  For instance, you can use UTF-8
+identifiers.
+
+This option is significantly slower and (as of this writing) non-ASCII
+identifiers are not very stable WITHOUT this option and with the
+source code written in UTF-8.
+
+To make your script in legacy encoding work with minimum effort, do
+not use Filter=E<gt>1
+
+
 =head1 EXAMPLE - Greekperl
 
     use encoding "iso 8859-7";
@@ -231,6 +292,10 @@ The encoding pragma is not supported on EBCDIC platforms.
 
 =head1 SEE ALSO
 
-L<perlunicode>, L<Encode>, L<open>
+L<perlunicode>, L<Encode>, L<open>, L<Filter::Util::Call>,
+
+Ch. 15 of C<Programming Perl (3rd Edition)>
+by Larry Wall, Tom Christiansen, Jon Orwant;
+O'Reilly & Associates; ISBN 0-596-00027-8
 
 =cut
diff --git a/ext/Encode/lib/Encode/Config.pm b/ext/Encode/lib/Encode/Config.pm

index 48095f6..34f7b18 100644 (file)
--- a/ext/Encode/lib/Encode/Config.pm
+++ b/ext/Encode/lib/Encode/Config.pm
@@ -2,10 +2,9 @@
 # Demand-load module list
 #
 package Encode::Config;
-our $VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
 
 use strict;
-require Exporter;
 
 our %ExtModule = 
     (
@@ -108,6 +107,7 @@ unless (ord("A") == 193){
         'euc-cn'             => 'Encode::CN',
         'gb12345-raw'        => 'Encode::CN',
         'gb2312-raw'         => 'Encode::CN',
+        'hz'                 => 'Encode::CN',
         'iso-ir-165'         => 'Encode::CN',
         'cp936'              => 'Encode::CN',
         'MacChineseSimp'     => 'Encode::CN',
@@ -136,12 +136,17 @@ unless (ord("A") == 193){
         'cp950'              => 'Encode::TW',
         'MacChineseTrad'     => 'Encode::TW',
 
-        'big5plus'           => 'Encode::HanExtra',
-        'euc-tw'             => 'Encode::HanExtra',
-        'gb18030'            => 'Encode::HanExtra',
+        #'big5plus'           => 'Encode::HanExtra',
+        #'euc-tw'             => 'Encode::HanExtra',
+        #'gb18030'            => 'Encode::HanExtra',
        );
 }
 
-*Encode::ExtModule = \%ExtModule;
+#
+# Why not export ? to keep ConfigLocal Happy!
+#
+while (my ($enc,$mod) = each %ExtModule){
+    $Encode::ExtModule{$enc} = $mod;
+}
 
 1;
diff --git a/ext/Encode/lib/Encode/Encoder.pm b/ext/Encode/lib/Encode/Encoder.pm

index dbc1839..793dacf 100644 (file)
--- a/ext/Encode/lib/Encode/Encoder.pm
+++ b/ext/Encode/lib/Encode/Encoder.pm
@@ -1,5 +1,5 @@
 #
-# $Id: Encoder.pm,v 0.4 2002/04/12 20:23:05 dankogai Exp dankogai $
+# $Id: Encoder.pm,v 0.4 2002/04/12 20:23:05 dankogai Exp $
 #
 package Encode::Encoder;
 use strict;
diff --git a/ext/Encode/lib/Encode/JP/2022_JP.pm b/ext/Encode/lib/Encode/JP/2022_JP.pm

deleted file mode 100644 (file)

index 54b863f..0000000
--- a/ext/Encode/lib/Encode/JP/2022_JP.pm
+++ /dev/null
@@ -1,44 +0,0 @@
-package Encode::JP::2022_JP;
-use Encode::JP;
-use Encode::JP::JIS;
-use Encode::JP::H2Z;
-use base 'Encode::Encoding';
-
-use vars qw($VERSION);
-$VERSION = do { my @r = (q$Revision: 1.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
-
-my $canon = 'iso-2022-jp';
-my $obj = bless {name => $canon}, __PACKAGE__;
-$obj->Define($canon);
-
-sub name { return $_[0]->{name}; }
-
-#
-# decode is identical to 7bit-jis
-#
-
-sub decode
-{
-    my ($obj,$str,$chk) = @_;
-    return Encode::decode('7bit-jis', $str, $chk);
-}
-
-# iso-2022-jp = 7bit-jis with all x201 (Hankaku) converted to
-#               x208 equivalent (Zenkaku)
-
-sub encode
-{
-    my ($obj,$str,$chk) = @_;
-    my $euc =  Encode::encode('euc-jp', $str, $chk);
-    &Encode::JP::H2Z::h2z(\$euc);
-    return &Encode::JP::JIS::euc_jis_nox0212(\$euc);
-}
-
-1;
-__END__
-
-=head1 NAME
-
-Encode::JP::2022_JP -- internally used by Encode::JP
-
-=cut
diff --git a/ext/Encode/lib/Encode/JP/2022_JP1.pm b/ext/Encode/lib/Encode/JP/2022_JP1.pm

deleted file mode 100644 (file)

index 46d1f3e..0000000
--- a/ext/Encode/lib/Encode/JP/2022_JP1.pm
+++ /dev/null
@@ -1,45 +0,0 @@
-package Encode::JP::2022_JP1;
-use Encode::JP;
-use Encode::JP::JIS;
-use Encode::JP::H2Z;
-use base 'Encode::Encoding';
-
-use vars qw($VERSION);
-$VERSION = do { my @r = (q$Revision: 1.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
-
-my $canon = 'iso-2022-jp-1';
-my $obj = bless {name => $canon}, __PACKAGE__;
-$obj->Define($canon);
-
-sub name { return $_[0]->{name}; }
-
-#
-# decode is identical to 7bit-jis
-#
-
-sub decode
-{
-    my ($obj,$str,$chk) = @_;
-    return Encode::decode('7bit-jis', $str, $chk);
-}
-
-# iso-2022-jp = 7bit-jis with all x201 (Hankaku) converted to
-#               x208 equivalent (Zenkaku)
-
-sub encode
-{
-    my ($obj,$str,$chk) = @_;
-    my $euc =  Encode::encode('euc-jp', $str, $chk);
-    &Encode::JP::H2Z::h2z(\$euc);
-    return &Encode::JP::JIS::euc_jis(\$euc);
-}
-
-1;
-__END__
-
-
-=head1 NAME
-
-Encode::JP::2022_JP1 -- internally used by Encode::JP
-
-=cut
diff --git a/ext/Encode/lib/Encode/JP/JIS.pm b/ext/Encode/lib/Encode/JP/JIS.pm

deleted file mode 100644 (file)

index 0df3c1d..0000000
--- a/ext/Encode/lib/Encode/JP/JIS.pm
+++ /dev/null
@@ -1,93 +0,0 @@
-package Encode::JP::JIS;
-use Encode::JP;
-use base 'Encode::Encoding';
-
-use strict;
-
-our $VERSION = do { my @r = (q$Revision: 1.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
-
-# Just for the time being, we implement jis-7bit
-# encoding via EUC
-
-my $canon = '7bit-jis';
-my $obj = bless {name => $canon}, __PACKAGE__;
-$obj->Define($canon);
-
-sub name { return $_[0]->{name}; }
-
-sub decode
-{
-    my ($obj,$str,$chk) = @_;
-    my $res = $str;
-    jis_euc(\$res);
-    return Encode::decode('euc-jp', $res, $chk);
-}
-
-sub encode
-{
-    my ($obj,$str,$chk) = @_;
-    my $res = Encode::encode('euc-jp', $str, $chk);
-    euc_jis(\$res);
-    return $res;
-}
-
-use Encode::CJKConstants qw(:all);
-
-# JIS<->EUC
-
-sub jis_euc {
-    my $r_str = shift;
-    $$r_str =~ s(
-                ($RE{JIS_0212}|$RE{JIS_0208}|$RE{ISO_ASC}|$RE{JIS_KANA})
-                ([^\e]*)
-                )
-    {
-       my ($esc, $str) = ($1, $2);
-       if ($esc !~ /$RE{ISO_ASC}/o) {
-           $str =~ tr/\x21-\x7e/\xa1-\xfe/;
-           if ($esc =~ /$RE{JIS_KANA}/o) {
-               $str =~ s/([\xa1-\xdf])/\x8e$1/og;
-           }
-           elsif ($esc =~ /$RE{JIS_0212}/o) {
-               $str =~ s/([\xa1-\xfe][\xa1-\xfe])/\x8f$1/og;
-           }
-       }
-       $str;
-    }geox;
-    $$r_str;
-}
-
-sub euc_jis{
-    my $r_str = shift;
-    $$r_str =~ s{
-       ((?:$RE{EUC_C})+|(?:$RE{EUC_KANA})+|(?:$RE{EUC_0212})+)
-       }{
-           my $str = $1;
-           my $esc = 
-               ( $str =~ tr/\x8E//d ) ? $ESC{KANA} :
-                   ( $str =~ tr/\x8F//d ) ? $ESC{JIS_0212} :
-                       $ESC{JIS_0208};
-           $str =~ tr/\xA1-\xFE/\x21-\x7E/;
-           $esc . $str . $ESC{ASC};
-       }geox;
-    $$r_str =~
-       s/\Q$ESC{ASC}\E
-           (\Q$ESC{KANA}\E|\Q$ESC{JIS_0212}\E|\Q$ESC{JIS_0208}\E)/$1/gox;
-    $$r_str;
-}
-
-sub euc_jis_nox0212{
-    my $r_str = shift;
-    $$r_str =~ s/$RE{EUC_0212}/$CHARCODE{UNDEF_EUC}/go;
-    euc_jis($r_str);
-}
-
-1;
-__END__
-
-
-=head1 NAME
-
-Encode::JP::JIS -- internally used by Encode::JP
-
-=cut
diff --git a/ext/Encode/lib/Encode/JP/JIS7.pm b/ext/Encode/lib/Encode/JP/JIS7.pm

new file mode 100644 (file)

index 0000000..8cc40ca
--- /dev/null
+++ b/ext/Encode/lib/Encode/JP/JIS7.pm
@@ -0,0 +1,108 @@
+package Encode::JP::JIS7;
+use strict;
+
+our $VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+
+require Encode;
+for my $name ('7bit-jis', 'iso-2022-jp', 'iso-2022-jp-1'){
+    my $h2z     = ($name eq '7bit-jis')    ? 0 : 1;
+    my $jis0212 = ($name eq 'iso-2022-jp') ? 0 : 1;
+    
+    $Encode::Encoding{$name} =  
+        bless {
+               Name      =>   $name,
+               h2z       =>   $h2z,
+               jis0212   =>   $jis0212,
+              } => __PACKAGE__;
+}
+
+sub name { shift->{'Name'} }
+sub new_sequence { $_[0] };
+
+use Encode::CJKConstants qw(:all);
+
+#
+# decode is identical for all 2022 variants
+#
+
+sub decode
+{
+    my ($obj,$str,$chk) = @_;
+    jis_euc(\$str);
+    return Encode::decode('euc-jp', $str, $chk);
+}
+
+#
+# encode is different
+#
+
+sub encode
+{
+    require Encode::JP::H2Z;
+    my ($obj,$str,$chk) = @_;
+    my ($h2z, $jis0212) = @$obj{qw(h2z jis0212)};
+    my $result = Encode::encode('euc-jp', $str, $chk);
+    $h2z and &Encode::JP::H2Z::h2z(\$result);
+    euc_jis(\$result, $jis0212);
+    return $result;
+}
+
+
+# JIS<->EUC
+
+sub jis_euc {
+    my $r_str = shift;
+    $$r_str =~ s(
+                ($RE{JIS_0212}|$RE{JIS_0208}|$RE{ISO_ASC}|$RE{JIS_KANA})
+                ([^\e]*)
+                )
+    {
+       my ($esc, $str) = ($1, $2);
+       if ($esc !~ /$RE{ISO_ASC}/o) {
+           $str =~ tr/\x21-\x7e/\xa1-\xfe/;
+           if ($esc =~ /$RE{JIS_KANA}/o) {
+               $str =~ s/([\xa1-\xdf])/\x8e$1/og;
+           }
+           elsif ($esc =~ /$RE{JIS_0212}/o) {
+               $str =~ s/([\xa1-\xfe][\xa1-\xfe])/\x8f$1/og;
+           }
+       }
+       $str;
+    }geox;
+    $$r_str;
+}
+
+sub euc_jis{
+    my $r_str = shift;
+    my $jis0212 = shift;
+    $$r_str =~ s{
+       ((?:$RE{EUC_C})+|(?:$RE{EUC_KANA})+|(?:$RE{EUC_0212})+)
+       }{
+           my $str = $1;
+           my $esc = 
+               ( $str =~ tr/\x8E//d ) ? $ESC{KANA} :
+                   ( $str =~ tr/\x8F//d ) ? $ESC{JIS_0212} :
+                       $ESC{JIS_0208};
+           if ($esc eq $ESC{JIS_0212} && !$jis0212){
+               # fallback to '?'
+               $str =~ tr/\xA1-\xFE/\x3F/;
+           }else{
+               $str =~ tr/\xA1-\xFE/\x21-\x7E/;
+           }
+           $esc . $str . $ESC{ASC};
+       }geox;
+    $$r_str =~
+       s/\Q$ESC{ASC}\E
+           (\Q$ESC{KANA}\E|\Q$ESC{JIS_0212}\E|\Q$ESC{JIS_0208}\E)/$1/gox;
+    $$r_str;
+}
+
+1;
+__END__
+
+
+=head1 NAME
+
+Encode::JP::JIS7 -- internally used by Encode::JP
+
+=cut
diff --git a/ext/Encode/lib/Encode/Unicode.pm b/ext/Encode/lib/Encode/Unicode.pm

index 2a05ef0..55ae975 100644 (file)
--- a/ext/Encode/lib/Encode/Unicode.pm
+++ b/ext/Encode/lib/Encode/Unicode.pm
@@ -3,7 +3,7 @@ package Encode::Unicode;
 use strict;
 use warnings;
 
-our $VERSION = do { my @r = (q$Revision: 1.30 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.31 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
 
 #
 # Aux. subs & constants
@@ -69,17 +69,28 @@ for my $name (qw(UTF-16 UTF-16BE UTF-16LE
 }
 
 sub name { shift->{'Name'} }
-sub new_sequence { $_[0] };
+sub new_sequence
+{
+    my $self = shift;
+    # Return the original if endian known
+    return $self if ($self->{endian});
+    # Return a clone
+    return bless {%$self},ref($self);
+}
+
 
 #
-# two implementation of (en|de)code exist.  *_modern use
-# an array and *_classic stick with substr.  *_classic is much
-# slower but more memory conservative.  *_modern is default.
+# three implementation of (en|de)code exist.  XS version is the fastest.
+# *_modern use # an array and *_classic stick with substr.  *_classic is
+#  much slower but more memory conservative.  *_xs is default.
 
 sub set_transcoder{
     no warnings qw(redefine);
     my $type = shift;
-    if     ($type eq "modern"){
+    if    ($type eq "xs"){
+       *decode = \&decode_xs;
+       *encode = \&encode_xs;
+    }elsif($type eq "modern"){
        *decode = \&decode_modern;
        *encode = \&encode_modern;
     }elsif($type eq "classic"){
@@ -87,17 +98,17 @@ sub set_transcoder{
        *encode = \&encode_classic;
     }else{
        require Carp; 
-       Carp::croak __PACKAGE__, "::set_transcoder(modern|classic)";
+       Carp::croak __PACKAGE__, "::set_transcoder(modern|classic|xs)";
     }
 }
 
-set_transcoder("modern");
+set_transcoder("xs");
 
 #
 # *_modern are much faster but guzzle more memory
 #
 
-sub decode_modern
+sub decode_modern($$;$)
 {
     my ($obj, $str, $chk ) = @_;
     my ($size, $endian, $ucs2) = @$obj{qw(size endian ucs2)};
@@ -134,7 +145,7 @@ sub decode_modern
     return $utf8;
 }
 
-sub encode_modern
+sub encode_modern($$;$)
 {
     my ($obj, $utf8, $chk) = @_;
     my ($size, $endian, $ucs2) = @$obj{qw(size endian ucs2)};
@@ -171,7 +182,7 @@ sub encode_modern
 # *_classic are slower but more memory conservative
 #
 
-sub decode_classic
+sub decode_classic($$;$)
 {
     my ($obj, $str, $chk ) = @_;
     my ($size, $endian, $ucs2) = @$obj{qw(size endian ucs2)};
@@ -207,7 +218,7 @@ sub decode_classic
     return $utf8;
 }
 
-sub encode_classic
+sub encode_classic($$;$)
 {
     my ($obj, $utf8, $chk) = @_;
     my ($size, $endian, $ucs2) = @$obj{qw(size endian ucs2)};
diff --git a/ext/Encode/t/Unicode.t b/ext/Encode/t/Unicode.t

index 6436c57..02eac86 100644 (file)
--- a/ext/Encode/t/Unicode.t
+++ b/ext/Encode/t/Unicode.t
@@ -1,10 +1,11 @@
 #
-# $Id: Unicode.t,v 1.6 2002/04/12 20:23:05 dankogai Exp dankogai $
+# $Id: Unicode.t,v 1.7 2002/04/14 22:05:20 dankogai Exp $
 #
 # This script is written entirely in ASCII, even though quoted literals
 # do include non-BMP unicode characters -- Are you happy, jhi?
 #
 
+our $ON_EBCDIC;
 BEGIN {
     require Config; import Config;
     if ($Config{'extensions'} !~ /\bEncode\b/) {
@@ -16,17 +17,19 @@ BEGIN {
 #      print "1..0 # Skip: PerlIO was not built\n";
 #      exit 0;
 #     }
+
 # should work on EBCDIC
 #    if (ord("A") == 193) {
 #      print "1..0 # Skip: EBCDIC\n";
 #      exit 0;
 #    }
+    $ON_EBCDIC = (ord("A") == 193) || $ARGV[0];
     $| = 1;
 }
 
 use strict;
 #use Test::More 'no_plan';
-use Test::More tests => 22;
+use Test::More tests => 30;
 use Encode qw(encode decode);
 
 #
@@ -81,15 +84,37 @@ is($nasty,  decode('UTF-32',   $n_32lb), qq{decode UTF-32, bom=le});
 is(decode('UCS-2BE', $n_16be), $fallback, "decode UCS-2BE: fallback");
 is(decode('UCS-2LE', $n_16le), $fallback, "decode UCS-2LE: fallback");
 eval { decode('UCS-2BE', $n_16be, 1) }; 
-ok($@=~/^UCS-2BE:/, "decode UCS-2BE: exception");
-eval { decode('UCS-2LE', $n_16le, 1) }; 
-ok($@=~/^UCS-2LE:/, "decode UCS-2LE: exception");
+is (index($@,'UCS-2BE:'), 0, "decode UCS-2BE: exception");
+eval { decode('UCS-2LE', $n_16le, 1) };
+is (index($@,'UCS-2LE:'), 0, "decode UCS-2LE: exception");
 is(encode('UCS-2BE', $nasty), $f_16be, "encode UCS-2BE: fallback");
 is(encode('UCS-2LE', $nasty), $f_16le, "encode UCS-2LE: fallback");
 eval { encode('UCS-2BE', $nasty, 1) }; 
-ok($@=~/^UCS-2BE:/, "encode UCS-2BE: exception");
+is(index($@, 'UCS-2BE'), 0, "encode UCS-2BE: exception");
 eval { encode('UCS-2LE', $nasty, 1) }; 
-ok($@=~/^UCS-2LE:/, "encode UCS-2LE: exception");
+is(index($@, 'UCS-2LE'), 0, "encode UCS-2LE: exception");
+
+#
+# SvGROW test for (en|de)code_xs
+#
+SKIP: {
+    skip "Not on EBCDIC", 8 if $ON_EBCDIC;
+    my $utf8 = '';
+    for my $j (0,0x10){
+       for my $i (0..0xffff){
+           $j == 0 and (0xD800 <= $i && $i <= 0xDFFF) and next;
+           $utf8 .= ord($j+$i);
+       }
+       my $len = length($utf8);
+       for my $major ('UTF-16', 'UTF-32'){
+           for my $minor ('BE', 'LE'){
+               my $enc = $major.$minor;
+               is(decode($enc, encode($enc, $utf8)), $utf8, "$enc RT ($len)");
+           }
+       }
+    }
+};
+
 
 1;
 __END__
diff --git a/ext/Encode/t/jperl.t b/ext/Encode/t/jperl.t

index a43a6de..83fc12f 100644 (file)
--- a/ext/Encode/t/jperl.t
+++ b/ext/Encode/t/jperl.t
@@ -1,5 +1,5 @@
 #
-# $Id: jperl.t,v 1.20 2002/04/04 19:50:52 dankogai Exp $
+# $Id: jperl.t,v 1.21 2002/04/14 22:05:20 dankogai Exp $
 #
 # This script is written in euc-jp
 
@@ -21,7 +21,7 @@ BEGIN {
 }
 
 use strict;
-use Test::More tests => 15;
+use Test::More tests => 18;
 my $Debug = shift;
 
 no encoding; # ensure
@@ -68,6 +68,26 @@ ok(! defined(${^ENCODING}), q{not scoped yet});
 ok(! defined(${^ENCODING}), q{out of black magic});
 use bytes;
 is (length($Namae), 10);
+
+#
+# now something completely different!
+#
+{
+    use encoding "euc-jp", Filter=>1;
+    ok(1, "Filter on");
+    use utf8;
+    no strict 'vars'; # fools
+    # doesn't work w/ "my" as of this writing.
+    # because of  buggy strict.pm and utf8.pm
+    our $¿Í = 2; 
+    #   ^^U+4eba, "human" in CJK ideograph
+    $¿Í++; # a child is born
+    *people = \$¿Í;
+    is ($people, 3, "Filter:utf8 identifier");
+    no encoding;
+    ok(1, "Filter off");
+}
+
 1;
 __END__
 
diff --git a/ext/Encode/t/unibench.pl b/ext/Encode/t/unibench.pl

index 0d8dbf0..ed444cd 100644 (file)
--- a/ext/Encode/t/unibench.pl
+++ b/ext/Encode/t/unibench.pl
@@ -37,6 +37,12 @@ for my $i (@sizes){
        for my $op (qw(encode decode)){
            my ($meth, $from, $to) = ($op eq 'encode') ?
                (\&encode, 'utf8', 'utf16') : (\&decode, 'utf16', 'utf8');
+           my $XS = sub {
+               Encode::Unicode::set_transcoder("xs");  
+               $meth->('UTF-16BE', $S{$from}{$sz}{$cp})
+                    eq $S{$to}{$sz}{$cp} 
+                        or die "$op,$from,$to,$sz,$cp";
+           };
            my $modern = sub {
                Encode::Unicode::set_transcoder("modern");  
                $meth->('UTF-16BE', $S{$from}{$sz}{$cp})
@@ -52,7 +58,8 @@ for my $i (@sizes){
            print "---- $op length=$sz/range=$cp ----\n";
            my $r = timethese($count,
                     {
-                     "Modern" => $modern,
+                     "XS"      => $XS,
+                     "Modern"  => $modern,
                      "Classic" => $classic,
                     },
                     'none',
MANIFEST		patch \| blob \| blame \| history
ext/Encode/AUTHORS		patch \| blob \| blame \| history
ext/Encode/CN/CN.pm		patch \| blob \| blame \| history
ext/Encode/Changes		patch \| blob \| blame \| history
ext/Encode/Encode.pm		patch \| blob \| blame \| history
ext/Encode/Encode.xs		patch \| blob \| blame \| history
ext/Encode/Encode/ConfigLocal_PM.e2x	[new file with mode: 0644]	patch \| blob
ext/Encode/JP/JP.pm		patch \| blob \| blame \| history
ext/Encode/MANIFEST		patch \| blob \| blame \| history
ext/Encode/README		patch \| blob \| blame \| history
ext/Encode/bin/enc2xs		patch \| blob \| blame \| history
ext/Encode/encoding.pm		patch \| blob \| blame \| history
ext/Encode/lib/Encode/Config.pm		patch \| blob \| blame \| history
ext/Encode/lib/Encode/Encoder.pm		patch \| blob \| blame \| history
ext/Encode/lib/Encode/JP/2022_JP.pm	[deleted file]	patch \| blob \| blame \| history
ext/Encode/lib/Encode/JP/2022_JP1.pm	[deleted file]	patch \| blob \| blame \| history
ext/Encode/lib/Encode/JP/JIS.pm	[deleted file]	patch \| blob \| blame \| history
ext/Encode/lib/Encode/JP/JIS7.pm	[new file with mode: 0644]	patch \| blob
ext/Encode/lib/Encode/Unicode.pm		patch \| blob \| blame \| history
ext/Encode/t/Unicode.t		patch \| blob \| blame \| history
ext/Encode/t/jperl.t		patch \| blob \| blame \| history
ext/Encode/t/unibench.pl		patch \| blob \| blame \| history