PerlIO::encoding 0.01 from Dan Kogai.
Jarkko Hietaniemi [Wed, 17 Apr 2002 12:32:17 +0000 (12:32 +0000)]
p4raw-id: //depot/perl@15966

MANIFEST
ext/PerlIO/encoding/MANIFEST [new file with mode: 0644]
ext/PerlIO/encoding/Makefile.PL [new file with mode: 0644]
ext/PerlIO/encoding/encoding.pm [new file with mode: 0644]
ext/PerlIO/encoding/encoding.xs [new file with mode: 0644]

index bbd35b6..ea5c80a 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -504,6 +504,10 @@ ext/Opcode/Opcode.xs               Opcode extension external subroutines
 ext/Opcode/ops.pm              "Pragma" form of Opcode extension Perl module
 ext/Opcode/ops.t               See if Opcode works
 ext/Opcode/Safe.pm             Safe extension Perl module
+ext/PerlIO/encoding/MANIFEST   PerlIO::encoding list of files
+ext/PerlIO/encoding/Makefile.PL        PerlIO::encoding makefile writer
+ext/PerlIO/encoding/encoding.pm        PerlIO::encoding
+ext/PerlIO/encoding/encoding.xs        PerlIO::encoding
 ext/PerlIO/PerlIO.t            See if PerlIO works
 ext/PerlIO/Scalar/Makefile.PL  PerlIO layer for scalars
 ext/PerlIO/Scalar/Scalar.pm    PerlIO layer for scalars
diff --git a/ext/PerlIO/encoding/MANIFEST b/ext/PerlIO/encoding/MANIFEST
new file mode 100644 (file)
index 0000000..e3abc81
--- /dev/null
@@ -0,0 +1,4 @@
+MANIFEST                       This list of files
+Makefile.PL
+encoding.pm
+encoding.xs
diff --git a/ext/PerlIO/encoding/Makefile.PL b/ext/PerlIO/encoding/Makefile.PL
new file mode 100644 (file)
index 0000000..b6e0132
--- /dev/null
@@ -0,0 +1,7 @@
+use ExtUtils::MakeMaker;
+WriteMakefile(
+       NAME            => "PerlIO::encoding",
+       MAN3PODS    => {},  # Pods will be built by installman.
+       VERSION_FROM    => 'encoding.pm',
+);
+
diff --git a/ext/PerlIO/encoding/encoding.pm b/ext/PerlIO/encoding/encoding.pm
new file mode 100644 (file)
index 0000000..b29373e
--- /dev/null
@@ -0,0 +1,21 @@
+package PerlIO::encoding;
+our $VERSION = '0.01';
+use XSLoader ();
+use Encode;
+XSLoader::load 'PerlIO::encoding';
+1;
+__END__
+
+=head1 NAME
+
+PerlIO::encoding - encoding layer
+
+=head1 SYNOPSIS
+
+   open($fh,"<...",\$scalar);
+
+=head1 DESCRIPTION
+
+=cut
+
+
diff --git a/ext/PerlIO/encoding/encoding.xs b/ext/PerlIO/encoding/encoding.xs
new file mode 100644 (file)
index 0000000..9d46e01
--- /dev/null
@@ -0,0 +1,470 @@
+#define PERL_NO_GET_CONTEXT
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+#define U8 U8
+
+#if defined(USE_PERLIO) && !defined(USE_SFIO)
+
+/* Define an encoding "layer" in the perliol.h sense.
+
+   The layer defined here "inherits" in an object-oriented sense from
+   the "perlio" layer with its PerlIOBuf_* "methods".  The
+   implementation is particularly efficient as until Encode settles
+   down there is no point in tryint to tune it.
+
+   The layer works by overloading the "fill" and "flush" methods.
+
+   "fill" calls "SUPER::fill" in perl terms, then calls the encode OO
+   perl API to convert the encoded data to UTF-8 form, then copies it
+   back to the buffer. The "base class's" read methods then see the
+   UTF-8 data.
+
+   "flush" transforms the UTF-8 data deposited by the "base class's
+   write method in the buffer back into the encoded form using the
+   encode OO perl API, then copies data back into the buffer and calls
+   "SUPER::flush.
+
+   Note that "flush" is _also_ called for read mode - we still do the
+   (back)-translate so that the the base class's "flush" sees the
+   correct number of encoded chars for positioning the seek
+   pointer. (This double translation is the worst performance issue -
+   particularly with all-perl encode engine.)
+
+*/
+
+#include "perliol.h"
+
+typedef struct {
+    PerlIOBuf base;            /* PerlIOBuf stuff */
+    SV *bufsv;                 /* buffer seen by layers above */
+    SV *dataSV;                        /* data we have read from layer below */
+    SV *enc;                   /* the encoding object */
+} PerlIOEncode;
+
+SV *
+PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
+{
+    PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
+    SV *sv = &PL_sv_undef;
+    if (e->enc) {
+       dSP;
+       ENTER;
+       SAVETMPS;
+       PUSHMARK(sp);
+       XPUSHs(e->enc);
+       PUTBACK;
+       if (perl_call_method("name", G_SCALAR) == 1) {
+           SPAGAIN;
+           sv = newSVsv(POPs);
+           PUTBACK;
+       }
+    }
+    return sv;
+}
+
+IV
+PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
+{
+    PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
+    dSP;
+    IV code;
+    code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
+    ENTER;
+    SAVETMPS;
+    PUSHMARK(sp);
+    XPUSHs(arg);
+    PUTBACK;
+    if (perl_call_pv("Encode::find_encoding", G_SCALAR) != 1) {
+       /* should never happen */
+       Perl_die(aTHX_ "Encode::find_encoding did not return a value");
+       return -1;
+    }
+    SPAGAIN;
+    e->enc = POPs;
+    PUTBACK;
+    if (!SvROK(e->enc)) {
+       e->enc = Nullsv;
+       errno = EINVAL;
+       Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
+                   arg); 
+       code = -1;
+    }
+    else {
+       SvREFCNT_inc(e->enc);
+       PerlIOBase(f)->flags |= PERLIO_F_UTF8;
+    }
+    FREETMPS;
+    LEAVE;
+    return code;
+}
+
+IV
+PerlIOEncode_popped(pTHX_ PerlIO * f)
+{
+    PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
+    if (e->enc) {
+       SvREFCNT_dec(e->enc);
+       e->enc = Nullsv;
+    }
+    if (e->bufsv) {
+       SvREFCNT_dec(e->bufsv);
+       e->bufsv = Nullsv;
+    }
+    if (e->dataSV) {
+       SvREFCNT_dec(e->dataSV);
+       e->dataSV = Nullsv;
+    }
+    return 0;
+}
+
+STDCHAR *
+PerlIOEncode_get_base(pTHX_ PerlIO * f)
+{
+    PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
+    if (!e->base.bufsiz)
+       e->base.bufsiz = 1024;
+    if (!e->bufsv) {
+       e->bufsv = newSV(e->base.bufsiz);
+       sv_setpvn(e->bufsv, "", 0);
+    }
+    e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
+    if (!e->base.ptr)
+       e->base.ptr = e->base.buf;
+    if (!e->base.end)
+       e->base.end = e->base.buf;
+    if (e->base.ptr < e->base.buf
+       || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
+       Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
+                 e->base.buf + SvLEN(e->bufsv));
+       abort();
+    }
+    if (SvLEN(e->bufsv) < e->base.bufsiz) {
+       SSize_t poff = e->base.ptr - e->base.buf;
+       SSize_t eoff = e->base.end - e->base.buf;
+       e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
+       e->base.ptr = e->base.buf + poff;
+       e->base.end = e->base.buf + eoff;
+    }
+    if (e->base.ptr < e->base.buf
+       || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
+       Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
+                 e->base.buf + SvLEN(e->bufsv));
+       abort();
+    }
+    return e->base.buf;
+}
+
+IV
+PerlIOEncode_fill(pTHX_ PerlIO * f)
+{
+    PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
+    dSP;
+    IV code = 0;
+    PerlIO *n;
+    SSize_t avail;
+    if (PerlIO_flush(f) != 0)
+       return -1;
+    n  = PerlIONext(f);
+    if (!PerlIO_fast_gets(n)) {
+       /* Things get too messy if we don't have a buffer layer
+          push a :perlio to do the job */
+       char mode[8];
+       n  = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
+       if (!n) {
+           Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
+       }
+    }
+    ENTER;
+    SAVETMPS;
+  retry:
+    avail = PerlIO_get_cnt(n);
+    if (avail <= 0) {
+       avail = PerlIO_fill(n);
+       if (avail == 0) {
+           avail = PerlIO_get_cnt(n);
+       }
+       else {
+           if (!PerlIO_error(n) && PerlIO_eof(n))
+               avail = 0;
+       }
+    }
+    if (avail > 0) {
+       STDCHAR *ptr = PerlIO_get_ptr(n);
+       SSize_t use  = avail;
+       SV *uni;
+       char *s;
+       STRLEN len = 0;
+       e->base.ptr = e->base.end = (STDCHAR *) Nullch;
+       (void) PerlIOEncode_get_base(aTHX_ f);
+       if (!e->dataSV)
+           e->dataSV = newSV(0);
+       if (SvTYPE(e->dataSV) < SVt_PV) {
+           sv_upgrade(e->dataSV,SVt_PV);
+       }
+       if (SvCUR(e->dataSV)) {
+           /* something left over from last time - create a normal
+              SV with new data appended
+            */
+           if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
+              use = e->base.bufsiz - SvCUR(e->dataSV);
+           }
+           sv_catpvn(e->dataSV,(char*)ptr,use);
+       }
+       else {
+           /* Create a "dummy" SV to represent the available data from layer below */
+           if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) {
+               Safefree(SvPVX(e->dataSV));
+           }
+           if (use > e->base.bufsiz) {
+              use = e->base.bufsiz;
+           }
+           SvPVX(e->dataSV) = (char *) ptr;
+           SvLEN(e->dataSV) = 0;  /* Hands off sv.c - it isn't yours */
+           SvCUR_set(e->dataSV,use);
+           SvPOK_only(e->dataSV);
+       }
+       SvUTF8_off(e->dataSV);
+       PUSHMARK(sp);
+       XPUSHs(e->enc);
+       XPUSHs(e->dataSV);
+       XPUSHs(&PL_sv_yes);
+       PUTBACK;
+       if (perl_call_method("decode", G_SCALAR) != 1) {
+           Perl_die(aTHX_ "panic: decode did not return a value");
+       }
+       SPAGAIN;
+       uni = POPs;
+       PUTBACK;
+       /* Now get translated string (forced to UTF-8) and use as buffer */
+       if (SvPOK(uni)) {
+           s = SvPVutf8(uni, len);
+#ifdef PARANOID_ENCODE_CHECKS
+           if (len && !is_utf8_string((U8*)s,len)) {
+               Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
+           }
+#endif
+       }
+       if (len > 0) {
+           /* Got _something */
+           /* if decode gave us back dataSV then data may vanish when
+              we do ptrcnt adjust - so take our copy now.
+              (The copy is a pain - need a put-it-here option for decode.)
+            */
+           sv_setpvn(e->bufsv,s,len);
+           e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
+           e->base.end = e->base.ptr + SvCUR(e->bufsv);
+           PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
+           SvUTF8_on(e->bufsv);
+
+           /* Adjust ptr/cnt not taking anything which
+              did not translate - not clear this is a win */
+           /* compute amount we took */
+           use -= SvCUR(e->dataSV);
+           PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
+           /* and as we did not take it it isn't pending */
+           SvCUR_set(e->dataSV,0);
+       } else {
+           /* Got nothing - assume partial character so we need some more */
+           /* Make sure e->dataSV is a normal SV before re-filling as
+              buffer alias will change under us
+            */
+           s = SvPV(e->dataSV,len);
+           sv_setpvn(e->dataSV,s,len);
+           PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
+           goto retry;
+       }
+       FREETMPS;
+       LEAVE;
+       return code;
+    }
+    else {
+       if (avail == 0)
+           PerlIOBase(f)->flags |= PERLIO_F_EOF;
+       else
+           PerlIOBase(f)->flags |= PERLIO_F_ERROR;
+       return -1;
+    }
+}
+
+IV
+PerlIOEncode_flush(pTHX_ PerlIO * f)
+{
+    PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
+    IV code = 0;
+    if (e->bufsv && (e->base.ptr > e->base.buf)) {
+       dSP;
+       SV *str;
+       char *s;
+       STRLEN len;
+       SSize_t count = 0;
+       if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) {
+           /* Write case encode the buffer and write() to layer below */
+           ENTER;
+           SAVETMPS;
+           PUSHMARK(sp);
+           XPUSHs(e->enc);
+           SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
+           SvUTF8_on(e->bufsv);
+           XPUSHs(e->bufsv);
+           XPUSHs(&PL_sv_yes);
+           PUTBACK;
+           if (perl_call_method("encode", G_SCALAR) != 1) {
+               Perl_die(aTHX_ "panic: encode did not return a value");
+           }
+           SPAGAIN;
+           str = POPs;
+           PUTBACK;
+           s = SvPV(str, len);
+           count = PerlIO_write(PerlIONext(f),s,len);
+           if (count != len) {
+               code = -1;
+           }
+           FREETMPS;
+           LEAVE;
+           if (PerlIO_flush(PerlIONext(f)) != 0) {
+               code = -1;
+           }
+           if (SvCUR(e->bufsv)) {
+               /* Did not all translate */
+               e->base.ptr = e->base.buf+SvCUR(e->bufsv);
+               return code;
+           }
+       }
+       else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
+           /* read case */
+           /* if we have any untranslated stuff then unread that first */
+           if (e->dataSV && SvCUR(e->dataSV)) {
+               s = SvPV(e->dataSV, len);
+               count = PerlIO_unread(PerlIONext(f),s,len);
+               if (count != len) {
+                   code = -1;
+               }
+           }
+           /* See if there is anything left in the buffer */
+           if (e->base.ptr < e->base.end) {
+               /* Bother - have unread data.
+                  re-encode and unread() to layer below
+                */
+               ENTER;
+               SAVETMPS;
+               str = sv_newmortal();
+               sv_upgrade(str, SVt_PV);
+               SvPVX(str) = (char*)e->base.ptr;
+               SvLEN(str) = 0;
+               SvCUR_set(str, e->base.end - e->base.ptr);
+               SvPOK_only(str);
+               SvUTF8_on(str);
+               PUSHMARK(sp);
+               XPUSHs(e->enc);
+               XPUSHs(str);
+               XPUSHs(&PL_sv_yes);
+               PUTBACK;
+               if (perl_call_method("encode", G_SCALAR) != 1) {
+                    Perl_die(aTHX_ "panic: encode did not return a value");
+               }
+               SPAGAIN;
+               str = POPs;
+               PUTBACK;
+               s = SvPV(str, len);
+               count = PerlIO_unread(PerlIONext(f),s,len);
+               if (count != len) {
+                   code = -1;
+               }
+               FREETMPS;
+               LEAVE;
+           }
+       }
+       e->base.ptr = e->base.end = e->base.buf;
+       PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
+    }
+    return code;
+}
+
+IV
+PerlIOEncode_close(pTHX_ PerlIO * f)
+{
+    PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
+    IV code = PerlIOBase_close(aTHX_ f);
+    if (e->bufsv) {
+       if (e->base.buf && e->base.ptr > e->base.buf) {
+           Perl_croak(aTHX_ "Close with partial character");
+       }
+       SvREFCNT_dec(e->bufsv);
+       e->bufsv = Nullsv;
+    }
+    e->base.buf = NULL;
+    e->base.ptr = NULL;
+    e->base.end = NULL;
+    PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
+    return code;
+}
+
+Off_t
+PerlIOEncode_tell(pTHX_ PerlIO * f)
+{
+    PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
+    /* Unfortunately the only way to get a postion is to (re-)translate,
+       the UTF8 we have in bufefr and then ask layer below
+     */
+    PerlIO_flush(f);
+    if (b->buf && b->ptr > b->buf) {
+       Perl_croak(aTHX_ "Cannot tell at partial character");
+    }
+    return PerlIO_tell(PerlIONext(f));
+}
+
+PerlIO *
+PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
+                CLONE_PARAMS * params, int flags)
+{
+    if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
+       PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
+       PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
+       if (oe->enc) {
+           fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
+       }
+    }
+    return f;
+}
+
+PerlIO_funcs PerlIO_encode = {
+    "encoding",
+    sizeof(PerlIOEncode),
+    PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
+    PerlIOEncode_pushed,
+    PerlIOEncode_popped,
+    PerlIOBuf_open,
+    PerlIOEncode_getarg,
+    PerlIOBase_fileno,
+    PerlIOEncode_dup,
+    PerlIOBuf_read,
+    PerlIOBuf_unread,
+    PerlIOBuf_write,
+    PerlIOBuf_seek,
+    PerlIOEncode_tell,
+    PerlIOEncode_close,
+    PerlIOEncode_flush,
+    PerlIOEncode_fill,
+    PerlIOBase_eof,
+    PerlIOBase_error,
+    PerlIOBase_clearerr,
+    PerlIOBase_setlinebuf,
+    PerlIOEncode_get_base,
+    PerlIOBuf_bufsiz,
+    PerlIOBuf_get_ptr,
+    PerlIOBuf_get_cnt,
+    PerlIOBuf_set_ptrcnt,
+};
+#endif                         /* encode layer */
+
+MODULE = PerlIO::encoding PACKAGE = PerlIO::encoding
+
+PROTOTYPES: ENABLE
+
+BOOT:
+{
+#ifdef PERLIO_LAYERS
+ PerlIO_define_layer(aTHX_ &PerlIO_encode);
+#endif
+}