From: Jarkko Hietaniemi Date: Wed, 17 Apr 2002 12:32:17 +0000 (+0000) Subject: PerlIO::encoding 0.01 from Dan Kogai. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=59035dcc924f096b164c1e73d87f1ee955a0d9be;p=p5sagit%2Fp5-mst-13.2.git PerlIO::encoding 0.01 from Dan Kogai. p4raw-id: //depot/perl@15966 --- diff --git a/MANIFEST b/MANIFEST index bbd35b6..ea5c80a 100644 --- a/MANIFEST +++ b/MANIFEST @@ -504,6 +504,10 @@ ext/Opcode/Opcode.xs Opcode extension external subroutines ext/Opcode/ops.pm "Pragma" form of Opcode extension Perl module ext/Opcode/ops.t See if Opcode works ext/Opcode/Safe.pm Safe extension Perl module +ext/PerlIO/encoding/MANIFEST PerlIO::encoding list of files +ext/PerlIO/encoding/Makefile.PL PerlIO::encoding makefile writer +ext/PerlIO/encoding/encoding.pm PerlIO::encoding +ext/PerlIO/encoding/encoding.xs PerlIO::encoding ext/PerlIO/PerlIO.t See if PerlIO works ext/PerlIO/Scalar/Makefile.PL PerlIO layer for scalars ext/PerlIO/Scalar/Scalar.pm PerlIO layer for scalars diff --git a/ext/PerlIO/encoding/MANIFEST b/ext/PerlIO/encoding/MANIFEST new file mode 100644 index 0000000..e3abc81 --- /dev/null +++ b/ext/PerlIO/encoding/MANIFEST @@ -0,0 +1,4 @@ +MANIFEST This list of files +Makefile.PL +encoding.pm +encoding.xs diff --git a/ext/PerlIO/encoding/Makefile.PL b/ext/PerlIO/encoding/Makefile.PL new file mode 100644 index 0000000..b6e0132 --- /dev/null +++ b/ext/PerlIO/encoding/Makefile.PL @@ -0,0 +1,7 @@ +use ExtUtils::MakeMaker; +WriteMakefile( + NAME => "PerlIO::encoding", + MAN3PODS => {}, # Pods will be built by installman. + VERSION_FROM => 'encoding.pm', +); + diff --git a/ext/PerlIO/encoding/encoding.pm b/ext/PerlIO/encoding/encoding.pm new file mode 100644 index 0000000..b29373e --- /dev/null +++ b/ext/PerlIO/encoding/encoding.pm @@ -0,0 +1,21 @@ +package PerlIO::encoding; +our $VERSION = '0.01'; +use XSLoader (); +use Encode; +XSLoader::load 'PerlIO::encoding'; +1; +__END__ + +=head1 NAME + +PerlIO::encoding - encoding layer + +=head1 SYNOPSIS + + open($fh,"<...",\$scalar); + +=head1 DESCRIPTION + +=cut + + diff --git a/ext/PerlIO/encoding/encoding.xs b/ext/PerlIO/encoding/encoding.xs new file mode 100644 index 0000000..9d46e01 --- /dev/null +++ b/ext/PerlIO/encoding/encoding.xs @@ -0,0 +1,470 @@ +#define PERL_NO_GET_CONTEXT +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" +#define U8 U8 + +#if defined(USE_PERLIO) && !defined(USE_SFIO) + +/* Define an encoding "layer" in the perliol.h sense. + + The layer defined here "inherits" in an object-oriented sense from + the "perlio" layer with its PerlIOBuf_* "methods". The + implementation is particularly efficient as until Encode settles + down there is no point in tryint to tune it. + + The layer works by overloading the "fill" and "flush" methods. + + "fill" calls "SUPER::fill" in perl terms, then calls the encode OO + perl API to convert the encoded data to UTF-8 form, then copies it + back to the buffer. The "base class's" read methods then see the + UTF-8 data. + + "flush" transforms the UTF-8 data deposited by the "base class's + write method in the buffer back into the encoded form using the + encode OO perl API, then copies data back into the buffer and calls + "SUPER::flush. + + Note that "flush" is _also_ called for read mode - we still do the + (back)-translate so that the the base class's "flush" sees the + correct number of encoded chars for positioning the seek + pointer. (This double translation is the worst performance issue - + particularly with all-perl encode engine.) + +*/ + +#include "perliol.h" + +typedef struct { + PerlIOBuf base; /* PerlIOBuf stuff */ + SV *bufsv; /* buffer seen by layers above */ + SV *dataSV; /* data we have read from layer below */ + SV *enc; /* the encoding object */ +} PerlIOEncode; + +SV * +PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags) +{ + PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); + SV *sv = &PL_sv_undef; + if (e->enc) { + dSP; + ENTER; + SAVETMPS; + PUSHMARK(sp); + XPUSHs(e->enc); + PUTBACK; + if (perl_call_method("name", G_SCALAR) == 1) { + SPAGAIN; + sv = newSVsv(POPs); + PUTBACK; + } + } + return sv; +} + +IV +PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg) +{ + PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); + dSP; + IV code; + code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv); + ENTER; + SAVETMPS; + PUSHMARK(sp); + XPUSHs(arg); + PUTBACK; + if (perl_call_pv("Encode::find_encoding", G_SCALAR) != 1) { + /* should never happen */ + Perl_die(aTHX_ "Encode::find_encoding did not return a value"); + return -1; + } + SPAGAIN; + e->enc = POPs; + PUTBACK; + if (!SvROK(e->enc)) { + e->enc = Nullsv; + errno = EINVAL; + Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"", + arg); + code = -1; + } + else { + SvREFCNT_inc(e->enc); + PerlIOBase(f)->flags |= PERLIO_F_UTF8; + } + FREETMPS; + LEAVE; + return code; +} + +IV +PerlIOEncode_popped(pTHX_ PerlIO * f) +{ + PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); + if (e->enc) { + SvREFCNT_dec(e->enc); + e->enc = Nullsv; + } + if (e->bufsv) { + SvREFCNT_dec(e->bufsv); + e->bufsv = Nullsv; + } + if (e->dataSV) { + SvREFCNT_dec(e->dataSV); + e->dataSV = Nullsv; + } + return 0; +} + +STDCHAR * +PerlIOEncode_get_base(pTHX_ PerlIO * f) +{ + PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); + if (!e->base.bufsiz) + e->base.bufsiz = 1024; + if (!e->bufsv) { + e->bufsv = newSV(e->base.bufsiz); + sv_setpvn(e->bufsv, "", 0); + } + e->base.buf = (STDCHAR *) SvPVX(e->bufsv); + if (!e->base.ptr) + e->base.ptr = e->base.buf; + if (!e->base.end) + e->base.end = e->base.buf; + if (e->base.ptr < e->base.buf + || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) { + Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr, + e->base.buf + SvLEN(e->bufsv)); + abort(); + } + if (SvLEN(e->bufsv) < e->base.bufsiz) { + SSize_t poff = e->base.ptr - e->base.buf; + SSize_t eoff = e->base.end - e->base.buf; + e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz); + e->base.ptr = e->base.buf + poff; + e->base.end = e->base.buf + eoff; + } + if (e->base.ptr < e->base.buf + || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) { + Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr, + e->base.buf + SvLEN(e->bufsv)); + abort(); + } + return e->base.buf; +} + +IV +PerlIOEncode_fill(pTHX_ PerlIO * f) +{ + PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); + dSP; + IV code = 0; + PerlIO *n; + SSize_t avail; + if (PerlIO_flush(f) != 0) + return -1; + n = PerlIONext(f); + if (!PerlIO_fast_gets(n)) { + /* Things get too messy if we don't have a buffer layer + push a :perlio to do the job */ + char mode[8]; + n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv); + if (!n) { + Perl_die(aTHX_ "panic: cannot push :perlio for %p",f); + } + } + ENTER; + SAVETMPS; + retry: + avail = PerlIO_get_cnt(n); + if (avail <= 0) { + avail = PerlIO_fill(n); + if (avail == 0) { + avail = PerlIO_get_cnt(n); + } + else { + if (!PerlIO_error(n) && PerlIO_eof(n)) + avail = 0; + } + } + if (avail > 0) { + STDCHAR *ptr = PerlIO_get_ptr(n); + SSize_t use = avail; + SV *uni; + char *s; + STRLEN len = 0; + e->base.ptr = e->base.end = (STDCHAR *) Nullch; + (void) PerlIOEncode_get_base(aTHX_ f); + if (!e->dataSV) + e->dataSV = newSV(0); + if (SvTYPE(e->dataSV) < SVt_PV) { + sv_upgrade(e->dataSV,SVt_PV); + } + if (SvCUR(e->dataSV)) { + /* something left over from last time - create a normal + SV with new data appended + */ + if (use + SvCUR(e->dataSV) > e->base.bufsiz) { + use = e->base.bufsiz - SvCUR(e->dataSV); + } + sv_catpvn(e->dataSV,(char*)ptr,use); + } + else { + /* Create a "dummy" SV to represent the available data from layer below */ + if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) { + Safefree(SvPVX(e->dataSV)); + } + if (use > e->base.bufsiz) { + use = e->base.bufsiz; + } + SvPVX(e->dataSV) = (char *) ptr; + SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */ + SvCUR_set(e->dataSV,use); + SvPOK_only(e->dataSV); + } + SvUTF8_off(e->dataSV); + PUSHMARK(sp); + XPUSHs(e->enc); + XPUSHs(e->dataSV); + XPUSHs(&PL_sv_yes); + PUTBACK; + if (perl_call_method("decode", G_SCALAR) != 1) { + Perl_die(aTHX_ "panic: decode did not return a value"); + } + SPAGAIN; + uni = POPs; + PUTBACK; + /* Now get translated string (forced to UTF-8) and use as buffer */ + if (SvPOK(uni)) { + s = SvPVutf8(uni, len); +#ifdef PARANOID_ENCODE_CHECKS + if (len && !is_utf8_string((U8*)s,len)) { + Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s); + } +#endif + } + if (len > 0) { + /* Got _something */ + /* if decode gave us back dataSV then data may vanish when + we do ptrcnt adjust - so take our copy now. + (The copy is a pain - need a put-it-here option for decode.) + */ + sv_setpvn(e->bufsv,s,len); + e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv); + e->base.end = e->base.ptr + SvCUR(e->bufsv); + PerlIOBase(f)->flags |= PERLIO_F_RDBUF; + SvUTF8_on(e->bufsv); + + /* Adjust ptr/cnt not taking anything which + did not translate - not clear this is a win */ + /* compute amount we took */ + use -= SvCUR(e->dataSV); + PerlIO_set_ptrcnt(n, ptr+use, (avail-use)); + /* and as we did not take it it isn't pending */ + SvCUR_set(e->dataSV,0); + } else { + /* Got nothing - assume partial character so we need some more */ + /* Make sure e->dataSV is a normal SV before re-filling as + buffer alias will change under us + */ + s = SvPV(e->dataSV,len); + sv_setpvn(e->dataSV,s,len); + PerlIO_set_ptrcnt(n, ptr+use, (avail-use)); + goto retry; + } + FREETMPS; + LEAVE; + return code; + } + else { + if (avail == 0) + PerlIOBase(f)->flags |= PERLIO_F_EOF; + else + PerlIOBase(f)->flags |= PERLIO_F_ERROR; + return -1; + } +} + +IV +PerlIOEncode_flush(pTHX_ PerlIO * f) +{ + PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); + IV code = 0; + if (e->bufsv && (e->base.ptr > e->base.buf)) { + dSP; + SV *str; + char *s; + STRLEN len; + SSize_t count = 0; + if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) { + /* Write case encode the buffer and write() to layer below */ + ENTER; + SAVETMPS; + PUSHMARK(sp); + XPUSHs(e->enc); + SvCUR_set(e->bufsv, e->base.ptr - e->base.buf); + SvUTF8_on(e->bufsv); + XPUSHs(e->bufsv); + XPUSHs(&PL_sv_yes); + PUTBACK; + if (perl_call_method("encode", G_SCALAR) != 1) { + Perl_die(aTHX_ "panic: encode did not return a value"); + } + SPAGAIN; + str = POPs; + PUTBACK; + s = SvPV(str, len); + count = PerlIO_write(PerlIONext(f),s,len); + if (count != len) { + code = -1; + } + FREETMPS; + LEAVE; + if (PerlIO_flush(PerlIONext(f)) != 0) { + code = -1; + } + if (SvCUR(e->bufsv)) { + /* Did not all translate */ + e->base.ptr = e->base.buf+SvCUR(e->bufsv); + return code; + } + } + else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) { + /* read case */ + /* if we have any untranslated stuff then unread that first */ + if (e->dataSV && SvCUR(e->dataSV)) { + s = SvPV(e->dataSV, len); + count = PerlIO_unread(PerlIONext(f),s,len); + if (count != len) { + code = -1; + } + } + /* See if there is anything left in the buffer */ + if (e->base.ptr < e->base.end) { + /* Bother - have unread data. + re-encode and unread() to layer below + */ + ENTER; + SAVETMPS; + str = sv_newmortal(); + sv_upgrade(str, SVt_PV); + SvPVX(str) = (char*)e->base.ptr; + SvLEN(str) = 0; + SvCUR_set(str, e->base.end - e->base.ptr); + SvPOK_only(str); + SvUTF8_on(str); + PUSHMARK(sp); + XPUSHs(e->enc); + XPUSHs(str); + XPUSHs(&PL_sv_yes); + PUTBACK; + if (perl_call_method("encode", G_SCALAR) != 1) { + Perl_die(aTHX_ "panic: encode did not return a value"); + } + SPAGAIN; + str = POPs; + PUTBACK; + s = SvPV(str, len); + count = PerlIO_unread(PerlIONext(f),s,len); + if (count != len) { + code = -1; + } + FREETMPS; + LEAVE; + } + } + e->base.ptr = e->base.end = e->base.buf; + PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF); + } + return code; +} + +IV +PerlIOEncode_close(pTHX_ PerlIO * f) +{ + PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); + IV code = PerlIOBase_close(aTHX_ f); + if (e->bufsv) { + if (e->base.buf && e->base.ptr > e->base.buf) { + Perl_croak(aTHX_ "Close with partial character"); + } + SvREFCNT_dec(e->bufsv); + e->bufsv = Nullsv; + } + e->base.buf = NULL; + e->base.ptr = NULL; + e->base.end = NULL; + PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF); + return code; +} + +Off_t +PerlIOEncode_tell(pTHX_ PerlIO * f) +{ + PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf); + /* Unfortunately the only way to get a postion is to (re-)translate, + the UTF8 we have in bufefr and then ask layer below + */ + PerlIO_flush(f); + if (b->buf && b->ptr > b->buf) { + Perl_croak(aTHX_ "Cannot tell at partial character"); + } + return PerlIO_tell(PerlIONext(f)); +} + +PerlIO * +PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o, + CLONE_PARAMS * params, int flags) +{ + if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) { + PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode); + PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode); + if (oe->enc) { + fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params); + } + } + return f; +} + +PerlIO_funcs PerlIO_encode = { + "encoding", + sizeof(PerlIOEncode), + PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT, + PerlIOEncode_pushed, + PerlIOEncode_popped, + PerlIOBuf_open, + PerlIOEncode_getarg, + PerlIOBase_fileno, + PerlIOEncode_dup, + PerlIOBuf_read, + PerlIOBuf_unread, + PerlIOBuf_write, + PerlIOBuf_seek, + PerlIOEncode_tell, + PerlIOEncode_close, + PerlIOEncode_flush, + PerlIOEncode_fill, + PerlIOBase_eof, + PerlIOBase_error, + PerlIOBase_clearerr, + PerlIOBase_setlinebuf, + PerlIOEncode_get_base, + PerlIOBuf_bufsiz, + PerlIOBuf_get_ptr, + PerlIOBuf_get_cnt, + PerlIOBuf_set_ptrcnt, +}; +#endif /* encode layer */ + +MODULE = PerlIO::encoding PACKAGE = PerlIO::encoding + +PROTOTYPES: ENABLE + +BOOT: +{ +#ifdef PERLIO_LAYERS + PerlIO_define_layer(aTHX_ &PerlIO_encode); +#endif +}