+#define PERL_NO_GET_CONTEXT
+
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#define U8 U8
#include "encode.h"
-#include "iso8859.h"
-#include "EBCDIC.h"
-#include "Symbols.h"
+/* #include "8859.h" */
+/* #include "EBCDIC.h" */
+/* #include "Symbols.h" */
+#include "def_t.h"
-#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) { \
+#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; /* declares the context that aTHX_ hands to Perl_croak on the next line */ \
Perl_croak(aTHX_ "panic_unimplemented"); \
return (y)0; /* fool picky compilers */ \
}
UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
-UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
-
-#ifdef USE_PERLIO
+ UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
+#if defined(USE_PERLIO) && !defined(USE_SFIO)
/* Define an encoding "layer" in the perliol.h sense.
The layer defined here "inherits" in an object-oriented sense from the
"perlio" layer with its PerlIOBuf_* "methods".
issue - particularly with all-perl encode engine.)
*/
-
-
#include "perliol.h"
-
-typedef struct
-{
- PerlIOBuf base; /* PerlIOBuf stuff */
- SV * bufsv;
- SV * enc;
+typedef struct {
+ PerlIOBuf base; /* PerlIOBuf stuff; tell() views this layer through PerlIOSelf(f, PerlIOBuf) and the table reuses PerlIOBuf_* methods */
+ SV *bufsv; /* buffer seen by layers above; base.buf points at its PV (see get_base) and it carries UTF-8 text */
+ SV *dataSV; /* data we have read from layer below: undecoded bytes, kept across fill() calls when decode produced nothing */
+ SV *enc; /* the encoding object returned by Encode::find_encoding; its decode/encode/name methods are called by this layer */
} PerlIOEncode;
+/*
+ * Return the argument that describes this layer: the result of calling
+ * the "name" method on the stored encoding object, copied into a fresh
+ * SV.  When no encoding object is attached, or the call does not yield
+ * exactly one value, &PL_sv_undef is returned instead.
+ *
+ * param and flags are not used by this implementation.
+ */
+SV *
+PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
+{
+ PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
+ SV *sv = &PL_sv_undef;
+ if (e->enc) {
+ dSP;
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(sp);
+ XPUSHs(e->enc);
+ PUTBACK;
+ if (perl_call_method("name", G_SCALAR) == 1) {
+ SPAGAIN;
+ /* Take a copy: the value on the stack is a temporary that
+ FREETMPS below is allowed to release */
+ sv = newSVsv(POPs);
+ PUTBACK;
+ }
+ /* Balance the ENTER/SAVETMPS above; without this every call
+ leaked a scope and the temporaries created by the method call */
+ FREETMPS;
+ LEAVE;
+ }
+ return sv;
+}
IV
-PerlIOEncode_pushed(PerlIO *f, const char *mode,const char *arg,STRLEN len)
+PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg) /* Push handler for the encoding layer.
+ * Runs PerlIOBuf_pushed, then calls Encode::find_encoding(arg) and keeps
+ * the returned object in e->enc.  Returns the code from PerlIOBuf_pushed
+ * when an encoding object (a reference) comes back; otherwise sets errno
+ * to EINVAL, emits a WARN_IO warning and returns -1.
+ */
{
- PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
- dTHX;
- dSP;
- IV code;
- code = PerlIOBuf_pushed(f,mode,Nullch,0);
- ENTER;
- SAVETMPS;
- PUSHMARK(sp);
- XPUSHs(sv_2mortal(newSVpv("Encode",0)));
- XPUSHs(sv_2mortal(newSVpvn(arg,len)));
- PUTBACK;
- if (perl_call_method("getEncoding",G_SCALAR) != 1)
- return -1;
- SPAGAIN;
- e->enc = POPs;
- PUTBACK;
- if (!SvROK(e->enc))
- return -1;
- SvREFCNT_inc(e->enc);
- FREETMPS;
- LEAVE;
- PerlIOBase(f)->flags |= PERLIO_F_UTF8;
- return code;
+ PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
+ dSP;
+ IV code;
+ code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(sp);
+ XPUSHs(arg); /* the layer argument is passed straight through as the encoding to look up */
+ PUTBACK;
+ if (perl_call_pv("Encode::find_encoding", G_SCALAR) != 1) {
+ /* should never happen */
+ Perl_die(aTHX_ "Encode::find_encoding did not return a value");
+ return -1;
+ }
+ SPAGAIN;
+ e->enc = POPs;
+ PUTBACK;
+ if (!SvROK(e->enc)) { /* a non-reference result is treated as "no such encoding" */
+ e->enc = Nullsv; /* do not keep a pointer to the temporary; popped() only releases non-NULL members */
+ errno = EINVAL;
+ Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
+ arg);
+ code = -1;
+ }
+ else {
+ SvREFCNT_inc(e->enc); /* take our own reference before FREETMPS; dropped again in popped() */
+ PerlIOBase(f)->flags |= PERLIO_F_UTF8;
+ }
+ FREETMPS;
+ LEAVE;
+ return code;
}
IV
-PerlIOEncode_popped(PerlIO *f)
+PerlIOEncode_popped(pTHX_ PerlIO * f) /* Pop handler: drops the layer's references to enc, bufsv and dataSV
+ * (each only if set) and resets the members to Nullsv.  Always returns 0.
+ */
{
- PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
- dTHX;
- if (e->enc)
- {
- SvREFCNT_dec(e->enc);
- e->enc = Nullsv;
- }
- if (e->bufsv)
- {
- SvREFCNT_dec(e->bufsv);
- e->bufsv = Nullsv;
- }
- return 0;
+ PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
+ if (e->enc) { /* reference taken in pushed() or dup() */
+ SvREFCNT_dec(e->enc);
+ e->enc = Nullsv;
+ }
+ if (e->bufsv) { /* created lazily by get_base() */
+ SvREFCNT_dec(e->bufsv);
+ e->bufsv = Nullsv;
+ }
+ if (e->dataSV) { /* created lazily by fill() */
+ SvREFCNT_dec(e->dataSV);
+ e->dataSV = Nullsv;
+ }
+ return 0;
}
STDCHAR *
-PerlIOEncode_get_base(PerlIO *f)
+PerlIOEncode_get_base(pTHX_ PerlIO * f) /* Return the start of this layer's buffer.
+ * Creates bufsv on first use (bufsiz defaults to 1024 when unset), points
+ * base.buf at its PV, initialises ptr/end to buf when they are NULL and
+ * grows the SV to bufsiz if its allocation is smaller.  Calls abort() if
+ * ptr is found outside the SV's allocation, before or after the grow.
+ */
{
- PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
- dTHX;
- if (!e->base.bufsiz)
- e->base.bufsiz = 1024;
- if (!e->bufsv)
- {
- e->bufsv = newSV(e->base.bufsiz);
- sv_setpvn(e->bufsv,"",0);
- }
- e->base.buf = (STDCHAR *)SvPVX(e->bufsv);
- if (!e->base.ptr)
- e->base.ptr = e->base.buf;
- if (!e->base.end)
- e->base.end = e->base.buf;
- if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
- {
- Perl_warn(aTHX_ " ptr %p(%p)%p",
- e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
- abort();
- }
- if (SvLEN(e->bufsv) < e->base.bufsiz)
- {
- SSize_t poff = e->base.ptr - e->base.buf;
- SSize_t eoff = e->base.end - e->base.buf;
- e->base.buf = (STDCHAR *)SvGROW(e->bufsv,e->base.bufsiz);
- e->base.ptr = e->base.buf + poff;
- e->base.end = e->base.buf + eoff;
- }
- if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
- {
- Perl_warn(aTHX_ " ptr %p(%p)%p",
- e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
- abort();
- }
- return e->base.buf;
+ PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
+ if (!e->base.bufsiz)
+ e->base.bufsiz = 1024;
+ if (!e->bufsv) {
+ e->bufsv = newSV(e->base.bufsiz);
+ sv_setpvn(e->bufsv, "", 0); /* start out as an empty string */
+ }
+ e->base.buf = (STDCHAR *) SvPVX(e->bufsv); /* the PerlIOBuf pointers alias the SV's own storage */
+ if (!e->base.ptr)
+ e->base.ptr = e->base.buf;
+ if (!e->base.end)
+ e->base.end = e->base.buf;
+ if (e->base.ptr < e->base.buf
+ || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) { /* sanity check: ptr must lie within the SV's allocation */
+ Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
+ e->base.buf + SvLEN(e->bufsv));
+ abort();
+ }
+ if (SvLEN(e->bufsv) < e->base.bufsiz) {
+ SSize_t poff = e->base.ptr - e->base.buf; /* remember ptr/end as offsets ... */
+ SSize_t eoff = e->base.end - e->base.buf;
+ e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
+ e->base.ptr = e->base.buf + poff; /* ... and re-base them on the pointer SvGROW returned */
+ e->base.end = e->base.buf + eoff;
+ }
+ if (e->base.ptr < e->base.buf
+ || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) { /* same sanity check again after the grow */
+ Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
+ e->base.buf + SvLEN(e->bufsv));
+ abort();
+ }
+ return e->base.buf;
}
IV
-PerlIOEncode_fill(PerlIO *f)
+PerlIOEncode_fill(pTHX_ PerlIO * f)
{
- PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
- dTHX;
- dSP;
- IV code;
- code = PerlIOBuf_fill(f);
- if (code == 0)
- {
- SV *uni;
- STRLEN len;
- char *s;
- /* Set SV that is the buffer to be buf..ptr */
- SvCUR_set(e->bufsv, e->base.end - e->base.buf);
- SvUTF8_off(e->bufsv);
- ENTER;
- SAVETMPS;
- PUSHMARK(sp);
- XPUSHs(e->enc);
- XPUSHs(e->bufsv);
- XPUSHs(&PL_sv_yes);
- PUTBACK;
- if (perl_call_method("toUnicode",G_SCALAR) != 1)
- code = -1;
- SPAGAIN;
- uni = POPs;
- PUTBACK;
- /* Now get translated string (forced to UTF-8) and copy back to buffer
- don't use sv_setsv as that may "steal" PV from returned temp
- and so free() our known-large-enough buffer.
- sv_setpvn() should do but let us do it long hand.
- */
- s = SvPVutf8(uni,len);
- if (s != SvPVX(e->bufsv))
- {
- e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
- Move(s,e->base.buf,len,char);
- SvCUR_set(e->bufsv,len);
+ PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
+ dSP;
+ IV code = 0;
+ PerlIO *n;
+ SSize_t avail;
+ if (PerlIO_flush(f) != 0)
+ return -1;
+ n = PerlIONext(f);
+ if (!PerlIO_fast_gets(n)) {
+ /* Things get too messy if we don't have a buffer layer
+ push a :perlio to do the job */
+ char mode[8];
+ n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
+ if (!n) {
+ Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
+ }
+ }
+ ENTER;
+ SAVETMPS;
+ retry:
+ avail = PerlIO_get_cnt(n);
+ if (avail <= 0) {
+ avail = PerlIO_fill(n);
+ if (avail == 0) {
+ avail = PerlIO_get_cnt(n);
+ }
+ else {
+ if (!PerlIO_error(n) && PerlIO_eof(n))
+ avail = 0;
+ }
+ }
+ if (avail > 0) {
+ STDCHAR *ptr = PerlIO_get_ptr(n);
+ SSize_t use = avail;
+ SV *uni;
+ char *s;
+ STRLEN len = 0;
+ e->base.ptr = e->base.end = (STDCHAR *) Nullch;
+ (void) PerlIOEncode_get_base(aTHX_ f);
+ if (!e->dataSV)
+ e->dataSV = newSV(0);
+ if (SvTYPE(e->dataSV) < SVt_PV) {
+ sv_upgrade(e->dataSV,SVt_PV);
+ }
+ if (SvCUR(e->dataSV)) {
+ /* something left over from last time - create a normal
+ SV with new data appended
+ */
+ if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
+ use = e->base.bufsiz - SvCUR(e->dataSV);
+ }
+ sv_catpvn(e->dataSV,(char*)ptr,use);
+ }
+ else {
+ /* Create a "dummy" SV to represent the available data from layer below */
+ if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) {
+ Safefree(SvPVX(e->dataSV));
+ }
+ if (use > e->base.bufsiz) {
+ use = e->base.bufsiz;
+ }
+ SvPVX(e->dataSV) = (char *) ptr;
+ SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */
+ SvCUR_set(e->dataSV,use);
+ SvPOK_only(e->dataSV);
+ }
+ SvUTF8_off(e->dataSV);
+ PUSHMARK(sp);
+ XPUSHs(e->enc);
+ XPUSHs(e->dataSV);
+ XPUSHs(&PL_sv_yes);
+ PUTBACK;
+ if (perl_call_method("decode", G_SCALAR) != 1) {
+ Perl_die(aTHX_ "panic: decode did not return a value");
+ }
+ SPAGAIN;
+ uni = POPs;
+ PUTBACK;
+ /* Now get translated string (forced to UTF-8) and use as buffer */
+ if (SvPOK(uni)) {
+ s = SvPVutf8(uni, len);
+ if (len && !is_utf8_string((U8*)s,len)) {
+ Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
+ }
+ }
+ if (len > 0) {
+ /* Got _something */
+ /* if decode gave us back dataSV then data may vanish when
+ we do ptrcnt adjust - so take our copy now.
+ (The copy is a pain - need a put-it-here option for decode.)
+ */
+ sv_setpvn(e->bufsv,s,len);
+ e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
+ e->base.end = e->base.ptr + SvCUR(e->bufsv);
+ PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
+ SvUTF8_on(e->bufsv);
+
+ /* Adjust ptr/cnt not taking anything which
+ did not translate - not clear this is a win */
+ /* compute amount we took */
+ use -= SvCUR(e->dataSV);
+ PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
+ /* and as we did not take it it isn't pending */
+ SvCUR_set(e->dataSV,0);
+ } else {
+ /* Got nothing - assume partial character so we need some more */
+ /* Make sure e->dataSV is a normal SV before re-filling as
+ buffer alias will change under us
+ */
+ s = SvPV(e->dataSV,len);
+ sv_setpvn(e->dataSV,s,len);
+ PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
+ goto retry;
+ }
+ FREETMPS;
+ LEAVE;
+ return code;
+ }
+ else {
+ if (avail == 0)
+ PerlIOBase(f)->flags |= PERLIO_F_EOF;
+ else
+ PerlIOBase(f)->flags |= PERLIO_F_ERROR;
+ return -1;
}
- SvUTF8_on(e->bufsv);
- e->base.end = e->base.buf+len;
- e->base.ptr = e->base.buf;
- FREETMPS;
- LEAVE;
- }
- return code;
}
IV
-PerlIOEncode_flush(PerlIO *f)
+PerlIOEncode_flush(pTHX_ PerlIO * f) /* Flush handler.  Does nothing unless bufsv exists and ptr is past buf.
+ * Write case (PERLIO_F_WRBUF): encodes buf..ptr with the encoding
+ * object, writes the result to the layer below and flushes that layer;
+ * if encode left text in bufsv, ptr is set after that text and the
+ * function returns early without clearing the buffer flags.
+ * Read case (PERLIO_F_RDBUF): pushes back (unreads) to the layer below
+ * any undecoded bytes held in dataSV, then the re-encoded form of the
+ * text between ptr and end.
+ * Returns 0 on success, -1 if a write, unread or lower flush fell short.
+ */
{
- PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
- IV code = 0;
- dTHX;
- if (e->bufsv && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF|PERLIO_F_WRBUF)))
- {
- dSP;
- SV *str;
- char *s;
- STRLEN len;
- SSize_t left = 0;
- if (PerlIOBase(f)->flags & PERLIO_F_RDBUF)
- {
- /* This is really just a flag to see if we took all the data, if
- we did PerlIOBase_flush avoids a seek to lower layer.
- Need to revisit if we start getting clever with unreads or seeks-in-buffer
- */
- left = e->base.end - e->base.ptr;
- }
- ENTER;
- SAVETMPS;
- PUSHMARK(sp);
- XPUSHs(e->enc);
- SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
- SvUTF8_on(e->bufsv);
- XPUSHs(e->bufsv);
- XPUSHs(&PL_sv_yes);
- PUTBACK;
- if (perl_call_method("fromUnicode",G_SCALAR) != 1)
- code = -1;
- SPAGAIN;
- str = POPs;
- PUTBACK;
- s = SvPV(str,len);
- if (s != SvPVX(e->bufsv))
- {
- e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
- Move(s,e->base.buf,len,char);
- SvCUR_set(e->bufsv,len);
+ PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
+ IV code = 0;
+ if (e->bufsv && (e->base.ptr > e->base.buf)) {
+ dSP;
+ SV *str;
+ char *s;
+ STRLEN len;
+ SSize_t count = 0;
+ if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) {
+ /* Write case encode the buffer and write() to layer below */
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(sp);
+ XPUSHs(e->enc);
+ SvCUR_set(e->bufsv, e->base.ptr - e->base.buf); /* make the SV's length match what has been written into the buffer */
+ SvUTF8_on(e->bufsv);
+ XPUSHs(e->bufsv);
+ XPUSHs(&PL_sv_yes); /* third argument: a true "check" value */
+ PUTBACK;
+ if (perl_call_method("encode", G_SCALAR) != 1) {
+ Perl_die(aTHX_ "panic: encode did not return a value");
+ }
+ SPAGAIN;
+ str = POPs;
+ PUTBACK;
+ s = SvPV(str, len);
+ count = PerlIO_write(PerlIONext(f),s,len);
+ if (count != len) {
+ code = -1;
+ }
+ FREETMPS;
+ LEAVE;
+ if (PerlIO_flush(PerlIONext(f)) != 0) {
+ code = -1;
+ }
+ if (SvCUR(e->bufsv)) {
+ /* Did not all translate */
+ e->base.ptr = e->base.buf+SvCUR(e->bufsv); /* keep the untranslated remainder as pending output */
+ return code;
+ }
+ }
+ else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
+ /* read case */
+ /* if we have any untranslated stuff then unread that first */
+ if (e->dataSV && SvCUR(e->dataSV)) {
+ s = SvPV(e->dataSV, len);
+ count = PerlIO_unread(PerlIONext(f),s,len);
+ if (count != len) {
+ code = -1;
+ }
+ }
+ /* See if there is anything left in the buffer */
+ if (e->base.ptr < e->base.end) {
+ /* Bother - have unread data.
+ re-encode and unread() to layer below
+ */
+ ENTER;
+ SAVETMPS;
+ str = sv_newmortal();
+ sv_upgrade(str, SVt_PV);
+ SvPVX(str) = (char*)e->base.ptr; /* alias the unread tail of the buffer rather than copying it */
+ SvLEN(str) = 0; /* same SvLEN == 0 marking that fill() uses for memory the SV does not own */
+ SvCUR_set(str, e->base.end - e->base.ptr);
+ SvPOK_only(str);
+ SvUTF8_on(str);
+ PUSHMARK(sp);
+ XPUSHs(e->enc);
+ XPUSHs(str);
+ XPUSHs(&PL_sv_yes);
+ PUTBACK;
+ if (perl_call_method("encode", G_SCALAR) != 1) {
+ Perl_die(aTHX_ "panic: encode did not return a value");
+ }
+ SPAGAIN;
+ str = POPs;
+ PUTBACK;
+ s = SvPV(str, len);
+ count = PerlIO_unread(PerlIONext(f),s,len);
+ if (count != len) {
+ code = -1;
+ }
+ FREETMPS;
+ LEAVE;
+ }
+ }
+ e->base.ptr = e->base.end = e->base.buf; /* buffer is now empty */
+ PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
}
- SvUTF8_off(e->bufsv);
- e->base.ptr = e->base.buf+len;
- /* restore end != ptr as inequality is used by PerlIOBuf_flush in read case */
- e->base.end = e->base.ptr + left;
- FREETMPS;
- LEAVE;
- if (PerlIOBuf_flush(f) != 0)
- code = -1;
- }
- return code;
+ return code;
}
IV
-PerlIOEncode_close(PerlIO *f)
+PerlIOEncode_close(pTHX_ PerlIO * f) /* Close handler: runs PerlIOBase_close, croaks if the buffer still holds
+ * data afterwards, then releases bufsv, NULLs buf/ptr/end and clears the
+ * RDBUF/WRBUF flags.  Returns the code from PerlIOBase_close.
+ */
{
- PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
- IV code = PerlIOBase_close(f);
- dTHX;
- if (e->bufsv)
- {
- SvREFCNT_dec(e->bufsv);
- e->bufsv = Nullsv;
- }
- e->base.buf = NULL;
- e->base.ptr = NULL;
- e->base.end = NULL;
- PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF|PERLIO_F_WRBUF);
- return code;
+ PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
+ IV code = PerlIOBase_close(aTHX_ f);
+ if (e->bufsv) {
+ if (e->base.buf && e->base.ptr > e->base.buf) { /* ptr past buf here means text is still pending in the buffer; flush() leaves it that way when encode did not consume everything */
+ Perl_croak(aTHX_ "Close with partial character");
+ }
+ SvREFCNT_dec(e->bufsv);
+ e->bufsv = Nullsv;
+ }
+ e->base.buf = NULL; /* these pointed into bufsv, which is gone now */
+ e->base.ptr = NULL;
+ e->base.end = NULL;
+ PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
+ return code;
}
Off_t
-PerlIOEncode_tell(PerlIO *f)
+PerlIOEncode_tell(pTHX_ PerlIO * f) /* Tell handler: flushes this layer, croaks if the buffer still holds
+ * data after the flush, and otherwise returns the position reported by
+ * the layer below.
+ */
{
- PerlIOBuf *b = PerlIOSelf(f,PerlIOBuf);
- /* Unfortunately the only way to get a postion is to back-translate,
- the UTF8-bytes we have buf..ptr and adjust accordingly.
- But we will try and save any unread data in case stream
- is un-seekable.
- */
- if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF) && b->ptr < b->end)
- {
- Size_t count = b->end - b->ptr;
- PerlIO_push(f,&PerlIO_pending,"r",Nullch,0);
- /* Save what we have left to read */
- PerlIOSelf(f,PerlIOBuf)->bufsiz = count;
- PerlIO_unread(f,b->ptr,count);
- /* There isn't any unread data - we just saved it - so avoid the lower seek */
- b->end = b->ptr;
- /* Flush ourselves - now one layer down,
- this does the back translate and adjusts position
- */
- PerlIO_flush(PerlIONext(f));
- /* Set position of the saved data */
- PerlIOSelf(f,PerlIOBuf)->posn = b->posn;
- }
- else
- {
- PerlIO_flush(f);
- }
- return b->posn;
+ PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
+ /* Unfortunately the only way to get a position is to (re-)translate
+ the UTF-8 we have in the buffer and then ask the layer below
+ */
+ PerlIO_flush(f);
+ if (b->buf && b->ptr > b->buf) { /* flush() could not empty the buffer, so no position in the layer below corresponds to ours */
+ Perl_croak(aTHX_ "Cannot tell at partial character");
+ }
+ return PerlIO_tell(PerlIONext(f));
+}
+
+/*
+ * Dup handler: duplicate the base layer with PerlIOBase_dup and, when
+ * that succeeds and the source layer has an encoding object, give the
+ * new layer its own duplicate of that object via PerlIO_sv_dup.
+ * Returns the new handle, or whatever false value PerlIOBase_dup gave.
+ */
+PerlIO *
+PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
+ CLONE_PARAMS * params, int flags)
+{
+ PerlIOEncode *from;
+ f = PerlIOBase_dup(aTHX_ f, o, params, flags);
+ if (!f)
+ return f;
+ from = PerlIOSelf(o, PerlIOEncode);
+ if (from->enc)
+ PerlIOSelf(f, PerlIOEncode)->enc = PerlIO_sv_dup(aTHX_ from->enc, params);
+ return f;
}
PerlIO_funcs PerlIO_encode = {
- "encoding",
- sizeof(PerlIOEncode),
- PERLIO_K_BUFFERED,
- PerlIOBase_fileno,
- PerlIOBuf_fdopen,
- PerlIOBuf_open,
- PerlIOBuf_reopen,
- PerlIOEncode_pushed,
- PerlIOEncode_popped,
- PerlIOBuf_read,
- PerlIOBuf_unread,
- PerlIOBuf_write,
- PerlIOBuf_seek,
- PerlIOEncode_tell,
- PerlIOEncode_close,
- PerlIOEncode_flush,
- PerlIOEncode_fill,
- PerlIOBase_eof,
- PerlIOBase_error,
- PerlIOBase_clearerr,
- PerlIOBuf_setlinebuf,
- PerlIOEncode_get_base,
- PerlIOBuf_bufsiz,
- PerlIOBuf_get_ptr,
- PerlIOBuf_get_cnt,
- PerlIOBuf_set_ptrcnt,
+ "encoding", /* name string for this layer's table - NOTE(review): slot meanings follow the PerlIO_funcs layout in perliol.h, not visible here */
+ sizeof(PerlIOEncode), /* size of the per-layer state struct defined above */
+ PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
+ PerlIOEncode_pushed, /* PerlIOEncode_* entries are the methods implemented in this file */
+ PerlIOEncode_popped,
+ PerlIOBuf_open, /* PerlIOBuf_* / PerlIOBase_* entries are reused unchanged from the buffered "perlio" layer */
+ PerlIOEncode_getarg,
+ PerlIOBase_fileno,
+ PerlIOEncode_dup,
+ PerlIOBuf_read,
+ PerlIOBuf_unread,
+ PerlIOBuf_write,
+ PerlIOBuf_seek,
+ PerlIOEncode_tell,
+ PerlIOEncode_close,
+ PerlIOEncode_flush,
+ PerlIOEncode_fill,
+ PerlIOBase_eof,
+ PerlIOBase_error,
+ PerlIOBase_clearerr,
+ PerlIOBase_setlinebuf,
+ PerlIOEncode_get_base,
+ PerlIOBuf_bufsiz,
+ PerlIOBuf_get_ptr,
+ PerlIOBuf_get_cnt,
+ PerlIOBuf_set_ptrcnt,
};
-#endif
+#endif /* encode layer */
void
-Encode_Define(pTHX_ encode_t *enc)
+Encode_XSEncoding(pTHX_ encode_t * enc) /* Register one compiled encoding with the Perl side: wraps the encode_t
+ * pointer in an IV, blesses a reference to it into Encode::XS and calls
+ * Encode::define_encoding(object, name...) with every entry of the
+ * NULL-terminated enc->name list.
+ */
{
- HV *hash = get_hv("Encode::encoding",GV_ADD|GV_ADDMULTI);
- HV *stash = gv_stashpv("Encode::XS", TRUE);
- SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
- hv_store(hash,enc->name,strlen(enc->name),sv,0);
+ dSP;
+ HV *stash = gv_stashpv("Encode::XS", TRUE);
+ SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash); /* the object is a blessed ref to an IV holding the C pointer; the XS methods unwrap it with INT2PTR */
+ int i = 0;
+ PUSHMARK(sp);
+ XPUSHs(sv);
+ while (enc->name[i]) { /* the name list ends at the first NULL entry */
+ const char *name = enc->name[i++];
+ XPUSHs(sv_2mortal(newSVpvn(name, strlen(name))));
+ }
+ PUTBACK;
+ call_pv("Encode::define_encoding", G_DISCARD);
+ SvREFCNT_dec(sv); /* drop the reference created above - presumably define_encoding keeps its own; verify against Encode.pm */
}
-void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {}
+void
+call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig)
+{
+ /* Exists for breakpointing: the body is deliberately empty and none
+ of the arguments is used, so a debugger can stop here and inspect them */
+}
static SV *
-encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check)
+encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
+ int check) /* Convert the string in src through the table dir with do_encode and
+ * return the result as a new mortal, NUL-terminated SV.
+ * - ENCODE_NOSPACE: dst is grown and conversion resumes where it stopped.
+ * - ENCODE_FALLBACK / ENCODE_PARTIAL: conversion stops; what was
+ * converted so far is returned.
+ * - ENCODE_NOREP: returns &PL_sv_undef when dir == enc->f_utf8 (after a
+ * warning if check is false and UTF8 warnings are on), croaks otherwise.
+ * - any other non-zero code croaks.
+ * When check is true, src is overwritten with its unconsumed tail.
+ * An empty src yields an empty dst.
+ */
{
- STRLEN slen;
- U8 *s = (U8 *) SvPV(src,slen);
- SV *dst = sv_2mortal(newSV(2*slen+1));
- if (slen)
- {
- U8 *d = (U8 *) SvGROW(dst, 2*slen+1);
- STRLEN dlen = SvLEN(dst);
- int code;
- while ((code = do_encode(dir,s,&slen,d,dlen,&dlen,!check)))
- {
- SvCUR_set(dst,dlen);
- SvPOK_on(dst);
-
- if (code == ENCODE_FALLBACK)
- break;
-
- switch(code)
- {
- case ENCODE_NOSPACE:
- {
- STRLEN need = (slen) ? (SvLEN(dst)*SvCUR(src)/slen) : (dlen + UTF8_MAXLEN);
- if (need <= SvLEN(dst))
- need += UTF8_MAXLEN;
- d = (U8 *) SvGROW(dst, need);
- dlen = SvLEN(dst);
- slen = SvCUR(src);
- break;
- }
-
- case ENCODE_NOREP:
- if (dir == enc->f_utf8)
- {
- if (!check && ckWARN_d(WARN_UTF8))
- {
- STRLEN clen;
- UV ch = utf8_to_uv(s+slen,(SvCUR(src)-slen),&clen,0);
- Perl_warner(aTHX_ WARN_UTF8, "\"\\x{%x}\" does not map to %s", ch, enc->name);
- /* FIXME: Skip over the character, copy in replacement and continue
- * but that is messy so for now just fail.
- */
- return &PL_sv_undef;
- }
- else
- {
- return &PL_sv_undef;
- }
- }
- else
- {
- /* UTF-8 is supposed to be "Universal" so should not happen */
- Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8",
- enc->name, (SvCUR(src)-slen),s+slen);
- }
- break;
-
- case ENCODE_PARTIAL:
- if (!check && ckWARN_d(WARN_UTF8))
- {
- Perl_warner(aTHX_ WARN_UTF8, "Partial %s character",
- (dir == enc->f_utf8) ? "UTF-8" : enc->name);
- }
- return &PL_sv_undef;
-
- default:
- Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
- code, (dir == enc->f_utf8) ? "to" : "from",enc->name);
- return &PL_sv_undef;
- }
+ STRLEN slen;
+ U8 *s = (U8 *) SvPV(src, slen);
+ STRLEN tlen = slen; /* total source length; slen itself is rewritten by do_encode */
+ STRLEN ddone = 0; /* destination bytes produced by earlier passes */
+ STRLEN sdone = 0; /* source bytes consumed by earlier passes */
+ SV *dst = sv_2mortal(newSV(slen+1));
+ if (slen) {
+ U8 *d = (U8 *) SvPVX(dst);
+ STRLEN dlen = SvLEN(dst)-1; /* keep one byte back for the trailing NUL stored before returning */
+ int code;
+ while ((code = do_encode(dir, s, &slen, d, dlen, &dlen, !check))) {
+ SvCUR_set(dst, dlen+ddone);
+ SvPOK_only(dst);
+
+#if 0
+ Perl_warn(aTHX_ "code=%d @ s=%d/%d/%d d=%d/%d/%d",code,slen,sdone,tlen,dlen,ddone,SvLEN(dst)-1);
+#endif
+
+ if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL)
+ break;
+
+ switch (code) {
+ case ENCODE_NOSPACE:
+ {
+ STRLEN need ;
+ sdone += slen;
+ ddone += dlen;
+ if (sdone) {
+ need = (tlen*SvLEN(dst)+sdone-1)/sdone+UTF8_MAXLEN; /* scale the current allocation by total/consumed source, plus slack */
+ }
+ else {
+ need = SvLEN(dst) + UTF8_MAXLEN;
+ }
+
+ d = (U8 *) SvGROW(dst, need);
+ if (ddone >= SvLEN(dst)) {
+ Perl_croak(aTHX_ "Destination couldn't be grown.");
+ }
+ dlen = SvLEN(dst)-ddone-1;
+ d += ddone; /* resume writing after what is already converted */
+ s += slen; /* resume reading after what is already consumed */
+ slen = tlen-sdone;
+ continue;
+ }
+
+ case ENCODE_NOREP:
+ if (dir == enc->f_utf8) {
+ if (!check && ckWARN_d(WARN_UTF8)) {
+ STRLEN clen;
+ UV ch =
+ utf8n_to_uvuni(s + slen, (SvCUR(src) - slen),
+ &clen, 0);
+ Perl_warner(aTHX_ packWARN(WARN_UTF8),
+ "\"\\N{U+%" UVxf
+ "}\" does not map to %s", ch,
+ enc->name[0]);
+ /* FIXME: Skip over the character, copy in replacement and continue
+ * but that is messy so for now just fail.
+ */
+ return &PL_sv_undef;
+ }
+ else {
+ return &PL_sv_undef;
+ }
+ }
+ else {
+ /* UTF-8 is supposed to be "Universal" so should not happen
+ for real characters, but some encodings have non-assigned
+ codes which may occur.
+ */
+ Perl_croak(aTHX_ "%s \"\\x%02X\" does not map to Unicode (%d)",
+ enc->name[0], (U8) s[slen], code);
+ }
+ break;
+
+ default:
+ Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
+ code, (dir == enc->f_utf8) ? "to" : "from",
+ enc->name[0]);
+ return &PL_sv_undef;
+ }
+ }
+ SvCUR_set(dst, dlen+ddone);
+ SvPOK_only(dst);
+ if (check) {
+ sdone = SvCUR(src) - (slen+sdone); /* sdone is reused here: it now holds the count of source bytes NOT consumed */
+ if (sdone) {
+#if 1
+ /* FIXME: A Move() is dangerous - PV could be mmap'ed readonly
+ SvOOK would be ideal - but sv_backoff does not understand SvLEN == 0
+ type SVs and sv_clear() calls it ...
+ */
+ sv_setpvn(src, (char*)s+slen, sdone);
+#else
+ Move(s + slen, SvPVX(src), sdone , U8);
+#endif
+ }
+ SvCUR_set(src, sdone);
+ }
}
- SvCUR_set(dst,dlen);
- SvPOK_on(dst);
- if (check)
- {
- if (slen < SvCUR(src))
- {
- Move(s+slen,s,SvCUR(src)-slen,U8);
- }
- SvCUR_set(src,SvCUR(src)-slen);
+ else {
+ SvCUR_set(dst, 0);
+ SvPOK_only(dst);
}
- }
- return dst;
+ *SvEND(dst) = '\0';
+ return dst;
}
-MODULE = Encode PACKAGE = Encode PREFIX = sv_
+MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_
+
+PROTOTYPES: ENABLE
void
-valid_utf8(sv)
-SV * sv
+Method_name(obj)
+SV * obj
CODE:
{
- STRLEN len;
- char *s = SvPV(sv,len);
- if (!SvUTF8(sv) || is_utf8_string(s,len))
- XSRETURN_YES;
- else
- XSRETURN_NO;
+ encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); /* obj is a reference to an IV holding the encode_t pointer, as built by Encode_XSEncoding */
+ ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0]))); /* return the first entry of the encoding's name list as a new mortal string */
+ XSRETURN(1);
}
void
-sv_utf8_encode(sv)
-SV * sv
-
-bool
-sv_utf8_decode(sv)
-SV * sv
-
-void
-sv_utf8_upgrade(sv)
-SV * sv
-
-bool
-sv_utf8_downgrade(sv,failok=0)
-SV * sv
-bool failok
-
-MODULE = Encode PACKAGE = Encode::XS PREFIX = Encode_
-
-PROTOTYPES: ENABLE
-
-void
-Encode_toUnicode(obj,src,check = 0)
+Method_decode(obj,src,check = FALSE)
SV * obj
SV * src
-int check
+bool check
CODE:
{
encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
}
void
-Encode_fromUnicode(obj,src,check = 0)
+Method_encode(obj,src,check = FALSE)
SV * obj
SV * src
-int check
+bool check
CODE:
{
encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
STRLEN len;
U8 *s = (U8*)SvPV(sv, len);
+ RETVAL = 0;
if (SvTRUE(check)) {
/* Must do things the slow way */
U8 *dest;
OUTPUT:
RETVAL
-SV *
-_chars_to_utf8(sv, from, ...)
- SV * sv
- SV * from
- CODE:
- {
- SV * check = items == 3 ? ST(2) : Nullsv;
- RETVAL = &PL_sv_undef;
- }
- OUTPUT:
- RETVAL
-
-SV *
-_utf8_to_chars(sv, to, ...)
- SV * sv
- SV * to
- CODE:
- {
- SV * check = items == 3 ? ST(2) : Nullsv;
- RETVAL = &PL_sv_undef;
- }
- OUTPUT:
- RETVAL
-
-SV *
-_utf8_to_chars_check(sv, ...)
- SV * sv
- CODE:
- {
- SV * check = items == 2 ? ST(1) : Nullsv;
- RETVAL = &PL_sv_undef;
- }
- OUTPUT:
- RETVAL
-
-SV *
-_bytes_to_chars(sv, from, ...)
- SV * sv
- SV * from
- CODE:
- {
- SV * check = items == 3 ? ST(2) : Nullsv;
- RETVAL = &PL_sv_undef;
- }
- OUTPUT:
- RETVAL
-
-SV *
-_chars_to_bytes(sv, to, ...)
- SV * sv
- SV * to
- CODE:
- {
- SV * check = items == 3 ? ST(2) : Nullsv;
- RETVAL = &PL_sv_undef;
- }
- OUTPUT:
- RETVAL
-
-SV *
-_from_to(sv, from, to, ...)
- SV * sv
- SV * from
- SV * to
- CODE:
- {
- SV * check = items == 4 ? ST(3) : Nullsv;
- RETVAL = &PL_sv_undef;
- }
- OUTPUT:
- RETVAL
-
bool
-_is_utf8(sv, ...)
- SV * sv
+is_utf8(sv, check = FALSE)
+SV * sv
+bool check
CODE:
{
- SV * check = items == 2 ? ST(1) : Nullsv;
+ if (SvGMAGICAL(sv)) /* it could be $1, for example */
+ sv = newSVsv(sv); /* get-magic will be done by the copy; the flag tests below then look at the copy */
if (SvPOK(sv)) {
- RETVAL = SvUTF8(sv) ? 1 : 0;
+ RETVAL = SvUTF8(sv) ? TRUE : FALSE;
if (RETVAL &&
- SvTRUE(check) &&
+ check &&
!is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
RETVAL = FALSE;
} else {
RETVAL = FALSE;
}
+ if (sv != ST(0))
+ SvREFCNT_dec(sv); /* it was a temp copy */
}
OUTPUT:
RETVAL
SV *
-_on_utf8(sv)
+_utf8_on(sv)
SV * sv
CODE:
{
RETVAL
SV *
-_off_utf8(sv)
+_utf8_off(sv)
SV * sv
CODE:
{
OUTPUT:
RETVAL
-SV *
-_utf_to_utf(sv, from, to, ...)
- SV * sv
- SV * from
- SV * to
- CODE:
- {
- SV * check = items == 4 ? ST(3) : Nullsv;
- RETVAL = &PL_sv_undef;
- }
- OUTPUT:
- RETVAL
-
BOOT:
{
-#ifdef USE_PERLIO
- PerlIO_define_layer(&PerlIO_encode);
+#if defined(USE_PERLIO) && !defined(USE_SFIO)
+ PerlIO_define_layer(aTHX_ &PerlIO_encode);
#endif
-#include "iso8859.def"
-#include "EBCDIC.def"
-#include "Symbols.def"
+/* #include "8859_def.h" */
+/* #include "EBCDIC_def.h" */
+/* #include "Symbols_def.h" */
+#include "def_t_def.h"
}