X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=ext%2FEncode%2FEncode.xs;h=44e5e2243a01035304f8dbe04f424926c58a75e1;hb=f98bc0c60b037c4df60741a875da49b98d04509c;hp=5d33303fff2a8ec3da337c607b137c0d17b40db5;hpb=ee518936bd3eee0065c20591f5182f733dadd4bd;p=p5sagit%2Fp5-mst-13.2.git diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index 5d33303..44e5e22 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -1,19 +1,21 @@ +#define PERL_NO_GET_CONTEXT + #include "EXTERN.h" #include "perl.h" #include "XSUB.h" #define U8 U8 #include "encode.h" -#include "iso8859.h" +#include "8859.h" #include "EBCDIC.h" #include "Symbols.h" -#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) { \ + +#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \ Perl_croak(aTHX_ "panic_unimplemented"); \ return (y)0; /* fool picky compilers */ \ } UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) -UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) - + UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) #if defined(USE_PERLIO) && !defined(USE_SFIO) /* Define an encoding "layer" in the perliol.h sense. The layer defined here "inherits" in an object-oriented sense from the @@ -37,398 +39,554 @@ UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) issue - particularly with all-perl encode engine.) */ - - #include "perliol.h" - -typedef struct -{ - PerlIOBuf base; /* PerlIOBuf stuff */ - SV * bufsv; - SV * enc; +typedef struct { + PerlIOBuf base; /* PerlIOBuf stuff */ + SV *bufsv; /* buffer seen by layers above */ + SV *dataSV; /* data we have read from layer below */ + SV *enc; /* the encoding object */ } PerlIOEncode; +SV * +PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags) +{ + PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); + SV *sv = &PL_sv_undef; + if (e->enc) { + dSP; + ENTER; + SAVETMPS; + PUSHMARK(sp); + XPUSHs(e->enc); + PUTBACK; + if (perl_call_method("name", G_SCALAR) == 1) { + SPAGAIN; + sv = newSVsv(POPs); + PUTBACK; + } + } + return sv; +} IV -PerlIOEncode_pushed(PerlIO *f, const char *mode,const char *arg,STRLEN len) +PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg) { - PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); - dTHX; - dSP; - IV code; - code = PerlIOBuf_pushed(f,mode,Nullch,0); - ENTER; - SAVETMPS; - PUSHMARK(sp); - XPUSHs(sv_2mortal(newSVpvn(arg,len))); - PUTBACK; - if (perl_call_pv("Encode::find_encoding",G_SCALAR) != 1) - { - /* should never happen */ - Perl_die(aTHX_ "Encode::find_encoding did not return a value"); - return -1; - } - SPAGAIN; - e->enc = POPs; - PUTBACK; - if (!SvROK(e->enc)) - { - e->enc = Nullsv; - errno = EINVAL; - Perl_warner(aTHX_ WARN_IO, "Cannot find encoding \"%.*s\"", (int) len, arg); - return -1; - } - SvREFCNT_inc(e->enc); - FREETMPS; - LEAVE; - PerlIOBase(f)->flags |= PERLIO_F_UTF8; - return code; + PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); + dSP; + IV code; + code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv); + ENTER; + SAVETMPS; + PUSHMARK(sp); + XPUSHs(arg); + PUTBACK; + if (perl_call_pv("Encode::find_encoding", G_SCALAR) != 1) { + /* should never happen */ + Perl_die(aTHX_ "Encode::find_encoding did not return a value"); + return -1; + } + SPAGAIN; + e->enc = POPs; + PUTBACK; + if (!SvROK(e->enc)) { + e->enc = Nullsv; + errno = EINVAL; + Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"", + arg); + code = -1; + } + else { + SvREFCNT_inc(e->enc); + PerlIOBase(f)->flags |= PERLIO_F_UTF8; + } + FREETMPS; + LEAVE; + return code; } IV -PerlIOEncode_popped(PerlIO *f) +PerlIOEncode_popped(pTHX_ PerlIO * f) { - PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); - dTHX; - if (e->enc) - { - SvREFCNT_dec(e->enc); - e->enc = Nullsv; - } - if (e->bufsv) - { - SvREFCNT_dec(e->bufsv); - e->bufsv = Nullsv; - } - return 0; + PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); + if (e->enc) { + SvREFCNT_dec(e->enc); + e->enc = Nullsv; + } + if (e->bufsv) { + SvREFCNT_dec(e->bufsv); + e->bufsv = Nullsv; + } + if (e->dataSV) { + SvREFCNT_dec(e->dataSV); + e->dataSV = Nullsv; + } + return 0; } STDCHAR * -PerlIOEncode_get_base(PerlIO *f) +PerlIOEncode_get_base(pTHX_ PerlIO * f) { - PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); - dTHX; - if (!e->base.bufsiz) - e->base.bufsiz = 1024; - if (!e->bufsv) - { - e->bufsv = newSV(e->base.bufsiz); - sv_setpvn(e->bufsv,"",0); - } - e->base.buf = (STDCHAR *)SvPVX(e->bufsv); - if (!e->base.ptr) - e->base.ptr = e->base.buf; - if (!e->base.end) - e->base.end = e->base.buf; - if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv)) - { - Perl_warn(aTHX_ " ptr %p(%p)%p", - e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv)); - abort(); - } - if (SvLEN(e->bufsv) < e->base.bufsiz) - { - SSize_t poff = e->base.ptr - e->base.buf; - SSize_t eoff = e->base.end - e->base.buf; - e->base.buf = (STDCHAR *)SvGROW(e->bufsv,e->base.bufsiz); - e->base.ptr = e->base.buf + poff; - e->base.end = e->base.buf + eoff; - } - if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv)) - { - Perl_warn(aTHX_ " ptr %p(%p)%p", - e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv)); - abort(); - } - return e->base.buf; + PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); + if (!e->base.bufsiz) + e->base.bufsiz = 1024; + if (!e->bufsv) { + e->bufsv = newSV(e->base.bufsiz); + sv_setpvn(e->bufsv, "", 0); + } + e->base.buf = (STDCHAR *) SvPVX(e->bufsv); + if (!e->base.ptr) + e->base.ptr = e->base.buf; + if (!e->base.end) + e->base.end = e->base.buf; + if (e->base.ptr < e->base.buf + || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) { + Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr, + e->base.buf + SvLEN(e->bufsv)); + abort(); + } + if (SvLEN(e->bufsv) < e->base.bufsiz) { + SSize_t poff = e->base.ptr - e->base.buf; + SSize_t eoff = e->base.end - e->base.buf; + e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz); + e->base.ptr = e->base.buf + poff; + e->base.end = e->base.buf + eoff; + } + if (e->base.ptr < e->base.buf + || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) { + Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr, + e->base.buf + SvLEN(e->bufsv)); + abort(); + } + return e->base.buf; } IV -PerlIOEncode_fill(PerlIO *f) +PerlIOEncode_fill(pTHX_ PerlIO * f) { - PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); - dTHX; - dSP; - IV code; - code = PerlIOBuf_fill(f); - if (code == 0) - { - SV *uni; - STRLEN len; - char *s; - /* Set SV that is the buffer to be buf..ptr */ - SvCUR_set(e->bufsv, e->base.end - e->base.buf); - SvUTF8_off(e->bufsv); - ENTER; - SAVETMPS; - PUSHMARK(sp); - XPUSHs(e->enc); - XPUSHs(e->bufsv); - XPUSHs(&PL_sv_yes); - PUTBACK; - if (perl_call_method("decode",G_SCALAR) != 1) - code = -1; - SPAGAIN; - uni = POPs; - PUTBACK; - /* Now get translated string (forced to UTF-8) and copy back to buffer - don't use sv_setsv as that may "steal" PV from returned temp - and so free() our known-large-enough buffer. - sv_setpvn() should do but let us do it long hand. - */ - s = SvPVutf8(uni,len); - if (s != SvPVX(e->bufsv)) - { - e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len); - Move(s,e->base.buf,len,char); - SvCUR_set(e->bufsv,len); + PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); + dSP; + IV code = 0; + PerlIO *n; + SSize_t avail; + if (PerlIO_flush(f) != 0) + return -1; + n = PerlIONext(f); + if (!PerlIO_fast_gets(n)) { + /* Things get too messy if we don't have a buffer layer + push a :perlio to do the job */ + char mode[8]; + n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv); + if (!n) { + Perl_die(aTHX_ "panic: cannot push :perlio for %p",f); + } + } + ENTER; + SAVETMPS; + retry: + avail = PerlIO_get_cnt(n); + if (avail <= 0) { + avail = PerlIO_fill(n); + if (avail == 0) { + avail = PerlIO_get_cnt(n); + } + else { + if (!PerlIO_error(n) && PerlIO_eof(n)) + avail = 0; + } + } + if (avail > 0) { + STDCHAR *ptr = PerlIO_get_ptr(n); + SSize_t use = avail; + SV *uni; + char *s; + STRLEN len = 0; + e->base.ptr = e->base.end = (STDCHAR *) Nullch; + (void) PerlIOEncode_get_base(aTHX_ f); + if (!e->dataSV) + e->dataSV = newSV(0); + if (SvTYPE(e->dataSV) < SVt_PV) { + sv_upgrade(e->dataSV,SVt_PV); + } + if (SvCUR(e->dataSV)) { + /* something left over from last time - create a normal + SV with new data appended + */ + if (use + SvCUR(e->dataSV) > e->base.bufsiz) { + use = e->base.bufsiz - SvCUR(e->dataSV); + } + sv_catpvn(e->dataSV,(char*)ptr,use); + } + else { + /* Create a "dummy" SV to represent the available data from layer below */ + if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) { + Safefree(SvPVX(e->dataSV)); + } + if (use > e->base.bufsiz) { + use = e->base.bufsiz; + } + SvPVX(e->dataSV) = (char *) ptr; + SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */ + SvCUR_set(e->dataSV,use); + SvPOK_only(e->dataSV); + } + SvUTF8_off(e->dataSV); + PUSHMARK(sp); + XPUSHs(e->enc); + XPUSHs(e->dataSV); + XPUSHs(&PL_sv_yes); + PUTBACK; + if (perl_call_method("decode", G_SCALAR) != 1) { + Perl_die(aTHX_ "panic: decode did not return a value"); + } + SPAGAIN; + uni = POPs; + PUTBACK; + /* Now get translated string (forced to UTF-8) and use as buffer */ + if (SvPOK(uni)) { + s = SvPVutf8(uni, len); + if (len && !is_utf8_string((U8*)s,len)) { + Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s); + } + } + if (len > 0) { + /* Got _something */ + /* if decode gave us back dataSV then data may vanish when + we do ptrcnt adjust - so take our copy now. + (The copy is a pain - need a put-it-here option for decode.) + */ + sv_setpvn(e->bufsv,s,len); + e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv); + e->base.end = e->base.ptr + SvCUR(e->bufsv); + PerlIOBase(f)->flags |= PERLIO_F_RDBUF; + SvUTF8_on(e->bufsv); + + /* Adjust ptr/cnt not taking anything which + did not translate - not clear this is a win */ + /* compute amount we took */ + use -= SvCUR(e->dataSV); + PerlIO_set_ptrcnt(n, ptr+use, (avail-use)); + /* and as we did not take it it isn't pending */ + SvCUR_set(e->dataSV,0); + } else { + /* Got nothing - assume partial character so we need some more */ + /* Make sure e->dataSV is a normal SV before re-filling as + buffer alias will change under us + */ + s = SvPV(e->dataSV,len); + sv_setpvn(e->dataSV,s,len); + PerlIO_set_ptrcnt(n, ptr+use, (avail-use)); + goto retry; + } + FREETMPS; + LEAVE; + return code; + } + else { + if (avail == 0) + PerlIOBase(f)->flags |= PERLIO_F_EOF; + else + PerlIOBase(f)->flags |= PERLIO_F_ERROR; + return -1; } - SvUTF8_on(e->bufsv); - e->base.end = e->base.buf+len; - e->base.ptr = e->base.buf; - FREETMPS; - LEAVE; - } - return code; } IV -PerlIOEncode_flush(PerlIO *f) +PerlIOEncode_flush(pTHX_ PerlIO * f) { - PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); - IV code = 0; - dTHX; - if (e->bufsv && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF|PERLIO_F_WRBUF))) - { - dSP; - SV *str; - char *s; - STRLEN len; - SSize_t left = 0; - if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) - { - /* This is really just a flag to see if we took all the data, if - we did PerlIOBase_flush avoids a seek to lower layer. - Need to revisit if we start getting clever with unreads or seeks-in-buffer - */ - left = e->base.end - e->base.ptr; - } - ENTER; - SAVETMPS; - PUSHMARK(sp); - XPUSHs(e->enc); - SvCUR_set(e->bufsv, e->base.ptr - e->base.buf); - SvUTF8_on(e->bufsv); - XPUSHs(e->bufsv); - XPUSHs(&PL_sv_yes); - PUTBACK; - if (perl_call_method("encode",G_SCALAR) != 1) - code = -1; - SPAGAIN; - str = POPs; - PUTBACK; - s = SvPV(str,len); - if (s != SvPVX(e->bufsv)) - { - e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len); - Move(s,e->base.buf,len,char); - SvCUR_set(e->bufsv,len); + PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); + IV code = 0; + if (e->bufsv && (e->base.ptr > e->base.buf)) { + dSP; + SV *str; + char *s; + STRLEN len; + SSize_t count = 0; + if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) { + /* Write case encode the buffer and write() to layer below */ + ENTER; + SAVETMPS; + PUSHMARK(sp); + XPUSHs(e->enc); + SvCUR_set(e->bufsv, e->base.ptr - e->base.buf); + SvUTF8_on(e->bufsv); + XPUSHs(e->bufsv); + XPUSHs(&PL_sv_yes); + PUTBACK; + if (perl_call_method("encode", G_SCALAR) != 1) { + Perl_die(aTHX_ "panic: encode did not return a value"); + } + SPAGAIN; + str = POPs; + PUTBACK; + s = SvPV(str, len); + count = PerlIO_write(PerlIONext(f),s,len); + if (count != len) { + code = -1; + } + FREETMPS; + LEAVE; + if (PerlIO_flush(PerlIONext(f)) != 0) { + code = -1; + } + if (SvCUR(e->bufsv)) { + /* Did not all translate */ + e->base.ptr = e->base.buf+SvCUR(e->bufsv); + return code; + } + } + else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) { + /* read case */ + /* if we have any untranslated stuff then unread that first */ + if (e->dataSV && SvCUR(e->dataSV)) { + s = SvPV(e->dataSV, len); + count = PerlIO_unread(PerlIONext(f),s,len); + if (count != len) { + code = -1; + } + } + /* See if there is anything left in the buffer */ + if (e->base.ptr < e->base.end) { + /* Bother - have unread data. + re-encode and unread() to layer below + */ + ENTER; + SAVETMPS; + str = sv_newmortal(); + sv_upgrade(str, SVt_PV); + SvPVX(str) = (char*)e->base.ptr; + SvLEN(str) = 0; + SvCUR_set(str, e->base.end - e->base.ptr); + SvPOK_only(str); + SvUTF8_on(str); + PUSHMARK(sp); + XPUSHs(e->enc); + XPUSHs(str); + XPUSHs(&PL_sv_yes); + PUTBACK; + if (perl_call_method("encode", G_SCALAR) != 1) { + Perl_die(aTHX_ "panic: encode did not return a value"); + } + SPAGAIN; + str = POPs; + PUTBACK; + s = SvPV(str, len); + count = PerlIO_unread(PerlIONext(f),s,len); + if (count != len) { + code = -1; + } + FREETMPS; + LEAVE; + } + } + e->base.ptr = e->base.end = e->base.buf; + PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF); } - SvUTF8_off(e->bufsv); - e->base.ptr = e->base.buf+len; - /* restore end != ptr as inequality is used by PerlIOBuf_flush in read case */ - e->base.end = e->base.ptr + left; - FREETMPS; - LEAVE; - if (PerlIOBuf_flush(f) != 0) - code = -1; - } - return code; + return code; } IV -PerlIOEncode_close(PerlIO *f) +PerlIOEncode_close(pTHX_ PerlIO * f) { - PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); - IV code = PerlIOBase_close(f); - dTHX; - if (e->bufsv) - { - SvREFCNT_dec(e->bufsv); - e->bufsv = Nullsv; - } - e->base.buf = NULL; - e->base.ptr = NULL; - e->base.end = NULL; - PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF|PERLIO_F_WRBUF); - return code; + PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); + IV code = PerlIOBase_close(aTHX_ f); + if (e->bufsv) { + if (e->base.buf && e->base.ptr > e->base.buf) { + Perl_croak(aTHX_ "Close with partial character"); + } + SvREFCNT_dec(e->bufsv); + e->bufsv = Nullsv; + } + e->base.buf = NULL; + e->base.ptr = NULL; + e->base.end = NULL; + PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF); + return code; } Off_t -PerlIOEncode_tell(PerlIO *f) +PerlIOEncode_tell(pTHX_ PerlIO * f) { - PerlIOBuf *b = PerlIOSelf(f,PerlIOBuf); - /* Unfortunately the only way to get a postion is to back-translate, - the UTF8-bytes we have buf..ptr and adjust accordingly. - But we will try and save any unread data in case stream - is un-seekable. - */ - if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF) && b->ptr < b->end) - { - Size_t count = b->end - b->ptr; - PerlIO_push(f,&PerlIO_pending,"r",Nullch,0); - /* Save what we have left to read */ - PerlIOSelf(f,PerlIOBuf)->bufsiz = count; - PerlIO_unread(f,b->ptr,count); - /* There isn't any unread data - we just saved it - so avoid the lower seek */ - b->end = b->ptr; - /* Flush ourselves - now one layer down, - this does the back translate and adjusts position - */ - PerlIO_flush(PerlIONext(f)); - /* Set position of the saved data */ - PerlIOSelf(f,PerlIOBuf)->posn = b->posn; - } - else - { - PerlIO_flush(f); - } - return b->posn; + PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf); + /* Unfortunately the only way to get a postion is to (re-)translate, + the UTF8 we have in bufefr and then ask layer below + */ + PerlIO_flush(f); + if (b->buf && b->ptr > b->buf) { + Perl_croak(aTHX_ "Cannot tell at partial character"); + } + return PerlIO_tell(PerlIONext(f)); +} + +PerlIO * +PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o, + CLONE_PARAMS * params, int flags) +{ + if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) { + PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode); + PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode); + if (oe->enc) { + fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params); + } + } + return f; } PerlIO_funcs PerlIO_encode = { - "encoding", - sizeof(PerlIOEncode), - PERLIO_K_BUFFERED, - PerlIOBase_fileno, - PerlIOBuf_open, - PerlIOEncode_pushed, - PerlIOEncode_popped, - PerlIOBuf_read, - PerlIOBuf_unread, - PerlIOBuf_write, - PerlIOBuf_seek, - PerlIOEncode_tell, - PerlIOEncode_close, - PerlIOEncode_flush, - PerlIOEncode_fill, - PerlIOBase_eof, - PerlIOBase_error, - PerlIOBase_clearerr, - PerlIOBuf_setlinebuf, - PerlIOEncode_get_base, - PerlIOBuf_bufsiz, - PerlIOBuf_get_ptr, - PerlIOBuf_get_cnt, - PerlIOBuf_set_ptrcnt, + "encoding", + sizeof(PerlIOEncode), + PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT, + PerlIOEncode_pushed, + PerlIOEncode_popped, + PerlIOBuf_open, + PerlIOEncode_getarg, + PerlIOBase_fileno, + PerlIOEncode_dup, + PerlIOBuf_read, + PerlIOBuf_unread, + PerlIOBuf_write, + PerlIOBuf_seek, + PerlIOEncode_tell, + PerlIOEncode_close, + PerlIOEncode_flush, + PerlIOEncode_fill, + PerlIOBase_eof, + PerlIOBase_error, + PerlIOBase_clearerr, + PerlIOBase_setlinebuf, + PerlIOEncode_get_base, + PerlIOBuf_bufsiz, + PerlIOBuf_get_ptr, + PerlIOBuf_get_cnt, + PerlIOBuf_set_ptrcnt, }; -#endif /* encode layer */ +#endif /* encode layer */ void -Encode_Define(pTHX_ encode_t *enc) +Encode_XSEncoding(pTHX_ encode_t * enc) { - dSP; - HV *stash = gv_stashpv("Encode::XS", TRUE); - SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash); - int i = 0; - PUSHMARK(sp); - XPUSHs(sv); - while (enc->name[i]) - { - const char *name = enc->name[i++]; - XPUSHs(sv_2mortal(newSVpvn(name,strlen(name)))); - } - PUTBACK; - call_pv("Encode::define_encoding",G_DISCARD); - SvREFCNT_dec(sv); + dSP; + HV *stash = gv_stashpv("Encode::XS", TRUE); + SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash); + int i = 0; + PUSHMARK(sp); + XPUSHs(sv); + while (enc->name[i]) { + const char *name = enc->name[i++]; + XPUSHs(sv_2mortal(newSVpvn(name, strlen(name)))); + } + PUTBACK; + call_pv("Encode::define_encoding", G_DISCARD); + SvREFCNT_dec(sv); } -void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {} +void +call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) +{ + /* Exists for breakpointing */ +} static SV * -encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check) +encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, + int check) { - STRLEN slen; - U8 *s = (U8 *) SvPV(src,slen); - SV *dst = sv_2mortal(newSV(2*slen+1)); - if (slen) - { - U8 *d = (U8 *) SvGROW(dst, 2*slen+1); - STRLEN dlen = SvLEN(dst); - int code; - while ((code = do_encode(dir,s,&slen,d,dlen,&dlen,!check))) - { - SvCUR_set(dst,dlen); - SvPOK_on(dst); - - if (code == ENCODE_FALLBACK) - break; - - switch(code) - { - case ENCODE_NOSPACE: - { - STRLEN need = (slen) ? (SvLEN(dst)*SvCUR(src)/slen) : (dlen + UTF8_MAXLEN); - if (need <= SvLEN(dst)) - need += UTF8_MAXLEN; - d = (U8 *) SvGROW(dst, need); - dlen = SvLEN(dst); - slen = SvCUR(src); - break; - } - - case ENCODE_NOREP: - if (dir == enc->f_utf8) - { - if (!check && ckWARN_d(WARN_UTF8)) - { - STRLEN clen; - UV ch = utf8n_to_uvuni(s+slen,(SvCUR(src)-slen),&clen,0); - Perl_warner(aTHX_ WARN_UTF8, "\"\\N{U+%"UVxf"}\" does not map to %s", ch, enc->name[0]); - /* FIXME: Skip over the character, copy in replacement and continue - * but that is messy so for now just fail. - */ - return &PL_sv_undef; - } - else - { - return &PL_sv_undef; - } - } - else - { - /* UTF-8 is supposed to be "Universal" so should not happen */ - Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8", - enc->name[0], (int)(SvCUR(src)-slen),s+slen); - } - break; - - case ENCODE_PARTIAL: - if (!check && ckWARN_d(WARN_UTF8)) - { - Perl_warner(aTHX_ WARN_UTF8, "Partial %s character", - (dir == enc->f_utf8) ? "UTF-8" : enc->name[0]); - } - return &PL_sv_undef; - - default: - Perl_croak(aTHX_ "Unexpected code %d converting %s %s", - code, (dir == enc->f_utf8) ? "to" : "from",enc->name[0]); - return &PL_sv_undef; - } + STRLEN slen; + U8 *s = (U8 *) SvPV(src, slen); + STRLEN tlen = slen; + STRLEN ddone = 0; + STRLEN sdone = 0; + SV *dst = sv_2mortal(newSV(slen+1)); + if (slen) { + U8 *d = (U8 *) SvPVX(dst); + STRLEN dlen = SvLEN(dst)-1; + int code; + while ((code = do_encode(dir, s, &slen, d, dlen, &dlen, !check))) { + SvCUR_set(dst, dlen+ddone); + SvPOK_only(dst); + +#if 0 + Perl_warn(aTHX_ "code=%d @ s=%d/%d/%d d=%d/%d/%d",code,slen,sdone,tlen,dlen,ddone,SvLEN(dst)-1); +#endif + + if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL) + break; + + switch (code) { + case ENCODE_NOSPACE: + { + STRLEN need ; + sdone += slen; + ddone += dlen; + if (sdone) { + need = (tlen*SvLEN(dst)+sdone-1)/sdone+UTF8_MAXLEN; + } + else { + need = SvLEN(dst) + UTF8_MAXLEN; + } + + d = (U8 *) SvGROW(dst, need); + if (ddone >= SvLEN(dst)) { + Perl_croak(aTHX_ "Destination couldn't be grown."); + } + dlen = SvLEN(dst)-ddone-1; + d += ddone; + s += slen; + slen = tlen-sdone; + continue; + } + + case ENCODE_NOREP: + if (dir == enc->f_utf8) { + if (!check && ckWARN_d(WARN_UTF8)) { + STRLEN clen; + UV ch = + utf8n_to_uvuni(s + slen, (SvCUR(src) - slen), + &clen, 0); + Perl_warner(aTHX_ packWARN(WARN_UTF8), + "\"\\N{U+%" UVxf + "}\" does not map to %s", ch, + enc->name[0]); + /* FIXME: Skip over the character, copy in replacement and continue + * but that is messy so for now just fail. + */ + return &PL_sv_undef; + } + else { + return &PL_sv_undef; + } + } + else { + /* UTF-8 is supposed to be "Universal" so should not happen + for real characters, but some encodings have non-assigned + codes which may occur. + */ + Perl_croak(aTHX_ "%s \"\\x%02X\" does not map to Unicode (%d)", + enc->name[0], (U8) s[slen], code); + } + break; + + default: + Perl_croak(aTHX_ "Unexpected code %d converting %s %s", + code, (dir == enc->f_utf8) ? "to" : "from", + enc->name[0]); + return &PL_sv_undef; + } + } + SvCUR_set(dst, dlen+ddone); + SvPOK_only(dst); + if (check) { + sdone = SvCUR(src) - (slen+sdone); + if (sdone) { + Move(s + slen, SvPVX(src), sdone , U8); + } + SvCUR_set(src, sdone); + } } - SvCUR_set(dst,dlen); - SvPOK_on(dst); - if (check) - { - if (slen < SvCUR(src)) - { - Move(s+slen,s,SvCUR(src)-slen,U8); - } - SvCUR_set(src,SvCUR(src)-slen); + else { + SvCUR_set(dst, 0); + SvPOK_only(dst); } - } - return dst; + *SvEND(dst) = '\0'; + return dst; } MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ @@ -436,10 +594,20 @@ MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ PROTOTYPES: ENABLE void -Method_decode(obj,src,check = 0) +Method_name(obj) +SV * obj +CODE: + { + encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0]))); + XSRETURN(1); + } + +void +Method_decode(obj,src,check = FALSE) SV * obj SV * src -int check +bool check CODE: { encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); @@ -449,10 +617,10 @@ CODE: } void -Method_encode(obj,src,check = 0) +Method_encode(obj,src,check = FALSE) SV * obj SV * src -int check +bool check CODE: { encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); @@ -503,6 +671,7 @@ _utf8_to_bytes(sv, ...) STRLEN len; U8 *s = (U8*)SvPV(sv, len); + RETVAL = 0; if (SvTRUE(check)) { /* Must do things the slow way */ U8 *dest; @@ -559,6 +728,8 @@ SV * sv bool check CODE: { + if (SvGMAGICAL(sv)) /* it could be $1, for example */ + sv = newSVsv(sv); /* GMAGIG will be done */ if (SvPOK(sv)) { RETVAL = SvUTF8(sv) ? TRUE : FALSE; if (RETVAL && @@ -568,6 +739,8 @@ bool check } else { RETVAL = FALSE; } + if (sv != ST(0)) + SvREFCNT_dec(sv); /* it was a temp copy */ } OUTPUT: RETVAL @@ -607,9 +780,9 @@ _utf8_off(sv) BOOT: { #if defined(USE_PERLIO) && !defined(USE_SFIO) - PerlIO_define_layer(&PerlIO_encode); + PerlIO_define_layer(aTHX_ &PerlIO_encode); #endif -#include "iso8859.def" -#include "EBCDIC.def" -#include "Symbols.def" +#include "8859_def.h" +#include "EBCDIC_def.h" +#include "Symbols_def.h" }