X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=ext%2FEncode%2FEncode.xs;h=36d5f3dac63d02ac8ebcb76536d4a97155b6e020;hb=21d92c23f49d139d8bddefbab6f984eb17e12d43;hp=299af4471f4ac659979bc958ef54fc00e68680f8;hpb=94b898283c35ddc92812a8e8be637f54c8c0989e;p=p5sagit%2Fp5-mst-13.2.git diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index 299af44..36d5f3d 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -1,516 +1,488 @@ -#define PERL_NO_GET_CONTEXT +/* + $Id: Encode.xs,v 1.55 2003/02/28 01:40:27 dankogai Exp $ + */ +#define PERL_NO_GET_CONTEXT #include "EXTERN.h" #include "perl.h" #include "XSUB.h" #define U8 U8 #include "encode.h" -#include "8859.h" -#include "EBCDIC.h" -#include "Symbols.h" +# define PERLIO_MODNAME "PerlIO::encoding" +# define PERLIO_FILENAME "PerlIO/encoding.pm" + +/* set 1 or more to profile. t/encoding.t dumps core because of + Perl_warner and PerlIO don't work well */ +#define ENCODE_XS_PROFILE 0 + +/* set 0 to disable floating point to calculate buffer size for + encode_method(). 1 is recommended. 2 restores NI-S original */ +#define ENCODE_XS_USEFP 1 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \ Perl_croak(aTHX_ "panic_unimplemented"); \ return (y)0; /* fool picky compilers */ \ } +/**/ + UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) -#if defined(USE_PERLIO) && !defined(USE_SFIO) -/* Define an encoding "layer" in the perliol.h sense. - The layer defined here "inherits" in an object-oriented sense from the - "perlio" layer with its PerlIOBuf_* "methods". - The implementation is particularly efficient as until Encode settles down - there is no point in tryint to tune it. - - The layer works by overloading the "fill" and "flush" methods. +void +Encode_XSEncoding(pTHX_ encode_t * enc) +{ + dSP; + HV *stash = gv_stashpv("Encode::XS", TRUE); + SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash); + int i = 0; + PUSHMARK(sp); + XPUSHs(sv); + while (enc->name[i]) { + const char *name = enc->name[i++]; + XPUSHs(sv_2mortal(newSVpvn(name, strlen(name)))); + } + PUTBACK; + call_pv("Encode::define_encoding", G_DISCARD); + SvREFCNT_dec(sv); +} - "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API - to convert the encoded data to UTF-8 form, then copies it back to the - buffer. The "base class's" read methods then see the UTF-8 data. +void +call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) +{ + /* Exists for breakpointing */ +} - "flush" transforms the UTF-8 data deposited by the "base class's write - method in the buffer back into the encoded form using the encode OO perl API, - then copies data back into the buffer and calls "SUPER::flush. - Note that "flush" is _also_ called for read mode - we still do the (back)-translate - so that the the base class's "flush" sees the correct number of encoded chars - for positioning the seek pointer. (This double translation is the worst performance - issue - particularly with all-perl encode engine.) +#define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s" +#define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode" -*/ +static SV * +encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, + int check, STRLEN * offset, SV * term, int * retcode) +{ + STRLEN slen; + U8 *s = (U8 *) SvPV(src, slen); + STRLEN tlen = slen; + STRLEN ddone = 0; + STRLEN sdone = 0; + + /* We allocate slen+1. + PerlIO dumps core if this value is smaller than this. */ + SV *dst = sv_2mortal(newSV(slen+1)); + U8 *d = (U8 *)SvPVX(dst); + STRLEN dlen = SvLEN(dst)-1; + int code = 0; + STRLEN trmlen = 0; + U8 *trm = term ? (U8*) SvPV(term, trmlen) : NULL; + + if (offset) { + s += *offset; + if (slen > *offset){ /* safeguard against slen overflow */ + slen -= *offset; + }else{ + slen = 0; + } + tlen = slen; + } + if (slen == 0){ + SvCUR_set(dst, 0); + SvPOK_only(dst); + goto ENCODE_END; + } -#include "perliol.h" + while( (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check, + trm, trmlen)) ) + { + SvCUR_set(dst, dlen+ddone); + SvPOK_only(dst); + + if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL || + code == ENCODE_FOUND_TERM) { + break; + } + switch (code) { + case ENCODE_NOSPACE: + { + STRLEN more = 0; /* make sure you initialize! */ + STRLEN sleft; + sdone += slen; + ddone += dlen; + sleft = tlen - sdone; +#if ENCODE_XS_PROFILE >= 2 + Perl_warn(aTHX_ + "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n", + more, sdone, sleft, SvLEN(dst)); +#endif + if (sdone != 0) { /* has src ever been processed ? */ +#if ENCODE_XS_USEFP == 2 + more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone + - SvLEN(dst); +#elif ENCODE_XS_USEFP + more = (STRLEN)((1.0*SvLEN(dst)+1)/sdone * sleft); +#else + /* safe until SvLEN(dst) == MAX_INT/16 */ + more = (16*SvLEN(dst)+1)/sdone/16 * sleft; +#endif + } + more += UTF8_MAXLEN; /* insurance policy */ + d = (U8 *) SvGROW(dst, SvLEN(dst) + more); + /* dst need to grow need MORE bytes! */ + if (ddone >= SvLEN(dst)) { + Perl_croak(aTHX_ "Destination couldn't be grown."); + } + dlen = SvLEN(dst)-ddone-1; + d += ddone; + s += slen; + slen = tlen-sdone; + continue; + } + case ENCODE_NOREP: + /* encoding */ + if (dir == enc->f_utf8) { + STRLEN clen; + UV ch = + utf8n_to_uvuni(s+slen, (SvCUR(src)-slen), + &clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY); + if (check & ENCODE_DIE_ON_ERR) { + Perl_croak(aTHX_ ERR_ENCODE_NOMAP, + (UV)ch, enc->name[0]); + return &PL_sv_undef; /* never reaches but be safe */ + } + if (check & ENCODE_WARN_ON_ERR){ + Perl_warner(aTHX_ packWARN(WARN_UTF8), + ERR_ENCODE_NOMAP, (UV)ch, enc->name[0]); + } + if (check & ENCODE_RETURN_ON_ERR){ + goto ENCODE_SET_SRC; + } + if (check & ENCODE_PERLQQ){ + SV* perlqq = + sv_2mortal(newSVpvf("\\x{%04"UVxf"}", (UV)ch)); + sdone += slen + clen; + ddone += dlen + SvCUR(perlqq); + sv_catsv(dst, perlqq); + }else if (check & ENCODE_HTMLCREF){ + SV* htmlcref = + sv_2mortal(newSVpvf("&#%" UVuf ";", (UV)ch)); + sdone += slen + clen; + ddone += dlen + SvCUR(htmlcref); + sv_catsv(dst, htmlcref); + }else if (check & ENCODE_XMLCREF){ + SV* xmlcref = + sv_2mortal(newSVpvf("&#x%" UVxf ";", (UV)ch)); + sdone += slen + clen; + ddone += dlen + SvCUR(xmlcref); + sv_catsv(dst, xmlcref); + } else { + /* fallback char */ + sdone += slen + clen; + ddone += dlen + enc->replen; + sv_catpvn(dst, (char*)enc->rep, enc->replen); + } + } + /* decoding */ + else { + if (check & ENCODE_DIE_ON_ERR){ + Perl_croak(aTHX_ ERR_DECODE_NOMAP, + enc->name[0], (UV)s[slen]); + return &PL_sv_undef; /* never reaches but be safe */ + } + if (check & ENCODE_WARN_ON_ERR){ + Perl_warner( + aTHX_ packWARN(WARN_UTF8), + ERR_DECODE_NOMAP, + enc->name[0], (UV)s[slen]); + } + if (check & ENCODE_RETURN_ON_ERR){ + goto ENCODE_SET_SRC; + } + if (check & + (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ + SV* perlqq = + sv_2mortal(newSVpvf("\\x%02" UVXf, (UV)s[slen])); + sdone += slen + 1; + ddone += dlen + SvCUR(perlqq); + sv_catsv(dst, perlqq); + } else { + sdone += slen + 1; + ddone += dlen + strlen(FBCHAR_UTF8); + sv_catpv(dst, FBCHAR_UTF8); + } + } + /* settle variables when fallback */ + d = (U8 *)SvEND(dst); + dlen = SvLEN(dst) - ddone - 1; + s = (U8*)SvPVX(src) + sdone; + slen = tlen - sdone; + break; + + default: + Perl_croak(aTHX_ "Unexpected code %d converting %s %s", + code, (dir == enc->f_utf8) ? "to" : "from", + enc->name[0]); + return &PL_sv_undef; + } + } + ENCODE_SET_SRC: + if (check && !(check & ENCODE_LEAVE_SRC)){ + sdone = SvCUR(src) - (slen+sdone); + if (sdone) { + sv_setpvn(src, (char*)s+slen, sdone); + } + SvCUR_set(src, sdone); + } + /* warn("check = 0x%X, code = 0x%d\n", check, code); */ -typedef struct -{ - PerlIOBuf base; /* PerlIOBuf stuff */ - SV * bufsv; - SV * enc; -} PerlIOEncode; + SvCUR_set(dst, dlen+ddone); + SvPOK_only(dst); -SV * -PerlIOEncode_getarg(PerlIO *f) -{ - dTHX; - PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); - SV *sv = &PL_sv_undef; - if (e->enc) - { - dSP; - ENTER; - SAVETMPS; - PUSHMARK(sp); - XPUSHs(e->enc); - PUTBACK; - if (perl_call_method("name",G_SCALAR) == 1) - { - SPAGAIN; - sv = newSVsv(POPs); - PUTBACK; +#if ENCODE_XS_PROFILE + if (SvCUR(dst) > SvCUR(src)){ + Perl_warn(aTHX_ + "SvLEN(dst)=%d, SvCUR(dst)=%d. %d bytes unused(%f %%)\n", + SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst), + (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0); } - } - return sv; -} +#endif -IV -PerlIOEncode_pushed(PerlIO *f, const char *mode, SV *arg) -{ - PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); - dTHX; - dSP; - IV code; - code = PerlIOBuf_pushed(f,mode,Nullsv); - ENTER; - SAVETMPS; - PUSHMARK(sp); - XPUSHs(arg); - PUTBACK; - if (perl_call_pv("Encode::find_encoding",G_SCALAR) != 1) - { - /* should never happen */ - Perl_die(aTHX_ "Encode::find_encoding did not return a value"); - return -1; - } - SPAGAIN; - e->enc = POPs; - PUTBACK; - if (!SvROK(e->enc)) - { - e->enc = Nullsv; - errno = EINVAL; - Perl_warner(aTHX_ WARN_IO, "Cannot find encoding \"%"SVf"\"", arg); - return -1; - } - SvREFCNT_inc(e->enc); - FREETMPS; - LEAVE; - PerlIOBase(f)->flags |= PERLIO_F_UTF8; - return code; -} + if (offset) + *offset += sdone + slen; -IV -PerlIOEncode_popped(PerlIO *f) -{ - PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); - dTHX; - if (e->enc) - { - SvREFCNT_dec(e->enc); - e->enc = Nullsv; - } - if (e->bufsv) - { - SvREFCNT_dec(e->bufsv); - e->bufsv = Nullsv; - } - return 0; + ENCODE_END: + *SvEND(dst) = '\0'; + if (retcode) *retcode = code; + return dst; } -STDCHAR * -PerlIOEncode_get_base(PerlIO *f) -{ - PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); - dTHX; - if (!e->base.bufsiz) - e->base.bufsiz = 1024; - if (!e->bufsv) - { - e->bufsv = newSV(e->base.bufsiz); - sv_setpvn(e->bufsv,"",0); - } - e->base.buf = (STDCHAR *)SvPVX(e->bufsv); - if (!e->base.ptr) - e->base.ptr = e->base.buf; - if (!e->base.end) - e->base.end = e->base.buf; - if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv)) - { - Perl_warn(aTHX_ " ptr %p(%p)%p", - e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv)); - abort(); - } - if (SvLEN(e->bufsv) < e->base.bufsiz) - { - SSize_t poff = e->base.ptr - e->base.buf; - SSize_t eoff = e->base.end - e->base.buf; - e->base.buf = (STDCHAR *)SvGROW(e->bufsv,e->base.bufsiz); - e->base.ptr = e->base.buf + poff; - e->base.end = e->base.buf + eoff; - } - if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv)) - { - Perl_warn(aTHX_ " ptr %p(%p)%p", - e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv)); - abort(); - } - return e->base.buf; -} +MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_ -IV -PerlIOEncode_fill(PerlIO *f) +void +Method_decode_xs(obj,src,check = 0) +SV * obj +SV * src +int check +CODE: { - PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); - dTHX; - dSP; - IV code; - code = PerlIOBuf_fill(f); - if (code == 0) - { - SV *uni; - STRLEN len; - char *s; - /* Set SV that is the buffer to be buf..ptr */ - SvCUR_set(e->bufsv, e->base.end - e->base.buf); - SvUTF8_off(e->bufsv); - ENTER; - SAVETMPS; - PUSHMARK(sp); - XPUSHs(e->enc); - XPUSHs(e->bufsv); - XPUSHs(&PL_sv_yes); - PUTBACK; - if (perl_call_method("decode",G_SCALAR) != 1) - code = -1; - SPAGAIN; - uni = POPs; - PUTBACK; - /* Now get translated string (forced to UTF-8) and copy back to buffer - don't use sv_setsv as that may "steal" PV from returned temp - and so free() our known-large-enough buffer. - sv_setpvn() should do but let us do it long hand. - */ - s = SvPVutf8(uni,len); - if (s != SvPVX(e->bufsv)) - { - e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len); - Move(s,e->base.buf,len,char); - SvCUR_set(e->bufsv,len); + STRLEN slen; + U8 *s = (U8 *) SvPV(src, slen); + U8 *e = (U8 *) SvEND(src); + SV *dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */ + SvPOK_only(dst); + SvCUR_set(dst,0); + if (SvUTF8(src)) { + s = utf8_to_bytes(s,&slen); + if (s) { + SvCUR_set(src,slen); + SvUTF8_off(src); + e = s+slen; + } + else { + croak("Cannot decode string with wide characters"); + } } - SvUTF8_on(e->bufsv); - e->base.end = e->base.buf+len; - e->base.ptr = e->base.buf; - FREETMPS; - LEAVE; - } - return code; + while (s < e) { + if (UTF8_IS_INVARIANT(*s) || UTF8_IS_START(*s)) { + U8 skip = UTF8SKIP(s); + if ((s + skip) > e) { + /* Partial character - done */ + break; + } + else if (is_utf8_char(s)) { + /* Whole char is good */ + sv_catpvn(dst,(char *)s,skip); + s += skip; + continue; + } + else { + /* starts ok but isn't "good" */ + } + } + else { + /* Invalid start byte */ + } + /* If we get here there is something wrong with alleged UTF-8 */ + if (check & ENCODE_DIE_ON_ERR){ + Perl_croak(aTHX_ ERR_DECODE_NOMAP, "utf8", (UV)*s); + XSRETURN(0); + } + if (check & ENCODE_WARN_ON_ERR){ + Perl_warner(aTHX_ packWARN(WARN_UTF8), + ERR_DECODE_NOMAP, "utf8", (UV)*s); + } + if (check & ENCODE_RETURN_ON_ERR) { + break; + } + if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ + SV* perlqq = newSVpvf("\\x%02" UVXf, (UV)*s); + sv_catsv(dst, perlqq); + SvREFCNT_dec(perlqq); + } else { + sv_catpv(dst, FBCHAR_UTF8); + } + s++; + } + *SvEND(dst) = '\0'; + + /* Clear out translated part of source unless asked not to */ + if (check && !(check & ENCODE_LEAVE_SRC)){ + slen = e-s; + if (slen) { + sv_setpvn(src, (char*)s, slen); + } + SvCUR_set(src, slen); + } + SvUTF8_on(dst); + ST(0) = sv_2mortal(dst); + XSRETURN(1); } -IV -PerlIOEncode_flush(PerlIO *f) +void +Method_encode_xs(obj,src,check = 0) +SV * obj +SV * src +int check +CODE: { - PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); - IV code = 0; - if (e->bufsv && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF|PERLIO_F_WRBUF)) - &&(e->base.ptr > e->base.buf) - ) - { - dTHX; - dSP; - SV *str; - char *s; - STRLEN len; - SSize_t left = 0; - if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) - { - /* This is really just a flag to see if we took all the data, if - we did PerlIOBase_flush avoids a seek to lower layer. - Need to revisit if we start getting clever with unreads or seeks-in-buffer - */ - left = e->base.end - e->base.ptr; + STRLEN slen; + U8 *s = (U8 *) SvPV(src, slen); + U8 *e = (U8 *) SvEND(src); + SV *dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */ + if (SvUTF8(src)) { + /* Already encoded - trust it and just copy the octets */ + sv_setpvn(dst,(char *)s,(e-s)); + s = e; } - ENTER; - SAVETMPS; - PUSHMARK(sp); - XPUSHs(e->enc); - SvCUR_set(e->bufsv, e->base.ptr - e->base.buf); - SvUTF8_on(e->bufsv); - XPUSHs(e->bufsv); - XPUSHs(&PL_sv_yes); - PUTBACK; - if (perl_call_method("encode",G_SCALAR) != 1) - code = -1; - SPAGAIN; - str = POPs; - PUTBACK; - s = SvPV(str,len); - if (s != SvPVX(e->bufsv)) - { - e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len); - Move(s,e->base.buf,len,char); - SvCUR_set(e->bufsv,len); + else { + /* Native bytes - can always encode */ + U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */ + while (s < e) { + UV uv = NATIVE_TO_UNI((UV) *s++); + if (UNI_IS_INVARIANT(uv)) + *d++ = (U8)UTF_TO_NATIVE(uv); + else { + *d++ = (U8)UTF8_EIGHT_BIT_HI(uv); + *d++ = (U8)UTF8_EIGHT_BIT_LO(uv); + } + } + SvCUR_set(dst, d- (U8 *)SvPVX(dst)); + *SvEND(dst) = '\0'; } - SvUTF8_off(e->bufsv); - e->base.ptr = e->base.buf+len; - /* restore end != ptr as inequality is used by PerlIOBuf_flush in read case */ - e->base.end = e->base.ptr + left; - FREETMPS; - LEAVE; - if (PerlIOBuf_flush(f) != 0) - code = -1; - } - return code; -} -IV -PerlIOEncode_close(PerlIO *f) -{ - PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); - IV code = PerlIOBase_close(f); - dTHX; - if (e->bufsv) - { - SvREFCNT_dec(e->bufsv); - e->bufsv = Nullsv; - } - e->base.buf = NULL; - e->base.ptr = NULL; - e->base.end = NULL; - PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF|PERLIO_F_WRBUF); - return code; + /* Clear out translated part of source unless asked not to */ + if (check && !(check & ENCODE_LEAVE_SRC)){ + slen = e-s; + if (slen) { + sv_setpvn(src, (char*)s, slen); + } + SvCUR_set(src, slen); + } + SvPOK_only(dst); + SvUTF8_off(dst); + ST(0) = sv_2mortal(dst); + XSRETURN(1); } -Off_t -PerlIOEncode_tell(PerlIO *f) +MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ + +PROTOTYPES: ENABLE + +void +Method_name(obj) +SV * obj +CODE: { - dTHX; - PerlIOBuf *b = PerlIOSelf(f,PerlIOBuf); - /* Unfortunately the only way to get a postion is to back-translate, - the UTF8-bytes we have buf..ptr and adjust accordingly. - But we will try and save any unread data in case stream - is un-seekable. - */ - if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF) && b->ptr < b->end) - { - Size_t count = b->end - b->ptr; - PerlIO_push(aTHX_ f,&PerlIO_pending,"r",Nullsv); - /* Save what we have left to read */ - PerlIOSelf(f,PerlIOBuf)->bufsiz = count; - PerlIO_unread(f,b->ptr,count); - /* There isn't any unread data - we just saved it - so avoid the lower seek */ - b->end = b->ptr; - /* Flush ourselves - now one layer down, - this does the back translate and adjusts position - */ - PerlIO_flush(PerlIONext(f)); - /* Set position of the saved data */ - PerlIOSelf(f,PerlIOBuf)->posn = b->posn; - } - else - { - PerlIO_flush(f); - } - return b->posn; + encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0]))); + XSRETURN(1); } -PerlIO * -PerlIOEncode_dup(pTHX_ PerlIO *f, PerlIO *o, CLONE_PARAMS *params) +void +Method_cat_decode(obj, dst, src, off, term, check = 0) +SV * obj +SV * dst +SV * src +SV * off +SV * term +int check +CODE: { - if ((f = PerlIOBase_dup(aTHX_ f, o, params))) - { - PerlIOEncode *fe = PerlIOSelf(f,PerlIOEncode); - PerlIOEncode *oe = PerlIOSelf(o,PerlIOEncode); - if (oe->enc) - { - fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params); + encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + STRLEN offset = (STRLEN)SvIV(off); + int code = 0; + if (SvUTF8(src)) { + sv_utf8_downgrade(src, FALSE); + } + sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, check, + &offset, term, &code)); + SvIVX(off) = (IV)offset; + if (code == ENCODE_FOUND_TERM) { + ST(0) = &PL_sv_yes; + }else{ + ST(0) = &PL_sv_no; } - } - return f; + XSRETURN(1); } -PerlIO_funcs PerlIO_encode = { - "encoding", - sizeof(PerlIOEncode), - PERLIO_K_BUFFERED, - PerlIOEncode_pushed, - PerlIOEncode_popped, - PerlIOBuf_open, - PerlIOEncode_getarg, - PerlIOBase_fileno, - PerlIOEncode_dup, - PerlIOBuf_read, - PerlIOBuf_unread, - PerlIOBuf_write, - PerlIOBuf_seek, - PerlIOEncode_tell, - PerlIOEncode_close, - PerlIOEncode_flush, - PerlIOEncode_fill, - PerlIOBase_eof, - PerlIOBase_error, - PerlIOBase_clearerr, - PerlIOBase_setlinebuf, - PerlIOEncode_get_base, - PerlIOBuf_bufsiz, - PerlIOBuf_get_ptr, - PerlIOBuf_get_cnt, - PerlIOBuf_set_ptrcnt, -}; -#endif /* encode layer */ - void -Encode_Define(pTHX_ encode_t *enc) +Method_decode(obj,src,check = 0) +SV * obj +SV * src +int check +CODE: { - dSP; - HV *stash = gv_stashpv("Encode::XS", TRUE); - SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash); - int i = 0; - PUSHMARK(sp); - XPUSHs(sv); - while (enc->name[i]) - { - const char *name = enc->name[i++]; - XPUSHs(sv_2mortal(newSVpvn(name,strlen(name)))); - } - PUTBACK; - call_pv("Encode::define_encoding",G_DISCARD); - SvREFCNT_dec(sv); + encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + if (SvUTF8(src)) { + sv_utf8_downgrade(src, FALSE); + } + ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check, + NULL, Nullsv, NULL); + SvUTF8_on(ST(0)); + XSRETURN(1); } -void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {} - -static SV * -encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check) +void +Method_encode(obj,src,check = 0) +SV * obj +SV * src +int check +CODE: { - STRLEN slen; - U8 *s = (U8 *) SvPV(src,slen); - SV *dst = sv_2mortal(newSV(2*slen+1)); - if (slen) - { - U8 *d = (U8 *) SvGROW(dst, 2*slen+1); - STRLEN dlen = SvLEN(dst); - int code; - while ((code = do_encode(dir,s,&slen,d,dlen,&dlen,!check))) - { - SvCUR_set(dst,dlen); - SvPOK_on(dst); - - if (code == ENCODE_FALLBACK) - break; - - switch(code) - { - case ENCODE_NOSPACE: - { - STRLEN need = (slen) ? (SvLEN(dst)*SvCUR(src)/slen) : (dlen + UTF8_MAXLEN); - if (need <= SvLEN(dst)) - need += UTF8_MAXLEN; - d = (U8 *) SvGROW(dst, need); - dlen = SvLEN(dst); - slen = SvCUR(src); - break; - } - - case ENCODE_NOREP: - if (dir == enc->f_utf8) - { - if (!check && ckWARN_d(WARN_UTF8)) - { - STRLEN clen; - UV ch = utf8n_to_uvuni(s+slen,(SvCUR(src)-slen),&clen,0); - Perl_warner(aTHX_ WARN_UTF8, "\"\\N{U+%"UVxf"}\" does not map to %s", ch, enc->name[0]); - /* FIXME: Skip over the character, copy in replacement and continue - * but that is messy so for now just fail. - */ - return &PL_sv_undef; - } - else - { - return &PL_sv_undef; - } - } - else - { - /* UTF-8 is supposed to be "Universal" so should not happen */ - Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8", - enc->name[0], (int)(SvCUR(src)-slen),s+slen); - } - break; - - case ENCODE_PARTIAL: - if (!check && ckWARN_d(WARN_UTF8)) - { - Perl_warner(aTHX_ WARN_UTF8, "Partial %s character", - (dir == enc->f_utf8) ? "UTF-8" : enc->name[0]); - } - return &PL_sv_undef; - - default: - Perl_croak(aTHX_ "Unexpected code %d converting %s %s", - code, (dir == enc->f_utf8) ? "to" : "from",enc->name[0]); - return &PL_sv_undef; - } - } - SvCUR_set(dst,dlen); - SvPOK_on(dst); - if (check) - { - if (slen < SvCUR(src)) - { - Move(s+slen,s,SvCUR(src)-slen,U8); - } - SvCUR_set(src,SvCUR(src)-slen); - } - } - else - { - SvCUR_set(dst,slen); - SvPOK_on(dst); - } - return dst; + encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + sv_utf8_upgrade(src); + ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check, + NULL, Nullsv, NULL); + XSRETURN(1); } -MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ - -PROTOTYPES: ENABLE - void -Method_decode(obj,src,check = FALSE) +Method_needs_lines(obj) SV * obj -SV * src -bool check CODE: - { - encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); - ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check); - SvUTF8_on(ST(0)); - XSRETURN(1); - } +{ + /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ + ST(0) = &PL_sv_no; + XSRETURN(1); +} void -Method_encode(obj,src,check = FALSE) +Method_perlio_ok(obj) SV * obj -SV * src -bool check CODE: - { - encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); - sv_utf8_upgrade(src); - ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check); - XSRETURN(1); - } +{ + /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ + /* require_pv(PERLIO_FILENAME); */ + + eval_pv("require PerlIO::encoding", 0); + + if (SvTRUE(get_sv("@", 0))) { + ST(0) = &PL_sv_no; + }else{ + ST(0) = &PL_sv_yes; + } + XSRETURN(1); +} MODULE = Encode PACKAGE = Encode @@ -518,150 +490,249 @@ PROTOTYPES: ENABLE I32 _bytes_to_utf8(sv, ...) - SV * sv - CODE: - { - SV * encoding = items == 2 ? ST(1) : Nullsv; - - if (encoding) - RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); - else { - STRLEN len; - U8* s = (U8*)SvPV(sv, len); - U8* converted; - - converted = bytes_to_utf8(s, &len); /* This allocs */ - sv_setpvn(sv, (char *)converted, len); - SvUTF8_on(sv); /* XXX Should we? */ - Safefree(converted); /* ... so free it */ - RETVAL = len; - } - } - OUTPUT: - RETVAL +SV * sv +CODE: +{ + SV * encoding = items == 2 ? ST(1) : Nullsv; + + if (encoding) + RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); + else { + STRLEN len; + U8* s = (U8*)SvPV(sv, len); + U8* converted; + + converted = bytes_to_utf8(s, &len); /* This allocs */ + sv_setpvn(sv, (char *)converted, len); + SvUTF8_on(sv); /* XXX Should we? */ + Safefree(converted); /* ... so free it */ + RETVAL = len; + } +} +OUTPUT: + RETVAL I32 _utf8_to_bytes(sv, ...) - SV * sv - CODE: - { - SV * to = items > 1 ? ST(1) : Nullsv; - SV * check = items > 2 ? ST(2) : Nullsv; - - if (to) - RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); - else { - STRLEN len; - U8 *s = (U8*)SvPV(sv, len); - - RETVAL = 0; - if (SvTRUE(check)) { - /* Must do things the slow way */ - U8 *dest; - U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */ - U8 *send = s + len; - - New(83, dest, len, U8); /* I think */ - - while (s < send) { - if (*s < 0x80) - *dest++ = *s++; - else { - STRLEN ulen; - UV uv = *s++; - - /* Have to do it all ourselves because of error routine, - aargh. */ - if (!(uv & 0x40)) - goto failure; - if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } - else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } - else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } - else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } - else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } - else if (!(uv & 0x01)) { ulen = 7; uv = 0; } - else { ulen = 13; uv = 0; } - - /* Note change to utf8.c variable naming, for variety */ - while (ulen--) { - if ((*s & 0xc0) != 0x80) - goto failure; +SV * sv +CODE: +{ + SV * to = items > 1 ? ST(1) : Nullsv; + SV * check = items > 2 ? ST(2) : Nullsv; + + if (to) { + RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); + } else { + STRLEN len; + U8 *s = (U8*)SvPV(sv, len); + + RETVAL = 0; + if (SvTRUE(check)) { + /* Must do things the slow way */ + U8 *dest; + /* We need a copy to pass to check() */ + U8 *src = (U8*)savepv((char *)s); + U8 *send = s + len; + + New(83, dest, len, U8); /* I think */ + + while (s < send) { + if (*s < 0x80){ + *dest++ = *s++; + } else { + STRLEN ulen; + UV uv = *s++; + + /* Have to do it all ourselves because of error routine, + aargh. */ + if (!(uv & 0x40)){ goto failure; } + if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } + else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } + else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } + else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } + else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } + else if (!(uv & 0x01)) { ulen = 7; uv = 0; } + else { ulen = 13; uv = 0; } - else - uv = (uv << 6) | (*s++ & 0x3f); + /* Note change to utf8.c variable naming, for variety */ + while (ulen--) { + if ((*s & 0xc0) != 0x80){ + goto failure; + } else { + uv = (uv << 6) | (*s++ & 0x3f); + } } if (uv > 256) { failure: - call_failure(check, s, dest, src); - /* Now what happens? */ + call_failure(check, s, dest, src); + /* Now what happens? */ } *dest++ = (U8)uv; - } - } - } else - RETVAL = (utf8_to_bytes(s, &len) ? len : 0); - } + } + } + } else { + RETVAL = (utf8_to_bytes(s, &len) ? len : 0); } - OUTPUT: - RETVAL + } +} +OUTPUT: + RETVAL bool -is_utf8(sv, check = FALSE) +is_utf8(sv, check = 0) SV * sv -bool check - CODE: - { - if (SvPOK(sv)) { - RETVAL = SvUTF8(sv) ? TRUE : FALSE; - if (RETVAL && - check && - !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) - RETVAL = FALSE; - } else { +int check +CODE: +{ + if (SvGMAGICAL(sv)) /* it could be $1, for example */ + sv = newSVsv(sv); /* GMAGIG will be done */ + if (SvPOK(sv)) { + RETVAL = SvUTF8(sv) ? TRUE : FALSE; + if (RETVAL && + check && + !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) RETVAL = FALSE; - } - } - OUTPUT: - RETVAL + } else { + RETVAL = FALSE; + } + if (sv != ST(0)) + SvREFCNT_dec(sv); /* it was a temp copy */ +} +OUTPUT: + RETVAL SV * _utf8_on(sv) - SV * sv - CODE: - { - if (SvPOK(sv)) { - SV *rsv = newSViv(SvUTF8(sv)); - RETVAL = rsv; - SvUTF8_on(sv); - } else { - RETVAL = &PL_sv_undef; - } - } - OUTPUT: - RETVAL +SV * sv +CODE: +{ + if (SvPOK(sv)) { + SV *rsv = newSViv(SvUTF8(sv)); + RETVAL = rsv; + SvUTF8_on(sv); + } else { + RETVAL = &PL_sv_undef; + } +} +OUTPUT: + RETVAL SV * _utf8_off(sv) - SV * sv - CODE: - { - if (SvPOK(sv)) { - SV *rsv = newSViv(SvUTF8(sv)); - RETVAL = rsv; - SvUTF8_off(sv); - } else { - RETVAL = &PL_sv_undef; - } - } - OUTPUT: - RETVAL +SV * sv +CODE: +{ + if (SvPOK(sv)) { + SV *rsv = newSViv(SvUTF8(sv)); + RETVAL = rsv; + SvUTF8_off(sv); + } else { + RETVAL = &PL_sv_undef; + } +} +OUTPUT: + RETVAL + +int +DIE_ON_ERR() +CODE: + RETVAL = ENCODE_DIE_ON_ERR; +OUTPUT: + RETVAL + +int +WARN_ON_ERR() +CODE: + RETVAL = ENCODE_WARN_ON_ERR; +OUTPUT: + RETVAL + +int +LEAVE_SRC() +CODE: + RETVAL = ENCODE_LEAVE_SRC; +OUTPUT: + RETVAL + +int +RETURN_ON_ERR() +CODE: + RETVAL = ENCODE_RETURN_ON_ERR; +OUTPUT: + RETVAL + +int +PERLQQ() +CODE: + RETVAL = ENCODE_PERLQQ; +OUTPUT: + RETVAL + +int +HTMLCREF() +CODE: + RETVAL = ENCODE_HTMLCREF; +OUTPUT: + RETVAL + +int +XMLCREF() +CODE: + RETVAL = ENCODE_XMLCREF; +OUTPUT: + RETVAL + +int +FB_DEFAULT() +CODE: + RETVAL = ENCODE_FB_DEFAULT; +OUTPUT: + RETVAL + +int +FB_CROAK() +CODE: + RETVAL = ENCODE_FB_CROAK; +OUTPUT: + RETVAL + +int +FB_QUIET() +CODE: + RETVAL = ENCODE_FB_QUIET; +OUTPUT: + RETVAL + +int +FB_WARN() +CODE: + RETVAL = ENCODE_FB_WARN; +OUTPUT: + RETVAL + +int +FB_PERLQQ() +CODE: + RETVAL = ENCODE_FB_PERLQQ; +OUTPUT: + RETVAL + +int +FB_HTMLCREF() +CODE: + RETVAL = ENCODE_FB_HTMLCREF; +OUTPUT: + RETVAL + +int +FB_XMLCREF() +CODE: + RETVAL = ENCODE_FB_XMLCREF; +OUTPUT: + RETVAL BOOT: { -#if defined(USE_PERLIO) && !defined(USE_SFIO) - PerlIO_define_layer(aTHX_ &PerlIO_encode); -#endif -#include "8859.def" -#include "EBCDIC.def" -#include "Symbols.def" +#include "def_t.h" +#include "def_t.exh" }