X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=ext%2FEncode%2FEncode.xs;h=36d5f3dac63d02ac8ebcb76536d4a97155b6e020;hb=21d92c23f49d139d8bddefbab6f984eb17e12d43;hp=0f3f1d41854a74bfc2eae9373b5fad44de7565b0;hpb=6e21dc912fff1c74f60032b406b7b96bd0c3ee86;p=p5sagit%2Fp5-mst-13.2.git diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index 0f3f1d4..36d5f3d 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -1,5 +1,5 @@ /* - $Id: Encode.xs,v 1.45 2002/05/07 16:22:42 dankogai Exp dankogai $ + $Id: Encode.xs,v 1.55 2003/02/28 01:40:27 dankogai Exp $ */ #define PERL_NO_GET_CONTEXT @@ -59,7 +59,7 @@ call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) static SV * encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, - int check) + int check, STRLEN * offset, SV * term, int * retcode) { STRLEN slen; U8 *s = (U8 *) SvPV(src, slen); @@ -72,20 +72,34 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, SV *dst = sv_2mortal(newSV(slen+1)); U8 *d = (U8 *)SvPVX(dst); STRLEN dlen = SvLEN(dst)-1; - int code; + int code = 0; + STRLEN trmlen = 0; + U8 *trm = term ? (U8*) SvPV(term, trmlen) : NULL; + + if (offset) { + s += *offset; + if (slen > *offset){ /* safeguard against slen overflow */ + slen -= *offset; + }else{ + slen = 0; + } + tlen = slen; + } - if (!slen){ + if (slen == 0){ SvCUR_set(dst, 0); SvPOK_only(dst); goto ENCODE_END; } - while (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check)) + while( (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check, + trm, trmlen)) ) { SvCUR_set(dst, dlen+ddone); SvPOK_only(dst); - if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL){ + if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL || + code == ENCODE_FOUND_TERM) { break; } switch (code) { @@ -172,14 +186,14 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, else { if (check & ENCODE_DIE_ON_ERR){ Perl_croak(aTHX_ ERR_DECODE_NOMAP, - PTR2UV(enc->name[0]), (U8)s[slen]); + enc->name[0], (UV)s[slen]); return &PL_sv_undef; /* never reaches but be safe */ } if (check & ENCODE_WARN_ON_ERR){ Perl_warner( aTHX_ packWARN(WARN_UTF8), ERR_DECODE_NOMAP, - PTR2UV(enc->name[0]), (U8)s[slen]); + enc->name[0], (UV)s[slen]); } if (check & ENCODE_RETURN_ON_ERR){ goto ENCODE_SET_SRC; @@ -233,11 +247,143 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, } #endif + if (offset) + *offset += sdone + slen; + ENCODE_END: *SvEND(dst) = '\0'; + if (retcode) *retcode = code; return dst; } +MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_ + +void +Method_decode_xs(obj,src,check = 0) +SV * obj +SV * src +int check +CODE: +{ + STRLEN slen; + U8 *s = (U8 *) SvPV(src, slen); + U8 *e = (U8 *) SvEND(src); + SV *dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */ + SvPOK_only(dst); + SvCUR_set(dst,0); + if (SvUTF8(src)) { + s = utf8_to_bytes(s,&slen); + if (s) { + SvCUR_set(src,slen); + SvUTF8_off(src); + e = s+slen; + } + else { + croak("Cannot decode string with wide characters"); + } + } + while (s < e) { + if (UTF8_IS_INVARIANT(*s) || UTF8_IS_START(*s)) { + U8 skip = UTF8SKIP(s); + if ((s + skip) > e) { + /* Partial character - done */ + break; + } + else if (is_utf8_char(s)) { + /* Whole char is good */ + sv_catpvn(dst,(char *)s,skip); + s += skip; + continue; + } + else { + /* starts ok but isn't "good" */ + } + } + else { + /* Invalid start byte */ + } + /* If we get here there is something wrong with alleged UTF-8 */ + if (check & ENCODE_DIE_ON_ERR){ + Perl_croak(aTHX_ ERR_DECODE_NOMAP, "utf8", (UV)*s); + XSRETURN(0); + } + if (check & ENCODE_WARN_ON_ERR){ + Perl_warner(aTHX_ packWARN(WARN_UTF8), + ERR_DECODE_NOMAP, "utf8", (UV)*s); + } + if (check & ENCODE_RETURN_ON_ERR) { + break; + } + if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ + SV* perlqq = newSVpvf("\\x%02" UVXf, (UV)*s); + sv_catsv(dst, perlqq); + SvREFCNT_dec(perlqq); + } else { + sv_catpv(dst, FBCHAR_UTF8); + } + s++; + } + *SvEND(dst) = '\0'; + + /* Clear out translated part of source unless asked not to */ + if (check && !(check & ENCODE_LEAVE_SRC)){ + slen = e-s; + if (slen) { + sv_setpvn(src, (char*)s, slen); + } + SvCUR_set(src, slen); + } + SvUTF8_on(dst); + ST(0) = sv_2mortal(dst); + XSRETURN(1); +} + +void +Method_encode_xs(obj,src,check = 0) +SV * obj +SV * src +int check +CODE: +{ + STRLEN slen; + U8 *s = (U8 *) SvPV(src, slen); + U8 *e = (U8 *) SvEND(src); + SV *dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */ + if (SvUTF8(src)) { + /* Already encoded - trust it and just copy the octets */ + sv_setpvn(dst,(char *)s,(e-s)); + s = e; + } + else { + /* Native bytes - can always encode */ + U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */ + while (s < e) { + UV uv = NATIVE_TO_UNI((UV) *s++); + if (UNI_IS_INVARIANT(uv)) + *d++ = (U8)UTF_TO_NATIVE(uv); + else { + *d++ = (U8)UTF8_EIGHT_BIT_HI(uv); + *d++ = (U8)UTF8_EIGHT_BIT_LO(uv); + } + } + SvCUR_set(dst, d- (U8 *)SvPVX(dst)); + *SvEND(dst) = '\0'; + } + + /* Clear out translated part of source unless asked not to */ + if (check && !(check & ENCODE_LEAVE_SRC)){ + slen = e-s; + if (slen) { + sv_setpvn(src, (char*)s, slen); + } + SvCUR_set(src, slen); + } + SvPOK_only(dst); + SvUTF8_off(dst); + ST(0) = sv_2mortal(dst); + XSRETURN(1); +} + MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ PROTOTYPES: ENABLE @@ -253,6 +399,33 @@ CODE: } void +Method_cat_decode(obj, dst, src, off, term, check = 0) +SV * obj +SV * dst +SV * src +SV * off +SV * term +int check +CODE: +{ + encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + STRLEN offset = (STRLEN)SvIV(off); + int code = 0; + if (SvUTF8(src)) { + sv_utf8_downgrade(src, FALSE); + } + sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, check, + &offset, term, &code)); + SvIVX(off) = (IV)offset; + if (code == ENCODE_FOUND_TERM) { + ST(0) = &PL_sv_yes; + }else{ + ST(0) = &PL_sv_no; + } + XSRETURN(1); +} + +void Method_decode(obj,src,check = 0) SV * obj SV * src @@ -260,7 +433,11 @@ int check CODE: { encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); - ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check); + if (SvUTF8(src)) { + sv_utf8_downgrade(src, FALSE); + } + ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check, + NULL, Nullsv, NULL); SvUTF8_on(ST(0)); XSRETURN(1); } @@ -274,7 +451,8 @@ CODE: { encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); sv_utf8_upgrade(src); - ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check); + ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check, + NULL, Nullsv, NULL); XSRETURN(1); } @@ -283,7 +461,7 @@ Method_needs_lines(obj) SV * obj CODE: { - encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ ST(0) = &PL_sv_no; XSRETURN(1); } @@ -293,7 +471,7 @@ Method_perlio_ok(obj) SV * obj CODE: { - encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ /* require_pv(PERLIO_FILENAME); */ eval_pv("require PerlIO::encoding", 0);