X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=ext%2FEncode%2FEncode.xs;h=4d30914995b13f9154de12b7db4eaea1cedabb42;hb=b536bf570905070470ba64c88c9fb4f28bfa54f3;hp=992fbfeb2d9c0463640c7c42f40b4e6db1108cd8;hpb=63b38b604ef39ee14b693bccd8b54d416e832d2c;p=p5sagit%2Fp5-mst-13.2.git diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index 992fbfe..4d30914 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -1,5 +1,5 @@ /* - $Id: Encode.xs,v 1.31 2002/04/20 23:43:47 dankogai Exp dankogai $ + $Id: Encode.xs,v 1.52 2002/11/18 17:28:49 dankogai Exp dankogai $ */ #define PERL_NO_GET_CONTEXT @@ -9,6 +9,9 @@ #define U8 U8 #include "encode.h" +# define PERLIO_MODNAME "PerlIO::encoding" +# define PERLIO_FILENAME "PerlIO/encoding.pm" + /* set 1 or more to profile. t/encoding.t dumps core because of Perl_warner and PerlIO don't work well */ #define ENCODE_XS_PROFILE 0 @@ -22,8 +25,9 @@ return (y)0; /* fool picky compilers */ \ } /**/ -UNIMPLEMENTED(_encoded_utf8_to_bytes, I32); -UNIMPLEMENTED(_encoded_bytes_to_utf8, I32); + +UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) +UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) void Encode_XSEncoding(pTHX_ encode_t * enc) @@ -50,6 +54,9 @@ call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) } +#define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s" +#define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode" + static SV * encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, int check) @@ -73,7 +80,7 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, goto ENCODE_END; } - while (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check)) + while( (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check)) ) { SvCUR_set(dst, dlen+ddone); SvPOK_only(dst); @@ -99,7 +106,7 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone - SvLEN(dst); #elif ENCODE_XS_USEFP - more = (1.0*SvLEN(dst)+1)/sdone * sleft; + more = (STRLEN)((1.0*SvLEN(dst)+1)/sdone * sleft); #else /* safe until SvLEN(dst) == MAX_INT/16 */ more = (16*SvLEN(dst)+1)/sdone/16 * sleft; @@ -125,58 +132,69 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, utf8n_to_uvuni(s+slen, (SvCUR(src)-slen), &clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY); if (check & ENCODE_DIE_ON_ERR) { - Perl_croak( - aTHX_ "\"\\N{U+%" UVxf "}\" does not map to %s, %d", - ch, enc->name[0], __LINE__); - }else{ - if (check & ENCODE_RETURN_ON_ERR){ - if (check & ENCODE_WARN_ON_ERR){ - Perl_warner( - aTHX_ packWARN(WARN_UTF8), - "\"\\N{U+%" UVxf "}\" does not map to %s", - ch,enc->name[0]); - } - goto ENCODE_SET_SRC; - }else if (check & ENCODE_PERLQQ){ - SV* perlqq = - sv_2mortal(newSVpvf("\\x{%04x}", ch)); - sdone += slen + clen; - ddone += dlen + SvCUR(perlqq); - sv_catsv(dst, perlqq); - } else { - /* fallback char */ - sdone += slen + clen; - ddone += dlen + enc->replen; - sv_catpvn(dst, (char*)enc->rep, enc->replen); - } + Perl_croak(aTHX_ ERR_ENCODE_NOMAP, + (UV)ch, enc->name[0]); + return &PL_sv_undef; /* never reaches but be safe */ + } + if (check & ENCODE_WARN_ON_ERR){ + Perl_warner(aTHX_ packWARN(WARN_UTF8), + ERR_ENCODE_NOMAP, (UV)ch, enc->name[0]); + } + if (check & ENCODE_RETURN_ON_ERR){ + goto ENCODE_SET_SRC; + } + if (check & ENCODE_PERLQQ){ + SV* perlqq = + sv_2mortal(newSVpvf("\\x{%04"UVxf"}", (UV)ch)); + sdone += slen + clen; + ddone += dlen + SvCUR(perlqq); + sv_catsv(dst, perlqq); + }else if (check & ENCODE_HTMLCREF){ + SV* htmlcref = + sv_2mortal(newSVpvf("&#%" UVuf ";", (UV)ch)); + sdone += slen + clen; + ddone += dlen + SvCUR(htmlcref); + sv_catsv(dst, htmlcref); + }else if (check & ENCODE_XMLCREF){ + SV* xmlcref = + sv_2mortal(newSVpvf("&#x%" UVxf ";", (UV)ch)); + sdone += slen + clen; + ddone += dlen + SvCUR(xmlcref); + sv_catsv(dst, xmlcref); + } else { + /* fallback char */ + sdone += slen + clen; + ddone += dlen + enc->replen; + sv_catpvn(dst, (char*)enc->rep, enc->replen); } } /* decoding */ else { if (check & ENCODE_DIE_ON_ERR){ - Perl_croak( - aTHX_ "%s \"\\x%02X\" does not map to Unicode (%d)", - enc->name[0], (U8) s[slen], code); - }else{ - if (check & ENCODE_RETURN_ON_ERR){ - if (check & ENCODE_WARN_ON_ERR){ - Perl_warner( - aTHX_ packWARN(WARN_UTF8), - "%s \"\\x%02X\" does not map to Unicode (%d)", - enc->name[0], (U8) s[slen], code); - } - goto ENCODE_SET_SRC; - }else if (check & ENCODE_PERLQQ){ - SV* perlqq = - sv_2mortal(newSVpvf("\\x%02X", s[slen])); - sdone += slen + 1; - ddone += dlen + SvCUR(perlqq); - sv_catsv(dst, perlqq); - } else { - sdone += slen + 1; - ddone += dlen + strlen(FBCHAR_UTF8); - sv_catpv(dst, FBCHAR_UTF8); - } + Perl_croak(aTHX_ ERR_DECODE_NOMAP, + enc->name[0], (UV)s[slen]); + return &PL_sv_undef; /* never reaches but be safe */ + } + if (check & ENCODE_WARN_ON_ERR){ + Perl_warner( + aTHX_ packWARN(WARN_UTF8), + ERR_DECODE_NOMAP, + enc->name[0], (UV)s[slen]); + } + if (check & ENCODE_RETURN_ON_ERR){ + goto ENCODE_SET_SRC; + } + if (check & + (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ + SV* perlqq = + sv_2mortal(newSVpvf("\\x%02" UVXf, (UV)s[slen])); + sdone += slen + 1; + ddone += dlen + SvCUR(perlqq); + sv_catsv(dst, perlqq); + } else { + sdone += slen + 1; + ddone += dlen + strlen(FBCHAR_UTF8); + sv_catpv(dst, FBCHAR_UTF8); } } /* settle variables when fallback */ @@ -202,9 +220,6 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, SvCUR_set(src, sdone); } /* warn("check = 0x%X, code = 0x%d\n", check, code); */ - if (code && !(check & ENCODE_RETURN_ON_ERR)) { - return &PL_sv_undef; - } SvCUR_set(dst, dlen+ddone); SvPOK_only(dst); @@ -223,6 +238,134 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, return dst; } +MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_ + +void +Method_decode_xs(obj,src,check = 0) +SV * obj +SV * src +int check +CODE: +{ + STRLEN slen; + U8 *s = (U8 *) SvPV(src, slen); + U8 *e = (U8 *) SvEND(src); + SV *dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */ + SvPOK_only(dst); + SvCUR_set(dst,0); + if (SvUTF8(src)) { + s = utf8_to_bytes(s,&slen); + if (s) { + SvCUR_set(src,slen); + SvUTF8_off(src); + e = s+slen; + } + else { + croak("Cannot decode string with wide characters"); + } + } + while (s < e) { + if (UTF8_IS_INVARIANT(*s) || UTF8_IS_START(*s)) { + U8 skip = UTF8SKIP(s); + if ((s + skip) > e) { + /* Partial character - done */ + break; + } + else if (is_utf8_char(s)) { + /* Whole char is good */ + sv_catpvn(dst,(char *)s,skip); + s += skip; + continue; + } + else { + /* starts ok but isn't "good" */ + } + } + else { + /* Invalid start byte */ + } + /* If we get here there is something wrong with alleged UTF-8 */ + if (check & ENCODE_DIE_ON_ERR){ + Perl_croak(aTHX_ ERR_DECODE_NOMAP, "utf8", (UV)*s); + XSRETURN(0); + } + if (check & ENCODE_WARN_ON_ERR){ + Perl_warner(aTHX_ packWARN(WARN_UTF8), + ERR_DECODE_NOMAP, "utf8", (UV)*s); + } + if (check & ENCODE_RETURN_ON_ERR) { + break; + } + if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ + SV* perlqq = newSVpvf("\\x%02" UVXf, (UV)*s); + sv_catsv(dst, perlqq); + SvREFCNT_dec(perlqq); + } else { + sv_catpv(dst, FBCHAR_UTF8); + } + s++; + } + *SvEND(dst) = '\0'; + + /* Clear out translated part of source unless asked not to */ + if (check && !(check & ENCODE_LEAVE_SRC)){ + slen = e-s; + if (slen) { + sv_setpvn(src, (char*)s, slen); + } + SvCUR_set(src, slen); + } + SvUTF8_on(dst); + ST(0) = sv_2mortal(dst); + XSRETURN(1); +} + +void +Method_encode_xs(obj,src,check = 0) +SV * obj +SV * src +int check +CODE: +{ + STRLEN slen; + U8 *s = (U8 *) SvPV(src, slen); + U8 *e = (U8 *) SvEND(src); + SV *dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */ + if (SvUTF8(src)) { + /* Already encoded - trust it and just copy the octets */ + sv_setpvn(dst,(char *)s,(e-s)); + s = e; + } + else { + /* Native bytes - can always encode */ + U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */ + while (s < e) { + UV uv = NATIVE_TO_UNI((UV) *s++); + if (UNI_IS_INVARIANT(uv)) + *d++ = (U8)UTF_TO_NATIVE(uv); + else { + *d++ = (U8)UTF8_EIGHT_BIT_HI(uv); + *d++ = (U8)UTF8_EIGHT_BIT_LO(uv); + } + } + SvCUR_set(dst, d- (U8 *)SvPVX(dst)); + *SvEND(dst) = '\0'; + } + + /* Clear out translated part of source unless asked not to */ + if (check && !(check & ENCODE_LEAVE_SRC)){ + slen = e-s; + if (slen) { + sv_setpvn(src, (char*)s, slen); + } + SvCUR_set(src, slen); + } + SvPOK_only(dst); + SvUTF8_off(dst); + ST(0) = sv_2mortal(dst); + XSRETURN(1); +} + MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ PROTOTYPES: ENABLE @@ -245,6 +388,9 @@ int check CODE: { encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + if (SvUTF8(src)) { + sv_utf8_downgrade(src, FALSE); + } ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check); SvUTF8_on(ST(0)); XSRETURN(1); @@ -263,6 +409,34 @@ CODE: XSRETURN(1); } +void +Method_needs_lines(obj) +SV * obj +CODE: +{ + /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ + ST(0) = &PL_sv_no; + XSRETURN(1); +} + +void +Method_perlio_ok(obj) +SV * obj +CODE: +{ + /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ + /* require_pv(PERLIO_FILENAME); */ + + eval_pv("require PerlIO::encoding", 0); + + if (SvTRUE(get_sv("@", 0))) { + ST(0) = &PL_sv_no; + }else{ + ST(0) = &PL_sv_yes; + } + XSRETURN(1); +} + MODULE = Encode PACKAGE = Encode PROTOTYPES: ENABLE @@ -412,9 +586,6 @@ CODE: OUTPUT: RETVAL -PROTOTYPES: DISABLE - - int DIE_ON_ERR() CODE: @@ -451,6 +622,20 @@ OUTPUT: RETVAL int +HTMLCREF() +CODE: + RETVAL = ENCODE_HTMLCREF; +OUTPUT: + RETVAL + +int +XMLCREF() +CODE: + RETVAL = ENCODE_XMLCREF; +OUTPUT: + RETVAL + +int FB_DEFAULT() CODE: RETVAL = ENCODE_FB_DEFAULT; @@ -485,6 +670,20 @@ CODE: OUTPUT: RETVAL +int +FB_HTMLCREF() +CODE: + RETVAL = ENCODE_FB_HTMLCREF; +OUTPUT: + RETVAL + +int +FB_XMLCREF() +CODE: + RETVAL = ENCODE_FB_XMLCREF; +OUTPUT: + RETVAL + BOOT: { #include "def_t.h"