X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=ext%2FEncode%2FEncode.xs;h=9bd8a4ce1e0cbf575d6649b4c13d58c00e4e2e4e;hb=5129552cc421a69f6981a03ac0ecc86b5722d1e6;hp=c48a5a017351a3996d6a4c109736cdc651e01447;hpb=0b3236bb1fb664bc9c9ccd069cac189e80c3ef35;p=p5sagit%2Fp5-mst-13.2.git diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index c48a5a0..9bd8a4c 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -5,10 +5,10 @@ #include "XSUB.h" #define U8 U8 #include "encode.h" -#include "8859.h" -#include "EBCDIC.h" -#include "Symbols.h" - +/* #include "8859.h" */ +/* #include "EBCDIC.h" */ +/* #include "Symbols.h" */ +#include "defcodes.h" #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \ Perl_croak(aTHX_ "panic_unimplemented"); \ @@ -91,7 +91,7 @@ PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg) if (!SvROK(e->enc)) { e->enc = Nullsv; errno = EINVAL; - Perl_warner(aTHX_ WARN_IO, "Cannot find encoding \"%" SVf "\"", + Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"", arg); code = -1; } @@ -118,7 +118,7 @@ PerlIOEncode_popped(pTHX_ PerlIO * f) } if (e->dataSV) { SvREFCNT_dec(e->dataSV); - e->bufsv = Nullsv; + e->dataSV = Nullsv; } return 0; } @@ -214,7 +214,7 @@ PerlIOEncode_fill(pTHX_ PerlIO * f) if (use + SvCUR(e->dataSV) > e->base.bufsiz) { use = e->base.bufsiz - SvCUR(e->dataSV); } - sv_catpvn(e->dataSV,ptr,use); + sv_catpvn(e->dataSV,(char*)ptr,use); } else { /* Create a "dummy" SV to represent the available data from layer below */ @@ -227,7 +227,7 @@ PerlIOEncode_fill(pTHX_ PerlIO * f) SvPVX(e->dataSV) = (char *) ptr; SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */ SvCUR_set(e->dataSV,use); - SvPOK_on(e->dataSV); + SvPOK_only(e->dataSV); } SvUTF8_off(e->dataSV); PUSHMARK(sp); @@ -244,7 +244,7 @@ PerlIOEncode_fill(pTHX_ PerlIO * f) /* Now get translated string (forced to UTF-8) and use as buffer */ if (SvPOK(uni)) { s = SvPVutf8(uni, len); - if (len && !is_utf8_string(s,len)) { + if (len && !is_utf8_string((U8*)s,len)) { Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s); } } @@ -255,7 +255,7 @@ PerlIOEncode_fill(pTHX_ PerlIO * f) (The copy is a pain - need a put-it-here option for decode.) */ sv_setpvn(e->bufsv,s,len); - e->base.ptr = e->base.buf = SvPVX(e->bufsv); + e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv); e->base.end = e->base.ptr + SvCUR(e->bufsv); PerlIOBase(f)->flags |= PERLIO_F_RDBUF; SvUTF8_on(e->bufsv); @@ -309,12 +309,12 @@ PerlIOEncode_flush(pTHX_ PerlIO * f) XPUSHs(e->enc); SvCUR_set(e->bufsv, e->base.ptr - e->base.buf); SvUTF8_on(e->bufsv); - Perl_warn(aTHX_ "flush %_",e->bufsv); XPUSHs(e->bufsv); XPUSHs(&PL_sv_yes); PUTBACK; - if (perl_call_method("encode", G_SCALAR) != 1) - code = -1; + if (perl_call_method("encode", G_SCALAR) != 1) { + Perl_die(aTHX_ "panic: encode did not return a value"); + } SPAGAIN; str = POPs; PUTBACK; @@ -328,6 +328,11 @@ PerlIOEncode_flush(pTHX_ PerlIO * f) if (PerlIO_flush(PerlIONext(f)) != 0) { code = -1; } + if (SvCUR(e->bufsv)) { + /* Did not all translate */ + e->base.ptr = e->base.buf+SvCUR(e->bufsv); + return code; + } } else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) { /* read case */ @@ -348,17 +353,19 @@ PerlIOEncode_flush(pTHX_ PerlIO * f) SAVETMPS; str = sv_newmortal(); sv_upgrade(str, SVt_PV); - SvPVX(str) = e->base.ptr; + SvPVX(str) = (char*)e->base.ptr; SvLEN(str) = 0; SvCUR_set(str, e->base.end - e->base.ptr); + SvPOK_only(str); SvUTF8_on(str); PUSHMARK(sp); XPUSHs(e->enc); XPUSHs(str); XPUSHs(&PL_sv_yes); PUTBACK; - if (perl_call_method("encode", G_SCALAR) != 1) - code = -1; + if (perl_call_method("encode", G_SCALAR) != 1) { + Perl_die(aTHX_ "panic: encode did not return a value"); + } SPAGAIN; str = POPs; PUTBACK; @@ -383,6 +390,9 @@ PerlIOEncode_close(pTHX_ PerlIO * f) PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); IV code = PerlIOBase_close(aTHX_ f); if (e->bufsv) { + if (e->base.buf && e->base.ptr > e->base.buf) { + Perl_croak(aTHX_ "Close with partial character"); + } SvREFCNT_dec(e->bufsv); e->bufsv = Nullsv; } @@ -401,6 +411,9 @@ PerlIOEncode_tell(pTHX_ PerlIO * f) the UTF8 we have in bufefr and then ask layer below */ PerlIO_flush(f); + if (b->buf && b->ptr > b->buf) { + Perl_croak(aTHX_ "Cannot tell at partial character"); + } return PerlIO_tell(PerlIONext(f)); } @@ -421,7 +434,7 @@ PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o, PerlIO_funcs PerlIO_encode = { "encoding", sizeof(PerlIOEncode), - PERLIO_K_BUFFERED, + PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT, PerlIOEncode_pushed, PerlIOEncode_popped, PerlIOBuf_open, @@ -469,6 +482,7 @@ Encode_XSEncoding(pTHX_ encode_t * enc) void call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) { + /* Exists for breakpointing */ } static SV * @@ -477,18 +491,20 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, { STRLEN slen; U8 *s = (U8 *) SvPV(src, slen); - STRLEN tlen = slen; + STRLEN tlen = slen; + STRLEN ddone = 0; + STRLEN sdone = 0; SV *dst = sv_2mortal(newSV(slen+1)); if (slen) { U8 *d = (U8 *) SvPVX(dst); STRLEN dlen = SvLEN(dst)-1; int code; while ((code = do_encode(dir, s, &slen, d, dlen, &dlen, !check))) { - SvCUR_set(dst, dlen); - SvPOK_on(dst); + SvCUR_set(dst, dlen+ddone); + SvPOK_only(dst); #if 0 - Perl_warn(aTHX_ "code=%d @ s=%d/%d d=%d",code,slen,tlen,dlen); + Perl_warn(aTHX_ "code=%d @ s=%d/%d/%d d=%d/%d/%d",code,slen,sdone,tlen,dlen,ddone,SvLEN(dst)-1); #endif if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL) @@ -497,23 +513,25 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, switch (code) { case ENCODE_NOSPACE: { - STRLEN done = tlen-slen; STRLEN need ; - if (done) { - need = (tlen*dlen)/done+1; + sdone += slen; + ddone += dlen; + if (sdone) { + need = (tlen*SvLEN(dst)+sdone-1)/sdone+UTF8_MAXLEN; } else { - need = dlen + UTF8_MAXLEN; + need = SvLEN(dst) + UTF8_MAXLEN; } d = (U8 *) SvGROW(dst, need); - if (dlen >= SvLEN(dst)) { - Perl_croak(aTHX_ - "Destination couldn't be grown (the need may be miscalculated)."); + if (ddone >= SvLEN(dst)) { + Perl_croak(aTHX_ "Destination couldn't be grown."); } - dlen = SvLEN(dst); - slen = tlen; - break; + dlen = SvLEN(dst)-ddone-1; + d += ddone; + s += slen; + slen = tlen-sdone; + continue; } case ENCODE_NOREP: @@ -523,7 +541,7 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, UV ch = utf8n_to_uvuni(s + slen, (SvCUR(src) - slen), &clen, 0); - Perl_warner(aTHX_ WARN_UTF8, + Perl_warner(aTHX_ packWARN(WARN_UTF8), "\"\\N{U+%" UVxf "}\" does not map to %s", ch, enc->name[0]); @@ -537,10 +555,12 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, } } else { - /* UTF-8 is supposed to be "Universal" so should not happen */ - Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8", - enc->name[0], (int) (SvCUR(src) - slen), - s + slen); + /* UTF-8 is supposed to be "Universal" so should not happen + for real characters, but some encodings have non-assigned + codes which may occur. + */ + Perl_croak(aTHX_ "%s \"\\x%02X\" does not map to Unicode (%d)", + enc->name[0], (U8) s[slen], code); } break; @@ -551,19 +571,19 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, return &PL_sv_undef; } } - SvCUR_set(dst, dlen); - SvPOK_on(dst); + SvCUR_set(dst, dlen+ddone); + SvPOK_only(dst); if (check) { - if (slen < SvCUR(src)) { - Move(s + slen, s, SvCUR(src) - slen, U8); + sdone = SvCUR(src) - (slen+sdone); + if (sdone) { + Move(s + slen, SvPVX(src), sdone , U8); } - SvCUR_set(src, SvCUR(src) - slen); - *SvEND(src) = '\0'; + SvCUR_set(src, sdone); } } else { SvCUR_set(dst, 0); - SvPOK_on(dst); + SvPOK_only(dst); } *SvEND(dst) = '\0'; return dst; @@ -574,6 +594,16 @@ MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ PROTOTYPES: ENABLE void +Method_name(obj) +SV * obj +CODE: + { + encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0]))); + XSRETURN(1); + } + +void Method_decode(obj,src,check = FALSE) SV * obj SV * src @@ -752,7 +782,8 @@ BOOT: #if defined(USE_PERLIO) && !defined(USE_SFIO) PerlIO_define_layer(aTHX_ &PerlIO_encode); #endif -#include "8859_def.h" -#include "EBCDIC_def.h" -#include "Symbols_def.h" +/* #include "8859_def.h" */ +/* #include "EBCDIC_def.h" */ +/* #include "Symbols_def.h" */ +#include "defcodes_def.h" }