/*
- $Id: Encode.xs,v 2.3 2004/12/03 19:16:53 dankogai Exp dankogai $
+ $Id: Encode.xs,v 2.6 2005/09/08 14:17:17 dankogai Exp dankogai $
*/
#define PERL_NO_GET_CONTEXT
UTF8_ALLOW_NON_CONTINUATION | \
UTF8_ALLOW_LONG))
+static SV* fallback_cb = (SV*)NULL ;
+
void
Encode_XSEncoding(pTHX_ encode_t * enc)
{
#define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode"
static SV *
-encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
+do_fallback_cb(pTHX_ UV ch)
+{
+ dSP;
+ int argc;
+ SV* retval;
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(sp);
+ XPUSHs(sv_2mortal(newSVnv((UV)ch)));
+ PUTBACK;
+ argc = call_sv(fallback_cb, G_SCALAR);
+ SPAGAIN;
+ if (argc != 1){
+ croak("fallback sub must return scalar!");
+ }
+ retval = newSVsv(POPs);
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+ return retval;
+}
+
+static SV *
+encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src,
int check, STRLEN * offset, SV * term, int * retcode)
{
STRLEN slen;
UV ch =
utf8n_to_uvuni(s+slen, (SvCUR(src)-slen),
&clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY);
+ /* if non-representable multibyte prefix at end of current buffer - break*/
+ if (clen > tlen - sdone) break;
if (check & ENCODE_DIE_ON_ERR) {
Perl_croak(aTHX_ ERR_ENCODE_NOMAP,
(UV)ch, enc->name[0]);
}
if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
SV* subchar =
+ (fallback_cb != (SV*)NULL) ? do_fallback_cb(aTHX_ ch) :
newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04"UVxf"}" :
check & ENCODE_HTMLCREF ? "&#%" UVuf ";" :
"&#x%" UVxf ";", (UV)ch);
}
if (check &
(ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
- SV* subchar = newSVpvf("\\x%02" UVXf, (UV)s[slen]);
+ SV* subchar =
+ (fallback_cb != (SV*)NULL) ?
+ do_fallback_cb(aTHX_ (UV)s[slen]) :
+ newSVpvf("\\x%02" UVXf, (UV)s[slen]);
sdone += slen + 1;
ddone += dlen + SvCUR(subchar);
sv_catsv(dst, subchar);
if ((s + skip) > e) {
/* Partial character */
/* XXX could check that rest of bytes are UTF8_IS_CONTINUATION(ch) */
- if (stop_at_partial)
+ if (stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL))
break;
goto malformed_byte;
}
sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, check,
&offset, term, &code));
- SvIVX(off) = (IV)offset;
+ SvIV_set(off, (IV)offset);
if (code == ENCODE_FOUND_TERM) {
ST(0) = &PL_sv_yes;
}else{
}
void
-Method_decode(obj,src,check = 0)
+Method_decode(obj,src,check_sv = &PL_sv_no)
SV * obj
SV * src
-int check
+SV * check_sv
CODE:
{
+ int check;
encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
if (SvUTF8(src)) {
sv_utf8_downgrade(src, FALSE);
}
+ if (SvROK(check_sv)){
+ if (fallback_cb == (SV*)NULL){
+ fallback_cb = newSVsv(check_sv); /* First time */
+ }else{
+ SvSetSV(fallback_cb, check_sv); /* Been here before */
+ }
+ check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */
+ }else{
+ fallback_cb = (SV*)NULL;
+ check = SvIV(check_sv);
+ }
ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check,
NULL, Nullsv, NULL);
SvUTF8_on(ST(0));
XSRETURN(1);
}
+
+
void
-Method_encode(obj,src,check = 0)
+Method_encode(obj,src,check_sv = &PL_sv_no)
SV * obj
SV * src
-int check
+SV * check_sv
CODE:
{
+ int check;
encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
sv_utf8_upgrade(src);
+ if (SvROK(check_sv)){
+ if (fallback_cb == (SV*)NULL){
+ fallback_cb = newSVsv(check_sv); /* First time */
+ }else{
+ SvSetSV(fallback_cb, check_sv); /* Been here before */
+ }
+ check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */
+ }else{
+ fallback_cb = (SV*)NULL;
+ check = SvIV(check_sv);
+ }
ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check,
NULL, Nullsv, NULL);
XSRETURN(1);
RETVAL
int
+STOP_AT_PARTIAL()
+CODE:
+ RETVAL = ENCODE_STOP_AT_PARTIAL;
+OUTPUT:
+ RETVAL
+
+int
FB_DEFAULT()
CODE:
RETVAL = ENCODE_FB_DEFAULT;