10 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) { \
11 Perl_croak(aTHX_ "panic_unimplemented"); \
12 return (y)0; /* fool picky compilers */ \
14 UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
15 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
17 #if defined(USE_PERLIO) && !defined(USE_SFIO)
18 /* Define an encoding "layer" in the perliol.h sense.
19 The layer defined here "inherits" in an object-oriented sense from the
20 "perlio" layer with its PerlIOBuf_* "methods".
21 The implementation is particularly efficient as until Encode settles down
22 there is no point in tryint to tune it.
24 The layer works by overloading the "fill" and "flush" methods.
26 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API
27 to convert the encoded data to UTF-8 form, then copies it back to the
28 buffer. The "base class's" read methods then see the UTF-8 data.
30 "flush" transforms the UTF-8 data deposited by the "base class's write
31 method in the buffer back into the encoded form using the encode OO perl API,
32 then copies data back into the buffer and calls "SUPER::flush.
34 Note that "flush" is _also_ called for read mode - we still do the (back)-translate
35 so that the the base class's "flush" sees the correct number of encoded chars
36 for positioning the seek pointer. (This double translation is the worst performance
37 issue - particularly with all-perl encode engine.)
46 PerlIOBuf base; /* PerlIOBuf stuff */
52 PerlIOEncode_getarg(PerlIO *f)
54 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
55 SV *sv = &PL_sv_undef;
64 if (perl_call_method("name",G_SCALAR) == 1)
75 PerlIOEncode_pushed(PerlIO *f, const char *mode, SV *arg)
77 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
81 code = PerlIOBuf_pushed(f,mode,Nullsv);
87 if (perl_call_pv("Encode::find_encoding",G_SCALAR) != 1)
89 /* should never happen */
90 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
100 Perl_warner(aTHX_ WARN_IO, "Cannot find encoding \"%_\"", arg);
103 SvREFCNT_inc(e->enc);
106 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
111 PerlIOEncode_popped(PerlIO *f)
113 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
117 SvREFCNT_dec(e->enc);
122 SvREFCNT_dec(e->bufsv);
129 PerlIOEncode_get_base(PerlIO *f)
131 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
134 e->base.bufsiz = 1024;
137 e->bufsv = newSV(e->base.bufsiz);
138 sv_setpvn(e->bufsv,"",0);
140 e->base.buf = (STDCHAR *)SvPVX(e->bufsv);
142 e->base.ptr = e->base.buf;
144 e->base.end = e->base.buf;
145 if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
147 Perl_warn(aTHX_ " ptr %p(%p)%p",
148 e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
151 if (SvLEN(e->bufsv) < e->base.bufsiz)
153 SSize_t poff = e->base.ptr - e->base.buf;
154 SSize_t eoff = e->base.end - e->base.buf;
155 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,e->base.bufsiz);
156 e->base.ptr = e->base.buf + poff;
157 e->base.end = e->base.buf + eoff;
159 if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
161 Perl_warn(aTHX_ " ptr %p(%p)%p",
162 e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
169 PerlIOEncode_fill(PerlIO *f)
171 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
175 code = PerlIOBuf_fill(f);
181 /* Set SV that is the buffer to be buf..ptr */
182 SvCUR_set(e->bufsv, e->base.end - e->base.buf);
183 SvUTF8_off(e->bufsv);
191 if (perl_call_method("decode",G_SCALAR) != 1)
196 /* Now get translated string (forced to UTF-8) and copy back to buffer
197 don't use sv_setsv as that may "steal" PV from returned temp
198 and so free() our known-large-enough buffer.
199 sv_setpvn() should do but let us do it long hand.
201 s = SvPVutf8(uni,len);
202 if (s != SvPVX(e->bufsv))
204 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
205 Move(s,e->base.buf,len,char);
206 SvCUR_set(e->bufsv,len);
209 e->base.end = e->base.buf+len;
210 e->base.ptr = e->base.buf;
218 PerlIOEncode_flush(PerlIO *f)
220 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
223 if (e->bufsv && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF|PERLIO_F_WRBUF)))
230 if (PerlIOBase(f)->flags & PERLIO_F_RDBUF)
232 /* This is really just a flag to see if we took all the data, if
233 we did PerlIOBase_flush avoids a seek to lower layer.
234 Need to revisit if we start getting clever with unreads or seeks-in-buffer
236 left = e->base.end - e->base.ptr;
242 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
247 if (perl_call_method("encode",G_SCALAR) != 1)
253 if (s != SvPVX(e->bufsv))
255 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
256 Move(s,e->base.buf,len,char);
257 SvCUR_set(e->bufsv,len);
259 SvUTF8_off(e->bufsv);
260 e->base.ptr = e->base.buf+len;
261 /* restore end != ptr as inequality is used by PerlIOBuf_flush in read case */
262 e->base.end = e->base.ptr + left;
265 if (PerlIOBuf_flush(f) != 0)
272 PerlIOEncode_close(PerlIO *f)
274 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
275 IV code = PerlIOBase_close(f);
279 SvREFCNT_dec(e->bufsv);
285 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF|PERLIO_F_WRBUF);
290 PerlIOEncode_tell(PerlIO *f)
293 PerlIOBuf *b = PerlIOSelf(f,PerlIOBuf);
294 /* Unfortunately the only way to get a postion is to back-translate,
295 the UTF8-bytes we have buf..ptr and adjust accordingly.
296 But we will try and save any unread data in case stream
299 if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF) && b->ptr < b->end)
301 Size_t count = b->end - b->ptr;
302 PerlIO_push(aTHX_ f,&PerlIO_pending,"r",Nullsv);
303 /* Save what we have left to read */
304 PerlIOSelf(f,PerlIOBuf)->bufsiz = count;
305 PerlIO_unread(f,b->ptr,count);
306 /* There isn't any unread data - we just saved it - so avoid the lower seek */
308 /* Flush ourselves - now one layer down,
309 this does the back translate and adjusts position
311 PerlIO_flush(PerlIONext(f));
312 /* Set position of the saved data */
313 PerlIOSelf(f,PerlIOBuf)->posn = b->posn;
322 PerlIO_funcs PerlIO_encode = {
324 sizeof(PerlIOEncode),
342 PerlIOBuf_setlinebuf,
343 PerlIOEncode_get_base,
347 PerlIOBuf_set_ptrcnt,
349 #endif /* encode layer */
352 Encode_Define(pTHX_ encode_t *enc)
355 HV *stash = gv_stashpv("Encode::XS", TRUE);
356 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
362 const char *name = enc->name[i++];
363 XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
366 call_pv("Encode::define_encoding",G_DISCARD);
370 void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {}
373 encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check)
376 U8 *s = (U8 *) SvPV(src,slen);
377 SV *dst = sv_2mortal(newSV(2*slen+1));
380 U8 *d = (U8 *) SvGROW(dst, 2*slen+1);
381 STRLEN dlen = SvLEN(dst);
383 while ((code = do_encode(dir,s,&slen,d,dlen,&dlen,!check)))
388 if (code == ENCODE_FALLBACK)
395 STRLEN need = (slen) ? (SvLEN(dst)*SvCUR(src)/slen) : (dlen + UTF8_MAXLEN);
396 if (need <= SvLEN(dst))
398 d = (U8 *) SvGROW(dst, need);
405 if (dir == enc->f_utf8)
407 if (!check && ckWARN_d(WARN_UTF8))
410 UV ch = utf8n_to_uvuni(s+slen,(SvCUR(src)-slen),&clen,0);
411 Perl_warner(aTHX_ WARN_UTF8, "\"\\N{U+%"UVxf"}\" does not map to %s", ch, enc->name[0]);
412 /* FIXME: Skip over the character, copy in replacement and continue
413 * but that is messy so for now just fail.
424 /* UTF-8 is supposed to be "Universal" so should not happen */
425 Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8",
426 enc->name[0], (int)(SvCUR(src)-slen),s+slen);
431 if (!check && ckWARN_d(WARN_UTF8))
433 Perl_warner(aTHX_ WARN_UTF8, "Partial %s character",
434 (dir == enc->f_utf8) ? "UTF-8" : enc->name[0]);
439 Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
440 code, (dir == enc->f_utf8) ? "to" : "from",enc->name[0]);
448 if (slen < SvCUR(src))
450 Move(s+slen,s,SvCUR(src)-slen,U8);
452 SvCUR_set(src,SvCUR(src)-slen);
458 MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_
463 Method_decode(obj,src,check = 0)
469 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
470 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check);
476 Method_encode(obj,src,check = 0)
482 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
483 sv_utf8_upgrade(src);
484 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check);
488 MODULE = Encode PACKAGE = Encode
493 _bytes_to_utf8(sv, ...)
497 SV * encoding = items == 2 ? ST(1) : Nullsv;
500 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
503 U8* s = (U8*)SvPV(sv, len);
506 converted = bytes_to_utf8(s, &len); /* This allocs */
507 sv_setpvn(sv, (char *)converted, len);
508 SvUTF8_on(sv); /* XXX Should we? */
509 Safefree(converted); /* ... so free it */
517 _utf8_to_bytes(sv, ...)
521 SV * to = items > 1 ? ST(1) : Nullsv;
522 SV * check = items > 2 ? ST(2) : Nullsv;
525 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
528 U8 *s = (U8*)SvPV(sv, len);
531 /* Must do things the slow way */
533 U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */
536 New(83, dest, len, U8); /* I think */
545 /* Have to do it all ourselves because of error routine,
549 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
550 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
551 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
552 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
553 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
554 else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
555 else { ulen = 13; uv = 0; }
557 /* Note change to utf8.c variable naming, for variety */
559 if ((*s & 0xc0) != 0x80)
563 uv = (uv << 6) | (*s++ & 0x3f);
567 call_failure(check, s, dest, src);
568 /* Now what happens? */
574 RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
581 is_utf8(sv, check = FALSE)
587 RETVAL = SvUTF8(sv) ? TRUE : FALSE;
590 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
605 SV *rsv = newSViv(SvUTF8(sv));
609 RETVAL = &PL_sv_undef;
621 SV *rsv = newSViv(SvUTF8(sv));
625 RETVAL = &PL_sv_undef;
633 #if defined(USE_PERLIO) && !defined(USE_SFIO)
634 PerlIO_define_layer(aTHX_ &PerlIO_encode);
636 #include "iso8859.def"
637 #include "EBCDIC.def"
638 #include "Symbols.def"