5 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) { \
6 Perl_croak(aTHX_ "panic_unimplemented"); \
7 return (y)0; /* fool picky compilers */ \
9 UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
10 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
13 /* Define an encoding "layer" in the perliol.h sense.
14 The layer defined here "inherits" in an object-oriented sense from the
15 "perlio" layer with its PerlIOBuf_* "methods".
16 The implementation is particularly efficient as until Encode settles down
17 there is no point in tryint to tune it.
19 The layer works by overloading the "fill" and "flush" methods.
21 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API
22 to convert the encoded data to UTF-8 form, then copies it back to the
23 buffer. The "base class's" read methods then see the UTF-8 data.
25 "flush" transforms the UTF-8 data deposited by the "base class's write
26 method in the buffer back into the encoded form using the encode OO perl API,
27 then copies data back into the buffer and calls "SUPER::flush.
29 Note that "flush" is _also_ called for read mode - we still do the (back)-translate
30 so that the the base class's "flush" sees the correct number of encoded chars
31 for positioning the seek pointer. (This double translation is the worst performance
32 issue - particularly with all-perl encode engine.)
41 PerlIOBuf base; /* PerlIOBuf stuff */
48 PerlIOEncode_pushed(PerlIO *f, const char *mode,const char *arg,STRLEN len)
50 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
54 code = PerlIOBuf_pushed(f,mode,Nullch,0);
58 XPUSHs(sv_2mortal(newSVpv("Encode",0)));
59 XPUSHs(sv_2mortal(newSVpvn(arg,len)));
61 if (perl_call_method("getEncoding",G_SCALAR) != 1)
71 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
76 PerlIOEncode_popped(PerlIO *f)
78 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
87 SvREFCNT_dec(e->bufsv);
94 PerlIOEncode_get_base(PerlIO *f)
96 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
99 e->base.bufsiz = 1024;
102 e->bufsv = newSV(e->base.bufsiz);
103 sv_setpvn(e->bufsv,"",0);
105 e->base.buf = (STDCHAR *)SvPVX(e->bufsv);
107 e->base.ptr = e->base.buf;
109 e->base.end = e->base.buf;
110 if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
112 Perl_warn(aTHX_ " ptr %p(%p)%p",
113 e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
116 if (SvLEN(e->bufsv) < e->base.bufsiz)
118 SSize_t poff = e->base.ptr - e->base.buf;
119 SSize_t eoff = e->base.end - e->base.buf;
120 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,e->base.bufsiz);
121 e->base.ptr = e->base.buf + poff;
122 e->base.end = e->base.buf + eoff;
124 if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
126 Perl_warn(aTHX_ " ptr %p(%p)%p",
127 e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
134 PerlIOEncode_fill(PerlIO *f)
136 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
140 code = PerlIOBuf_fill(f);
146 /* Set SV that is the buffer to be buf..ptr */
147 SvCUR_set(e->bufsv, e->base.end - e->base.buf);
148 SvUTF8_off(e->bufsv);
156 if (perl_call_method("toUnicode",G_SCALAR) != 1)
161 /* Now get translated string (forced to UTF-8) and copy back to buffer
162 don't use sv_setsv as that may "steal" PV from returned temp
163 and so free() our known-large-enough buffer.
164 sv_setpvn() should do but let us do it long hand.
166 s = SvPVutf8(uni,len);
167 if (s != SvPVX(e->bufsv))
169 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
170 Move(s,e->base.buf,len,char);
171 SvCUR_set(e->bufsv,len);
174 e->base.end = e->base.buf+len;
175 e->base.ptr = e->base.buf;
183 PerlIOEncode_flush(PerlIO *f)
185 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
188 if (e->bufsv && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF|PERLIO_F_WRBUF)))
195 if (PerlIOBase(f)->flags & PERLIO_F_RDBUF)
197 /* This is really just a flag to see if we took all the data, if
198 we did PerlIOBase_flush avoids a seek to lower layer.
199 Need to revisit if we start getting clever with unreads or seeks-in-buffer
201 left = e->base.end - e->base.ptr;
207 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
212 if (perl_call_method("fromUnicode",G_SCALAR) != 1)
218 if (s != SvPVX(e->bufsv))
220 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
221 Move(s,e->base.buf,len,char);
222 SvCUR_set(e->bufsv,len);
224 SvUTF8_off(e->bufsv);
225 e->base.ptr = e->base.buf+len;
226 /* restore end != ptr as inequality is used by PerlIOBuf_flush in read case */
227 e->base.end = e->base.ptr + left;
230 if (PerlIOBuf_flush(f) != 0)
237 PerlIOEncode_close(PerlIO *f)
239 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
240 IV code = PerlIOBase_close(f);
244 SvREFCNT_dec(e->bufsv);
250 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF|PERLIO_F_WRBUF);
255 PerlIOEncode_tell(PerlIO *f)
257 PerlIOBuf *b = PerlIOSelf(f,PerlIOBuf);
258 /* Unfortunately the only way to get a postion is to back-translate,
259 the UTF8-bytes we have buf..ptr and adjust accordingly.
260 But we will try and save any unread data in case stream
263 if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF) && b->ptr < b->end)
265 Size_t count = b->end - b->ptr;
266 PerlIO_push(f,&PerlIO_pending,"r",Nullch,0);
267 /* Save what we have left to read */
268 PerlIOSelf(f,PerlIOBuf)->bufsiz = count;
269 PerlIO_unread(f,b->ptr,count);
270 /* There isn't any unread data - we just saved it - so avoid the lower seek */
272 /* Flush ourselves - now one layer down,
273 this does the back translate and adjusts position
275 PerlIO_flush(PerlIONext(f));
276 /* Set position of the saved data */
277 PerlIOSelf(f,PerlIOBuf)->posn = b->posn;
286 PerlIO_funcs PerlIO_encode = {
288 sizeof(PerlIOEncode),
307 PerlIOBuf_setlinebuf,
308 PerlIOEncode_get_base,
312 PerlIOBuf_set_ptrcnt,
316 void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {}
318 MODULE = Encode PACKAGE = Encode
323 _bytes_to_utf8(sv, ...)
327 SV * encoding = items == 2 ? ST(1) : Nullsv;
330 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
333 U8* s = (U8*)SvPV(sv, len);
336 converted = bytes_to_utf8(s, &len); /* This allocs */
337 sv_setpvn(sv, (char *)converted, len);
338 SvUTF8_on(sv); /* XXX Should we? */
339 Safefree(converted); /* ... so free it */
347 _utf8_to_bytes(sv, ...)
351 SV * to = items > 1 ? ST(1) : Nullsv;
352 SV * check = items > 2 ? ST(2) : Nullsv;
355 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
358 U8 *s = (U8*)SvPV(sv, len);
361 /* Must do things the slow way */
363 U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */
366 New(83, dest, len, U8); /* I think */
375 /* Have to do it all ourselves because of error routine,
379 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
380 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
381 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
382 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
383 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
384 else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
385 else { ulen = 13; uv = 0; }
387 /* Note change to utf8.c variable naming, for variety */
389 if ((*s & 0xc0) != 0x80)
393 uv = (uv << 6) | (*s++ & 0x3f);
397 call_failure(check, s, dest, src);
398 /* Now what happens? */
404 RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
411 _chars_to_utf8(sv, from, ...)
416 SV * check = items == 3 ? ST(2) : Nullsv;
417 RETVAL = &PL_sv_undef;
423 _utf8_to_chars(sv, to, ...)
428 SV * check = items == 3 ? ST(2) : Nullsv;
429 RETVAL = &PL_sv_undef;
435 _utf8_to_chars_check(sv, ...)
439 SV * check = items == 2 ? ST(1) : Nullsv;
440 RETVAL = &PL_sv_undef;
446 _bytes_to_chars(sv, from, ...)
451 SV * check = items == 3 ? ST(2) : Nullsv;
452 RETVAL = &PL_sv_undef;
458 _chars_to_bytes(sv, to, ...)
463 SV * check = items == 3 ? ST(2) : Nullsv;
464 RETVAL = &PL_sv_undef;
470 _from_to(sv, from, to, ...)
476 SV * check = items == 4 ? ST(3) : Nullsv;
477 RETVAL = &PL_sv_undef;
487 SV * check = items == 2 ? ST(1) : Nullsv;
489 RETVAL = SvUTF8(sv) ? 1 : 0;
492 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
507 SV *rsv = newSViv(SvUTF8(sv));
511 RETVAL = &PL_sv_undef;
523 SV *rsv = newSViv(SvUTF8(sv));
527 RETVAL = &PL_sv_undef;
534 _utf_to_utf(sv, from, to, ...)
540 SV * check = items == 4 ? ST(3) : Nullsv;
541 RETVAL = &PL_sv_undef;
549 PerlIO_define_layer(&PerlIO_encode);