1 #define PERL_NO_GET_CONTEXT
13 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \
14 Perl_croak(aTHX_ "panic_unimplemented"); \
15 return (y)0; /* fool picky compilers */ \
17 UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
18 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
20 #if defined(USE_PERLIO) && !defined(USE_SFIO)
21 /* Define an encoding "layer" in the perliol.h sense.
22 The layer defined here "inherits" in an object-oriented sense from the
23 "perlio" layer with its PerlIOBuf_* "methods".
24 The implementation is particularly efficient as until Encode settles down
25 there is no point in tryint to tune it.
27 The layer works by overloading the "fill" and "flush" methods.
29 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API
30 to convert the encoded data to UTF-8 form, then copies it back to the
31 buffer. The "base class's" read methods then see the UTF-8 data.
33 "flush" transforms the UTF-8 data deposited by the "base class's write
34 method in the buffer back into the encoded form using the encode OO perl API,
35 then copies data back into the buffer and calls "SUPER::flush.
37 Note that "flush" is _also_ called for read mode - we still do the (back)-translate
38 so that the the base class's "flush" sees the correct number of encoded chars
39 for positioning the seek pointer. (This double translation is the worst performance
40 issue - particularly with all-perl encode engine.)
49 PerlIOBuf base; /* PerlIOBuf stuff */
55 PerlIOEncode_getarg(PerlIO *f)
58 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
59 SV *sv = &PL_sv_undef;
68 if (perl_call_method("name",G_SCALAR) == 1)
79 PerlIOEncode_pushed(PerlIO *f, const char *mode, SV *arg)
81 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
85 code = PerlIOBuf_pushed(f,mode,Nullsv);
91 if (perl_call_pv("Encode::find_encoding",G_SCALAR) != 1)
93 /* should never happen */
94 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
104 Perl_warner(aTHX_ WARN_IO, "Cannot find encoding \"%"SVf"\"", arg);
109 SvREFCNT_inc(e->enc);
110 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
118 PerlIOEncode_popped(PerlIO *f)
120 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
124 SvREFCNT_dec(e->enc);
129 SvREFCNT_dec(e->bufsv);
136 PerlIOEncode_get_base(PerlIO *f)
138 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
141 e->base.bufsiz = 1024;
144 e->bufsv = newSV(e->base.bufsiz);
145 sv_setpvn(e->bufsv,"",0);
147 e->base.buf = (STDCHAR *)SvPVX(e->bufsv);
149 e->base.ptr = e->base.buf;
151 e->base.end = e->base.buf;
152 if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
154 Perl_warn(aTHX_ " ptr %p(%p)%p",
155 e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
158 if (SvLEN(e->bufsv) < e->base.bufsiz)
160 SSize_t poff = e->base.ptr - e->base.buf;
161 SSize_t eoff = e->base.end - e->base.buf;
162 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,e->base.bufsiz);
163 e->base.ptr = e->base.buf + poff;
164 e->base.end = e->base.buf + eoff;
166 if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
168 Perl_warn(aTHX_ " ptr %p(%p)%p",
169 e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
176 PerlIOEncode_fill(PerlIO *f)
178 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
182 code = PerlIOBuf_fill(f);
188 /* Set SV that is the buffer to be buf..ptr */
189 SvCUR_set(e->bufsv, e->base.end - e->base.buf);
190 SvUTF8_off(e->bufsv);
198 if (perl_call_method("decode",G_SCALAR) != 1)
203 /* Now get translated string (forced to UTF-8) and copy back to buffer
204 don't use sv_setsv as that may "steal" PV from returned temp
205 and so free() our known-large-enough buffer.
206 sv_setpvn() should do but let us do it long hand.
208 s = SvPVutf8(uni,len);
209 if (s != SvPVX(e->bufsv))
211 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
212 Move(s,e->base.buf,len,char);
213 SvCUR_set(e->bufsv,len);
216 e->base.end = e->base.buf+len;
217 e->base.ptr = e->base.buf;
225 PerlIOEncode_flush(PerlIO *f)
227 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
229 if (e->bufsv && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF|PERLIO_F_WRBUF))
230 &&(e->base.ptr > e->base.buf)
239 if (PerlIOBase(f)->flags & PERLIO_F_RDBUF)
241 /* This is really just a flag to see if we took all the data, if
242 we did PerlIOBase_flush avoids a seek to lower layer.
243 Need to revisit if we start getting clever with unreads or seeks-in-buffer
245 left = e->base.end - e->base.ptr;
251 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
256 if (perl_call_method("encode",G_SCALAR) != 1)
262 if (s != SvPVX(e->bufsv))
264 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
265 Move(s,e->base.buf,len,char);
266 SvCUR_set(e->bufsv,len);
268 SvUTF8_off(e->bufsv);
269 e->base.ptr = e->base.buf+len;
270 /* restore end != ptr as inequality is used by PerlIOBuf_flush in read case */
271 e->base.end = e->base.ptr + left;
274 if (PerlIOBuf_flush(f) != 0)
281 PerlIOEncode_close(PerlIO *f)
283 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
284 IV code = PerlIOBase_close(f);
288 SvREFCNT_dec(e->bufsv);
294 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF|PERLIO_F_WRBUF);
299 PerlIOEncode_tell(PerlIO *f)
302 PerlIOBuf *b = PerlIOSelf(f,PerlIOBuf);
303 /* Unfortunately the only way to get a postion is to back-translate,
304 the UTF8-bytes we have buf..ptr and adjust accordingly.
305 But we will try and save any unread data in case stream
308 if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF) && b->ptr < b->end)
310 Size_t count = b->end - b->ptr;
311 PerlIO_push(aTHX_ f,&PerlIO_pending,"r",Nullsv);
312 /* Save what we have left to read */
313 PerlIOSelf(f,PerlIOBuf)->bufsiz = count;
314 PerlIO_unread(f,b->ptr,count);
315 /* There isn't any unread data - we just saved it - so avoid the lower seek */
317 /* Flush ourselves - now one layer down,
318 this does the back translate and adjusts position
320 PerlIO_flush(PerlIONext(f));
321 /* Set position of the saved data */
322 PerlIOSelf(f,PerlIOBuf)->posn = b->posn;
332 PerlIOEncode_dup(pTHX_ PerlIO *f, PerlIO *o, CLONE_PARAMS *params)
334 if ((f = PerlIOBase_dup(aTHX_ f, o, params)))
336 PerlIOEncode *fe = PerlIOSelf(f,PerlIOEncode);
337 PerlIOEncode *oe = PerlIOSelf(o,PerlIOEncode);
340 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
346 PerlIO_funcs PerlIO_encode = {
348 sizeof(PerlIOEncode),
367 PerlIOBase_setlinebuf,
368 PerlIOEncode_get_base,
372 PerlIOBuf_set_ptrcnt,
374 #endif /* encode layer */
377 Encode_Define(pTHX_ encode_t *enc)
380 HV *stash = gv_stashpv("Encode::XS", TRUE);
381 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
387 const char *name = enc->name[i++];
388 XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
391 call_pv("Encode::define_encoding",G_DISCARD);
395 void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {}
398 encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check)
401 U8 *s = (U8 *) SvPV(src,slen);
402 SV *dst = sv_2mortal(newSV(2*slen+1));
405 U8 *d = (U8 *) SvGROW(dst, 2*slen+1);
406 STRLEN dlen = SvLEN(dst);
408 while ((code = do_encode(dir,s,&slen,d,dlen,&dlen,!check)))
413 if (code == ENCODE_FALLBACK)
420 STRLEN need = (slen) ? (SvLEN(dst)*SvCUR(src)/slen) : (dlen + UTF8_MAXLEN);
421 if (need <= SvLEN(dst))
423 d = (U8 *) SvGROW(dst, need);
430 if (dir == enc->f_utf8)
432 if (!check && ckWARN_d(WARN_UTF8))
435 UV ch = utf8n_to_uvuni(s+slen,(SvCUR(src)-slen),&clen,0);
436 Perl_warner(aTHX_ WARN_UTF8, "\"\\N{U+%"UVxf"}\" does not map to %s", ch, enc->name[0]);
437 /* FIXME: Skip over the character, copy in replacement and continue
438 * but that is messy so for now just fail.
449 /* UTF-8 is supposed to be "Universal" so should not happen */
450 Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8",
451 enc->name[0], (int)(SvCUR(src)-slen),s+slen);
456 if (!check && ckWARN_d(WARN_UTF8))
458 Perl_warner(aTHX_ WARN_UTF8, "Partial %s character",
459 (dir == enc->f_utf8) ? "UTF-8" : enc->name[0]);
464 Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
465 code, (dir == enc->f_utf8) ? "to" : "from",enc->name[0]);
473 if (slen < SvCUR(src))
475 Move(s+slen,s,SvCUR(src)-slen,U8);
477 SvCUR_set(src,SvCUR(src)-slen);
488 MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_
493 Method_decode(obj,src,check = FALSE)
499 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
500 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check);
506 Method_encode(obj,src,check = FALSE)
512 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
513 sv_utf8_upgrade(src);
514 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check);
518 MODULE = Encode PACKAGE = Encode
523 _bytes_to_utf8(sv, ...)
527 SV * encoding = items == 2 ? ST(1) : Nullsv;
530 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
533 U8* s = (U8*)SvPV(sv, len);
536 converted = bytes_to_utf8(s, &len); /* This allocs */
537 sv_setpvn(sv, (char *)converted, len);
538 SvUTF8_on(sv); /* XXX Should we? */
539 Safefree(converted); /* ... so free it */
547 _utf8_to_bytes(sv, ...)
551 SV * to = items > 1 ? ST(1) : Nullsv;
552 SV * check = items > 2 ? ST(2) : Nullsv;
555 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
558 U8 *s = (U8*)SvPV(sv, len);
562 /* Must do things the slow way */
564 U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */
567 New(83, dest, len, U8); /* I think */
576 /* Have to do it all ourselves because of error routine,
580 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
581 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
582 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
583 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
584 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
585 else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
586 else { ulen = 13; uv = 0; }
588 /* Note change to utf8.c variable naming, for variety */
590 if ((*s & 0xc0) != 0x80)
594 uv = (uv << 6) | (*s++ & 0x3f);
598 call_failure(check, s, dest, src);
599 /* Now what happens? */
605 RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
612 is_utf8(sv, check = FALSE)
618 RETVAL = SvUTF8(sv) ? TRUE : FALSE;
621 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
636 SV *rsv = newSViv(SvUTF8(sv));
640 RETVAL = &PL_sv_undef;
652 SV *rsv = newSViv(SvUTF8(sv));
656 RETVAL = &PL_sv_undef;
664 #if defined(USE_PERLIO) && !defined(USE_SFIO)
665 PerlIO_define_layer(aTHX_ &PerlIO_encode);
668 #include "EBCDIC.def"
669 #include "Symbols.def"