1 #define PERL_NO_GET_CONTEXT
13 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \
14 Perl_croak(aTHX_ "panic_unimplemented"); \
15 return (y)0; /* fool picky compilers */ \
17 UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
18 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
20 #if defined(USE_PERLIO) && !defined(USE_SFIO)
21 /* Define an encoding "layer" in the perliol.h sense.
22 The layer defined here "inherits" in an object-oriented sense from the
23 "perlio" layer with its PerlIOBuf_* "methods".
24 The implementation is particularly efficient as until Encode settles down
25 there is no point in tryint to tune it.
27 The layer works by overloading the "fill" and "flush" methods.
29 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API
30 to convert the encoded data to UTF-8 form, then copies it back to the
31 buffer. The "base class's" read methods then see the UTF-8 data.
33 "flush" transforms the UTF-8 data deposited by the "base class's write
34 method in the buffer back into the encoded form using the encode OO perl API,
35 then copies data back into the buffer and calls "SUPER::flush.
37 Note that "flush" is _also_ called for read mode - we still do the (back)-translate
38 so that the the base class's "flush" sees the correct number of encoded chars
39 for positioning the seek pointer. (This double translation is the worst performance
40 issue - particularly with all-perl encode engine.)
49 PerlIOBuf base; /* PerlIOBuf stuff */
55 PerlIOEncode_getarg(PerlIO *f)
58 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
59 SV *sv = &PL_sv_undef;
68 if (perl_call_method("name",G_SCALAR) == 1)
79 PerlIOEncode_pushed(PerlIO *f, const char *mode, SV *arg)
81 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
85 code = PerlIOBuf_pushed(f,mode,Nullsv);
91 if (perl_call_pv("Encode::find_encoding",G_SCALAR) != 1)
93 /* should never happen */
94 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
104 Perl_warner(aTHX_ WARN_IO, "Cannot find encoding \"%"SVf"\"", arg);
107 SvREFCNT_inc(e->enc);
110 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
115 PerlIOEncode_popped(PerlIO *f)
117 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
121 SvREFCNT_dec(e->enc);
126 SvREFCNT_dec(e->bufsv);
133 PerlIOEncode_get_base(PerlIO *f)
135 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
138 e->base.bufsiz = 1024;
141 e->bufsv = newSV(e->base.bufsiz);
142 sv_setpvn(e->bufsv,"",0);
144 e->base.buf = (STDCHAR *)SvPVX(e->bufsv);
146 e->base.ptr = e->base.buf;
148 e->base.end = e->base.buf;
149 if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
151 Perl_warn(aTHX_ " ptr %p(%p)%p",
152 e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
155 if (SvLEN(e->bufsv) < e->base.bufsiz)
157 SSize_t poff = e->base.ptr - e->base.buf;
158 SSize_t eoff = e->base.end - e->base.buf;
159 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,e->base.bufsiz);
160 e->base.ptr = e->base.buf + poff;
161 e->base.end = e->base.buf + eoff;
163 if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
165 Perl_warn(aTHX_ " ptr %p(%p)%p",
166 e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
173 PerlIOEncode_fill(PerlIO *f)
175 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
179 code = PerlIOBuf_fill(f);
185 /* Set SV that is the buffer to be buf..ptr */
186 SvCUR_set(e->bufsv, e->base.end - e->base.buf);
187 SvUTF8_off(e->bufsv);
195 if (perl_call_method("decode",G_SCALAR) != 1)
200 /* Now get translated string (forced to UTF-8) and copy back to buffer
201 don't use sv_setsv as that may "steal" PV from returned temp
202 and so free() our known-large-enough buffer.
203 sv_setpvn() should do but let us do it long hand.
205 s = SvPVutf8(uni,len);
206 if (s != SvPVX(e->bufsv))
208 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
209 Move(s,e->base.buf,len,char);
210 SvCUR_set(e->bufsv,len);
213 e->base.end = e->base.buf+len;
214 e->base.ptr = e->base.buf;
222 PerlIOEncode_flush(PerlIO *f)
224 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
226 if (e->bufsv && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF|PERLIO_F_WRBUF))
227 &&(e->base.ptr > e->base.buf)
236 if (PerlIOBase(f)->flags & PERLIO_F_RDBUF)
238 /* This is really just a flag to see if we took all the data, if
239 we did PerlIOBase_flush avoids a seek to lower layer.
240 Need to revisit if we start getting clever with unreads or seeks-in-buffer
242 left = e->base.end - e->base.ptr;
248 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
253 if (perl_call_method("encode",G_SCALAR) != 1)
259 if (s != SvPVX(e->bufsv))
261 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
262 Move(s,e->base.buf,len,char);
263 SvCUR_set(e->bufsv,len);
265 SvUTF8_off(e->bufsv);
266 e->base.ptr = e->base.buf+len;
267 /* restore end != ptr as inequality is used by PerlIOBuf_flush in read case */
268 e->base.end = e->base.ptr + left;
271 if (PerlIOBuf_flush(f) != 0)
278 PerlIOEncode_close(PerlIO *f)
280 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
281 IV code = PerlIOBase_close(f);
285 SvREFCNT_dec(e->bufsv);
291 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF|PERLIO_F_WRBUF);
296 PerlIOEncode_tell(PerlIO *f)
299 PerlIOBuf *b = PerlIOSelf(f,PerlIOBuf);
300 /* Unfortunately the only way to get a postion is to back-translate,
301 the UTF8-bytes we have buf..ptr and adjust accordingly.
302 But we will try and save any unread data in case stream
305 if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF) && b->ptr < b->end)
307 Size_t count = b->end - b->ptr;
308 PerlIO_push(aTHX_ f,&PerlIO_pending,"r",Nullsv);
309 /* Save what we have left to read */
310 PerlIOSelf(f,PerlIOBuf)->bufsiz = count;
311 PerlIO_unread(f,b->ptr,count);
312 /* There isn't any unread data - we just saved it - so avoid the lower seek */
314 /* Flush ourselves - now one layer down,
315 this does the back translate and adjusts position
317 PerlIO_flush(PerlIONext(f));
318 /* Set position of the saved data */
319 PerlIOSelf(f,PerlIOBuf)->posn = b->posn;
329 PerlIOEncode_dup(pTHX_ PerlIO *f, PerlIO *o, CLONE_PARAMS *params)
331 if ((f = PerlIOBase_dup(aTHX_ f, o, params)))
333 PerlIOEncode *fe = PerlIOSelf(f,PerlIOEncode);
334 PerlIOEncode *oe = PerlIOSelf(o,PerlIOEncode);
337 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
343 PerlIO_funcs PerlIO_encode = {
345 sizeof(PerlIOEncode),
364 PerlIOBase_setlinebuf,
365 PerlIOEncode_get_base,
369 PerlIOBuf_set_ptrcnt,
371 #endif /* encode layer */
374 Encode_Define(pTHX_ encode_t *enc)
377 HV *stash = gv_stashpv("Encode::XS", TRUE);
378 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
384 const char *name = enc->name[i++];
385 XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
388 call_pv("Encode::define_encoding",G_DISCARD);
392 void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {}
395 encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check)
398 U8 *s = (U8 *) SvPV(src,slen);
399 SV *dst = sv_2mortal(newSV(2*slen+1));
402 U8 *d = (U8 *) SvGROW(dst, 2*slen+1);
403 STRLEN dlen = SvLEN(dst);
405 while ((code = do_encode(dir,s,&slen,d,dlen,&dlen,!check)))
410 if (code == ENCODE_FALLBACK)
417 STRLEN need = (slen) ? (SvLEN(dst)*SvCUR(src)/slen) : (dlen + UTF8_MAXLEN);
418 if (need <= SvLEN(dst))
420 d = (U8 *) SvGROW(dst, need);
427 if (dir == enc->f_utf8)
429 if (!check && ckWARN_d(WARN_UTF8))
432 UV ch = utf8n_to_uvuni(s+slen,(SvCUR(src)-slen),&clen,0);
433 Perl_warner(aTHX_ WARN_UTF8, "\"\\N{U+%"UVxf"}\" does not map to %s", ch, enc->name[0]);
434 /* FIXME: Skip over the character, copy in replacement and continue
435 * but that is messy so for now just fail.
446 /* UTF-8 is supposed to be "Universal" so should not happen */
447 Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8",
448 enc->name[0], (int)(SvCUR(src)-slen),s+slen);
453 if (!check && ckWARN_d(WARN_UTF8))
455 Perl_warner(aTHX_ WARN_UTF8, "Partial %s character",
456 (dir == enc->f_utf8) ? "UTF-8" : enc->name[0]);
461 Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
462 code, (dir == enc->f_utf8) ? "to" : "from",enc->name[0]);
470 if (slen < SvCUR(src))
472 Move(s+slen,s,SvCUR(src)-slen,U8);
474 SvCUR_set(src,SvCUR(src)-slen);
485 MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_
490 Method_decode(obj,src,check = FALSE)
496 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
497 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check);
503 Method_encode(obj,src,check = FALSE)
509 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
510 sv_utf8_upgrade(src);
511 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check);
515 MODULE = Encode PACKAGE = Encode
520 _bytes_to_utf8(sv, ...)
524 SV * encoding = items == 2 ? ST(1) : Nullsv;
527 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
530 U8* s = (U8*)SvPV(sv, len);
533 converted = bytes_to_utf8(s, &len); /* This allocs */
534 sv_setpvn(sv, (char *)converted, len);
535 SvUTF8_on(sv); /* XXX Should we? */
536 Safefree(converted); /* ... so free it */
544 _utf8_to_bytes(sv, ...)
548 SV * to = items > 1 ? ST(1) : Nullsv;
549 SV * check = items > 2 ? ST(2) : Nullsv;
552 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
555 U8 *s = (U8*)SvPV(sv, len);
559 /* Must do things the slow way */
561 U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */
564 New(83, dest, len, U8); /* I think */
573 /* Have to do it all ourselves because of error routine,
577 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
578 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
579 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
580 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
581 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
582 else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
583 else { ulen = 13; uv = 0; }
585 /* Note change to utf8.c variable naming, for variety */
587 if ((*s & 0xc0) != 0x80)
591 uv = (uv << 6) | (*s++ & 0x3f);
595 call_failure(check, s, dest, src);
596 /* Now what happens? */
602 RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
609 is_utf8(sv, check = FALSE)
615 RETVAL = SvUTF8(sv) ? TRUE : FALSE;
618 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
633 SV *rsv = newSViv(SvUTF8(sv));
637 RETVAL = &PL_sv_undef;
649 SV *rsv = newSViv(SvUTF8(sv));
653 RETVAL = &PL_sv_undef;
661 #if defined(USE_PERLIO) && !defined(USE_SFIO)
662 PerlIO_define_layer(aTHX_ &PerlIO_encode);
665 #include "EBCDIC.def"
666 #include "Symbols.def"