1 #define PERL_NO_GET_CONTEXT
13 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \
14 Perl_croak(aTHX_ "panic_unimplemented"); \
15 return (y)0; /* fool picky compilers */ \
17 UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
18 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
20 #if defined(USE_PERLIO) && !defined(USE_SFIO)
21 /* Define an encoding "layer" in the perliol.h sense.
22 The layer defined here "inherits" in an object-oriented sense from the
23 "perlio" layer with its PerlIOBuf_* "methods".
24 The implementation is particularly efficient as until Encode settles down
25 there is no point in tryint to tune it.
27 The layer works by overloading the "fill" and "flush" methods.
29 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API
30 to convert the encoded data to UTF-8 form, then copies it back to the
31 buffer. The "base class's" read methods then see the UTF-8 data.
33 "flush" transforms the UTF-8 data deposited by the "base class's write
34 method in the buffer back into the encoded form using the encode OO perl API,
35 then copies data back into the buffer and calls "SUPER::flush.
37 Note that "flush" is _also_ called for read mode - we still do the (back)-translate
38 so that the the base class's "flush" sees the correct number of encoded chars
39 for positioning the seek pointer. (This double translation is the worst performance
40 issue - particularly with all-perl encode engine.)
49 PerlIOBuf base; /* PerlIOBuf stuff */
55 PerlIOEncode_getarg(pTHX_ PerlIO *f, CLONE_PARAMS *param, int flags)
57 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
58 SV *sv = &PL_sv_undef;
67 if (perl_call_method("name",G_SCALAR) == 1)
78 PerlIOEncode_pushed(PerlIO *f, const char *mode, SV *arg)
80 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
84 code = PerlIOBuf_pushed(f,mode,Nullsv);
90 if (perl_call_pv("Encode::find_encoding",G_SCALAR) != 1)
92 /* should never happen */
93 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
103 Perl_warner(aTHX_ WARN_IO, "Cannot find encoding \"%"SVf"\"", arg);
108 SvREFCNT_inc(e->enc);
109 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
117 PerlIOEncode_popped(PerlIO *f)
119 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
123 SvREFCNT_dec(e->enc);
128 SvREFCNT_dec(e->bufsv);
135 PerlIOEncode_get_base(PerlIO *f)
137 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
140 e->base.bufsiz = 1024;
143 e->bufsv = newSV(e->base.bufsiz);
144 sv_setpvn(e->bufsv,"",0);
146 e->base.buf = (STDCHAR *)SvPVX(e->bufsv);
148 e->base.ptr = e->base.buf;
150 e->base.end = e->base.buf;
151 if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
153 Perl_warn(aTHX_ " ptr %p(%p)%p",
154 e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
157 if (SvLEN(e->bufsv) < e->base.bufsiz)
159 SSize_t poff = e->base.ptr - e->base.buf;
160 SSize_t eoff = e->base.end - e->base.buf;
161 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,e->base.bufsiz);
162 e->base.ptr = e->base.buf + poff;
163 e->base.end = e->base.buf + eoff;
165 if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
167 Perl_warn(aTHX_ " ptr %p(%p)%p",
168 e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
175 PerlIOEncode_fill(PerlIO *f)
177 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
181 code = PerlIOBuf_fill(f);
187 /* Set SV that is the buffer to be buf..ptr */
188 SvCUR_set(e->bufsv, e->base.end - e->base.buf);
189 SvUTF8_off(e->bufsv);
197 if (perl_call_method("decode",G_SCALAR) != 1)
202 /* Now get translated string (forced to UTF-8) and copy back to buffer
203 don't use sv_setsv as that may "steal" PV from returned temp
204 and so free() our known-large-enough buffer.
205 sv_setpvn() should do but let us do it long hand.
207 s = SvPVutf8(uni,len);
208 if (s != SvPVX(e->bufsv))
210 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
211 Move(s,e->base.buf,len,char);
212 SvCUR_set(e->bufsv,len);
215 e->base.end = e->base.buf+len;
216 e->base.ptr = e->base.buf;
224 PerlIOEncode_flush(PerlIO *f)
226 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
228 if (e->bufsv && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF|PERLIO_F_WRBUF))
229 &&(e->base.ptr > e->base.buf)
238 if (PerlIOBase(f)->flags & PERLIO_F_RDBUF)
240 /* This is really just a flag to see if we took all the data, if
241 we did PerlIOBase_flush avoids a seek to lower layer.
242 Need to revisit if we start getting clever with unreads or seeks-in-buffer
244 left = e->base.end - e->base.ptr;
250 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
255 if (perl_call_method("encode",G_SCALAR) != 1)
261 if (s != SvPVX(e->bufsv))
263 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
264 Move(s,e->base.buf,len,char);
265 SvCUR_set(e->bufsv,len);
267 SvUTF8_off(e->bufsv);
268 e->base.ptr = e->base.buf+len;
269 /* restore end != ptr as inequality is used by PerlIOBuf_flush in read case */
270 e->base.end = e->base.ptr + left;
273 if (PerlIOBuf_flush(f) != 0)
280 PerlIOEncode_close(PerlIO *f)
282 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
283 IV code = PerlIOBase_close(f);
287 SvREFCNT_dec(e->bufsv);
293 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF|PERLIO_F_WRBUF);
298 PerlIOEncode_tell(PerlIO *f)
301 PerlIOBuf *b = PerlIOSelf(f,PerlIOBuf);
302 /* Unfortunately the only way to get a postion is to back-translate,
303 the UTF8-bytes we have buf..ptr and adjust accordingly.
304 But we will try and save any unread data in case stream
307 if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF) && b->ptr < b->end)
309 Size_t count = b->end - b->ptr;
310 PerlIO_push(aTHX_ f,&PerlIO_pending,"r",Nullsv);
311 /* Save what we have left to read */
312 PerlIOSelf(f,PerlIOBuf)->bufsiz = count;
313 PerlIO_unread(f,b->ptr,count);
314 /* There isn't any unread data - we just saved it - so avoid the lower seek */
316 /* Flush ourselves - now one layer down,
317 this does the back translate and adjusts position
319 PerlIO_flush(PerlIONext(f));
320 /* Set position of the saved data */
321 PerlIOSelf(f,PerlIOBuf)->posn = b->posn;
331 PerlIOEncode_dup(pTHX_ PerlIO *f, PerlIO *o, CLONE_PARAMS *params, int flags)
333 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags)))
335 PerlIOEncode *fe = PerlIOSelf(f,PerlIOEncode);
336 PerlIOEncode *oe = PerlIOSelf(o,PerlIOEncode);
339 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
345 PerlIO_funcs PerlIO_encode = {
347 sizeof(PerlIOEncode),
366 PerlIOBase_setlinebuf,
367 PerlIOEncode_get_base,
371 PerlIOBuf_set_ptrcnt,
373 #endif /* encode layer */
376 Encode_Define(pTHX_ encode_t *enc)
379 HV *stash = gv_stashpv("Encode::XS", TRUE);
380 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
386 const char *name = enc->name[i++];
387 XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
390 call_pv("Encode::define_encoding",G_DISCARD);
394 void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {}
397 encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check)
400 U8 *s = (U8 *) SvPV(src,slen);
401 SV *dst = sv_2mortal(newSV(2*slen+1));
404 U8 *d = (U8 *) SvGROW(dst, 2*slen+1);
405 STRLEN dlen = SvLEN(dst);
407 while ((code = do_encode(dir,s,&slen,d,dlen,&dlen,!check)))
412 if (code == ENCODE_FALLBACK)
419 STRLEN need = (slen) ? (SvLEN(dst)*SvCUR(src)/slen) : (dlen + UTF8_MAXLEN);
420 if (need <= SvLEN(dst))
422 d = (U8 *) SvGROW(dst, need);
429 if (dir == enc->f_utf8)
431 if (!check && ckWARN_d(WARN_UTF8))
434 UV ch = utf8n_to_uvuni(s+slen,(SvCUR(src)-slen),&clen,0);
435 Perl_warner(aTHX_ WARN_UTF8, "\"\\N{U+%"UVxf"}\" does not map to %s", ch, enc->name[0]);
436 /* FIXME: Skip over the character, copy in replacement and continue
437 * but that is messy so for now just fail.
448 /* UTF-8 is supposed to be "Universal" so should not happen */
449 Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8",
450 enc->name[0], (int)(SvCUR(src)-slen),s+slen);
455 if (!check && ckWARN_d(WARN_UTF8))
457 Perl_warner(aTHX_ WARN_UTF8, "Partial %s character",
458 (dir == enc->f_utf8) ? "UTF-8" : enc->name[0]);
463 Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
464 code, (dir == enc->f_utf8) ? "to" : "from",enc->name[0]);
472 if (slen < SvCUR(src))
474 Move(s+slen,s,SvCUR(src)-slen,U8);
476 SvCUR_set(src,SvCUR(src)-slen);
487 MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_
492 Method_decode(obj,src,check = FALSE)
498 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
499 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check);
505 Method_encode(obj,src,check = FALSE)
511 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
512 sv_utf8_upgrade(src);
513 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check);
517 MODULE = Encode PACKAGE = Encode
522 _bytes_to_utf8(sv, ...)
526 SV * encoding = items == 2 ? ST(1) : Nullsv;
529 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
532 U8* s = (U8*)SvPV(sv, len);
535 converted = bytes_to_utf8(s, &len); /* This allocs */
536 sv_setpvn(sv, (char *)converted, len);
537 SvUTF8_on(sv); /* XXX Should we? */
538 Safefree(converted); /* ... so free it */
546 _utf8_to_bytes(sv, ...)
550 SV * to = items > 1 ? ST(1) : Nullsv;
551 SV * check = items > 2 ? ST(2) : Nullsv;
554 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
557 U8 *s = (U8*)SvPV(sv, len);
561 /* Must do things the slow way */
563 U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */
566 New(83, dest, len, U8); /* I think */
575 /* Have to do it all ourselves because of error routine,
579 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
580 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
581 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
582 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
583 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
584 else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
585 else { ulen = 13; uv = 0; }
587 /* Note change to utf8.c variable naming, for variety */
589 if ((*s & 0xc0) != 0x80)
593 uv = (uv << 6) | (*s++ & 0x3f);
597 call_failure(check, s, dest, src);
598 /* Now what happens? */
604 RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
611 is_utf8(sv, check = FALSE)
617 RETVAL = SvUTF8(sv) ? TRUE : FALSE;
620 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
635 SV *rsv = newSViv(SvUTF8(sv));
639 RETVAL = &PL_sv_undef;
651 SV *rsv = newSViv(SvUTF8(sv));
655 RETVAL = &PL_sv_undef;
663 #if defined(USE_PERLIO) && !defined(USE_SFIO)
664 PerlIO_define_layer(aTHX_ &PerlIO_encode);
667 #include "EBCDIC.def"
668 #include "Symbols.def"