1 #define PERL_NO_GET_CONTEXT
13 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \
14 Perl_croak(aTHX_ "panic_unimplemented"); \
15 return (y)0; /* fool picky compilers */ \
17 UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
18 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
20 #if defined(USE_PERLIO) && !defined(USE_SFIO)
21 /* Define an encoding "layer" in the perliol.h sense.
22 The layer defined here "inherits" in an object-oriented sense from the
23 "perlio" layer with its PerlIOBuf_* "methods".
24 The implementation is particularly efficient as until Encode settles down
25 there is no point in tryint to tune it.
27 The layer works by overloading the "fill" and "flush" methods.
29 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API
30 to convert the encoded data to UTF-8 form, then copies it back to the
31 buffer. The "base class's" read methods then see the UTF-8 data.
33 "flush" transforms the UTF-8 data deposited by the "base class's write
34 method in the buffer back into the encoded form using the encode OO perl API,
35 then copies data back into the buffer and calls "SUPER::flush.
37 Note that "flush" is _also_ called for read mode - we still do the (back)-translate
38 so that the the base class's "flush" sees the correct number of encoded chars
39 for positioning the seek pointer. (This double translation is the worst performance
40 issue - particularly with all-perl encode engine.)
49 PerlIOBuf base; /* PerlIOBuf stuff */
55 PerlIOEncode_getarg(pTHX_ PerlIO *f, CLONE_PARAMS *param, int flags)
57 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
58 SV *sv = &PL_sv_undef;
67 if (perl_call_method("name",G_SCALAR) == 1)
78 PerlIOEncode_pushed(pTHX_ PerlIO *f, const char *mode, SV *arg)
80 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
83 code = PerlIOBuf_pushed(aTHX_ f,mode,Nullsv);
89 if (perl_call_pv("Encode::find_encoding",G_SCALAR) != 1)
91 /* should never happen */
92 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
102 Perl_warner(aTHX_ WARN_IO, "Cannot find encoding \"%"SVf"\"", arg);
107 SvREFCNT_inc(e->enc);
108 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
116 PerlIOEncode_popped(pTHX_ PerlIO *f)
118 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
121 SvREFCNT_dec(e->enc);
126 SvREFCNT_dec(e->bufsv);
133 PerlIOEncode_get_base(pTHX_ PerlIO *f)
135 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
137 e->base.bufsiz = 1024;
140 e->bufsv = newSV(e->base.bufsiz);
141 sv_setpvn(e->bufsv,"",0);
143 e->base.buf = (STDCHAR *)SvPVX(e->bufsv);
145 e->base.ptr = e->base.buf;
147 e->base.end = e->base.buf;
148 if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
150 Perl_warn(aTHX_ " ptr %p(%p)%p",
151 e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
154 if (SvLEN(e->bufsv) < e->base.bufsiz)
156 SSize_t poff = e->base.ptr - e->base.buf;
157 SSize_t eoff = e->base.end - e->base.buf;
158 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,e->base.bufsiz);
159 e->base.ptr = e->base.buf + poff;
160 e->base.end = e->base.buf + eoff;
162 if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
164 Perl_warn(aTHX_ " ptr %p(%p)%p",
165 e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
172 PerlIOEncode_fill(pTHX_ PerlIO *f)
174 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
177 code = PerlIOBuf_fill(aTHX_ f);
183 /* Set SV that is the buffer to be buf..ptr */
184 SvCUR_set(e->bufsv, e->base.end - e->base.buf);
185 SvUTF8_off(e->bufsv);
193 if (perl_call_method("decode",G_SCALAR) != 1)
198 /* Now get translated string (forced to UTF-8) and copy back to buffer
199 don't use sv_setsv as that may "steal" PV from returned temp
200 and so free() our known-large-enough buffer.
201 sv_setpvn() should do but let us do it long hand.
203 s = SvPVutf8(uni,len);
204 if (s != SvPVX(e->bufsv))
206 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
207 Move(s,e->base.buf,len,char);
208 SvCUR_set(e->bufsv,len);
211 e->base.end = e->base.buf+len;
212 e->base.ptr = e->base.buf;
220 PerlIOEncode_flush(pTHX_ PerlIO *f)
222 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
224 if (e->bufsv && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF|PERLIO_F_WRBUF))
225 &&(e->base.ptr > e->base.buf)
233 if (PerlIOBase(f)->flags & PERLIO_F_RDBUF)
235 /* This is really just a flag to see if we took all the data, if
236 we did PerlIOBase_flush avoids a seek to lower layer.
237 Need to revisit if we start getting clever with unreads or seeks-in-buffer
239 left = e->base.end - e->base.ptr;
245 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
250 if (perl_call_method("encode",G_SCALAR) != 1)
256 if (s != SvPVX(e->bufsv))
258 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
259 Move(s,e->base.buf,len,char);
260 SvCUR_set(e->bufsv,len);
262 SvUTF8_off(e->bufsv);
263 e->base.ptr = e->base.buf+len;
264 /* restore end != ptr as inequality is used by PerlIOBuf_flush in read case */
265 e->base.end = e->base.ptr + left;
268 if (PerlIOBuf_flush(aTHX_ f) != 0)
275 PerlIOEncode_close(pTHX_ PerlIO *f)
277 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
278 IV code = PerlIOBase_close(aTHX_ f);
281 SvREFCNT_dec(e->bufsv);
287 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF|PERLIO_F_WRBUF);
292 PerlIOEncode_tell(pTHX_ PerlIO *f)
294 PerlIOBuf *b = PerlIOSelf(f,PerlIOBuf);
295 /* Unfortunately the only way to get a postion is to back-translate,
296 the UTF8-bytes we have buf..ptr and adjust accordingly.
297 But we will try and save any unread data in case stream
300 if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF) && b->ptr < b->end)
302 Size_t count = b->end - b->ptr;
303 PerlIO_push(aTHX_ f,&PerlIO_pending,"r",Nullsv);
304 /* Save what we have left to read */
305 PerlIOSelf(f,PerlIOBuf)->bufsiz = count;
306 PerlIO_unread(f,b->ptr,count);
307 /* There isn't any unread data - we just saved it - so avoid the lower seek */
309 /* Flush ourselves - now one layer down,
310 this does the back translate and adjusts position
312 PerlIO_flush(PerlIONext(f));
313 /* Set position of the saved data */
314 PerlIOSelf(f,PerlIOBuf)->posn = b->posn;
324 PerlIOEncode_dup(pTHX_ PerlIO *f, PerlIO *o, CLONE_PARAMS *params, int flags)
326 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags)))
328 PerlIOEncode *fe = PerlIOSelf(f,PerlIOEncode);
329 PerlIOEncode *oe = PerlIOSelf(o,PerlIOEncode);
332 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
338 PerlIO_funcs PerlIO_encode = {
340 sizeof(PerlIOEncode),
359 PerlIOBase_setlinebuf,
360 PerlIOEncode_get_base,
364 PerlIOBuf_set_ptrcnt,
366 #endif /* encode layer */
369 Encode_XSEncoding(pTHX_ encode_t *enc)
372 HV *stash = gv_stashpv("Encode::XS", TRUE);
373 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
379 const char *name = enc->name[i++];
380 XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
383 call_pv("Encode::define_encoding",G_DISCARD);
387 void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {}
390 encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check)
393 U8 *s = (U8 *) SvPV(src,slen);
394 SV *dst = sv_2mortal(newSV(2*slen+1));
397 U8 *d = (U8 *) SvGROW(dst, 2*slen+1);
398 STRLEN dlen = SvLEN(dst);
400 while ((code = do_encode(dir,s,&slen,d,dlen,&dlen,!check)))
405 if (code == ENCODE_FALLBACK)
412 STRLEN need = dlen + UTF8_MAXLEN * 128; /* 128 is too big or small? */
413 d = (U8 *) SvGROW(dst, need);
414 if (dlen >= SvLEN(dst))
416 Perl_croak(aTHX_ "Destination couldn't be grown (the need may be miscalculated).");
424 if (dir == enc->f_utf8)
426 if (!check && ckWARN_d(WARN_UTF8))
429 UV ch = utf8n_to_uvuni(s+slen,(SvCUR(src)-slen),&clen,0);
430 Perl_warner(aTHX_ WARN_UTF8, "\"\\N{U+%"UVxf"}\" does not map to %s", ch, enc->name[0]);
431 /* FIXME: Skip over the character, copy in replacement and continue
432 * but that is messy so for now just fail.
443 /* UTF-8 is supposed to be "Universal" so should not happen */
444 Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8",
445 enc->name[0], (int)(SvCUR(src)-slen),s+slen);
450 if (!check && ckWARN_d(WARN_UTF8))
452 Perl_warner(aTHX_ WARN_UTF8, "Partial %s character",
453 (dir == enc->f_utf8) ? "UTF-8" : enc->name[0]);
458 Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
459 code, (dir == enc->f_utf8) ? "to" : "from",enc->name[0]);
467 if (slen < SvCUR(src))
469 Move(s+slen,s,SvCUR(src)-slen,U8);
471 SvCUR_set(src,SvCUR(src)-slen);
482 MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_
487 Method_decode(obj,src,check = FALSE)
493 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
494 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check);
500 Method_encode(obj,src,check = FALSE)
506 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
507 sv_utf8_upgrade(src);
508 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check);
512 MODULE = Encode PACKAGE = Encode
517 _bytes_to_utf8(sv, ...)
521 SV * encoding = items == 2 ? ST(1) : Nullsv;
524 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
527 U8* s = (U8*)SvPV(sv, len);
530 converted = bytes_to_utf8(s, &len); /* This allocs */
531 sv_setpvn(sv, (char *)converted, len);
532 SvUTF8_on(sv); /* XXX Should we? */
533 Safefree(converted); /* ... so free it */
541 _utf8_to_bytes(sv, ...)
545 SV * to = items > 1 ? ST(1) : Nullsv;
546 SV * check = items > 2 ? ST(2) : Nullsv;
549 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
552 U8 *s = (U8*)SvPV(sv, len);
556 /* Must do things the slow way */
558 U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */
561 New(83, dest, len, U8); /* I think */
570 /* Have to do it all ourselves because of error routine,
574 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
575 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
576 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
577 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
578 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
579 else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
580 else { ulen = 13; uv = 0; }
582 /* Note change to utf8.c variable naming, for variety */
584 if ((*s & 0xc0) != 0x80)
588 uv = (uv << 6) | (*s++ & 0x3f);
592 call_failure(check, s, dest, src);
593 /* Now what happens? */
599 RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
606 is_utf8(sv, check = FALSE)
611 if (SvGMAGICAL(sv)) /* it could be $1, for example */
612 sv = newSVsv(sv); /* GMAGIG will be done */
614 RETVAL = SvUTF8(sv) ? TRUE : FALSE;
617 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
623 SvREFCNT_dec(sv); /* it was a temp copy */
634 SV *rsv = newSViv(SvUTF8(sv));
638 RETVAL = &PL_sv_undef;
650 SV *rsv = newSViv(SvUTF8(sv));
654 RETVAL = &PL_sv_undef;
662 #if defined(USE_PERLIO) && !defined(USE_SFIO)
663 PerlIO_define_layer(aTHX_ &PerlIO_encode);
665 #include "8859_def.h"
666 #include "EBCDIC_def.h"
667 #include "Symbols_def.h"