10 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) { \
11 Perl_croak(aTHX_ "panic_unimplemented"); \
12 return (y)0; /* fool picky compilers */ \
14 UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
15 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
17 #if defined(USE_PERLIO) && !defined(USE_SFIO)
18 /* Define an encoding "layer" in the perliol.h sense.
19 The layer defined here "inherits" in an object-oriented sense from the
20 "perlio" layer with its PerlIOBuf_* "methods".
21 The implementation is particularly efficient as until Encode settles down
22 there is no point in tryint to tune it.
24 The layer works by overloading the "fill" and "flush" methods.
26 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API
27 to convert the encoded data to UTF-8 form, then copies it back to the
28 buffer. The "base class's" read methods then see the UTF-8 data.
30 "flush" transforms the UTF-8 data deposited by the "base class's write
31 method in the buffer back into the encoded form using the encode OO perl API,
32 then copies data back into the buffer and calls "SUPER::flush.
34 Note that "flush" is _also_ called for read mode - we still do the (back)-translate
35 so that the the base class's "flush" sees the correct number of encoded chars
36 for positioning the seek pointer. (This double translation is the worst performance
37 issue - particularly with all-perl encode engine.)
46 PerlIOBuf base; /* PerlIOBuf stuff */
52 PerlIOEncode_getarg(PerlIO *f)
54 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
55 SV *sv = &PL_sv_undef;
64 if (perl_call_method("name",G_SCALAR) == 1)
75 PerlIOEncode_pushed(PerlIO *f, const char *mode, SV *arg)
77 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
81 code = PerlIOBuf_pushed(f,mode,Nullsv);
87 if (perl_call_pv("Encode::find_encoding",G_SCALAR) != 1)
89 /* should never happen */
90 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
100 Perl_warner(aTHX_ WARN_IO, "Cannot find encoding \"%"SVf"\"", arg);
103 SvREFCNT_inc(e->enc);
106 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
111 PerlIOEncode_popped(PerlIO *f)
113 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
117 SvREFCNT_dec(e->enc);
122 SvREFCNT_dec(e->bufsv);
129 PerlIOEncode_get_base(PerlIO *f)
131 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
134 e->base.bufsiz = 1024;
137 e->bufsv = newSV(e->base.bufsiz);
138 sv_setpvn(e->bufsv,"",0);
140 e->base.buf = (STDCHAR *)SvPVX(e->bufsv);
142 e->base.ptr = e->base.buf;
144 e->base.end = e->base.buf;
145 if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
147 Perl_warn(aTHX_ " ptr %p(%p)%p",
148 e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
151 if (SvLEN(e->bufsv) < e->base.bufsiz)
153 SSize_t poff = e->base.ptr - e->base.buf;
154 SSize_t eoff = e->base.end - e->base.buf;
155 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,e->base.bufsiz);
156 e->base.ptr = e->base.buf + poff;
157 e->base.end = e->base.buf + eoff;
159 if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
161 Perl_warn(aTHX_ " ptr %p(%p)%p",
162 e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
169 PerlIOEncode_fill(PerlIO *f)
171 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
175 code = PerlIOBuf_fill(f);
181 /* Set SV that is the buffer to be buf..ptr */
182 SvCUR_set(e->bufsv, e->base.end - e->base.buf);
183 SvUTF8_off(e->bufsv);
191 if (perl_call_method("decode",G_SCALAR) != 1)
196 /* Now get translated string (forced to UTF-8) and copy back to buffer
197 don't use sv_setsv as that may "steal" PV from returned temp
198 and so free() our known-large-enough buffer.
199 sv_setpvn() should do but let us do it long hand.
201 s = SvPVutf8(uni,len);
202 if (s != SvPVX(e->bufsv))
204 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
205 Move(s,e->base.buf,len,char);
206 SvCUR_set(e->bufsv,len);
209 e->base.end = e->base.buf+len;
210 e->base.ptr = e->base.buf;
218 PerlIOEncode_flush(PerlIO *f)
220 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
222 if (e->bufsv && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF|PERLIO_F_WRBUF))
223 &&(e->base.ptr > e->base.buf)
232 if (PerlIOBase(f)->flags & PERLIO_F_RDBUF)
234 /* This is really just a flag to see if we took all the data, if
235 we did PerlIOBase_flush avoids a seek to lower layer.
236 Need to revisit if we start getting clever with unreads or seeks-in-buffer
238 left = e->base.end - e->base.ptr;
244 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
249 if (perl_call_method("encode",G_SCALAR) != 1)
255 if (s != SvPVX(e->bufsv))
257 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
258 Move(s,e->base.buf,len,char);
259 SvCUR_set(e->bufsv,len);
261 SvUTF8_off(e->bufsv);
262 e->base.ptr = e->base.buf+len;
263 /* restore end != ptr as inequality is used by PerlIOBuf_flush in read case */
264 e->base.end = e->base.ptr + left;
267 if (PerlIOBuf_flush(f) != 0)
274 PerlIOEncode_close(PerlIO *f)
276 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
277 IV code = PerlIOBase_close(f);
281 SvREFCNT_dec(e->bufsv);
287 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF|PERLIO_F_WRBUF);
292 PerlIOEncode_tell(PerlIO *f)
295 PerlIOBuf *b = PerlIOSelf(f,PerlIOBuf);
296 /* Unfortunately the only way to get a postion is to back-translate,
297 the UTF8-bytes we have buf..ptr and adjust accordingly.
298 But we will try and save any unread data in case stream
301 if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF) && b->ptr < b->end)
303 Size_t count = b->end - b->ptr;
304 PerlIO_push(aTHX_ f,&PerlIO_pending,"r",Nullsv);
305 /* Save what we have left to read */
306 PerlIOSelf(f,PerlIOBuf)->bufsiz = count;
307 PerlIO_unread(f,b->ptr,count);
308 /* There isn't any unread data - we just saved it - so avoid the lower seek */
310 /* Flush ourselves - now one layer down,
311 this does the back translate and adjusts position
313 PerlIO_flush(PerlIONext(f));
314 /* Set position of the saved data */
315 PerlIOSelf(f,PerlIOBuf)->posn = b->posn;
324 PerlIO_funcs PerlIO_encode = {
326 sizeof(PerlIOEncode),
344 PerlIOBase_setlinebuf,
345 PerlIOEncode_get_base,
349 PerlIOBuf_set_ptrcnt,
351 #endif /* encode layer */
354 Encode_Define(pTHX_ encode_t *enc)
357 HV *stash = gv_stashpv("Encode::XS", TRUE);
358 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
364 const char *name = enc->name[i++];
365 XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
368 call_pv("Encode::define_encoding",G_DISCARD);
372 void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {}
375 encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check)
378 U8 *s = (U8 *) SvPV(src,slen);
379 SV *dst = sv_2mortal(newSV(2*slen+1));
382 U8 *d = (U8 *) SvGROW(dst, 2*slen+1);
383 STRLEN dlen = SvLEN(dst);
385 while ((code = do_encode(dir,s,&slen,d,dlen,&dlen,!check)))
390 if (code == ENCODE_FALLBACK)
397 STRLEN need = (slen) ? (SvLEN(dst)*SvCUR(src)/slen) : (dlen + UTF8_MAXLEN);
398 if (need <= SvLEN(dst))
400 d = (U8 *) SvGROW(dst, need);
407 if (dir == enc->f_utf8)
409 if (!check && ckWARN_d(WARN_UTF8))
412 UV ch = utf8n_to_uvuni(s+slen,(SvCUR(src)-slen),&clen,0);
413 Perl_warner(aTHX_ WARN_UTF8, "\"\\N{U+%"UVxf"}\" does not map to %s", ch, enc->name[0]);
414 /* FIXME: Skip over the character, copy in replacement and continue
415 * but that is messy so for now just fail.
426 /* UTF-8 is supposed to be "Universal" so should not happen */
427 Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8",
428 enc->name[0], (int)(SvCUR(src)-slen),s+slen);
433 if (!check && ckWARN_d(WARN_UTF8))
435 Perl_warner(aTHX_ WARN_UTF8, "Partial %s character",
436 (dir == enc->f_utf8) ? "UTF-8" : enc->name[0]);
441 Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
442 code, (dir == enc->f_utf8) ? "to" : "from",enc->name[0]);
450 if (slen < SvCUR(src))
452 Move(s+slen,s,SvCUR(src)-slen,U8);
454 SvCUR_set(src,SvCUR(src)-slen);
465 MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_
470 Method_decode(obj,src,check = FALSE)
476 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
477 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check);
483 Method_encode(obj,src,check = FALSE)
489 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
490 sv_utf8_upgrade(src);
491 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check);
495 MODULE = Encode PACKAGE = Encode
500 _bytes_to_utf8(sv, ...)
504 SV * encoding = items == 2 ? ST(1) : Nullsv;
507 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
510 U8* s = (U8*)SvPV(sv, len);
513 converted = bytes_to_utf8(s, &len); /* This allocs */
514 sv_setpvn(sv, (char *)converted, len);
515 SvUTF8_on(sv); /* XXX Should we? */
516 Safefree(converted); /* ... so free it */
524 _utf8_to_bytes(sv, ...)
528 SV * to = items > 1 ? ST(1) : Nullsv;
529 SV * check = items > 2 ? ST(2) : Nullsv;
532 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
535 U8 *s = (U8*)SvPV(sv, len);
538 /* Must do things the slow way */
540 U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */
543 New(83, dest, len, U8); /* I think */
552 /* Have to do it all ourselves because of error routine,
556 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
557 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
558 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
559 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
560 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
561 else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
562 else { ulen = 13; uv = 0; }
564 /* Note change to utf8.c variable naming, for variety */
566 if ((*s & 0xc0) != 0x80)
570 uv = (uv << 6) | (*s++ & 0x3f);
574 call_failure(check, s, dest, src);
575 /* Now what happens? */
581 RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
588 is_utf8(sv, check = FALSE)
594 RETVAL = SvUTF8(sv) ? TRUE : FALSE;
597 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
612 SV *rsv = newSViv(SvUTF8(sv));
616 RETVAL = &PL_sv_undef;
628 SV *rsv = newSViv(SvUTF8(sv));
632 RETVAL = &PL_sv_undef;
640 #if defined(USE_PERLIO) && !defined(USE_SFIO)
641 PerlIO_define_layer(aTHX_ &PerlIO_encode);
643 #include "iso8859.def"
644 #include "EBCDIC.def"
645 #include "Symbols.def"