10 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) { \
11 Perl_croak(aTHX_ "panic_unimplemented"); \
12 return (y)0; /* fool picky compilers */ \
14 UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
15 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
17 #if defined(USE_PERLIO) && !defined(USE_SFIO)
18 /* Define an encoding "layer" in the perliol.h sense.
19 The layer defined here "inherits" in an object-oriented sense from the
20 "perlio" layer with its PerlIOBuf_* "methods".
21 The implementation is particularly efficient as until Encode settles down
22 there is no point in tryint to tune it.
24 The layer works by overloading the "fill" and "flush" methods.
26 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API
27 to convert the encoded data to UTF-8 form, then copies it back to the
28 buffer. The "base class's" read methods then see the UTF-8 data.
30 "flush" transforms the UTF-8 data deposited by the "base class's write
31 method in the buffer back into the encoded form using the encode OO perl API,
32 then copies data back into the buffer and calls "SUPER::flush.
34 Note that "flush" is _also_ called for read mode - we still do the (back)-translate
35 so that the the base class's "flush" sees the correct number of encoded chars
36 for positioning the seek pointer. (This double translation is the worst performance
37 issue - particularly with all-perl encode engine.)
46 PerlIOBuf base; /* PerlIOBuf stuff */
53 PerlIOEncode_pushed(PerlIO *f, const char *mode,const char *arg,STRLEN len)
55 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
59 code = PerlIOBuf_pushed(f,mode,Nullch,0);
63 XPUSHs(sv_2mortal(newSVpvn(arg,len)));
65 if (perl_call_pv("Encode::find_encoding",G_SCALAR) != 1)
67 /* should never happen */
68 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
78 Perl_warner(aTHX_ WARN_IO, "Cannot find encoding \"%.*s\"", (int) len, arg);
84 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
89 PerlIOEncode_popped(PerlIO *f)
91 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
100 SvREFCNT_dec(e->bufsv);
107 PerlIOEncode_get_base(PerlIO *f)
109 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
112 e->base.bufsiz = 1024;
115 e->bufsv = newSV(e->base.bufsiz);
116 sv_setpvn(e->bufsv,"",0);
118 e->base.buf = (STDCHAR *)SvPVX(e->bufsv);
120 e->base.ptr = e->base.buf;
122 e->base.end = e->base.buf;
123 if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
125 Perl_warn(aTHX_ " ptr %p(%p)%p",
126 e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
129 if (SvLEN(e->bufsv) < e->base.bufsiz)
131 SSize_t poff = e->base.ptr - e->base.buf;
132 SSize_t eoff = e->base.end - e->base.buf;
133 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,e->base.bufsiz);
134 e->base.ptr = e->base.buf + poff;
135 e->base.end = e->base.buf + eoff;
137 if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
139 Perl_warn(aTHX_ " ptr %p(%p)%p",
140 e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
147 PerlIOEncode_fill(PerlIO *f)
149 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
153 code = PerlIOBuf_fill(f);
159 /* Set SV that is the buffer to be buf..ptr */
160 SvCUR_set(e->bufsv, e->base.end - e->base.buf);
161 SvUTF8_off(e->bufsv);
169 if (perl_call_method("decode",G_SCALAR) != 1)
174 /* Now get translated string (forced to UTF-8) and copy back to buffer
175 don't use sv_setsv as that may "steal" PV from returned temp
176 and so free() our known-large-enough buffer.
177 sv_setpvn() should do but let us do it long hand.
179 s = SvPVutf8(uni,len);
180 if (s != SvPVX(e->bufsv))
182 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
183 Move(s,e->base.buf,len,char);
184 SvCUR_set(e->bufsv,len);
187 e->base.end = e->base.buf+len;
188 e->base.ptr = e->base.buf;
196 PerlIOEncode_flush(PerlIO *f)
198 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
201 if (e->bufsv && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF|PERLIO_F_WRBUF)))
208 if (PerlIOBase(f)->flags & PERLIO_F_RDBUF)
210 /* This is really just a flag to see if we took all the data, if
211 we did PerlIOBase_flush avoids a seek to lower layer.
212 Need to revisit if we start getting clever with unreads or seeks-in-buffer
214 left = e->base.end - e->base.ptr;
220 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
225 if (perl_call_method("encode",G_SCALAR) != 1)
231 if (s != SvPVX(e->bufsv))
233 e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
234 Move(s,e->base.buf,len,char);
235 SvCUR_set(e->bufsv,len);
237 SvUTF8_off(e->bufsv);
238 e->base.ptr = e->base.buf+len;
239 /* restore end != ptr as inequality is used by PerlIOBuf_flush in read case */
240 e->base.end = e->base.ptr + left;
243 if (PerlIOBuf_flush(f) != 0)
250 PerlIOEncode_close(PerlIO *f)
252 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
253 IV code = PerlIOBase_close(f);
257 SvREFCNT_dec(e->bufsv);
263 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF|PERLIO_F_WRBUF);
268 PerlIOEncode_tell(PerlIO *f)
270 PerlIOBuf *b = PerlIOSelf(f,PerlIOBuf);
271 /* Unfortunately the only way to get a postion is to back-translate,
272 the UTF8-bytes we have buf..ptr and adjust accordingly.
273 But we will try and save any unread data in case stream
276 if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF) && b->ptr < b->end)
278 Size_t count = b->end - b->ptr;
279 PerlIO_push(f,&PerlIO_pending,"r",Nullch,0);
280 /* Save what we have left to read */
281 PerlIOSelf(f,PerlIOBuf)->bufsiz = count;
282 PerlIO_unread(f,b->ptr,count);
283 /* There isn't any unread data - we just saved it - so avoid the lower seek */
285 /* Flush ourselves - now one layer down,
286 this does the back translate and adjusts position
288 PerlIO_flush(PerlIONext(f));
289 /* Set position of the saved data */
290 PerlIOSelf(f,PerlIOBuf)->posn = b->posn;
299 PerlIO_funcs PerlIO_encode = {
301 sizeof(PerlIOEncode),
320 PerlIOBuf_setlinebuf,
321 PerlIOEncode_get_base,
325 PerlIOBuf_set_ptrcnt,
330 Encode_Define(pTHX_ encode_t *enc)
333 HV *stash = gv_stashpv("Encode::XS", TRUE);
334 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
340 const char *name = enc->name[i++];
341 XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
344 call_pv("Encode::define_encoding",G_DISCARD);
348 void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {}
351 encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check)
354 U8 *s = (U8 *) SvPV(src,slen);
355 SV *dst = sv_2mortal(newSV(2*slen+1));
358 U8 *d = (U8 *) SvGROW(dst, 2*slen+1);
359 STRLEN dlen = SvLEN(dst);
361 while ((code = do_encode(dir,s,&slen,d,dlen,&dlen,!check)))
366 if (code == ENCODE_FALLBACK)
373 STRLEN need = (slen) ? (SvLEN(dst)*SvCUR(src)/slen) : (dlen + UTF8_MAXLEN);
374 if (need <= SvLEN(dst))
376 d = (U8 *) SvGROW(dst, need);
383 if (dir == enc->f_utf8)
385 if (!check && ckWARN_d(WARN_UTF8))
388 UV ch = utf8n_to_uvuni(s+slen,(SvCUR(src)-slen),&clen,0);
389 Perl_warner(aTHX_ WARN_UTF8, "\"\\N{U+%"UVxf"}\" does not map to %s", ch, enc->name[0]);
390 /* FIXME: Skip over the character, copy in replacement and continue
391 * but that is messy so for now just fail.
402 /* UTF-8 is supposed to be "Universal" so should not happen */
403 Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8",
404 enc->name[0], (int)(SvCUR(src)-slen),s+slen);
409 if (!check && ckWARN_d(WARN_UTF8))
411 Perl_warner(aTHX_ WARN_UTF8, "Partial %s character",
412 (dir == enc->f_utf8) ? "UTF-8" : enc->name[0]);
417 Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
418 code, (dir == enc->f_utf8) ? "to" : "from",enc->name[0]);
426 if (slen < SvCUR(src))
428 Move(s+slen,s,SvCUR(src)-slen,U8);
430 SvCUR_set(src,SvCUR(src)-slen);
436 MODULE = Encode PACKAGE = Encode PREFIX = sv_
444 char *s = SvPV(sv,len);
445 if (!SvUTF8(sv) || is_utf8_string((U8*)s,len))
464 sv_utf8_downgrade(sv,failok=0)
468 MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_
473 Method_decode(obj,src,check = 0)
479 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
480 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check);
486 Method_encode(obj,src,check = 0)
492 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
493 sv_utf8_upgrade(src);
494 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check);
498 MODULE = Encode PACKAGE = Encode
503 _bytes_to_utf8(sv, ...)
507 SV * encoding = items == 2 ? ST(1) : Nullsv;
510 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
513 U8* s = (U8*)SvPV(sv, len);
516 converted = bytes_to_utf8(s, &len); /* This allocs */
517 sv_setpvn(sv, (char *)converted, len);
518 SvUTF8_on(sv); /* XXX Should we? */
519 Safefree(converted); /* ... so free it */
527 _utf8_to_bytes(sv, ...)
531 SV * to = items > 1 ? ST(1) : Nullsv;
532 SV * check = items > 2 ? ST(2) : Nullsv;
535 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
538 U8 *s = (U8*)SvPV(sv, len);
541 /* Must do things the slow way */
543 U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */
546 New(83, dest, len, U8); /* I think */
555 /* Have to do it all ourselves because of error routine,
559 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
560 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
561 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
562 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
563 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
564 else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
565 else { ulen = 13; uv = 0; }
567 /* Note change to utf8.c variable naming, for variety */
569 if ((*s & 0xc0) != 0x80)
573 uv = (uv << 6) | (*s++ & 0x3f);
577 call_failure(check, s, dest, src);
578 /* Now what happens? */
584 RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
591 is_utf8(sv, check = FALSE)
597 RETVAL = SvUTF8(sv) ? TRUE : FALSE;
600 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
615 SV *rsv = newSViv(SvUTF8(sv));
619 RETVAL = &PL_sv_undef;
631 SV *rsv = newSViv(SvUTF8(sv));
635 RETVAL = &PL_sv_undef;
643 #if defined(USE_PERLIO) && !defined(USE_SFIO)
644 PerlIO_define_layer(&PerlIO_encode);
646 #include "iso8859.def"
647 #include "EBCDIC.def"
648 #include "Symbols.def"