1 #define PERL_NO_GET_CONTEXT
13 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \
14 Perl_croak(aTHX_ "panic_unimplemented"); \
15 return (y)0; /* fool picky compilers */ \
17 UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
18 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
19 #if defined(USE_PERLIO) && !defined(USE_SFIO)
20 /* Define an encoding "layer" in the perliol.h sense.
21 The layer defined here "inherits" in an object-oriented sense from the
22 "perlio" layer with its PerlIOBuf_* "methods".
23 The implementation is particularly efficient as until Encode settles down
24 there is no point in tryint to tune it.
26 The layer works by overloading the "fill" and "flush" methods.
28 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API
29 to convert the encoded data to UTF-8 form, then copies it back to the
30 buffer. The "base class's" read methods then see the UTF-8 data.
32 "flush" transforms the UTF-8 data deposited by the "base class's write
33 method in the buffer back into the encoded form using the encode OO perl API,
34 then copies data back into the buffer and calls "SUPER::flush.
36 Note that "flush" is _also_ called for read mode - we still do the (back)-translate
37 so that the the base class's "flush" sees the correct number of encoded chars
38 for positioning the seek pointer. (This double translation is the worst performance
39 issue - particularly with all-perl encode engine.)
44 PerlIOBuf base; /* PerlIOBuf stuff */
50 PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
52 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
53 SV *sv = &PL_sv_undef;
61 if (perl_call_method("name", G_SCALAR) == 1) {
71 PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
73 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
76 code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
82 if (perl_call_pv("Encode::find_encoding", G_SCALAR) != 1) {
83 /* should never happen */
84 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
93 Perl_warner(aTHX_ WARN_IO, "Cannot find encoding \"%" SVf "\"",
99 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
107 PerlIOEncode_popped(pTHX_ PerlIO * f)
109 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
111 SvREFCNT_dec(e->enc);
115 SvREFCNT_dec(e->bufsv);
122 PerlIOEncode_get_base(pTHX_ PerlIO * f)
124 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
126 e->base.bufsiz = 1024;
128 e->bufsv = newSV(e->base.bufsiz);
129 sv_setpvn(e->bufsv, "", 0);
131 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
133 e->base.ptr = e->base.buf;
135 e->base.end = e->base.buf;
136 if (e->base.ptr < e->base.buf
137 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
138 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
139 e->base.buf + SvLEN(e->bufsv));
142 if (SvLEN(e->bufsv) < e->base.bufsiz) {
143 SSize_t poff = e->base.ptr - e->base.buf;
144 SSize_t eoff = e->base.end - e->base.buf;
145 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
146 e->base.ptr = e->base.buf + poff;
147 e->base.end = e->base.buf + eoff;
149 if (e->base.ptr < e->base.buf
150 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
151 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
152 e->base.buf + SvLEN(e->bufsv));
159 PerlIOEncode_fill(pTHX_ PerlIO * f)
161 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
164 code = PerlIOBuf_fill(aTHX_ f);
169 /* Set SV that is the buffer to be buf..ptr */
170 SvCUR_set(e->bufsv, e->base.end - e->base.buf);
171 SvUTF8_off(e->bufsv);
179 if (perl_call_method("decode", G_SCALAR) != 1)
184 /* Now get translated string (forced to UTF-8) and copy back to buffer
185 don't use sv_setsv as that may "steal" PV from returned temp
186 and so free() our known-large-enough buffer.
187 sv_setpvn() should do but let us do it long hand.
189 s = SvPVutf8(uni, len);
190 if (s != SvPVX(e->bufsv)) {
191 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, len);
192 Move(s, e->base.buf, len, char);
193 SvCUR_set(e->bufsv, len);
196 e->base.end = e->base.buf + len;
197 e->base.ptr = e->base.buf;
205 PerlIOEncode_flush(pTHX_ PerlIO * f)
207 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
210 && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF | PERLIO_F_WRBUF))
211 && (e->base.ptr > e->base.buf)
218 if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
219 /* This is really just a flag to see if we took all the data, if
220 we did PerlIOBase_flush avoids a seek to lower layer.
221 Need to revisit if we start getting clever with unreads or seeks-in-buffer
223 left = e->base.end - e->base.ptr;
229 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
234 if (perl_call_method("encode", G_SCALAR) != 1)
240 if (s != SvPVX(e->bufsv)) {
241 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, len);
242 Move(s, e->base.buf, len, char);
243 SvCUR_set(e->bufsv, len);
245 SvUTF8_off(e->bufsv);
246 e->base.ptr = e->base.buf + len;
247 /* restore end != ptr as inequality is used by PerlIOBuf_flush in read case */
248 e->base.end = e->base.ptr + left;
251 if (PerlIOBuf_flush(aTHX_ f) != 0)
258 PerlIOEncode_close(pTHX_ PerlIO * f)
260 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
261 IV code = PerlIOBase_close(aTHX_ f);
263 SvREFCNT_dec(e->bufsv);
269 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
274 PerlIOEncode_tell(pTHX_ PerlIO * f)
276 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
277 /* Unfortunately the only way to get a postion is to back-translate,
278 the UTF8-bytes we have buf..ptr and adjust accordingly.
279 But we will try and save any unread data in case stream
282 if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF) && b->ptr < b->end) {
283 Size_t count = b->end - b->ptr;
284 PerlIO_push(aTHX_ f, &PerlIO_pending, "r", Nullsv);
285 /* Save what we have left to read */
286 PerlIOSelf(f, PerlIOBuf)->bufsiz = count;
287 PerlIO_unread(f, b->ptr, count);
288 /* There isn't any unread data - we just saved it - so avoid the lower seek */
290 /* Flush ourselves - now one layer down,
291 this does the back translate and adjusts position
293 PerlIO_flush(PerlIONext(f));
294 /* Set position of the saved data */
295 PerlIOSelf(f, PerlIOBuf)->posn = b->posn;
304 PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
305 CLONE_PARAMS * params, int flags)
307 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
308 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
309 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
311 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
317 PerlIO_funcs PerlIO_encode = {
319 sizeof(PerlIOEncode),
338 PerlIOBase_setlinebuf,
339 PerlIOEncode_get_base,
343 PerlIOBuf_set_ptrcnt,
345 #endif /* encode layer */
348 Encode_XSEncoding(pTHX_ encode_t * enc)
351 HV *stash = gv_stashpv("Encode::XS", TRUE);
352 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash);
356 while (enc->name[i]) {
357 const char *name = enc->name[i++];
358 XPUSHs(sv_2mortal(newSVpvn(name, strlen(name))));
361 call_pv("Encode::define_encoding", G_DISCARD);
366 call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig)
371 encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
375 U8 *s = (U8 *) SvPV(src, slen);
376 SV *dst = sv_2mortal(newSV(2 * slen + 1));
378 U8 *d = (U8 *) SvGROW(dst, 2 * slen + 1);
379 STRLEN dlen = SvLEN(dst);
381 while ((code = do_encode(dir, s, &slen, d, dlen, &dlen, !check))) {
382 SvCUR_set(dst, dlen);
385 if (code == ENCODE_FALLBACK)
391 STRLEN need = dlen + UTF8_MAXLEN * 128; /* 128 is too big or small? */
392 d = (U8 *) SvGROW(dst, need);
393 if (dlen >= SvLEN(dst)) {
395 "Destination couldn't be grown (the need may be miscalculated).");
403 if (dir == enc->f_utf8) {
404 if (!check && ckWARN_d(WARN_UTF8)) {
407 utf8n_to_uvuni(s + slen, (SvCUR(src) - slen),
409 Perl_warner(aTHX_ WARN_UTF8,
411 "}\" does not map to %s", ch,
413 /* FIXME: Skip over the character, copy in replacement and continue
414 * but that is messy so for now just fail.
423 /* UTF-8 is supposed to be "Universal" so should not happen */
424 Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8",
425 enc->name[0], (int) (SvCUR(src) - slen),
431 if (!check && ckWARN_d(WARN_UTF8)) {
432 Perl_warner(aTHX_ WARN_UTF8, "Partial %s character",
434 enc->f_utf8) ? "UTF-8" : enc->name[0]);
439 Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
440 code, (dir == enc->f_utf8) ? "to" : "from",
445 SvCUR_set(dst, dlen);
448 if (slen < SvCUR(src)) {
449 Move(s + slen, s, SvCUR(src) - slen, U8);
451 SvCUR_set(src, SvCUR(src) - slen);
455 SvCUR_set(dst, slen);
461 MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_
466 Method_decode(obj,src,check = FALSE)
472 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
473 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check);
479 Method_encode(obj,src,check = FALSE)
485 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
486 sv_utf8_upgrade(src);
487 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check);
491 MODULE = Encode PACKAGE = Encode
496 _bytes_to_utf8(sv, ...)
500 SV * encoding = items == 2 ? ST(1) : Nullsv;
503 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
506 U8* s = (U8*)SvPV(sv, len);
509 converted = bytes_to_utf8(s, &len); /* This allocs */
510 sv_setpvn(sv, (char *)converted, len);
511 SvUTF8_on(sv); /* XXX Should we? */
512 Safefree(converted); /* ... so free it */
520 _utf8_to_bytes(sv, ...)
524 SV * to = items > 1 ? ST(1) : Nullsv;
525 SV * check = items > 2 ? ST(2) : Nullsv;
528 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
531 U8 *s = (U8*)SvPV(sv, len);
535 /* Must do things the slow way */
537 U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */
540 New(83, dest, len, U8); /* I think */
549 /* Have to do it all ourselves because of error routine,
553 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
554 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
555 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
556 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
557 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
558 else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
559 else { ulen = 13; uv = 0; }
561 /* Note change to utf8.c variable naming, for variety */
563 if ((*s & 0xc0) != 0x80)
567 uv = (uv << 6) | (*s++ & 0x3f);
571 call_failure(check, s, dest, src);
572 /* Now what happens? */
578 RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
585 is_utf8(sv, check = FALSE)
590 if (SvGMAGICAL(sv)) /* it could be $1, for example */
591 sv = newSVsv(sv); /* GMAGIG will be done */
593 RETVAL = SvUTF8(sv) ? TRUE : FALSE;
596 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
602 SvREFCNT_dec(sv); /* it was a temp copy */
613 SV *rsv = newSViv(SvUTF8(sv));
617 RETVAL = &PL_sv_undef;
629 SV *rsv = newSViv(SvUTF8(sv));
633 RETVAL = &PL_sv_undef;
641 #if defined(USE_PERLIO) && !defined(USE_SFIO)
642 PerlIO_define_layer(aTHX_ &PerlIO_encode);
644 #include "8859_def.h"
645 #include "EBCDIC_def.h"
646 #include "Symbols_def.h"