2 * $Id: encoding.xs,v 0.3 2002/04/21 22:14:41 dankogai Exp $
5 #define PERL_NO_GET_CONTEXT
11 #define OUR_DEFAULT_FB "Encode::PERLQQ"
13 #if defined(USE_PERLIO) && !defined(USE_SFIO)
15 /* Define an encoding "layer" in the perliol.h sense.
17 The layer defined here "inherits" in an object-oriented sense from
18 the "perlio" layer with its PerlIOBuf_* "methods". The
19 implementation is particularly efficient as until Encode settles
20 down there is no point in tryint to tune it.
22 The layer works by overloading the "fill" and "flush" methods.
24 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO
25 perl API to convert the encoded data to UTF-8 form, then copies it
26 back to the buffer. The "base class's" read methods then see the
29 "flush" transforms the UTF-8 data deposited by the "base class's
30 write method in the buffer back into the encoded form using the
31 encode OO perl API, then copies data back into the buffer and calls
34 Note that "flush" is _also_ called for read mode - we still do the
35 (back)-translate so that the base class's "flush" sees the
36 correct number of encoded chars for positioning the seek
37 pointer. (This double translation is the worst performance issue -
38 particularly with all-perl encode engine.)
45 PerlIOBuf base; /* PerlIOBuf stuff */
46 SV *bufsv; /* buffer seen by layers above */
47 SV *dataSV; /* data we have read from layer below */
48 SV *enc; /* the encoding object */
49 SV *chk; /* CHECK in Encode methods */
50 int flags; /* Flags currently just needs lines */
51 int inEncodeCall; /* trap recursive encode calls */
57 PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
59 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
60 SV *sv = &PL_sv_undef;
63 /* Not 100% sure stack swap is right thing to do during dup ... */
64 PUSHSTACKi(PERLSI_MAGIC);
71 if (call_method("name", G_SCALAR) == 1) {
84 PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg, PerlIO_funcs *tab)
86 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
88 IV code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv,tab);
91 PUSHSTACKi(PERLSI_MAGIC);
100 if (call_pv("Encode::find_encoding", G_SCALAR) != 1) {
101 /* should never happen */
102 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
109 if (!SvROK(result) || !SvOBJECT(SvRV(result))) {
111 Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
122 if (call_method("renew",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
123 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support renew method",
131 e->enc = newSVsv(result);
135 if (call_method("needs_lines",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
136 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support needs_lines",
143 if (SvTRUE(result)) {
144 e->flags |= NEEDS_LINES;
147 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
150 e->chk = newSVsv(get_sv("PerlIO::encoding::fallback", 0));
160 PerlIOEncode_popped(pTHX_ PerlIO * f)
162 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
164 SvREFCNT_dec(e->enc);
168 SvREFCNT_dec(e->bufsv);
172 SvREFCNT_dec(e->dataSV);
176 SvREFCNT_dec(e->chk);
183 PerlIOEncode_get_base(pTHX_ PerlIO * f)
185 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
187 e->base.bufsiz = 1024;
189 e->bufsv = newSV(e->base.bufsiz);
190 sv_setpvn(e->bufsv, "", 0);
192 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
194 e->base.ptr = e->base.buf;
196 e->base.end = e->base.buf;
197 if (e->base.ptr < e->base.buf
198 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
199 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
200 e->base.buf + SvLEN(e->bufsv));
203 if (SvLEN(e->bufsv) < e->base.bufsiz) {
204 SSize_t poff = e->base.ptr - e->base.buf;
205 SSize_t eoff = e->base.end - e->base.buf;
206 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
207 e->base.ptr = e->base.buf + poff;
208 e->base.end = e->base.buf + eoff;
210 if (e->base.ptr < e->base.buf
211 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
212 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
213 e->base.buf + SvLEN(e->bufsv));
220 PerlIOEncode_fill(pTHX_ PerlIO * f)
222 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
228 if (PerlIO_flush(f) != 0)
231 if (!PerlIO_fast_gets(n)) {
232 /* Things get too messy if we don't have a buffer layer
233 push a :perlio to do the job */
235 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
237 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
240 PUSHSTACKi(PERLSI_MAGIC);
245 avail = PerlIO_get_cnt(n);
247 avail = PerlIO_fill(n);
249 avail = PerlIO_get_cnt(n);
252 if (!PerlIO_error(n) && PerlIO_eof(n))
256 if (avail > 0 || (e->flags & NEEDS_LINES)) {
257 STDCHAR *ptr = PerlIO_get_ptr(n);
258 SSize_t use = (avail >= 0) ? avail : 0;
262 e->base.ptr = e->base.end = (STDCHAR *) NULL;
263 (void) PerlIOEncode_get_base(aTHX_ f);
265 e->dataSV = newSV(0);
266 if (SvTYPE(e->dataSV) < SVt_PV) {
267 sv_upgrade(e->dataSV,SVt_PV);
269 if (e->flags & NEEDS_LINES) {
270 /* Encoding needs whole lines (e.g. iso-2022-*)
271 search back from end of available data for
274 STDCHAR *nl = ptr+use-1;
281 if (nl >= ptr && *nl == '\n') {
282 /* found a line - take up to and including that */
285 else if (avail > 0) {
286 /* No line, but not EOF - append avail to the pending data */
287 sv_catpvn(e->dataSV, (char*)ptr, use);
288 PerlIO_set_ptrcnt(n, ptr+use, 0);
291 else if (!SvCUR(e->dataSV)) {
295 if (SvCUR(e->dataSV)) {
296 /* something left over from last time - create a normal
297 SV with new data appended
299 if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
300 if (e->flags & NEEDS_LINES) {
301 /* Have to grow buffer */
302 e->base.bufsiz = use + SvCUR(e->dataSV);
303 PerlIOEncode_get_base(aTHX_ f);
306 use = e->base.bufsiz - SvCUR(e->dataSV);
309 sv_catpvn(e->dataSV,(char*)ptr,use);
312 /* Create a "dummy" SV to represent the available data from layer below */
313 if (SvLEN(e->dataSV) && SvPVX_const(e->dataSV)) {
314 Safefree(SvPVX_mutable(e->dataSV));
316 if (use > (SSize_t)e->base.bufsiz) {
317 if (e->flags & NEEDS_LINES) {
318 /* Have to grow buffer */
319 e->base.bufsiz = use;
320 PerlIOEncode_get_base(aTHX_ f);
323 use = e->base.bufsiz;
326 SvPV_set(e->dataSV, (char *) ptr);
327 SvLEN_set(e->dataSV, 0); /* Hands off sv.c - it isn't yours */
328 SvCUR_set(e->dataSV,use);
329 SvPOK_only(e->dataSV);
331 SvUTF8_off(e->dataSV);
337 if (call_method("decode", G_SCALAR) != 1) {
338 Perl_die(aTHX_ "panic: decode did not return a value");
343 /* Now get translated string (forced to UTF-8) and use as buffer */
345 s = SvPVutf8(uni, len);
346 #ifdef PARANOID_ENCODE_CHECKS
347 if (len && !is_utf8_string((U8*)s,len)) {
348 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
354 /* if decode gave us back dataSV then data may vanish when
355 we do ptrcnt adjust - so take our copy now.
356 (The copy is a pain - need a put-it-here option for decode.)
358 sv_setpvn(e->bufsv,s,len);
359 e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
360 e->base.end = e->base.ptr + SvCUR(e->bufsv);
361 PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
364 /* Adjust ptr/cnt not taking anything which
365 did not translate - not clear this is a win */
366 /* compute amount we took */
367 use -= SvCUR(e->dataSV);
368 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
369 /* and as we did not take it it isn't pending */
370 SvCUR_set(e->dataSV,0);
372 /* Got nothing - assume partial character so we need some more */
373 /* Make sure e->dataSV is a normal SV before re-filling as
374 buffer alias will change under us
376 s = SvPV(e->dataSV,len);
377 sv_setpvn(e->dataSV,s,len);
378 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
386 PerlIOBase(f)->flags |= PERLIO_F_EOF;
388 PerlIOBase(f)->flags |= PERLIO_F_ERROR;
397 PerlIOEncode_flush(pTHX_ PerlIO * f)
399 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
408 if ((PerlIOBase(f)->flags & PERLIO_F_WRBUF) && (e->base.ptr > e->base.buf)) {
409 if (e->inEncodeCall) return 0;
410 /* Write case - encode the buffer and write() to layer below */
411 PUSHSTACKi(PERLSI_MAGIC);
417 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
423 if (call_method("encode", G_SCALAR) != 1) {
425 Perl_die(aTHX_ "panic: encode did not return a value");
432 count = PerlIO_write(PerlIONext(f),s,len);
433 if ((STRLEN)count != len) {
439 if (PerlIO_flush(PerlIONext(f)) != 0) {
442 if (SvCUR(e->bufsv)) {
443 /* Did not all translate */
444 e->base.ptr = e->base.buf+SvCUR(e->bufsv);
448 else if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF)) {
450 /* if we have any untranslated stuff then unread that first */
451 /* FIXME - unread is fragile is there a better way ? */
452 if (e->dataSV && SvCUR(e->dataSV)) {
453 s = SvPV(e->dataSV, len);
454 count = PerlIO_unread(PerlIONext(f),s,len);
455 if ((STRLEN)count != len) {
458 SvCUR_set(e->dataSV,0);
460 /* See if there is anything left in the buffer */
461 if (e->base.ptr < e->base.end) {
462 if (e->inEncodeCall) return 0;
463 /* Bother - have unread data.
464 re-encode and unread() to layer below
466 PUSHSTACKi(PERLSI_MAGIC);
470 str = sv_newmortal();
471 sv_upgrade(str, SVt_PV);
472 SvPV_set(str, (char*)e->base.ptr);
474 SvCUR_set(str, e->base.end - e->base.ptr);
483 if (call_method("encode", G_SCALAR) != 1) {
485 Perl_die(aTHX_ "panic: encode did not return a value");
492 count = PerlIO_unread(PerlIONext(f),s,len);
493 if ((STRLEN)count != len) {
501 e->base.ptr = e->base.end = e->base.buf;
502 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
508 PerlIOEncode_close(pTHX_ PerlIO * f)
510 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
512 if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
513 /* Discard partial character */
515 SvCUR_set(e->dataSV,0);
517 /* Don't back decode and unread any pending data */
518 e->base.ptr = e->base.end = e->base.buf;
520 code = PerlIOBase_close(aTHX_ f);
522 /* This should only fire for write case */
523 if (e->base.buf && e->base.ptr > e->base.buf) {
524 Perl_croak(aTHX_ "Close with partial character");
526 SvREFCNT_dec(e->bufsv);
532 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
537 PerlIOEncode_tell(pTHX_ PerlIO * f)
539 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
540 /* Unfortunately the only way to get a postion is to (re-)translate,
541 the UTF8 we have in bufefr and then ask layer below
544 if (b->buf && b->ptr > b->buf) {
545 Perl_croak(aTHX_ "Cannot tell at partial character");
547 return PerlIO_tell(PerlIONext(f));
551 PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
552 CLONE_PARAMS * params, int flags)
554 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
555 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
556 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
558 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
565 PerlIOEncode_write(pTHX_ PerlIO *f, const void *vbuf, Size_t count)
567 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
568 if (e->flags & NEEDS_LINES) {
570 const char *ptr = (const char *) vbuf;
571 const char *end = ptr+count;
573 const char *nl = ptr;
574 while (nl < end && *nl++ != '\n') /* empty body */;
575 done = PerlIOBuf_write(aTHX_ f, ptr, nl-ptr);
576 if (done != nl-ptr) {
583 if (ptr[-1] == '\n') {
584 if (PerlIOEncode_flush(aTHX_ f) != 0) {
589 return (SSize_t) (ptr - (const char *) vbuf);
592 return PerlIOBuf_write(aTHX_ f, vbuf, count);
596 PerlIO_funcs PerlIO_encode = {
597 sizeof(PerlIO_funcs),
599 sizeof(PerlIOEncode),
600 PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
604 NULL, /* binmode - always pop */
619 PerlIOBase_setlinebuf,
620 PerlIOEncode_get_base,
624 PerlIOBuf_set_ptrcnt,
626 #endif /* encode layer */
628 MODULE = PerlIO::encoding PACKAGE = PerlIO::encoding
634 SV *chk = get_sv("PerlIO::encoding::fallback", GV_ADD|GV_ADDMULTI);
636 * we now "use Encode ()" here instead of
637 * PerlIO/encoding.pm. This avoids SEGV when ":encoding()"
638 * is invoked without prior "use Encode". -- dankogai
640 PUSHSTACKi(PERLSI_MAGIC);
642 if (!get_cvs(OUR_DEFAULT_FB, 0)) {
644 /* This would just be an irritant now loading works */
645 Perl_warner(aTHX_ packWARN(WARN_IO), ":encoding without 'use Encode'");
648 /* Encode needs a lot of stack - it is likely to move ... */
650 /* The SV is magically freed by load_module */
651 load_module(PERL_LOADMOD_NOIMPORT, newSVpvn("Encode", 6), Nullsv, Nullsv);
657 if (call_pv(OUR_DEFAULT_FB, G_SCALAR) != 1) {
658 /* should never happen */
659 Perl_die(aTHX_ "%s did not return a value",OUR_DEFAULT_FB);
665 PerlIO_define_layer(aTHX_ &PerlIO_encode);