2 * $Id: encoding.xs,v 0.3 2002/04/21 22:14:41 dankogai Exp $
5 #define PERL_NO_GET_CONTEXT
11 #define OUR_DEFAULT_FB "Encode::PERLQQ"
13 #if defined(USE_PERLIO) && !defined(USE_SFIO)
15 /* Define an encoding "layer" in the perliol.h sense.
17 The layer defined here "inherits" in an object-oriented sense from
18 the "perlio" layer with its PerlIOBuf_* "methods". The
19 implementation is particularly efficient as until Encode settles
20 down there is no point in tryint to tune it.
22 The layer works by overloading the "fill" and "flush" methods.
24 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO
25 perl API to convert the encoded data to UTF-8 form, then copies it
26 back to the buffer. The "base class's" read methods then see the
29 "flush" transforms the UTF-8 data deposited by the "base class's
30 write method in the buffer back into the encoded form using the
31 encode OO perl API, then copies data back into the buffer and calls
34 Note that "flush" is _also_ called for read mode - we still do the
35 (back)-translate so that the base class's "flush" sees the
36 correct number of encoded chars for positioning the seek
37 pointer. (This double translation is the worst performance issue -
38 particularly with all-perl encode engine.)
45 PerlIOBuf base; /* PerlIOBuf stuff */
46 SV *bufsv; /* buffer seen by layers above */
47 SV *dataSV; /* data we have read from layer below */
48 SV *enc; /* the encoding object */
49 SV *chk; /* CHECK in Encode methods */
50 int flags; /* Flags currently just needs lines */
56 PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
58 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
59 SV *sv = &PL_sv_undef;
62 /* Not 100% sure stack swap is right thing to do during dup ... */
63 PUSHSTACKi(PERLSI_MAGIC);
70 if (call_method("name", G_SCALAR) == 1) {
83 PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg, PerlIO_funcs *tab)
85 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
87 IV code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv,tab);
90 PUSHSTACKi(PERLSI_MAGIC);
99 if (call_pv("Encode::find_encoding", G_SCALAR) != 1) {
100 /* should never happen */
101 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
108 if (!SvROK(result) || !SvOBJECT(SvRV(result))) {
110 Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
121 if (call_method("renew",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
122 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support renew method",
130 e->enc = newSVsv(result);
134 if (call_method("needs_lines",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
135 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support needs_lines",
142 if (SvTRUE(result)) {
143 e->flags |= NEEDS_LINES;
146 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
149 e->chk = newSVsv(get_sv("PerlIO::encoding::fallback", 0));
158 PerlIOEncode_popped(pTHX_ PerlIO * f)
160 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
162 SvREFCNT_dec(e->enc);
166 SvREFCNT_dec(e->bufsv);
170 SvREFCNT_dec(e->dataSV);
174 SvREFCNT_dec(e->chk);
181 PerlIOEncode_get_base(pTHX_ PerlIO * f)
183 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
185 e->base.bufsiz = 1024;
187 e->bufsv = newSV(e->base.bufsiz);
188 sv_setpvn(e->bufsv, "", 0);
190 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
192 e->base.ptr = e->base.buf;
194 e->base.end = e->base.buf;
195 if (e->base.ptr < e->base.buf
196 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
197 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
198 e->base.buf + SvLEN(e->bufsv));
201 if (SvLEN(e->bufsv) < e->base.bufsiz) {
202 SSize_t poff = e->base.ptr - e->base.buf;
203 SSize_t eoff = e->base.end - e->base.buf;
204 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
205 e->base.ptr = e->base.buf + poff;
206 e->base.end = e->base.buf + eoff;
208 if (e->base.ptr < e->base.buf
209 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
210 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
211 e->base.buf + SvLEN(e->bufsv));
218 PerlIOEncode_fill(pTHX_ PerlIO * f)
220 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
226 if (PerlIO_flush(f) != 0)
229 if (!PerlIO_fast_gets(n)) {
230 /* Things get too messy if we don't have a buffer layer
231 push a :perlio to do the job */
233 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
235 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
238 PUSHSTACKi(PERLSI_MAGIC);
243 avail = PerlIO_get_cnt(n);
245 avail = PerlIO_fill(n);
247 avail = PerlIO_get_cnt(n);
250 if (!PerlIO_error(n) && PerlIO_eof(n))
254 if (avail > 0 || (e->flags & NEEDS_LINES)) {
255 STDCHAR *ptr = PerlIO_get_ptr(n);
256 SSize_t use = (avail >= 0) ? avail : 0;
260 e->base.ptr = e->base.end = (STDCHAR *) Nullch;
261 (void) PerlIOEncode_get_base(aTHX_ f);
263 e->dataSV = newSV(0);
264 if (SvTYPE(e->dataSV) < SVt_PV) {
265 sv_upgrade(e->dataSV,SVt_PV);
267 if (e->flags & NEEDS_LINES) {
268 /* Encoding needs whole lines (e.g. iso-2022-*)
269 search back from end of available data for
272 STDCHAR *nl = ptr+use-1;
279 if (nl >= ptr && *nl == '\n') {
280 /* found a line - take up to and including that */
283 else if (avail > 0) {
284 /* No line, but not EOF - append avail to the pending data */
285 sv_catpvn(e->dataSV, (char*)ptr, use);
286 PerlIO_set_ptrcnt(n, ptr+use, 0);
289 else if (!SvCUR(e->dataSV)) {
293 if (SvCUR(e->dataSV)) {
294 /* something left over from last time - create a normal
295 SV with new data appended
297 if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
298 if (e->flags & NEEDS_LINES) {
299 /* Have to grow buffer */
300 e->base.bufsiz = use + SvCUR(e->dataSV);
301 PerlIOEncode_get_base(aTHX_ f);
304 use = e->base.bufsiz - SvCUR(e->dataSV);
307 sv_catpvn(e->dataSV,(char*)ptr,use);
310 /* Create a "dummy" SV to represent the available data from layer below */
311 if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) {
312 Safefree(SvPVX(e->dataSV));
314 if (use > (SSize_t)e->base.bufsiz) {
315 if (e->flags & NEEDS_LINES) {
316 /* Have to grow buffer */
317 e->base.bufsiz = use;
318 PerlIOEncode_get_base(aTHX_ f);
321 use = e->base.bufsiz;
324 SvPVX(e->dataSV) = (char *) ptr;
325 SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */
326 SvCUR_set(e->dataSV,use);
327 SvPOK_only(e->dataSV);
329 SvUTF8_off(e->dataSV);
335 if (call_method("decode", G_SCALAR) != 1) {
336 Perl_die(aTHX_ "panic: decode did not return a value");
341 /* Now get translated string (forced to UTF-8) and use as buffer */
343 s = SvPVutf8(uni, len);
344 #ifdef PARANOID_ENCODE_CHECKS
345 if (len && !is_utf8_string((U8*)s,len)) {
346 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
352 /* if decode gave us back dataSV then data may vanish when
353 we do ptrcnt adjust - so take our copy now.
354 (The copy is a pain - need a put-it-here option for decode.)
356 sv_setpvn(e->bufsv,s,len);
357 e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
358 e->base.end = e->base.ptr + SvCUR(e->bufsv);
359 PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
362 /* Adjust ptr/cnt not taking anything which
363 did not translate - not clear this is a win */
364 /* compute amount we took */
365 use -= SvCUR(e->dataSV);
366 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
367 /* and as we did not take it it isn't pending */
368 SvCUR_set(e->dataSV,0);
370 /* Got nothing - assume partial character so we need some more */
371 /* Make sure e->dataSV is a normal SV before re-filling as
372 buffer alias will change under us
374 s = SvPV(e->dataSV,len);
375 sv_setpvn(e->dataSV,s,len);
376 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
384 PerlIOBase(f)->flags |= PERLIO_F_EOF;
386 PerlIOBase(f)->flags |= PERLIO_F_ERROR;
395 PerlIOEncode_flush(pTHX_ PerlIO * f)
397 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
406 if ((PerlIOBase(f)->flags & PERLIO_F_WRBUF) && (e->base.ptr > e->base.buf)) {
407 /* Write case - encode the buffer and write() to layer below */
408 PUSHSTACKi(PERLSI_MAGIC);
414 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
419 if (call_method("encode", G_SCALAR) != 1) {
420 Perl_die(aTHX_ "panic: encode did not return a value");
426 count = PerlIO_write(PerlIONext(f),s,len);
427 if ((STRLEN)count != len) {
433 if (PerlIO_flush(PerlIONext(f)) != 0) {
436 if (SvCUR(e->bufsv)) {
437 /* Did not all translate */
438 e->base.ptr = e->base.buf+SvCUR(e->bufsv);
442 else if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF)) {
444 /* if we have any untranslated stuff then unread that first */
445 /* FIXME - unread is fragile is there a better way ? */
446 if (e->dataSV && SvCUR(e->dataSV)) {
447 s = SvPV(e->dataSV, len);
448 count = PerlIO_unread(PerlIONext(f),s,len);
449 if ((STRLEN)count != len) {
452 SvCUR_set(e->dataSV,0);
454 /* See if there is anything left in the buffer */
455 if (e->base.ptr < e->base.end) {
456 /* Bother - have unread data.
457 re-encode and unread() to layer below
459 PUSHSTACKi(PERLSI_MAGIC);
463 str = sv_newmortal();
464 sv_upgrade(str, SVt_PV);
465 SvPVX(str) = (char*)e->base.ptr;
467 SvCUR_set(str, e->base.end - e->base.ptr);
475 if (call_method("encode", G_SCALAR) != 1) {
476 Perl_die(aTHX_ "panic: encode did not return a value");
482 count = PerlIO_unread(PerlIONext(f),s,len);
483 if ((STRLEN)count != len) {
491 e->base.ptr = e->base.end = e->base.buf;
492 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
498 PerlIOEncode_close(pTHX_ PerlIO * f)
500 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
502 if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
503 /* Discard partial character */
505 SvCUR_set(e->dataSV,0);
507 /* Don't back decode and unread any pending data */
508 e->base.ptr = e->base.end = e->base.buf;
510 code = PerlIOBase_close(aTHX_ f);
512 /* This should only fire for write case */
513 if (e->base.buf && e->base.ptr > e->base.buf) {
514 Perl_croak(aTHX_ "Close with partial character");
516 SvREFCNT_dec(e->bufsv);
522 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
527 PerlIOEncode_tell(pTHX_ PerlIO * f)
529 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
530 /* Unfortunately the only way to get a postion is to (re-)translate,
531 the UTF8 we have in bufefr and then ask layer below
534 if (b->buf && b->ptr > b->buf) {
535 Perl_croak(aTHX_ "Cannot tell at partial character");
537 return PerlIO_tell(PerlIONext(f));
541 PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
542 CLONE_PARAMS * params, int flags)
544 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
545 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
546 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
548 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
555 PerlIOEncode_write(pTHX_ PerlIO *f, const void *vbuf, Size_t count)
557 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
558 if (e->flags & NEEDS_LINES) {
560 const char *ptr = (const char *) vbuf;
561 const char *end = ptr+count;
563 const char *nl = ptr;
564 while (nl < end && *nl++ != '\n') /* empty body */;
565 done = PerlIOBuf_write(aTHX_ f, ptr, nl-ptr);
566 if (done != nl-ptr) {
573 if (ptr[-1] == '\n') {
574 if (PerlIOEncode_flush(aTHX_ f) != 0) {
579 return (SSize_t) (ptr - (const char *) vbuf);
582 return PerlIOBuf_write(aTHX_ f, vbuf, count);
586 PerlIO_funcs PerlIO_encode = {
587 sizeof(PerlIO_funcs),
589 sizeof(PerlIOEncode),
590 PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
594 NULL, /* binmode - always pop */
609 PerlIOBase_setlinebuf,
610 PerlIOEncode_get_base,
614 PerlIOBuf_set_ptrcnt,
616 #endif /* encode layer */
618 MODULE = PerlIO::encoding PACKAGE = PerlIO::encoding
624 SV *chk = get_sv("PerlIO::encoding::fallback", GV_ADD|GV_ADDMULTI);
626 * we now "use Encode ()" here instead of
627 * PerlIO/encoding.pm. This avoids SEGV when ":encoding()"
628 * is invoked without prior "use Encode". -- dankogai
630 PUSHSTACKi(PERLSI_MAGIC);
632 if (!get_cv(OUR_DEFAULT_FB, 0)) {
634 /* This would just be an irritant now loading works */
635 Perl_warner(aTHX_ packWARN(WARN_IO), ":encoding without 'use Encode'");
638 /* Encode needs a lot of stack - it is likely to move ... */
640 /* The SV is magically freed by load_module */
641 load_module(PERL_LOADMOD_NOIMPORT, newSVpvn("Encode", 6), Nullsv, Nullsv);
647 if (call_pv(OUR_DEFAULT_FB, G_SCALAR) != 1) {
648 /* should never happen */
649 Perl_die(aTHX_ "%s did not return a value",OUR_DEFAULT_FB);
655 PerlIO_define_layer(aTHX_ &PerlIO_encode);