2 * $Id: encoding.xs,v 0.3 2002/04/21 22:14:41 dankogai Exp $
5 #define PERL_NO_GET_CONTEXT
11 #if defined(USE_PERLIO) && !defined(USE_SFIO)
13 /* Define an encoding "layer" in the perliol.h sense.
15 The layer defined here "inherits" in an object-oriented sense from
16 the "perlio" layer with its PerlIOBuf_* "methods". The
17 implementation is particularly efficient as until Encode settles
18 down there is no point in tryint to tune it.
20 The layer works by overloading the "fill" and "flush" methods.
22 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO
23 perl API to convert the encoded data to UTF-8 form, then copies it
24 back to the buffer. The "base class's" read methods then see the
27 "flush" transforms the UTF-8 data deposited by the "base class's
28 write method in the buffer back into the encoded form using the
29 encode OO perl API, then copies data back into the buffer and calls
32 Note that "flush" is _also_ called for read mode - we still do the
33 (back)-translate so that the the base class's "flush" sees the
34 correct number of encoded chars for positioning the seek
35 pointer. (This double translation is the worst performance issue -
36 particularly with all-perl encode engine.)
43 PerlIOBuf base; /* PerlIOBuf stuff */
44 SV *bufsv; /* buffer seen by layers above */
45 SV *dataSV; /* data we have read from layer below */
46 SV *enc; /* the encoding object */
47 SV *chk; /* CHECK in Encode methods */
48 int flags; /* Flags currently just needs lines */
52 #define OUR_DEFAULT_FB "Encode::FB_QUIET"
55 PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
57 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
58 SV *sv = &PL_sv_undef;
61 /* Not 100% sure stack swap is right thing to do during dup ... */
62 PUSHSTACKi(PERLSI_MAGIC);
69 if (call_method("name", G_SCALAR) == 1) {
82 PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
84 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
86 IV code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
89 PUSHSTACKi(PERLSI_MAGIC);
98 if (call_pv("Encode::find_encoding", G_SCALAR) != 1) {
99 /* should never happen */
100 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
107 if (!SvROK(result) || !SvOBJECT(SvRV(result))) {
109 Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
115 #ifdef USE_NEW_SEQUENCE
119 if (call_method("new_sequence",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
120 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support new_sequence",
129 e->enc = newSVsv(result);
133 if (call_method("needs_lines",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
134 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support needs_lines",
141 if (SvTRUE(result)) {
142 e->flags |= NEEDS_LINES;
145 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
148 e->chk = newSVsv(get_sv("PerlIO::encoding::check", 0));
157 PerlIOEncode_popped(pTHX_ PerlIO * f)
159 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
161 SvREFCNT_dec(e->enc);
165 SvREFCNT_dec(e->bufsv);
169 SvREFCNT_dec(e->dataSV);
173 SvREFCNT_dec(e->chk);
180 PerlIOEncode_get_base(pTHX_ PerlIO * f)
182 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
184 e->base.bufsiz = 1024;
186 e->bufsv = newSV(e->base.bufsiz);
187 sv_setpvn(e->bufsv, "", 0);
189 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
191 e->base.ptr = e->base.buf;
193 e->base.end = e->base.buf;
194 if (e->base.ptr < e->base.buf
195 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
196 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
197 e->base.buf + SvLEN(e->bufsv));
200 if (SvLEN(e->bufsv) < e->base.bufsiz) {
201 SSize_t poff = e->base.ptr - e->base.buf;
202 SSize_t eoff = e->base.end - e->base.buf;
203 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
204 e->base.ptr = e->base.buf + poff;
205 e->base.end = e->base.buf + eoff;
207 if (e->base.ptr < e->base.buf
208 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
209 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
210 e->base.buf + SvLEN(e->bufsv));
217 PerlIOEncode_fill(pTHX_ PerlIO * f)
219 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
225 if (PerlIO_flush(f) != 0)
228 if (!PerlIO_fast_gets(n)) {
229 /* Things get too messy if we don't have a buffer layer
230 push a :perlio to do the job */
232 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
234 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
237 PUSHSTACKi(PERLSI_MAGIC);
242 avail = PerlIO_get_cnt(n);
244 avail = PerlIO_fill(n);
246 avail = PerlIO_get_cnt(n);
249 if (!PerlIO_error(n) && PerlIO_eof(n))
253 if (avail > 0 || (e->flags & NEEDS_LINES)) {
254 STDCHAR *ptr = PerlIO_get_ptr(n);
255 SSize_t use = (avail >= 0) ? avail : 0;
259 e->base.ptr = e->base.end = (STDCHAR *) Nullch;
260 (void) PerlIOEncode_get_base(aTHX_ f);
262 e->dataSV = newSV(0);
263 if (SvTYPE(e->dataSV) < SVt_PV) {
264 sv_upgrade(e->dataSV,SVt_PV);
266 if (e->flags & NEEDS_LINES) {
267 /* Encoding needs whole lines (e.g. iso-2022-*)
268 search back from end of available data for
271 STDCHAR *nl = ptr+use-1;
278 if (nl >= ptr && *nl == '\n') {
279 /* found a line - take up to and including that */
282 else if (avail > 0) {
283 /* No line, but not EOF - append avail to the pending data */
284 sv_catpvn(e->dataSV, (char*)ptr, use);
285 PerlIO_set_ptrcnt(n, ptr+use, 0);
288 else if (!SvCUR(e->dataSV)) {
292 if (SvCUR(e->dataSV)) {
293 /* something left over from last time - create a normal
294 SV with new data appended
296 if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
297 if (e->flags & NEEDS_LINES) {
298 /* Have to grow buffer */
299 e->base.bufsiz = use + SvCUR(e->dataSV);
300 PerlIOEncode_get_base(aTHX_ f);
303 use = e->base.bufsiz - SvCUR(e->dataSV);
306 sv_catpvn(e->dataSV,(char*)ptr,use);
309 /* Create a "dummy" SV to represent the available data from layer below */
310 if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) {
311 Safefree(SvPVX(e->dataSV));
313 if (use > (SSize_t)e->base.bufsiz) {
314 if (e->flags & NEEDS_LINES) {
315 /* Have to grow buffer */
316 e->base.bufsiz = use;
317 PerlIOEncode_get_base(aTHX_ f);
320 use = e->base.bufsiz;
323 SvPVX(e->dataSV) = (char *) ptr;
324 SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */
325 SvCUR_set(e->dataSV,use);
326 SvPOK_only(e->dataSV);
328 SvUTF8_off(e->dataSV);
334 if (call_method("decode", G_SCALAR) != 1) {
335 Perl_die(aTHX_ "panic: decode did not return a value");
340 /* Now get translated string (forced to UTF-8) and use as buffer */
342 s = SvPVutf8(uni, len);
343 #ifdef PARANOID_ENCODE_CHECKS
344 if (len && !is_utf8_string((U8*)s,len)) {
345 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
351 /* if decode gave us back dataSV then data may vanish when
352 we do ptrcnt adjust - so take our copy now.
353 (The copy is a pain - need a put-it-here option for decode.)
355 sv_setpvn(e->bufsv,s,len);
356 e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
357 e->base.end = e->base.ptr + SvCUR(e->bufsv);
358 PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
361 /* Adjust ptr/cnt not taking anything which
362 did not translate - not clear this is a win */
363 /* compute amount we took */
364 use -= SvCUR(e->dataSV);
365 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
366 /* and as we did not take it it isn't pending */
367 SvCUR_set(e->dataSV,0);
369 /* Got nothing - assume partial character so we need some more */
370 /* Make sure e->dataSV is a normal SV before re-filling as
371 buffer alias will change under us
373 s = SvPV(e->dataSV,len);
374 sv_setpvn(e->dataSV,s,len);
375 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
383 PerlIOBase(f)->flags |= PERLIO_F_EOF;
385 PerlIOBase(f)->flags |= PERLIO_F_ERROR;
394 PerlIOEncode_flush(pTHX_ PerlIO * f)
396 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
399 if (e->bufsv && (e->base.ptr > e->base.buf)) {
405 if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) {
406 /* Write case encode the buffer and write() to layer below */
407 PUSHSTACKi(PERLSI_MAGIC);
413 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
418 if (call_method("encode", G_SCALAR) != 1) {
419 Perl_die(aTHX_ "panic: encode did not return a value");
425 count = PerlIO_write(PerlIONext(f),s,len);
426 if ((STRLEN)count != len) {
432 if (PerlIO_flush(PerlIONext(f)) != 0) {
435 if (SvCUR(e->bufsv)) {
436 /* Did not all translate */
437 e->base.ptr = e->base.buf+SvCUR(e->bufsv);
441 else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
443 /* if we have any untranslated stuff then unread that first */
444 if (e->dataSV && SvCUR(e->dataSV)) {
445 s = SvPV(e->dataSV, len);
446 count = PerlIO_unread(PerlIONext(f),s,len);
447 if ((STRLEN)count != len) {
451 /* See if there is anything left in the buffer */
452 if (e->base.ptr < e->base.end) {
453 /* Bother - have unread data.
454 re-encode and unread() to layer below
456 PUSHSTACKi(PERLSI_MAGIC);
460 str = sv_newmortal();
461 sv_upgrade(str, SVt_PV);
462 SvPVX(str) = (char*)e->base.ptr;
464 SvCUR_set(str, e->base.end - e->base.ptr);
472 if (call_method("encode", G_SCALAR) != 1) {
473 Perl_die(aTHX_ "panic: encode did not return a value");
479 count = PerlIO_unread(PerlIONext(f),s,len);
480 if ((STRLEN)count != len) {
488 e->base.ptr = e->base.end = e->base.buf;
489 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
495 PerlIOEncode_close(pTHX_ PerlIO * f)
497 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
498 IV code = PerlIOBase_close(aTHX_ f);
501 if (e->base.buf && e->base.ptr > e->base.buf) {
502 Perl_croak(aTHX_ "Close with partial character");
504 SvREFCNT_dec(e->bufsv);
510 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
515 PerlIOEncode_tell(pTHX_ PerlIO * f)
517 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
518 /* Unfortunately the only way to get a postion is to (re-)translate,
519 the UTF8 we have in bufefr and then ask layer below
522 if (b->buf && b->ptr > b->buf) {
523 Perl_croak(aTHX_ "Cannot tell at partial character");
525 return PerlIO_tell(PerlIONext(f));
529 PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
530 CLONE_PARAMS * params, int flags)
532 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
533 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
534 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
536 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
543 PerlIOEncode_write(pTHX_ PerlIO *f, const void *vbuf, Size_t count)
545 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
546 if (e->flags & NEEDS_LINES) {
548 const char *ptr = (const char *) vbuf;
549 const char *end = ptr+count;
551 const char *nl = ptr;
552 while (nl < end && *nl++ != '\n') /* empty body */;
553 done = PerlIOBuf_write(aTHX_ f, ptr, nl-ptr);
554 if (done != nl-ptr) {
561 if (ptr[-1] == '\n') {
562 if (PerlIOEncode_flush(aTHX_ f) != 0) {
567 return (SSize_t) (ptr - (const char *) vbuf);
570 return PerlIOBuf_write(aTHX_ f, vbuf, count);
574 PerlIO_funcs PerlIO_encode = {
576 sizeof(PerlIOEncode),
577 PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
595 PerlIOBase_setlinebuf,
596 PerlIOEncode_get_base,
600 PerlIOBuf_set_ptrcnt,
602 #endif /* encode layer */
604 MODULE = PerlIO::encoding PACKAGE = PerlIO::encoding
610 SV *chk = get_sv("PerlIO::encoding::check", GV_ADD|GV_ADDMULTI);
612 * we now "use Encode ()" here instead of
613 * PerlIO/encoding.pm. This avoids SEGV when ":encoding()"
614 * is invoked without prior "use Encode". -- dankogai
616 PUSHSTACKi(PERLSI_MAGIC);
618 if (!gv_stashpvn("Encode", 6, FALSE)) {
620 /* This would just be an irritant now loading works */
621 Perl_warner(aTHX_ packWARN(WARN_IO), ":encoding without 'use Encode'");
624 /* Encode needs a lot of stack - it is likely to move ... */
626 /* The SV is magically freed by load_module */
627 load_module(PERL_LOADMOD_NOIMPORT, newSVpvn("Encode", 6), Nullsv, Nullsv);
634 if (call_pv(OUR_DEFAULT_FB, G_SCALAR) != 1) {
635 /* should never happen */
636 Perl_die(aTHX_ "%s did not return a value",OUR_DEFAULT_FB);
641 PerlIO_define_layer(aTHX_ &PerlIO_encode);