2 * $Id: encoding.xs,v 0.3 2002/04/21 22:14:41 dankogai Exp $
5 #define PERL_NO_GET_CONTEXT
11 #if defined(USE_PERLIO) && !defined(USE_SFIO)
13 /* Define an encoding "layer" in the perliol.h sense.
15 The layer defined here "inherits" in an object-oriented sense from
16 the "perlio" layer with its PerlIOBuf_* "methods". The
17 implementation is particularly efficient as until Encode settles
18 down there is no point in tryint to tune it.
20 The layer works by overloading the "fill" and "flush" methods.
22 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO
23 perl API to convert the encoded data to UTF-8 form, then copies it
24 back to the buffer. The "base class's" read methods then see the
27 "flush" transforms the UTF-8 data deposited by the "base class's
28 write method in the buffer back into the encoded form using the
29 encode OO perl API, then copies data back into the buffer and calls
32 Note that "flush" is _also_ called for read mode - we still do the
33 (back)-translate so that the the base class's "flush" sees the
34 correct number of encoded chars for positioning the seek
35 pointer. (This double translation is the worst performance issue -
36 particularly with all-perl encode engine.)
43 PerlIOBuf base; /* PerlIOBuf stuff */
44 SV *bufsv; /* buffer seen by layers above */
45 SV *dataSV; /* data we have read from layer below */
46 SV *enc; /* the encoding object */
47 SV *chk; /* CHECK in Encode methods */
48 int flags; /* Flags currently just needs lines */
52 #define OUR_DEFAULT_FB "Encode::FB_QUIET"
55 PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
57 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
58 SV *sv = &PL_sv_undef;
66 if (call_method("name", G_SCALAR) == 1) {
76 PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
78 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
80 IV code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
89 if (call_pv("Encode::find_encoding", G_SCALAR) != 1) {
90 /* should never happen */
91 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
98 if (!SvROK(result) || !SvOBJECT(SvRV(result))) {
100 Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
106 #ifdef USE_NEW_SEQUENCE
110 if (call_method("new_sequence",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
111 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support new_sequence",
120 e->enc = newSVsv(result);
124 if (call_method("needs_lines",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
125 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support needs_lines",
132 if (SvTRUE(result)) {
133 e->flags |= NEEDS_LINES;
136 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
139 e->chk = newSVsv(get_sv("PerlIO::encoding::check", 0));
147 PerlIOEncode_popped(pTHX_ PerlIO * f)
149 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
151 SvREFCNT_dec(e->enc);
155 SvREFCNT_dec(e->bufsv);
159 SvREFCNT_dec(e->dataSV);
163 SvREFCNT_dec(e->chk);
170 PerlIOEncode_get_base(pTHX_ PerlIO * f)
172 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
174 e->base.bufsiz = 1024;
176 e->bufsv = newSV(e->base.bufsiz);
177 sv_setpvn(e->bufsv, "", 0);
179 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
181 e->base.ptr = e->base.buf;
183 e->base.end = e->base.buf;
184 if (e->base.ptr < e->base.buf
185 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
186 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
187 e->base.buf + SvLEN(e->bufsv));
190 if (SvLEN(e->bufsv) < e->base.bufsiz) {
191 SSize_t poff = e->base.ptr - e->base.buf;
192 SSize_t eoff = e->base.end - e->base.buf;
193 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
194 e->base.ptr = e->base.buf + poff;
195 e->base.end = e->base.buf + eoff;
197 if (e->base.ptr < e->base.buf
198 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
199 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
200 e->base.buf + SvLEN(e->bufsv));
207 PerlIOEncode_fill(pTHX_ PerlIO * f)
209 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
215 if (PerlIO_flush(f) != 0)
218 if (!PerlIO_fast_gets(n)) {
219 /* Things get too messy if we don't have a buffer layer
220 push a :perlio to do the job */
222 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
224 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
230 avail = PerlIO_get_cnt(n);
232 avail = PerlIO_fill(n);
234 avail = PerlIO_get_cnt(n);
237 if (!PerlIO_error(n) && PerlIO_eof(n))
241 if (avail > 0 || (e->flags & NEEDS_LINES)) {
242 STDCHAR *ptr = PerlIO_get_ptr(n);
243 SSize_t use = (avail >= 0) ? avail : 0;
247 e->base.ptr = e->base.end = (STDCHAR *) Nullch;
248 (void) PerlIOEncode_get_base(aTHX_ f);
250 e->dataSV = newSV(0);
251 if (SvTYPE(e->dataSV) < SVt_PV) {
252 sv_upgrade(e->dataSV,SVt_PV);
254 if (e->flags & NEEDS_LINES) {
255 /* Encoding needs whole lines (e.g. iso-2022-*)
256 search back from end of available data for
259 STDCHAR *nl = ptr+use-1;
266 if (nl >= ptr && *nl == '\n') {
267 /* found a line - take up to and including that */
270 else if (avail > 0) {
271 /* No line, but not EOF - append avail to the pending data */
272 sv_catpvn(e->dataSV, (char*)ptr, use);
273 PerlIO_set_ptrcnt(n, ptr+use, 0);
276 else if (!SvCUR(e->dataSV)) {
280 if (SvCUR(e->dataSV)) {
281 /* something left over from last time - create a normal
282 SV with new data appended
284 if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
285 if (e->flags & NEEDS_LINES) {
286 /* Have to grow buffer */
287 e->base.bufsiz = use + SvCUR(e->dataSV);
288 PerlIOEncode_get_base(aTHX_ f);
291 use = e->base.bufsiz - SvCUR(e->dataSV);
294 sv_catpvn(e->dataSV,(char*)ptr,use);
297 /* Create a "dummy" SV to represent the available data from layer below */
298 if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) {
299 Safefree(SvPVX(e->dataSV));
301 if (use > (SSize_t)e->base.bufsiz) {
302 if (e->flags & NEEDS_LINES) {
303 /* Have to grow buffer */
304 e->base.bufsiz = use;
305 PerlIOEncode_get_base(aTHX_ f);
308 use = e->base.bufsiz;
311 SvPVX(e->dataSV) = (char *) ptr;
312 SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */
313 SvCUR_set(e->dataSV,use);
314 SvPOK_only(e->dataSV);
316 SvUTF8_off(e->dataSV);
322 if (call_method("decode", G_SCALAR) != 1) {
323 Perl_die(aTHX_ "panic: decode did not return a value");
328 /* Now get translated string (forced to UTF-8) and use as buffer */
330 s = SvPVutf8(uni, len);
331 #ifdef PARANOID_ENCODE_CHECKS
332 if (len && !is_utf8_string((U8*)s,len)) {
333 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
339 /* if decode gave us back dataSV then data may vanish when
340 we do ptrcnt adjust - so take our copy now.
341 (The copy is a pain - need a put-it-here option for decode.)
343 sv_setpvn(e->bufsv,s,len);
344 e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
345 e->base.end = e->base.ptr + SvCUR(e->bufsv);
346 PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
349 /* Adjust ptr/cnt not taking anything which
350 did not translate - not clear this is a win */
351 /* compute amount we took */
352 use -= SvCUR(e->dataSV);
353 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
354 /* and as we did not take it it isn't pending */
355 SvCUR_set(e->dataSV,0);
357 /* Got nothing - assume partial character so we need some more */
358 /* Make sure e->dataSV is a normal SV before re-filling as
359 buffer alias will change under us
361 s = SvPV(e->dataSV,len);
362 sv_setpvn(e->dataSV,s,len);
363 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
373 PerlIOBase(f)->flags |= PERLIO_F_EOF;
375 PerlIOBase(f)->flags |= PERLIO_F_ERROR;
381 PerlIOEncode_flush(pTHX_ PerlIO * f)
383 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
386 if (e->bufsv && (e->base.ptr > e->base.buf)) {
392 if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) {
393 /* Write case encode the buffer and write() to layer below */
398 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
403 if (call_method("encode", G_SCALAR) != 1) {
404 Perl_die(aTHX_ "panic: encode did not return a value");
410 count = PerlIO_write(PerlIONext(f),s,len);
411 if ((STRLEN)count != len) {
416 if (PerlIO_flush(PerlIONext(f)) != 0) {
419 if (SvCUR(e->bufsv)) {
420 /* Did not all translate */
421 e->base.ptr = e->base.buf+SvCUR(e->bufsv);
425 else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
427 /* if we have any untranslated stuff then unread that first */
428 if (e->dataSV && SvCUR(e->dataSV)) {
429 s = SvPV(e->dataSV, len);
430 count = PerlIO_unread(PerlIONext(f),s,len);
431 if ((STRLEN)count != len) {
435 /* See if there is anything left in the buffer */
436 if (e->base.ptr < e->base.end) {
437 /* Bother - have unread data.
438 re-encode and unread() to layer below
442 str = sv_newmortal();
443 sv_upgrade(str, SVt_PV);
444 SvPVX(str) = (char*)e->base.ptr;
446 SvCUR_set(str, e->base.end - e->base.ptr);
454 if (call_method("encode", G_SCALAR) != 1) {
455 Perl_die(aTHX_ "panic: encode did not return a value");
461 count = PerlIO_unread(PerlIONext(f),s,len);
462 if ((STRLEN)count != len) {
469 e->base.ptr = e->base.end = e->base.buf;
470 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
476 PerlIOEncode_close(pTHX_ PerlIO * f)
478 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
479 IV code = PerlIOBase_close(aTHX_ f);
482 if (e->base.buf && e->base.ptr > e->base.buf) {
483 Perl_croak(aTHX_ "Close with partial character");
485 SvREFCNT_dec(e->bufsv);
491 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
496 PerlIOEncode_tell(pTHX_ PerlIO * f)
498 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
499 /* Unfortunately the only way to get a postion is to (re-)translate,
500 the UTF8 we have in bufefr and then ask layer below
503 if (b->buf && b->ptr > b->buf) {
504 Perl_croak(aTHX_ "Cannot tell at partial character");
506 return PerlIO_tell(PerlIONext(f));
510 PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
511 CLONE_PARAMS * params, int flags)
513 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
514 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
515 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
517 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
524 PerlIOEncode_write(pTHX_ PerlIO *f, const void *vbuf, Size_t count)
526 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
527 if (e->flags & NEEDS_LINES) {
529 const char *ptr = (const char *) vbuf;
530 const char *end = ptr+count;
532 const char *nl = ptr;
533 while (nl < end && *nl++ != '\n') /* empty body */;
534 done = PerlIOBuf_write(aTHX_ f, ptr, nl-ptr);
535 if (done != nl-ptr) {
542 if (ptr[-1] == '\n') {
543 if (PerlIOEncode_flush(aTHX_ f) != 0) {
548 return (SSize_t) (ptr - (const char *) vbuf);
551 return PerlIOBuf_write(aTHX_ f, vbuf, count);
555 PerlIO_funcs PerlIO_encode = {
557 sizeof(PerlIOEncode),
558 PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
576 PerlIOBase_setlinebuf,
577 PerlIOEncode_get_base,
581 PerlIOBuf_set_ptrcnt,
583 #endif /* encode layer */
585 MODULE = PerlIO::encoding PACKAGE = PerlIO::encoding
591 SV *chk = get_sv("PerlIO::encoding::check", GV_ADD|GV_ADDMULTI);
593 * we now "use Encode ()" here instead of
594 * PerlIO/encoding.pm. This avoids SEGV when ":encoding()"
595 * is invoked without prior "use Encode". -- dankogai
597 if (!gv_stashpvn("Encode", 6, FALSE)) {
598 Perl_warner(aTHX_ packWARN(WARN_IO), ":encoding without 'use Encode'");
600 /* The SV is magically freed by load_module */
601 load_module(PERL_LOADMOD_NOIMPORT, newSVpvn("Encode", 6), Nullsv, Nullsv);
606 if (call_pv(OUR_DEFAULT_FB, G_SCALAR) != 1) {
607 /* should never happen */
608 Perl_die(aTHX_ "%s did not return a value",OUR_DEFAULT_FB);
614 PerlIO_define_layer(aTHX_ &PerlIO_encode);