Re-instate $PerlIO::encoding::check at boot.
[p5sagit/p5-mst-13.2.git] / ext / PerlIO / encoding / encoding.xs
CommitLineData
918951dd 1/*
c657f685 2 * $Id: encoding.xs,v 0.3 2002/04/21 22:14:41 dankogai Exp $
918951dd 3 */
4
59035dcc 5#define PERL_NO_GET_CONTEXT
6#include "EXTERN.h"
7#include "perl.h"
8#include "XSUB.h"
9#define U8 U8
10
11#if defined(USE_PERLIO) && !defined(USE_SFIO)
12
13/* Define an encoding "layer" in the perliol.h sense.
14
15 The layer defined here "inherits" in an object-oriented sense from
16 the "perlio" layer with its PerlIOBuf_* "methods". The
17 implementation is particularly efficient as until Encode settles
18 down there is no point in tryint to tune it.
19
20 The layer works by overloading the "fill" and "flush" methods.
21
22 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO
23 perl API to convert the encoded data to UTF-8 form, then copies it
24 back to the buffer. The "base class's" read methods then see the
25 UTF-8 data.
26
27 "flush" transforms the UTF-8 data deposited by the "base class's
28 write method in the buffer back into the encoded form using the
29 encode OO perl API, then copies data back into the buffer and calls
30 "SUPER::flush.
31
32 Note that "flush" is _also_ called for read mode - we still do the
33 (back)-translate so that the the base class's "flush" sees the
34 correct number of encoded chars for positioning the seek
35 pointer. (This double translation is the worst performance issue -
36 particularly with all-perl encode engine.)
37
38*/
39
40#include "perliol.h"
41
42typedef struct {
43 PerlIOBuf base; /* PerlIOBuf stuff */
44 SV *bufsv; /* buffer seen by layers above */
45 SV *dataSV; /* data we have read from layer below */
46 SV *enc; /* the encoding object */
918951dd 47 SV *chk; /* CHECK in Encode methods */
c00aecee 48 int flags; /* Flags currently just needs lines */
59035dcc 49} PerlIOEncode;
50
c00aecee 51#define NEEDS_LINES 1
54871a3c 52#define OUR_DEFAULT_FB "Encode::FB_QUIET"
918951dd 53
59035dcc 54SV *
55PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
56{
57 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
58 SV *sv = &PL_sv_undef;
59 if (e->enc) {
60 dSP;
61 ENTER;
62 SAVETMPS;
63 PUSHMARK(sp);
64 XPUSHs(e->enc);
65 PUTBACK;
918951dd 66 if (call_method("name", G_SCALAR) == 1) {
59035dcc 67 SPAGAIN;
68 sv = newSVsv(POPs);
69 PUTBACK;
70 }
71 }
72 return sv;
73}
74
75IV
76PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
77{
78 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
79 dSP;
c657f685 80 IV code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
c00aecee 81 SV *result = Nullsv;
c657f685 82
59035dcc 83 ENTER;
84 SAVETMPS;
918951dd 85
86 PUSHMARK(sp);
59035dcc 87 XPUSHs(arg);
88 PUTBACK;
918951dd 89 if (call_pv("Encode::find_encoding", G_SCALAR) != 1) {
59035dcc 90 /* should never happen */
91 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
92 return -1;
93 }
94 SPAGAIN;
c00aecee 95 result = POPs;
59035dcc 96 PUTBACK;
918951dd 97
c00aecee 98 if (!SvROK(result) || !SvOBJECT(SvRV(result))) {
59035dcc 99 e->enc = Nullsv;
59035dcc 100 Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
54871a3c 101 arg);
c00aecee 102 errno = EINVAL;
59035dcc 103 code = -1;
104 }
105 else {
c00aecee 106#ifdef USE_NEW_SEQUENCE
107 PUSHMARK(sp);
108 XPUSHs(result);
109 PUTBACK;
110 if (call_method("new_sequence",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
111 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support new_sequence",
112 arg);
113 }
114 else {
115 SPAGAIN;
116 result = POPs;
117 PUTBACK;
118 }
119#endif
120 e->enc = newSVsv(result);
121 PUSHMARK(sp);
122 XPUSHs(e->enc);
123 PUTBACK;
124 if (call_method("needs_lines",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
125 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support needs_lines",
126 arg);
127 }
128 else {
129 SPAGAIN;
130 result = POPs;
131 PUTBACK;
132 if (SvTRUE(result)) {
133 e->flags |= NEEDS_LINES;
134 }
135 }
59035dcc 136 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
137 }
c00aecee 138
54871a3c 139 e->chk = newSVsv(get_sv("PerlIO::encoding::check", 0));
140
59035dcc 141 FREETMPS;
142 LEAVE;
143 return code;
144}
145
146IV
147PerlIOEncode_popped(pTHX_ PerlIO * f)
148{
149 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
150 if (e->enc) {
151 SvREFCNT_dec(e->enc);
152 e->enc = Nullsv;
153 }
154 if (e->bufsv) {
155 SvREFCNT_dec(e->bufsv);
156 e->bufsv = Nullsv;
157 }
158 if (e->dataSV) {
159 SvREFCNT_dec(e->dataSV);
160 e->dataSV = Nullsv;
161 }
c00aecee 162 if (e->chk) {
163 SvREFCNT_dec(e->chk);
164 e->dataSV = Nullsv;
165 }
59035dcc 166 return 0;
167}
168
169STDCHAR *
170PerlIOEncode_get_base(pTHX_ PerlIO * f)
171{
172 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
173 if (!e->base.bufsiz)
174 e->base.bufsiz = 1024;
175 if (!e->bufsv) {
176 e->bufsv = newSV(e->base.bufsiz);
177 sv_setpvn(e->bufsv, "", 0);
178 }
179 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
180 if (!e->base.ptr)
181 e->base.ptr = e->base.buf;
182 if (!e->base.end)
183 e->base.end = e->base.buf;
184 if (e->base.ptr < e->base.buf
185 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
186 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
187 e->base.buf + SvLEN(e->bufsv));
188 abort();
189 }
190 if (SvLEN(e->bufsv) < e->base.bufsiz) {
191 SSize_t poff = e->base.ptr - e->base.buf;
192 SSize_t eoff = e->base.end - e->base.buf;
193 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
194 e->base.ptr = e->base.buf + poff;
195 e->base.end = e->base.buf + eoff;
196 }
197 if (e->base.ptr < e->base.buf
198 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
199 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
200 e->base.buf + SvLEN(e->bufsv));
201 abort();
202 }
203 return e->base.buf;
204}
205
206IV
207PerlIOEncode_fill(pTHX_ PerlIO * f)
208{
209 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
210 dSP;
211 IV code = 0;
212 PerlIO *n;
213 SSize_t avail;
c657f685 214
59035dcc 215 if (PerlIO_flush(f) != 0)
216 return -1;
217 n = PerlIONext(f);
218 if (!PerlIO_fast_gets(n)) {
219 /* Things get too messy if we don't have a buffer layer
220 push a :perlio to do the job */
221 char mode[8];
222 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
223 if (!n) {
224 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
225 }
226 }
227 ENTER;
228 SAVETMPS;
229 retry:
230 avail = PerlIO_get_cnt(n);
231 if (avail <= 0) {
232 avail = PerlIO_fill(n);
233 if (avail == 0) {
234 avail = PerlIO_get_cnt(n);
235 }
236 else {
237 if (!PerlIO_error(n) && PerlIO_eof(n))
238 avail = 0;
239 }
240 }
c00aecee 241 if (avail > 0 || (e->flags & NEEDS_LINES)) {
59035dcc 242 STDCHAR *ptr = PerlIO_get_ptr(n);
c00aecee 243 SSize_t use = (avail >= 0) ? avail : 0;
59035dcc 244 SV *uni;
245 char *s;
246 STRLEN len = 0;
247 e->base.ptr = e->base.end = (STDCHAR *) Nullch;
248 (void) PerlIOEncode_get_base(aTHX_ f);
249 if (!e->dataSV)
250 e->dataSV = newSV(0);
251 if (SvTYPE(e->dataSV) < SVt_PV) {
252 sv_upgrade(e->dataSV,SVt_PV);
253 }
c00aecee 254 if (e->flags & NEEDS_LINES) {
255 /* Encoding needs whole lines (e.g. iso-2022-*)
256 search back from end of available data for
257 and line marker
258 */
259 STDCHAR *nl = ptr+use-1;
260 while (nl >= ptr) {
261 if (*nl == '\n') {
262 break;
263 }
264 nl--;
265 }
266 if (nl >= ptr && *nl == '\n') {
267 /* found a line - take up to and including that */
268 use = (nl+1)-ptr;
269 }
270 else if (avail > 0) {
271 /* No line, but not EOF - append avail to the pending data */
8994bf69 272 sv_catpvn(e->dataSV, (char*)ptr, use);
c00aecee 273 PerlIO_set_ptrcnt(n, ptr+use, 0);
274 goto retry;
275 }
276 else if (!SvCUR(e->dataSV)) {
277 goto end_of_file;
278 }
279 }
59035dcc 280 if (SvCUR(e->dataSV)) {
281 /* something left over from last time - create a normal
282 SV with new data appended
283 */
284 if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
c00aecee 285 if (e->flags & NEEDS_LINES) {
286 /* Have to grow buffer */
287 e->base.bufsiz = use + SvCUR(e->dataSV);
288 PerlIOEncode_get_base(aTHX_ f);
289 }
290 else {
c657f685 291 use = e->base.bufsiz - SvCUR(e->dataSV);
292 }
59035dcc 293 }
294 sv_catpvn(e->dataSV,(char*)ptr,use);
295 }
296 else {
297 /* Create a "dummy" SV to represent the available data from layer below */
298 if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) {
299 Safefree(SvPVX(e->dataSV));
300 }
7c436af3 301 if (use > (SSize_t)e->base.bufsiz) {
c00aecee 302 if (e->flags & NEEDS_LINES) {
303 /* Have to grow buffer */
304 e->base.bufsiz = use;
305 PerlIOEncode_get_base(aTHX_ f);
306 }
307 else {
c657f685 308 use = e->base.bufsiz;
309 }
59035dcc 310 }
311 SvPVX(e->dataSV) = (char *) ptr;
312 SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */
313 SvCUR_set(e->dataSV,use);
314 SvPOK_only(e->dataSV);
315 }
316 SvUTF8_off(e->dataSV);
317 PUSHMARK(sp);
318 XPUSHs(e->enc);
319 XPUSHs(e->dataSV);
918951dd 320 XPUSHs(e->chk);
59035dcc 321 PUTBACK;
918951dd 322 if (call_method("decode", G_SCALAR) != 1) {
59035dcc 323 Perl_die(aTHX_ "panic: decode did not return a value");
324 }
325 SPAGAIN;
326 uni = POPs;
327 PUTBACK;
328 /* Now get translated string (forced to UTF-8) and use as buffer */
329 if (SvPOK(uni)) {
330 s = SvPVutf8(uni, len);
331#ifdef PARANOID_ENCODE_CHECKS
332 if (len && !is_utf8_string((U8*)s,len)) {
333 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
334 }
335#endif
336 }
337 if (len > 0) {
338 /* Got _something */
339 /* if decode gave us back dataSV then data may vanish when
340 we do ptrcnt adjust - so take our copy now.
341 (The copy is a pain - need a put-it-here option for decode.)
342 */
343 sv_setpvn(e->bufsv,s,len);
344 e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
345 e->base.end = e->base.ptr + SvCUR(e->bufsv);
346 PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
347 SvUTF8_on(e->bufsv);
348
349 /* Adjust ptr/cnt not taking anything which
350 did not translate - not clear this is a win */
351 /* compute amount we took */
352 use -= SvCUR(e->dataSV);
353 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
354 /* and as we did not take it it isn't pending */
355 SvCUR_set(e->dataSV,0);
356 } else {
357 /* Got nothing - assume partial character so we need some more */
358 /* Make sure e->dataSV is a normal SV before re-filling as
359 buffer alias will change under us
360 */
361 s = SvPV(e->dataSV,len);
362 sv_setpvn(e->dataSV,s,len);
363 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
364 goto retry;
365 }
366 FREETMPS;
367 LEAVE;
368 return code;
369 }
370 else {
c00aecee 371 end_of_file:
59035dcc 372 if (avail == 0)
373 PerlIOBase(f)->flags |= PERLIO_F_EOF;
374 else
375 PerlIOBase(f)->flags |= PERLIO_F_ERROR;
376 return -1;
377 }
378}
379
380IV
381PerlIOEncode_flush(pTHX_ PerlIO * f)
382{
383 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
384 IV code = 0;
c657f685 385
59035dcc 386 if (e->bufsv && (e->base.ptr > e->base.buf)) {
387 dSP;
388 SV *str;
389 char *s;
390 STRLEN len;
391 SSize_t count = 0;
392 if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) {
393 /* Write case encode the buffer and write() to layer below */
394 ENTER;
395 SAVETMPS;
396 PUSHMARK(sp);
397 XPUSHs(e->enc);
398 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
399 SvUTF8_on(e->bufsv);
400 XPUSHs(e->bufsv);
918951dd 401 XPUSHs(e->chk);
59035dcc 402 PUTBACK;
918951dd 403 if (call_method("encode", G_SCALAR) != 1) {
59035dcc 404 Perl_die(aTHX_ "panic: encode did not return a value");
405 }
406 SPAGAIN;
407 str = POPs;
408 PUTBACK;
409 s = SvPV(str, len);
410 count = PerlIO_write(PerlIONext(f),s,len);
7c436af3 411 if ((STRLEN)count != len) {
59035dcc 412 code = -1;
413 }
414 FREETMPS;
415 LEAVE;
416 if (PerlIO_flush(PerlIONext(f)) != 0) {
417 code = -1;
418 }
419 if (SvCUR(e->bufsv)) {
420 /* Did not all translate */
421 e->base.ptr = e->base.buf+SvCUR(e->bufsv);
422 return code;
423 }
424 }
425 else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
426 /* read case */
427 /* if we have any untranslated stuff then unread that first */
428 if (e->dataSV && SvCUR(e->dataSV)) {
429 s = SvPV(e->dataSV, len);
430 count = PerlIO_unread(PerlIONext(f),s,len);
7c436af3 431 if ((STRLEN)count != len) {
59035dcc 432 code = -1;
433 }
434 }
435 /* See if there is anything left in the buffer */
436 if (e->base.ptr < e->base.end) {
437 /* Bother - have unread data.
438 re-encode and unread() to layer below
439 */
440 ENTER;
441 SAVETMPS;
442 str = sv_newmortal();
443 sv_upgrade(str, SVt_PV);
444 SvPVX(str) = (char*)e->base.ptr;
445 SvLEN(str) = 0;
446 SvCUR_set(str, e->base.end - e->base.ptr);
447 SvPOK_only(str);
448 SvUTF8_on(str);
449 PUSHMARK(sp);
450 XPUSHs(e->enc);
451 XPUSHs(str);
918951dd 452 XPUSHs(e->chk);
59035dcc 453 PUTBACK;
918951dd 454 if (call_method("encode", G_SCALAR) != 1) {
59035dcc 455 Perl_die(aTHX_ "panic: encode did not return a value");
456 }
457 SPAGAIN;
458 str = POPs;
459 PUTBACK;
460 s = SvPV(str, len);
461 count = PerlIO_unread(PerlIONext(f),s,len);
7c436af3 462 if ((STRLEN)count != len) {
59035dcc 463 code = -1;
464 }
465 FREETMPS;
466 LEAVE;
467 }
468 }
469 e->base.ptr = e->base.end = e->base.buf;
470 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
471 }
472 return code;
473}
474
475IV
476PerlIOEncode_close(pTHX_ PerlIO * f)
477{
478 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
479 IV code = PerlIOBase_close(aTHX_ f);
c657f685 480
59035dcc 481 if (e->bufsv) {
482 if (e->base.buf && e->base.ptr > e->base.buf) {
483 Perl_croak(aTHX_ "Close with partial character");
484 }
485 SvREFCNT_dec(e->bufsv);
486 e->bufsv = Nullsv;
487 }
488 e->base.buf = NULL;
489 e->base.ptr = NULL;
490 e->base.end = NULL;
491 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
492 return code;
493}
494
495Off_t
496PerlIOEncode_tell(pTHX_ PerlIO * f)
497{
498 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
499 /* Unfortunately the only way to get a postion is to (re-)translate,
500 the UTF8 we have in bufefr and then ask layer below
501 */
502 PerlIO_flush(f);
503 if (b->buf && b->ptr > b->buf) {
504 Perl_croak(aTHX_ "Cannot tell at partial character");
505 }
506 return PerlIO_tell(PerlIONext(f));
507}
508
509PerlIO *
510PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
511 CLONE_PARAMS * params, int flags)
512{
513 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
514 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
515 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
516 if (oe->enc) {
517 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
518 }
519 }
520 return f;
521}
522
c00aecee 523SSize_t
524PerlIOEncode_write(pTHX_ PerlIO *f, const void *vbuf, Size_t count)
525{
526 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
527 if (e->flags & NEEDS_LINES) {
528 SSize_t done = 0;
529 const char *ptr = (const char *) vbuf;
530 const char *end = ptr+count;
531 while (ptr < end) {
532 const char *nl = ptr;
533 while (nl < end && *nl++ != '\n') /* empty body */;
534 done = PerlIOBuf_write(aTHX_ f, ptr, nl-ptr);
535 if (done != nl-ptr) {
536 if (done > 0) {
537 ptr += done;
538 }
539 break;
540 }
541 ptr += done;
542 if (ptr[-1] == '\n') {
543 if (PerlIOEncode_flush(aTHX_ f) != 0) {
544 break;
545 }
546 }
547 }
548 return (SSize_t) (ptr - (const char *) vbuf);
549 }
550 else {
551 return PerlIOBuf_write(aTHX_ f, vbuf, count);
552 }
553}
554
59035dcc 555PerlIO_funcs PerlIO_encode = {
556 "encoding",
557 sizeof(PerlIOEncode),
558 PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
559 PerlIOEncode_pushed,
560 PerlIOEncode_popped,
561 PerlIOBuf_open,
562 PerlIOEncode_getarg,
563 PerlIOBase_fileno,
564 PerlIOEncode_dup,
565 PerlIOBuf_read,
566 PerlIOBuf_unread,
c00aecee 567 PerlIOEncode_write,
59035dcc 568 PerlIOBuf_seek,
569 PerlIOEncode_tell,
570 PerlIOEncode_close,
571 PerlIOEncode_flush,
572 PerlIOEncode_fill,
573 PerlIOBase_eof,
574 PerlIOBase_error,
575 PerlIOBase_clearerr,
576 PerlIOBase_setlinebuf,
577 PerlIOEncode_get_base,
578 PerlIOBuf_bufsiz,
579 PerlIOBuf_get_ptr,
580 PerlIOBuf_get_cnt,
581 PerlIOBuf_set_ptrcnt,
582};
583#endif /* encode layer */
584
585MODULE = PerlIO::encoding PACKAGE = PerlIO::encoding
586
587PROTOTYPES: ENABLE
588
589BOOT:
590{
54871a3c 591 SV *chk = get_sv("PerlIO::encoding::check", GV_ADD|GV_ADDMULTI);
592 /*
593 * we now "use Encode ()" here instead of
594 * PerlIO/encoding.pm. This avoids SEGV when ":encoding()"
595 * is invoked without prior "use Encode". -- dankogai
596 */
597 if (!gv_stashpvn("Encode", 6, FALSE)) {
598 Perl_warner(aTHX_ packWARN(WARN_IO), ":encoding without 'use Encode'");
599 ENTER;
600 /* The SV is magically freed by load_module */
601 load_module(PERL_LOADMOD_NOIMPORT, newSVpvn("Encode", 6), Nullsv, Nullsv);
602 LEAVE;
603 }
604 PUSHMARK(sp);
605 PUTBACK;
606 if (call_pv(OUR_DEFAULT_FB, G_SCALAR) != 1) {
607 /* should never happen */
608 Perl_die(aTHX_ "%s did not return a value",OUR_DEFAULT_FB);
609 }
610 SPAGAIN;
611 sv_setsv(chk, POPs);
612 PUTBACK;
59035dcc 613#ifdef PERLIO_LAYERS
54871a3c 614 PerlIO_define_layer(aTHX_ &PerlIO_encode);
59035dcc 615#endif
616}