various signed/unsigned mismatch nits
[p5sagit/p5-mst-13.2.git] / ext / PerlIO / encoding / encoding.xs
CommitLineData
918951dd 1/*
c657f685 2 * $Id: encoding.xs,v 0.3 2002/04/21 22:14:41 dankogai Exp $
918951dd 3 */
4
59035dcc 5#define PERL_NO_GET_CONTEXT
6#include "EXTERN.h"
7#include "perl.h"
8#include "XSUB.h"
9#define U8 U8
10
11#if defined(USE_PERLIO) && !defined(USE_SFIO)
12
13/* Define an encoding "layer" in the perliol.h sense.
14
15 The layer defined here "inherits" in an object-oriented sense from
16 the "perlio" layer with its PerlIOBuf_* "methods". The
17 implementation is particularly efficient as until Encode settles
18 down there is no point in tryint to tune it.
19
20 The layer works by overloading the "fill" and "flush" methods.
21
22 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO
23 perl API to convert the encoded data to UTF-8 form, then copies it
24 back to the buffer. The "base class's" read methods then see the
25 UTF-8 data.
26
27 "flush" transforms the UTF-8 data deposited by the "base class's
28 write method in the buffer back into the encoded form using the
29 encode OO perl API, then copies data back into the buffer and calls
30 "SUPER::flush.
31
32 Note that "flush" is _also_ called for read mode - we still do the
33 (back)-translate so that the the base class's "flush" sees the
34 correct number of encoded chars for positioning the seek
35 pointer. (This double translation is the worst performance issue -
36 particularly with all-perl encode engine.)
37
38*/
39
40#include "perliol.h"
41
42typedef struct {
43 PerlIOBuf base; /* PerlIOBuf stuff */
44 SV *bufsv; /* buffer seen by layers above */
45 SV *dataSV; /* data we have read from layer below */
46 SV *enc; /* the encoding object */
918951dd 47 SV *chk; /* CHECK in Encode methods */
c00aecee 48 int flags; /* Flags currently just needs lines */
59035dcc 49} PerlIOEncode;
50
c00aecee 51#define NEEDS_LINES 1
918951dd 52
59035dcc 53SV *
54PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
55{
56 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
57 SV *sv = &PL_sv_undef;
58 if (e->enc) {
59 dSP;
60 ENTER;
61 SAVETMPS;
62 PUSHMARK(sp);
63 XPUSHs(e->enc);
64 PUTBACK;
918951dd 65 if (call_method("name", G_SCALAR) == 1) {
59035dcc 66 SPAGAIN;
67 sv = newSVsv(POPs);
68 PUTBACK;
69 }
70 }
71 return sv;
72}
73
74IV
75PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
76{
77 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
78 dSP;
c657f685 79 IV code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
c00aecee 80 SV *result = Nullsv;
c657f685 81
82 /*
83 * we now "use Encode qw(:fallbacks)" here instead of
84 * PerlIO/encoding.pm. This avoids SEGV when ":encoding()"
85 * is invoked without prior "use Encode". -- dankogai
86 */
87 require_pv("Encode.pm");
88
59035dcc 89 ENTER;
90 SAVETMPS;
918951dd 91
92 PUSHMARK(sp);
59035dcc 93 XPUSHs(arg);
94 PUTBACK;
918951dd 95 if (call_pv("Encode::find_encoding", G_SCALAR) != 1) {
59035dcc 96 /* should never happen */
97 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
98 return -1;
99 }
100 SPAGAIN;
c00aecee 101 result = POPs;
59035dcc 102 PUTBACK;
918951dd 103
c00aecee 104 if (!SvROK(result) || !SvOBJECT(SvRV(result))) {
59035dcc 105 e->enc = Nullsv;
59035dcc 106 Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
c657f685 107 arg);
c00aecee 108 errno = EINVAL;
59035dcc 109 code = -1;
110 }
111 else {
c00aecee 112#ifdef USE_NEW_SEQUENCE
113 PUSHMARK(sp);
114 XPUSHs(result);
115 PUTBACK;
116 if (call_method("new_sequence",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
117 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support new_sequence",
118 arg);
119 }
120 else {
121 SPAGAIN;
122 result = POPs;
123 PUTBACK;
124 }
125#endif
126 e->enc = newSVsv(result);
127 PUSHMARK(sp);
128 XPUSHs(e->enc);
129 PUTBACK;
130 if (call_method("needs_lines",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
131 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support needs_lines",
132 arg);
133 }
134 else {
135 SPAGAIN;
136 result = POPs;
137 PUTBACK;
138 if (SvTRUE(result)) {
139 e->flags |= NEEDS_LINES;
140 }
141 }
59035dcc 142 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
143 }
c00aecee 144
c657f685 145 if (SvIV(result = get_sv("PerlIO::encoding::check", 1)) == 0){
146 PUSHMARK(sp);
147 PUTBACK;
f33abd47 148 if (call_pv("Encode::FB_QUIET", G_SCALAR) != 1) {
c657f685 149 /* should never happen */
150 Perl_die(aTHX_ "Encode::FB_QUIET did not return a value");
151 return -1;
152 }
153 SPAGAIN;
154 e->chk = newSVsv(POPs);
155 PUTBACK;
156 sv_setsv(result, e->chk);
157 }else{
158 e->chk = newSVsv(result);
159 }
59035dcc 160 FREETMPS;
161 LEAVE;
162 return code;
163}
164
165IV
166PerlIOEncode_popped(pTHX_ PerlIO * f)
167{
168 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
169 if (e->enc) {
170 SvREFCNT_dec(e->enc);
171 e->enc = Nullsv;
172 }
173 if (e->bufsv) {
174 SvREFCNT_dec(e->bufsv);
175 e->bufsv = Nullsv;
176 }
177 if (e->dataSV) {
178 SvREFCNT_dec(e->dataSV);
179 e->dataSV = Nullsv;
180 }
c00aecee 181 if (e->chk) {
182 SvREFCNT_dec(e->chk);
183 e->dataSV = Nullsv;
184 }
59035dcc 185 return 0;
186}
187
188STDCHAR *
189PerlIOEncode_get_base(pTHX_ PerlIO * f)
190{
191 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
192 if (!e->base.bufsiz)
193 e->base.bufsiz = 1024;
194 if (!e->bufsv) {
195 e->bufsv = newSV(e->base.bufsiz);
196 sv_setpvn(e->bufsv, "", 0);
197 }
198 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
199 if (!e->base.ptr)
200 e->base.ptr = e->base.buf;
201 if (!e->base.end)
202 e->base.end = e->base.buf;
203 if (e->base.ptr < e->base.buf
204 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
205 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
206 e->base.buf + SvLEN(e->bufsv));
207 abort();
208 }
209 if (SvLEN(e->bufsv) < e->base.bufsiz) {
210 SSize_t poff = e->base.ptr - e->base.buf;
211 SSize_t eoff = e->base.end - e->base.buf;
212 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
213 e->base.ptr = e->base.buf + poff;
214 e->base.end = e->base.buf + eoff;
215 }
216 if (e->base.ptr < e->base.buf
217 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
218 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
219 e->base.buf + SvLEN(e->bufsv));
220 abort();
221 }
222 return e->base.buf;
223}
224
225IV
226PerlIOEncode_fill(pTHX_ PerlIO * f)
227{
228 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
229 dSP;
230 IV code = 0;
231 PerlIO *n;
232 SSize_t avail;
c657f685 233
59035dcc 234 if (PerlIO_flush(f) != 0)
235 return -1;
236 n = PerlIONext(f);
237 if (!PerlIO_fast_gets(n)) {
238 /* Things get too messy if we don't have a buffer layer
239 push a :perlio to do the job */
240 char mode[8];
241 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
242 if (!n) {
243 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
244 }
245 }
246 ENTER;
247 SAVETMPS;
248 retry:
249 avail = PerlIO_get_cnt(n);
250 if (avail <= 0) {
251 avail = PerlIO_fill(n);
252 if (avail == 0) {
253 avail = PerlIO_get_cnt(n);
254 }
255 else {
256 if (!PerlIO_error(n) && PerlIO_eof(n))
257 avail = 0;
258 }
259 }
c00aecee 260 if (avail > 0 || (e->flags & NEEDS_LINES)) {
59035dcc 261 STDCHAR *ptr = PerlIO_get_ptr(n);
c00aecee 262 SSize_t use = (avail >= 0) ? avail : 0;
59035dcc 263 SV *uni;
264 char *s;
265 STRLEN len = 0;
266 e->base.ptr = e->base.end = (STDCHAR *) Nullch;
267 (void) PerlIOEncode_get_base(aTHX_ f);
268 if (!e->dataSV)
269 e->dataSV = newSV(0);
270 if (SvTYPE(e->dataSV) < SVt_PV) {
271 sv_upgrade(e->dataSV,SVt_PV);
272 }
c00aecee 273 if (e->flags & NEEDS_LINES) {
274 /* Encoding needs whole lines (e.g. iso-2022-*)
275 search back from end of available data for
276 and line marker
277 */
278 STDCHAR *nl = ptr+use-1;
279 while (nl >= ptr) {
280 if (*nl == '\n') {
281 break;
282 }
283 nl--;
284 }
285 if (nl >= ptr && *nl == '\n') {
286 /* found a line - take up to and including that */
287 use = (nl+1)-ptr;
288 }
289 else if (avail > 0) {
290 /* No line, but not EOF - append avail to the pending data */
8994bf69 291 sv_catpvn(e->dataSV, (char*)ptr, use);
c00aecee 292 PerlIO_set_ptrcnt(n, ptr+use, 0);
293 goto retry;
294 }
295 else if (!SvCUR(e->dataSV)) {
296 goto end_of_file;
297 }
298 }
59035dcc 299 if (SvCUR(e->dataSV)) {
300 /* something left over from last time - create a normal
301 SV with new data appended
302 */
303 if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
c00aecee 304 if (e->flags & NEEDS_LINES) {
305 /* Have to grow buffer */
306 e->base.bufsiz = use + SvCUR(e->dataSV);
307 PerlIOEncode_get_base(aTHX_ f);
308 }
309 else {
c657f685 310 use = e->base.bufsiz - SvCUR(e->dataSV);
311 }
59035dcc 312 }
313 sv_catpvn(e->dataSV,(char*)ptr,use);
314 }
315 else {
316 /* Create a "dummy" SV to represent the available data from layer below */
317 if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) {
318 Safefree(SvPVX(e->dataSV));
319 }
7c436af3 320 if (use > (SSize_t)e->base.bufsiz) {
c00aecee 321 if (e->flags & NEEDS_LINES) {
322 /* Have to grow buffer */
323 e->base.bufsiz = use;
324 PerlIOEncode_get_base(aTHX_ f);
325 }
326 else {
c657f685 327 use = e->base.bufsiz;
328 }
59035dcc 329 }
330 SvPVX(e->dataSV) = (char *) ptr;
331 SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */
332 SvCUR_set(e->dataSV,use);
333 SvPOK_only(e->dataSV);
334 }
335 SvUTF8_off(e->dataSV);
336 PUSHMARK(sp);
337 XPUSHs(e->enc);
338 XPUSHs(e->dataSV);
918951dd 339 XPUSHs(e->chk);
59035dcc 340 PUTBACK;
918951dd 341 if (call_method("decode", G_SCALAR) != 1) {
59035dcc 342 Perl_die(aTHX_ "panic: decode did not return a value");
343 }
344 SPAGAIN;
345 uni = POPs;
346 PUTBACK;
347 /* Now get translated string (forced to UTF-8) and use as buffer */
348 if (SvPOK(uni)) {
349 s = SvPVutf8(uni, len);
350#ifdef PARANOID_ENCODE_CHECKS
351 if (len && !is_utf8_string((U8*)s,len)) {
352 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
353 }
354#endif
355 }
356 if (len > 0) {
357 /* Got _something */
358 /* if decode gave us back dataSV then data may vanish when
359 we do ptrcnt adjust - so take our copy now.
360 (The copy is a pain - need a put-it-here option for decode.)
361 */
362 sv_setpvn(e->bufsv,s,len);
363 e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
364 e->base.end = e->base.ptr + SvCUR(e->bufsv);
365 PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
366 SvUTF8_on(e->bufsv);
367
368 /* Adjust ptr/cnt not taking anything which
369 did not translate - not clear this is a win */
370 /* compute amount we took */
371 use -= SvCUR(e->dataSV);
372 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
373 /* and as we did not take it it isn't pending */
374 SvCUR_set(e->dataSV,0);
375 } else {
376 /* Got nothing - assume partial character so we need some more */
377 /* Make sure e->dataSV is a normal SV before re-filling as
378 buffer alias will change under us
379 */
380 s = SvPV(e->dataSV,len);
381 sv_setpvn(e->dataSV,s,len);
382 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
383 goto retry;
384 }
385 FREETMPS;
386 LEAVE;
387 return code;
388 }
389 else {
c00aecee 390 end_of_file:
59035dcc 391 if (avail == 0)
392 PerlIOBase(f)->flags |= PERLIO_F_EOF;
393 else
394 PerlIOBase(f)->flags |= PERLIO_F_ERROR;
395 return -1;
396 }
397}
398
399IV
400PerlIOEncode_flush(pTHX_ PerlIO * f)
401{
402 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
403 IV code = 0;
c657f685 404
59035dcc 405 if (e->bufsv && (e->base.ptr > e->base.buf)) {
406 dSP;
407 SV *str;
408 char *s;
409 STRLEN len;
410 SSize_t count = 0;
411 if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) {
412 /* Write case encode the buffer and write() to layer below */
413 ENTER;
414 SAVETMPS;
415 PUSHMARK(sp);
416 XPUSHs(e->enc);
417 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
418 SvUTF8_on(e->bufsv);
419 XPUSHs(e->bufsv);
918951dd 420 XPUSHs(e->chk);
59035dcc 421 PUTBACK;
918951dd 422 if (call_method("encode", G_SCALAR) != 1) {
59035dcc 423 Perl_die(aTHX_ "panic: encode did not return a value");
424 }
425 SPAGAIN;
426 str = POPs;
427 PUTBACK;
428 s = SvPV(str, len);
429 count = PerlIO_write(PerlIONext(f),s,len);
7c436af3 430 if ((STRLEN)count != len) {
59035dcc 431 code = -1;
432 }
433 FREETMPS;
434 LEAVE;
435 if (PerlIO_flush(PerlIONext(f)) != 0) {
436 code = -1;
437 }
438 if (SvCUR(e->bufsv)) {
439 /* Did not all translate */
440 e->base.ptr = e->base.buf+SvCUR(e->bufsv);
441 return code;
442 }
443 }
444 else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
445 /* read case */
446 /* if we have any untranslated stuff then unread that first */
447 if (e->dataSV && SvCUR(e->dataSV)) {
448 s = SvPV(e->dataSV, len);
449 count = PerlIO_unread(PerlIONext(f),s,len);
7c436af3 450 if ((STRLEN)count != len) {
59035dcc 451 code = -1;
452 }
453 }
454 /* See if there is anything left in the buffer */
455 if (e->base.ptr < e->base.end) {
456 /* Bother - have unread data.
457 re-encode and unread() to layer below
458 */
459 ENTER;
460 SAVETMPS;
461 str = sv_newmortal();
462 sv_upgrade(str, SVt_PV);
463 SvPVX(str) = (char*)e->base.ptr;
464 SvLEN(str) = 0;
465 SvCUR_set(str, e->base.end - e->base.ptr);
466 SvPOK_only(str);
467 SvUTF8_on(str);
468 PUSHMARK(sp);
469 XPUSHs(e->enc);
470 XPUSHs(str);
918951dd 471 XPUSHs(e->chk);
59035dcc 472 PUTBACK;
918951dd 473 if (call_method("encode", G_SCALAR) != 1) {
59035dcc 474 Perl_die(aTHX_ "panic: encode did not return a value");
475 }
476 SPAGAIN;
477 str = POPs;
478 PUTBACK;
479 s = SvPV(str, len);
480 count = PerlIO_unread(PerlIONext(f),s,len);
7c436af3 481 if ((STRLEN)count != len) {
59035dcc 482 code = -1;
483 }
484 FREETMPS;
485 LEAVE;
486 }
487 }
488 e->base.ptr = e->base.end = e->base.buf;
489 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
490 }
491 return code;
492}
493
494IV
495PerlIOEncode_close(pTHX_ PerlIO * f)
496{
497 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
498 IV code = PerlIOBase_close(aTHX_ f);
c657f685 499
59035dcc 500 if (e->bufsv) {
501 if (e->base.buf && e->base.ptr > e->base.buf) {
502 Perl_croak(aTHX_ "Close with partial character");
503 }
504 SvREFCNT_dec(e->bufsv);
505 e->bufsv = Nullsv;
506 }
507 e->base.buf = NULL;
508 e->base.ptr = NULL;
509 e->base.end = NULL;
510 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
511 return code;
512}
513
514Off_t
515PerlIOEncode_tell(pTHX_ PerlIO * f)
516{
517 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
518 /* Unfortunately the only way to get a postion is to (re-)translate,
519 the UTF8 we have in bufefr and then ask layer below
520 */
521 PerlIO_flush(f);
522 if (b->buf && b->ptr > b->buf) {
523 Perl_croak(aTHX_ "Cannot tell at partial character");
524 }
525 return PerlIO_tell(PerlIONext(f));
526}
527
528PerlIO *
529PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
530 CLONE_PARAMS * params, int flags)
531{
532 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
533 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
534 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
535 if (oe->enc) {
536 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
537 }
538 }
539 return f;
540}
541
c00aecee 542SSize_t
543PerlIOEncode_write(pTHX_ PerlIO *f, const void *vbuf, Size_t count)
544{
545 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
546 if (e->flags & NEEDS_LINES) {
547 SSize_t done = 0;
548 const char *ptr = (const char *) vbuf;
549 const char *end = ptr+count;
550 while (ptr < end) {
551 const char *nl = ptr;
552 while (nl < end && *nl++ != '\n') /* empty body */;
553 done = PerlIOBuf_write(aTHX_ f, ptr, nl-ptr);
554 if (done != nl-ptr) {
555 if (done > 0) {
556 ptr += done;
557 }
558 break;
559 }
560 ptr += done;
561 if (ptr[-1] == '\n') {
562 if (PerlIOEncode_flush(aTHX_ f) != 0) {
563 break;
564 }
565 }
566 }
567 return (SSize_t) (ptr - (const char *) vbuf);
568 }
569 else {
570 return PerlIOBuf_write(aTHX_ f, vbuf, count);
571 }
572}
573
59035dcc 574PerlIO_funcs PerlIO_encode = {
575 "encoding",
576 sizeof(PerlIOEncode),
577 PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
578 PerlIOEncode_pushed,
579 PerlIOEncode_popped,
580 PerlIOBuf_open,
581 PerlIOEncode_getarg,
582 PerlIOBase_fileno,
583 PerlIOEncode_dup,
584 PerlIOBuf_read,
585 PerlIOBuf_unread,
c00aecee 586 PerlIOEncode_write,
59035dcc 587 PerlIOBuf_seek,
588 PerlIOEncode_tell,
589 PerlIOEncode_close,
590 PerlIOEncode_flush,
591 PerlIOEncode_fill,
592 PerlIOBase_eof,
593 PerlIOBase_error,
594 PerlIOBase_clearerr,
595 PerlIOBase_setlinebuf,
596 PerlIOEncode_get_base,
597 PerlIOBuf_bufsiz,
598 PerlIOBuf_get_ptr,
599 PerlIOBuf_get_cnt,
600 PerlIOBuf_set_ptrcnt,
601};
602#endif /* encode layer */
603
604MODULE = PerlIO::encoding PACKAGE = PerlIO::encoding
605
606PROTOTYPES: ENABLE
607
608BOOT:
609{
610#ifdef PERLIO_LAYERS
611 PerlIO_define_layer(aTHX_ &PerlIO_encode);
612#endif
613}