Upgrade to Encode 1.56, from Dan Kogai.
[p5sagit/p5-mst-13.2.git] / ext / PerlIO / encoding / encoding.xs
CommitLineData
918951dd 1/*
2 * $Id$
3 */
4
59035dcc 5#define PERL_NO_GET_CONTEXT
6#include "EXTERN.h"
7#include "perl.h"
8#include "XSUB.h"
9#define U8 U8
10
11#if defined(USE_PERLIO) && !defined(USE_SFIO)
12
13/* Define an encoding "layer" in the perliol.h sense.
14
15 The layer defined here "inherits" in an object-oriented sense from
16 the "perlio" layer with its PerlIOBuf_* "methods". The
17 implementation is particularly efficient as until Encode settles
18 down there is no point in tryint to tune it.
19
20 The layer works by overloading the "fill" and "flush" methods.
21
22 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO
23 perl API to convert the encoded data to UTF-8 form, then copies it
24 back to the buffer. The "base class's" read methods then see the
25 UTF-8 data.
26
27 "flush" transforms the UTF-8 data deposited by the "base class's
28 write method in the buffer back into the encoded form using the
29 encode OO perl API, then copies data back into the buffer and calls
30 "SUPER::flush.
31
32 Note that "flush" is _also_ called for read mode - we still do the
33 (back)-translate so that the the base class's "flush" sees the
34 correct number of encoded chars for positioning the seek
35 pointer. (This double translation is the worst performance issue -
36 particularly with all-perl encode engine.)
37
38*/
39
40#include "perliol.h"
41
42typedef struct {
43 PerlIOBuf base; /* PerlIOBuf stuff */
44 SV *bufsv; /* buffer seen by layers above */
45 SV *dataSV; /* data we have read from layer below */
46 SV *enc; /* the encoding object */
918951dd 47 SV *chk; /* CHECK in Encode methods */
c00aecee 48 int flags; /* Flags currently just needs lines */
59035dcc 49} PerlIOEncode;
50
c00aecee 51#define NEEDS_LINES 1
918951dd 52
c00aecee 53#if 0
54#define OUR_ENCODE_FB "Encode::FB_PERLQQ"
55#else
56#define OUR_ENCODE_FB "Encode::FB_QUIET"
57#endif
918951dd 58
59035dcc 59SV *
60PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
61{
62 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
63 SV *sv = &PL_sv_undef;
64 if (e->enc) {
65 dSP;
66 ENTER;
67 SAVETMPS;
68 PUSHMARK(sp);
69 XPUSHs(e->enc);
70 PUTBACK;
918951dd 71 if (call_method("name", G_SCALAR) == 1) {
59035dcc 72 SPAGAIN;
73 sv = newSVsv(POPs);
74 PUTBACK;
75 }
76 }
77 return sv;
78}
79
80IV
81PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
82{
83 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
84 dSP;
85 IV code;
c00aecee 86 SV *result = Nullsv;
59035dcc 87 code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
88 ENTER;
89 SAVETMPS;
918951dd 90
91 PUSHMARK(sp);
59035dcc 92 XPUSHs(arg);
93 PUTBACK;
918951dd 94 if (call_pv("Encode::find_encoding", G_SCALAR) != 1) {
59035dcc 95 /* should never happen */
96 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
97 return -1;
98 }
99 SPAGAIN;
c00aecee 100 result = POPs;
59035dcc 101 PUTBACK;
918951dd 102
c00aecee 103 if (!SvROK(result) || !SvOBJECT(SvRV(result))) {
59035dcc 104 e->enc = Nullsv;
59035dcc 105 Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
c00aecee 106 arg);
107 errno = EINVAL;
59035dcc 108 code = -1;
109 }
110 else {
c00aecee 111#ifdef USE_NEW_SEQUENCE
112 PUSHMARK(sp);
113 XPUSHs(result);
114 PUTBACK;
115 if (call_method("new_sequence",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
116 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support new_sequence",
117 arg);
118 }
119 else {
120 SPAGAIN;
121 result = POPs;
122 PUTBACK;
123 }
124#endif
125 e->enc = newSVsv(result);
126 PUSHMARK(sp);
127 XPUSHs(e->enc);
128 PUTBACK;
129 if (call_method("needs_lines",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
130 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support needs_lines",
131 arg);
132 }
133 else {
134 SPAGAIN;
135 result = POPs;
136 PUTBACK;
137 if (SvTRUE(result)) {
138 e->flags |= NEEDS_LINES;
139 }
140 }
59035dcc 141 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
142 }
c00aecee 143
144 e->chk = newSVsv(get_sv("PerlIO::encoding::check",0));
145
59035dcc 146 FREETMPS;
147 LEAVE;
148 return code;
149}
150
151IV
152PerlIOEncode_popped(pTHX_ PerlIO * f)
153{
154 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
155 if (e->enc) {
156 SvREFCNT_dec(e->enc);
157 e->enc = Nullsv;
158 }
159 if (e->bufsv) {
160 SvREFCNT_dec(e->bufsv);
161 e->bufsv = Nullsv;
162 }
163 if (e->dataSV) {
164 SvREFCNT_dec(e->dataSV);
165 e->dataSV = Nullsv;
166 }
c00aecee 167 if (e->chk) {
168 SvREFCNT_dec(e->chk);
169 e->dataSV = Nullsv;
170 }
59035dcc 171 return 0;
172}
173
174STDCHAR *
175PerlIOEncode_get_base(pTHX_ PerlIO * f)
176{
177 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
178 if (!e->base.bufsiz)
179 e->base.bufsiz = 1024;
180 if (!e->bufsv) {
181 e->bufsv = newSV(e->base.bufsiz);
182 sv_setpvn(e->bufsv, "", 0);
183 }
184 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
185 if (!e->base.ptr)
186 e->base.ptr = e->base.buf;
187 if (!e->base.end)
188 e->base.end = e->base.buf;
189 if (e->base.ptr < e->base.buf
190 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
191 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
192 e->base.buf + SvLEN(e->bufsv));
193 abort();
194 }
195 if (SvLEN(e->bufsv) < e->base.bufsiz) {
196 SSize_t poff = e->base.ptr - e->base.buf;
197 SSize_t eoff = e->base.end - e->base.buf;
198 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
199 e->base.ptr = e->base.buf + poff;
200 e->base.end = e->base.buf + eoff;
201 }
202 if (e->base.ptr < e->base.buf
203 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
204 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
205 e->base.buf + SvLEN(e->bufsv));
206 abort();
207 }
208 return e->base.buf;
209}
210
211IV
212PerlIOEncode_fill(pTHX_ PerlIO * f)
213{
214 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
215 dSP;
216 IV code = 0;
217 PerlIO *n;
218 SSize_t avail;
219 if (PerlIO_flush(f) != 0)
220 return -1;
221 n = PerlIONext(f);
222 if (!PerlIO_fast_gets(n)) {
223 /* Things get too messy if we don't have a buffer layer
224 push a :perlio to do the job */
225 char mode[8];
226 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
227 if (!n) {
228 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
229 }
230 }
231 ENTER;
232 SAVETMPS;
233 retry:
234 avail = PerlIO_get_cnt(n);
235 if (avail <= 0) {
236 avail = PerlIO_fill(n);
237 if (avail == 0) {
238 avail = PerlIO_get_cnt(n);
239 }
240 else {
241 if (!PerlIO_error(n) && PerlIO_eof(n))
242 avail = 0;
243 }
244 }
c00aecee 245 if (avail > 0 || (e->flags & NEEDS_LINES)) {
59035dcc 246 STDCHAR *ptr = PerlIO_get_ptr(n);
c00aecee 247 SSize_t use = (avail >= 0) ? avail : 0;
59035dcc 248 SV *uni;
249 char *s;
250 STRLEN len = 0;
251 e->base.ptr = e->base.end = (STDCHAR *) Nullch;
252 (void) PerlIOEncode_get_base(aTHX_ f);
253 if (!e->dataSV)
254 e->dataSV = newSV(0);
255 if (SvTYPE(e->dataSV) < SVt_PV) {
256 sv_upgrade(e->dataSV,SVt_PV);
257 }
c00aecee 258 if (e->flags & NEEDS_LINES) {
259 /* Encoding needs whole lines (e.g. iso-2022-*)
260 search back from end of available data for
261 and line marker
262 */
263 STDCHAR *nl = ptr+use-1;
264 while (nl >= ptr) {
265 if (*nl == '\n') {
266 break;
267 }
268 nl--;
269 }
270 if (nl >= ptr && *nl == '\n') {
271 /* found a line - take up to and including that */
272 use = (nl+1)-ptr;
273 }
274 else if (avail > 0) {
275 /* No line, but not EOF - append avail to the pending data */
276 sv_catpvn(e->dataSV, ptr, use);
277 PerlIO_set_ptrcnt(n, ptr+use, 0);
278 goto retry;
279 }
280 else if (!SvCUR(e->dataSV)) {
281 goto end_of_file;
282 }
283 }
59035dcc 284 if (SvCUR(e->dataSV)) {
285 /* something left over from last time - create a normal
286 SV with new data appended
287 */
288 if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
c00aecee 289 if (e->flags & NEEDS_LINES) {
290 /* Have to grow buffer */
291 e->base.bufsiz = use + SvCUR(e->dataSV);
292 PerlIOEncode_get_base(aTHX_ f);
293 }
294 else {
295 use = e->base.bufsiz - SvCUR(e->dataSV);
296 }
59035dcc 297 }
298 sv_catpvn(e->dataSV,(char*)ptr,use);
299 }
300 else {
301 /* Create a "dummy" SV to represent the available data from layer below */
302 if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) {
303 Safefree(SvPVX(e->dataSV));
304 }
91f3b821 305 if (use > (SSize_t)e->base.bufsiz) {
c00aecee 306 if (e->flags & NEEDS_LINES) {
307 /* Have to grow buffer */
308 e->base.bufsiz = use;
309 PerlIOEncode_get_base(aTHX_ f);
310 }
311 else {
312 use = e->base.bufsiz;
313 }
59035dcc 314 }
315 SvPVX(e->dataSV) = (char *) ptr;
316 SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */
317 SvCUR_set(e->dataSV,use);
318 SvPOK_only(e->dataSV);
319 }
320 SvUTF8_off(e->dataSV);
321 PUSHMARK(sp);
322 XPUSHs(e->enc);
323 XPUSHs(e->dataSV);
918951dd 324 XPUSHs(e->chk);
59035dcc 325 PUTBACK;
918951dd 326 if (call_method("decode", G_SCALAR) != 1) {
59035dcc 327 Perl_die(aTHX_ "panic: decode did not return a value");
328 }
329 SPAGAIN;
330 uni = POPs;
331 PUTBACK;
332 /* Now get translated string (forced to UTF-8) and use as buffer */
333 if (SvPOK(uni)) {
334 s = SvPVutf8(uni, len);
335#ifdef PARANOID_ENCODE_CHECKS
336 if (len && !is_utf8_string((U8*)s,len)) {
337 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
338 }
339#endif
340 }
341 if (len > 0) {
342 /* Got _something */
343 /* if decode gave us back dataSV then data may vanish when
344 we do ptrcnt adjust - so take our copy now.
345 (The copy is a pain - need a put-it-here option for decode.)
346 */
347 sv_setpvn(e->bufsv,s,len);
348 e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
349 e->base.end = e->base.ptr + SvCUR(e->bufsv);
350 PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
351 SvUTF8_on(e->bufsv);
352
353 /* Adjust ptr/cnt not taking anything which
354 did not translate - not clear this is a win */
355 /* compute amount we took */
356 use -= SvCUR(e->dataSV);
357 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
358 /* and as we did not take it it isn't pending */
359 SvCUR_set(e->dataSV,0);
360 } else {
361 /* Got nothing - assume partial character so we need some more */
362 /* Make sure e->dataSV is a normal SV before re-filling as
363 buffer alias will change under us
364 */
365 s = SvPV(e->dataSV,len);
366 sv_setpvn(e->dataSV,s,len);
367 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
368 goto retry;
369 }
370 FREETMPS;
371 LEAVE;
372 return code;
373 }
374 else {
c00aecee 375 end_of_file:
59035dcc 376 if (avail == 0)
377 PerlIOBase(f)->flags |= PERLIO_F_EOF;
378 else
379 PerlIOBase(f)->flags |= PERLIO_F_ERROR;
380 return -1;
381 }
382}
383
384IV
385PerlIOEncode_flush(pTHX_ PerlIO * f)
386{
387 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
388 IV code = 0;
389 if (e->bufsv && (e->base.ptr > e->base.buf)) {
390 dSP;
391 SV *str;
392 char *s;
393 STRLEN len;
394 SSize_t count = 0;
395 if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) {
396 /* Write case encode the buffer and write() to layer below */
397 ENTER;
398 SAVETMPS;
399 PUSHMARK(sp);
400 XPUSHs(e->enc);
401 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
402 SvUTF8_on(e->bufsv);
403 XPUSHs(e->bufsv);
918951dd 404 XPUSHs(e->chk);
59035dcc 405 PUTBACK;
918951dd 406 if (call_method("encode", G_SCALAR) != 1) {
59035dcc 407 Perl_die(aTHX_ "panic: encode did not return a value");
408 }
409 SPAGAIN;
410 str = POPs;
411 PUTBACK;
412 s = SvPV(str, len);
413 count = PerlIO_write(PerlIONext(f),s,len);
414 if (count != len) {
415 code = -1;
416 }
417 FREETMPS;
418 LEAVE;
419 if (PerlIO_flush(PerlIONext(f)) != 0) {
420 code = -1;
421 }
422 if (SvCUR(e->bufsv)) {
423 /* Did not all translate */
424 e->base.ptr = e->base.buf+SvCUR(e->bufsv);
425 return code;
426 }
427 }
428 else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
429 /* read case */
430 /* if we have any untranslated stuff then unread that first */
431 if (e->dataSV && SvCUR(e->dataSV)) {
432 s = SvPV(e->dataSV, len);
433 count = PerlIO_unread(PerlIONext(f),s,len);
434 if (count != len) {
435 code = -1;
436 }
437 }
438 /* See if there is anything left in the buffer */
439 if (e->base.ptr < e->base.end) {
440 /* Bother - have unread data.
441 re-encode and unread() to layer below
442 */
443 ENTER;
444 SAVETMPS;
445 str = sv_newmortal();
446 sv_upgrade(str, SVt_PV);
447 SvPVX(str) = (char*)e->base.ptr;
448 SvLEN(str) = 0;
449 SvCUR_set(str, e->base.end - e->base.ptr);
450 SvPOK_only(str);
451 SvUTF8_on(str);
452 PUSHMARK(sp);
453 XPUSHs(e->enc);
454 XPUSHs(str);
918951dd 455 XPUSHs(e->chk);
59035dcc 456 PUTBACK;
918951dd 457 if (call_method("encode", G_SCALAR) != 1) {
59035dcc 458 Perl_die(aTHX_ "panic: encode did not return a value");
459 }
460 SPAGAIN;
461 str = POPs;
462 PUTBACK;
463 s = SvPV(str, len);
464 count = PerlIO_unread(PerlIONext(f),s,len);
465 if (count != len) {
466 code = -1;
467 }
468 FREETMPS;
469 LEAVE;
470 }
471 }
472 e->base.ptr = e->base.end = e->base.buf;
473 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
474 }
475 return code;
476}
477
478IV
479PerlIOEncode_close(pTHX_ PerlIO * f)
480{
481 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
482 IV code = PerlIOBase_close(aTHX_ f);
483 if (e->bufsv) {
484 if (e->base.buf && e->base.ptr > e->base.buf) {
485 Perl_croak(aTHX_ "Close with partial character");
486 }
487 SvREFCNT_dec(e->bufsv);
488 e->bufsv = Nullsv;
489 }
490 e->base.buf = NULL;
491 e->base.ptr = NULL;
492 e->base.end = NULL;
493 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
494 return code;
495}
496
497Off_t
498PerlIOEncode_tell(pTHX_ PerlIO * f)
499{
500 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
501 /* Unfortunately the only way to get a postion is to (re-)translate,
502 the UTF8 we have in bufefr and then ask layer below
503 */
504 PerlIO_flush(f);
505 if (b->buf && b->ptr > b->buf) {
506 Perl_croak(aTHX_ "Cannot tell at partial character");
507 }
508 return PerlIO_tell(PerlIONext(f));
509}
510
511PerlIO *
512PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
513 CLONE_PARAMS * params, int flags)
514{
515 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
516 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
517 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
518 if (oe->enc) {
519 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
520 }
521 }
522 return f;
523}
524
c00aecee 525SSize_t
526PerlIOEncode_write(pTHX_ PerlIO *f, const void *vbuf, Size_t count)
527{
528 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
529 if (e->flags & NEEDS_LINES) {
530 SSize_t done = 0;
531 const char *ptr = (const char *) vbuf;
532 const char *end = ptr+count;
533 while (ptr < end) {
534 const char *nl = ptr;
535 while (nl < end && *nl++ != '\n') /* empty body */;
536 done = PerlIOBuf_write(aTHX_ f, ptr, nl-ptr);
537 if (done != nl-ptr) {
538 if (done > 0) {
539 ptr += done;
540 }
541 break;
542 }
543 ptr += done;
544 if (ptr[-1] == '\n') {
545 if (PerlIOEncode_flush(aTHX_ f) != 0) {
546 break;
547 }
548 }
549 }
550 return (SSize_t) (ptr - (const char *) vbuf);
551 }
552 else {
553 return PerlIOBuf_write(aTHX_ f, vbuf, count);
554 }
555}
556
59035dcc 557PerlIO_funcs PerlIO_encode = {
558 "encoding",
559 sizeof(PerlIOEncode),
560 PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
561 PerlIOEncode_pushed,
562 PerlIOEncode_popped,
563 PerlIOBuf_open,
564 PerlIOEncode_getarg,
565 PerlIOBase_fileno,
566 PerlIOEncode_dup,
567 PerlIOBuf_read,
568 PerlIOBuf_unread,
c00aecee 569 PerlIOEncode_write,
59035dcc 570 PerlIOBuf_seek,
571 PerlIOEncode_tell,
572 PerlIOEncode_close,
573 PerlIOEncode_flush,
574 PerlIOEncode_fill,
575 PerlIOBase_eof,
576 PerlIOBase_error,
577 PerlIOBase_clearerr,
578 PerlIOBase_setlinebuf,
579 PerlIOEncode_get_base,
580 PerlIOBuf_bufsiz,
581 PerlIOBuf_get_ptr,
582 PerlIOBuf_get_cnt,
583 PerlIOBuf_set_ptrcnt,
584};
585#endif /* encode layer */
586
587MODULE = PerlIO::encoding PACKAGE = PerlIO::encoding
588
589PROTOTYPES: ENABLE
590
591BOOT:
592{
c00aecee 593 SV *sv = get_sv("PerlIO::encoding::check", GV_ADD|GV_ADDMULTI);
594 sv_setiv(sv,0);
595 PUSHMARK(sp);
596 PUTBACK;
597 if (call_pv(OUR_ENCODE_FB, G_SCALAR) != 1) {
598 Perl_warner(aTHX_ packWARN(WARN_IO),
599 "Call to %s failed!",OUR_ENCODE_FB);
600 }
601 else {
602 SPAGAIN;
603 sv_setsv(sv,POPs);
604 PUTBACK;
605 }
59035dcc 606#ifdef PERLIO_LAYERS
607 PerlIO_define_layer(aTHX_ &PerlIO_encode);
608#endif
609}