Upgrade to PerlIO::encoding 0.02, from Dan Kogai.
[p5sagit/p5-mst-13.2.git] / ext / PerlIO / encoding / encoding.xs
CommitLineData
918951dd 1/*
2 * $Id$
3 */
4
59035dcc 5#define PERL_NO_GET_CONTEXT
6#include "EXTERN.h"
7#include "perl.h"
8#include "XSUB.h"
9#define U8 U8
10
11#if defined(USE_PERLIO) && !defined(USE_SFIO)
12
13/* Define an encoding "layer" in the perliol.h sense.
14
15 The layer defined here "inherits" in an object-oriented sense from
16 the "perlio" layer with its PerlIOBuf_* "methods". The
17 implementation is particularly efficient as until Encode settles
18 down there is no point in tryint to tune it.
19
20 The layer works by overloading the "fill" and "flush" methods.
21
22 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO
23 perl API to convert the encoded data to UTF-8 form, then copies it
24 back to the buffer. The "base class's" read methods then see the
25 UTF-8 data.
26
27 "flush" transforms the UTF-8 data deposited by the "base class's
28 write method in the buffer back into the encoded form using the
29 encode OO perl API, then copies data back into the buffer and calls
30 "SUPER::flush.
31
32 Note that "flush" is _also_ called for read mode - we still do the
33 (back)-translate so that the the base class's "flush" sees the
34 correct number of encoded chars for positioning the seek
35 pointer. (This double translation is the worst performance issue -
36 particularly with all-perl encode engine.)
37
38*/
39
40#include "perliol.h"
41
42typedef struct {
43 PerlIOBuf base; /* PerlIOBuf stuff */
44 SV *bufsv; /* buffer seen by layers above */
45 SV *dataSV; /* data we have read from layer below */
46 SV *enc; /* the encoding object */
918951dd 47 SV *chk; /* CHECK in Encode methods */
59035dcc 48} PerlIOEncode;
49
918951dd 50
51#define ENCODE_FB_QUIET "Encode::FB_QUIET"
52
53
59035dcc 54SV *
55PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
56{
57 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
58 SV *sv = &PL_sv_undef;
59 if (e->enc) {
60 dSP;
61 ENTER;
62 SAVETMPS;
63 PUSHMARK(sp);
64 XPUSHs(e->enc);
65 PUTBACK;
918951dd 66 if (call_method("name", G_SCALAR) == 1) {
59035dcc 67 SPAGAIN;
68 sv = newSVsv(POPs);
69 PUTBACK;
70 }
71 }
72 return sv;
73}
74
75IV
76PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
77{
78 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
79 dSP;
80 IV code;
81 code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
82 ENTER;
83 SAVETMPS;
918951dd 84
85 PUSHMARK(sp);
86 PUTBACK;
87 if (call_pv(ENCODE_FB_QUIET, G_SCALAR|G_NOARGS) != 1) {
88 Perl_die(aTHX_ "Call to Encode::FB_QUIET failed!");
89 code = -1;
90 }
91 SPAGAIN;
92 e->chk = newSVsv(POPs);
93 PUTBACK;
94
59035dcc 95 PUSHMARK(sp);
96 XPUSHs(arg);
97 PUTBACK;
918951dd 98 if (call_pv("Encode::find_encoding", G_SCALAR) != 1) {
59035dcc 99 /* should never happen */
100 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
101 return -1;
102 }
103 SPAGAIN;
104 e->enc = POPs;
105 PUTBACK;
918951dd 106
59035dcc 107 if (!SvROK(e->enc)) {
108 e->enc = Nullsv;
109 errno = EINVAL;
110 Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
111 arg);
112 code = -1;
113 }
114 else {
115 SvREFCNT_inc(e->enc);
116 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
117 }
118 FREETMPS;
119 LEAVE;
120 return code;
121}
122
123IV
124PerlIOEncode_popped(pTHX_ PerlIO * f)
125{
126 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
127 if (e->enc) {
128 SvREFCNT_dec(e->enc);
129 e->enc = Nullsv;
130 }
131 if (e->bufsv) {
132 SvREFCNT_dec(e->bufsv);
133 e->bufsv = Nullsv;
134 }
135 if (e->dataSV) {
136 SvREFCNT_dec(e->dataSV);
137 e->dataSV = Nullsv;
138 }
139 return 0;
140}
141
142STDCHAR *
143PerlIOEncode_get_base(pTHX_ PerlIO * f)
144{
145 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
146 if (!e->base.bufsiz)
147 e->base.bufsiz = 1024;
148 if (!e->bufsv) {
149 e->bufsv = newSV(e->base.bufsiz);
150 sv_setpvn(e->bufsv, "", 0);
151 }
152 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
153 if (!e->base.ptr)
154 e->base.ptr = e->base.buf;
155 if (!e->base.end)
156 e->base.end = e->base.buf;
157 if (e->base.ptr < e->base.buf
158 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
159 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
160 e->base.buf + SvLEN(e->bufsv));
161 abort();
162 }
163 if (SvLEN(e->bufsv) < e->base.bufsiz) {
164 SSize_t poff = e->base.ptr - e->base.buf;
165 SSize_t eoff = e->base.end - e->base.buf;
166 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
167 e->base.ptr = e->base.buf + poff;
168 e->base.end = e->base.buf + eoff;
169 }
170 if (e->base.ptr < e->base.buf
171 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
172 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
173 e->base.buf + SvLEN(e->bufsv));
174 abort();
175 }
176 return e->base.buf;
177}
178
179IV
180PerlIOEncode_fill(pTHX_ PerlIO * f)
181{
182 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
183 dSP;
184 IV code = 0;
185 PerlIO *n;
186 SSize_t avail;
187 if (PerlIO_flush(f) != 0)
188 return -1;
189 n = PerlIONext(f);
190 if (!PerlIO_fast_gets(n)) {
191 /* Things get too messy if we don't have a buffer layer
192 push a :perlio to do the job */
193 char mode[8];
194 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
195 if (!n) {
196 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
197 }
198 }
199 ENTER;
200 SAVETMPS;
201 retry:
202 avail = PerlIO_get_cnt(n);
203 if (avail <= 0) {
204 avail = PerlIO_fill(n);
205 if (avail == 0) {
206 avail = PerlIO_get_cnt(n);
207 }
208 else {
209 if (!PerlIO_error(n) && PerlIO_eof(n))
210 avail = 0;
211 }
212 }
213 if (avail > 0) {
214 STDCHAR *ptr = PerlIO_get_ptr(n);
215 SSize_t use = avail;
216 SV *uni;
217 char *s;
218 STRLEN len = 0;
219 e->base.ptr = e->base.end = (STDCHAR *) Nullch;
220 (void) PerlIOEncode_get_base(aTHX_ f);
221 if (!e->dataSV)
222 e->dataSV = newSV(0);
223 if (SvTYPE(e->dataSV) < SVt_PV) {
224 sv_upgrade(e->dataSV,SVt_PV);
225 }
226 if (SvCUR(e->dataSV)) {
227 /* something left over from last time - create a normal
228 SV with new data appended
229 */
230 if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
231 use = e->base.bufsiz - SvCUR(e->dataSV);
232 }
233 sv_catpvn(e->dataSV,(char*)ptr,use);
234 }
235 else {
236 /* Create a "dummy" SV to represent the available data from layer below */
237 if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) {
238 Safefree(SvPVX(e->dataSV));
239 }
240 if (use > e->base.bufsiz) {
241 use = e->base.bufsiz;
242 }
243 SvPVX(e->dataSV) = (char *) ptr;
244 SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */
245 SvCUR_set(e->dataSV,use);
246 SvPOK_only(e->dataSV);
247 }
248 SvUTF8_off(e->dataSV);
249 PUSHMARK(sp);
250 XPUSHs(e->enc);
251 XPUSHs(e->dataSV);
918951dd 252 XPUSHs(e->chk);
59035dcc 253 PUTBACK;
918951dd 254 if (call_method("decode", G_SCALAR) != 1) {
59035dcc 255 Perl_die(aTHX_ "panic: decode did not return a value");
256 }
257 SPAGAIN;
258 uni = POPs;
259 PUTBACK;
260 /* Now get translated string (forced to UTF-8) and use as buffer */
261 if (SvPOK(uni)) {
262 s = SvPVutf8(uni, len);
263#ifdef PARANOID_ENCODE_CHECKS
264 if (len && !is_utf8_string((U8*)s,len)) {
265 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
266 }
267#endif
268 }
269 if (len > 0) {
270 /* Got _something */
271 /* if decode gave us back dataSV then data may vanish when
272 we do ptrcnt adjust - so take our copy now.
273 (The copy is a pain - need a put-it-here option for decode.)
274 */
275 sv_setpvn(e->bufsv,s,len);
276 e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
277 e->base.end = e->base.ptr + SvCUR(e->bufsv);
278 PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
279 SvUTF8_on(e->bufsv);
280
281 /* Adjust ptr/cnt not taking anything which
282 did not translate - not clear this is a win */
283 /* compute amount we took */
284 use -= SvCUR(e->dataSV);
285 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
286 /* and as we did not take it it isn't pending */
287 SvCUR_set(e->dataSV,0);
288 } else {
289 /* Got nothing - assume partial character so we need some more */
290 /* Make sure e->dataSV is a normal SV before re-filling as
291 buffer alias will change under us
292 */
293 s = SvPV(e->dataSV,len);
294 sv_setpvn(e->dataSV,s,len);
295 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
296 goto retry;
297 }
298 FREETMPS;
299 LEAVE;
300 return code;
301 }
302 else {
303 if (avail == 0)
304 PerlIOBase(f)->flags |= PERLIO_F_EOF;
305 else
306 PerlIOBase(f)->flags |= PERLIO_F_ERROR;
307 return -1;
308 }
309}
310
311IV
312PerlIOEncode_flush(pTHX_ PerlIO * f)
313{
314 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
315 IV code = 0;
316 if (e->bufsv && (e->base.ptr > e->base.buf)) {
317 dSP;
318 SV *str;
319 char *s;
320 STRLEN len;
321 SSize_t count = 0;
322 if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) {
323 /* Write case encode the buffer and write() to layer below */
324 ENTER;
325 SAVETMPS;
326 PUSHMARK(sp);
327 XPUSHs(e->enc);
328 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
329 SvUTF8_on(e->bufsv);
330 XPUSHs(e->bufsv);
918951dd 331 XPUSHs(e->chk);
59035dcc 332 PUTBACK;
918951dd 333 if (call_method("encode", G_SCALAR) != 1) {
59035dcc 334 Perl_die(aTHX_ "panic: encode did not return a value");
335 }
336 SPAGAIN;
337 str = POPs;
338 PUTBACK;
339 s = SvPV(str, len);
340 count = PerlIO_write(PerlIONext(f),s,len);
341 if (count != len) {
342 code = -1;
343 }
344 FREETMPS;
345 LEAVE;
346 if (PerlIO_flush(PerlIONext(f)) != 0) {
347 code = -1;
348 }
349 if (SvCUR(e->bufsv)) {
350 /* Did not all translate */
351 e->base.ptr = e->base.buf+SvCUR(e->bufsv);
352 return code;
353 }
354 }
355 else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
356 /* read case */
357 /* if we have any untranslated stuff then unread that first */
358 if (e->dataSV && SvCUR(e->dataSV)) {
359 s = SvPV(e->dataSV, len);
360 count = PerlIO_unread(PerlIONext(f),s,len);
361 if (count != len) {
362 code = -1;
363 }
364 }
365 /* See if there is anything left in the buffer */
366 if (e->base.ptr < e->base.end) {
367 /* Bother - have unread data.
368 re-encode and unread() to layer below
369 */
370 ENTER;
371 SAVETMPS;
372 str = sv_newmortal();
373 sv_upgrade(str, SVt_PV);
374 SvPVX(str) = (char*)e->base.ptr;
375 SvLEN(str) = 0;
376 SvCUR_set(str, e->base.end - e->base.ptr);
377 SvPOK_only(str);
378 SvUTF8_on(str);
379 PUSHMARK(sp);
380 XPUSHs(e->enc);
381 XPUSHs(str);
918951dd 382 XPUSHs(e->chk);
59035dcc 383 PUTBACK;
918951dd 384 if (call_method("encode", G_SCALAR) != 1) {
59035dcc 385 Perl_die(aTHX_ "panic: encode did not return a value");
386 }
387 SPAGAIN;
388 str = POPs;
389 PUTBACK;
390 s = SvPV(str, len);
391 count = PerlIO_unread(PerlIONext(f),s,len);
392 if (count != len) {
393 code = -1;
394 }
395 FREETMPS;
396 LEAVE;
397 }
398 }
399 e->base.ptr = e->base.end = e->base.buf;
400 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
401 }
402 return code;
403}
404
405IV
406PerlIOEncode_close(pTHX_ PerlIO * f)
407{
408 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
409 IV code = PerlIOBase_close(aTHX_ f);
410 if (e->bufsv) {
411 if (e->base.buf && e->base.ptr > e->base.buf) {
412 Perl_croak(aTHX_ "Close with partial character");
413 }
414 SvREFCNT_dec(e->bufsv);
415 e->bufsv = Nullsv;
416 }
417 e->base.buf = NULL;
418 e->base.ptr = NULL;
419 e->base.end = NULL;
420 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
421 return code;
422}
423
424Off_t
425PerlIOEncode_tell(pTHX_ PerlIO * f)
426{
427 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
428 /* Unfortunately the only way to get a postion is to (re-)translate,
429 the UTF8 we have in bufefr and then ask layer below
430 */
431 PerlIO_flush(f);
432 if (b->buf && b->ptr > b->buf) {
433 Perl_croak(aTHX_ "Cannot tell at partial character");
434 }
435 return PerlIO_tell(PerlIONext(f));
436}
437
438PerlIO *
439PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
440 CLONE_PARAMS * params, int flags)
441{
442 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
443 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
444 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
445 if (oe->enc) {
446 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
447 }
448 }
449 return f;
450}
451
452PerlIO_funcs PerlIO_encode = {
453 "encoding",
454 sizeof(PerlIOEncode),
455 PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
456 PerlIOEncode_pushed,
457 PerlIOEncode_popped,
458 PerlIOBuf_open,
459 PerlIOEncode_getarg,
460 PerlIOBase_fileno,
461 PerlIOEncode_dup,
462 PerlIOBuf_read,
463 PerlIOBuf_unread,
464 PerlIOBuf_write,
465 PerlIOBuf_seek,
466 PerlIOEncode_tell,
467 PerlIOEncode_close,
468 PerlIOEncode_flush,
469 PerlIOEncode_fill,
470 PerlIOBase_eof,
471 PerlIOBase_error,
472 PerlIOBase_clearerr,
473 PerlIOBase_setlinebuf,
474 PerlIOEncode_get_base,
475 PerlIOBuf_bufsiz,
476 PerlIOBuf_get_ptr,
477 PerlIOBuf_get_cnt,
478 PerlIOBuf_set_ptrcnt,
479};
480#endif /* encode layer */
481
482MODULE = PerlIO::encoding PACKAGE = PerlIO::encoding
483
484PROTOTYPES: ENABLE
485
486BOOT:
487{
488#ifdef PERLIO_LAYERS
489 PerlIO_define_layer(aTHX_ &PerlIO_encode);
490#endif
491}