The previous change on this was *ahem* slightly off-topic.
[p5sagit/p5-mst-13.2.git] / ext / PerlIO / encoding / encoding.xs
CommitLineData
59035dcc 1#define PERL_NO_GET_CONTEXT
2#include "EXTERN.h"
3#include "perl.h"
4#include "XSUB.h"
5#define U8 U8
6
7#if defined(USE_PERLIO) && !defined(USE_SFIO)
8
9/* Define an encoding "layer" in the perliol.h sense.
10
11 The layer defined here "inherits" in an object-oriented sense from
12 the "perlio" layer with its PerlIOBuf_* "methods". The
13 implementation is particularly efficient as until Encode settles
14 down there is no point in tryint to tune it.
15
16 The layer works by overloading the "fill" and "flush" methods.
17
18 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO
19 perl API to convert the encoded data to UTF-8 form, then copies it
20 back to the buffer. The "base class's" read methods then see the
21 UTF-8 data.
22
23 "flush" transforms the UTF-8 data deposited by the "base class's
24 write method in the buffer back into the encoded form using the
25 encode OO perl API, then copies data back into the buffer and calls
26 "SUPER::flush.
27
28 Note that "flush" is _also_ called for read mode - we still do the
29 (back)-translate so that the the base class's "flush" sees the
30 correct number of encoded chars for positioning the seek
31 pointer. (This double translation is the worst performance issue -
32 particularly with all-perl encode engine.)
33
34*/
35
36#include "perliol.h"
37
38typedef struct {
39 PerlIOBuf base; /* PerlIOBuf stuff */
40 SV *bufsv; /* buffer seen by layers above */
41 SV *dataSV; /* data we have read from layer below */
42 SV *enc; /* the encoding object */
43} PerlIOEncode;
44
45SV *
46PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
47{
48 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
49 SV *sv = &PL_sv_undef;
50 if (e->enc) {
51 dSP;
52 ENTER;
53 SAVETMPS;
54 PUSHMARK(sp);
55 XPUSHs(e->enc);
56 PUTBACK;
57 if (perl_call_method("name", G_SCALAR) == 1) {
58 SPAGAIN;
59 sv = newSVsv(POPs);
60 PUTBACK;
61 }
62 }
63 return sv;
64}
65
66IV
67PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
68{
69 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
70 dSP;
71 IV code;
72 code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
73 ENTER;
74 SAVETMPS;
75 PUSHMARK(sp);
76 XPUSHs(arg);
77 PUTBACK;
78 if (perl_call_pv("Encode::find_encoding", G_SCALAR) != 1) {
79 /* should never happen */
80 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
81 return -1;
82 }
83 SPAGAIN;
84 e->enc = POPs;
85 PUTBACK;
86 if (!SvROK(e->enc)) {
87 e->enc = Nullsv;
88 errno = EINVAL;
89 Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
90 arg);
91 code = -1;
92 }
93 else {
94 SvREFCNT_inc(e->enc);
95 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
96 }
97 FREETMPS;
98 LEAVE;
99 return code;
100}
101
102IV
103PerlIOEncode_popped(pTHX_ PerlIO * f)
104{
105 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
106 if (e->enc) {
107 SvREFCNT_dec(e->enc);
108 e->enc = Nullsv;
109 }
110 if (e->bufsv) {
111 SvREFCNT_dec(e->bufsv);
112 e->bufsv = Nullsv;
113 }
114 if (e->dataSV) {
115 SvREFCNT_dec(e->dataSV);
116 e->dataSV = Nullsv;
117 }
118 return 0;
119}
120
121STDCHAR *
122PerlIOEncode_get_base(pTHX_ PerlIO * f)
123{
124 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
125 if (!e->base.bufsiz)
126 e->base.bufsiz = 1024;
127 if (!e->bufsv) {
128 e->bufsv = newSV(e->base.bufsiz);
129 sv_setpvn(e->bufsv, "", 0);
130 }
131 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
132 if (!e->base.ptr)
133 e->base.ptr = e->base.buf;
134 if (!e->base.end)
135 e->base.end = e->base.buf;
136 if (e->base.ptr < e->base.buf
137 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
138 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
139 e->base.buf + SvLEN(e->bufsv));
140 abort();
141 }
142 if (SvLEN(e->bufsv) < e->base.bufsiz) {
143 SSize_t poff = e->base.ptr - e->base.buf;
144 SSize_t eoff = e->base.end - e->base.buf;
145 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
146 e->base.ptr = e->base.buf + poff;
147 e->base.end = e->base.buf + eoff;
148 }
149 if (e->base.ptr < e->base.buf
150 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
151 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
152 e->base.buf + SvLEN(e->bufsv));
153 abort();
154 }
155 return e->base.buf;
156}
157
158IV
159PerlIOEncode_fill(pTHX_ PerlIO * f)
160{
161 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
162 dSP;
163 IV code = 0;
164 PerlIO *n;
165 SSize_t avail;
166 if (PerlIO_flush(f) != 0)
167 return -1;
168 n = PerlIONext(f);
169 if (!PerlIO_fast_gets(n)) {
170 /* Things get too messy if we don't have a buffer layer
171 push a :perlio to do the job */
172 char mode[8];
173 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
174 if (!n) {
175 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
176 }
177 }
178 ENTER;
179 SAVETMPS;
180 retry:
181 avail = PerlIO_get_cnt(n);
182 if (avail <= 0) {
183 avail = PerlIO_fill(n);
184 if (avail == 0) {
185 avail = PerlIO_get_cnt(n);
186 }
187 else {
188 if (!PerlIO_error(n) && PerlIO_eof(n))
189 avail = 0;
190 }
191 }
192 if (avail > 0) {
193 STDCHAR *ptr = PerlIO_get_ptr(n);
194 SSize_t use = avail;
195 SV *uni;
196 char *s;
197 STRLEN len = 0;
198 e->base.ptr = e->base.end = (STDCHAR *) Nullch;
199 (void) PerlIOEncode_get_base(aTHX_ f);
200 if (!e->dataSV)
201 e->dataSV = newSV(0);
202 if (SvTYPE(e->dataSV) < SVt_PV) {
203 sv_upgrade(e->dataSV,SVt_PV);
204 }
205 if (SvCUR(e->dataSV)) {
206 /* something left over from last time - create a normal
207 SV with new data appended
208 */
209 if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
210 use = e->base.bufsiz - SvCUR(e->dataSV);
211 }
212 sv_catpvn(e->dataSV,(char*)ptr,use);
213 }
214 else {
215 /* Create a "dummy" SV to represent the available data from layer below */
216 if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) {
217 Safefree(SvPVX(e->dataSV));
218 }
219 if (use > e->base.bufsiz) {
220 use = e->base.bufsiz;
221 }
222 SvPVX(e->dataSV) = (char *) ptr;
223 SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */
224 SvCUR_set(e->dataSV,use);
225 SvPOK_only(e->dataSV);
226 }
227 SvUTF8_off(e->dataSV);
228 PUSHMARK(sp);
229 XPUSHs(e->enc);
230 XPUSHs(e->dataSV);
231 XPUSHs(&PL_sv_yes);
232 PUTBACK;
233 if (perl_call_method("decode", G_SCALAR) != 1) {
234 Perl_die(aTHX_ "panic: decode did not return a value");
235 }
236 SPAGAIN;
237 uni = POPs;
238 PUTBACK;
239 /* Now get translated string (forced to UTF-8) and use as buffer */
240 if (SvPOK(uni)) {
241 s = SvPVutf8(uni, len);
242#ifdef PARANOID_ENCODE_CHECKS
243 if (len && !is_utf8_string((U8*)s,len)) {
244 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
245 }
246#endif
247 }
248 if (len > 0) {
249 /* Got _something */
250 /* if decode gave us back dataSV then data may vanish when
251 we do ptrcnt adjust - so take our copy now.
252 (The copy is a pain - need a put-it-here option for decode.)
253 */
254 sv_setpvn(e->bufsv,s,len);
255 e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
256 e->base.end = e->base.ptr + SvCUR(e->bufsv);
257 PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
258 SvUTF8_on(e->bufsv);
259
260 /* Adjust ptr/cnt not taking anything which
261 did not translate - not clear this is a win */
262 /* compute amount we took */
263 use -= SvCUR(e->dataSV);
264 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
265 /* and as we did not take it it isn't pending */
266 SvCUR_set(e->dataSV,0);
267 } else {
268 /* Got nothing - assume partial character so we need some more */
269 /* Make sure e->dataSV is a normal SV before re-filling as
270 buffer alias will change under us
271 */
272 s = SvPV(e->dataSV,len);
273 sv_setpvn(e->dataSV,s,len);
274 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
275 goto retry;
276 }
277 FREETMPS;
278 LEAVE;
279 return code;
280 }
281 else {
282 if (avail == 0)
283 PerlIOBase(f)->flags |= PERLIO_F_EOF;
284 else
285 PerlIOBase(f)->flags |= PERLIO_F_ERROR;
286 return -1;
287 }
288}
289
290IV
291PerlIOEncode_flush(pTHX_ PerlIO * f)
292{
293 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
294 IV code = 0;
295 if (e->bufsv && (e->base.ptr > e->base.buf)) {
296 dSP;
297 SV *str;
298 char *s;
299 STRLEN len;
300 SSize_t count = 0;
301 if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) {
302 /* Write case encode the buffer and write() to layer below */
303 ENTER;
304 SAVETMPS;
305 PUSHMARK(sp);
306 XPUSHs(e->enc);
307 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
308 SvUTF8_on(e->bufsv);
309 XPUSHs(e->bufsv);
310 XPUSHs(&PL_sv_yes);
311 PUTBACK;
312 if (perl_call_method("encode", G_SCALAR) != 1) {
313 Perl_die(aTHX_ "panic: encode did not return a value");
314 }
315 SPAGAIN;
316 str = POPs;
317 PUTBACK;
318 s = SvPV(str, len);
319 count = PerlIO_write(PerlIONext(f),s,len);
320 if (count != len) {
321 code = -1;
322 }
323 FREETMPS;
324 LEAVE;
325 if (PerlIO_flush(PerlIONext(f)) != 0) {
326 code = -1;
327 }
328 if (SvCUR(e->bufsv)) {
329 /* Did not all translate */
330 e->base.ptr = e->base.buf+SvCUR(e->bufsv);
331 return code;
332 }
333 }
334 else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
335 /* read case */
336 /* if we have any untranslated stuff then unread that first */
337 if (e->dataSV && SvCUR(e->dataSV)) {
338 s = SvPV(e->dataSV, len);
339 count = PerlIO_unread(PerlIONext(f),s,len);
340 if (count != len) {
341 code = -1;
342 }
343 }
344 /* See if there is anything left in the buffer */
345 if (e->base.ptr < e->base.end) {
346 /* Bother - have unread data.
347 re-encode and unread() to layer below
348 */
349 ENTER;
350 SAVETMPS;
351 str = sv_newmortal();
352 sv_upgrade(str, SVt_PV);
353 SvPVX(str) = (char*)e->base.ptr;
354 SvLEN(str) = 0;
355 SvCUR_set(str, e->base.end - e->base.ptr);
356 SvPOK_only(str);
357 SvUTF8_on(str);
358 PUSHMARK(sp);
359 XPUSHs(e->enc);
360 XPUSHs(str);
361 XPUSHs(&PL_sv_yes);
362 PUTBACK;
363 if (perl_call_method("encode", G_SCALAR) != 1) {
364 Perl_die(aTHX_ "panic: encode did not return a value");
365 }
366 SPAGAIN;
367 str = POPs;
368 PUTBACK;
369 s = SvPV(str, len);
370 count = PerlIO_unread(PerlIONext(f),s,len);
371 if (count != len) {
372 code = -1;
373 }
374 FREETMPS;
375 LEAVE;
376 }
377 }
378 e->base.ptr = e->base.end = e->base.buf;
379 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
380 }
381 return code;
382}
383
384IV
385PerlIOEncode_close(pTHX_ PerlIO * f)
386{
387 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
388 IV code = PerlIOBase_close(aTHX_ f);
389 if (e->bufsv) {
390 if (e->base.buf && e->base.ptr > e->base.buf) {
391 Perl_croak(aTHX_ "Close with partial character");
392 }
393 SvREFCNT_dec(e->bufsv);
394 e->bufsv = Nullsv;
395 }
396 e->base.buf = NULL;
397 e->base.ptr = NULL;
398 e->base.end = NULL;
399 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
400 return code;
401}
402
403Off_t
404PerlIOEncode_tell(pTHX_ PerlIO * f)
405{
406 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
407 /* Unfortunately the only way to get a postion is to (re-)translate,
408 the UTF8 we have in bufefr and then ask layer below
409 */
410 PerlIO_flush(f);
411 if (b->buf && b->ptr > b->buf) {
412 Perl_croak(aTHX_ "Cannot tell at partial character");
413 }
414 return PerlIO_tell(PerlIONext(f));
415}
416
417PerlIO *
418PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
419 CLONE_PARAMS * params, int flags)
420{
421 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
422 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
423 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
424 if (oe->enc) {
425 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
426 }
427 }
428 return f;
429}
430
431PerlIO_funcs PerlIO_encode = {
432 "encoding",
433 sizeof(PerlIOEncode),
434 PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
435 PerlIOEncode_pushed,
436 PerlIOEncode_popped,
437 PerlIOBuf_open,
438 PerlIOEncode_getarg,
439 PerlIOBase_fileno,
440 PerlIOEncode_dup,
441 PerlIOBuf_read,
442 PerlIOBuf_unread,
443 PerlIOBuf_write,
444 PerlIOBuf_seek,
445 PerlIOEncode_tell,
446 PerlIOEncode_close,
447 PerlIOEncode_flush,
448 PerlIOEncode_fill,
449 PerlIOBase_eof,
450 PerlIOBase_error,
451 PerlIOBase_clearerr,
452 PerlIOBase_setlinebuf,
453 PerlIOEncode_get_base,
454 PerlIOBuf_bufsiz,
455 PerlIOBuf_get_ptr,
456 PerlIOBuf_get_cnt,
457 PerlIOBuf_set_ptrcnt,
458};
459#endif /* encode layer */
460
461MODULE = PerlIO::encoding PACKAGE = PerlIO::encoding
462
463PROTOTYPES: ENABLE
464
465BOOT:
466{
467#ifdef PERLIO_LAYERS
468 PerlIO_define_layer(aTHX_ &PerlIO_encode);
469#endif
470}