Commit | Line | Data |
918951dd |
1 | /* |
2 | * $Id$ |
3 | */ |
4 | |
59035dcc |
5 | #define PERL_NO_GET_CONTEXT |
6 | #include "EXTERN.h" |
7 | #include "perl.h" |
8 | #include "XSUB.h" |
9 | #define U8 U8 |
10 | |
11 | #if defined(USE_PERLIO) && !defined(USE_SFIO) |
12 | |
13 | /* Define an encoding "layer" in the perliol.h sense. |
14 | |
15 | The layer defined here "inherits" in an object-oriented sense from |
16 | the "perlio" layer with its PerlIOBuf_* "methods". The |
17 | implementation is particularly efficient as until Encode settles |
18 | down there is no point in tryint to tune it. |
19 | |
20 | The layer works by overloading the "fill" and "flush" methods. |
21 | |
22 | "fill" calls "SUPER::fill" in perl terms, then calls the encode OO |
23 | perl API to convert the encoded data to UTF-8 form, then copies it |
24 | back to the buffer. The "base class's" read methods then see the |
25 | UTF-8 data. |
26 | |
27 | "flush" transforms the UTF-8 data deposited by the "base class's |
28 | write method in the buffer back into the encoded form using the |
29 | encode OO perl API, then copies data back into the buffer and calls |
30 | "SUPER::flush. |
31 | |
32 | Note that "flush" is _also_ called for read mode - we still do the |
33 | (back)-translate so that the the base class's "flush" sees the |
34 | correct number of encoded chars for positioning the seek |
35 | pointer. (This double translation is the worst performance issue - |
36 | particularly with all-perl encode engine.) |
37 | |
38 | */ |
39 | |
40 | #include "perliol.h" |
41 | |
42 | typedef struct { |
43 | PerlIOBuf base; /* PerlIOBuf stuff */ |
44 | SV *bufsv; /* buffer seen by layers above */ |
45 | SV *dataSV; /* data we have read from layer below */ |
46 | SV *enc; /* the encoding object */ |
918951dd |
47 | SV *chk; /* CHECK in Encode methods */ |
59035dcc |
48 | } PerlIOEncode; |
49 | |
918951dd |
50 | |
51 | #define ENCODE_FB_QUIET "Encode::FB_QUIET" |
52 | |
53 | |
59035dcc |
54 | SV * |
55 | PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags) |
56 | { |
57 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
58 | SV *sv = &PL_sv_undef; |
59 | if (e->enc) { |
60 | dSP; |
61 | ENTER; |
62 | SAVETMPS; |
63 | PUSHMARK(sp); |
64 | XPUSHs(e->enc); |
65 | PUTBACK; |
918951dd |
66 | if (call_method("name", G_SCALAR) == 1) { |
59035dcc |
67 | SPAGAIN; |
68 | sv = newSVsv(POPs); |
69 | PUTBACK; |
70 | } |
71 | } |
72 | return sv; |
73 | } |
74 | |
75 | IV |
76 | PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg) |
77 | { |
78 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
79 | dSP; |
80 | IV code; |
81 | code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv); |
82 | ENTER; |
83 | SAVETMPS; |
918951dd |
84 | |
85 | PUSHMARK(sp); |
86 | PUTBACK; |
87 | if (call_pv(ENCODE_FB_QUIET, G_SCALAR|G_NOARGS) != 1) { |
88 | Perl_die(aTHX_ "Call to Encode::FB_QUIET failed!"); |
89 | code = -1; |
90 | } |
91 | SPAGAIN; |
92 | e->chk = newSVsv(POPs); |
93 | PUTBACK; |
94 | |
59035dcc |
95 | PUSHMARK(sp); |
96 | XPUSHs(arg); |
97 | PUTBACK; |
918951dd |
98 | if (call_pv("Encode::find_encoding", G_SCALAR) != 1) { |
59035dcc |
99 | /* should never happen */ |
100 | Perl_die(aTHX_ "Encode::find_encoding did not return a value"); |
101 | return -1; |
102 | } |
103 | SPAGAIN; |
104 | e->enc = POPs; |
105 | PUTBACK; |
918951dd |
106 | |
59035dcc |
107 | if (!SvROK(e->enc)) { |
108 | e->enc = Nullsv; |
109 | errno = EINVAL; |
110 | Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"", |
111 | arg); |
112 | code = -1; |
113 | } |
114 | else { |
115 | SvREFCNT_inc(e->enc); |
116 | PerlIOBase(f)->flags |= PERLIO_F_UTF8; |
117 | } |
118 | FREETMPS; |
119 | LEAVE; |
120 | return code; |
121 | } |
122 | |
123 | IV |
124 | PerlIOEncode_popped(pTHX_ PerlIO * f) |
125 | { |
126 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
127 | if (e->enc) { |
128 | SvREFCNT_dec(e->enc); |
129 | e->enc = Nullsv; |
130 | } |
131 | if (e->bufsv) { |
132 | SvREFCNT_dec(e->bufsv); |
133 | e->bufsv = Nullsv; |
134 | } |
135 | if (e->dataSV) { |
136 | SvREFCNT_dec(e->dataSV); |
137 | e->dataSV = Nullsv; |
138 | } |
139 | return 0; |
140 | } |
141 | |
142 | STDCHAR * |
143 | PerlIOEncode_get_base(pTHX_ PerlIO * f) |
144 | { |
145 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
146 | if (!e->base.bufsiz) |
147 | e->base.bufsiz = 1024; |
148 | if (!e->bufsv) { |
149 | e->bufsv = newSV(e->base.bufsiz); |
150 | sv_setpvn(e->bufsv, "", 0); |
151 | } |
152 | e->base.buf = (STDCHAR *) SvPVX(e->bufsv); |
153 | if (!e->base.ptr) |
154 | e->base.ptr = e->base.buf; |
155 | if (!e->base.end) |
156 | e->base.end = e->base.buf; |
157 | if (e->base.ptr < e->base.buf |
158 | || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) { |
159 | Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr, |
160 | e->base.buf + SvLEN(e->bufsv)); |
161 | abort(); |
162 | } |
163 | if (SvLEN(e->bufsv) < e->base.bufsiz) { |
164 | SSize_t poff = e->base.ptr - e->base.buf; |
165 | SSize_t eoff = e->base.end - e->base.buf; |
166 | e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz); |
167 | e->base.ptr = e->base.buf + poff; |
168 | e->base.end = e->base.buf + eoff; |
169 | } |
170 | if (e->base.ptr < e->base.buf |
171 | || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) { |
172 | Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr, |
173 | e->base.buf + SvLEN(e->bufsv)); |
174 | abort(); |
175 | } |
176 | return e->base.buf; |
177 | } |
178 | |
179 | IV |
180 | PerlIOEncode_fill(pTHX_ PerlIO * f) |
181 | { |
182 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
183 | dSP; |
184 | IV code = 0; |
185 | PerlIO *n; |
186 | SSize_t avail; |
187 | if (PerlIO_flush(f) != 0) |
188 | return -1; |
189 | n = PerlIONext(f); |
190 | if (!PerlIO_fast_gets(n)) { |
191 | /* Things get too messy if we don't have a buffer layer |
192 | push a :perlio to do the job */ |
193 | char mode[8]; |
194 | n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv); |
195 | if (!n) { |
196 | Perl_die(aTHX_ "panic: cannot push :perlio for %p",f); |
197 | } |
198 | } |
199 | ENTER; |
200 | SAVETMPS; |
201 | retry: |
202 | avail = PerlIO_get_cnt(n); |
203 | if (avail <= 0) { |
204 | avail = PerlIO_fill(n); |
205 | if (avail == 0) { |
206 | avail = PerlIO_get_cnt(n); |
207 | } |
208 | else { |
209 | if (!PerlIO_error(n) && PerlIO_eof(n)) |
210 | avail = 0; |
211 | } |
212 | } |
213 | if (avail > 0) { |
214 | STDCHAR *ptr = PerlIO_get_ptr(n); |
215 | SSize_t use = avail; |
216 | SV *uni; |
217 | char *s; |
218 | STRLEN len = 0; |
219 | e->base.ptr = e->base.end = (STDCHAR *) Nullch; |
220 | (void) PerlIOEncode_get_base(aTHX_ f); |
221 | if (!e->dataSV) |
222 | e->dataSV = newSV(0); |
223 | if (SvTYPE(e->dataSV) < SVt_PV) { |
224 | sv_upgrade(e->dataSV,SVt_PV); |
225 | } |
226 | if (SvCUR(e->dataSV)) { |
227 | /* something left over from last time - create a normal |
228 | SV with new data appended |
229 | */ |
230 | if (use + SvCUR(e->dataSV) > e->base.bufsiz) { |
231 | use = e->base.bufsiz - SvCUR(e->dataSV); |
232 | } |
233 | sv_catpvn(e->dataSV,(char*)ptr,use); |
234 | } |
235 | else { |
236 | /* Create a "dummy" SV to represent the available data from layer below */ |
237 | if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) { |
238 | Safefree(SvPVX(e->dataSV)); |
239 | } |
240 | if (use > e->base.bufsiz) { |
241 | use = e->base.bufsiz; |
242 | } |
243 | SvPVX(e->dataSV) = (char *) ptr; |
244 | SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */ |
245 | SvCUR_set(e->dataSV,use); |
246 | SvPOK_only(e->dataSV); |
247 | } |
248 | SvUTF8_off(e->dataSV); |
249 | PUSHMARK(sp); |
250 | XPUSHs(e->enc); |
251 | XPUSHs(e->dataSV); |
918951dd |
252 | XPUSHs(e->chk); |
59035dcc |
253 | PUTBACK; |
918951dd |
254 | if (call_method("decode", G_SCALAR) != 1) { |
59035dcc |
255 | Perl_die(aTHX_ "panic: decode did not return a value"); |
256 | } |
257 | SPAGAIN; |
258 | uni = POPs; |
259 | PUTBACK; |
260 | /* Now get translated string (forced to UTF-8) and use as buffer */ |
261 | if (SvPOK(uni)) { |
262 | s = SvPVutf8(uni, len); |
263 | #ifdef PARANOID_ENCODE_CHECKS |
264 | if (len && !is_utf8_string((U8*)s,len)) { |
265 | Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s); |
266 | } |
267 | #endif |
268 | } |
269 | if (len > 0) { |
270 | /* Got _something */ |
271 | /* if decode gave us back dataSV then data may vanish when |
272 | we do ptrcnt adjust - so take our copy now. |
273 | (The copy is a pain - need a put-it-here option for decode.) |
274 | */ |
275 | sv_setpvn(e->bufsv,s,len); |
276 | e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv); |
277 | e->base.end = e->base.ptr + SvCUR(e->bufsv); |
278 | PerlIOBase(f)->flags |= PERLIO_F_RDBUF; |
279 | SvUTF8_on(e->bufsv); |
280 | |
281 | /* Adjust ptr/cnt not taking anything which |
282 | did not translate - not clear this is a win */ |
283 | /* compute amount we took */ |
284 | use -= SvCUR(e->dataSV); |
285 | PerlIO_set_ptrcnt(n, ptr+use, (avail-use)); |
286 | /* and as we did not take it it isn't pending */ |
287 | SvCUR_set(e->dataSV,0); |
288 | } else { |
289 | /* Got nothing - assume partial character so we need some more */ |
290 | /* Make sure e->dataSV is a normal SV before re-filling as |
291 | buffer alias will change under us |
292 | */ |
293 | s = SvPV(e->dataSV,len); |
294 | sv_setpvn(e->dataSV,s,len); |
295 | PerlIO_set_ptrcnt(n, ptr+use, (avail-use)); |
296 | goto retry; |
297 | } |
298 | FREETMPS; |
299 | LEAVE; |
300 | return code; |
301 | } |
302 | else { |
303 | if (avail == 0) |
304 | PerlIOBase(f)->flags |= PERLIO_F_EOF; |
305 | else |
306 | PerlIOBase(f)->flags |= PERLIO_F_ERROR; |
307 | return -1; |
308 | } |
309 | } |
310 | |
311 | IV |
312 | PerlIOEncode_flush(pTHX_ PerlIO * f) |
313 | { |
314 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
315 | IV code = 0; |
316 | if (e->bufsv && (e->base.ptr > e->base.buf)) { |
317 | dSP; |
318 | SV *str; |
319 | char *s; |
320 | STRLEN len; |
321 | SSize_t count = 0; |
322 | if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) { |
323 | /* Write case encode the buffer and write() to layer below */ |
324 | ENTER; |
325 | SAVETMPS; |
326 | PUSHMARK(sp); |
327 | XPUSHs(e->enc); |
328 | SvCUR_set(e->bufsv, e->base.ptr - e->base.buf); |
329 | SvUTF8_on(e->bufsv); |
330 | XPUSHs(e->bufsv); |
918951dd |
331 | XPUSHs(e->chk); |
59035dcc |
332 | PUTBACK; |
918951dd |
333 | if (call_method("encode", G_SCALAR) != 1) { |
59035dcc |
334 | Perl_die(aTHX_ "panic: encode did not return a value"); |
335 | } |
336 | SPAGAIN; |
337 | str = POPs; |
338 | PUTBACK; |
339 | s = SvPV(str, len); |
340 | count = PerlIO_write(PerlIONext(f),s,len); |
341 | if (count != len) { |
342 | code = -1; |
343 | } |
344 | FREETMPS; |
345 | LEAVE; |
346 | if (PerlIO_flush(PerlIONext(f)) != 0) { |
347 | code = -1; |
348 | } |
349 | if (SvCUR(e->bufsv)) { |
350 | /* Did not all translate */ |
351 | e->base.ptr = e->base.buf+SvCUR(e->bufsv); |
352 | return code; |
353 | } |
354 | } |
355 | else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) { |
356 | /* read case */ |
357 | /* if we have any untranslated stuff then unread that first */ |
358 | if (e->dataSV && SvCUR(e->dataSV)) { |
359 | s = SvPV(e->dataSV, len); |
360 | count = PerlIO_unread(PerlIONext(f),s,len); |
361 | if (count != len) { |
362 | code = -1; |
363 | } |
364 | } |
365 | /* See if there is anything left in the buffer */ |
366 | if (e->base.ptr < e->base.end) { |
367 | /* Bother - have unread data. |
368 | re-encode and unread() to layer below |
369 | */ |
370 | ENTER; |
371 | SAVETMPS; |
372 | str = sv_newmortal(); |
373 | sv_upgrade(str, SVt_PV); |
374 | SvPVX(str) = (char*)e->base.ptr; |
375 | SvLEN(str) = 0; |
376 | SvCUR_set(str, e->base.end - e->base.ptr); |
377 | SvPOK_only(str); |
378 | SvUTF8_on(str); |
379 | PUSHMARK(sp); |
380 | XPUSHs(e->enc); |
381 | XPUSHs(str); |
918951dd |
382 | XPUSHs(e->chk); |
59035dcc |
383 | PUTBACK; |
918951dd |
384 | if (call_method("encode", G_SCALAR) != 1) { |
59035dcc |
385 | Perl_die(aTHX_ "panic: encode did not return a value"); |
386 | } |
387 | SPAGAIN; |
388 | str = POPs; |
389 | PUTBACK; |
390 | s = SvPV(str, len); |
391 | count = PerlIO_unread(PerlIONext(f),s,len); |
392 | if (count != len) { |
393 | code = -1; |
394 | } |
395 | FREETMPS; |
396 | LEAVE; |
397 | } |
398 | } |
399 | e->base.ptr = e->base.end = e->base.buf; |
400 | PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF); |
401 | } |
402 | return code; |
403 | } |
404 | |
405 | IV |
406 | PerlIOEncode_close(pTHX_ PerlIO * f) |
407 | { |
408 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
409 | IV code = PerlIOBase_close(aTHX_ f); |
410 | if (e->bufsv) { |
411 | if (e->base.buf && e->base.ptr > e->base.buf) { |
412 | Perl_croak(aTHX_ "Close with partial character"); |
413 | } |
414 | SvREFCNT_dec(e->bufsv); |
415 | e->bufsv = Nullsv; |
416 | } |
417 | e->base.buf = NULL; |
418 | e->base.ptr = NULL; |
419 | e->base.end = NULL; |
420 | PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF); |
421 | return code; |
422 | } |
423 | |
424 | Off_t |
425 | PerlIOEncode_tell(pTHX_ PerlIO * f) |
426 | { |
427 | PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf); |
428 | /* Unfortunately the only way to get a postion is to (re-)translate, |
429 | the UTF8 we have in bufefr and then ask layer below |
430 | */ |
431 | PerlIO_flush(f); |
432 | if (b->buf && b->ptr > b->buf) { |
433 | Perl_croak(aTHX_ "Cannot tell at partial character"); |
434 | } |
435 | return PerlIO_tell(PerlIONext(f)); |
436 | } |
437 | |
438 | PerlIO * |
439 | PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o, |
440 | CLONE_PARAMS * params, int flags) |
441 | { |
442 | if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) { |
443 | PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode); |
444 | PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode); |
445 | if (oe->enc) { |
446 | fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params); |
447 | } |
448 | } |
449 | return f; |
450 | } |
451 | |
452 | PerlIO_funcs PerlIO_encode = { |
453 | "encoding", |
454 | sizeof(PerlIOEncode), |
455 | PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT, |
456 | PerlIOEncode_pushed, |
457 | PerlIOEncode_popped, |
458 | PerlIOBuf_open, |
459 | PerlIOEncode_getarg, |
460 | PerlIOBase_fileno, |
461 | PerlIOEncode_dup, |
462 | PerlIOBuf_read, |
463 | PerlIOBuf_unread, |
464 | PerlIOBuf_write, |
465 | PerlIOBuf_seek, |
466 | PerlIOEncode_tell, |
467 | PerlIOEncode_close, |
468 | PerlIOEncode_flush, |
469 | PerlIOEncode_fill, |
470 | PerlIOBase_eof, |
471 | PerlIOBase_error, |
472 | PerlIOBase_clearerr, |
473 | PerlIOBase_setlinebuf, |
474 | PerlIOEncode_get_base, |
475 | PerlIOBuf_bufsiz, |
476 | PerlIOBuf_get_ptr, |
477 | PerlIOBuf_get_cnt, |
478 | PerlIOBuf_set_ptrcnt, |
479 | }; |
480 | #endif /* encode layer */ |
481 | |
482 | MODULE = PerlIO::encoding PACKAGE = PerlIO::encoding |
483 | |
484 | PROTOTYPES: ENABLE |
485 | |
486 | BOOT: |
487 | { |
488 | #ifdef PERLIO_LAYERS |
489 | PerlIO_define_layer(aTHX_ &PerlIO_encode); |
490 | #endif |
491 | } |