Commit | Line | Data |
59035dcc |
1 | #define PERL_NO_GET_CONTEXT |
2 | #include "EXTERN.h" |
3 | #include "perl.h" |
4 | #include "XSUB.h" |
5 | #define U8 U8 |
6 | |
7 | #if defined(USE_PERLIO) && !defined(USE_SFIO) |
8 | |
9 | /* Define an encoding "layer" in the perliol.h sense. |
10 | |
11 | The layer defined here "inherits" in an object-oriented sense from |
12 | the "perlio" layer with its PerlIOBuf_* "methods". The |
13 | implementation is particularly efficient as until Encode settles |
14 | down there is no point in tryint to tune it. |
15 | |
16 | The layer works by overloading the "fill" and "flush" methods. |
17 | |
18 | "fill" calls "SUPER::fill" in perl terms, then calls the encode OO |
19 | perl API to convert the encoded data to UTF-8 form, then copies it |
20 | back to the buffer. The "base class's" read methods then see the |
21 | UTF-8 data. |
22 | |
23 | "flush" transforms the UTF-8 data deposited by the "base class's |
24 | write method in the buffer back into the encoded form using the |
25 | encode OO perl API, then copies data back into the buffer and calls |
26 | "SUPER::flush. |
27 | |
28 | Note that "flush" is _also_ called for read mode - we still do the |
29 | (back)-translate so that the the base class's "flush" sees the |
30 | correct number of encoded chars for positioning the seek |
31 | pointer. (This double translation is the worst performance issue - |
32 | particularly with all-perl encode engine.) |
33 | |
34 | */ |
35 | |
36 | #include "perliol.h" |
37 | |
38 | typedef struct { |
39 | PerlIOBuf base; /* PerlIOBuf stuff */ |
40 | SV *bufsv; /* buffer seen by layers above */ |
41 | SV *dataSV; /* data we have read from layer below */ |
42 | SV *enc; /* the encoding object */ |
43 | } PerlIOEncode; |
44 | |
45 | SV * |
46 | PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags) |
47 | { |
48 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
49 | SV *sv = &PL_sv_undef; |
50 | if (e->enc) { |
51 | dSP; |
52 | ENTER; |
53 | SAVETMPS; |
54 | PUSHMARK(sp); |
55 | XPUSHs(e->enc); |
56 | PUTBACK; |
57 | if (perl_call_method("name", G_SCALAR) == 1) { |
58 | SPAGAIN; |
59 | sv = newSVsv(POPs); |
60 | PUTBACK; |
61 | } |
62 | } |
63 | return sv; |
64 | } |
65 | |
66 | IV |
67 | PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg) |
68 | { |
69 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
70 | dSP; |
71 | IV code; |
72 | code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv); |
73 | ENTER; |
74 | SAVETMPS; |
75 | PUSHMARK(sp); |
76 | XPUSHs(arg); |
77 | PUTBACK; |
78 | if (perl_call_pv("Encode::find_encoding", G_SCALAR) != 1) { |
79 | /* should never happen */ |
80 | Perl_die(aTHX_ "Encode::find_encoding did not return a value"); |
81 | return -1; |
82 | } |
83 | SPAGAIN; |
84 | e->enc = POPs; |
85 | PUTBACK; |
86 | if (!SvROK(e->enc)) { |
87 | e->enc = Nullsv; |
88 | errno = EINVAL; |
89 | Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"", |
90 | arg); |
91 | code = -1; |
92 | } |
93 | else { |
94 | SvREFCNT_inc(e->enc); |
95 | PerlIOBase(f)->flags |= PERLIO_F_UTF8; |
96 | } |
97 | FREETMPS; |
98 | LEAVE; |
99 | return code; |
100 | } |
101 | |
102 | IV |
103 | PerlIOEncode_popped(pTHX_ PerlIO * f) |
104 | { |
105 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
106 | if (e->enc) { |
107 | SvREFCNT_dec(e->enc); |
108 | e->enc = Nullsv; |
109 | } |
110 | if (e->bufsv) { |
111 | SvREFCNT_dec(e->bufsv); |
112 | e->bufsv = Nullsv; |
113 | } |
114 | if (e->dataSV) { |
115 | SvREFCNT_dec(e->dataSV); |
116 | e->dataSV = Nullsv; |
117 | } |
118 | return 0; |
119 | } |
120 | |
121 | STDCHAR * |
122 | PerlIOEncode_get_base(pTHX_ PerlIO * f) |
123 | { |
124 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
125 | if (!e->base.bufsiz) |
126 | e->base.bufsiz = 1024; |
127 | if (!e->bufsv) { |
128 | e->bufsv = newSV(e->base.bufsiz); |
129 | sv_setpvn(e->bufsv, "", 0); |
130 | } |
131 | e->base.buf = (STDCHAR *) SvPVX(e->bufsv); |
132 | if (!e->base.ptr) |
133 | e->base.ptr = e->base.buf; |
134 | if (!e->base.end) |
135 | e->base.end = e->base.buf; |
136 | if (e->base.ptr < e->base.buf |
137 | || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) { |
138 | Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr, |
139 | e->base.buf + SvLEN(e->bufsv)); |
140 | abort(); |
141 | } |
142 | if (SvLEN(e->bufsv) < e->base.bufsiz) { |
143 | SSize_t poff = e->base.ptr - e->base.buf; |
144 | SSize_t eoff = e->base.end - e->base.buf; |
145 | e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz); |
146 | e->base.ptr = e->base.buf + poff; |
147 | e->base.end = e->base.buf + eoff; |
148 | } |
149 | if (e->base.ptr < e->base.buf |
150 | || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) { |
151 | Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr, |
152 | e->base.buf + SvLEN(e->bufsv)); |
153 | abort(); |
154 | } |
155 | return e->base.buf; |
156 | } |
157 | |
158 | IV |
159 | PerlIOEncode_fill(pTHX_ PerlIO * f) |
160 | { |
161 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
162 | dSP; |
163 | IV code = 0; |
164 | PerlIO *n; |
165 | SSize_t avail; |
166 | if (PerlIO_flush(f) != 0) |
167 | return -1; |
168 | n = PerlIONext(f); |
169 | if (!PerlIO_fast_gets(n)) { |
170 | /* Things get too messy if we don't have a buffer layer |
171 | push a :perlio to do the job */ |
172 | char mode[8]; |
173 | n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv); |
174 | if (!n) { |
175 | Perl_die(aTHX_ "panic: cannot push :perlio for %p",f); |
176 | } |
177 | } |
178 | ENTER; |
179 | SAVETMPS; |
180 | retry: |
181 | avail = PerlIO_get_cnt(n); |
182 | if (avail <= 0) { |
183 | avail = PerlIO_fill(n); |
184 | if (avail == 0) { |
185 | avail = PerlIO_get_cnt(n); |
186 | } |
187 | else { |
188 | if (!PerlIO_error(n) && PerlIO_eof(n)) |
189 | avail = 0; |
190 | } |
191 | } |
192 | if (avail > 0) { |
193 | STDCHAR *ptr = PerlIO_get_ptr(n); |
194 | SSize_t use = avail; |
195 | SV *uni; |
196 | char *s; |
197 | STRLEN len = 0; |
198 | e->base.ptr = e->base.end = (STDCHAR *) Nullch; |
199 | (void) PerlIOEncode_get_base(aTHX_ f); |
200 | if (!e->dataSV) |
201 | e->dataSV = newSV(0); |
202 | if (SvTYPE(e->dataSV) < SVt_PV) { |
203 | sv_upgrade(e->dataSV,SVt_PV); |
204 | } |
205 | if (SvCUR(e->dataSV)) { |
206 | /* something left over from last time - create a normal |
207 | SV with new data appended |
208 | */ |
209 | if (use + SvCUR(e->dataSV) > e->base.bufsiz) { |
210 | use = e->base.bufsiz - SvCUR(e->dataSV); |
211 | } |
212 | sv_catpvn(e->dataSV,(char*)ptr,use); |
213 | } |
214 | else { |
215 | /* Create a "dummy" SV to represent the available data from layer below */ |
216 | if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) { |
217 | Safefree(SvPVX(e->dataSV)); |
218 | } |
219 | if (use > e->base.bufsiz) { |
220 | use = e->base.bufsiz; |
221 | } |
222 | SvPVX(e->dataSV) = (char *) ptr; |
223 | SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */ |
224 | SvCUR_set(e->dataSV,use); |
225 | SvPOK_only(e->dataSV); |
226 | } |
227 | SvUTF8_off(e->dataSV); |
228 | PUSHMARK(sp); |
229 | XPUSHs(e->enc); |
230 | XPUSHs(e->dataSV); |
231 | XPUSHs(&PL_sv_yes); |
232 | PUTBACK; |
233 | if (perl_call_method("decode", G_SCALAR) != 1) { |
234 | Perl_die(aTHX_ "panic: decode did not return a value"); |
235 | } |
236 | SPAGAIN; |
237 | uni = POPs; |
238 | PUTBACK; |
239 | /* Now get translated string (forced to UTF-8) and use as buffer */ |
240 | if (SvPOK(uni)) { |
241 | s = SvPVutf8(uni, len); |
242 | #ifdef PARANOID_ENCODE_CHECKS |
243 | if (len && !is_utf8_string((U8*)s,len)) { |
244 | Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s); |
245 | } |
246 | #endif |
247 | } |
248 | if (len > 0) { |
249 | /* Got _something */ |
250 | /* if decode gave us back dataSV then data may vanish when |
251 | we do ptrcnt adjust - so take our copy now. |
252 | (The copy is a pain - need a put-it-here option for decode.) |
253 | */ |
254 | sv_setpvn(e->bufsv,s,len); |
255 | e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv); |
256 | e->base.end = e->base.ptr + SvCUR(e->bufsv); |
257 | PerlIOBase(f)->flags |= PERLIO_F_RDBUF; |
258 | SvUTF8_on(e->bufsv); |
259 | |
260 | /* Adjust ptr/cnt not taking anything which |
261 | did not translate - not clear this is a win */ |
262 | /* compute amount we took */ |
263 | use -= SvCUR(e->dataSV); |
264 | PerlIO_set_ptrcnt(n, ptr+use, (avail-use)); |
265 | /* and as we did not take it it isn't pending */ |
266 | SvCUR_set(e->dataSV,0); |
267 | } else { |
268 | /* Got nothing - assume partial character so we need some more */ |
269 | /* Make sure e->dataSV is a normal SV before re-filling as |
270 | buffer alias will change under us |
271 | */ |
272 | s = SvPV(e->dataSV,len); |
273 | sv_setpvn(e->dataSV,s,len); |
274 | PerlIO_set_ptrcnt(n, ptr+use, (avail-use)); |
275 | goto retry; |
276 | } |
277 | FREETMPS; |
278 | LEAVE; |
279 | return code; |
280 | } |
281 | else { |
282 | if (avail == 0) |
283 | PerlIOBase(f)->flags |= PERLIO_F_EOF; |
284 | else |
285 | PerlIOBase(f)->flags |= PERLIO_F_ERROR; |
286 | return -1; |
287 | } |
288 | } |
289 | |
290 | IV |
291 | PerlIOEncode_flush(pTHX_ PerlIO * f) |
292 | { |
293 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
294 | IV code = 0; |
295 | if (e->bufsv && (e->base.ptr > e->base.buf)) { |
296 | dSP; |
297 | SV *str; |
298 | char *s; |
299 | STRLEN len; |
300 | SSize_t count = 0; |
301 | if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) { |
302 | /* Write case encode the buffer and write() to layer below */ |
303 | ENTER; |
304 | SAVETMPS; |
305 | PUSHMARK(sp); |
306 | XPUSHs(e->enc); |
307 | SvCUR_set(e->bufsv, e->base.ptr - e->base.buf); |
308 | SvUTF8_on(e->bufsv); |
309 | XPUSHs(e->bufsv); |
310 | XPUSHs(&PL_sv_yes); |
311 | PUTBACK; |
312 | if (perl_call_method("encode", G_SCALAR) != 1) { |
313 | Perl_die(aTHX_ "panic: encode did not return a value"); |
314 | } |
315 | SPAGAIN; |
316 | str = POPs; |
317 | PUTBACK; |
318 | s = SvPV(str, len); |
319 | count = PerlIO_write(PerlIONext(f),s,len); |
320 | if (count != len) { |
321 | code = -1; |
322 | } |
323 | FREETMPS; |
324 | LEAVE; |
325 | if (PerlIO_flush(PerlIONext(f)) != 0) { |
326 | code = -1; |
327 | } |
328 | if (SvCUR(e->bufsv)) { |
329 | /* Did not all translate */ |
330 | e->base.ptr = e->base.buf+SvCUR(e->bufsv); |
331 | return code; |
332 | } |
333 | } |
334 | else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) { |
335 | /* read case */ |
336 | /* if we have any untranslated stuff then unread that first */ |
337 | if (e->dataSV && SvCUR(e->dataSV)) { |
338 | s = SvPV(e->dataSV, len); |
339 | count = PerlIO_unread(PerlIONext(f),s,len); |
340 | if (count != len) { |
341 | code = -1; |
342 | } |
343 | } |
344 | /* See if there is anything left in the buffer */ |
345 | if (e->base.ptr < e->base.end) { |
346 | /* Bother - have unread data. |
347 | re-encode and unread() to layer below |
348 | */ |
349 | ENTER; |
350 | SAVETMPS; |
351 | str = sv_newmortal(); |
352 | sv_upgrade(str, SVt_PV); |
353 | SvPVX(str) = (char*)e->base.ptr; |
354 | SvLEN(str) = 0; |
355 | SvCUR_set(str, e->base.end - e->base.ptr); |
356 | SvPOK_only(str); |
357 | SvUTF8_on(str); |
358 | PUSHMARK(sp); |
359 | XPUSHs(e->enc); |
360 | XPUSHs(str); |
361 | XPUSHs(&PL_sv_yes); |
362 | PUTBACK; |
363 | if (perl_call_method("encode", G_SCALAR) != 1) { |
364 | Perl_die(aTHX_ "panic: encode did not return a value"); |
365 | } |
366 | SPAGAIN; |
367 | str = POPs; |
368 | PUTBACK; |
369 | s = SvPV(str, len); |
370 | count = PerlIO_unread(PerlIONext(f),s,len); |
371 | if (count != len) { |
372 | code = -1; |
373 | } |
374 | FREETMPS; |
375 | LEAVE; |
376 | } |
377 | } |
378 | e->base.ptr = e->base.end = e->base.buf; |
379 | PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF); |
380 | } |
381 | return code; |
382 | } |
383 | |
384 | IV |
385 | PerlIOEncode_close(pTHX_ PerlIO * f) |
386 | { |
387 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
388 | IV code = PerlIOBase_close(aTHX_ f); |
389 | if (e->bufsv) { |
390 | if (e->base.buf && e->base.ptr > e->base.buf) { |
391 | Perl_croak(aTHX_ "Close with partial character"); |
392 | } |
393 | SvREFCNT_dec(e->bufsv); |
394 | e->bufsv = Nullsv; |
395 | } |
396 | e->base.buf = NULL; |
397 | e->base.ptr = NULL; |
398 | e->base.end = NULL; |
399 | PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF); |
400 | return code; |
401 | } |
402 | |
403 | Off_t |
404 | PerlIOEncode_tell(pTHX_ PerlIO * f) |
405 | { |
406 | PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf); |
407 | /* Unfortunately the only way to get a postion is to (re-)translate, |
408 | the UTF8 we have in bufefr and then ask layer below |
409 | */ |
410 | PerlIO_flush(f); |
411 | if (b->buf && b->ptr > b->buf) { |
412 | Perl_croak(aTHX_ "Cannot tell at partial character"); |
413 | } |
414 | return PerlIO_tell(PerlIONext(f)); |
415 | } |
416 | |
417 | PerlIO * |
418 | PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o, |
419 | CLONE_PARAMS * params, int flags) |
420 | { |
421 | if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) { |
422 | PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode); |
423 | PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode); |
424 | if (oe->enc) { |
425 | fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params); |
426 | } |
427 | } |
428 | return f; |
429 | } |
430 | |
431 | PerlIO_funcs PerlIO_encode = { |
432 | "encoding", |
433 | sizeof(PerlIOEncode), |
434 | PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT, |
435 | PerlIOEncode_pushed, |
436 | PerlIOEncode_popped, |
437 | PerlIOBuf_open, |
438 | PerlIOEncode_getarg, |
439 | PerlIOBase_fileno, |
440 | PerlIOEncode_dup, |
441 | PerlIOBuf_read, |
442 | PerlIOBuf_unread, |
443 | PerlIOBuf_write, |
444 | PerlIOBuf_seek, |
445 | PerlIOEncode_tell, |
446 | PerlIOEncode_close, |
447 | PerlIOEncode_flush, |
448 | PerlIOEncode_fill, |
449 | PerlIOBase_eof, |
450 | PerlIOBase_error, |
451 | PerlIOBase_clearerr, |
452 | PerlIOBase_setlinebuf, |
453 | PerlIOEncode_get_base, |
454 | PerlIOBuf_bufsiz, |
455 | PerlIOBuf_get_ptr, |
456 | PerlIOBuf_get_cnt, |
457 | PerlIOBuf_set_ptrcnt, |
458 | }; |
459 | #endif /* encode layer */ |
460 | |
461 | MODULE = PerlIO::encoding PACKAGE = PerlIO::encoding |
462 | |
463 | PROTOTYPES: ENABLE |
464 | |
465 | BOOT: |
466 | { |
467 | #ifdef PERLIO_LAYERS |
468 | PerlIO_define_layer(aTHX_ &PerlIO_encode); |
469 | #endif |
470 | } |