Commit | Line | Data |
fcf2db38 |
1 | #define PERL_NO_GET_CONTEXT |
2 | |
2c674647 |
3 | #include "EXTERN.h" |
4 | #include "perl.h" |
5 | #include "XSUB.h" |
2f2b4ff2 |
6 | #define U8 U8 |
7 | #include "encode.h" |
94b89828 |
8 | #include "8859.h" |
2f2b4ff2 |
9 | #include "EBCDIC.h" |
10 | #include "Symbols.h" |
2c674647 |
11 | |
fcf2db38 |
12 | |
13 | #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \ |
2f5768b8 |
14 | Perl_croak(aTHX_ "panic_unimplemented"); \ |
4a83738a |
15 | return (y)0; /* fool picky compilers */ \ |
87714904 |
16 | } |
67e989fb |
17 | UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) |
aa0053b7 |
18 | UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) |
9df9a5cd |
19 | #if defined(USE_PERLIO) && !defined(USE_SFIO) |
72e44f29 |
20 | /* Define an encoding "layer" in the perliol.h sense. |
21 | The layer defined here "inherits" in an object-oriented sense from the |
22 | "perlio" layer with its PerlIOBuf_* "methods". |
23 | The implementation is particularly efficient as until Encode settles down |
24 | there is no point in tryint to tune it. |
25 | |
26 | The layer works by overloading the "fill" and "flush" methods. |
27 | |
28 | "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API |
29 | to convert the encoded data to UTF-8 form, then copies it back to the |
30 | buffer. The "base class's" read methods then see the UTF-8 data. |
31 | |
32 | "flush" transforms the UTF-8 data deposited by the "base class's write |
33 | method in the buffer back into the encoded form using the encode OO perl API, |
34 | then copies data back into the buffer and calls "SUPER::flush. |
35 | |
36 | Note that "flush" is _also_ called for read mode - we still do the (back)-translate |
37 | so that the the base class's "flush" sees the correct number of encoded chars |
38 | for positioning the seek pointer. (This double translation is the worst performance |
39 | issue - particularly with all-perl encode engine.) |
40 | |
41 | */ |
33af2bc7 |
42 | #include "perliol.h" |
aa0053b7 |
43 | typedef struct { |
44 | PerlIOBuf base; /* PerlIOBuf stuff */ |
0b3236bb |
45 | SV *bufsv; /* buffer seen by layers above */ |
46 | SV *dataSV; /* data we have read from layer below */ |
47 | SV *enc; /* the encoding object */ |
33af2bc7 |
48 | } PerlIOEncode; |
49 | |
e3f3bf95 |
50 | SV * |
aa0053b7 |
51 | PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags) |
e3f3bf95 |
52 | { |
aa0053b7 |
53 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
54 | SV *sv = &PL_sv_undef; |
55 | if (e->enc) { |
56 | dSP; |
57 | ENTER; |
58 | SAVETMPS; |
59 | PUSHMARK(sp); |
60 | XPUSHs(e->enc); |
61 | PUTBACK; |
62 | if (perl_call_method("name", G_SCALAR) == 1) { |
63 | SPAGAIN; |
64 | sv = newSVsv(POPs); |
65 | PUTBACK; |
66 | } |
e3f3bf95 |
67 | } |
aa0053b7 |
68 | return sv; |
e3f3bf95 |
69 | } |
33af2bc7 |
70 | |
71 | IV |
aa0053b7 |
72 | PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg) |
33af2bc7 |
73 | { |
aa0053b7 |
74 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
75 | dSP; |
76 | IV code; |
77 | code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv); |
78 | ENTER; |
79 | SAVETMPS; |
80 | PUSHMARK(sp); |
81 | XPUSHs(arg); |
82 | PUTBACK; |
83 | if (perl_call_pv("Encode::find_encoding", G_SCALAR) != 1) { |
84 | /* should never happen */ |
85 | Perl_die(aTHX_ "Encode::find_encoding did not return a value"); |
86 | return -1; |
87 | } |
88 | SPAGAIN; |
89 | e->enc = POPs; |
90 | PUTBACK; |
91 | if (!SvROK(e->enc)) { |
92 | e->enc = Nullsv; |
93 | errno = EINVAL; |
94 | Perl_warner(aTHX_ WARN_IO, "Cannot find encoding \"%" SVf "\"", |
95 | arg); |
96 | code = -1; |
97 | } |
98 | else { |
99 | SvREFCNT_inc(e->enc); |
100 | PerlIOBase(f)->flags |= PERLIO_F_UTF8; |
101 | } |
102 | FREETMPS; |
103 | LEAVE; |
104 | return code; |
33af2bc7 |
105 | } |
106 | |
107 | IV |
aa0053b7 |
108 | PerlIOEncode_popped(pTHX_ PerlIO * f) |
33af2bc7 |
109 | { |
aa0053b7 |
110 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
111 | if (e->enc) { |
112 | SvREFCNT_dec(e->enc); |
113 | e->enc = Nullsv; |
114 | } |
115 | if (e->bufsv) { |
116 | SvREFCNT_dec(e->bufsv); |
117 | e->bufsv = Nullsv; |
118 | } |
0b3236bb |
119 | if (e->dataSV) { |
120 | SvREFCNT_dec(e->dataSV); |
121 | e->bufsv = Nullsv; |
122 | } |
aa0053b7 |
123 | return 0; |
33af2bc7 |
124 | } |
125 | |
126 | STDCHAR * |
aa0053b7 |
127 | PerlIOEncode_get_base(pTHX_ PerlIO * f) |
33af2bc7 |
128 | { |
aa0053b7 |
129 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
130 | if (!e->base.bufsiz) |
131 | e->base.bufsiz = 1024; |
132 | if (!e->bufsv) { |
133 | e->bufsv = newSV(e->base.bufsiz); |
134 | sv_setpvn(e->bufsv, "", 0); |
135 | } |
136 | e->base.buf = (STDCHAR *) SvPVX(e->bufsv); |
137 | if (!e->base.ptr) |
138 | e->base.ptr = e->base.buf; |
139 | if (!e->base.end) |
140 | e->base.end = e->base.buf; |
141 | if (e->base.ptr < e->base.buf |
142 | || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) { |
143 | Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr, |
144 | e->base.buf + SvLEN(e->bufsv)); |
145 | abort(); |
146 | } |
147 | if (SvLEN(e->bufsv) < e->base.bufsiz) { |
148 | SSize_t poff = e->base.ptr - e->base.buf; |
149 | SSize_t eoff = e->base.end - e->base.buf; |
150 | e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz); |
151 | e->base.ptr = e->base.buf + poff; |
152 | e->base.end = e->base.buf + eoff; |
153 | } |
154 | if (e->base.ptr < e->base.buf |
155 | || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) { |
156 | Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr, |
157 | e->base.buf + SvLEN(e->bufsv)); |
158 | abort(); |
159 | } |
160 | return e->base.buf; |
33af2bc7 |
161 | } |
162 | |
33af2bc7 |
163 | IV |
aa0053b7 |
164 | PerlIOEncode_fill(pTHX_ PerlIO * f) |
33af2bc7 |
165 | { |
aa0053b7 |
166 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
167 | dSP; |
0b3236bb |
168 | IV code = 0; |
169 | PerlIO *n; |
170 | SSize_t avail; |
171 | if (PerlIO_flush(f) != 0) |
172 | return -1; |
173 | n = PerlIONext(f); |
174 | if (!PerlIO_fast_gets(n)) { |
175 | /* Things get too messy if we don't have a buffer layer |
176 | push a :perlio to do the job */ |
177 | char mode[8]; |
178 | n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv); |
179 | if (!n) { |
180 | Perl_die(aTHX_ "panic: cannot push :perlio for %p",f); |
181 | } |
182 | } |
183 | ENTER; |
184 | SAVETMPS; |
185 | retry: |
186 | avail = PerlIO_get_cnt(n); |
187 | if (avail <= 0) { |
188 | avail = PerlIO_fill(n); |
189 | if (avail == 0) { |
190 | avail = PerlIO_get_cnt(n); |
191 | } |
192 | else { |
193 | if (!PerlIO_error(n) && PerlIO_eof(n)) |
194 | avail = 0; |
195 | } |
196 | } |
197 | if (avail > 0) { |
198 | STDCHAR *ptr = PerlIO_get_ptr(n); |
199 | SSize_t use = avail; |
aa0053b7 |
200 | SV *uni; |
aa0053b7 |
201 | char *s; |
0b3236bb |
202 | STRLEN len = 0; |
203 | e->base.ptr = e->base.end = (STDCHAR *) Nullch; |
204 | (void) PerlIOEncode_get_base(aTHX_ f); |
205 | if (!e->dataSV) |
206 | e->dataSV = newSV(0); |
207 | if (SvTYPE(e->dataSV) < SVt_PV) { |
208 | sv_upgrade(e->dataSV,SVt_PV); |
209 | } |
210 | if (SvCUR(e->dataSV)) { |
211 | /* something left over from last time - create a normal |
212 | SV with new data appended |
213 | */ |
214 | if (use + SvCUR(e->dataSV) > e->base.bufsiz) { |
215 | use = e->base.bufsiz - SvCUR(e->dataSV); |
216 | } |
217 | sv_catpvn(e->dataSV,ptr,use); |
218 | } |
219 | else { |
220 | /* Create a "dummy" SV to represent the available data from layer below */ |
221 | if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) { |
222 | Safefree(SvPVX(e->dataSV)); |
223 | } |
224 | if (use > e->base.bufsiz) { |
225 | use = e->base.bufsiz; |
226 | } |
227 | SvPVX(e->dataSV) = (char *) ptr; |
228 | SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */ |
229 | SvCUR_set(e->dataSV,use); |
230 | SvPOK_on(e->dataSV); |
231 | } |
232 | SvUTF8_off(e->dataSV); |
aa0053b7 |
233 | PUSHMARK(sp); |
234 | XPUSHs(e->enc); |
0b3236bb |
235 | XPUSHs(e->dataSV); |
aa0053b7 |
236 | XPUSHs(&PL_sv_yes); |
237 | PUTBACK; |
0b3236bb |
238 | if (perl_call_method("decode", G_SCALAR) != 1) { |
239 | Perl_die(aTHX_ "panic: decode did not return a value"); |
240 | } |
aa0053b7 |
241 | SPAGAIN; |
242 | uni = POPs; |
243 | PUTBACK; |
0b3236bb |
244 | /* Now get translated string (forced to UTF-8) and use as buffer */ |
245 | if (SvPOK(uni)) { |
246 | s = SvPVutf8(uni, len); |
247 | if (len && !is_utf8_string(s,len)) { |
248 | Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s); |
249 | } |
250 | } |
251 | if (len > 0) { |
252 | /* Got _something */ |
253 | /* if decode gave us back dataSV then data may vanish when |
254 | we do ptrcnt adjust - so take our copy now. |
255 | (The copy is a pain - need a put-it-here option for decode.) |
256 | */ |
257 | sv_setpvn(e->bufsv,s,len); |
258 | e->base.ptr = e->base.buf = SvPVX(e->bufsv); |
259 | e->base.end = e->base.ptr + SvCUR(e->bufsv); |
260 | PerlIOBase(f)->flags |= PERLIO_F_RDBUF; |
261 | SvUTF8_on(e->bufsv); |
262 | |
263 | /* Adjust ptr/cnt not taking anything which |
264 | did not translate - not clear this is a win */ |
265 | /* compute amount we took */ |
266 | use -= SvCUR(e->dataSV); |
267 | PerlIO_set_ptrcnt(n, ptr+use, (avail-use)); |
268 | /* and as we did not take it it isn't pending */ |
269 | SvCUR_set(e->dataSV,0); |
270 | } else { |
271 | /* Got nothing - assume partial character so we need some more */ |
272 | /* Make sure e->dataSV is a normal SV before re-filling as |
273 | buffer alias will change under us |
274 | */ |
275 | s = SvPV(e->dataSV,len); |
276 | sv_setpvn(e->dataSV,s,len); |
277 | PerlIO_set_ptrcnt(n, ptr+use, (avail-use)); |
278 | goto retry; |
aa0053b7 |
279 | } |
aa0053b7 |
280 | FREETMPS; |
281 | LEAVE; |
0b3236bb |
282 | return code; |
283 | } |
284 | else { |
285 | if (avail == 0) |
286 | PerlIOBase(f)->flags |= PERLIO_F_EOF; |
287 | else |
288 | PerlIOBase(f)->flags |= PERLIO_F_ERROR; |
289 | return -1; |
72e44f29 |
290 | } |
33af2bc7 |
291 | } |
292 | |
293 | IV |
aa0053b7 |
294 | PerlIOEncode_flush(pTHX_ PerlIO * f) |
33af2bc7 |
295 | { |
aa0053b7 |
296 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
297 | IV code = 0; |
0b3236bb |
298 | if (e->bufsv && (e->base.ptr > e->base.buf)) { |
aa0053b7 |
299 | dSP; |
300 | SV *str; |
301 | char *s; |
302 | STRLEN len; |
0b3236bb |
303 | SSize_t count = 0; |
304 | if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) { |
305 | /* Write case encode the buffer and write() to layer below */ |
306 | ENTER; |
307 | SAVETMPS; |
308 | PUSHMARK(sp); |
309 | XPUSHs(e->enc); |
310 | SvCUR_set(e->bufsv, e->base.ptr - e->base.buf); |
311 | SvUTF8_on(e->bufsv); |
312 | Perl_warn(aTHX_ "flush %_",e->bufsv); |
313 | XPUSHs(e->bufsv); |
314 | XPUSHs(&PL_sv_yes); |
315 | PUTBACK; |
316 | if (perl_call_method("encode", G_SCALAR) != 1) |
317 | code = -1; |
318 | SPAGAIN; |
319 | str = POPs; |
320 | PUTBACK; |
321 | s = SvPV(str, len); |
322 | count = PerlIO_write(PerlIONext(f),s,len); |
323 | if (count != len) { |
324 | code = -1; |
325 | } |
326 | FREETMPS; |
327 | LEAVE; |
328 | if (PerlIO_flush(PerlIONext(f)) != 0) { |
329 | code = -1; |
330 | } |
aa0053b7 |
331 | } |
0b3236bb |
332 | else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) { |
333 | /* read case */ |
334 | /* if we have any untranslated stuff then unread that first */ |
335 | if (e->dataSV && SvCUR(e->dataSV)) { |
336 | s = SvPV(e->dataSV, len); |
337 | count = PerlIO_unread(PerlIONext(f),s,len); |
338 | if (count != len) { |
339 | code = -1; |
340 | } |
341 | } |
342 | /* See if there is anything left in the buffer */ |
343 | if (e->base.ptr < e->base.end) { |
344 | /* Bother - have unread data. |
345 | re-encode and unread() to layer below |
346 | */ |
347 | ENTER; |
348 | SAVETMPS; |
349 | str = sv_newmortal(); |
350 | sv_upgrade(str, SVt_PV); |
351 | SvPVX(str) = e->base.ptr; |
352 | SvLEN(str) = 0; |
353 | SvCUR_set(str, e->base.end - e->base.ptr); |
354 | SvUTF8_on(str); |
355 | PUSHMARK(sp); |
356 | XPUSHs(e->enc); |
357 | XPUSHs(str); |
358 | XPUSHs(&PL_sv_yes); |
359 | PUTBACK; |
360 | if (perl_call_method("encode", G_SCALAR) != 1) |
361 | code = -1; |
362 | SPAGAIN; |
363 | str = POPs; |
364 | PUTBACK; |
365 | s = SvPV(str, len); |
366 | count = PerlIO_unread(PerlIONext(f),s,len); |
367 | if (count != len) { |
368 | code = -1; |
369 | } |
370 | FREETMPS; |
371 | LEAVE; |
372 | } |
aa0053b7 |
373 | } |
0b3236bb |
374 | e->base.ptr = e->base.end = e->base.buf; |
375 | PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF); |
72e44f29 |
376 | } |
aa0053b7 |
377 | return code; |
33af2bc7 |
378 | } |
379 | |
380 | IV |
aa0053b7 |
381 | PerlIOEncode_close(pTHX_ PerlIO * f) |
33af2bc7 |
382 | { |
aa0053b7 |
383 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
384 | IV code = PerlIOBase_close(aTHX_ f); |
385 | if (e->bufsv) { |
386 | SvREFCNT_dec(e->bufsv); |
387 | e->bufsv = Nullsv; |
388 | } |
389 | e->base.buf = NULL; |
390 | e->base.ptr = NULL; |
391 | e->base.end = NULL; |
392 | PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF); |
393 | return code; |
33af2bc7 |
394 | } |
395 | |
72e44f29 |
396 | Off_t |
aa0053b7 |
397 | PerlIOEncode_tell(pTHX_ PerlIO * f) |
72e44f29 |
398 | { |
aa0053b7 |
399 | PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf); |
0b3236bb |
400 | /* Unfortunately the only way to get a postion is to (re-)translate, |
401 | the UTF8 we have in bufefr and then ask layer below |
aa0053b7 |
402 | */ |
0b3236bb |
403 | PerlIO_flush(f); |
404 | return PerlIO_tell(PerlIONext(f)); |
72e44f29 |
405 | } |
406 | |
8cf8f3d1 |
407 | PerlIO * |
aa0053b7 |
408 | PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o, |
409 | CLONE_PARAMS * params, int flags) |
8cf8f3d1 |
410 | { |
aa0053b7 |
411 | if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) { |
412 | PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode); |
413 | PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode); |
414 | if (oe->enc) { |
415 | fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params); |
416 | } |
9f16d962 |
417 | } |
aa0053b7 |
418 | return f; |
8cf8f3d1 |
419 | } |
420 | |
33af2bc7 |
421 | PerlIO_funcs PerlIO_encode = { |
aa0053b7 |
422 | "encoding", |
423 | sizeof(PerlIOEncode), |
424 | PERLIO_K_BUFFERED, |
425 | PerlIOEncode_pushed, |
426 | PerlIOEncode_popped, |
427 | PerlIOBuf_open, |
428 | PerlIOEncode_getarg, |
429 | PerlIOBase_fileno, |
430 | PerlIOEncode_dup, |
431 | PerlIOBuf_read, |
432 | PerlIOBuf_unread, |
433 | PerlIOBuf_write, |
434 | PerlIOBuf_seek, |
435 | PerlIOEncode_tell, |
436 | PerlIOEncode_close, |
437 | PerlIOEncode_flush, |
438 | PerlIOEncode_fill, |
439 | PerlIOBase_eof, |
440 | PerlIOBase_error, |
441 | PerlIOBase_clearerr, |
442 | PerlIOBase_setlinebuf, |
443 | PerlIOEncode_get_base, |
444 | PerlIOBuf_bufsiz, |
445 | PerlIOBuf_get_ptr, |
446 | PerlIOBuf_get_cnt, |
447 | PerlIOBuf_set_ptrcnt, |
33af2bc7 |
448 | }; |
aa0053b7 |
449 | #endif /* encode layer */ |
33af2bc7 |
450 | |
2f2b4ff2 |
451 | void |
aa0053b7 |
452 | Encode_XSEncoding(pTHX_ encode_t * enc) |
2f2b4ff2 |
453 | { |
aa0053b7 |
454 | dSP; |
455 | HV *stash = gv_stashpv("Encode::XS", TRUE); |
456 | SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash); |
457 | int i = 0; |
458 | PUSHMARK(sp); |
459 | XPUSHs(sv); |
460 | while (enc->name[i]) { |
461 | const char *name = enc->name[i++]; |
462 | XPUSHs(sv_2mortal(newSVpvn(name, strlen(name)))); |
463 | } |
464 | PUTBACK; |
465 | call_pv("Encode::define_encoding", G_DISCARD); |
466 | SvREFCNT_dec(sv); |
2f2b4ff2 |
467 | } |
468 | |
aa0053b7 |
469 | void |
470 | call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) |
471 | { |
472 | } |
67e989fb |
473 | |
2f2b4ff2 |
474 | static SV * |
aa0053b7 |
475 | encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, |
476 | int check) |
2f2b4ff2 |
477 | { |
aa0053b7 |
478 | STRLEN slen; |
479 | U8 *s = (U8 *) SvPV(src, slen); |
0b3236bb |
480 | STRLEN tlen = slen; |
481 | SV *dst = sv_2mortal(newSV(slen+1)); |
aa0053b7 |
482 | if (slen) { |
0b3236bb |
483 | U8 *d = (U8 *) SvPVX(dst); |
484 | STRLEN dlen = SvLEN(dst)-1; |
aa0053b7 |
485 | int code; |
486 | while ((code = do_encode(dir, s, &slen, d, dlen, &dlen, !check))) { |
487 | SvCUR_set(dst, dlen); |
488 | SvPOK_on(dst); |
9b37254d |
489 | |
0b3236bb |
490 | #if 0 |
491 | Perl_warn(aTHX_ "code=%d @ s=%d/%d d=%d",code,slen,tlen,dlen); |
492 | #endif |
493 | |
494 | if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL) |
aa0053b7 |
495 | break; |
9b37254d |
496 | |
aa0053b7 |
497 | switch (code) { |
498 | case ENCODE_NOSPACE: |
499 | { |
0b3236bb |
500 | STRLEN done = tlen-slen; |
501 | STRLEN need ; |
502 | if (done) { |
503 | need = (tlen*dlen)/done+1; |
504 | } |
505 | else { |
506 | need = dlen + UTF8_MAXLEN; |
507 | } |
508 | |
aa0053b7 |
509 | d = (U8 *) SvGROW(dst, need); |
510 | if (dlen >= SvLEN(dst)) { |
511 | Perl_croak(aTHX_ |
512 | "Destination couldn't be grown (the need may be miscalculated)."); |
513 | } |
514 | dlen = SvLEN(dst); |
0b3236bb |
515 | slen = tlen; |
aa0053b7 |
516 | break; |
517 | } |
2f2b4ff2 |
518 | |
aa0053b7 |
519 | case ENCODE_NOREP: |
520 | if (dir == enc->f_utf8) { |
521 | if (!check && ckWARN_d(WARN_UTF8)) { |
522 | STRLEN clen; |
523 | UV ch = |
524 | utf8n_to_uvuni(s + slen, (SvCUR(src) - slen), |
525 | &clen, 0); |
526 | Perl_warner(aTHX_ WARN_UTF8, |
527 | "\"\\N{U+%" UVxf |
528 | "}\" does not map to %s", ch, |
529 | enc->name[0]); |
530 | /* FIXME: Skip over the character, copy in replacement and continue |
531 | * but that is messy so for now just fail. |
532 | */ |
533 | return &PL_sv_undef; |
534 | } |
535 | else { |
536 | return &PL_sv_undef; |
537 | } |
538 | } |
539 | else { |
540 | /* UTF-8 is supposed to be "Universal" so should not happen */ |
541 | Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8", |
542 | enc->name[0], (int) (SvCUR(src) - slen), |
543 | s + slen); |
544 | } |
545 | break; |
2f2b4ff2 |
546 | |
aa0053b7 |
547 | default: |
548 | Perl_croak(aTHX_ "Unexpected code %d converting %s %s", |
549 | code, (dir == enc->f_utf8) ? "to" : "from", |
550 | enc->name[0]); |
551 | return &PL_sv_undef; |
552 | } |
553 | } |
554 | SvCUR_set(dst, dlen); |
555 | SvPOK_on(dst); |
556 | if (check) { |
557 | if (slen < SvCUR(src)) { |
558 | Move(s + slen, s, SvCUR(src) - slen, U8); |
559 | } |
560 | SvCUR_set(src, SvCUR(src) - slen); |
0b3236bb |
561 | *SvEND(src) = '\0'; |
aa0053b7 |
562 | } |
2f2b4ff2 |
563 | } |
aa0053b7 |
564 | else { |
0b3236bb |
565 | SvCUR_set(dst, 0); |
aa0053b7 |
566 | SvPOK_on(dst); |
2f2b4ff2 |
567 | } |
0b3236bb |
568 | *SvEND(dst) = '\0'; |
aa0053b7 |
569 | return dst; |
2f2b4ff2 |
570 | } |
571 | |
50d26985 |
572 | MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ |
2f2b4ff2 |
573 | |
574 | PROTOTYPES: ENABLE |
575 | |
576 | void |
691638dd |
577 | Method_decode(obj,src,check = FALSE) |
2f2b4ff2 |
578 | SV * obj |
579 | SV * src |
691638dd |
580 | bool check |
2f2b4ff2 |
581 | CODE: |
582 | { |
583 | encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
584 | ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check); |
585 | SvUTF8_on(ST(0)); |
586 | XSRETURN(1); |
587 | } |
588 | |
589 | void |
691638dd |
590 | Method_encode(obj,src,check = FALSE) |
2f2b4ff2 |
591 | SV * obj |
592 | SV * src |
691638dd |
593 | bool check |
2f2b4ff2 |
594 | CODE: |
595 | { |
596 | encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
597 | sv_utf8_upgrade(src); |
598 | ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check); |
599 | XSRETURN(1); |
600 | } |
601 | |
67e989fb |
602 | MODULE = Encode PACKAGE = Encode |
2c674647 |
603 | |
604 | PROTOTYPES: ENABLE |
605 | |
67e989fb |
606 | I32 |
2c674647 |
607 | _bytes_to_utf8(sv, ...) |
67e989fb |
608 | SV * sv |
2c674647 |
609 | CODE: |
67e989fb |
610 | { |
611 | SV * encoding = items == 2 ? ST(1) : Nullsv; |
612 | |
613 | if (encoding) |
614 | RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); |
615 | else { |
616 | STRLEN len; |
183a2d84 |
617 | U8* s = (U8*)SvPV(sv, len); |
67e989fb |
618 | U8* converted; |
619 | |
620 | converted = bytes_to_utf8(s, &len); /* This allocs */ |
183a2d84 |
621 | sv_setpvn(sv, (char *)converted, len); |
67e989fb |
622 | SvUTF8_on(sv); /* XXX Should we? */ |
623 | Safefree(converted); /* ... so free it */ |
624 | RETVAL = len; |
625 | } |
626 | } |
2c674647 |
627 | OUTPUT: |
67e989fb |
628 | RETVAL |
2c674647 |
629 | |
67e989fb |
630 | I32 |
2c674647 |
631 | _utf8_to_bytes(sv, ...) |
67e989fb |
632 | SV * sv |
2c674647 |
633 | CODE: |
67e989fb |
634 | { |
635 | SV * to = items > 1 ? ST(1) : Nullsv; |
636 | SV * check = items > 2 ? ST(2) : Nullsv; |
87714904 |
637 | |
67e989fb |
638 | if (to) |
639 | RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); |
640 | else { |
67e989fb |
641 | STRLEN len; |
b113ac0e |
642 | U8 *s = (U8*)SvPV(sv, len); |
67e989fb |
643 | |
9c5ffd7c |
644 | RETVAL = 0; |
67e989fb |
645 | if (SvTRUE(check)) { |
646 | /* Must do things the slow way */ |
647 | U8 *dest; |
87714904 |
648 | U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */ |
67e989fb |
649 | U8 *send = s + len; |
650 | |
651 | New(83, dest, len, U8); /* I think */ |
652 | |
653 | while (s < send) { |
654 | if (*s < 0x80) |
655 | *dest++ = *s++; |
656 | else { |
b113ac0e |
657 | STRLEN ulen; |
658 | UV uv = *s++; |
87714904 |
659 | |
67e989fb |
660 | /* Have to do it all ourselves because of error routine, |
661 | aargh. */ |
662 | if (!(uv & 0x40)) |
663 | goto failure; |
664 | if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } |
665 | else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } |
666 | else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } |
667 | else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } |
668 | else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } |
669 | else if (!(uv & 0x01)) { ulen = 7; uv = 0; } |
670 | else { ulen = 13; uv = 0; } |
87714904 |
671 | |
67e989fb |
672 | /* Note change to utf8.c variable naming, for variety */ |
673 | while (ulen--) { |
674 | if ((*s & 0xc0) != 0x80) |
675 | goto failure; |
87714904 |
676 | |
67e989fb |
677 | else |
678 | uv = (uv << 6) | (*s++ & 0x3f); |
87714904 |
679 | } |
67e989fb |
680 | if (uv > 256) { |
681 | failure: |
682 | call_failure(check, s, dest, src); |
683 | /* Now what happens? */ |
684 | } |
685 | *dest++ = (U8)uv; |
686 | } |
687 | } |
688 | } else |
689 | RETVAL = (utf8_to_bytes(s, &len) ? len : 0); |
690 | } |
2c674647 |
691 | } |
692 | OUTPUT: |
693 | RETVAL |
694 | |
2c674647 |
695 | bool |
4411f3b6 |
696 | is_utf8(sv, check = FALSE) |
697 | SV * sv |
698 | bool check |
2c674647 |
699 | CODE: |
700 | { |
2eebba1d |
701 | if (SvGMAGICAL(sv)) /* it could be $1, for example */ |
702 | sv = newSVsv(sv); /* GMAGIG will be done */ |
2c674647 |
703 | if (SvPOK(sv)) { |
4411f3b6 |
704 | RETVAL = SvUTF8(sv) ? TRUE : FALSE; |
2c674647 |
705 | if (RETVAL && |
4411f3b6 |
706 | check && |
2c674647 |
707 | !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) |
708 | RETVAL = FALSE; |
709 | } else { |
710 | RETVAL = FALSE; |
711 | } |
2eebba1d |
712 | if (sv != ST(0)) |
713 | SvREFCNT_dec(sv); /* it was a temp copy */ |
2c674647 |
714 | } |
715 | OUTPUT: |
716 | RETVAL |
717 | |
718 | SV * |
4411f3b6 |
719 | _utf8_on(sv) |
2c674647 |
720 | SV * sv |
721 | CODE: |
722 | { |
723 | if (SvPOK(sv)) { |
87714904 |
724 | SV *rsv = newSViv(SvUTF8(sv)); |
2c674647 |
725 | RETVAL = rsv; |
726 | SvUTF8_on(sv); |
727 | } else { |
728 | RETVAL = &PL_sv_undef; |
729 | } |
730 | } |
731 | OUTPUT: |
732 | RETVAL |
733 | |
734 | SV * |
4411f3b6 |
735 | _utf8_off(sv) |
2c674647 |
736 | SV * sv |
737 | CODE: |
738 | { |
739 | if (SvPOK(sv)) { |
87714904 |
740 | SV *rsv = newSViv(SvUTF8(sv)); |
2c674647 |
741 | RETVAL = rsv; |
742 | SvUTF8_off(sv); |
743 | } else { |
744 | RETVAL = &PL_sv_undef; |
745 | } |
746 | } |
747 | OUTPUT: |
748 | RETVAL |
749 | |
33af2bc7 |
750 | BOOT: |
751 | { |
6a59c517 |
752 | #if defined(USE_PERLIO) && !defined(USE_SFIO) |
a999f61b |
753 | PerlIO_define_layer(aTHX_ &PerlIO_encode); |
33af2bc7 |
754 | #endif |
023d8852 |
755 | #include "8859_def.h" |
756 | #include "EBCDIC_def.h" |
757 | #include "Symbols_def.h" |
33af2bc7 |
758 | } |