Commit | Line | Data |
fcf2db38 |
1 | #define PERL_NO_GET_CONTEXT |
2 | |
2c674647 |
3 | #include "EXTERN.h" |
4 | #include "perl.h" |
5 | #include "XSUB.h" |
2f2b4ff2 |
6 | #define U8 U8 |
7 | #include "encode.h" |
94b89828 |
8 | #include "8859.h" |
2f2b4ff2 |
9 | #include "EBCDIC.h" |
10 | #include "Symbols.h" |
2c674647 |
11 | |
fcf2db38 |
12 | |
13 | #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \ |
2f5768b8 |
14 | Perl_croak(aTHX_ "panic_unimplemented"); \ |
4a83738a |
15 | return (y)0; /* fool picky compilers */ \ |
87714904 |
16 | } |
67e989fb |
17 | UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) |
aa0053b7 |
18 | UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) |
9df9a5cd |
19 | #if defined(USE_PERLIO) && !defined(USE_SFIO) |
72e44f29 |
20 | /* Define an encoding "layer" in the perliol.h sense. |
21 | The layer defined here "inherits" in an object-oriented sense from the |
22 | "perlio" layer with its PerlIOBuf_* "methods". |
23 | The implementation is particularly efficient as until Encode settles down |
24 | there is no point in tryint to tune it. |
25 | |
26 | The layer works by overloading the "fill" and "flush" methods. |
27 | |
28 | "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API |
29 | to convert the encoded data to UTF-8 form, then copies it back to the |
30 | buffer. The "base class's" read methods then see the UTF-8 data. |
31 | |
32 | "flush" transforms the UTF-8 data deposited by the "base class's write |
33 | method in the buffer back into the encoded form using the encode OO perl API, |
34 | then copies data back into the buffer and calls "SUPER::flush. |
35 | |
36 | Note that "flush" is _also_ called for read mode - we still do the (back)-translate |
37 | so that the the base class's "flush" sees the correct number of encoded chars |
38 | for positioning the seek pointer. (This double translation is the worst performance |
39 | issue - particularly with all-perl encode engine.) |
40 | |
41 | */ |
33af2bc7 |
42 | #include "perliol.h" |
aa0053b7 |
43 | typedef struct { |
44 | PerlIOBuf base; /* PerlIOBuf stuff */ |
45 | SV *bufsv; |
46 | SV *enc; |
33af2bc7 |
47 | } PerlIOEncode; |
48 | |
e3f3bf95 |
49 | SV * |
aa0053b7 |
50 | PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags) |
e3f3bf95 |
51 | { |
aa0053b7 |
52 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
53 | SV *sv = &PL_sv_undef; |
54 | if (e->enc) { |
55 | dSP; |
56 | ENTER; |
57 | SAVETMPS; |
58 | PUSHMARK(sp); |
59 | XPUSHs(e->enc); |
60 | PUTBACK; |
61 | if (perl_call_method("name", G_SCALAR) == 1) { |
62 | SPAGAIN; |
63 | sv = newSVsv(POPs); |
64 | PUTBACK; |
65 | } |
e3f3bf95 |
66 | } |
aa0053b7 |
67 | return sv; |
e3f3bf95 |
68 | } |
33af2bc7 |
69 | |
70 | IV |
aa0053b7 |
71 | PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg) |
33af2bc7 |
72 | { |
aa0053b7 |
73 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
74 | dSP; |
75 | IV code; |
76 | code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv); |
77 | ENTER; |
78 | SAVETMPS; |
79 | PUSHMARK(sp); |
80 | XPUSHs(arg); |
81 | PUTBACK; |
82 | if (perl_call_pv("Encode::find_encoding", G_SCALAR) != 1) { |
83 | /* should never happen */ |
84 | Perl_die(aTHX_ "Encode::find_encoding did not return a value"); |
85 | return -1; |
86 | } |
87 | SPAGAIN; |
88 | e->enc = POPs; |
89 | PUTBACK; |
90 | if (!SvROK(e->enc)) { |
91 | e->enc = Nullsv; |
92 | errno = EINVAL; |
93 | Perl_warner(aTHX_ WARN_IO, "Cannot find encoding \"%" SVf "\"", |
94 | arg); |
95 | code = -1; |
96 | } |
97 | else { |
98 | SvREFCNT_inc(e->enc); |
99 | PerlIOBase(f)->flags |= PERLIO_F_UTF8; |
100 | } |
101 | FREETMPS; |
102 | LEAVE; |
103 | return code; |
33af2bc7 |
104 | } |
105 | |
106 | IV |
aa0053b7 |
107 | PerlIOEncode_popped(pTHX_ PerlIO * f) |
33af2bc7 |
108 | { |
aa0053b7 |
109 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
110 | if (e->enc) { |
111 | SvREFCNT_dec(e->enc); |
112 | e->enc = Nullsv; |
113 | } |
114 | if (e->bufsv) { |
115 | SvREFCNT_dec(e->bufsv); |
116 | e->bufsv = Nullsv; |
117 | } |
118 | return 0; |
33af2bc7 |
119 | } |
120 | |
121 | STDCHAR * |
aa0053b7 |
122 | PerlIOEncode_get_base(pTHX_ PerlIO * f) |
33af2bc7 |
123 | { |
aa0053b7 |
124 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
125 | if (!e->base.bufsiz) |
126 | e->base.bufsiz = 1024; |
127 | if (!e->bufsv) { |
128 | e->bufsv = newSV(e->base.bufsiz); |
129 | sv_setpvn(e->bufsv, "", 0); |
130 | } |
131 | e->base.buf = (STDCHAR *) SvPVX(e->bufsv); |
132 | if (!e->base.ptr) |
133 | e->base.ptr = e->base.buf; |
134 | if (!e->base.end) |
135 | e->base.end = e->base.buf; |
136 | if (e->base.ptr < e->base.buf |
137 | || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) { |
138 | Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr, |
139 | e->base.buf + SvLEN(e->bufsv)); |
140 | abort(); |
141 | } |
142 | if (SvLEN(e->bufsv) < e->base.bufsiz) { |
143 | SSize_t poff = e->base.ptr - e->base.buf; |
144 | SSize_t eoff = e->base.end - e->base.buf; |
145 | e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz); |
146 | e->base.ptr = e->base.buf + poff; |
147 | e->base.end = e->base.buf + eoff; |
148 | } |
149 | if (e->base.ptr < e->base.buf |
150 | || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) { |
151 | Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr, |
152 | e->base.buf + SvLEN(e->bufsv)); |
153 | abort(); |
154 | } |
155 | return e->base.buf; |
33af2bc7 |
156 | } |
157 | |
33af2bc7 |
158 | IV |
aa0053b7 |
159 | PerlIOEncode_fill(pTHX_ PerlIO * f) |
33af2bc7 |
160 | { |
aa0053b7 |
161 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
162 | dSP; |
163 | IV code; |
164 | code = PerlIOBuf_fill(aTHX_ f); |
165 | if (code == 0) { |
166 | SV *uni; |
167 | STRLEN len; |
168 | char *s; |
169 | /* Set SV that is the buffer to be buf..ptr */ |
170 | SvCUR_set(e->bufsv, e->base.end - e->base.buf); |
171 | SvUTF8_off(e->bufsv); |
172 | ENTER; |
173 | SAVETMPS; |
174 | PUSHMARK(sp); |
175 | XPUSHs(e->enc); |
176 | XPUSHs(e->bufsv); |
177 | XPUSHs(&PL_sv_yes); |
178 | PUTBACK; |
179 | if (perl_call_method("decode", G_SCALAR) != 1) |
180 | code = -1; |
181 | SPAGAIN; |
182 | uni = POPs; |
183 | PUTBACK; |
184 | /* Now get translated string (forced to UTF-8) and copy back to buffer |
185 | don't use sv_setsv as that may "steal" PV from returned temp |
186 | and so free() our known-large-enough buffer. |
187 | sv_setpvn() should do but let us do it long hand. |
188 | */ |
189 | s = SvPVutf8(uni, len); |
190 | if (s != SvPVX(e->bufsv)) { |
191 | e->base.buf = (STDCHAR *) SvGROW(e->bufsv, len); |
192 | Move(s, e->base.buf, len, char); |
193 | SvCUR_set(e->bufsv, len); |
194 | } |
195 | SvUTF8_on(e->bufsv); |
196 | e->base.end = e->base.buf + len; |
197 | e->base.ptr = e->base.buf; |
198 | FREETMPS; |
199 | LEAVE; |
72e44f29 |
200 | } |
aa0053b7 |
201 | return code; |
33af2bc7 |
202 | } |
203 | |
204 | IV |
aa0053b7 |
205 | PerlIOEncode_flush(pTHX_ PerlIO * f) |
33af2bc7 |
206 | { |
aa0053b7 |
207 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
208 | IV code = 0; |
209 | if (e->bufsv |
210 | && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF | PERLIO_F_WRBUF)) |
211 | && (e->base.ptr > e->base.buf) |
212 | ) { |
213 | dSP; |
214 | SV *str; |
215 | char *s; |
216 | STRLEN len; |
217 | SSize_t left = 0; |
218 | if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) { |
219 | /* This is really just a flag to see if we took all the data, if |
220 | we did PerlIOBase_flush avoids a seek to lower layer. |
221 | Need to revisit if we start getting clever with unreads or seeks-in-buffer |
222 | */ |
223 | left = e->base.end - e->base.ptr; |
224 | } |
225 | ENTER; |
226 | SAVETMPS; |
227 | PUSHMARK(sp); |
228 | XPUSHs(e->enc); |
229 | SvCUR_set(e->bufsv, e->base.ptr - e->base.buf); |
230 | SvUTF8_on(e->bufsv); |
231 | XPUSHs(e->bufsv); |
232 | XPUSHs(&PL_sv_yes); |
233 | PUTBACK; |
234 | if (perl_call_method("encode", G_SCALAR) != 1) |
235 | code = -1; |
236 | SPAGAIN; |
237 | str = POPs; |
238 | PUTBACK; |
239 | s = SvPV(str, len); |
240 | if (s != SvPVX(e->bufsv)) { |
241 | e->base.buf = (STDCHAR *) SvGROW(e->bufsv, len); |
242 | Move(s, e->base.buf, len, char); |
243 | SvCUR_set(e->bufsv, len); |
244 | } |
245 | SvUTF8_off(e->bufsv); |
246 | e->base.ptr = e->base.buf + len; |
247 | /* restore end != ptr as inequality is used by PerlIOBuf_flush in read case */ |
248 | e->base.end = e->base.ptr + left; |
249 | FREETMPS; |
250 | LEAVE; |
251 | if (PerlIOBuf_flush(aTHX_ f) != 0) |
252 | code = -1; |
72e44f29 |
253 | } |
aa0053b7 |
254 | return code; |
33af2bc7 |
255 | } |
256 | |
257 | IV |
aa0053b7 |
258 | PerlIOEncode_close(pTHX_ PerlIO * f) |
33af2bc7 |
259 | { |
aa0053b7 |
260 | PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); |
261 | IV code = PerlIOBase_close(aTHX_ f); |
262 | if (e->bufsv) { |
263 | SvREFCNT_dec(e->bufsv); |
264 | e->bufsv = Nullsv; |
265 | } |
266 | e->base.buf = NULL; |
267 | e->base.ptr = NULL; |
268 | e->base.end = NULL; |
269 | PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF); |
270 | return code; |
33af2bc7 |
271 | } |
272 | |
72e44f29 |
273 | Off_t |
aa0053b7 |
274 | PerlIOEncode_tell(pTHX_ PerlIO * f) |
72e44f29 |
275 | { |
aa0053b7 |
276 | PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf); |
277 | /* Unfortunately the only way to get a postion is to back-translate, |
278 | the UTF8-bytes we have buf..ptr and adjust accordingly. |
279 | But we will try and save any unread data in case stream |
280 | is un-seekable. |
281 | */ |
282 | if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF) && b->ptr < b->end) { |
283 | Size_t count = b->end - b->ptr; |
284 | PerlIO_push(aTHX_ f, &PerlIO_pending, "r", Nullsv); |
285 | /* Save what we have left to read */ |
286 | PerlIOSelf(f, PerlIOBuf)->bufsiz = count; |
287 | PerlIO_unread(f, b->ptr, count); |
288 | /* There isn't any unread data - we just saved it - so avoid the lower seek */ |
289 | b->end = b->ptr; |
290 | /* Flush ourselves - now one layer down, |
291 | this does the back translate and adjusts position |
292 | */ |
293 | PerlIO_flush(PerlIONext(f)); |
294 | /* Set position of the saved data */ |
295 | PerlIOSelf(f, PerlIOBuf)->posn = b->posn; |
296 | } |
297 | else { |
298 | PerlIO_flush(f); |
299 | } |
300 | return b->posn; |
72e44f29 |
301 | } |
302 | |
8cf8f3d1 |
303 | PerlIO * |
aa0053b7 |
304 | PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o, |
305 | CLONE_PARAMS * params, int flags) |
8cf8f3d1 |
306 | { |
aa0053b7 |
307 | if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) { |
308 | PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode); |
309 | PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode); |
310 | if (oe->enc) { |
311 | fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params); |
312 | } |
9f16d962 |
313 | } |
aa0053b7 |
314 | return f; |
8cf8f3d1 |
315 | } |
316 | |
33af2bc7 |
317 | PerlIO_funcs PerlIO_encode = { |
aa0053b7 |
318 | "encoding", |
319 | sizeof(PerlIOEncode), |
320 | PERLIO_K_BUFFERED, |
321 | PerlIOEncode_pushed, |
322 | PerlIOEncode_popped, |
323 | PerlIOBuf_open, |
324 | PerlIOEncode_getarg, |
325 | PerlIOBase_fileno, |
326 | PerlIOEncode_dup, |
327 | PerlIOBuf_read, |
328 | PerlIOBuf_unread, |
329 | PerlIOBuf_write, |
330 | PerlIOBuf_seek, |
331 | PerlIOEncode_tell, |
332 | PerlIOEncode_close, |
333 | PerlIOEncode_flush, |
334 | PerlIOEncode_fill, |
335 | PerlIOBase_eof, |
336 | PerlIOBase_error, |
337 | PerlIOBase_clearerr, |
338 | PerlIOBase_setlinebuf, |
339 | PerlIOEncode_get_base, |
340 | PerlIOBuf_bufsiz, |
341 | PerlIOBuf_get_ptr, |
342 | PerlIOBuf_get_cnt, |
343 | PerlIOBuf_set_ptrcnt, |
33af2bc7 |
344 | }; |
aa0053b7 |
345 | #endif /* encode layer */ |
33af2bc7 |
346 | |
2f2b4ff2 |
347 | void |
aa0053b7 |
348 | Encode_XSEncoding(pTHX_ encode_t * enc) |
2f2b4ff2 |
349 | { |
aa0053b7 |
350 | dSP; |
351 | HV *stash = gv_stashpv("Encode::XS", TRUE); |
352 | SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash); |
353 | int i = 0; |
354 | PUSHMARK(sp); |
355 | XPUSHs(sv); |
356 | while (enc->name[i]) { |
357 | const char *name = enc->name[i++]; |
358 | XPUSHs(sv_2mortal(newSVpvn(name, strlen(name)))); |
359 | } |
360 | PUTBACK; |
361 | call_pv("Encode::define_encoding", G_DISCARD); |
362 | SvREFCNT_dec(sv); |
2f2b4ff2 |
363 | } |
364 | |
aa0053b7 |
365 | void |
366 | call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) |
367 | { |
368 | } |
67e989fb |
369 | |
2f2b4ff2 |
370 | static SV * |
aa0053b7 |
371 | encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, |
372 | int check) |
2f2b4ff2 |
373 | { |
aa0053b7 |
374 | STRLEN slen; |
375 | U8 *s = (U8 *) SvPV(src, slen); |
376 | SV *dst = sv_2mortal(newSV(2 * slen + 1)); |
377 | if (slen) { |
378 | U8 *d = (U8 *) SvGROW(dst, 2 * slen + 1); |
379 | STRLEN dlen = SvLEN(dst); |
380 | int code; |
381 | while ((code = do_encode(dir, s, &slen, d, dlen, &dlen, !check))) { |
382 | SvCUR_set(dst, dlen); |
383 | SvPOK_on(dst); |
9b37254d |
384 | |
aa0053b7 |
385 | if (code == ENCODE_FALLBACK) |
386 | break; |
9b37254d |
387 | |
aa0053b7 |
388 | switch (code) { |
389 | case ENCODE_NOSPACE: |
390 | { |
391 | STRLEN need = dlen + UTF8_MAXLEN * 128; /* 128 is too big or small? */ |
392 | d = (U8 *) SvGROW(dst, need); |
393 | if (dlen >= SvLEN(dst)) { |
394 | Perl_croak(aTHX_ |
395 | "Destination couldn't be grown (the need may be miscalculated)."); |
396 | } |
397 | dlen = SvLEN(dst); |
398 | slen = SvCUR(src); |
399 | break; |
400 | } |
2f2b4ff2 |
401 | |
aa0053b7 |
402 | case ENCODE_NOREP: |
403 | if (dir == enc->f_utf8) { |
404 | if (!check && ckWARN_d(WARN_UTF8)) { |
405 | STRLEN clen; |
406 | UV ch = |
407 | utf8n_to_uvuni(s + slen, (SvCUR(src) - slen), |
408 | &clen, 0); |
409 | Perl_warner(aTHX_ WARN_UTF8, |
410 | "\"\\N{U+%" UVxf |
411 | "}\" does not map to %s", ch, |
412 | enc->name[0]); |
413 | /* FIXME: Skip over the character, copy in replacement and continue |
414 | * but that is messy so for now just fail. |
415 | */ |
416 | return &PL_sv_undef; |
417 | } |
418 | else { |
419 | return &PL_sv_undef; |
420 | } |
421 | } |
422 | else { |
423 | /* UTF-8 is supposed to be "Universal" so should not happen */ |
424 | Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8", |
425 | enc->name[0], (int) (SvCUR(src) - slen), |
426 | s + slen); |
427 | } |
428 | break; |
2f2b4ff2 |
429 | |
aa0053b7 |
430 | case ENCODE_PARTIAL: |
431 | if (!check && ckWARN_d(WARN_UTF8)) { |
432 | Perl_warner(aTHX_ WARN_UTF8, "Partial %s character", |
433 | (dir == |
434 | enc->f_utf8) ? "UTF-8" : enc->name[0]); |
435 | } |
436 | return &PL_sv_undef; |
2f2b4ff2 |
437 | |
aa0053b7 |
438 | default: |
439 | Perl_croak(aTHX_ "Unexpected code %d converting %s %s", |
440 | code, (dir == enc->f_utf8) ? "to" : "from", |
441 | enc->name[0]); |
442 | return &PL_sv_undef; |
443 | } |
444 | } |
445 | SvCUR_set(dst, dlen); |
446 | SvPOK_on(dst); |
447 | if (check) { |
448 | if (slen < SvCUR(src)) { |
449 | Move(s + slen, s, SvCUR(src) - slen, U8); |
450 | } |
451 | SvCUR_set(src, SvCUR(src) - slen); |
452 | } |
2f2b4ff2 |
453 | } |
aa0053b7 |
454 | else { |
455 | SvCUR_set(dst, slen); |
456 | SvPOK_on(dst); |
2f2b4ff2 |
457 | } |
aa0053b7 |
458 | return dst; |
2f2b4ff2 |
459 | } |
460 | |
50d26985 |
461 | MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ |
2f2b4ff2 |
462 | |
463 | PROTOTYPES: ENABLE |
464 | |
465 | void |
691638dd |
466 | Method_decode(obj,src,check = FALSE) |
2f2b4ff2 |
467 | SV * obj |
468 | SV * src |
691638dd |
469 | bool check |
2f2b4ff2 |
470 | CODE: |
471 | { |
472 | encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
473 | ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check); |
474 | SvUTF8_on(ST(0)); |
475 | XSRETURN(1); |
476 | } |
477 | |
478 | void |
691638dd |
479 | Method_encode(obj,src,check = FALSE) |
2f2b4ff2 |
480 | SV * obj |
481 | SV * src |
691638dd |
482 | bool check |
2f2b4ff2 |
483 | CODE: |
484 | { |
485 | encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
486 | sv_utf8_upgrade(src); |
487 | ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check); |
488 | XSRETURN(1); |
489 | } |
490 | |
67e989fb |
491 | MODULE = Encode PACKAGE = Encode |
2c674647 |
492 | |
493 | PROTOTYPES: ENABLE |
494 | |
67e989fb |
495 | I32 |
2c674647 |
496 | _bytes_to_utf8(sv, ...) |
67e989fb |
497 | SV * sv |
2c674647 |
498 | CODE: |
67e989fb |
499 | { |
500 | SV * encoding = items == 2 ? ST(1) : Nullsv; |
501 | |
502 | if (encoding) |
503 | RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); |
504 | else { |
505 | STRLEN len; |
183a2d84 |
506 | U8* s = (U8*)SvPV(sv, len); |
67e989fb |
507 | U8* converted; |
508 | |
509 | converted = bytes_to_utf8(s, &len); /* This allocs */ |
183a2d84 |
510 | sv_setpvn(sv, (char *)converted, len); |
67e989fb |
511 | SvUTF8_on(sv); /* XXX Should we? */ |
512 | Safefree(converted); /* ... so free it */ |
513 | RETVAL = len; |
514 | } |
515 | } |
2c674647 |
516 | OUTPUT: |
67e989fb |
517 | RETVAL |
2c674647 |
518 | |
67e989fb |
519 | I32 |
2c674647 |
520 | _utf8_to_bytes(sv, ...) |
67e989fb |
521 | SV * sv |
2c674647 |
522 | CODE: |
67e989fb |
523 | { |
524 | SV * to = items > 1 ? ST(1) : Nullsv; |
525 | SV * check = items > 2 ? ST(2) : Nullsv; |
87714904 |
526 | |
67e989fb |
527 | if (to) |
528 | RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); |
529 | else { |
67e989fb |
530 | STRLEN len; |
b113ac0e |
531 | U8 *s = (U8*)SvPV(sv, len); |
67e989fb |
532 | |
9c5ffd7c |
533 | RETVAL = 0; |
67e989fb |
534 | if (SvTRUE(check)) { |
535 | /* Must do things the slow way */ |
536 | U8 *dest; |
87714904 |
537 | U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */ |
67e989fb |
538 | U8 *send = s + len; |
539 | |
540 | New(83, dest, len, U8); /* I think */ |
541 | |
542 | while (s < send) { |
543 | if (*s < 0x80) |
544 | *dest++ = *s++; |
545 | else { |
b113ac0e |
546 | STRLEN ulen; |
547 | UV uv = *s++; |
87714904 |
548 | |
67e989fb |
549 | /* Have to do it all ourselves because of error routine, |
550 | aargh. */ |
551 | if (!(uv & 0x40)) |
552 | goto failure; |
553 | if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } |
554 | else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } |
555 | else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } |
556 | else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } |
557 | else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } |
558 | else if (!(uv & 0x01)) { ulen = 7; uv = 0; } |
559 | else { ulen = 13; uv = 0; } |
87714904 |
560 | |
67e989fb |
561 | /* Note change to utf8.c variable naming, for variety */ |
562 | while (ulen--) { |
563 | if ((*s & 0xc0) != 0x80) |
564 | goto failure; |
87714904 |
565 | |
67e989fb |
566 | else |
567 | uv = (uv << 6) | (*s++ & 0x3f); |
87714904 |
568 | } |
67e989fb |
569 | if (uv > 256) { |
570 | failure: |
571 | call_failure(check, s, dest, src); |
572 | /* Now what happens? */ |
573 | } |
574 | *dest++ = (U8)uv; |
575 | } |
576 | } |
577 | } else |
578 | RETVAL = (utf8_to_bytes(s, &len) ? len : 0); |
579 | } |
2c674647 |
580 | } |
581 | OUTPUT: |
582 | RETVAL |
583 | |
2c674647 |
584 | bool |
4411f3b6 |
585 | is_utf8(sv, check = FALSE) |
586 | SV * sv |
587 | bool check |
2c674647 |
588 | CODE: |
589 | { |
2eebba1d |
590 | if (SvGMAGICAL(sv)) /* it could be $1, for example */ |
591 | sv = newSVsv(sv); /* GMAGIG will be done */ |
2c674647 |
592 | if (SvPOK(sv)) { |
4411f3b6 |
593 | RETVAL = SvUTF8(sv) ? TRUE : FALSE; |
2c674647 |
594 | if (RETVAL && |
4411f3b6 |
595 | check && |
2c674647 |
596 | !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) |
597 | RETVAL = FALSE; |
598 | } else { |
599 | RETVAL = FALSE; |
600 | } |
2eebba1d |
601 | if (sv != ST(0)) |
602 | SvREFCNT_dec(sv); /* it was a temp copy */ |
2c674647 |
603 | } |
604 | OUTPUT: |
605 | RETVAL |
606 | |
607 | SV * |
4411f3b6 |
608 | _utf8_on(sv) |
2c674647 |
609 | SV * sv |
610 | CODE: |
611 | { |
612 | if (SvPOK(sv)) { |
87714904 |
613 | SV *rsv = newSViv(SvUTF8(sv)); |
2c674647 |
614 | RETVAL = rsv; |
615 | SvUTF8_on(sv); |
616 | } else { |
617 | RETVAL = &PL_sv_undef; |
618 | } |
619 | } |
620 | OUTPUT: |
621 | RETVAL |
622 | |
623 | SV * |
4411f3b6 |
624 | _utf8_off(sv) |
2c674647 |
625 | SV * sv |
626 | CODE: |
627 | { |
628 | if (SvPOK(sv)) { |
87714904 |
629 | SV *rsv = newSViv(SvUTF8(sv)); |
2c674647 |
630 | RETVAL = rsv; |
631 | SvUTF8_off(sv); |
632 | } else { |
633 | RETVAL = &PL_sv_undef; |
634 | } |
635 | } |
636 | OUTPUT: |
637 | RETVAL |
638 | |
33af2bc7 |
639 | BOOT: |
640 | { |
6a59c517 |
641 | #if defined(USE_PERLIO) && !defined(USE_SFIO) |
a999f61b |
642 | PerlIO_define_layer(aTHX_ &PerlIO_encode); |
33af2bc7 |
643 | #endif |
023d8852 |
644 | #include "8859_def.h" |
645 | #include "EBCDIC_def.h" |
646 | #include "Symbols_def.h" |
33af2bc7 |
647 | } |