Commit | Line | Data |
fcf2db38 |
1 | #define PERL_NO_GET_CONTEXT |
2 | |
2c674647 |
3 | #include "EXTERN.h" |
4 | #include "perl.h" |
5 | #include "XSUB.h" |
2f2b4ff2 |
6 | #define U8 U8 |
7 | #include "encode.h" |
94b89828 |
8 | #include "8859.h" |
2f2b4ff2 |
9 | #include "EBCDIC.h" |
10 | #include "Symbols.h" |
2c674647 |
11 | |
fcf2db38 |
12 | |
13 | #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \ |
2f5768b8 |
14 | Perl_croak(aTHX_ "panic_unimplemented"); \ |
4a83738a |
15 | return (y)0; /* fool picky compilers */ \ |
87714904 |
16 | } |
67e989fb |
17 | UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) |
18 | UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) |
19 | |
9df9a5cd |
20 | #if defined(USE_PERLIO) && !defined(USE_SFIO) |
72e44f29 |
21 | /* Define an encoding "layer" in the perliol.h sense. |
22 | The layer defined here "inherits" in an object-oriented sense from the |
23 | "perlio" layer with its PerlIOBuf_* "methods". |
24 | The implementation is particularly efficient as until Encode settles down |
25 | there is no point in tryint to tune it. |
26 | |
27 | The layer works by overloading the "fill" and "flush" methods. |
28 | |
29 | "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API |
30 | to convert the encoded data to UTF-8 form, then copies it back to the |
31 | buffer. The "base class's" read methods then see the UTF-8 data. |
32 | |
33 | "flush" transforms the UTF-8 data deposited by the "base class's write |
34 | method in the buffer back into the encoded form using the encode OO perl API, |
35 | then copies data back into the buffer and calls "SUPER::flush. |
36 | |
37 | Note that "flush" is _also_ called for read mode - we still do the (back)-translate |
38 | so that the the base class's "flush" sees the correct number of encoded chars |
39 | for positioning the seek pointer. (This double translation is the worst performance |
40 | issue - particularly with all-perl encode engine.) |
41 | |
42 | */ |
43 | |
44 | |
33af2bc7 |
45 | #include "perliol.h" |
46 | |
47 | typedef struct |
48 | { |
49 | PerlIOBuf base; /* PerlIOBuf stuff */ |
50 | SV * bufsv; |
51 | SV * enc; |
52 | } PerlIOEncode; |
53 | |
e3f3bf95 |
54 | SV * |
ecdeb87c |
55 | PerlIOEncode_getarg(pTHX_ PerlIO *f, CLONE_PARAMS *param, int flags) |
e3f3bf95 |
56 | { |
57 | PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); |
58 | SV *sv = &PL_sv_undef; |
59 | if (e->enc) |
60 | { |
61 | dSP; |
62 | ENTER; |
63 | SAVETMPS; |
64 | PUSHMARK(sp); |
65 | XPUSHs(e->enc); |
66 | PUTBACK; |
67 | if (perl_call_method("name",G_SCALAR) == 1) |
68 | { |
69 | SPAGAIN; |
70 | sv = newSVsv(POPs); |
71 | PUTBACK; |
72 | } |
73 | } |
74 | return sv; |
75 | } |
33af2bc7 |
76 | |
77 | IV |
f62ce20a |
78 | PerlIOEncode_pushed(pTHX_ PerlIO *f, const char *mode, SV *arg) |
33af2bc7 |
79 | { |
80 | PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); |
33af2bc7 |
81 | dSP; |
82 | IV code; |
f62ce20a |
83 | code = PerlIOBuf_pushed(aTHX_ f,mode,Nullsv); |
33af2bc7 |
84 | ENTER; |
85 | SAVETMPS; |
86 | PUSHMARK(sp); |
e3f3bf95 |
87 | XPUSHs(arg); |
33af2bc7 |
88 | PUTBACK; |
51ef4e11 |
89 | if (perl_call_pv("Encode::find_encoding",G_SCALAR) != 1) |
52744f63 |
90 | { |
91 | /* should never happen */ |
51ef4e11 |
92 | Perl_die(aTHX_ "Encode::find_encoding did not return a value"); |
52744f63 |
93 | return -1; |
94 | } |
33af2bc7 |
95 | SPAGAIN; |
96 | e->enc = POPs; |
97 | PUTBACK; |
98 | if (!SvROK(e->enc)) |
52744f63 |
99 | { |
100 | e->enc = Nullsv; |
101 | errno = EINVAL; |
29b291f7 |
102 | Perl_warner(aTHX_ WARN_IO, "Cannot find encoding \"%"SVf"\"", arg); |
b26b1ab5 |
103 | code = -1; |
104 | } |
105 | else |
106 | { |
107 | SvREFCNT_inc(e->enc); |
108 | PerlIOBase(f)->flags |= PERLIO_F_UTF8; |
52744f63 |
109 | } |
33af2bc7 |
110 | FREETMPS; |
111 | LEAVE; |
33af2bc7 |
112 | return code; |
113 | } |
114 | |
115 | IV |
f62ce20a |
116 | PerlIOEncode_popped(pTHX_ PerlIO *f) |
33af2bc7 |
117 | { |
118 | PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); |
33af2bc7 |
119 | if (e->enc) |
120 | { |
121 | SvREFCNT_dec(e->enc); |
122 | e->enc = Nullsv; |
123 | } |
124 | if (e->bufsv) |
125 | { |
126 | SvREFCNT_dec(e->bufsv); |
127 | e->bufsv = Nullsv; |
128 | } |
129 | return 0; |
130 | } |
131 | |
132 | STDCHAR * |
f62ce20a |
133 | PerlIOEncode_get_base(pTHX_ PerlIO *f) |
33af2bc7 |
134 | { |
135 | PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); |
33af2bc7 |
136 | if (!e->base.bufsiz) |
137 | e->base.bufsiz = 1024; |
138 | if (!e->bufsv) |
139 | { |
140 | e->bufsv = newSV(e->base.bufsiz); |
141 | sv_setpvn(e->bufsv,"",0); |
142 | } |
62e8870c |
143 | e->base.buf = (STDCHAR *)SvPVX(e->bufsv); |
33af2bc7 |
144 | if (!e->base.ptr) |
145 | e->base.ptr = e->base.buf; |
146 | if (!e->base.end) |
147 | e->base.end = e->base.buf; |
148 | if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv)) |
149 | { |
150 | Perl_warn(aTHX_ " ptr %p(%p)%p", |
151 | e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv)); |
152 | abort(); |
153 | } |
154 | if (SvLEN(e->bufsv) < e->base.bufsiz) |
155 | { |
156 | SSize_t poff = e->base.ptr - e->base.buf; |
157 | SSize_t eoff = e->base.end - e->base.buf; |
62e8870c |
158 | e->base.buf = (STDCHAR *)SvGROW(e->bufsv,e->base.bufsiz); |
33af2bc7 |
159 | e->base.ptr = e->base.buf + poff; |
160 | e->base.end = e->base.buf + eoff; |
161 | } |
162 | if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv)) |
163 | { |
164 | Perl_warn(aTHX_ " ptr %p(%p)%p", |
165 | e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv)); |
166 | abort(); |
167 | } |
168 | return e->base.buf; |
169 | } |
170 | |
33af2bc7 |
171 | IV |
f62ce20a |
172 | PerlIOEncode_fill(pTHX_ PerlIO *f) |
33af2bc7 |
173 | { |
174 | PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); |
33af2bc7 |
175 | dSP; |
176 | IV code; |
f62ce20a |
177 | code = PerlIOBuf_fill(aTHX_ f); |
33af2bc7 |
178 | if (code == 0) |
179 | { |
180 | SV *uni; |
72e44f29 |
181 | STRLEN len; |
182 | char *s; |
183 | /* Set SV that is the buffer to be buf..ptr */ |
33af2bc7 |
184 | SvCUR_set(e->bufsv, e->base.end - e->base.buf); |
185 | SvUTF8_off(e->bufsv); |
186 | ENTER; |
187 | SAVETMPS; |
188 | PUSHMARK(sp); |
189 | XPUSHs(e->enc); |
190 | XPUSHs(e->bufsv); |
191 | XPUSHs(&PL_sv_yes); |
192 | PUTBACK; |
50d26985 |
193 | if (perl_call_method("decode",G_SCALAR) != 1) |
33af2bc7 |
194 | code = -1; |
195 | SPAGAIN; |
196 | uni = POPs; |
197 | PUTBACK; |
72e44f29 |
198 | /* Now get translated string (forced to UTF-8) and copy back to buffer |
199 | don't use sv_setsv as that may "steal" PV from returned temp |
200 | and so free() our known-large-enough buffer. |
201 | sv_setpvn() should do but let us do it long hand. |
202 | */ |
203 | s = SvPVutf8(uni,len); |
204 | if (s != SvPVX(e->bufsv)) |
205 | { |
62e8870c |
206 | e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len); |
72e44f29 |
207 | Move(s,e->base.buf,len,char); |
208 | SvCUR_set(e->bufsv,len); |
209 | } |
210 | SvUTF8_on(e->bufsv); |
211 | e->base.end = e->base.buf+len; |
33af2bc7 |
212 | e->base.ptr = e->base.buf; |
213 | FREETMPS; |
214 | LEAVE; |
215 | } |
216 | return code; |
217 | } |
218 | |
219 | IV |
f62ce20a |
220 | PerlIOEncode_flush(pTHX_ PerlIO *f) |
33af2bc7 |
221 | { |
222 | PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); |
223 | IV code = 0; |
8040349a |
224 | if (e->bufsv && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF|PERLIO_F_WRBUF)) |
225 | &&(e->base.ptr > e->base.buf) |
226 | ) |
33af2bc7 |
227 | { |
228 | dSP; |
229 | SV *str; |
230 | char *s; |
231 | STRLEN len; |
72e44f29 |
232 | SSize_t left = 0; |
233 | if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) |
234 | { |
235 | /* This is really just a flag to see if we took all the data, if |
236 | we did PerlIOBase_flush avoids a seek to lower layer. |
237 | Need to revisit if we start getting clever with unreads or seeks-in-buffer |
238 | */ |
239 | left = e->base.end - e->base.ptr; |
240 | } |
33af2bc7 |
241 | ENTER; |
242 | SAVETMPS; |
243 | PUSHMARK(sp); |
244 | XPUSHs(e->enc); |
72e44f29 |
245 | SvCUR_set(e->bufsv, e->base.ptr - e->base.buf); |
33af2bc7 |
246 | SvUTF8_on(e->bufsv); |
247 | XPUSHs(e->bufsv); |
248 | XPUSHs(&PL_sv_yes); |
249 | PUTBACK; |
50d26985 |
250 | if (perl_call_method("encode",G_SCALAR) != 1) |
33af2bc7 |
251 | code = -1; |
252 | SPAGAIN; |
253 | str = POPs; |
254 | PUTBACK; |
72e44f29 |
255 | s = SvPV(str,len); |
256 | if (s != SvPVX(e->bufsv)) |
257 | { |
62e8870c |
258 | e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len); |
72e44f29 |
259 | Move(s,e->base.buf,len,char); |
260 | SvCUR_set(e->bufsv,len); |
261 | } |
33af2bc7 |
262 | SvUTF8_off(e->bufsv); |
72e44f29 |
263 | e->base.ptr = e->base.buf+len; |
264 | /* restore end != ptr as inequality is used by PerlIOBuf_flush in read case */ |
265 | e->base.end = e->base.ptr + left; |
33af2bc7 |
266 | FREETMPS; |
267 | LEAVE; |
f62ce20a |
268 | if (PerlIOBuf_flush(aTHX_ f) != 0) |
33af2bc7 |
269 | code = -1; |
270 | } |
271 | return code; |
272 | } |
273 | |
274 | IV |
f62ce20a |
275 | PerlIOEncode_close(pTHX_ PerlIO *f) |
33af2bc7 |
276 | { |
277 | PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); |
f62ce20a |
278 | IV code = PerlIOBase_close(aTHX_ f); |
33af2bc7 |
279 | if (e->bufsv) |
280 | { |
281 | SvREFCNT_dec(e->bufsv); |
282 | e->bufsv = Nullsv; |
283 | } |
284 | e->base.buf = NULL; |
285 | e->base.ptr = NULL; |
286 | e->base.end = NULL; |
287 | PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF|PERLIO_F_WRBUF); |
288 | return code; |
289 | } |
290 | |
72e44f29 |
291 | Off_t |
f62ce20a |
292 | PerlIOEncode_tell(pTHX_ PerlIO *f) |
72e44f29 |
293 | { |
294 | PerlIOBuf *b = PerlIOSelf(f,PerlIOBuf); |
295 | /* Unfortunately the only way to get a postion is to back-translate, |
296 | the UTF8-bytes we have buf..ptr and adjust accordingly. |
297 | But we will try and save any unread data in case stream |
298 | is un-seekable. |
299 | */ |
300 | if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF) && b->ptr < b->end) |
301 | { |
302 | Size_t count = b->end - b->ptr; |
e3f3bf95 |
303 | PerlIO_push(aTHX_ f,&PerlIO_pending,"r",Nullsv); |
72e44f29 |
304 | /* Save what we have left to read */ |
305 | PerlIOSelf(f,PerlIOBuf)->bufsiz = count; |
306 | PerlIO_unread(f,b->ptr,count); |
307 | /* There isn't any unread data - we just saved it - so avoid the lower seek */ |
308 | b->end = b->ptr; |
309 | /* Flush ourselves - now one layer down, |
310 | this does the back translate and adjusts position |
311 | */ |
312 | PerlIO_flush(PerlIONext(f)); |
313 | /* Set position of the saved data */ |
314 | PerlIOSelf(f,PerlIOBuf)->posn = b->posn; |
315 | } |
316 | else |
317 | { |
318 | PerlIO_flush(f); |
319 | } |
320 | return b->posn; |
321 | } |
322 | |
8cf8f3d1 |
323 | PerlIO * |
ecdeb87c |
324 | PerlIOEncode_dup(pTHX_ PerlIO *f, PerlIO *o, CLONE_PARAMS *params, int flags) |
8cf8f3d1 |
325 | { |
ecdeb87c |
326 | if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) |
9f16d962 |
327 | { |
328 | PerlIOEncode *fe = PerlIOSelf(f,PerlIOEncode); |
329 | PerlIOEncode *oe = PerlIOSelf(o,PerlIOEncode); |
330 | if (oe->enc) |
331 | { |
332 | fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params); |
333 | } |
334 | } |
335 | return f; |
8cf8f3d1 |
336 | } |
337 | |
33af2bc7 |
338 | PerlIO_funcs PerlIO_encode = { |
72e44f29 |
339 | "encoding", |
33af2bc7 |
340 | sizeof(PerlIOEncode), |
341 | PERLIO_K_BUFFERED, |
33af2bc7 |
342 | PerlIOEncode_pushed, |
343 | PerlIOEncode_popped, |
e3f3bf95 |
344 | PerlIOBuf_open, |
345 | PerlIOEncode_getarg, |
346 | PerlIOBase_fileno, |
8cf8f3d1 |
347 | PerlIOEncode_dup, |
33af2bc7 |
348 | PerlIOBuf_read, |
349 | PerlIOBuf_unread, |
350 | PerlIOBuf_write, |
351 | PerlIOBuf_seek, |
72e44f29 |
352 | PerlIOEncode_tell, |
33af2bc7 |
353 | PerlIOEncode_close, |
354 | PerlIOEncode_flush, |
355 | PerlIOEncode_fill, |
356 | PerlIOBase_eof, |
357 | PerlIOBase_error, |
358 | PerlIOBase_clearerr, |
f6c77cf1 |
359 | PerlIOBase_setlinebuf, |
33af2bc7 |
360 | PerlIOEncode_get_base, |
361 | PerlIOBuf_bufsiz, |
362 | PerlIOBuf_get_ptr, |
363 | PerlIOBuf_get_cnt, |
364 | PerlIOBuf_set_ptrcnt, |
365 | }; |
9df9a5cd |
366 | #endif /* encode layer */ |
33af2bc7 |
367 | |
2f2b4ff2 |
368 | void |
d811239c |
369 | Encode_XSEncoding(pTHX_ encode_t *enc) |
2f2b4ff2 |
370 | { |
51ef4e11 |
371 | dSP; |
2f2b4ff2 |
372 | HV *stash = gv_stashpv("Encode::XS", TRUE); |
373 | SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash); |
dcda1f94 |
374 | int i = 0; |
51ef4e11 |
375 | PUSHMARK(sp); |
376 | XPUSHs(sv); |
dcda1f94 |
377 | while (enc->name[i]) |
378 | { |
379 | const char *name = enc->name[i++]; |
51ef4e11 |
380 | XPUSHs(sv_2mortal(newSVpvn(name,strlen(name)))); |
dcda1f94 |
381 | } |
51ef4e11 |
382 | PUTBACK; |
383 | call_pv("Encode::define_encoding",G_DISCARD); |
dcda1f94 |
384 | SvREFCNT_dec(sv); |
2f2b4ff2 |
385 | } |
386 | |
183a2d84 |
387 | void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {} |
67e989fb |
388 | |
2f2b4ff2 |
389 | static SV * |
390 | encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check) |
391 | { |
392 | STRLEN slen; |
393 | U8 *s = (U8 *) SvPV(src,slen); |
394 | SV *dst = sv_2mortal(newSV(2*slen+1)); |
395 | if (slen) |
396 | { |
397 | U8 *d = (U8 *) SvGROW(dst, 2*slen+1); |
398 | STRLEN dlen = SvLEN(dst); |
399 | int code; |
9b37254d |
400 | while ((code = do_encode(dir,s,&slen,d,dlen,&dlen,!check))) |
2f2b4ff2 |
401 | { |
402 | SvCUR_set(dst,dlen); |
403 | SvPOK_on(dst); |
9b37254d |
404 | |
405 | if (code == ENCODE_FALLBACK) |
406 | break; |
407 | |
2f2b4ff2 |
408 | switch(code) |
409 | { |
410 | case ENCODE_NOSPACE: |
411 | { |
6fa35b06 |
412 | STRLEN need = dlen + UTF8_MAXLEN * 128; /* 128 is too big or small? */ |
2f2b4ff2 |
413 | d = (U8 *) SvGROW(dst, need); |
6fa35b06 |
414 | if (dlen >= SvLEN(dst)) |
415 | { |
416 | Perl_croak(aTHX_ "Destination couldn't be grown (the need may be miscalculated)."); |
417 | } |
2f2b4ff2 |
418 | dlen = SvLEN(dst); |
419 | slen = SvCUR(src); |
420 | break; |
421 | } |
422 | |
423 | case ENCODE_NOREP: |
424 | if (dir == enc->f_utf8) |
425 | { |
426 | if (!check && ckWARN_d(WARN_UTF8)) |
427 | { |
428 | STRLEN clen; |
9041c2e3 |
429 | UV ch = utf8n_to_uvuni(s+slen,(SvCUR(src)-slen),&clen,0); |
430 | Perl_warner(aTHX_ WARN_UTF8, "\"\\N{U+%"UVxf"}\" does not map to %s", ch, enc->name[0]); |
2f2b4ff2 |
431 | /* FIXME: Skip over the character, copy in replacement and continue |
432 | * but that is messy so for now just fail. |
433 | */ |
434 | return &PL_sv_undef; |
435 | } |
436 | else |
437 | { |
438 | return &PL_sv_undef; |
439 | } |
440 | } |
441 | else |
442 | { |
443 | /* UTF-8 is supposed to be "Universal" so should not happen */ |
444 | Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8", |
dcda1f94 |
445 | enc->name[0], (int)(SvCUR(src)-slen),s+slen); |
2f2b4ff2 |
446 | } |
447 | break; |
448 | |
449 | case ENCODE_PARTIAL: |
450 | if (!check && ckWARN_d(WARN_UTF8)) |
451 | { |
452 | Perl_warner(aTHX_ WARN_UTF8, "Partial %s character", |
dcda1f94 |
453 | (dir == enc->f_utf8) ? "UTF-8" : enc->name[0]); |
2f2b4ff2 |
454 | } |
455 | return &PL_sv_undef; |
456 | |
457 | default: |
458 | Perl_croak(aTHX_ "Unexpected code %d converting %s %s", |
dcda1f94 |
459 | code, (dir == enc->f_utf8) ? "to" : "from",enc->name[0]); |
2f2b4ff2 |
460 | return &PL_sv_undef; |
461 | } |
462 | } |
463 | SvCUR_set(dst,dlen); |
464 | SvPOK_on(dst); |
465 | if (check) |
466 | { |
467 | if (slen < SvCUR(src)) |
468 | { |
469 | Move(s+slen,s,SvCUR(src)-slen,U8); |
470 | } |
471 | SvCUR_set(src,SvCUR(src)-slen); |
472 | } |
473 | } |
8040349a |
474 | else |
475 | { |
476 | SvCUR_set(dst,slen); |
477 | SvPOK_on(dst); |
478 | } |
2f2b4ff2 |
479 | return dst; |
480 | } |
481 | |
50d26985 |
482 | MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ |
2f2b4ff2 |
483 | |
484 | PROTOTYPES: ENABLE |
485 | |
486 | void |
691638dd |
487 | Method_decode(obj,src,check = FALSE) |
2f2b4ff2 |
488 | SV * obj |
489 | SV * src |
691638dd |
490 | bool check |
2f2b4ff2 |
491 | CODE: |
492 | { |
493 | encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
494 | ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check); |
495 | SvUTF8_on(ST(0)); |
496 | XSRETURN(1); |
497 | } |
498 | |
499 | void |
691638dd |
500 | Method_encode(obj,src,check = FALSE) |
2f2b4ff2 |
501 | SV * obj |
502 | SV * src |
691638dd |
503 | bool check |
2f2b4ff2 |
504 | CODE: |
505 | { |
506 | encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
507 | sv_utf8_upgrade(src); |
508 | ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check); |
509 | XSRETURN(1); |
510 | } |
511 | |
67e989fb |
512 | MODULE = Encode PACKAGE = Encode |
2c674647 |
513 | |
514 | PROTOTYPES: ENABLE |
515 | |
67e989fb |
516 | I32 |
2c674647 |
517 | _bytes_to_utf8(sv, ...) |
67e989fb |
518 | SV * sv |
2c674647 |
519 | CODE: |
67e989fb |
520 | { |
521 | SV * encoding = items == 2 ? ST(1) : Nullsv; |
522 | |
523 | if (encoding) |
524 | RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); |
525 | else { |
526 | STRLEN len; |
183a2d84 |
527 | U8* s = (U8*)SvPV(sv, len); |
67e989fb |
528 | U8* converted; |
529 | |
530 | converted = bytes_to_utf8(s, &len); /* This allocs */ |
183a2d84 |
531 | sv_setpvn(sv, (char *)converted, len); |
67e989fb |
532 | SvUTF8_on(sv); /* XXX Should we? */ |
533 | Safefree(converted); /* ... so free it */ |
534 | RETVAL = len; |
535 | } |
536 | } |
2c674647 |
537 | OUTPUT: |
67e989fb |
538 | RETVAL |
2c674647 |
539 | |
67e989fb |
540 | I32 |
2c674647 |
541 | _utf8_to_bytes(sv, ...) |
67e989fb |
542 | SV * sv |
2c674647 |
543 | CODE: |
67e989fb |
544 | { |
545 | SV * to = items > 1 ? ST(1) : Nullsv; |
546 | SV * check = items > 2 ? ST(2) : Nullsv; |
87714904 |
547 | |
67e989fb |
548 | if (to) |
549 | RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); |
550 | else { |
67e989fb |
551 | STRLEN len; |
b113ac0e |
552 | U8 *s = (U8*)SvPV(sv, len); |
67e989fb |
553 | |
9c5ffd7c |
554 | RETVAL = 0; |
67e989fb |
555 | if (SvTRUE(check)) { |
556 | /* Must do things the slow way */ |
557 | U8 *dest; |
87714904 |
558 | U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */ |
67e989fb |
559 | U8 *send = s + len; |
560 | |
561 | New(83, dest, len, U8); /* I think */ |
562 | |
563 | while (s < send) { |
564 | if (*s < 0x80) |
565 | *dest++ = *s++; |
566 | else { |
b113ac0e |
567 | STRLEN ulen; |
568 | UV uv = *s++; |
87714904 |
569 | |
67e989fb |
570 | /* Have to do it all ourselves because of error routine, |
571 | aargh. */ |
572 | if (!(uv & 0x40)) |
573 | goto failure; |
574 | if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } |
575 | else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } |
576 | else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } |
577 | else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } |
578 | else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } |
579 | else if (!(uv & 0x01)) { ulen = 7; uv = 0; } |
580 | else { ulen = 13; uv = 0; } |
87714904 |
581 | |
67e989fb |
582 | /* Note change to utf8.c variable naming, for variety */ |
583 | while (ulen--) { |
584 | if ((*s & 0xc0) != 0x80) |
585 | goto failure; |
87714904 |
586 | |
67e989fb |
587 | else |
588 | uv = (uv << 6) | (*s++ & 0x3f); |
87714904 |
589 | } |
67e989fb |
590 | if (uv > 256) { |
591 | failure: |
592 | call_failure(check, s, dest, src); |
593 | /* Now what happens? */ |
594 | } |
595 | *dest++ = (U8)uv; |
596 | } |
597 | } |
598 | } else |
599 | RETVAL = (utf8_to_bytes(s, &len) ? len : 0); |
600 | } |
2c674647 |
601 | } |
602 | OUTPUT: |
603 | RETVAL |
604 | |
2c674647 |
605 | bool |
4411f3b6 |
606 | is_utf8(sv, check = FALSE) |
607 | SV * sv |
608 | bool check |
2c674647 |
609 | CODE: |
610 | { |
2eebba1d |
611 | if (SvGMAGICAL(sv)) /* it could be $1, for example */ |
612 | sv = newSVsv(sv); /* GMAGIG will be done */ |
2c674647 |
613 | if (SvPOK(sv)) { |
4411f3b6 |
614 | RETVAL = SvUTF8(sv) ? TRUE : FALSE; |
2c674647 |
615 | if (RETVAL && |
4411f3b6 |
616 | check && |
2c674647 |
617 | !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) |
618 | RETVAL = FALSE; |
619 | } else { |
620 | RETVAL = FALSE; |
621 | } |
2eebba1d |
622 | if (sv != ST(0)) |
623 | SvREFCNT_dec(sv); /* it was a temp copy */ |
2c674647 |
624 | } |
625 | OUTPUT: |
626 | RETVAL |
627 | |
628 | SV * |
4411f3b6 |
629 | _utf8_on(sv) |
2c674647 |
630 | SV * sv |
631 | CODE: |
632 | { |
633 | if (SvPOK(sv)) { |
87714904 |
634 | SV *rsv = newSViv(SvUTF8(sv)); |
2c674647 |
635 | RETVAL = rsv; |
636 | SvUTF8_on(sv); |
637 | } else { |
638 | RETVAL = &PL_sv_undef; |
639 | } |
640 | } |
641 | OUTPUT: |
642 | RETVAL |
643 | |
644 | SV * |
4411f3b6 |
645 | _utf8_off(sv) |
2c674647 |
646 | SV * sv |
647 | CODE: |
648 | { |
649 | if (SvPOK(sv)) { |
87714904 |
650 | SV *rsv = newSViv(SvUTF8(sv)); |
2c674647 |
651 | RETVAL = rsv; |
652 | SvUTF8_off(sv); |
653 | } else { |
654 | RETVAL = &PL_sv_undef; |
655 | } |
656 | } |
657 | OUTPUT: |
658 | RETVAL |
659 | |
33af2bc7 |
660 | BOOT: |
661 | { |
6a59c517 |
662 | #if defined(USE_PERLIO) && !defined(USE_SFIO) |
a999f61b |
663 | PerlIO_define_layer(aTHX_ &PerlIO_encode); |
33af2bc7 |
664 | #endif |
94b89828 |
665 | #include "8859.def" |
2f2b4ff2 |
666 | #include "EBCDIC.def" |
667 | #include "Symbols.def" |
33af2bc7 |
668 | } |