Commit | Line | Data |
2c674647 |
1 | #include "EXTERN.h" |
2 | #include "perl.h" |
3 | #include "XSUB.h" |
2f2b4ff2 |
4 | #define U8 U8 |
5 | #include "encode.h" |
6 | #include "iso8859.h" |
7 | #include "EBCDIC.h" |
8 | #include "Symbols.h" |
2c674647 |
9 | |
67e989fb |
10 | #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) { \ |
2f5768b8 |
11 | Perl_croak(aTHX_ "panic_unimplemented"); \ |
4a83738a |
12 | return (y)0; /* fool picky compilers */ \ |
87714904 |
13 | } |
67e989fb |
14 | UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) |
15 | UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) |
16 | |
33af2bc7 |
17 | #ifdef USE_PERLIO |
72e44f29 |
18 | /* Define an encoding "layer" in the perliol.h sense. |
19 | The layer defined here "inherits" in an object-oriented sense from the |
20 | "perlio" layer with its PerlIOBuf_* "methods". |
21 | The implementation is particularly efficient as until Encode settles down |
22 | there is no point in tryint to tune it. |
23 | |
24 | The layer works by overloading the "fill" and "flush" methods. |
25 | |
26 | "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API |
27 | to convert the encoded data to UTF-8 form, then copies it back to the |
28 | buffer. The "base class's" read methods then see the UTF-8 data. |
29 | |
30 | "flush" transforms the UTF-8 data deposited by the "base class's write |
31 | method in the buffer back into the encoded form using the encode OO perl API, |
32 | then copies data back into the buffer and calls "SUPER::flush. |
33 | |
34 | Note that "flush" is _also_ called for read mode - we still do the (back)-translate |
35 | so that the the base class's "flush" sees the correct number of encoded chars |
36 | for positioning the seek pointer. (This double translation is the worst performance |
37 | issue - particularly with all-perl encode engine.) |
38 | |
39 | */ |
40 | |
41 | |
33af2bc7 |
42 | #include "perliol.h" |
43 | |
44 | typedef struct |
45 | { |
46 | PerlIOBuf base; /* PerlIOBuf stuff */ |
47 | SV * bufsv; |
48 | SV * enc; |
49 | } PerlIOEncode; |
50 | |
51 | |
52 | IV |
53 | PerlIOEncode_pushed(PerlIO *f, const char *mode,const char *arg,STRLEN len) |
54 | { |
55 | PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); |
56 | dTHX; |
57 | dSP; |
58 | IV code; |
59 | code = PerlIOBuf_pushed(f,mode,Nullch,0); |
60 | ENTER; |
61 | SAVETMPS; |
62 | PUSHMARK(sp); |
63 | XPUSHs(sv_2mortal(newSVpv("Encode",0))); |
64 | XPUSHs(sv_2mortal(newSVpvn(arg,len))); |
65 | PUTBACK; |
66 | if (perl_call_method("getEncoding",G_SCALAR) != 1) |
67 | return -1; |
68 | SPAGAIN; |
69 | e->enc = POPs; |
70 | PUTBACK; |
71 | if (!SvROK(e->enc)) |
72 | return -1; |
73 | SvREFCNT_inc(e->enc); |
74 | FREETMPS; |
75 | LEAVE; |
76 | PerlIOBase(f)->flags |= PERLIO_F_UTF8; |
77 | return code; |
78 | } |
79 | |
80 | IV |
81 | PerlIOEncode_popped(PerlIO *f) |
82 | { |
83 | PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); |
84 | dTHX; |
85 | if (e->enc) |
86 | { |
87 | SvREFCNT_dec(e->enc); |
88 | e->enc = Nullsv; |
89 | } |
90 | if (e->bufsv) |
91 | { |
92 | SvREFCNT_dec(e->bufsv); |
93 | e->bufsv = Nullsv; |
94 | } |
95 | return 0; |
96 | } |
97 | |
98 | STDCHAR * |
99 | PerlIOEncode_get_base(PerlIO *f) |
100 | { |
101 | PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); |
102 | dTHX; |
103 | if (!e->base.bufsiz) |
104 | e->base.bufsiz = 1024; |
105 | if (!e->bufsv) |
106 | { |
107 | e->bufsv = newSV(e->base.bufsiz); |
108 | sv_setpvn(e->bufsv,"",0); |
109 | } |
62e8870c |
110 | e->base.buf = (STDCHAR *)SvPVX(e->bufsv); |
33af2bc7 |
111 | if (!e->base.ptr) |
112 | e->base.ptr = e->base.buf; |
113 | if (!e->base.end) |
114 | e->base.end = e->base.buf; |
115 | if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv)) |
116 | { |
117 | Perl_warn(aTHX_ " ptr %p(%p)%p", |
118 | e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv)); |
119 | abort(); |
120 | } |
121 | if (SvLEN(e->bufsv) < e->base.bufsiz) |
122 | { |
123 | SSize_t poff = e->base.ptr - e->base.buf; |
124 | SSize_t eoff = e->base.end - e->base.buf; |
62e8870c |
125 | e->base.buf = (STDCHAR *)SvGROW(e->bufsv,e->base.bufsiz); |
33af2bc7 |
126 | e->base.ptr = e->base.buf + poff; |
127 | e->base.end = e->base.buf + eoff; |
128 | } |
129 | if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv)) |
130 | { |
131 | Perl_warn(aTHX_ " ptr %p(%p)%p", |
132 | e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv)); |
133 | abort(); |
134 | } |
135 | return e->base.buf; |
136 | } |
137 | |
33af2bc7 |
138 | IV |
139 | PerlIOEncode_fill(PerlIO *f) |
140 | { |
141 | PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); |
142 | dTHX; |
143 | dSP; |
144 | IV code; |
33af2bc7 |
145 | code = PerlIOBuf_fill(f); |
146 | if (code == 0) |
147 | { |
148 | SV *uni; |
72e44f29 |
149 | STRLEN len; |
150 | char *s; |
151 | /* Set SV that is the buffer to be buf..ptr */ |
33af2bc7 |
152 | SvCUR_set(e->bufsv, e->base.end - e->base.buf); |
153 | SvUTF8_off(e->bufsv); |
154 | ENTER; |
155 | SAVETMPS; |
156 | PUSHMARK(sp); |
157 | XPUSHs(e->enc); |
158 | XPUSHs(e->bufsv); |
159 | XPUSHs(&PL_sv_yes); |
160 | PUTBACK; |
161 | if (perl_call_method("toUnicode",G_SCALAR) != 1) |
162 | code = -1; |
163 | SPAGAIN; |
164 | uni = POPs; |
165 | PUTBACK; |
72e44f29 |
166 | /* Now get translated string (forced to UTF-8) and copy back to buffer |
167 | don't use sv_setsv as that may "steal" PV from returned temp |
168 | and so free() our known-large-enough buffer. |
169 | sv_setpvn() should do but let us do it long hand. |
170 | */ |
171 | s = SvPVutf8(uni,len); |
172 | if (s != SvPVX(e->bufsv)) |
173 | { |
62e8870c |
174 | e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len); |
72e44f29 |
175 | Move(s,e->base.buf,len,char); |
176 | SvCUR_set(e->bufsv,len); |
177 | } |
178 | SvUTF8_on(e->bufsv); |
179 | e->base.end = e->base.buf+len; |
33af2bc7 |
180 | e->base.ptr = e->base.buf; |
181 | FREETMPS; |
182 | LEAVE; |
183 | } |
184 | return code; |
185 | } |
186 | |
187 | IV |
188 | PerlIOEncode_flush(PerlIO *f) |
189 | { |
190 | PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); |
191 | IV code = 0; |
192 | dTHX; |
193 | if (e->bufsv && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF|PERLIO_F_WRBUF))) |
194 | { |
195 | dSP; |
196 | SV *str; |
197 | char *s; |
198 | STRLEN len; |
72e44f29 |
199 | SSize_t left = 0; |
200 | if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) |
201 | { |
202 | /* This is really just a flag to see if we took all the data, if |
203 | we did PerlIOBase_flush avoids a seek to lower layer. |
204 | Need to revisit if we start getting clever with unreads or seeks-in-buffer |
205 | */ |
206 | left = e->base.end - e->base.ptr; |
207 | } |
33af2bc7 |
208 | ENTER; |
209 | SAVETMPS; |
210 | PUSHMARK(sp); |
211 | XPUSHs(e->enc); |
72e44f29 |
212 | SvCUR_set(e->bufsv, e->base.ptr - e->base.buf); |
33af2bc7 |
213 | SvUTF8_on(e->bufsv); |
214 | XPUSHs(e->bufsv); |
215 | XPUSHs(&PL_sv_yes); |
216 | PUTBACK; |
217 | if (perl_call_method("fromUnicode",G_SCALAR) != 1) |
218 | code = -1; |
219 | SPAGAIN; |
220 | str = POPs; |
221 | PUTBACK; |
72e44f29 |
222 | s = SvPV(str,len); |
223 | if (s != SvPVX(e->bufsv)) |
224 | { |
62e8870c |
225 | e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len); |
72e44f29 |
226 | Move(s,e->base.buf,len,char); |
227 | SvCUR_set(e->bufsv,len); |
228 | } |
33af2bc7 |
229 | SvUTF8_off(e->bufsv); |
72e44f29 |
230 | e->base.ptr = e->base.buf+len; |
231 | /* restore end != ptr as inequality is used by PerlIOBuf_flush in read case */ |
232 | e->base.end = e->base.ptr + left; |
33af2bc7 |
233 | FREETMPS; |
234 | LEAVE; |
235 | if (PerlIOBuf_flush(f) != 0) |
236 | code = -1; |
237 | } |
238 | return code; |
239 | } |
240 | |
241 | IV |
242 | PerlIOEncode_close(PerlIO *f) |
243 | { |
244 | PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); |
245 | IV code = PerlIOBase_close(f); |
246 | dTHX; |
247 | if (e->bufsv) |
248 | { |
249 | SvREFCNT_dec(e->bufsv); |
250 | e->bufsv = Nullsv; |
251 | } |
252 | e->base.buf = NULL; |
253 | e->base.ptr = NULL; |
254 | e->base.end = NULL; |
255 | PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF|PERLIO_F_WRBUF); |
256 | return code; |
257 | } |
258 | |
72e44f29 |
259 | Off_t |
260 | PerlIOEncode_tell(PerlIO *f) |
261 | { |
262 | PerlIOBuf *b = PerlIOSelf(f,PerlIOBuf); |
263 | /* Unfortunately the only way to get a postion is to back-translate, |
264 | the UTF8-bytes we have buf..ptr and adjust accordingly. |
265 | But we will try and save any unread data in case stream |
266 | is un-seekable. |
267 | */ |
268 | if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF) && b->ptr < b->end) |
269 | { |
270 | Size_t count = b->end - b->ptr; |
271 | PerlIO_push(f,&PerlIO_pending,"r",Nullch,0); |
272 | /* Save what we have left to read */ |
273 | PerlIOSelf(f,PerlIOBuf)->bufsiz = count; |
274 | PerlIO_unread(f,b->ptr,count); |
275 | /* There isn't any unread data - we just saved it - so avoid the lower seek */ |
276 | b->end = b->ptr; |
277 | /* Flush ourselves - now one layer down, |
278 | this does the back translate and adjusts position |
279 | */ |
280 | PerlIO_flush(PerlIONext(f)); |
281 | /* Set position of the saved data */ |
282 | PerlIOSelf(f,PerlIOBuf)->posn = b->posn; |
283 | } |
284 | else |
285 | { |
286 | PerlIO_flush(f); |
287 | } |
288 | return b->posn; |
289 | } |
290 | |
33af2bc7 |
291 | PerlIO_funcs PerlIO_encode = { |
72e44f29 |
292 | "encoding", |
33af2bc7 |
293 | sizeof(PerlIOEncode), |
294 | PERLIO_K_BUFFERED, |
295 | PerlIOBase_fileno, |
296 | PerlIOBuf_fdopen, |
297 | PerlIOBuf_open, |
298 | PerlIOBuf_reopen, |
299 | PerlIOEncode_pushed, |
300 | PerlIOEncode_popped, |
301 | PerlIOBuf_read, |
302 | PerlIOBuf_unread, |
303 | PerlIOBuf_write, |
304 | PerlIOBuf_seek, |
72e44f29 |
305 | PerlIOEncode_tell, |
33af2bc7 |
306 | PerlIOEncode_close, |
307 | PerlIOEncode_flush, |
308 | PerlIOEncode_fill, |
309 | PerlIOBase_eof, |
310 | PerlIOBase_error, |
311 | PerlIOBase_clearerr, |
312 | PerlIOBuf_setlinebuf, |
313 | PerlIOEncode_get_base, |
314 | PerlIOBuf_bufsiz, |
315 | PerlIOBuf_get_ptr, |
316 | PerlIOBuf_get_cnt, |
317 | PerlIOBuf_set_ptrcnt, |
318 | }; |
319 | #endif |
320 | |
2f2b4ff2 |
321 | void |
322 | Encode_Define(pTHX_ encode_t *enc) |
323 | { |
324 | HV *hash = get_hv("Encode::encoding",GV_ADD|GV_ADDMULTI); |
325 | HV *stash = gv_stashpv("Encode::XS", TRUE); |
326 | SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash); |
327 | hv_store(hash,enc->name,strlen(enc->name),sv,0); |
328 | } |
329 | |
183a2d84 |
330 | void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {} |
67e989fb |
331 | |
2f2b4ff2 |
332 | static SV * |
333 | encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check) |
334 | { |
335 | STRLEN slen; |
336 | U8 *s = (U8 *) SvPV(src,slen); |
337 | SV *dst = sv_2mortal(newSV(2*slen+1)); |
338 | if (slen) |
339 | { |
340 | U8 *d = (U8 *) SvGROW(dst, 2*slen+1); |
341 | STRLEN dlen = SvLEN(dst); |
342 | int code; |
9b37254d |
343 | while ((code = do_encode(dir,s,&slen,d,dlen,&dlen,!check))) |
2f2b4ff2 |
344 | { |
345 | SvCUR_set(dst,dlen); |
346 | SvPOK_on(dst); |
9b37254d |
347 | |
348 | if (code == ENCODE_FALLBACK) |
349 | break; |
350 | |
2f2b4ff2 |
351 | switch(code) |
352 | { |
353 | case ENCODE_NOSPACE: |
354 | { |
355 | STRLEN need = (slen) ? (SvLEN(dst)*SvCUR(src)/slen) : (dlen + UTF8_MAXLEN); |
356 | if (need <= SvLEN(dst)) |
357 | need += UTF8_MAXLEN; |
358 | d = (U8 *) SvGROW(dst, need); |
359 | dlen = SvLEN(dst); |
360 | slen = SvCUR(src); |
361 | break; |
362 | } |
363 | |
364 | case ENCODE_NOREP: |
365 | if (dir == enc->f_utf8) |
366 | { |
367 | if (!check && ckWARN_d(WARN_UTF8)) |
368 | { |
369 | STRLEN clen; |
370 | UV ch = utf8_to_uv(s+slen,(SvCUR(src)-slen),&clen,0); |
371 | Perl_warner(aTHX_ WARN_UTF8, "\"\\x{%x}\" does not map to %s", ch, enc->name); |
372 | /* FIXME: Skip over the character, copy in replacement and continue |
373 | * but that is messy so for now just fail. |
374 | */ |
375 | return &PL_sv_undef; |
376 | } |
377 | else |
378 | { |
379 | return &PL_sv_undef; |
380 | } |
381 | } |
382 | else |
383 | { |
384 | /* UTF-8 is supposed to be "Universal" so should not happen */ |
385 | Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8", |
386 | enc->name, (SvCUR(src)-slen),s+slen); |
387 | } |
388 | break; |
389 | |
390 | case ENCODE_PARTIAL: |
391 | if (!check && ckWARN_d(WARN_UTF8)) |
392 | { |
393 | Perl_warner(aTHX_ WARN_UTF8, "Partial %s character", |
394 | (dir == enc->f_utf8) ? "UTF-8" : enc->name); |
395 | } |
396 | return &PL_sv_undef; |
397 | |
398 | default: |
399 | Perl_croak(aTHX_ "Unexpected code %d converting %s %s", |
400 | code, (dir == enc->f_utf8) ? "to" : "from",enc->name); |
401 | return &PL_sv_undef; |
402 | } |
403 | } |
404 | SvCUR_set(dst,dlen); |
405 | SvPOK_on(dst); |
406 | if (check) |
407 | { |
408 | if (slen < SvCUR(src)) |
409 | { |
410 | Move(s+slen,s,SvCUR(src)-slen,U8); |
411 | } |
412 | SvCUR_set(src,SvCUR(src)-slen); |
413 | } |
414 | } |
415 | return dst; |
416 | } |
417 | |
418 | MODULE = Encode PACKAGE = Encode::XS PREFIX = Encode_ |
419 | |
420 | PROTOTYPES: ENABLE |
421 | |
422 | void |
423 | Encode_toUnicode(obj,src,check = 0) |
424 | SV * obj |
425 | SV * src |
426 | int check |
427 | CODE: |
428 | { |
429 | encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
430 | ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check); |
431 | SvUTF8_on(ST(0)); |
432 | XSRETURN(1); |
433 | } |
434 | |
435 | void |
436 | Encode_fromUnicode(obj,src,check = 0) |
437 | SV * obj |
438 | SV * src |
439 | int check |
440 | CODE: |
441 | { |
442 | encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
443 | sv_utf8_upgrade(src); |
444 | ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check); |
445 | XSRETURN(1); |
446 | } |
447 | |
67e989fb |
448 | MODULE = Encode PACKAGE = Encode |
2c674647 |
449 | |
450 | PROTOTYPES: ENABLE |
451 | |
67e989fb |
452 | I32 |
2c674647 |
453 | _bytes_to_utf8(sv, ...) |
67e989fb |
454 | SV * sv |
2c674647 |
455 | CODE: |
67e989fb |
456 | { |
457 | SV * encoding = items == 2 ? ST(1) : Nullsv; |
458 | |
459 | if (encoding) |
460 | RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); |
461 | else { |
462 | STRLEN len; |
183a2d84 |
463 | U8* s = (U8*)SvPV(sv, len); |
67e989fb |
464 | U8* converted; |
465 | |
466 | converted = bytes_to_utf8(s, &len); /* This allocs */ |
183a2d84 |
467 | sv_setpvn(sv, (char *)converted, len); |
67e989fb |
468 | SvUTF8_on(sv); /* XXX Should we? */ |
469 | Safefree(converted); /* ... so free it */ |
470 | RETVAL = len; |
471 | } |
472 | } |
2c674647 |
473 | OUTPUT: |
67e989fb |
474 | RETVAL |
2c674647 |
475 | |
67e989fb |
476 | I32 |
2c674647 |
477 | _utf8_to_bytes(sv, ...) |
67e989fb |
478 | SV * sv |
2c674647 |
479 | CODE: |
67e989fb |
480 | { |
481 | SV * to = items > 1 ? ST(1) : Nullsv; |
482 | SV * check = items > 2 ? ST(2) : Nullsv; |
87714904 |
483 | |
67e989fb |
484 | if (to) |
485 | RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); |
486 | else { |
67e989fb |
487 | STRLEN len; |
b113ac0e |
488 | U8 *s = (U8*)SvPV(sv, len); |
67e989fb |
489 | |
490 | if (SvTRUE(check)) { |
491 | /* Must do things the slow way */ |
492 | U8 *dest; |
87714904 |
493 | U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */ |
67e989fb |
494 | U8 *send = s + len; |
495 | |
496 | New(83, dest, len, U8); /* I think */ |
497 | |
498 | while (s < send) { |
499 | if (*s < 0x80) |
500 | *dest++ = *s++; |
501 | else { |
b113ac0e |
502 | STRLEN ulen; |
503 | UV uv = *s++; |
87714904 |
504 | |
67e989fb |
505 | /* Have to do it all ourselves because of error routine, |
506 | aargh. */ |
507 | if (!(uv & 0x40)) |
508 | goto failure; |
509 | if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } |
510 | else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } |
511 | else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } |
512 | else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } |
513 | else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } |
514 | else if (!(uv & 0x01)) { ulen = 7; uv = 0; } |
515 | else { ulen = 13; uv = 0; } |
87714904 |
516 | |
67e989fb |
517 | /* Note change to utf8.c variable naming, for variety */ |
518 | while (ulen--) { |
519 | if ((*s & 0xc0) != 0x80) |
520 | goto failure; |
87714904 |
521 | |
67e989fb |
522 | else |
523 | uv = (uv << 6) | (*s++ & 0x3f); |
87714904 |
524 | } |
67e989fb |
525 | if (uv > 256) { |
526 | failure: |
527 | call_failure(check, s, dest, src); |
528 | /* Now what happens? */ |
529 | } |
530 | *dest++ = (U8)uv; |
531 | } |
532 | } |
533 | } else |
534 | RETVAL = (utf8_to_bytes(s, &len) ? len : 0); |
535 | } |
2c674647 |
536 | } |
537 | OUTPUT: |
538 | RETVAL |
539 | |
540 | SV * |
541 | _chars_to_utf8(sv, from, ...) |
542 | SV * sv |
543 | SV * from |
544 | CODE: |
545 | { |
546 | SV * check = items == 3 ? ST(2) : Nullsv; |
547 | RETVAL = &PL_sv_undef; |
548 | } |
549 | OUTPUT: |
550 | RETVAL |
551 | |
552 | SV * |
553 | _utf8_to_chars(sv, to, ...) |
554 | SV * sv |
555 | SV * to |
556 | CODE: |
557 | { |
558 | SV * check = items == 3 ? ST(2) : Nullsv; |
559 | RETVAL = &PL_sv_undef; |
560 | } |
561 | OUTPUT: |
562 | RETVAL |
563 | |
564 | SV * |
565 | _utf8_to_chars_check(sv, ...) |
566 | SV * sv |
567 | CODE: |
568 | { |
569 | SV * check = items == 2 ? ST(1) : Nullsv; |
570 | RETVAL = &PL_sv_undef; |
571 | } |
572 | OUTPUT: |
573 | RETVAL |
574 | |
575 | SV * |
576 | _bytes_to_chars(sv, from, ...) |
577 | SV * sv |
578 | SV * from |
579 | CODE: |
580 | { |
581 | SV * check = items == 3 ? ST(2) : Nullsv; |
582 | RETVAL = &PL_sv_undef; |
583 | } |
584 | OUTPUT: |
585 | RETVAL |
586 | |
587 | SV * |
588 | _chars_to_bytes(sv, to, ...) |
589 | SV * sv |
590 | SV * to |
591 | CODE: |
592 | { |
593 | SV * check = items == 3 ? ST(2) : Nullsv; |
594 | RETVAL = &PL_sv_undef; |
595 | } |
596 | OUTPUT: |
597 | RETVAL |
598 | |
599 | SV * |
600 | _from_to(sv, from, to, ...) |
601 | SV * sv |
602 | SV * from |
603 | SV * to |
604 | CODE: |
605 | { |
606 | SV * check = items == 4 ? ST(3) : Nullsv; |
607 | RETVAL = &PL_sv_undef; |
608 | } |
609 | OUTPUT: |
610 | RETVAL |
611 | |
612 | bool |
613 | _is_utf8(sv, ...) |
614 | SV * sv |
615 | CODE: |
616 | { |
617 | SV * check = items == 2 ? ST(1) : Nullsv; |
618 | if (SvPOK(sv)) { |
067a85ef |
619 | RETVAL = SvUTF8(sv) ? 1 : 0; |
2c674647 |
620 | if (RETVAL && |
621 | SvTRUE(check) && |
622 | !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) |
623 | RETVAL = FALSE; |
624 | } else { |
625 | RETVAL = FALSE; |
626 | } |
627 | } |
628 | OUTPUT: |
629 | RETVAL |
630 | |
631 | SV * |
632 | _on_utf8(sv) |
633 | SV * sv |
634 | CODE: |
635 | { |
636 | if (SvPOK(sv)) { |
87714904 |
637 | SV *rsv = newSViv(SvUTF8(sv)); |
2c674647 |
638 | RETVAL = rsv; |
639 | SvUTF8_on(sv); |
640 | } else { |
641 | RETVAL = &PL_sv_undef; |
642 | } |
643 | } |
644 | OUTPUT: |
645 | RETVAL |
646 | |
647 | SV * |
648 | _off_utf8(sv) |
649 | SV * sv |
650 | CODE: |
651 | { |
652 | if (SvPOK(sv)) { |
87714904 |
653 | SV *rsv = newSViv(SvUTF8(sv)); |
2c674647 |
654 | RETVAL = rsv; |
655 | SvUTF8_off(sv); |
656 | } else { |
657 | RETVAL = &PL_sv_undef; |
658 | } |
659 | } |
660 | OUTPUT: |
661 | RETVAL |
662 | |
663 | SV * |
664 | _utf_to_utf(sv, from, to, ...) |
665 | SV * sv |
666 | SV * from |
667 | SV * to |
668 | CODE: |
669 | { |
670 | SV * check = items == 4 ? ST(3) : Nullsv; |
671 | RETVAL = &PL_sv_undef; |
672 | } |
673 | OUTPUT: |
674 | RETVAL |
675 | |
33af2bc7 |
676 | BOOT: |
677 | { |
678 | #ifdef USE_PERLIO |
679 | PerlIO_define_layer(&PerlIO_encode); |
680 | #endif |
2f2b4ff2 |
681 | #include "iso8859.def" |
682 | #include "EBCDIC.def" |
683 | #include "Symbols.def" |
33af2bc7 |
684 | } |