Integrate mainline
[p5sagit/p5-mst-13.2.git] / ext / Encode / Encode.xs
CommitLineData
fcf2db38 1#define PERL_NO_GET_CONTEXT
2
2c674647 3#include "EXTERN.h"
4#include "perl.h"
5#include "XSUB.h"
2f2b4ff2 6#define U8 U8
7#include "encode.h"
94b89828 8#include "8859.h"
2f2b4ff2 9#include "EBCDIC.h"
10#include "Symbols.h"
2c674647 11
fcf2db38 12
13#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \
2f5768b8 14 Perl_croak(aTHX_ "panic_unimplemented"); \
4a83738a 15 return (y)0; /* fool picky compilers */ \
87714904 16 }
67e989fb 17UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
aa0053b7 18 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
9df9a5cd 19#if defined(USE_PERLIO) && !defined(USE_SFIO)
72e44f29 20/* Define an encoding "layer" in the perliol.h sense.
21 The layer defined here "inherits" in an object-oriented sense from the
22 "perlio" layer with its PerlIOBuf_* "methods".
23 The implementation is particularly efficient as until Encode settles down
24 there is no point in tryint to tune it.
25
26 The layer works by overloading the "fill" and "flush" methods.
27
28 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API
29 to convert the encoded data to UTF-8 form, then copies it back to the
30 buffer. The "base class's" read methods then see the UTF-8 data.
31
32 "flush" transforms the UTF-8 data deposited by the "base class's write
33 method in the buffer back into the encoded form using the encode OO perl API,
34 then copies data back into the buffer and calls "SUPER::flush.
35
36 Note that "flush" is _also_ called for read mode - we still do the (back)-translate
37 so that the the base class's "flush" sees the correct number of encoded chars
38 for positioning the seek pointer. (This double translation is the worst performance
39 issue - particularly with all-perl encode engine.)
40
41*/
33af2bc7 42#include "perliol.h"
aa0053b7 43typedef struct {
44 PerlIOBuf base; /* PerlIOBuf stuff */
0b3236bb 45 SV *bufsv; /* buffer seen by layers above */
46 SV *dataSV; /* data we have read from layer below */
47 SV *enc; /* the encoding object */
33af2bc7 48} PerlIOEncode;
49
e3f3bf95 50SV *
aa0053b7 51PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
e3f3bf95 52{
aa0053b7 53 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
54 SV *sv = &PL_sv_undef;
55 if (e->enc) {
56 dSP;
57 ENTER;
58 SAVETMPS;
59 PUSHMARK(sp);
60 XPUSHs(e->enc);
61 PUTBACK;
62 if (perl_call_method("name", G_SCALAR) == 1) {
63 SPAGAIN;
64 sv = newSVsv(POPs);
65 PUTBACK;
66 }
e3f3bf95 67 }
aa0053b7 68 return sv;
e3f3bf95 69}
33af2bc7 70
71IV
aa0053b7 72PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
33af2bc7 73{
aa0053b7 74 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
75 dSP;
76 IV code;
77 code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
78 ENTER;
79 SAVETMPS;
80 PUSHMARK(sp);
81 XPUSHs(arg);
82 PUTBACK;
83 if (perl_call_pv("Encode::find_encoding", G_SCALAR) != 1) {
84 /* should never happen */
85 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
86 return -1;
87 }
88 SPAGAIN;
89 e->enc = POPs;
90 PUTBACK;
91 if (!SvROK(e->enc)) {
92 e->enc = Nullsv;
93 errno = EINVAL;
94 Perl_warner(aTHX_ WARN_IO, "Cannot find encoding \"%" SVf "\"",
95 arg);
96 code = -1;
97 }
98 else {
99 SvREFCNT_inc(e->enc);
100 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
101 }
102 FREETMPS;
103 LEAVE;
104 return code;
33af2bc7 105}
106
107IV
aa0053b7 108PerlIOEncode_popped(pTHX_ PerlIO * f)
33af2bc7 109{
aa0053b7 110 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
111 if (e->enc) {
112 SvREFCNT_dec(e->enc);
113 e->enc = Nullsv;
114 }
115 if (e->bufsv) {
116 SvREFCNT_dec(e->bufsv);
117 e->bufsv = Nullsv;
118 }
0b3236bb 119 if (e->dataSV) {
120 SvREFCNT_dec(e->dataSV);
121 e->bufsv = Nullsv;
122 }
aa0053b7 123 return 0;
33af2bc7 124}
125
126STDCHAR *
aa0053b7 127PerlIOEncode_get_base(pTHX_ PerlIO * f)
33af2bc7 128{
aa0053b7 129 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
130 if (!e->base.bufsiz)
131 e->base.bufsiz = 1024;
132 if (!e->bufsv) {
133 e->bufsv = newSV(e->base.bufsiz);
134 sv_setpvn(e->bufsv, "", 0);
135 }
136 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
137 if (!e->base.ptr)
138 e->base.ptr = e->base.buf;
139 if (!e->base.end)
140 e->base.end = e->base.buf;
141 if (e->base.ptr < e->base.buf
142 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
143 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
144 e->base.buf + SvLEN(e->bufsv));
145 abort();
146 }
147 if (SvLEN(e->bufsv) < e->base.bufsiz) {
148 SSize_t poff = e->base.ptr - e->base.buf;
149 SSize_t eoff = e->base.end - e->base.buf;
150 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
151 e->base.ptr = e->base.buf + poff;
152 e->base.end = e->base.buf + eoff;
153 }
154 if (e->base.ptr < e->base.buf
155 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
156 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
157 e->base.buf + SvLEN(e->bufsv));
158 abort();
159 }
160 return e->base.buf;
33af2bc7 161}
162
33af2bc7 163IV
aa0053b7 164PerlIOEncode_fill(pTHX_ PerlIO * f)
33af2bc7 165{
aa0053b7 166 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
167 dSP;
0b3236bb 168 IV code = 0;
169 PerlIO *n;
170 SSize_t avail;
171 if (PerlIO_flush(f) != 0)
172 return -1;
173 n = PerlIONext(f);
174 if (!PerlIO_fast_gets(n)) {
175 /* Things get too messy if we don't have a buffer layer
176 push a :perlio to do the job */
177 char mode[8];
178 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
179 if (!n) {
180 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
181 }
182 }
183 ENTER;
184 SAVETMPS;
185 retry:
186 avail = PerlIO_get_cnt(n);
187 if (avail <= 0) {
188 avail = PerlIO_fill(n);
189 if (avail == 0) {
190 avail = PerlIO_get_cnt(n);
191 }
192 else {
193 if (!PerlIO_error(n) && PerlIO_eof(n))
194 avail = 0;
195 }
196 }
197 if (avail > 0) {
198 STDCHAR *ptr = PerlIO_get_ptr(n);
199 SSize_t use = avail;
aa0053b7 200 SV *uni;
aa0053b7 201 char *s;
0b3236bb 202 STRLEN len = 0;
203 e->base.ptr = e->base.end = (STDCHAR *) Nullch;
204 (void) PerlIOEncode_get_base(aTHX_ f);
205 if (!e->dataSV)
206 e->dataSV = newSV(0);
207 if (SvTYPE(e->dataSV) < SVt_PV) {
208 sv_upgrade(e->dataSV,SVt_PV);
209 }
210 if (SvCUR(e->dataSV)) {
211 /* something left over from last time - create a normal
212 SV with new data appended
213 */
214 if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
215 use = e->base.bufsiz - SvCUR(e->dataSV);
216 }
217 sv_catpvn(e->dataSV,ptr,use);
218 }
219 else {
220 /* Create a "dummy" SV to represent the available data from layer below */
221 if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) {
222 Safefree(SvPVX(e->dataSV));
223 }
224 if (use > e->base.bufsiz) {
225 use = e->base.bufsiz;
226 }
227 SvPVX(e->dataSV) = (char *) ptr;
228 SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */
229 SvCUR_set(e->dataSV,use);
230 SvPOK_on(e->dataSV);
231 }
232 SvUTF8_off(e->dataSV);
aa0053b7 233 PUSHMARK(sp);
234 XPUSHs(e->enc);
0b3236bb 235 XPUSHs(e->dataSV);
aa0053b7 236 XPUSHs(&PL_sv_yes);
237 PUTBACK;
0b3236bb 238 if (perl_call_method("decode", G_SCALAR) != 1) {
239 Perl_die(aTHX_ "panic: decode did not return a value");
240 }
aa0053b7 241 SPAGAIN;
242 uni = POPs;
243 PUTBACK;
0b3236bb 244 /* Now get translated string (forced to UTF-8) and use as buffer */
245 if (SvPOK(uni)) {
246 s = SvPVutf8(uni, len);
247 if (len && !is_utf8_string(s,len)) {
248 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
249 }
250 }
251 if (len > 0) {
252 /* Got _something */
253 /* if decode gave us back dataSV then data may vanish when
254 we do ptrcnt adjust - so take our copy now.
255 (The copy is a pain - need a put-it-here option for decode.)
256 */
257 sv_setpvn(e->bufsv,s,len);
258 e->base.ptr = e->base.buf = SvPVX(e->bufsv);
259 e->base.end = e->base.ptr + SvCUR(e->bufsv);
260 PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
261 SvUTF8_on(e->bufsv);
262
263 /* Adjust ptr/cnt not taking anything which
264 did not translate - not clear this is a win */
265 /* compute amount we took */
266 use -= SvCUR(e->dataSV);
267 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
268 /* and as we did not take it it isn't pending */
269 SvCUR_set(e->dataSV,0);
270 } else {
271 /* Got nothing - assume partial character so we need some more */
272 /* Make sure e->dataSV is a normal SV before re-filling as
273 buffer alias will change under us
274 */
275 s = SvPV(e->dataSV,len);
276 sv_setpvn(e->dataSV,s,len);
277 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
278 goto retry;
aa0053b7 279 }
aa0053b7 280 FREETMPS;
281 LEAVE;
0b3236bb 282 return code;
283 }
284 else {
285 if (avail == 0)
286 PerlIOBase(f)->flags |= PERLIO_F_EOF;
287 else
288 PerlIOBase(f)->flags |= PERLIO_F_ERROR;
289 return -1;
72e44f29 290 }
33af2bc7 291}
292
293IV
aa0053b7 294PerlIOEncode_flush(pTHX_ PerlIO * f)
33af2bc7 295{
aa0053b7 296 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
297 IV code = 0;
0b3236bb 298 if (e->bufsv && (e->base.ptr > e->base.buf)) {
aa0053b7 299 dSP;
300 SV *str;
301 char *s;
302 STRLEN len;
0b3236bb 303 SSize_t count = 0;
304 if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) {
305 /* Write case encode the buffer and write() to layer below */
306 ENTER;
307 SAVETMPS;
308 PUSHMARK(sp);
309 XPUSHs(e->enc);
310 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
311 SvUTF8_on(e->bufsv);
312 Perl_warn(aTHX_ "flush %_",e->bufsv);
313 XPUSHs(e->bufsv);
314 XPUSHs(&PL_sv_yes);
315 PUTBACK;
316 if (perl_call_method("encode", G_SCALAR) != 1)
317 code = -1;
318 SPAGAIN;
319 str = POPs;
320 PUTBACK;
321 s = SvPV(str, len);
322 count = PerlIO_write(PerlIONext(f),s,len);
323 if (count != len) {
324 code = -1;
325 }
326 FREETMPS;
327 LEAVE;
328 if (PerlIO_flush(PerlIONext(f)) != 0) {
329 code = -1;
330 }
aa0053b7 331 }
0b3236bb 332 else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
333 /* read case */
334 /* if we have any untranslated stuff then unread that first */
335 if (e->dataSV && SvCUR(e->dataSV)) {
336 s = SvPV(e->dataSV, len);
337 count = PerlIO_unread(PerlIONext(f),s,len);
338 if (count != len) {
339 code = -1;
340 }
341 }
342 /* See if there is anything left in the buffer */
343 if (e->base.ptr < e->base.end) {
344 /* Bother - have unread data.
345 re-encode and unread() to layer below
346 */
347 ENTER;
348 SAVETMPS;
349 str = sv_newmortal();
350 sv_upgrade(str, SVt_PV);
351 SvPVX(str) = e->base.ptr;
352 SvLEN(str) = 0;
353 SvCUR_set(str, e->base.end - e->base.ptr);
354 SvUTF8_on(str);
355 PUSHMARK(sp);
356 XPUSHs(e->enc);
357 XPUSHs(str);
358 XPUSHs(&PL_sv_yes);
359 PUTBACK;
360 if (perl_call_method("encode", G_SCALAR) != 1)
361 code = -1;
362 SPAGAIN;
363 str = POPs;
364 PUTBACK;
365 s = SvPV(str, len);
366 count = PerlIO_unread(PerlIONext(f),s,len);
367 if (count != len) {
368 code = -1;
369 }
370 FREETMPS;
371 LEAVE;
372 }
aa0053b7 373 }
0b3236bb 374 e->base.ptr = e->base.end = e->base.buf;
375 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
72e44f29 376 }
aa0053b7 377 return code;
33af2bc7 378}
379
380IV
aa0053b7 381PerlIOEncode_close(pTHX_ PerlIO * f)
33af2bc7 382{
aa0053b7 383 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
384 IV code = PerlIOBase_close(aTHX_ f);
385 if (e->bufsv) {
386 SvREFCNT_dec(e->bufsv);
387 e->bufsv = Nullsv;
388 }
389 e->base.buf = NULL;
390 e->base.ptr = NULL;
391 e->base.end = NULL;
392 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
393 return code;
33af2bc7 394}
395
72e44f29 396Off_t
aa0053b7 397PerlIOEncode_tell(pTHX_ PerlIO * f)
72e44f29 398{
aa0053b7 399 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
0b3236bb 400 /* Unfortunately the only way to get a postion is to (re-)translate,
401 the UTF8 we have in bufefr and then ask layer below
aa0053b7 402 */
0b3236bb 403 PerlIO_flush(f);
404 return PerlIO_tell(PerlIONext(f));
72e44f29 405}
406
8cf8f3d1 407PerlIO *
aa0053b7 408PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
409 CLONE_PARAMS * params, int flags)
8cf8f3d1 410{
aa0053b7 411 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
412 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
413 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
414 if (oe->enc) {
415 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
416 }
9f16d962 417 }
aa0053b7 418 return f;
8cf8f3d1 419}
420
33af2bc7 421PerlIO_funcs PerlIO_encode = {
aa0053b7 422 "encoding",
423 sizeof(PerlIOEncode),
424 PERLIO_K_BUFFERED,
425 PerlIOEncode_pushed,
426 PerlIOEncode_popped,
427 PerlIOBuf_open,
428 PerlIOEncode_getarg,
429 PerlIOBase_fileno,
430 PerlIOEncode_dup,
431 PerlIOBuf_read,
432 PerlIOBuf_unread,
433 PerlIOBuf_write,
434 PerlIOBuf_seek,
435 PerlIOEncode_tell,
436 PerlIOEncode_close,
437 PerlIOEncode_flush,
438 PerlIOEncode_fill,
439 PerlIOBase_eof,
440 PerlIOBase_error,
441 PerlIOBase_clearerr,
442 PerlIOBase_setlinebuf,
443 PerlIOEncode_get_base,
444 PerlIOBuf_bufsiz,
445 PerlIOBuf_get_ptr,
446 PerlIOBuf_get_cnt,
447 PerlIOBuf_set_ptrcnt,
33af2bc7 448};
aa0053b7 449#endif /* encode layer */
33af2bc7 450
2f2b4ff2 451void
aa0053b7 452Encode_XSEncoding(pTHX_ encode_t * enc)
2f2b4ff2 453{
aa0053b7 454 dSP;
455 HV *stash = gv_stashpv("Encode::XS", TRUE);
456 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash);
457 int i = 0;
458 PUSHMARK(sp);
459 XPUSHs(sv);
460 while (enc->name[i]) {
461 const char *name = enc->name[i++];
462 XPUSHs(sv_2mortal(newSVpvn(name, strlen(name))));
463 }
464 PUTBACK;
465 call_pv("Encode::define_encoding", G_DISCARD);
466 SvREFCNT_dec(sv);
2f2b4ff2 467}
468
aa0053b7 469void
470call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig)
471{
472}
67e989fb 473
2f2b4ff2 474static SV *
aa0053b7 475encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
476 int check)
2f2b4ff2 477{
aa0053b7 478 STRLEN slen;
479 U8 *s = (U8 *) SvPV(src, slen);
0b3236bb 480 STRLEN tlen = slen;
481 SV *dst = sv_2mortal(newSV(slen+1));
aa0053b7 482 if (slen) {
0b3236bb 483 U8 *d = (U8 *) SvPVX(dst);
484 STRLEN dlen = SvLEN(dst)-1;
aa0053b7 485 int code;
486 while ((code = do_encode(dir, s, &slen, d, dlen, &dlen, !check))) {
487 SvCUR_set(dst, dlen);
488 SvPOK_on(dst);
9b37254d 489
0b3236bb 490#if 0
491 Perl_warn(aTHX_ "code=%d @ s=%d/%d d=%d",code,slen,tlen,dlen);
492#endif
493
494 if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL)
aa0053b7 495 break;
9b37254d 496
aa0053b7 497 switch (code) {
498 case ENCODE_NOSPACE:
499 {
0b3236bb 500 STRLEN done = tlen-slen;
501 STRLEN need ;
502 if (done) {
503 need = (tlen*dlen)/done+1;
504 }
505 else {
506 need = dlen + UTF8_MAXLEN;
507 }
508
aa0053b7 509 d = (U8 *) SvGROW(dst, need);
510 if (dlen >= SvLEN(dst)) {
511 Perl_croak(aTHX_
512 "Destination couldn't be grown (the need may be miscalculated).");
513 }
514 dlen = SvLEN(dst);
0b3236bb 515 slen = tlen;
aa0053b7 516 break;
517 }
2f2b4ff2 518
aa0053b7 519 case ENCODE_NOREP:
520 if (dir == enc->f_utf8) {
521 if (!check && ckWARN_d(WARN_UTF8)) {
522 STRLEN clen;
523 UV ch =
524 utf8n_to_uvuni(s + slen, (SvCUR(src) - slen),
525 &clen, 0);
526 Perl_warner(aTHX_ WARN_UTF8,
527 "\"\\N{U+%" UVxf
528 "}\" does not map to %s", ch,
529 enc->name[0]);
530 /* FIXME: Skip over the character, copy in replacement and continue
531 * but that is messy so for now just fail.
532 */
533 return &PL_sv_undef;
534 }
535 else {
536 return &PL_sv_undef;
537 }
538 }
539 else {
540 /* UTF-8 is supposed to be "Universal" so should not happen */
541 Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8",
542 enc->name[0], (int) (SvCUR(src) - slen),
543 s + slen);
544 }
545 break;
2f2b4ff2 546
aa0053b7 547 default:
548 Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
549 code, (dir == enc->f_utf8) ? "to" : "from",
550 enc->name[0]);
551 return &PL_sv_undef;
552 }
553 }
554 SvCUR_set(dst, dlen);
555 SvPOK_on(dst);
556 if (check) {
557 if (slen < SvCUR(src)) {
558 Move(s + slen, s, SvCUR(src) - slen, U8);
559 }
560 SvCUR_set(src, SvCUR(src) - slen);
0b3236bb 561 *SvEND(src) = '\0';
aa0053b7 562 }
2f2b4ff2 563 }
aa0053b7 564 else {
0b3236bb 565 SvCUR_set(dst, 0);
aa0053b7 566 SvPOK_on(dst);
2f2b4ff2 567 }
0b3236bb 568 *SvEND(dst) = '\0';
aa0053b7 569 return dst;
2f2b4ff2 570}
571
50d26985 572MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_
2f2b4ff2 573
574PROTOTYPES: ENABLE
575
576void
691638dd 577Method_decode(obj,src,check = FALSE)
2f2b4ff2 578SV * obj
579SV * src
691638dd 580bool check
2f2b4ff2 581CODE:
582 {
583 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
584 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check);
585 SvUTF8_on(ST(0));
586 XSRETURN(1);
587 }
588
589void
691638dd 590Method_encode(obj,src,check = FALSE)
2f2b4ff2 591SV * obj
592SV * src
691638dd 593bool check
2f2b4ff2 594CODE:
595 {
596 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
597 sv_utf8_upgrade(src);
598 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check);
599 XSRETURN(1);
600 }
601
67e989fb 602MODULE = Encode PACKAGE = Encode
2c674647 603
604PROTOTYPES: ENABLE
605
67e989fb 606I32
2c674647 607_bytes_to_utf8(sv, ...)
67e989fb 608 SV * sv
2c674647 609 CODE:
67e989fb 610 {
611 SV * encoding = items == 2 ? ST(1) : Nullsv;
612
613 if (encoding)
614 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
615 else {
616 STRLEN len;
183a2d84 617 U8* s = (U8*)SvPV(sv, len);
67e989fb 618 U8* converted;
619
620 converted = bytes_to_utf8(s, &len); /* This allocs */
183a2d84 621 sv_setpvn(sv, (char *)converted, len);
67e989fb 622 SvUTF8_on(sv); /* XXX Should we? */
623 Safefree(converted); /* ... so free it */
624 RETVAL = len;
625 }
626 }
2c674647 627 OUTPUT:
67e989fb 628 RETVAL
2c674647 629
67e989fb 630I32
2c674647 631_utf8_to_bytes(sv, ...)
67e989fb 632 SV * sv
2c674647 633 CODE:
67e989fb 634 {
635 SV * to = items > 1 ? ST(1) : Nullsv;
636 SV * check = items > 2 ? ST(2) : Nullsv;
87714904 637
67e989fb 638 if (to)
639 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
640 else {
67e989fb 641 STRLEN len;
b113ac0e 642 U8 *s = (U8*)SvPV(sv, len);
67e989fb 643
9c5ffd7c 644 RETVAL = 0;
67e989fb 645 if (SvTRUE(check)) {
646 /* Must do things the slow way */
647 U8 *dest;
87714904 648 U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */
67e989fb 649 U8 *send = s + len;
650
651 New(83, dest, len, U8); /* I think */
652
653 while (s < send) {
654 if (*s < 0x80)
655 *dest++ = *s++;
656 else {
b113ac0e 657 STRLEN ulen;
658 UV uv = *s++;
87714904 659
67e989fb 660 /* Have to do it all ourselves because of error routine,
661 aargh. */
662 if (!(uv & 0x40))
663 goto failure;
664 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
665 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
666 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
667 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
668 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
669 else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
670 else { ulen = 13; uv = 0; }
87714904 671
67e989fb 672 /* Note change to utf8.c variable naming, for variety */
673 while (ulen--) {
674 if ((*s & 0xc0) != 0x80)
675 goto failure;
87714904 676
67e989fb 677 else
678 uv = (uv << 6) | (*s++ & 0x3f);
87714904 679 }
67e989fb 680 if (uv > 256) {
681 failure:
682 call_failure(check, s, dest, src);
683 /* Now what happens? */
684 }
685 *dest++ = (U8)uv;
686 }
687 }
688 } else
689 RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
690 }
2c674647 691 }
692 OUTPUT:
693 RETVAL
694
2c674647 695bool
4411f3b6 696is_utf8(sv, check = FALSE)
697SV * sv
698bool check
2c674647 699 CODE:
700 {
2eebba1d 701 if (SvGMAGICAL(sv)) /* it could be $1, for example */
702 sv = newSVsv(sv); /* GMAGIG will be done */
2c674647 703 if (SvPOK(sv)) {
4411f3b6 704 RETVAL = SvUTF8(sv) ? TRUE : FALSE;
2c674647 705 if (RETVAL &&
4411f3b6 706 check &&
2c674647 707 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
708 RETVAL = FALSE;
709 } else {
710 RETVAL = FALSE;
711 }
2eebba1d 712 if (sv != ST(0))
713 SvREFCNT_dec(sv); /* it was a temp copy */
2c674647 714 }
715 OUTPUT:
716 RETVAL
717
718SV *
4411f3b6 719_utf8_on(sv)
2c674647 720 SV * sv
721 CODE:
722 {
723 if (SvPOK(sv)) {
87714904 724 SV *rsv = newSViv(SvUTF8(sv));
2c674647 725 RETVAL = rsv;
726 SvUTF8_on(sv);
727 } else {
728 RETVAL = &PL_sv_undef;
729 }
730 }
731 OUTPUT:
732 RETVAL
733
734SV *
4411f3b6 735_utf8_off(sv)
2c674647 736 SV * sv
737 CODE:
738 {
739 if (SvPOK(sv)) {
87714904 740 SV *rsv = newSViv(SvUTF8(sv));
2c674647 741 RETVAL = rsv;
742 SvUTF8_off(sv);
743 } else {
744 RETVAL = &PL_sv_undef;
745 }
746 }
747 OUTPUT:
748 RETVAL
749
33af2bc7 750BOOT:
751{
6a59c517 752#if defined(USE_PERLIO) && !defined(USE_SFIO)
a999f61b 753 PerlIO_define_layer(aTHX_ &PerlIO_encode);
33af2bc7 754#endif
023d8852 755#include "8859_def.h"
756#include "EBCDIC_def.h"
757#include "Symbols_def.h"
33af2bc7 758}