left-over "use 5.7.2" in threads.pm
[p5sagit/p5-mst-13.2.git] / ext / Encode / Encode.xs
CommitLineData
fcf2db38 1#define PERL_NO_GET_CONTEXT
2c674647 2#include "EXTERN.h"
3#include "perl.h"
4#include "XSUB.h"
2f2b4ff2 5#define U8 U8
6#include "encode.h"
071db25d 7#include "def_t.h"
fcf2db38 8
39cf9a5e 9#define ENCODE_XS_PROFILE 0 /* set 1 to profile.
10 t/encoding.t dumps core because of
11 Perl_warner and PerlIO don't work well */
12
13#define ENCODE_XS_USEFP 1 /* set 0 to disable floating point to calculate
14 buffer size for encode_method().
15 1 is recommended. 2 restores NI-S original */
16
fcf2db38 17#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \
2f5768b8 18 Perl_croak(aTHX_ "panic_unimplemented"); \
4a83738a 19 return (y)0; /* fool picky compilers */ \
87714904 20 }
67e989fb 21UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
aa0053b7 22 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
9df9a5cd 23#if defined(USE_PERLIO) && !defined(USE_SFIO)
72e44f29 24/* Define an encoding "layer" in the perliol.h sense.
25 The layer defined here "inherits" in an object-oriented sense from the
26 "perlio" layer with its PerlIOBuf_* "methods".
27 The implementation is particularly efficient as until Encode settles down
28 there is no point in tryint to tune it.
29
30 The layer works by overloading the "fill" and "flush" methods.
31
32 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API
33 to convert the encoded data to UTF-8 form, then copies it back to the
34 buffer. The "base class's" read methods then see the UTF-8 data.
35
36 "flush" transforms the UTF-8 data deposited by the "base class's write
37 method in the buffer back into the encoded form using the encode OO perl API,
38 then copies data back into the buffer and calls "SUPER::flush.
39
40 Note that "flush" is _also_ called for read mode - we still do the (back)-translate
41 so that the the base class's "flush" sees the correct number of encoded chars
42 for positioning the seek pointer. (This double translation is the worst performance
43 issue - particularly with all-perl encode engine.)
44
45*/
33af2bc7 46#include "perliol.h"
aa0053b7 47typedef struct {
48 PerlIOBuf base; /* PerlIOBuf stuff */
0b3236bb 49 SV *bufsv; /* buffer seen by layers above */
50 SV *dataSV; /* data we have read from layer below */
51 SV *enc; /* the encoding object */
33af2bc7 52} PerlIOEncode;
53
e3f3bf95 54SV *
aa0053b7 55PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
e3f3bf95 56{
aa0053b7 57 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
58 SV *sv = &PL_sv_undef;
59 if (e->enc) {
60 dSP;
61 ENTER;
62 SAVETMPS;
63 PUSHMARK(sp);
64 XPUSHs(e->enc);
65 PUTBACK;
66 if (perl_call_method("name", G_SCALAR) == 1) {
67 SPAGAIN;
68 sv = newSVsv(POPs);
69 PUTBACK;
70 }
e3f3bf95 71 }
aa0053b7 72 return sv;
e3f3bf95 73}
33af2bc7 74
75IV
aa0053b7 76PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
33af2bc7 77{
aa0053b7 78 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
79 dSP;
80 IV code;
81 code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
82 ENTER;
83 SAVETMPS;
84 PUSHMARK(sp);
85 XPUSHs(arg);
86 PUTBACK;
87 if (perl_call_pv("Encode::find_encoding", G_SCALAR) != 1) {
88 /* should never happen */
89 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
90 return -1;
91 }
92 SPAGAIN;
93 e->enc = POPs;
94 PUTBACK;
95 if (!SvROK(e->enc)) {
96 e->enc = Nullsv;
97 errno = EINVAL;
f98bc0c6 98 Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
aa0053b7 99 arg);
100 code = -1;
101 }
102 else {
103 SvREFCNT_inc(e->enc);
104 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
105 }
106 FREETMPS;
107 LEAVE;
108 return code;
33af2bc7 109}
110
111IV
aa0053b7 112PerlIOEncode_popped(pTHX_ PerlIO * f)
33af2bc7 113{
aa0053b7 114 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
115 if (e->enc) {
116 SvREFCNT_dec(e->enc);
117 e->enc = Nullsv;
118 }
119 if (e->bufsv) {
120 SvREFCNT_dec(e->bufsv);
121 e->bufsv = Nullsv;
122 }
0b3236bb 123 if (e->dataSV) {
124 SvREFCNT_dec(e->dataSV);
284ee456 125 e->dataSV = Nullsv;
0b3236bb 126 }
aa0053b7 127 return 0;
33af2bc7 128}
129
130STDCHAR *
aa0053b7 131PerlIOEncode_get_base(pTHX_ PerlIO * f)
33af2bc7 132{
aa0053b7 133 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
134 if (!e->base.bufsiz)
135 e->base.bufsiz = 1024;
136 if (!e->bufsv) {
137 e->bufsv = newSV(e->base.bufsiz);
138 sv_setpvn(e->bufsv, "", 0);
139 }
140 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
141 if (!e->base.ptr)
142 e->base.ptr = e->base.buf;
143 if (!e->base.end)
144 e->base.end = e->base.buf;
145 if (e->base.ptr < e->base.buf
146 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
147 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
148 e->base.buf + SvLEN(e->bufsv));
149 abort();
150 }
151 if (SvLEN(e->bufsv) < e->base.bufsiz) {
152 SSize_t poff = e->base.ptr - e->base.buf;
153 SSize_t eoff = e->base.end - e->base.buf;
154 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
155 e->base.ptr = e->base.buf + poff;
156 e->base.end = e->base.buf + eoff;
157 }
158 if (e->base.ptr < e->base.buf
159 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
160 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
161 e->base.buf + SvLEN(e->bufsv));
162 abort();
163 }
164 return e->base.buf;
33af2bc7 165}
166
33af2bc7 167IV
aa0053b7 168PerlIOEncode_fill(pTHX_ PerlIO * f)
33af2bc7 169{
aa0053b7 170 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
171 dSP;
0b3236bb 172 IV code = 0;
173 PerlIO *n;
174 SSize_t avail;
175 if (PerlIO_flush(f) != 0)
176 return -1;
177 n = PerlIONext(f);
178 if (!PerlIO_fast_gets(n)) {
179 /* Things get too messy if we don't have a buffer layer
180 push a :perlio to do the job */
181 char mode[8];
182 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
183 if (!n) {
184 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
185 }
186 }
187 ENTER;
188 SAVETMPS;
189 retry:
190 avail = PerlIO_get_cnt(n);
191 if (avail <= 0) {
192 avail = PerlIO_fill(n);
193 if (avail == 0) {
194 avail = PerlIO_get_cnt(n);
195 }
196 else {
197 if (!PerlIO_error(n) && PerlIO_eof(n))
198 avail = 0;
199 }
200 }
201 if (avail > 0) {
202 STDCHAR *ptr = PerlIO_get_ptr(n);
203 SSize_t use = avail;
aa0053b7 204 SV *uni;
aa0053b7 205 char *s;
0b3236bb 206 STRLEN len = 0;
207 e->base.ptr = e->base.end = (STDCHAR *) Nullch;
208 (void) PerlIOEncode_get_base(aTHX_ f);
209 if (!e->dataSV)
210 e->dataSV = newSV(0);
211 if (SvTYPE(e->dataSV) < SVt_PV) {
212 sv_upgrade(e->dataSV,SVt_PV);
213 }
214 if (SvCUR(e->dataSV)) {
215 /* something left over from last time - create a normal
216 SV with new data appended
217 */
218 if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
219 use = e->base.bufsiz - SvCUR(e->dataSV);
220 }
9dbbe389 221 sv_catpvn(e->dataSV,(char*)ptr,use);
0b3236bb 222 }
223 else {
224 /* Create a "dummy" SV to represent the available data from layer below */
225 if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) {
226 Safefree(SvPVX(e->dataSV));
227 }
228 if (use > e->base.bufsiz) {
229 use = e->base.bufsiz;
230 }
231 SvPVX(e->dataSV) = (char *) ptr;
232 SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */
233 SvCUR_set(e->dataSV,use);
284ee456 234 SvPOK_only(e->dataSV);
0b3236bb 235 }
236 SvUTF8_off(e->dataSV);
aa0053b7 237 PUSHMARK(sp);
238 XPUSHs(e->enc);
0b3236bb 239 XPUSHs(e->dataSV);
aa0053b7 240 XPUSHs(&PL_sv_yes);
241 PUTBACK;
0b3236bb 242 if (perl_call_method("decode", G_SCALAR) != 1) {
243 Perl_die(aTHX_ "panic: decode did not return a value");
244 }
aa0053b7 245 SPAGAIN;
246 uni = POPs;
247 PUTBACK;
0b3236bb 248 /* Now get translated string (forced to UTF-8) and use as buffer */
249 if (SvPOK(uni)) {
250 s = SvPVutf8(uni, len);
85f7ebdf 251 if (len && !is_utf8_string((U8*)s,len)) {
0b3236bb 252 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
253 }
254 }
255 if (len > 0) {
256 /* Got _something */
257 /* if decode gave us back dataSV then data may vanish when
258 we do ptrcnt adjust - so take our copy now.
259 (The copy is a pain - need a put-it-here option for decode.)
260 */
261 sv_setpvn(e->bufsv,s,len);
284ee456 262 e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
0b3236bb 263 e->base.end = e->base.ptr + SvCUR(e->bufsv);
264 PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
265 SvUTF8_on(e->bufsv);
266
267 /* Adjust ptr/cnt not taking anything which
268 did not translate - not clear this is a win */
269 /* compute amount we took */
270 use -= SvCUR(e->dataSV);
271 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
272 /* and as we did not take it it isn't pending */
273 SvCUR_set(e->dataSV,0);
274 } else {
275 /* Got nothing - assume partial character so we need some more */
276 /* Make sure e->dataSV is a normal SV before re-filling as
277 buffer alias will change under us
278 */
279 s = SvPV(e->dataSV,len);
280 sv_setpvn(e->dataSV,s,len);
281 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
282 goto retry;
aa0053b7 283 }
aa0053b7 284 FREETMPS;
285 LEAVE;
0b3236bb 286 return code;
287 }
288 else {
289 if (avail == 0)
290 PerlIOBase(f)->flags |= PERLIO_F_EOF;
291 else
292 PerlIOBase(f)->flags |= PERLIO_F_ERROR;
293 return -1;
72e44f29 294 }
33af2bc7 295}
296
297IV
aa0053b7 298PerlIOEncode_flush(pTHX_ PerlIO * f)
33af2bc7 299{
aa0053b7 300 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
301 IV code = 0;
0b3236bb 302 if (e->bufsv && (e->base.ptr > e->base.buf)) {
aa0053b7 303 dSP;
304 SV *str;
305 char *s;
306 STRLEN len;
0b3236bb 307 SSize_t count = 0;
308 if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) {
309 /* Write case encode the buffer and write() to layer below */
310 ENTER;
311 SAVETMPS;
312 PUSHMARK(sp);
313 XPUSHs(e->enc);
314 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
315 SvUTF8_on(e->bufsv);
0b3236bb 316 XPUSHs(e->bufsv);
317 XPUSHs(&PL_sv_yes);
318 PUTBACK;
8bbd9480 319 if (perl_call_method("encode", G_SCALAR) != 1) {
320 Perl_die(aTHX_ "panic: encode did not return a value");
321 }
0b3236bb 322 SPAGAIN;
323 str = POPs;
324 PUTBACK;
325 s = SvPV(str, len);
326 count = PerlIO_write(PerlIONext(f),s,len);
327 if (count != len) {
328 code = -1;
329 }
330 FREETMPS;
331 LEAVE;
332 if (PerlIO_flush(PerlIONext(f)) != 0) {
333 code = -1;
334 }
25f7d9d3 335 if (SvCUR(e->bufsv)) {
336 /* Did not all translate */
337 e->base.ptr = e->base.buf+SvCUR(e->bufsv);
338 return code;
339 }
aa0053b7 340 }
0b3236bb 341 else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
342 /* read case */
343 /* if we have any untranslated stuff then unread that first */
344 if (e->dataSV && SvCUR(e->dataSV)) {
345 s = SvPV(e->dataSV, len);
346 count = PerlIO_unread(PerlIONext(f),s,len);
347 if (count != len) {
348 code = -1;
349 }
350 }
351 /* See if there is anything left in the buffer */
352 if (e->base.ptr < e->base.end) {
353 /* Bother - have unread data.
354 re-encode and unread() to layer below
355 */
356 ENTER;
357 SAVETMPS;
358 str = sv_newmortal();
359 sv_upgrade(str, SVt_PV);
9dbbe389 360 SvPVX(str) = (char*)e->base.ptr;
0b3236bb 361 SvLEN(str) = 0;
362 SvCUR_set(str, e->base.end - e->base.ptr);
284ee456 363 SvPOK_only(str);
0b3236bb 364 SvUTF8_on(str);
365 PUSHMARK(sp);
366 XPUSHs(e->enc);
367 XPUSHs(str);
368 XPUSHs(&PL_sv_yes);
369 PUTBACK;
8bbd9480 370 if (perl_call_method("encode", G_SCALAR) != 1) {
371 Perl_die(aTHX_ "panic: encode did not return a value");
372 }
0b3236bb 373 SPAGAIN;
374 str = POPs;
375 PUTBACK;
376 s = SvPV(str, len);
377 count = PerlIO_unread(PerlIONext(f),s,len);
378 if (count != len) {
379 code = -1;
380 }
381 FREETMPS;
382 LEAVE;
383 }
aa0053b7 384 }
0b3236bb 385 e->base.ptr = e->base.end = e->base.buf;
386 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
72e44f29 387 }
aa0053b7 388 return code;
33af2bc7 389}
390
391IV
aa0053b7 392PerlIOEncode_close(pTHX_ PerlIO * f)
33af2bc7 393{
aa0053b7 394 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
395 IV code = PerlIOBase_close(aTHX_ f);
396 if (e->bufsv) {
25f7d9d3 397 if (e->base.buf && e->base.ptr > e->base.buf) {
398 Perl_croak(aTHX_ "Close with partial character");
399 }
aa0053b7 400 SvREFCNT_dec(e->bufsv);
401 e->bufsv = Nullsv;
402 }
403 e->base.buf = NULL;
404 e->base.ptr = NULL;
405 e->base.end = NULL;
406 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
407 return code;
33af2bc7 408}
409
72e44f29 410Off_t
aa0053b7 411PerlIOEncode_tell(pTHX_ PerlIO * f)
72e44f29 412{
aa0053b7 413 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
0b3236bb 414 /* Unfortunately the only way to get a postion is to (re-)translate,
415 the UTF8 we have in bufefr and then ask layer below
aa0053b7 416 */
0b3236bb 417 PerlIO_flush(f);
25f7d9d3 418 if (b->buf && b->ptr > b->buf) {
419 Perl_croak(aTHX_ "Cannot tell at partial character");
420 }
0b3236bb 421 return PerlIO_tell(PerlIONext(f));
72e44f29 422}
423
8cf8f3d1 424PerlIO *
aa0053b7 425PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
426 CLONE_PARAMS * params, int flags)
8cf8f3d1 427{
aa0053b7 428 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
429 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
430 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
431 if (oe->enc) {
432 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
433 }
9f16d962 434 }
aa0053b7 435 return f;
8cf8f3d1 436}
437
33af2bc7 438PerlIO_funcs PerlIO_encode = {
aa0053b7 439 "encoding",
440 sizeof(PerlIOEncode),
284ee456 441 PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
aa0053b7 442 PerlIOEncode_pushed,
443 PerlIOEncode_popped,
444 PerlIOBuf_open,
445 PerlIOEncode_getarg,
446 PerlIOBase_fileno,
447 PerlIOEncode_dup,
448 PerlIOBuf_read,
449 PerlIOBuf_unread,
450 PerlIOBuf_write,
451 PerlIOBuf_seek,
452 PerlIOEncode_tell,
453 PerlIOEncode_close,
454 PerlIOEncode_flush,
455 PerlIOEncode_fill,
456 PerlIOBase_eof,
457 PerlIOBase_error,
458 PerlIOBase_clearerr,
459 PerlIOBase_setlinebuf,
460 PerlIOEncode_get_base,
461 PerlIOBuf_bufsiz,
462 PerlIOBuf_get_ptr,
463 PerlIOBuf_get_cnt,
464 PerlIOBuf_set_ptrcnt,
33af2bc7 465};
aa0053b7 466#endif /* encode layer */
33af2bc7 467
2f2b4ff2 468void
aa0053b7 469Encode_XSEncoding(pTHX_ encode_t * enc)
2f2b4ff2 470{
aa0053b7 471 dSP;
472 HV *stash = gv_stashpv("Encode::XS", TRUE);
473 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash);
474 int i = 0;
475 PUSHMARK(sp);
476 XPUSHs(sv);
477 while (enc->name[i]) {
478 const char *name = enc->name[i++];
479 XPUSHs(sv_2mortal(newSVpvn(name, strlen(name))));
480 }
481 PUTBACK;
482 call_pv("Encode::define_encoding", G_DISCARD);
483 SvREFCNT_dec(sv);
2f2b4ff2 484}
485
aa0053b7 486void
487call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig)
488{
25f7d9d3 489 /* Exists for breakpointing */
aa0053b7 490}
67e989fb 491
2f2b4ff2 492static SV *
aa0053b7 493encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
494 int check)
2f2b4ff2 495{
aa0053b7 496 STRLEN slen;
497 U8 *s = (U8 *) SvPV(src, slen);
3aececda 498 STRLEN tlen = slen;
499 STRLEN ddone = 0;
500 STRLEN sdone = 0;
39cf9a5e 501
502 /* We allocate slen+1.
503 PerlIO dumps core if this value is smaller than this. */
0b3236bb 504 SV *dst = sv_2mortal(newSV(slen+1));
aa0053b7 505 if (slen) {
0b3236bb 506 U8 *d = (U8 *) SvPVX(dst);
507 STRLEN dlen = SvLEN(dst)-1;
aa0053b7 508 int code;
509 while ((code = do_encode(dir, s, &slen, d, dlen, &dlen, !check))) {
3aececda 510 SvCUR_set(dst, dlen+ddone);
284ee456 511 SvPOK_only(dst);
9b37254d 512
39cf9a5e 513#if ENCODE_XS_PROFILE >= 3
514 Perl_warn(aTHX_ "code=%d @ s=%d/%d/%d d=%d/%d/%d\n",code,slen,sdone,tlen,dlen,ddone,SvLEN(dst)-1);
0b3236bb 515#endif
516
517 if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL)
aa0053b7 518 break;
9b37254d 519
aa0053b7 520 switch (code) {
521 case ENCODE_NOSPACE:
39cf9a5e 522 {
523 STRLEN more, sleft;
3aececda 524 sdone += slen;
525 ddone += dlen;
39cf9a5e 526 sleft = tlen - sdone;
527 if (sdone) { /* has src ever been processed ? */
528#if ENCODE_XS_USEFP == 2
529 more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone
530 - SvLEN(dst);
531#elif ENCODE_XS_USEFP
532 more = (1.0*SvLEN(dst)+1)/sdone * sleft;
533#else
534 /* safe until SvLEN(dst) == MAX_INT/16 */
535 more = (16*SvLEN(dst)+1)/sdone/16 * sleft;
c98ca92f 536#endif
0b3236bb 537 }
0b3236bb 538
39cf9a5e 539 more += UTF8_MAXLEN; /* insurance policy */
540#if ENCODE_XS_PROFILE >= 2
541 Perl_warn(aTHX_
542 "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n",
543 more, sdone, sleft, SvLEN(dst));
544#endif
545 d = (U8 *) SvGROW(dst, SvLEN(dst) + more);
546 /* dst need to grow need MORE bytes! */
3aececda 547 if (ddone >= SvLEN(dst)) {
548 Perl_croak(aTHX_ "Destination couldn't be grown.");
aa0053b7 549 }
3aececda 550 dlen = SvLEN(dst)-ddone-1;
551 d += ddone;
552 s += slen;
553 slen = tlen-sdone;
554 continue;
39cf9a5e 555 }
2f2b4ff2 556
aa0053b7 557 case ENCODE_NOREP:
558 if (dir == enc->f_utf8) {
559 if (!check && ckWARN_d(WARN_UTF8)) {
560 STRLEN clen;
561 UV ch =
562 utf8n_to_uvuni(s + slen, (SvCUR(src) - slen),
563 &clen, 0);
f98bc0c6 564 Perl_warner(aTHX_ packWARN(WARN_UTF8),
aa0053b7 565 "\"\\N{U+%" UVxf
566 "}\" does not map to %s", ch,
567 enc->name[0]);
568 /* FIXME: Skip over the character, copy in replacement and continue
569 * but that is messy so for now just fail.
570 */
571 return &PL_sv_undef;
572 }
573 else {
574 return &PL_sv_undef;
575 }
576 }
577 else {
d0ee0454 578 /* UTF-8 is supposed to be "Universal" so should not happen
579 for real characters, but some encodings have non-assigned
580 codes which may occur.
581 */
ee981de6 582 Perl_croak(aTHX_ "%s \"\\x%02X\" does not map to Unicode (%d)",
583 enc->name[0], (U8) s[slen], code);
aa0053b7 584 }
585 break;
2f2b4ff2 586
aa0053b7 587 default:
588 Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
589 code, (dir == enc->f_utf8) ? "to" : "from",
590 enc->name[0]);
591 return &PL_sv_undef;
592 }
593 }
3aececda 594 SvCUR_set(dst, dlen+ddone);
284ee456 595 SvPOK_only(dst);
aa0053b7 596 if (check) {
3aececda 597 sdone = SvCUR(src) - (slen+sdone);
598 if (sdone) {
f54fca96 599#if 1
600 /* FIXME: A Move() is dangerous - PV could be mmap'ed readonly
601 SvOOK would be ideal - but sv_backoff does not understand SvLEN == 0
602 type SVs and sv_clear() calls it ...
603 */
64ffdd5e 604 sv_setpvn(src, (char*)s+slen, sdone);
f54fca96 605#else
3aececda 606 Move(s + slen, SvPVX(src), sdone , U8);
f54fca96 607#endif
aa0053b7 608 }
3aececda 609 SvCUR_set(src, sdone);
aa0053b7 610 }
2f2b4ff2 611 }
aa0053b7 612 else {
0b3236bb 613 SvCUR_set(dst, 0);
284ee456 614 SvPOK_only(dst);
2f2b4ff2 615 }
39cf9a5e 616#if ENCODE_XS_PROFILE
617 if (SvCUR(dst) > SvCUR(src)){
618 Perl_warn(aTHX_
619 "SvLEN(dst)=%d, SvCUR(dst)=%d. "
620 "%d bytes unused(%f %%)\n",
621 SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst),
622 (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0);
623
624 }
625#endif
0b3236bb 626 *SvEND(dst) = '\0';
aa0053b7 627 return dst;
2f2b4ff2 628}
629
50d26985 630MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_
2f2b4ff2 631
632PROTOTYPES: ENABLE
633
634void
0a95303c 635Method_name(obj)
636SV * obj
637CODE:
638 {
639 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
640 ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0])));
641 XSRETURN(1);
642 }
643
644void
691638dd 645Method_decode(obj,src,check = FALSE)
2f2b4ff2 646SV * obj
647SV * src
691638dd 648bool check
2f2b4ff2 649CODE:
650 {
651 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
652 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check);
653 SvUTF8_on(ST(0));
654 XSRETURN(1);
655 }
656
657void
691638dd 658Method_encode(obj,src,check = FALSE)
2f2b4ff2 659SV * obj
660SV * src
691638dd 661bool check
2f2b4ff2 662CODE:
663 {
664 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
665 sv_utf8_upgrade(src);
666 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check);
667 XSRETURN(1);
668 }
669
67e989fb 670MODULE = Encode PACKAGE = Encode
2c674647 671
672PROTOTYPES: ENABLE
673
67e989fb 674I32
2c674647 675_bytes_to_utf8(sv, ...)
67e989fb 676 SV * sv
2c674647 677 CODE:
67e989fb 678 {
679 SV * encoding = items == 2 ? ST(1) : Nullsv;
680
681 if (encoding)
682 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
683 else {
684 STRLEN len;
183a2d84 685 U8* s = (U8*)SvPV(sv, len);
67e989fb 686 U8* converted;
687
688 converted = bytes_to_utf8(s, &len); /* This allocs */
183a2d84 689 sv_setpvn(sv, (char *)converted, len);
67e989fb 690 SvUTF8_on(sv); /* XXX Should we? */
691 Safefree(converted); /* ... so free it */
692 RETVAL = len;
693 }
694 }
2c674647 695 OUTPUT:
67e989fb 696 RETVAL
2c674647 697
67e989fb 698I32
2c674647 699_utf8_to_bytes(sv, ...)
67e989fb 700 SV * sv
2c674647 701 CODE:
67e989fb 702 {
703 SV * to = items > 1 ? ST(1) : Nullsv;
704 SV * check = items > 2 ? ST(2) : Nullsv;
87714904 705
67e989fb 706 if (to)
707 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
708 else {
67e989fb 709 STRLEN len;
b113ac0e 710 U8 *s = (U8*)SvPV(sv, len);
67e989fb 711
9c5ffd7c 712 RETVAL = 0;
67e989fb 713 if (SvTRUE(check)) {
714 /* Must do things the slow way */
715 U8 *dest;
87714904 716 U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */
67e989fb 717 U8 *send = s + len;
718
719 New(83, dest, len, U8); /* I think */
720
721 while (s < send) {
722 if (*s < 0x80)
723 *dest++ = *s++;
724 else {
b113ac0e 725 STRLEN ulen;
726 UV uv = *s++;
87714904 727
67e989fb 728 /* Have to do it all ourselves because of error routine,
729 aargh. */
730 if (!(uv & 0x40))
731 goto failure;
732 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
733 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
734 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
735 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
736 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
737 else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
738 else { ulen = 13; uv = 0; }
87714904 739
67e989fb 740 /* Note change to utf8.c variable naming, for variety */
741 while (ulen--) {
742 if ((*s & 0xc0) != 0x80)
743 goto failure;
87714904 744
67e989fb 745 else
746 uv = (uv << 6) | (*s++ & 0x3f);
87714904 747 }
67e989fb 748 if (uv > 256) {
749 failure:
750 call_failure(check, s, dest, src);
751 /* Now what happens? */
752 }
753 *dest++ = (U8)uv;
754 }
755 }
756 } else
757 RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
758 }
2c674647 759 }
760 OUTPUT:
761 RETVAL
762
2c674647 763bool
4411f3b6 764is_utf8(sv, check = FALSE)
765SV * sv
766bool check
2c674647 767 CODE:
768 {
2eebba1d 769 if (SvGMAGICAL(sv)) /* it could be $1, for example */
770 sv = newSVsv(sv); /* GMAGIG will be done */
2c674647 771 if (SvPOK(sv)) {
4411f3b6 772 RETVAL = SvUTF8(sv) ? TRUE : FALSE;
2c674647 773 if (RETVAL &&
4411f3b6 774 check &&
2c674647 775 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
776 RETVAL = FALSE;
777 } else {
778 RETVAL = FALSE;
779 }
2eebba1d 780 if (sv != ST(0))
781 SvREFCNT_dec(sv); /* it was a temp copy */
2c674647 782 }
783 OUTPUT:
784 RETVAL
785
786SV *
4411f3b6 787_utf8_on(sv)
2c674647 788 SV * sv
789 CODE:
790 {
791 if (SvPOK(sv)) {
87714904 792 SV *rsv = newSViv(SvUTF8(sv));
2c674647 793 RETVAL = rsv;
794 SvUTF8_on(sv);
795 } else {
796 RETVAL = &PL_sv_undef;
797 }
798 }
799 OUTPUT:
800 RETVAL
801
802SV *
4411f3b6 803_utf8_off(sv)
2c674647 804 SV * sv
805 CODE:
806 {
807 if (SvPOK(sv)) {
87714904 808 SV *rsv = newSViv(SvUTF8(sv));
2c674647 809 RETVAL = rsv;
810 SvUTF8_off(sv);
811 } else {
812 RETVAL = &PL_sv_undef;
813 }
814 }
815 OUTPUT:
816 RETVAL
817
33af2bc7 818BOOT:
819{
6a59c517 820#if defined(USE_PERLIO) && !defined(USE_SFIO)
a999f61b 821 PerlIO_define_layer(aTHX_ &PerlIO_encode);
33af2bc7 822#endif
e7cbefb8 823#include "def_t.exh"
33af2bc7 824}