Retract #15745 for now; won't work without more Encode fixes.
[p5sagit/p5-mst-13.2.git] / ext / Encode / Encode.xs
CommitLineData
fcf2db38 1#define PERL_NO_GET_CONTEXT
2c674647 2#include "EXTERN.h"
3#include "perl.h"
4#include "XSUB.h"
2f2b4ff2 5#define U8 U8
6#include "encode.h"
071db25d 7#include "def_t.h"
fcf2db38 8
39cf9a5e 9#define ENCODE_XS_PROFILE 0 /* set 1 to profile.
a999c27c 10 t/encoding.t dumps core because of
11 Perl_warner and PerlIO don't work well */
39cf9a5e 12
13#define ENCODE_XS_USEFP 1 /* set 0 to disable floating point to calculate
a999c27c 14 buffer size for encode_method().
15 1 is recommended. 2 restores NI-S original */
39cf9a5e 16
fcf2db38 17#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \
2f5768b8 18 Perl_croak(aTHX_ "panic_unimplemented"); \
4a83738a 19 return (y)0; /* fool picky compilers */ \
87714904 20 }
67e989fb 21UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
aa0053b7 22 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
9df9a5cd 23#if defined(USE_PERLIO) && !defined(USE_SFIO)
72e44f29 24/* Define an encoding "layer" in the perliol.h sense.
25 The layer defined here "inherits" in an object-oriented sense from the
26 "perlio" layer with its PerlIOBuf_* "methods".
27 The implementation is particularly efficient as until Encode settles down
28 there is no point in tryint to tune it.
29
30 The layer works by overloading the "fill" and "flush" methods.
31
32 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API
33 to convert the encoded data to UTF-8 form, then copies it back to the
34 buffer. The "base class's" read methods then see the UTF-8 data.
35
36 "flush" transforms the UTF-8 data deposited by the "base class's write
37 method in the buffer back into the encoded form using the encode OO perl API,
38 then copies data back into the buffer and calls "SUPER::flush.
39
40 Note that "flush" is _also_ called for read mode - we still do the (back)-translate
41 so that the the base class's "flush" sees the correct number of encoded chars
42 for positioning the seek pointer. (This double translation is the worst performance
43 issue - particularly with all-perl encode engine.)
44
45*/
33af2bc7 46#include "perliol.h"
aa0053b7 47typedef struct {
48 PerlIOBuf base; /* PerlIOBuf stuff */
0b3236bb 49 SV *bufsv; /* buffer seen by layers above */
50 SV *dataSV; /* data we have read from layer below */
51 SV *enc; /* the encoding object */
33af2bc7 52} PerlIOEncode;
53
e3f3bf95 54SV *
aa0053b7 55PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
e3f3bf95 56{
aa0053b7 57 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
58 SV *sv = &PL_sv_undef;
59 if (e->enc) {
60 dSP;
61 ENTER;
62 SAVETMPS;
63 PUSHMARK(sp);
64 XPUSHs(e->enc);
65 PUTBACK;
66 if (perl_call_method("name", G_SCALAR) == 1) {
67 SPAGAIN;
68 sv = newSVsv(POPs);
69 PUTBACK;
70 }
e3f3bf95 71 }
aa0053b7 72 return sv;
e3f3bf95 73}
33af2bc7 74
75IV
aa0053b7 76PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
33af2bc7 77{
aa0053b7 78 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
79 dSP;
80 IV code;
81 code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
82 ENTER;
83 SAVETMPS;
84 PUSHMARK(sp);
85 XPUSHs(arg);
86 PUTBACK;
87 if (perl_call_pv("Encode::find_encoding", G_SCALAR) != 1) {
88 /* should never happen */
89 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
90 return -1;
91 }
92 SPAGAIN;
93 e->enc = POPs;
94 PUTBACK;
95 if (!SvROK(e->enc)) {
96 e->enc = Nullsv;
97 errno = EINVAL;
f98bc0c6 98 Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
aa0053b7 99 arg);
100 code = -1;
101 }
102 else {
103 SvREFCNT_inc(e->enc);
104 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
105 }
106 FREETMPS;
107 LEAVE;
108 return code;
33af2bc7 109}
110
111IV
aa0053b7 112PerlIOEncode_popped(pTHX_ PerlIO * f)
33af2bc7 113{
aa0053b7 114 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
115 if (e->enc) {
116 SvREFCNT_dec(e->enc);
117 e->enc = Nullsv;
118 }
119 if (e->bufsv) {
120 SvREFCNT_dec(e->bufsv);
121 e->bufsv = Nullsv;
122 }
0b3236bb 123 if (e->dataSV) {
124 SvREFCNT_dec(e->dataSV);
284ee456 125 e->dataSV = Nullsv;
0b3236bb 126 }
aa0053b7 127 return 0;
33af2bc7 128}
129
130STDCHAR *
aa0053b7 131PerlIOEncode_get_base(pTHX_ PerlIO * f)
33af2bc7 132{
aa0053b7 133 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
134 if (!e->base.bufsiz)
135 e->base.bufsiz = 1024;
136 if (!e->bufsv) {
137 e->bufsv = newSV(e->base.bufsiz);
138 sv_setpvn(e->bufsv, "", 0);
139 }
140 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
141 if (!e->base.ptr)
142 e->base.ptr = e->base.buf;
143 if (!e->base.end)
144 e->base.end = e->base.buf;
145 if (e->base.ptr < e->base.buf
146 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
147 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
148 e->base.buf + SvLEN(e->bufsv));
149 abort();
150 }
151 if (SvLEN(e->bufsv) < e->base.bufsiz) {
152 SSize_t poff = e->base.ptr - e->base.buf;
153 SSize_t eoff = e->base.end - e->base.buf;
154 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
155 e->base.ptr = e->base.buf + poff;
156 e->base.end = e->base.buf + eoff;
157 }
158 if (e->base.ptr < e->base.buf
159 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
160 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
161 e->base.buf + SvLEN(e->bufsv));
162 abort();
163 }
164 return e->base.buf;
33af2bc7 165}
166
33af2bc7 167IV
aa0053b7 168PerlIOEncode_fill(pTHX_ PerlIO * f)
33af2bc7 169{
aa0053b7 170 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
171 dSP;
0b3236bb 172 IV code = 0;
173 PerlIO *n;
174 SSize_t avail;
175 if (PerlIO_flush(f) != 0)
176 return -1;
177 n = PerlIONext(f);
178 if (!PerlIO_fast_gets(n)) {
179 /* Things get too messy if we don't have a buffer layer
180 push a :perlio to do the job */
181 char mode[8];
182 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
183 if (!n) {
184 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
185 }
186 }
187 ENTER;
188 SAVETMPS;
189 retry:
190 avail = PerlIO_get_cnt(n);
191 if (avail <= 0) {
192 avail = PerlIO_fill(n);
193 if (avail == 0) {
194 avail = PerlIO_get_cnt(n);
195 }
196 else {
197 if (!PerlIO_error(n) && PerlIO_eof(n))
198 avail = 0;
199 }
200 }
201 if (avail > 0) {
202 STDCHAR *ptr = PerlIO_get_ptr(n);
203 SSize_t use = avail;
aa0053b7 204 SV *uni;
aa0053b7 205 char *s;
0b3236bb 206 STRLEN len = 0;
207 e->base.ptr = e->base.end = (STDCHAR *) Nullch;
208 (void) PerlIOEncode_get_base(aTHX_ f);
209 if (!e->dataSV)
210 e->dataSV = newSV(0);
211 if (SvTYPE(e->dataSV) < SVt_PV) {
212 sv_upgrade(e->dataSV,SVt_PV);
213 }
214 if (SvCUR(e->dataSV)) {
215 /* something left over from last time - create a normal
216 SV with new data appended
217 */
218 if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
219 use = e->base.bufsiz - SvCUR(e->dataSV);
220 }
9dbbe389 221 sv_catpvn(e->dataSV,(char*)ptr,use);
0b3236bb 222 }
223 else {
224 /* Create a "dummy" SV to represent the available data from layer below */
225 if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) {
226 Safefree(SvPVX(e->dataSV));
227 }
228 if (use > e->base.bufsiz) {
229 use = e->base.bufsiz;
230 }
231 SvPVX(e->dataSV) = (char *) ptr;
232 SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */
233 SvCUR_set(e->dataSV,use);
284ee456 234 SvPOK_only(e->dataSV);
0b3236bb 235 }
236 SvUTF8_off(e->dataSV);
aa0053b7 237 PUSHMARK(sp);
238 XPUSHs(e->enc);
0b3236bb 239 XPUSHs(e->dataSV);
aa0053b7 240 XPUSHs(&PL_sv_yes);
241 PUTBACK;
0b3236bb 242 if (perl_call_method("decode", G_SCALAR) != 1) {
243 Perl_die(aTHX_ "panic: decode did not return a value");
244 }
aa0053b7 245 SPAGAIN;
246 uni = POPs;
247 PUTBACK;
0b3236bb 248 /* Now get translated string (forced to UTF-8) and use as buffer */
249 if (SvPOK(uni)) {
250 s = SvPVutf8(uni, len);
85f7ebdf 251 if (len && !is_utf8_string((U8*)s,len)) {
0b3236bb 252 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
253 }
254 }
255 if (len > 0) {
256 /* Got _something */
257 /* if decode gave us back dataSV then data may vanish when
258 we do ptrcnt adjust - so take our copy now.
259 (The copy is a pain - need a put-it-here option for decode.)
260 */
261 sv_setpvn(e->bufsv,s,len);
284ee456 262 e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
0b3236bb 263 e->base.end = e->base.ptr + SvCUR(e->bufsv);
264 PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
265 SvUTF8_on(e->bufsv);
266
267 /* Adjust ptr/cnt not taking anything which
268 did not translate - not clear this is a win */
269 /* compute amount we took */
270 use -= SvCUR(e->dataSV);
271 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
272 /* and as we did not take it it isn't pending */
273 SvCUR_set(e->dataSV,0);
274 } else {
275 /* Got nothing - assume partial character so we need some more */
276 /* Make sure e->dataSV is a normal SV before re-filling as
277 buffer alias will change under us
278 */
279 s = SvPV(e->dataSV,len);
280 sv_setpvn(e->dataSV,s,len);
281 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
282 goto retry;
aa0053b7 283 }
aa0053b7 284 FREETMPS;
285 LEAVE;
0b3236bb 286 return code;
287 }
288 else {
289 if (avail == 0)
290 PerlIOBase(f)->flags |= PERLIO_F_EOF;
291 else
292 PerlIOBase(f)->flags |= PERLIO_F_ERROR;
293 return -1;
72e44f29 294 }
33af2bc7 295}
296
297IV
aa0053b7 298PerlIOEncode_flush(pTHX_ PerlIO * f)
33af2bc7 299{
aa0053b7 300 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
301 IV code = 0;
0b3236bb 302 if (e->bufsv && (e->base.ptr > e->base.buf)) {
aa0053b7 303 dSP;
304 SV *str;
305 char *s;
306 STRLEN len;
0b3236bb 307 SSize_t count = 0;
308 if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) {
309 /* Write case encode the buffer and write() to layer below */
310 ENTER;
311 SAVETMPS;
312 PUSHMARK(sp);
313 XPUSHs(e->enc);
314 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
315 SvUTF8_on(e->bufsv);
0b3236bb 316 XPUSHs(e->bufsv);
317 XPUSHs(&PL_sv_yes);
318 PUTBACK;
8bbd9480 319 if (perl_call_method("encode", G_SCALAR) != 1) {
320 Perl_die(aTHX_ "panic: encode did not return a value");
321 }
0b3236bb 322 SPAGAIN;
323 str = POPs;
324 PUTBACK;
325 s = SvPV(str, len);
326 count = PerlIO_write(PerlIONext(f),s,len);
327 if (count != len) {
328 code = -1;
329 }
330 FREETMPS;
331 LEAVE;
332 if (PerlIO_flush(PerlIONext(f)) != 0) {
333 code = -1;
334 }
25f7d9d3 335 if (SvCUR(e->bufsv)) {
336 /* Did not all translate */
337 e->base.ptr = e->base.buf+SvCUR(e->bufsv);
338 return code;
339 }
aa0053b7 340 }
0b3236bb 341 else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
342 /* read case */
343 /* if we have any untranslated stuff then unread that first */
344 if (e->dataSV && SvCUR(e->dataSV)) {
345 s = SvPV(e->dataSV, len);
346 count = PerlIO_unread(PerlIONext(f),s,len);
347 if (count != len) {
348 code = -1;
349 }
350 }
351 /* See if there is anything left in the buffer */
352 if (e->base.ptr < e->base.end) {
353 /* Bother - have unread data.
354 re-encode and unread() to layer below
355 */
356 ENTER;
357 SAVETMPS;
358 str = sv_newmortal();
359 sv_upgrade(str, SVt_PV);
9dbbe389 360 SvPVX(str) = (char*)e->base.ptr;
0b3236bb 361 SvLEN(str) = 0;
362 SvCUR_set(str, e->base.end - e->base.ptr);
284ee456 363 SvPOK_only(str);
0b3236bb 364 SvUTF8_on(str);
365 PUSHMARK(sp);
366 XPUSHs(e->enc);
367 XPUSHs(str);
368 XPUSHs(&PL_sv_yes);
369 PUTBACK;
8bbd9480 370 if (perl_call_method("encode", G_SCALAR) != 1) {
371 Perl_die(aTHX_ "panic: encode did not return a value");
372 }
0b3236bb 373 SPAGAIN;
374 str = POPs;
375 PUTBACK;
376 s = SvPV(str, len);
377 count = PerlIO_unread(PerlIONext(f),s,len);
378 if (count != len) {
379 code = -1;
380 }
381 FREETMPS;
382 LEAVE;
383 }
aa0053b7 384 }
0b3236bb 385 e->base.ptr = e->base.end = e->base.buf;
386 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
72e44f29 387 }
aa0053b7 388 return code;
33af2bc7 389}
390
391IV
aa0053b7 392PerlIOEncode_close(pTHX_ PerlIO * f)
33af2bc7 393{
aa0053b7 394 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
395 IV code = PerlIOBase_close(aTHX_ f);
396 if (e->bufsv) {
25f7d9d3 397 if (e->base.buf && e->base.ptr > e->base.buf) {
398 Perl_croak(aTHX_ "Close with partial character");
399 }
aa0053b7 400 SvREFCNT_dec(e->bufsv);
401 e->bufsv = Nullsv;
402 }
403 e->base.buf = NULL;
404 e->base.ptr = NULL;
405 e->base.end = NULL;
406 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
407 return code;
33af2bc7 408}
409
72e44f29 410Off_t
aa0053b7 411PerlIOEncode_tell(pTHX_ PerlIO * f)
72e44f29 412{
aa0053b7 413 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
0b3236bb 414 /* Unfortunately the only way to get a postion is to (re-)translate,
415 the UTF8 we have in bufefr and then ask layer below
aa0053b7 416 */
0b3236bb 417 PerlIO_flush(f);
25f7d9d3 418 if (b->buf && b->ptr > b->buf) {
419 Perl_croak(aTHX_ "Cannot tell at partial character");
420 }
0b3236bb 421 return PerlIO_tell(PerlIONext(f));
72e44f29 422}
423
8cf8f3d1 424PerlIO *
aa0053b7 425PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
426 CLONE_PARAMS * params, int flags)
8cf8f3d1 427{
aa0053b7 428 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
429 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
430 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
431 if (oe->enc) {
432 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
433 }
9f16d962 434 }
aa0053b7 435 return f;
8cf8f3d1 436}
437
33af2bc7 438PerlIO_funcs PerlIO_encode = {
aa0053b7 439 "encoding",
440 sizeof(PerlIOEncode),
284ee456 441 PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
aa0053b7 442 PerlIOEncode_pushed,
443 PerlIOEncode_popped,
444 PerlIOBuf_open,
445 PerlIOEncode_getarg,
446 PerlIOBase_fileno,
447 PerlIOEncode_dup,
448 PerlIOBuf_read,
449 PerlIOBuf_unread,
450 PerlIOBuf_write,
451 PerlIOBuf_seek,
452 PerlIOEncode_tell,
453 PerlIOEncode_close,
454 PerlIOEncode_flush,
455 PerlIOEncode_fill,
456 PerlIOBase_eof,
457 PerlIOBase_error,
458 PerlIOBase_clearerr,
459 PerlIOBase_setlinebuf,
460 PerlIOEncode_get_base,
461 PerlIOBuf_bufsiz,
462 PerlIOBuf_get_ptr,
463 PerlIOBuf_get_cnt,
464 PerlIOBuf_set_ptrcnt,
33af2bc7 465};
aa0053b7 466#endif /* encode layer */
33af2bc7 467
2f2b4ff2 468void
aa0053b7 469Encode_XSEncoding(pTHX_ encode_t * enc)
2f2b4ff2 470{
aa0053b7 471 dSP;
472 HV *stash = gv_stashpv("Encode::XS", TRUE);
473 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash);
474 int i = 0;
475 PUSHMARK(sp);
476 XPUSHs(sv);
477 while (enc->name[i]) {
478 const char *name = enc->name[i++];
479 XPUSHs(sv_2mortal(newSVpvn(name, strlen(name))));
480 }
481 PUTBACK;
482 call_pv("Encode::define_encoding", G_DISCARD);
483 SvREFCNT_dec(sv);
2f2b4ff2 484}
485
aa0053b7 486void
487call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig)
488{
25f7d9d3 489 /* Exists for breakpointing */
aa0053b7 490}
67e989fb 491
2f2b4ff2 492static SV *
aa0053b7 493encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
494 int check)
2f2b4ff2 495{
aa0053b7 496 STRLEN slen;
497 U8 *s = (U8 *) SvPV(src, slen);
3aececda 498 STRLEN tlen = slen;
499 STRLEN ddone = 0;
500 STRLEN sdone = 0;
39cf9a5e 501
a999c27c 502 /* We allocate slen+1.
503 PerlIO dumps core if this value is smaller than this. */
504 SV *dst = sv_2mortal(newSV(slen+1));
aa0053b7 505 if (slen) {
0b3236bb 506 U8 *d = (U8 *) SvPVX(dst);
507 STRLEN dlen = SvLEN(dst)-1;
aa0053b7 508 int code;
509 while ((code = do_encode(dir, s, &slen, d, dlen, &dlen, !check))) {
3aececda 510 SvCUR_set(dst, dlen+ddone);
284ee456 511 SvPOK_only(dst);
9b37254d 512
39cf9a5e 513#if ENCODE_XS_PROFILE >= 3
514 Perl_warn(aTHX_ "code=%d @ s=%d/%d/%d d=%d/%d/%d\n",code,slen,sdone,tlen,dlen,ddone,SvLEN(dst)-1);
0b3236bb 515#endif
516
517 if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL)
aa0053b7 518 break;
9b37254d 519
aa0053b7 520 switch (code) {
521 case ENCODE_NOSPACE:
a999c27c 522 {
39cf9a5e 523 STRLEN more, sleft;
3aececda 524 sdone += slen;
525 ddone += dlen;
39cf9a5e 526 sleft = tlen - sdone;
527 if (sdone) { /* has src ever been processed ? */
528#if ENCODE_XS_USEFP == 2
a999c27c 529 more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone
530 - SvLEN(dst);
39cf9a5e 531#elif ENCODE_XS_USEFP
a999c27c 532 more = (1.0*SvLEN(dst)+1)/sdone * sleft;
39cf9a5e 533#else
a999c27c 534 /* safe until SvLEN(dst) == MAX_INT/16 */
535 more = (16*SvLEN(dst)+1)/sdone/16 * sleft;
c98ca92f 536#endif
0b3236bb 537 }
39cf9a5e 538 more += UTF8_MAXLEN; /* insurance policy */
539#if ENCODE_XS_PROFILE >= 2
a999c27c 540 Perl_warn(aTHX_
541 "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n",
542 more, sdone, sleft, SvLEN(dst));
39cf9a5e 543#endif
544 d = (U8 *) SvGROW(dst, SvLEN(dst) + more);
545 /* dst need to grow need MORE bytes! */
3aececda 546 if (ddone >= SvLEN(dst)) {
547 Perl_croak(aTHX_ "Destination couldn't be grown.");
aa0053b7 548 }
3aececda 549 dlen = SvLEN(dst)-ddone-1;
550 d += ddone;
551 s += slen;
552 slen = tlen-sdone;
553 continue;
39cf9a5e 554 }
2f2b4ff2 555
aa0053b7 556 case ENCODE_NOREP:
557 if (dir == enc->f_utf8) {
558 if (!check && ckWARN_d(WARN_UTF8)) {
559 STRLEN clen;
560 UV ch =
561 utf8n_to_uvuni(s + slen, (SvCUR(src) - slen),
562 &clen, 0);
f98bc0c6 563 Perl_warner(aTHX_ packWARN(WARN_UTF8),
aa0053b7 564 "\"\\N{U+%" UVxf
565 "}\" does not map to %s", ch,
566 enc->name[0]);
567 /* FIXME: Skip over the character, copy in replacement and continue
568 * but that is messy so for now just fail.
569 */
570 return &PL_sv_undef;
571 }
572 else {
573 return &PL_sv_undef;
574 }
575 }
576 else {
d0ee0454 577 /* UTF-8 is supposed to be "Universal" so should not happen
578 for real characters, but some encodings have non-assigned
579 codes which may occur.
580 */
ee981de6 581 Perl_croak(aTHX_ "%s \"\\x%02X\" does not map to Unicode (%d)",
582 enc->name[0], (U8) s[slen], code);
aa0053b7 583 }
584 break;
2f2b4ff2 585
aa0053b7 586 default:
587 Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
588 code, (dir == enc->f_utf8) ? "to" : "from",
589 enc->name[0]);
590 return &PL_sv_undef;
591 }
592 }
3aececda 593 SvCUR_set(dst, dlen+ddone);
284ee456 594 SvPOK_only(dst);
aa0053b7 595 if (check) {
3aececda 596 sdone = SvCUR(src) - (slen+sdone);
597 if (sdone) {
f54fca96 598#if 1
599 /* FIXME: A Move() is dangerous - PV could be mmap'ed readonly
600 SvOOK would be ideal - but sv_backoff does not understand SvLEN == 0
601 type SVs and sv_clear() calls it ...
602 */
64ffdd5e 603 sv_setpvn(src, (char*)s+slen, sdone);
f54fca96 604#else
3aececda 605 Move(s + slen, SvPVX(src), sdone , U8);
f54fca96 606#endif
aa0053b7 607 }
3aececda 608 SvCUR_set(src, sdone);
aa0053b7 609 }
2f2b4ff2 610 }
aa0053b7 611 else {
0b3236bb 612 SvCUR_set(dst, 0);
284ee456 613 SvPOK_only(dst);
2f2b4ff2 614 }
39cf9a5e 615#if ENCODE_XS_PROFILE
616 if (SvCUR(dst) > SvCUR(src)){
a999c27c 617 Perl_warn(aTHX_
618 "SvLEN(dst)=%d, SvCUR(dst)=%d. "
619 "%d bytes unused(%f %%)\n",
620 SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst),
621 (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0);
622
39cf9a5e 623 }
a999c27c 624#endif
0b3236bb 625 *SvEND(dst) = '\0';
aa0053b7 626 return dst;
2f2b4ff2 627}
628
50d26985 629MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_
2f2b4ff2 630
631PROTOTYPES: ENABLE
632
633void
0a95303c 634Method_name(obj)
635SV * obj
636CODE:
637 {
638 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
639 ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0])));
640 XSRETURN(1);
641 }
642
643void
691638dd 644Method_decode(obj,src,check = FALSE)
2f2b4ff2 645SV * obj
646SV * src
691638dd 647bool check
2f2b4ff2 648CODE:
649 {
650 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
651 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check);
652 SvUTF8_on(ST(0));
653 XSRETURN(1);
654 }
655
656void
691638dd 657Method_encode(obj,src,check = FALSE)
2f2b4ff2 658SV * obj
659SV * src
691638dd 660bool check
2f2b4ff2 661CODE:
662 {
663 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
664 sv_utf8_upgrade(src);
665 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check);
666 XSRETURN(1);
667 }
668
67e989fb 669MODULE = Encode PACKAGE = Encode
2c674647 670
671PROTOTYPES: ENABLE
672
67e989fb 673I32
2c674647 674_bytes_to_utf8(sv, ...)
67e989fb 675 SV * sv
2c674647 676 CODE:
67e989fb 677 {
678 SV * encoding = items == 2 ? ST(1) : Nullsv;
679
680 if (encoding)
681 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
682 else {
683 STRLEN len;
183a2d84 684 U8* s = (U8*)SvPV(sv, len);
67e989fb 685 U8* converted;
686
687 converted = bytes_to_utf8(s, &len); /* This allocs */
183a2d84 688 sv_setpvn(sv, (char *)converted, len);
67e989fb 689 SvUTF8_on(sv); /* XXX Should we? */
690 Safefree(converted); /* ... so free it */
691 RETVAL = len;
692 }
693 }
2c674647 694 OUTPUT:
67e989fb 695 RETVAL
2c674647 696
67e989fb 697I32
2c674647 698_utf8_to_bytes(sv, ...)
67e989fb 699 SV * sv
2c674647 700 CODE:
67e989fb 701 {
702 SV * to = items > 1 ? ST(1) : Nullsv;
703 SV * check = items > 2 ? ST(2) : Nullsv;
87714904 704
67e989fb 705 if (to)
706 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
707 else {
67e989fb 708 STRLEN len;
b113ac0e 709 U8 *s = (U8*)SvPV(sv, len);
67e989fb 710
9c5ffd7c 711 RETVAL = 0;
67e989fb 712 if (SvTRUE(check)) {
713 /* Must do things the slow way */
714 U8 *dest;
87714904 715 U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */
67e989fb 716 U8 *send = s + len;
717
718 New(83, dest, len, U8); /* I think */
719
720 while (s < send) {
721 if (*s < 0x80)
722 *dest++ = *s++;
723 else {
b113ac0e 724 STRLEN ulen;
725 UV uv = *s++;
87714904 726
67e989fb 727 /* Have to do it all ourselves because of error routine,
728 aargh. */
729 if (!(uv & 0x40))
730 goto failure;
731 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
732 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
733 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
734 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
735 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
736 else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
737 else { ulen = 13; uv = 0; }
87714904 738
67e989fb 739 /* Note change to utf8.c variable naming, for variety */
740 while (ulen--) {
741 if ((*s & 0xc0) != 0x80)
742 goto failure;
87714904 743
67e989fb 744 else
745 uv = (uv << 6) | (*s++ & 0x3f);
87714904 746 }
67e989fb 747 if (uv > 256) {
748 failure:
749 call_failure(check, s, dest, src);
750 /* Now what happens? */
751 }
752 *dest++ = (U8)uv;
753 }
754 }
755 } else
756 RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
757 }
2c674647 758 }
759 OUTPUT:
760 RETVAL
761
2c674647 762bool
4411f3b6 763is_utf8(sv, check = FALSE)
764SV * sv
765bool check
2c674647 766 CODE:
767 {
2eebba1d 768 if (SvGMAGICAL(sv)) /* it could be $1, for example */
769 sv = newSVsv(sv); /* GMAGIG will be done */
2c674647 770 if (SvPOK(sv)) {
4411f3b6 771 RETVAL = SvUTF8(sv) ? TRUE : FALSE;
2c674647 772 if (RETVAL &&
4411f3b6 773 check &&
2c674647 774 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
775 RETVAL = FALSE;
776 } else {
777 RETVAL = FALSE;
778 }
2eebba1d 779 if (sv != ST(0))
780 SvREFCNT_dec(sv); /* it was a temp copy */
2c674647 781 }
782 OUTPUT:
783 RETVAL
784
785SV *
4411f3b6 786_utf8_on(sv)
2c674647 787 SV * sv
788 CODE:
789 {
790 if (SvPOK(sv)) {
87714904 791 SV *rsv = newSViv(SvUTF8(sv));
2c674647 792 RETVAL = rsv;
793 SvUTF8_on(sv);
794 } else {
795 RETVAL = &PL_sv_undef;
796 }
797 }
798 OUTPUT:
799 RETVAL
800
801SV *
4411f3b6 802_utf8_off(sv)
2c674647 803 SV * sv
804 CODE:
805 {
806 if (SvPOK(sv)) {
87714904 807 SV *rsv = newSViv(SvUTF8(sv));
2c674647 808 RETVAL = rsv;
809 SvUTF8_off(sv);
810 } else {
811 RETVAL = &PL_sv_undef;
812 }
813 }
814 OUTPUT:
815 RETVAL
816
33af2bc7 817BOOT:
818{
6a59c517 819#if defined(USE_PERLIO) && !defined(USE_SFIO)
a999f61b 820 PerlIO_define_layer(aTHX_ &PerlIO_encode);
33af2bc7 821#endif
e7cbefb8 822#include "def_t.exh"
33af2bc7 823}