Add test for Japanese encoding (well euc-jp anyway).
[p5sagit/p5-mst-13.2.git] / ext / Encode / Encode.xs
CommitLineData
fcf2db38 1#define PERL_NO_GET_CONTEXT
2
2c674647 3#include "EXTERN.h"
4#include "perl.h"
5#include "XSUB.h"
2f2b4ff2 6#define U8 U8
7#include "encode.h"
94b89828 8#include "8859.h"
2f2b4ff2 9#include "EBCDIC.h"
10#include "Symbols.h"
2c674647 11
fcf2db38 12
13#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \
2f5768b8 14 Perl_croak(aTHX_ "panic_unimplemented"); \
4a83738a 15 return (y)0; /* fool picky compilers */ \
87714904 16 }
67e989fb 17UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
aa0053b7 18 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
9df9a5cd 19#if defined(USE_PERLIO) && !defined(USE_SFIO)
72e44f29 20/* Define an encoding "layer" in the perliol.h sense.
21 The layer defined here "inherits" in an object-oriented sense from the
22 "perlio" layer with its PerlIOBuf_* "methods".
23 The implementation is particularly efficient as until Encode settles down
24 there is no point in tryint to tune it.
25
26 The layer works by overloading the "fill" and "flush" methods.
27
28 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API
29 to convert the encoded data to UTF-8 form, then copies it back to the
30 buffer. The "base class's" read methods then see the UTF-8 data.
31
32 "flush" transforms the UTF-8 data deposited by the "base class's write
33 method in the buffer back into the encoded form using the encode OO perl API,
34 then copies data back into the buffer and calls "SUPER::flush.
35
36 Note that "flush" is _also_ called for read mode - we still do the (back)-translate
37 so that the the base class's "flush" sees the correct number of encoded chars
38 for positioning the seek pointer. (This double translation is the worst performance
39 issue - particularly with all-perl encode engine.)
40
41*/
33af2bc7 42#include "perliol.h"
aa0053b7 43typedef struct {
44 PerlIOBuf base; /* PerlIOBuf stuff */
0b3236bb 45 SV *bufsv; /* buffer seen by layers above */
46 SV *dataSV; /* data we have read from layer below */
47 SV *enc; /* the encoding object */
33af2bc7 48} PerlIOEncode;
49
e3f3bf95 50SV *
aa0053b7 51PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
e3f3bf95 52{
aa0053b7 53 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
54 SV *sv = &PL_sv_undef;
55 if (e->enc) {
56 dSP;
57 ENTER;
58 SAVETMPS;
59 PUSHMARK(sp);
60 XPUSHs(e->enc);
61 PUTBACK;
62 if (perl_call_method("name", G_SCALAR) == 1) {
63 SPAGAIN;
64 sv = newSVsv(POPs);
65 PUTBACK;
66 }
e3f3bf95 67 }
aa0053b7 68 return sv;
e3f3bf95 69}
33af2bc7 70
71IV
aa0053b7 72PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
33af2bc7 73{
aa0053b7 74 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
75 dSP;
76 IV code;
77 code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
78 ENTER;
79 SAVETMPS;
80 PUSHMARK(sp);
81 XPUSHs(arg);
82 PUTBACK;
83 if (perl_call_pv("Encode::find_encoding", G_SCALAR) != 1) {
84 /* should never happen */
85 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
86 return -1;
87 }
88 SPAGAIN;
89 e->enc = POPs;
90 PUTBACK;
91 if (!SvROK(e->enc)) {
92 e->enc = Nullsv;
93 errno = EINVAL;
94 Perl_warner(aTHX_ WARN_IO, "Cannot find encoding \"%" SVf "\"",
95 arg);
96 code = -1;
97 }
98 else {
99 SvREFCNT_inc(e->enc);
100 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
101 }
102 FREETMPS;
103 LEAVE;
104 return code;
33af2bc7 105}
106
107IV
aa0053b7 108PerlIOEncode_popped(pTHX_ PerlIO * f)
33af2bc7 109{
aa0053b7 110 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
111 if (e->enc) {
112 SvREFCNT_dec(e->enc);
113 e->enc = Nullsv;
114 }
115 if (e->bufsv) {
116 SvREFCNT_dec(e->bufsv);
117 e->bufsv = Nullsv;
118 }
0b3236bb 119 if (e->dataSV) {
120 SvREFCNT_dec(e->dataSV);
121 e->bufsv = Nullsv;
122 }
aa0053b7 123 return 0;
33af2bc7 124}
125
126STDCHAR *
aa0053b7 127PerlIOEncode_get_base(pTHX_ PerlIO * f)
33af2bc7 128{
aa0053b7 129 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
130 if (!e->base.bufsiz)
131 e->base.bufsiz = 1024;
132 if (!e->bufsv) {
133 e->bufsv = newSV(e->base.bufsiz);
134 sv_setpvn(e->bufsv, "", 0);
135 }
136 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
137 if (!e->base.ptr)
138 e->base.ptr = e->base.buf;
139 if (!e->base.end)
140 e->base.end = e->base.buf;
141 if (e->base.ptr < e->base.buf
142 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
143 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
144 e->base.buf + SvLEN(e->bufsv));
145 abort();
146 }
147 if (SvLEN(e->bufsv) < e->base.bufsiz) {
148 SSize_t poff = e->base.ptr - e->base.buf;
149 SSize_t eoff = e->base.end - e->base.buf;
150 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
151 e->base.ptr = e->base.buf + poff;
152 e->base.end = e->base.buf + eoff;
153 }
154 if (e->base.ptr < e->base.buf
155 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
156 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
157 e->base.buf + SvLEN(e->bufsv));
158 abort();
159 }
160 return e->base.buf;
33af2bc7 161}
162
33af2bc7 163IV
aa0053b7 164PerlIOEncode_fill(pTHX_ PerlIO * f)
33af2bc7 165{
aa0053b7 166 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
167 dSP;
0b3236bb 168 IV code = 0;
169 PerlIO *n;
170 SSize_t avail;
171 if (PerlIO_flush(f) != 0)
172 return -1;
173 n = PerlIONext(f);
174 if (!PerlIO_fast_gets(n)) {
175 /* Things get too messy if we don't have a buffer layer
176 push a :perlio to do the job */
177 char mode[8];
178 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
179 if (!n) {
180 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
181 }
182 }
183 ENTER;
184 SAVETMPS;
185 retry:
186 avail = PerlIO_get_cnt(n);
187 if (avail <= 0) {
188 avail = PerlIO_fill(n);
189 if (avail == 0) {
190 avail = PerlIO_get_cnt(n);
191 }
192 else {
193 if (!PerlIO_error(n) && PerlIO_eof(n))
194 avail = 0;
195 }
196 }
197 if (avail > 0) {
198 STDCHAR *ptr = PerlIO_get_ptr(n);
199 SSize_t use = avail;
aa0053b7 200 SV *uni;
aa0053b7 201 char *s;
0b3236bb 202 STRLEN len = 0;
203 e->base.ptr = e->base.end = (STDCHAR *) Nullch;
204 (void) PerlIOEncode_get_base(aTHX_ f);
205 if (!e->dataSV)
206 e->dataSV = newSV(0);
207 if (SvTYPE(e->dataSV) < SVt_PV) {
208 sv_upgrade(e->dataSV,SVt_PV);
209 }
210 if (SvCUR(e->dataSV)) {
211 /* something left over from last time - create a normal
212 SV with new data appended
213 */
214 if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
215 use = e->base.bufsiz - SvCUR(e->dataSV);
216 }
9dbbe389 217 sv_catpvn(e->dataSV,(char*)ptr,use);
0b3236bb 218 }
219 else {
220 /* Create a "dummy" SV to represent the available data from layer below */
221 if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) {
222 Safefree(SvPVX(e->dataSV));
223 }
224 if (use > e->base.bufsiz) {
225 use = e->base.bufsiz;
226 }
227 SvPVX(e->dataSV) = (char *) ptr;
228 SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */
229 SvCUR_set(e->dataSV,use);
230 SvPOK_on(e->dataSV);
231 }
232 SvUTF8_off(e->dataSV);
aa0053b7 233 PUSHMARK(sp);
234 XPUSHs(e->enc);
0b3236bb 235 XPUSHs(e->dataSV);
aa0053b7 236 XPUSHs(&PL_sv_yes);
237 PUTBACK;
0b3236bb 238 if (perl_call_method("decode", G_SCALAR) != 1) {
239 Perl_die(aTHX_ "panic: decode did not return a value");
240 }
aa0053b7 241 SPAGAIN;
242 uni = POPs;
243 PUTBACK;
0b3236bb 244 /* Now get translated string (forced to UTF-8) and use as buffer */
245 if (SvPOK(uni)) {
246 s = SvPVutf8(uni, len);
85f7ebdf 247 if (len && !is_utf8_string((U8*)s,len)) {
0b3236bb 248 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
249 }
250 }
251 if (len > 0) {
252 /* Got _something */
253 /* if decode gave us back dataSV then data may vanish when
254 we do ptrcnt adjust - so take our copy now.
255 (The copy is a pain - need a put-it-here option for decode.)
256 */
257 sv_setpvn(e->bufsv,s,len);
9b7cf638 258 e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
0b3236bb 259 e->base.end = e->base.ptr + SvCUR(e->bufsv);
260 PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
261 SvUTF8_on(e->bufsv);
262
263 /* Adjust ptr/cnt not taking anything which
264 did not translate - not clear this is a win */
265 /* compute amount we took */
266 use -= SvCUR(e->dataSV);
267 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
268 /* and as we did not take it it isn't pending */
269 SvCUR_set(e->dataSV,0);
270 } else {
271 /* Got nothing - assume partial character so we need some more */
272 /* Make sure e->dataSV is a normal SV before re-filling as
273 buffer alias will change under us
274 */
275 s = SvPV(e->dataSV,len);
276 sv_setpvn(e->dataSV,s,len);
277 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
278 goto retry;
aa0053b7 279 }
aa0053b7 280 FREETMPS;
281 LEAVE;
0b3236bb 282 return code;
283 }
284 else {
285 if (avail == 0)
286 PerlIOBase(f)->flags |= PERLIO_F_EOF;
287 else
288 PerlIOBase(f)->flags |= PERLIO_F_ERROR;
289 return -1;
72e44f29 290 }
33af2bc7 291}
292
293IV
aa0053b7 294PerlIOEncode_flush(pTHX_ PerlIO * f)
33af2bc7 295{
aa0053b7 296 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
297 IV code = 0;
0b3236bb 298 if (e->bufsv && (e->base.ptr > e->base.buf)) {
aa0053b7 299 dSP;
300 SV *str;
301 char *s;
302 STRLEN len;
0b3236bb 303 SSize_t count = 0;
304 if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) {
305 /* Write case encode the buffer and write() to layer below */
306 ENTER;
307 SAVETMPS;
308 PUSHMARK(sp);
309 XPUSHs(e->enc);
310 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
311 SvUTF8_on(e->bufsv);
0b3236bb 312 XPUSHs(e->bufsv);
313 XPUSHs(&PL_sv_yes);
314 PUTBACK;
8bbd9480 315 if (perl_call_method("encode", G_SCALAR) != 1) {
316 Perl_die(aTHX_ "panic: encode did not return a value");
317 }
0b3236bb 318 SPAGAIN;
319 str = POPs;
320 PUTBACK;
321 s = SvPV(str, len);
322 count = PerlIO_write(PerlIONext(f),s,len);
323 if (count != len) {
324 code = -1;
325 }
326 FREETMPS;
327 LEAVE;
328 if (PerlIO_flush(PerlIONext(f)) != 0) {
329 code = -1;
330 }
aa0053b7 331 }
0b3236bb 332 else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
333 /* read case */
334 /* if we have any untranslated stuff then unread that first */
335 if (e->dataSV && SvCUR(e->dataSV)) {
336 s = SvPV(e->dataSV, len);
337 count = PerlIO_unread(PerlIONext(f),s,len);
338 if (count != len) {
339 code = -1;
340 }
341 }
342 /* See if there is anything left in the buffer */
343 if (e->base.ptr < e->base.end) {
344 /* Bother - have unread data.
345 re-encode and unread() to layer below
346 */
347 ENTER;
348 SAVETMPS;
349 str = sv_newmortal();
350 sv_upgrade(str, SVt_PV);
9dbbe389 351 SvPVX(str) = (char*)e->base.ptr;
0b3236bb 352 SvLEN(str) = 0;
353 SvCUR_set(str, e->base.end - e->base.ptr);
354 SvUTF8_on(str);
355 PUSHMARK(sp);
356 XPUSHs(e->enc);
357 XPUSHs(str);
358 XPUSHs(&PL_sv_yes);
359 PUTBACK;
8bbd9480 360 if (perl_call_method("encode", G_SCALAR) != 1) {
361 Perl_die(aTHX_ "panic: encode did not return a value");
362 }
0b3236bb 363 SPAGAIN;
364 str = POPs;
365 PUTBACK;
366 s = SvPV(str, len);
367 count = PerlIO_unread(PerlIONext(f),s,len);
368 if (count != len) {
369 code = -1;
370 }
371 FREETMPS;
372 LEAVE;
373 }
aa0053b7 374 }
0b3236bb 375 e->base.ptr = e->base.end = e->base.buf;
376 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
72e44f29 377 }
aa0053b7 378 return code;
33af2bc7 379}
380
381IV
aa0053b7 382PerlIOEncode_close(pTHX_ PerlIO * f)
33af2bc7 383{
aa0053b7 384 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
385 IV code = PerlIOBase_close(aTHX_ f);
386 if (e->bufsv) {
387 SvREFCNT_dec(e->bufsv);
388 e->bufsv = Nullsv;
389 }
390 e->base.buf = NULL;
391 e->base.ptr = NULL;
392 e->base.end = NULL;
393 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
394 return code;
33af2bc7 395}
396
72e44f29 397Off_t
aa0053b7 398PerlIOEncode_tell(pTHX_ PerlIO * f)
72e44f29 399{
aa0053b7 400 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
0b3236bb 401 /* Unfortunately the only way to get a postion is to (re-)translate,
402 the UTF8 we have in bufefr and then ask layer below
aa0053b7 403 */
0b3236bb 404 PerlIO_flush(f);
405 return PerlIO_tell(PerlIONext(f));
72e44f29 406}
407
8cf8f3d1 408PerlIO *
aa0053b7 409PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
410 CLONE_PARAMS * params, int flags)
8cf8f3d1 411{
aa0053b7 412 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
413 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
414 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
415 if (oe->enc) {
416 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
417 }
9f16d962 418 }
aa0053b7 419 return f;
8cf8f3d1 420}
421
33af2bc7 422PerlIO_funcs PerlIO_encode = {
aa0053b7 423 "encoding",
424 sizeof(PerlIOEncode),
425 PERLIO_K_BUFFERED,
426 PerlIOEncode_pushed,
427 PerlIOEncode_popped,
428 PerlIOBuf_open,
429 PerlIOEncode_getarg,
430 PerlIOBase_fileno,
431 PerlIOEncode_dup,
432 PerlIOBuf_read,
433 PerlIOBuf_unread,
434 PerlIOBuf_write,
435 PerlIOBuf_seek,
436 PerlIOEncode_tell,
437 PerlIOEncode_close,
438 PerlIOEncode_flush,
439 PerlIOEncode_fill,
440 PerlIOBase_eof,
441 PerlIOBase_error,
442 PerlIOBase_clearerr,
443 PerlIOBase_setlinebuf,
444 PerlIOEncode_get_base,
445 PerlIOBuf_bufsiz,
446 PerlIOBuf_get_ptr,
447 PerlIOBuf_get_cnt,
448 PerlIOBuf_set_ptrcnt,
33af2bc7 449};
aa0053b7 450#endif /* encode layer */
33af2bc7 451
2f2b4ff2 452void
aa0053b7 453Encode_XSEncoding(pTHX_ encode_t * enc)
2f2b4ff2 454{
aa0053b7 455 dSP;
456 HV *stash = gv_stashpv("Encode::XS", TRUE);
457 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash);
458 int i = 0;
459 PUSHMARK(sp);
460 XPUSHs(sv);
461 while (enc->name[i]) {
462 const char *name = enc->name[i++];
463 XPUSHs(sv_2mortal(newSVpvn(name, strlen(name))));
464 }
465 PUTBACK;
466 call_pv("Encode::define_encoding", G_DISCARD);
467 SvREFCNT_dec(sv);
2f2b4ff2 468}
469
aa0053b7 470void
471call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig)
472{
473}
67e989fb 474
2f2b4ff2 475static SV *
aa0053b7 476encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
477 int check)
2f2b4ff2 478{
aa0053b7 479 STRLEN slen;
480 U8 *s = (U8 *) SvPV(src, slen);
3aececda 481 STRLEN tlen = slen;
482 STRLEN ddone = 0;
483 STRLEN sdone = 0;
0b3236bb 484 SV *dst = sv_2mortal(newSV(slen+1));
aa0053b7 485 if (slen) {
0b3236bb 486 U8 *d = (U8 *) SvPVX(dst);
487 STRLEN dlen = SvLEN(dst)-1;
aa0053b7 488 int code;
489 while ((code = do_encode(dir, s, &slen, d, dlen, &dlen, !check))) {
3aececda 490 SvCUR_set(dst, dlen+ddone);
aa0053b7 491 SvPOK_on(dst);
9b37254d 492
0b3236bb 493#if 0
3aececda 494 Perl_warn(aTHX_ "code=%d @ s=%d/%d/%d d=%d/%d/%d",code,slen,sdone,tlen,dlen,ddone,SvLEN(dst)-1);
0b3236bb 495#endif
496
497 if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL)
aa0053b7 498 break;
9b37254d 499
aa0053b7 500 switch (code) {
501 case ENCODE_NOSPACE:
502 {
0b3236bb 503 STRLEN need ;
3aececda 504 sdone += slen;
505 ddone += dlen;
506 if (sdone) {
507 need = (tlen*SvLEN(dst)+sdone-1)/sdone+UTF8_MAXLEN;
0b3236bb 508 }
509 else {
3aececda 510 need = SvLEN(dst) + UTF8_MAXLEN;
0b3236bb 511 }
512
aa0053b7 513 d = (U8 *) SvGROW(dst, need);
3aececda 514 if (ddone >= SvLEN(dst)) {
515 Perl_croak(aTHX_ "Destination couldn't be grown.");
aa0053b7 516 }
3aececda 517 dlen = SvLEN(dst)-ddone-1;
518 d += ddone;
519 s += slen;
520 slen = tlen-sdone;
521 continue;
aa0053b7 522 }
2f2b4ff2 523
aa0053b7 524 case ENCODE_NOREP:
525 if (dir == enc->f_utf8) {
526 if (!check && ckWARN_d(WARN_UTF8)) {
527 STRLEN clen;
528 UV ch =
529 utf8n_to_uvuni(s + slen, (SvCUR(src) - slen),
530 &clen, 0);
531 Perl_warner(aTHX_ WARN_UTF8,
532 "\"\\N{U+%" UVxf
533 "}\" does not map to %s", ch,
534 enc->name[0]);
535 /* FIXME: Skip over the character, copy in replacement and continue
536 * but that is messy so for now just fail.
537 */
538 return &PL_sv_undef;
539 }
540 else {
541 return &PL_sv_undef;
542 }
543 }
544 else {
545 /* UTF-8 is supposed to be "Universal" so should not happen */
546 Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8",
547 enc->name[0], (int) (SvCUR(src) - slen),
548 s + slen);
549 }
550 break;
2f2b4ff2 551
aa0053b7 552 default:
553 Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
554 code, (dir == enc->f_utf8) ? "to" : "from",
555 enc->name[0]);
556 return &PL_sv_undef;
557 }
558 }
3aececda 559 SvCUR_set(dst, dlen+ddone);
aa0053b7 560 SvPOK_on(dst);
561 if (check) {
3aececda 562 sdone = SvCUR(src) - (slen+sdone);
563 if (sdone) {
564 Move(s + slen, SvPVX(src), sdone , U8);
aa0053b7 565 }
3aececda 566 SvCUR_set(src, sdone);
aa0053b7 567 }
2f2b4ff2 568 }
aa0053b7 569 else {
0b3236bb 570 SvCUR_set(dst, 0);
aa0053b7 571 SvPOK_on(dst);
2f2b4ff2 572 }
0b3236bb 573 *SvEND(dst) = '\0';
aa0053b7 574 return dst;
2f2b4ff2 575}
576
50d26985 577MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_
2f2b4ff2 578
579PROTOTYPES: ENABLE
580
581void
0a95303c 582Method_name(obj)
583SV * obj
584CODE:
585 {
586 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
587 ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0])));
588 XSRETURN(1);
589 }
590
591void
691638dd 592Method_decode(obj,src,check = FALSE)
2f2b4ff2 593SV * obj
594SV * src
691638dd 595bool check
2f2b4ff2 596CODE:
597 {
598 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
599 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check);
600 SvUTF8_on(ST(0));
601 XSRETURN(1);
602 }
603
604void
691638dd 605Method_encode(obj,src,check = FALSE)
2f2b4ff2 606SV * obj
607SV * src
691638dd 608bool check
2f2b4ff2 609CODE:
610 {
611 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
612 sv_utf8_upgrade(src);
613 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check);
614 XSRETURN(1);
615 }
616
67e989fb 617MODULE = Encode PACKAGE = Encode
2c674647 618
619PROTOTYPES: ENABLE
620
67e989fb 621I32
2c674647 622_bytes_to_utf8(sv, ...)
67e989fb 623 SV * sv
2c674647 624 CODE:
67e989fb 625 {
626 SV * encoding = items == 2 ? ST(1) : Nullsv;
627
628 if (encoding)
629 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
630 else {
631 STRLEN len;
183a2d84 632 U8* s = (U8*)SvPV(sv, len);
67e989fb 633 U8* converted;
634
635 converted = bytes_to_utf8(s, &len); /* This allocs */
183a2d84 636 sv_setpvn(sv, (char *)converted, len);
67e989fb 637 SvUTF8_on(sv); /* XXX Should we? */
638 Safefree(converted); /* ... so free it */
639 RETVAL = len;
640 }
641 }
2c674647 642 OUTPUT:
67e989fb 643 RETVAL
2c674647 644
67e989fb 645I32
2c674647 646_utf8_to_bytes(sv, ...)
67e989fb 647 SV * sv
2c674647 648 CODE:
67e989fb 649 {
650 SV * to = items > 1 ? ST(1) : Nullsv;
651 SV * check = items > 2 ? ST(2) : Nullsv;
87714904 652
67e989fb 653 if (to)
654 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
655 else {
67e989fb 656 STRLEN len;
b113ac0e 657 U8 *s = (U8*)SvPV(sv, len);
67e989fb 658
9c5ffd7c 659 RETVAL = 0;
67e989fb 660 if (SvTRUE(check)) {
661 /* Must do things the slow way */
662 U8 *dest;
87714904 663 U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */
67e989fb 664 U8 *send = s + len;
665
666 New(83, dest, len, U8); /* I think */
667
668 while (s < send) {
669 if (*s < 0x80)
670 *dest++ = *s++;
671 else {
b113ac0e 672 STRLEN ulen;
673 UV uv = *s++;
87714904 674
67e989fb 675 /* Have to do it all ourselves because of error routine,
676 aargh. */
677 if (!(uv & 0x40))
678 goto failure;
679 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
680 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
681 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
682 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
683 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
684 else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
685 else { ulen = 13; uv = 0; }
87714904 686
67e989fb 687 /* Note change to utf8.c variable naming, for variety */
688 while (ulen--) {
689 if ((*s & 0xc0) != 0x80)
690 goto failure;
87714904 691
67e989fb 692 else
693 uv = (uv << 6) | (*s++ & 0x3f);
87714904 694 }
67e989fb 695 if (uv > 256) {
696 failure:
697 call_failure(check, s, dest, src);
698 /* Now what happens? */
699 }
700 *dest++ = (U8)uv;
701 }
702 }
703 } else
704 RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
705 }
2c674647 706 }
707 OUTPUT:
708 RETVAL
709
2c674647 710bool
4411f3b6 711is_utf8(sv, check = FALSE)
712SV * sv
713bool check
2c674647 714 CODE:
715 {
2eebba1d 716 if (SvGMAGICAL(sv)) /* it could be $1, for example */
717 sv = newSVsv(sv); /* GMAGIG will be done */
2c674647 718 if (SvPOK(sv)) {
4411f3b6 719 RETVAL = SvUTF8(sv) ? TRUE : FALSE;
2c674647 720 if (RETVAL &&
4411f3b6 721 check &&
2c674647 722 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
723 RETVAL = FALSE;
724 } else {
725 RETVAL = FALSE;
726 }
2eebba1d 727 if (sv != ST(0))
728 SvREFCNT_dec(sv); /* it was a temp copy */
2c674647 729 }
730 OUTPUT:
731 RETVAL
732
733SV *
4411f3b6 734_utf8_on(sv)
2c674647 735 SV * sv
736 CODE:
737 {
738 if (SvPOK(sv)) {
87714904 739 SV *rsv = newSViv(SvUTF8(sv));
2c674647 740 RETVAL = rsv;
741 SvUTF8_on(sv);
742 } else {
743 RETVAL = &PL_sv_undef;
744 }
745 }
746 OUTPUT:
747 RETVAL
748
749SV *
4411f3b6 750_utf8_off(sv)
2c674647 751 SV * sv
752 CODE:
753 {
754 if (SvPOK(sv)) {
87714904 755 SV *rsv = newSViv(SvUTF8(sv));
2c674647 756 RETVAL = rsv;
757 SvUTF8_off(sv);
758 } else {
759 RETVAL = &PL_sv_undef;
760 }
761 }
762 OUTPUT:
763 RETVAL
764
33af2bc7 765BOOT:
766{
6a59c517 767#if defined(USE_PERLIO) && !defined(USE_SFIO)
a999f61b 768 PerlIO_define_layer(aTHX_ &PerlIO_encode);
33af2bc7 769#endif
023d8852 770#include "8859_def.h"
771#include "EBCDIC_def.h"
772#include "Symbols_def.h"
33af2bc7 773}