Integrate perlio:
[p5sagit/p5-mst-13.2.git] / ext / Encode / Encode.xs
CommitLineData
fcf2db38 1#define PERL_NO_GET_CONTEXT
2
2c674647 3#include "EXTERN.h"
4#include "perl.h"
5#include "XSUB.h"
2f2b4ff2 6#define U8 U8
7#include "encode.h"
5129552c 8/* #include "8859.h" */
9/* #include "EBCDIC.h" */
10/* #include "Symbols.h" */
071db25d 11#include "def_t.h"
fcf2db38 12
13#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \
2f5768b8 14 Perl_croak(aTHX_ "panic_unimplemented"); \
4a83738a 15 return (y)0; /* fool picky compilers */ \
87714904 16 }
67e989fb 17UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
aa0053b7 18 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
9df9a5cd 19#if defined(USE_PERLIO) && !defined(USE_SFIO)
72e44f29 20/* Define an encoding "layer" in the perliol.h sense.
21 The layer defined here "inherits" in an object-oriented sense from the
22 "perlio" layer with its PerlIOBuf_* "methods".
23 The implementation is particularly efficient as until Encode settles down
24 there is no point in tryint to tune it.
25
26 The layer works by overloading the "fill" and "flush" methods.
27
28 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API
29 to convert the encoded data to UTF-8 form, then copies it back to the
30 buffer. The "base class's" read methods then see the UTF-8 data.
31
32 "flush" transforms the UTF-8 data deposited by the "base class's write
33 method in the buffer back into the encoded form using the encode OO perl API,
34 then copies data back into the buffer and calls "SUPER::flush.
35
36 Note that "flush" is _also_ called for read mode - we still do the (back)-translate
37 so that the the base class's "flush" sees the correct number of encoded chars
38 for positioning the seek pointer. (This double translation is the worst performance
39 issue - particularly with all-perl encode engine.)
40
41*/
33af2bc7 42#include "perliol.h"
aa0053b7 43typedef struct {
44 PerlIOBuf base; /* PerlIOBuf stuff */
0b3236bb 45 SV *bufsv; /* buffer seen by layers above */
46 SV *dataSV; /* data we have read from layer below */
47 SV *enc; /* the encoding object */
33af2bc7 48} PerlIOEncode;
49
e3f3bf95 50SV *
aa0053b7 51PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
e3f3bf95 52{
aa0053b7 53 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
54 SV *sv = &PL_sv_undef;
55 if (e->enc) {
56 dSP;
57 ENTER;
58 SAVETMPS;
59 PUSHMARK(sp);
60 XPUSHs(e->enc);
61 PUTBACK;
62 if (perl_call_method("name", G_SCALAR) == 1) {
63 SPAGAIN;
64 sv = newSVsv(POPs);
65 PUTBACK;
66 }
e3f3bf95 67 }
aa0053b7 68 return sv;
e3f3bf95 69}
33af2bc7 70
71IV
aa0053b7 72PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
33af2bc7 73{
aa0053b7 74 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
75 dSP;
76 IV code;
77 code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
78 ENTER;
79 SAVETMPS;
80 PUSHMARK(sp);
81 XPUSHs(arg);
82 PUTBACK;
83 if (perl_call_pv("Encode::find_encoding", G_SCALAR) != 1) {
84 /* should never happen */
85 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
86 return -1;
87 }
88 SPAGAIN;
89 e->enc = POPs;
90 PUTBACK;
91 if (!SvROK(e->enc)) {
92 e->enc = Nullsv;
93 errno = EINVAL;
f98bc0c6 94 Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
aa0053b7 95 arg);
96 code = -1;
97 }
98 else {
99 SvREFCNT_inc(e->enc);
100 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
101 }
102 FREETMPS;
103 LEAVE;
104 return code;
33af2bc7 105}
106
107IV
aa0053b7 108PerlIOEncode_popped(pTHX_ PerlIO * f)
33af2bc7 109{
aa0053b7 110 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
111 if (e->enc) {
112 SvREFCNT_dec(e->enc);
113 e->enc = Nullsv;
114 }
115 if (e->bufsv) {
116 SvREFCNT_dec(e->bufsv);
117 e->bufsv = Nullsv;
118 }
0b3236bb 119 if (e->dataSV) {
120 SvREFCNT_dec(e->dataSV);
284ee456 121 e->dataSV = Nullsv;
0b3236bb 122 }
aa0053b7 123 return 0;
33af2bc7 124}
125
126STDCHAR *
aa0053b7 127PerlIOEncode_get_base(pTHX_ PerlIO * f)
33af2bc7 128{
aa0053b7 129 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
130 if (!e->base.bufsiz)
131 e->base.bufsiz = 1024;
132 if (!e->bufsv) {
133 e->bufsv = newSV(e->base.bufsiz);
134 sv_setpvn(e->bufsv, "", 0);
135 }
136 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
137 if (!e->base.ptr)
138 e->base.ptr = e->base.buf;
139 if (!e->base.end)
140 e->base.end = e->base.buf;
141 if (e->base.ptr < e->base.buf
142 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
143 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
144 e->base.buf + SvLEN(e->bufsv));
145 abort();
146 }
147 if (SvLEN(e->bufsv) < e->base.bufsiz) {
148 SSize_t poff = e->base.ptr - e->base.buf;
149 SSize_t eoff = e->base.end - e->base.buf;
150 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
151 e->base.ptr = e->base.buf + poff;
152 e->base.end = e->base.buf + eoff;
153 }
154 if (e->base.ptr < e->base.buf
155 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
156 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
157 e->base.buf + SvLEN(e->bufsv));
158 abort();
159 }
160 return e->base.buf;
33af2bc7 161}
162
33af2bc7 163IV
aa0053b7 164PerlIOEncode_fill(pTHX_ PerlIO * f)
33af2bc7 165{
aa0053b7 166 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
167 dSP;
0b3236bb 168 IV code = 0;
169 PerlIO *n;
170 SSize_t avail;
171 if (PerlIO_flush(f) != 0)
172 return -1;
173 n = PerlIONext(f);
174 if (!PerlIO_fast_gets(n)) {
175 /* Things get too messy if we don't have a buffer layer
176 push a :perlio to do the job */
177 char mode[8];
178 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
179 if (!n) {
180 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
181 }
182 }
183 ENTER;
184 SAVETMPS;
185 retry:
186 avail = PerlIO_get_cnt(n);
187 if (avail <= 0) {
188 avail = PerlIO_fill(n);
189 if (avail == 0) {
190 avail = PerlIO_get_cnt(n);
191 }
192 else {
193 if (!PerlIO_error(n) && PerlIO_eof(n))
194 avail = 0;
195 }
196 }
197 if (avail > 0) {
198 STDCHAR *ptr = PerlIO_get_ptr(n);
199 SSize_t use = avail;
aa0053b7 200 SV *uni;
aa0053b7 201 char *s;
0b3236bb 202 STRLEN len = 0;
203 e->base.ptr = e->base.end = (STDCHAR *) Nullch;
204 (void) PerlIOEncode_get_base(aTHX_ f);
205 if (!e->dataSV)
206 e->dataSV = newSV(0);
207 if (SvTYPE(e->dataSV) < SVt_PV) {
208 sv_upgrade(e->dataSV,SVt_PV);
209 }
210 if (SvCUR(e->dataSV)) {
211 /* something left over from last time - create a normal
212 SV with new data appended
213 */
214 if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
215 use = e->base.bufsiz - SvCUR(e->dataSV);
216 }
9dbbe389 217 sv_catpvn(e->dataSV,(char*)ptr,use);
0b3236bb 218 }
219 else {
220 /* Create a "dummy" SV to represent the available data from layer below */
221 if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) {
222 Safefree(SvPVX(e->dataSV));
223 }
224 if (use > e->base.bufsiz) {
225 use = e->base.bufsiz;
226 }
227 SvPVX(e->dataSV) = (char *) ptr;
228 SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */
229 SvCUR_set(e->dataSV,use);
284ee456 230 SvPOK_only(e->dataSV);
0b3236bb 231 }
232 SvUTF8_off(e->dataSV);
aa0053b7 233 PUSHMARK(sp);
234 XPUSHs(e->enc);
0b3236bb 235 XPUSHs(e->dataSV);
aa0053b7 236 XPUSHs(&PL_sv_yes);
237 PUTBACK;
0b3236bb 238 if (perl_call_method("decode", G_SCALAR) != 1) {
239 Perl_die(aTHX_ "panic: decode did not return a value");
240 }
aa0053b7 241 SPAGAIN;
242 uni = POPs;
243 PUTBACK;
0b3236bb 244 /* Now get translated string (forced to UTF-8) and use as buffer */
245 if (SvPOK(uni)) {
246 s = SvPVutf8(uni, len);
85f7ebdf 247 if (len && !is_utf8_string((U8*)s,len)) {
0b3236bb 248 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
249 }
250 }
251 if (len > 0) {
252 /* Got _something */
253 /* if decode gave us back dataSV then data may vanish when
254 we do ptrcnt adjust - so take our copy now.
255 (The copy is a pain - need a put-it-here option for decode.)
256 */
257 sv_setpvn(e->bufsv,s,len);
284ee456 258 e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
0b3236bb 259 e->base.end = e->base.ptr + SvCUR(e->bufsv);
260 PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
261 SvUTF8_on(e->bufsv);
262
263 /* Adjust ptr/cnt not taking anything which
264 did not translate - not clear this is a win */
265 /* compute amount we took */
266 use -= SvCUR(e->dataSV);
267 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
268 /* and as we did not take it it isn't pending */
269 SvCUR_set(e->dataSV,0);
270 } else {
271 /* Got nothing - assume partial character so we need some more */
272 /* Make sure e->dataSV is a normal SV before re-filling as
273 buffer alias will change under us
274 */
275 s = SvPV(e->dataSV,len);
276 sv_setpvn(e->dataSV,s,len);
277 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
278 goto retry;
aa0053b7 279 }
aa0053b7 280 FREETMPS;
281 LEAVE;
0b3236bb 282 return code;
283 }
284 else {
285 if (avail == 0)
286 PerlIOBase(f)->flags |= PERLIO_F_EOF;
287 else
288 PerlIOBase(f)->flags |= PERLIO_F_ERROR;
289 return -1;
72e44f29 290 }
33af2bc7 291}
292
293IV
aa0053b7 294PerlIOEncode_flush(pTHX_ PerlIO * f)
33af2bc7 295{
aa0053b7 296 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
297 IV code = 0;
0b3236bb 298 if (e->bufsv && (e->base.ptr > e->base.buf)) {
aa0053b7 299 dSP;
300 SV *str;
301 char *s;
302 STRLEN len;
0b3236bb 303 SSize_t count = 0;
304 if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) {
305 /* Write case encode the buffer and write() to layer below */
306 ENTER;
307 SAVETMPS;
308 PUSHMARK(sp);
309 XPUSHs(e->enc);
310 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
311 SvUTF8_on(e->bufsv);
0b3236bb 312 XPUSHs(e->bufsv);
313 XPUSHs(&PL_sv_yes);
314 PUTBACK;
8bbd9480 315 if (perl_call_method("encode", G_SCALAR) != 1) {
316 Perl_die(aTHX_ "panic: encode did not return a value");
317 }
0b3236bb 318 SPAGAIN;
319 str = POPs;
320 PUTBACK;
321 s = SvPV(str, len);
322 count = PerlIO_write(PerlIONext(f),s,len);
323 if (count != len) {
324 code = -1;
325 }
326 FREETMPS;
327 LEAVE;
328 if (PerlIO_flush(PerlIONext(f)) != 0) {
329 code = -1;
330 }
25f7d9d3 331 if (SvCUR(e->bufsv)) {
332 /* Did not all translate */
333 e->base.ptr = e->base.buf+SvCUR(e->bufsv);
334 return code;
335 }
aa0053b7 336 }
0b3236bb 337 else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
338 /* read case */
339 /* if we have any untranslated stuff then unread that first */
340 if (e->dataSV && SvCUR(e->dataSV)) {
341 s = SvPV(e->dataSV, len);
342 count = PerlIO_unread(PerlIONext(f),s,len);
343 if (count != len) {
344 code = -1;
345 }
346 }
347 /* See if there is anything left in the buffer */
348 if (e->base.ptr < e->base.end) {
349 /* Bother - have unread data.
350 re-encode and unread() to layer below
351 */
352 ENTER;
353 SAVETMPS;
354 str = sv_newmortal();
355 sv_upgrade(str, SVt_PV);
9dbbe389 356 SvPVX(str) = (char*)e->base.ptr;
0b3236bb 357 SvLEN(str) = 0;
358 SvCUR_set(str, e->base.end - e->base.ptr);
284ee456 359 SvPOK_only(str);
0b3236bb 360 SvUTF8_on(str);
361 PUSHMARK(sp);
362 XPUSHs(e->enc);
363 XPUSHs(str);
364 XPUSHs(&PL_sv_yes);
365 PUTBACK;
8bbd9480 366 if (perl_call_method("encode", G_SCALAR) != 1) {
367 Perl_die(aTHX_ "panic: encode did not return a value");
368 }
0b3236bb 369 SPAGAIN;
370 str = POPs;
371 PUTBACK;
372 s = SvPV(str, len);
373 count = PerlIO_unread(PerlIONext(f),s,len);
374 if (count != len) {
375 code = -1;
376 }
377 FREETMPS;
378 LEAVE;
379 }
aa0053b7 380 }
0b3236bb 381 e->base.ptr = e->base.end = e->base.buf;
382 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
72e44f29 383 }
aa0053b7 384 return code;
33af2bc7 385}
386
387IV
aa0053b7 388PerlIOEncode_close(pTHX_ PerlIO * f)
33af2bc7 389{
aa0053b7 390 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
391 IV code = PerlIOBase_close(aTHX_ f);
392 if (e->bufsv) {
25f7d9d3 393 if (e->base.buf && e->base.ptr > e->base.buf) {
394 Perl_croak(aTHX_ "Close with partial character");
395 }
aa0053b7 396 SvREFCNT_dec(e->bufsv);
397 e->bufsv = Nullsv;
398 }
399 e->base.buf = NULL;
400 e->base.ptr = NULL;
401 e->base.end = NULL;
402 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
403 return code;
33af2bc7 404}
405
72e44f29 406Off_t
aa0053b7 407PerlIOEncode_tell(pTHX_ PerlIO * f)
72e44f29 408{
aa0053b7 409 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
0b3236bb 410 /* Unfortunately the only way to get a postion is to (re-)translate,
411 the UTF8 we have in bufefr and then ask layer below
aa0053b7 412 */
0b3236bb 413 PerlIO_flush(f);
25f7d9d3 414 if (b->buf && b->ptr > b->buf) {
415 Perl_croak(aTHX_ "Cannot tell at partial character");
416 }
0b3236bb 417 return PerlIO_tell(PerlIONext(f));
72e44f29 418}
419
8cf8f3d1 420PerlIO *
aa0053b7 421PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
422 CLONE_PARAMS * params, int flags)
8cf8f3d1 423{
aa0053b7 424 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
425 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
426 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
427 if (oe->enc) {
428 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
429 }
9f16d962 430 }
aa0053b7 431 return f;
8cf8f3d1 432}
433
33af2bc7 434PerlIO_funcs PerlIO_encode = {
aa0053b7 435 "encoding",
436 sizeof(PerlIOEncode),
284ee456 437 PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
aa0053b7 438 PerlIOEncode_pushed,
439 PerlIOEncode_popped,
440 PerlIOBuf_open,
441 PerlIOEncode_getarg,
442 PerlIOBase_fileno,
443 PerlIOEncode_dup,
444 PerlIOBuf_read,
445 PerlIOBuf_unread,
446 PerlIOBuf_write,
447 PerlIOBuf_seek,
448 PerlIOEncode_tell,
449 PerlIOEncode_close,
450 PerlIOEncode_flush,
451 PerlIOEncode_fill,
452 PerlIOBase_eof,
453 PerlIOBase_error,
454 PerlIOBase_clearerr,
455 PerlIOBase_setlinebuf,
456 PerlIOEncode_get_base,
457 PerlIOBuf_bufsiz,
458 PerlIOBuf_get_ptr,
459 PerlIOBuf_get_cnt,
460 PerlIOBuf_set_ptrcnt,
33af2bc7 461};
aa0053b7 462#endif /* encode layer */
33af2bc7 463
2f2b4ff2 464void
aa0053b7 465Encode_XSEncoding(pTHX_ encode_t * enc)
2f2b4ff2 466{
aa0053b7 467 dSP;
468 HV *stash = gv_stashpv("Encode::XS", TRUE);
469 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash);
470 int i = 0;
471 PUSHMARK(sp);
472 XPUSHs(sv);
473 while (enc->name[i]) {
474 const char *name = enc->name[i++];
475 XPUSHs(sv_2mortal(newSVpvn(name, strlen(name))));
476 }
477 PUTBACK;
478 call_pv("Encode::define_encoding", G_DISCARD);
479 SvREFCNT_dec(sv);
2f2b4ff2 480}
481
aa0053b7 482void
483call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig)
484{
25f7d9d3 485 /* Exists for breakpointing */
aa0053b7 486}
67e989fb 487
2f2b4ff2 488static SV *
aa0053b7 489encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
490 int check)
2f2b4ff2 491{
aa0053b7 492 STRLEN slen;
493 U8 *s = (U8 *) SvPV(src, slen);
3aececda 494 STRLEN tlen = slen;
495 STRLEN ddone = 0;
496 STRLEN sdone = 0;
0b3236bb 497 SV *dst = sv_2mortal(newSV(slen+1));
aa0053b7 498 if (slen) {
0b3236bb 499 U8 *d = (U8 *) SvPVX(dst);
500 STRLEN dlen = SvLEN(dst)-1;
aa0053b7 501 int code;
502 while ((code = do_encode(dir, s, &slen, d, dlen, &dlen, !check))) {
3aececda 503 SvCUR_set(dst, dlen+ddone);
284ee456 504 SvPOK_only(dst);
9b37254d 505
0b3236bb 506#if 0
3aececda 507 Perl_warn(aTHX_ "code=%d @ s=%d/%d/%d d=%d/%d/%d",code,slen,sdone,tlen,dlen,ddone,SvLEN(dst)-1);
0b3236bb 508#endif
509
510 if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL)
aa0053b7 511 break;
9b37254d 512
aa0053b7 513 switch (code) {
514 case ENCODE_NOSPACE:
515 {
0b3236bb 516 STRLEN need ;
3aececda 517 sdone += slen;
518 ddone += dlen;
519 if (sdone) {
c98ca92f 520 need = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone+UTF8_MAXLEN;
521#if 0
522 Perl_warn(aTHX_ "Done %d/%d had %d need %d",
523 sdone, tlen, SvLEN(dst), need);
524#endif
0b3236bb 525 }
526 else {
3aececda 527 need = SvLEN(dst) + UTF8_MAXLEN;
0b3236bb 528 }
529
aa0053b7 530 d = (U8 *) SvGROW(dst, need);
3aececda 531 if (ddone >= SvLEN(dst)) {
532 Perl_croak(aTHX_ "Destination couldn't be grown.");
aa0053b7 533 }
3aececda 534 dlen = SvLEN(dst)-ddone-1;
535 d += ddone;
536 s += slen;
537 slen = tlen-sdone;
538 continue;
aa0053b7 539 }
2f2b4ff2 540
aa0053b7 541 case ENCODE_NOREP:
542 if (dir == enc->f_utf8) {
543 if (!check && ckWARN_d(WARN_UTF8)) {
544 STRLEN clen;
545 UV ch =
546 utf8n_to_uvuni(s + slen, (SvCUR(src) - slen),
547 &clen, 0);
f98bc0c6 548 Perl_warner(aTHX_ packWARN(WARN_UTF8),
aa0053b7 549 "\"\\N{U+%" UVxf
550 "}\" does not map to %s", ch,
551 enc->name[0]);
552 /* FIXME: Skip over the character, copy in replacement and continue
553 * but that is messy so for now just fail.
554 */
555 return &PL_sv_undef;
556 }
557 else {
558 return &PL_sv_undef;
559 }
560 }
561 else {
d0ee0454 562 /* UTF-8 is supposed to be "Universal" so should not happen
563 for real characters, but some encodings have non-assigned
564 codes which may occur.
565 */
ee981de6 566 Perl_croak(aTHX_ "%s \"\\x%02X\" does not map to Unicode (%d)",
567 enc->name[0], (U8) s[slen], code);
aa0053b7 568 }
569 break;
2f2b4ff2 570
aa0053b7 571 default:
572 Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
573 code, (dir == enc->f_utf8) ? "to" : "from",
574 enc->name[0]);
575 return &PL_sv_undef;
576 }
577 }
3aececda 578 SvCUR_set(dst, dlen+ddone);
284ee456 579 SvPOK_only(dst);
aa0053b7 580 if (check) {
3aececda 581 sdone = SvCUR(src) - (slen+sdone);
582 if (sdone) {
f54fca96 583#if 1
584 /* FIXME: A Move() is dangerous - PV could be mmap'ed readonly
585 SvOOK would be ideal - but sv_backoff does not understand SvLEN == 0
586 type SVs and sv_clear() calls it ...
587 */
64ffdd5e 588 sv_setpvn(src, (char*)s+slen, sdone);
f54fca96 589#else
3aececda 590 Move(s + slen, SvPVX(src), sdone , U8);
f54fca96 591#endif
aa0053b7 592 }
3aececda 593 SvCUR_set(src, sdone);
aa0053b7 594 }
2f2b4ff2 595 }
aa0053b7 596 else {
0b3236bb 597 SvCUR_set(dst, 0);
284ee456 598 SvPOK_only(dst);
2f2b4ff2 599 }
0b3236bb 600 *SvEND(dst) = '\0';
aa0053b7 601 return dst;
2f2b4ff2 602}
603
50d26985 604MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_
2f2b4ff2 605
606PROTOTYPES: ENABLE
607
608void
0a95303c 609Method_name(obj)
610SV * obj
611CODE:
612 {
613 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
614 ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0])));
615 XSRETURN(1);
616 }
617
618void
691638dd 619Method_decode(obj,src,check = FALSE)
2f2b4ff2 620SV * obj
621SV * src
691638dd 622bool check
2f2b4ff2 623CODE:
624 {
625 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
626 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check);
627 SvUTF8_on(ST(0));
628 XSRETURN(1);
629 }
630
631void
691638dd 632Method_encode(obj,src,check = FALSE)
2f2b4ff2 633SV * obj
634SV * src
691638dd 635bool check
2f2b4ff2 636CODE:
637 {
638 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
639 sv_utf8_upgrade(src);
640 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check);
641 XSRETURN(1);
642 }
643
67e989fb 644MODULE = Encode PACKAGE = Encode
2c674647 645
646PROTOTYPES: ENABLE
647
67e989fb 648I32
2c674647 649_bytes_to_utf8(sv, ...)
67e989fb 650 SV * sv
2c674647 651 CODE:
67e989fb 652 {
653 SV * encoding = items == 2 ? ST(1) : Nullsv;
654
655 if (encoding)
656 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
657 else {
658 STRLEN len;
183a2d84 659 U8* s = (U8*)SvPV(sv, len);
67e989fb 660 U8* converted;
661
662 converted = bytes_to_utf8(s, &len); /* This allocs */
183a2d84 663 sv_setpvn(sv, (char *)converted, len);
67e989fb 664 SvUTF8_on(sv); /* XXX Should we? */
665 Safefree(converted); /* ... so free it */
666 RETVAL = len;
667 }
668 }
2c674647 669 OUTPUT:
67e989fb 670 RETVAL
2c674647 671
67e989fb 672I32
2c674647 673_utf8_to_bytes(sv, ...)
67e989fb 674 SV * sv
2c674647 675 CODE:
67e989fb 676 {
677 SV * to = items > 1 ? ST(1) : Nullsv;
678 SV * check = items > 2 ? ST(2) : Nullsv;
87714904 679
67e989fb 680 if (to)
681 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
682 else {
67e989fb 683 STRLEN len;
b113ac0e 684 U8 *s = (U8*)SvPV(sv, len);
67e989fb 685
9c5ffd7c 686 RETVAL = 0;
67e989fb 687 if (SvTRUE(check)) {
688 /* Must do things the slow way */
689 U8 *dest;
87714904 690 U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */
67e989fb 691 U8 *send = s + len;
692
693 New(83, dest, len, U8); /* I think */
694
695 while (s < send) {
696 if (*s < 0x80)
697 *dest++ = *s++;
698 else {
b113ac0e 699 STRLEN ulen;
700 UV uv = *s++;
87714904 701
67e989fb 702 /* Have to do it all ourselves because of error routine,
703 aargh. */
704 if (!(uv & 0x40))
705 goto failure;
706 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
707 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
708 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
709 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
710 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
711 else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
712 else { ulen = 13; uv = 0; }
87714904 713
67e989fb 714 /* Note change to utf8.c variable naming, for variety */
715 while (ulen--) {
716 if ((*s & 0xc0) != 0x80)
717 goto failure;
87714904 718
67e989fb 719 else
720 uv = (uv << 6) | (*s++ & 0x3f);
87714904 721 }
67e989fb 722 if (uv > 256) {
723 failure:
724 call_failure(check, s, dest, src);
725 /* Now what happens? */
726 }
727 *dest++ = (U8)uv;
728 }
729 }
730 } else
731 RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
732 }
2c674647 733 }
734 OUTPUT:
735 RETVAL
736
2c674647 737bool
4411f3b6 738is_utf8(sv, check = FALSE)
739SV * sv
740bool check
2c674647 741 CODE:
742 {
2eebba1d 743 if (SvGMAGICAL(sv)) /* it could be $1, for example */
744 sv = newSVsv(sv); /* GMAGIG will be done */
2c674647 745 if (SvPOK(sv)) {
4411f3b6 746 RETVAL = SvUTF8(sv) ? TRUE : FALSE;
2c674647 747 if (RETVAL &&
4411f3b6 748 check &&
2c674647 749 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
750 RETVAL = FALSE;
751 } else {
752 RETVAL = FALSE;
753 }
2eebba1d 754 if (sv != ST(0))
755 SvREFCNT_dec(sv); /* it was a temp copy */
2c674647 756 }
757 OUTPUT:
758 RETVAL
759
760SV *
4411f3b6 761_utf8_on(sv)
2c674647 762 SV * sv
763 CODE:
764 {
765 if (SvPOK(sv)) {
87714904 766 SV *rsv = newSViv(SvUTF8(sv));
2c674647 767 RETVAL = rsv;
768 SvUTF8_on(sv);
769 } else {
770 RETVAL = &PL_sv_undef;
771 }
772 }
773 OUTPUT:
774 RETVAL
775
776SV *
4411f3b6 777_utf8_off(sv)
2c674647 778 SV * sv
779 CODE:
780 {
781 if (SvPOK(sv)) {
87714904 782 SV *rsv = newSViv(SvUTF8(sv));
2c674647 783 RETVAL = rsv;
784 SvUTF8_off(sv);
785 } else {
786 RETVAL = &PL_sv_undef;
787 }
788 }
789 OUTPUT:
790 RETVAL
791
33af2bc7 792BOOT:
793{
6a59c517 794#if defined(USE_PERLIO) && !defined(USE_SFIO)
a999f61b 795 PerlIO_define_layer(aTHX_ &PerlIO_encode);
33af2bc7 796#endif
5129552c 797/* #include "8859_def.h" */
798/* #include "EBCDIC_def.h" */
799/* #include "Symbols_def.h" */
071db25d 800#include "def_t_def.h"
33af2bc7 801}