Make dependencies more precise (for nmake)
[p5sagit/p5-mst-13.2.git] / ext / Encode / Encode.xs
CommitLineData
fcf2db38 1#define PERL_NO_GET_CONTEXT
2
2c674647 3#include "EXTERN.h"
4#include "perl.h"
5#include "XSUB.h"
2f2b4ff2 6#define U8 U8
7#include "encode.h"
94b89828 8#include "8859.h"
2f2b4ff2 9#include "EBCDIC.h"
10#include "Symbols.h"
2c674647 11
fcf2db38 12
13#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \
2f5768b8 14 Perl_croak(aTHX_ "panic_unimplemented"); \
4a83738a 15 return (y)0; /* fool picky compilers */ \
87714904 16 }
67e989fb 17UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
aa0053b7 18 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
9df9a5cd 19#if defined(USE_PERLIO) && !defined(USE_SFIO)
72e44f29 20/* Define an encoding "layer" in the perliol.h sense.
21 The layer defined here "inherits" in an object-oriented sense from the
22 "perlio" layer with its PerlIOBuf_* "methods".
23 The implementation is particularly efficient as until Encode settles down
24 there is no point in tryint to tune it.
25
26 The layer works by overloading the "fill" and "flush" methods.
27
28 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API
29 to convert the encoded data to UTF-8 form, then copies it back to the
30 buffer. The "base class's" read methods then see the UTF-8 data.
31
32 "flush" transforms the UTF-8 data deposited by the "base class's write
33 method in the buffer back into the encoded form using the encode OO perl API,
34 then copies data back into the buffer and calls "SUPER::flush.
35
36 Note that "flush" is _also_ called for read mode - we still do the (back)-translate
37 so that the the base class's "flush" sees the correct number of encoded chars
38 for positioning the seek pointer. (This double translation is the worst performance
39 issue - particularly with all-perl encode engine.)
40
41*/
33af2bc7 42#include "perliol.h"
aa0053b7 43typedef struct {
44 PerlIOBuf base; /* PerlIOBuf stuff */
0b3236bb 45 SV *bufsv; /* buffer seen by layers above */
46 SV *dataSV; /* data we have read from layer below */
47 SV *enc; /* the encoding object */
33af2bc7 48} PerlIOEncode;
49
e3f3bf95 50SV *
aa0053b7 51PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
e3f3bf95 52{
aa0053b7 53 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
54 SV *sv = &PL_sv_undef;
55 if (e->enc) {
56 dSP;
57 ENTER;
58 SAVETMPS;
59 PUSHMARK(sp);
60 XPUSHs(e->enc);
61 PUTBACK;
62 if (perl_call_method("name", G_SCALAR) == 1) {
63 SPAGAIN;
64 sv = newSVsv(POPs);
65 PUTBACK;
66 }
e3f3bf95 67 }
aa0053b7 68 return sv;
e3f3bf95 69}
33af2bc7 70
71IV
aa0053b7 72PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
33af2bc7 73{
aa0053b7 74 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
75 dSP;
76 IV code;
77 code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
78 ENTER;
79 SAVETMPS;
80 PUSHMARK(sp);
81 XPUSHs(arg);
82 PUTBACK;
83 if (perl_call_pv("Encode::find_encoding", G_SCALAR) != 1) {
84 /* should never happen */
85 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
86 return -1;
87 }
88 SPAGAIN;
89 e->enc = POPs;
90 PUTBACK;
91 if (!SvROK(e->enc)) {
92 e->enc = Nullsv;
93 errno = EINVAL;
94 Perl_warner(aTHX_ WARN_IO, "Cannot find encoding \"%" SVf "\"",
95 arg);
96 code = -1;
97 }
98 else {
99 SvREFCNT_inc(e->enc);
100 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
101 }
102 FREETMPS;
103 LEAVE;
104 return code;
33af2bc7 105}
106
107IV
aa0053b7 108PerlIOEncode_popped(pTHX_ PerlIO * f)
33af2bc7 109{
aa0053b7 110 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
111 if (e->enc) {
112 SvREFCNT_dec(e->enc);
113 e->enc = Nullsv;
114 }
115 if (e->bufsv) {
116 SvREFCNT_dec(e->bufsv);
117 e->bufsv = Nullsv;
118 }
0b3236bb 119 if (e->dataSV) {
120 SvREFCNT_dec(e->dataSV);
121 e->bufsv = Nullsv;
122 }
aa0053b7 123 return 0;
33af2bc7 124}
125
126STDCHAR *
aa0053b7 127PerlIOEncode_get_base(pTHX_ PerlIO * f)
33af2bc7 128{
aa0053b7 129 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
130 if (!e->base.bufsiz)
131 e->base.bufsiz = 1024;
132 if (!e->bufsv) {
133 e->bufsv = newSV(e->base.bufsiz);
134 sv_setpvn(e->bufsv, "", 0);
135 }
136 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
137 if (!e->base.ptr)
138 e->base.ptr = e->base.buf;
139 if (!e->base.end)
140 e->base.end = e->base.buf;
141 if (e->base.ptr < e->base.buf
142 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
143 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
144 e->base.buf + SvLEN(e->bufsv));
145 abort();
146 }
147 if (SvLEN(e->bufsv) < e->base.bufsiz) {
148 SSize_t poff = e->base.ptr - e->base.buf;
149 SSize_t eoff = e->base.end - e->base.buf;
150 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
151 e->base.ptr = e->base.buf + poff;
152 e->base.end = e->base.buf + eoff;
153 }
154 if (e->base.ptr < e->base.buf
155 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
156 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
157 e->base.buf + SvLEN(e->bufsv));
158 abort();
159 }
160 return e->base.buf;
33af2bc7 161}
162
33af2bc7 163IV
aa0053b7 164PerlIOEncode_fill(pTHX_ PerlIO * f)
33af2bc7 165{
aa0053b7 166 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
167 dSP;
0b3236bb 168 IV code = 0;
169 PerlIO *n;
170 SSize_t avail;
171 if (PerlIO_flush(f) != 0)
172 return -1;
173 n = PerlIONext(f);
174 if (!PerlIO_fast_gets(n)) {
175 /* Things get too messy if we don't have a buffer layer
176 push a :perlio to do the job */
177 char mode[8];
178 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
179 if (!n) {
180 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
181 }
182 }
183 ENTER;
184 SAVETMPS;
185 retry:
186 avail = PerlIO_get_cnt(n);
187 if (avail <= 0) {
188 avail = PerlIO_fill(n);
189 if (avail == 0) {
190 avail = PerlIO_get_cnt(n);
191 }
192 else {
193 if (!PerlIO_error(n) && PerlIO_eof(n))
194 avail = 0;
195 }
196 }
197 if (avail > 0) {
198 STDCHAR *ptr = PerlIO_get_ptr(n);
199 SSize_t use = avail;
aa0053b7 200 SV *uni;
aa0053b7 201 char *s;
0b3236bb 202 STRLEN len = 0;
203 e->base.ptr = e->base.end = (STDCHAR *) Nullch;
204 (void) PerlIOEncode_get_base(aTHX_ f);
205 if (!e->dataSV)
206 e->dataSV = newSV(0);
207 if (SvTYPE(e->dataSV) < SVt_PV) {
208 sv_upgrade(e->dataSV,SVt_PV);
209 }
210 if (SvCUR(e->dataSV)) {
211 /* something left over from last time - create a normal
212 SV with new data appended
213 */
214 if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
215 use = e->base.bufsiz - SvCUR(e->dataSV);
216 }
9dbbe389 217 sv_catpvn(e->dataSV,(char*)ptr,use);
0b3236bb 218 }
219 else {
220 /* Create a "dummy" SV to represent the available data from layer below */
221 if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) {
222 Safefree(SvPVX(e->dataSV));
223 }
224 if (use > e->base.bufsiz) {
225 use = e->base.bufsiz;
226 }
227 SvPVX(e->dataSV) = (char *) ptr;
228 SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */
229 SvCUR_set(e->dataSV,use);
230 SvPOK_on(e->dataSV);
231 }
232 SvUTF8_off(e->dataSV);
aa0053b7 233 PUSHMARK(sp);
234 XPUSHs(e->enc);
0b3236bb 235 XPUSHs(e->dataSV);
aa0053b7 236 XPUSHs(&PL_sv_yes);
237 PUTBACK;
0b3236bb 238 if (perl_call_method("decode", G_SCALAR) != 1) {
239 Perl_die(aTHX_ "panic: decode did not return a value");
240 }
aa0053b7 241 SPAGAIN;
242 uni = POPs;
243 PUTBACK;
0b3236bb 244 /* Now get translated string (forced to UTF-8) and use as buffer */
245 if (SvPOK(uni)) {
246 s = SvPVutf8(uni, len);
85f7ebdf 247 if (len && !is_utf8_string((U8*)s,len)) {
0b3236bb 248 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
249 }
250 }
251 if (len > 0) {
252 /* Got _something */
253 /* if decode gave us back dataSV then data may vanish when
254 we do ptrcnt adjust - so take our copy now.
255 (The copy is a pain - need a put-it-here option for decode.)
256 */
257 sv_setpvn(e->bufsv,s,len);
9b7cf638 258 e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
0b3236bb 259 e->base.end = e->base.ptr + SvCUR(e->bufsv);
260 PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
261 SvUTF8_on(e->bufsv);
262
263 /* Adjust ptr/cnt not taking anything which
264 did not translate - not clear this is a win */
265 /* compute amount we took */
266 use -= SvCUR(e->dataSV);
267 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
268 /* and as we did not take it it isn't pending */
269 SvCUR_set(e->dataSV,0);
270 } else {
271 /* Got nothing - assume partial character so we need some more */
272 /* Make sure e->dataSV is a normal SV before re-filling as
273 buffer alias will change under us
274 */
275 s = SvPV(e->dataSV,len);
276 sv_setpvn(e->dataSV,s,len);
277 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
278 goto retry;
aa0053b7 279 }
aa0053b7 280 FREETMPS;
281 LEAVE;
0b3236bb 282 return code;
283 }
284 else {
285 if (avail == 0)
286 PerlIOBase(f)->flags |= PERLIO_F_EOF;
287 else
288 PerlIOBase(f)->flags |= PERLIO_F_ERROR;
289 return -1;
72e44f29 290 }
33af2bc7 291}
292
293IV
aa0053b7 294PerlIOEncode_flush(pTHX_ PerlIO * f)
33af2bc7 295{
aa0053b7 296 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
297 IV code = 0;
0b3236bb 298 if (e->bufsv && (e->base.ptr > e->base.buf)) {
aa0053b7 299 dSP;
300 SV *str;
301 char *s;
302 STRLEN len;
0b3236bb 303 SSize_t count = 0;
304 if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) {
305 /* Write case encode the buffer and write() to layer below */
306 ENTER;
307 SAVETMPS;
308 PUSHMARK(sp);
309 XPUSHs(e->enc);
310 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
311 SvUTF8_on(e->bufsv);
0b3236bb 312 XPUSHs(e->bufsv);
313 XPUSHs(&PL_sv_yes);
314 PUTBACK;
8bbd9480 315 if (perl_call_method("encode", G_SCALAR) != 1) {
316 Perl_die(aTHX_ "panic: encode did not return a value");
317 }
0b3236bb 318 SPAGAIN;
319 str = POPs;
320 PUTBACK;
321 s = SvPV(str, len);
322 count = PerlIO_write(PerlIONext(f),s,len);
323 if (count != len) {
324 code = -1;
325 }
326 FREETMPS;
327 LEAVE;
328 if (PerlIO_flush(PerlIONext(f)) != 0) {
329 code = -1;
330 }
25f7d9d3 331 if (SvCUR(e->bufsv)) {
332 /* Did not all translate */
333 e->base.ptr = e->base.buf+SvCUR(e->bufsv);
334 return code;
335 }
aa0053b7 336 }
0b3236bb 337 else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
338 /* read case */
339 /* if we have any untranslated stuff then unread that first */
340 if (e->dataSV && SvCUR(e->dataSV)) {
341 s = SvPV(e->dataSV, len);
342 count = PerlIO_unread(PerlIONext(f),s,len);
343 if (count != len) {
344 code = -1;
345 }
346 }
347 /* See if there is anything left in the buffer */
348 if (e->base.ptr < e->base.end) {
349 /* Bother - have unread data.
350 re-encode and unread() to layer below
351 */
352 ENTER;
353 SAVETMPS;
354 str = sv_newmortal();
355 sv_upgrade(str, SVt_PV);
9dbbe389 356 SvPVX(str) = (char*)e->base.ptr;
0b3236bb 357 SvLEN(str) = 0;
358 SvCUR_set(str, e->base.end - e->base.ptr);
359 SvUTF8_on(str);
360 PUSHMARK(sp);
361 XPUSHs(e->enc);
362 XPUSHs(str);
363 XPUSHs(&PL_sv_yes);
364 PUTBACK;
8bbd9480 365 if (perl_call_method("encode", G_SCALAR) != 1) {
366 Perl_die(aTHX_ "panic: encode did not return a value");
367 }
0b3236bb 368 SPAGAIN;
369 str = POPs;
370 PUTBACK;
371 s = SvPV(str, len);
372 count = PerlIO_unread(PerlIONext(f),s,len);
373 if (count != len) {
374 code = -1;
375 }
376 FREETMPS;
377 LEAVE;
378 }
aa0053b7 379 }
0b3236bb 380 e->base.ptr = e->base.end = e->base.buf;
381 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
72e44f29 382 }
aa0053b7 383 return code;
33af2bc7 384}
385
386IV
aa0053b7 387PerlIOEncode_close(pTHX_ PerlIO * f)
33af2bc7 388{
aa0053b7 389 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
390 IV code = PerlIOBase_close(aTHX_ f);
391 if (e->bufsv) {
25f7d9d3 392 if (e->base.buf && e->base.ptr > e->base.buf) {
393 Perl_croak(aTHX_ "Close with partial character");
394 }
aa0053b7 395 SvREFCNT_dec(e->bufsv);
396 e->bufsv = Nullsv;
397 }
398 e->base.buf = NULL;
399 e->base.ptr = NULL;
400 e->base.end = NULL;
401 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
402 return code;
33af2bc7 403}
404
72e44f29 405Off_t
aa0053b7 406PerlIOEncode_tell(pTHX_ PerlIO * f)
72e44f29 407{
aa0053b7 408 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
0b3236bb 409 /* Unfortunately the only way to get a postion is to (re-)translate,
410 the UTF8 we have in bufefr and then ask layer below
aa0053b7 411 */
0b3236bb 412 PerlIO_flush(f);
25f7d9d3 413 if (b->buf && b->ptr > b->buf) {
414 Perl_croak(aTHX_ "Cannot tell at partial character");
415 }
0b3236bb 416 return PerlIO_tell(PerlIONext(f));
72e44f29 417}
418
8cf8f3d1 419PerlIO *
aa0053b7 420PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
421 CLONE_PARAMS * params, int flags)
8cf8f3d1 422{
aa0053b7 423 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
424 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
425 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
426 if (oe->enc) {
427 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
428 }
9f16d962 429 }
aa0053b7 430 return f;
8cf8f3d1 431}
432
33af2bc7 433PerlIO_funcs PerlIO_encode = {
aa0053b7 434 "encoding",
435 sizeof(PerlIOEncode),
436 PERLIO_K_BUFFERED,
437 PerlIOEncode_pushed,
438 PerlIOEncode_popped,
439 PerlIOBuf_open,
440 PerlIOEncode_getarg,
441 PerlIOBase_fileno,
442 PerlIOEncode_dup,
443 PerlIOBuf_read,
444 PerlIOBuf_unread,
445 PerlIOBuf_write,
446 PerlIOBuf_seek,
447 PerlIOEncode_tell,
448 PerlIOEncode_close,
449 PerlIOEncode_flush,
450 PerlIOEncode_fill,
451 PerlIOBase_eof,
452 PerlIOBase_error,
453 PerlIOBase_clearerr,
454 PerlIOBase_setlinebuf,
455 PerlIOEncode_get_base,
456 PerlIOBuf_bufsiz,
457 PerlIOBuf_get_ptr,
458 PerlIOBuf_get_cnt,
459 PerlIOBuf_set_ptrcnt,
33af2bc7 460};
aa0053b7 461#endif /* encode layer */
33af2bc7 462
2f2b4ff2 463void
aa0053b7 464Encode_XSEncoding(pTHX_ encode_t * enc)
2f2b4ff2 465{
aa0053b7 466 dSP;
467 HV *stash = gv_stashpv("Encode::XS", TRUE);
468 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash);
469 int i = 0;
470 PUSHMARK(sp);
471 XPUSHs(sv);
472 while (enc->name[i]) {
473 const char *name = enc->name[i++];
474 XPUSHs(sv_2mortal(newSVpvn(name, strlen(name))));
475 }
476 PUTBACK;
477 call_pv("Encode::define_encoding", G_DISCARD);
478 SvREFCNT_dec(sv);
2f2b4ff2 479}
480
aa0053b7 481void
482call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig)
483{
25f7d9d3 484 /* Exists for breakpointing */
aa0053b7 485}
67e989fb 486
2f2b4ff2 487static SV *
aa0053b7 488encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
489 int check)
2f2b4ff2 490{
aa0053b7 491 STRLEN slen;
492 U8 *s = (U8 *) SvPV(src, slen);
3aececda 493 STRLEN tlen = slen;
494 STRLEN ddone = 0;
495 STRLEN sdone = 0;
0b3236bb 496 SV *dst = sv_2mortal(newSV(slen+1));
aa0053b7 497 if (slen) {
0b3236bb 498 U8 *d = (U8 *) SvPVX(dst);
499 STRLEN dlen = SvLEN(dst)-1;
aa0053b7 500 int code;
501 while ((code = do_encode(dir, s, &slen, d, dlen, &dlen, !check))) {
3aececda 502 SvCUR_set(dst, dlen+ddone);
aa0053b7 503 SvPOK_on(dst);
9b37254d 504
0b3236bb 505#if 0
3aececda 506 Perl_warn(aTHX_ "code=%d @ s=%d/%d/%d d=%d/%d/%d",code,slen,sdone,tlen,dlen,ddone,SvLEN(dst)-1);
0b3236bb 507#endif
508
509 if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL)
aa0053b7 510 break;
9b37254d 511
aa0053b7 512 switch (code) {
513 case ENCODE_NOSPACE:
514 {
0b3236bb 515 STRLEN need ;
3aececda 516 sdone += slen;
517 ddone += dlen;
518 if (sdone) {
519 need = (tlen*SvLEN(dst)+sdone-1)/sdone+UTF8_MAXLEN;
0b3236bb 520 }
521 else {
3aececda 522 need = SvLEN(dst) + UTF8_MAXLEN;
0b3236bb 523 }
524
aa0053b7 525 d = (U8 *) SvGROW(dst, need);
3aececda 526 if (ddone >= SvLEN(dst)) {
527 Perl_croak(aTHX_ "Destination couldn't be grown.");
aa0053b7 528 }
3aececda 529 dlen = SvLEN(dst)-ddone-1;
530 d += ddone;
531 s += slen;
532 slen = tlen-sdone;
533 continue;
aa0053b7 534 }
2f2b4ff2 535
aa0053b7 536 case ENCODE_NOREP:
537 if (dir == enc->f_utf8) {
538 if (!check && ckWARN_d(WARN_UTF8)) {
539 STRLEN clen;
540 UV ch =
541 utf8n_to_uvuni(s + slen, (SvCUR(src) - slen),
542 &clen, 0);
543 Perl_warner(aTHX_ WARN_UTF8,
544 "\"\\N{U+%" UVxf
545 "}\" does not map to %s", ch,
546 enc->name[0]);
547 /* FIXME: Skip over the character, copy in replacement and continue
548 * but that is messy so for now just fail.
549 */
550 return &PL_sv_undef;
551 }
552 else {
553 return &PL_sv_undef;
554 }
555 }
556 else {
557 /* UTF-8 is supposed to be "Universal" so should not happen */
558 Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8",
559 enc->name[0], (int) (SvCUR(src) - slen),
560 s + slen);
561 }
562 break;
2f2b4ff2 563
aa0053b7 564 default:
565 Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
566 code, (dir == enc->f_utf8) ? "to" : "from",
567 enc->name[0]);
568 return &PL_sv_undef;
569 }
570 }
3aececda 571 SvCUR_set(dst, dlen+ddone);
aa0053b7 572 SvPOK_on(dst);
573 if (check) {
3aececda 574 sdone = SvCUR(src) - (slen+sdone);
575 if (sdone) {
576 Move(s + slen, SvPVX(src), sdone , U8);
aa0053b7 577 }
3aececda 578 SvCUR_set(src, sdone);
aa0053b7 579 }
2f2b4ff2 580 }
aa0053b7 581 else {
0b3236bb 582 SvCUR_set(dst, 0);
aa0053b7 583 SvPOK_on(dst);
2f2b4ff2 584 }
0b3236bb 585 *SvEND(dst) = '\0';
aa0053b7 586 return dst;
2f2b4ff2 587}
588
50d26985 589MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_
2f2b4ff2 590
591PROTOTYPES: ENABLE
592
593void
0a95303c 594Method_name(obj)
595SV * obj
596CODE:
597 {
598 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
599 ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0])));
600 XSRETURN(1);
601 }
602
603void
691638dd 604Method_decode(obj,src,check = FALSE)
2f2b4ff2 605SV * obj
606SV * src
691638dd 607bool check
2f2b4ff2 608CODE:
609 {
610 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
611 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check);
612 SvUTF8_on(ST(0));
613 XSRETURN(1);
614 }
615
616void
691638dd 617Method_encode(obj,src,check = FALSE)
2f2b4ff2 618SV * obj
619SV * src
691638dd 620bool check
2f2b4ff2 621CODE:
622 {
623 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
624 sv_utf8_upgrade(src);
625 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check);
626 XSRETURN(1);
627 }
628
67e989fb 629MODULE = Encode PACKAGE = Encode
2c674647 630
631PROTOTYPES: ENABLE
632
67e989fb 633I32
2c674647 634_bytes_to_utf8(sv, ...)
67e989fb 635 SV * sv
2c674647 636 CODE:
67e989fb 637 {
638 SV * encoding = items == 2 ? ST(1) : Nullsv;
639
640 if (encoding)
641 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
642 else {
643 STRLEN len;
183a2d84 644 U8* s = (U8*)SvPV(sv, len);
67e989fb 645 U8* converted;
646
647 converted = bytes_to_utf8(s, &len); /* This allocs */
183a2d84 648 sv_setpvn(sv, (char *)converted, len);
67e989fb 649 SvUTF8_on(sv); /* XXX Should we? */
650 Safefree(converted); /* ... so free it */
651 RETVAL = len;
652 }
653 }
2c674647 654 OUTPUT:
67e989fb 655 RETVAL
2c674647 656
67e989fb 657I32
2c674647 658_utf8_to_bytes(sv, ...)
67e989fb 659 SV * sv
2c674647 660 CODE:
67e989fb 661 {
662 SV * to = items > 1 ? ST(1) : Nullsv;
663 SV * check = items > 2 ? ST(2) : Nullsv;
87714904 664
67e989fb 665 if (to)
666 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
667 else {
67e989fb 668 STRLEN len;
b113ac0e 669 U8 *s = (U8*)SvPV(sv, len);
67e989fb 670
9c5ffd7c 671 RETVAL = 0;
67e989fb 672 if (SvTRUE(check)) {
673 /* Must do things the slow way */
674 U8 *dest;
87714904 675 U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */
67e989fb 676 U8 *send = s + len;
677
678 New(83, dest, len, U8); /* I think */
679
680 while (s < send) {
681 if (*s < 0x80)
682 *dest++ = *s++;
683 else {
b113ac0e 684 STRLEN ulen;
685 UV uv = *s++;
87714904 686
67e989fb 687 /* Have to do it all ourselves because of error routine,
688 aargh. */
689 if (!(uv & 0x40))
690 goto failure;
691 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
692 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
693 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
694 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
695 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
696 else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
697 else { ulen = 13; uv = 0; }
87714904 698
67e989fb 699 /* Note change to utf8.c variable naming, for variety */
700 while (ulen--) {
701 if ((*s & 0xc0) != 0x80)
702 goto failure;
87714904 703
67e989fb 704 else
705 uv = (uv << 6) | (*s++ & 0x3f);
87714904 706 }
67e989fb 707 if (uv > 256) {
708 failure:
709 call_failure(check, s, dest, src);
710 /* Now what happens? */
711 }
712 *dest++ = (U8)uv;
713 }
714 }
715 } else
716 RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
717 }
2c674647 718 }
719 OUTPUT:
720 RETVAL
721
2c674647 722bool
4411f3b6 723is_utf8(sv, check = FALSE)
724SV * sv
725bool check
2c674647 726 CODE:
727 {
2eebba1d 728 if (SvGMAGICAL(sv)) /* it could be $1, for example */
729 sv = newSVsv(sv); /* GMAGIG will be done */
2c674647 730 if (SvPOK(sv)) {
4411f3b6 731 RETVAL = SvUTF8(sv) ? TRUE : FALSE;
2c674647 732 if (RETVAL &&
4411f3b6 733 check &&
2c674647 734 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
735 RETVAL = FALSE;
736 } else {
737 RETVAL = FALSE;
738 }
2eebba1d 739 if (sv != ST(0))
740 SvREFCNT_dec(sv); /* it was a temp copy */
2c674647 741 }
742 OUTPUT:
743 RETVAL
744
745SV *
4411f3b6 746_utf8_on(sv)
2c674647 747 SV * sv
748 CODE:
749 {
750 if (SvPOK(sv)) {
87714904 751 SV *rsv = newSViv(SvUTF8(sv));
2c674647 752 RETVAL = rsv;
753 SvUTF8_on(sv);
754 } else {
755 RETVAL = &PL_sv_undef;
756 }
757 }
758 OUTPUT:
759 RETVAL
760
761SV *
4411f3b6 762_utf8_off(sv)
2c674647 763 SV * sv
764 CODE:
765 {
766 if (SvPOK(sv)) {
87714904 767 SV *rsv = newSViv(SvUTF8(sv));
2c674647 768 RETVAL = rsv;
769 SvUTF8_off(sv);
770 } else {
771 RETVAL = &PL_sv_undef;
772 }
773 }
774 OUTPUT:
775 RETVAL
776
33af2bc7 777BOOT:
778{
6a59c517 779#if defined(USE_PERLIO) && !defined(USE_SFIO)
a999f61b 780 PerlIO_define_layer(aTHX_ &PerlIO_encode);
33af2bc7 781#endif
023d8852 782#include "8859_def.h"
783#include "EBCDIC_def.h"
784#include "Symbols_def.h"
33af2bc7 785}