Restore mmap function (broken by tweaks to shared buffer
[p5sagit/p5-mst-13.2.git] / ext / Encode / Encode.xs
CommitLineData
2c674647 1#include "EXTERN.h"
2#include "perl.h"
3#include "XSUB.h"
4
67e989fb 5#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) { \
2f5768b8 6 Perl_croak(aTHX_ "panic_unimplemented"); \
4a83738a 7 return (y)0; /* fool picky compilers */ \
87714904 8 }
67e989fb 9UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
10UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
11
33af2bc7 12#ifdef USE_PERLIO
13#include "perliol.h"
14
15typedef struct
16{
17 PerlIOBuf base; /* PerlIOBuf stuff */
18 SV * bufsv;
19 SV * enc;
20} PerlIOEncode;
21
22
23IV
24PerlIOEncode_pushed(PerlIO *f, const char *mode,const char *arg,STRLEN len)
25{
26 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
27 dTHX;
28 dSP;
29 IV code;
30 code = PerlIOBuf_pushed(f,mode,Nullch,0);
31 ENTER;
32 SAVETMPS;
33 PUSHMARK(sp);
34 XPUSHs(sv_2mortal(newSVpv("Encode",0)));
35 XPUSHs(sv_2mortal(newSVpvn(arg,len)));
36 PUTBACK;
37 if (perl_call_method("getEncoding",G_SCALAR) != 1)
38 return -1;
39 SPAGAIN;
40 e->enc = POPs;
41 PUTBACK;
42 if (!SvROK(e->enc))
43 return -1;
44 SvREFCNT_inc(e->enc);
45 FREETMPS;
46 LEAVE;
47 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
48 return code;
49}
50
51IV
52PerlIOEncode_popped(PerlIO *f)
53{
54 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
55 dTHX;
56 if (e->enc)
57 {
58 SvREFCNT_dec(e->enc);
59 e->enc = Nullsv;
60 }
61 if (e->bufsv)
62 {
63 SvREFCNT_dec(e->bufsv);
64 e->bufsv = Nullsv;
65 }
66 return 0;
67}
68
69STDCHAR *
70PerlIOEncode_get_base(PerlIO *f)
71{
72 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
73 dTHX;
74 if (!e->base.bufsiz)
75 e->base.bufsiz = 1024;
76 if (!e->bufsv)
77 {
78 e->bufsv = newSV(e->base.bufsiz);
79 sv_setpvn(e->bufsv,"",0);
80 }
81 e->base.buf = SvPVX(e->bufsv);
82 if (!e->base.ptr)
83 e->base.ptr = e->base.buf;
84 if (!e->base.end)
85 e->base.end = e->base.buf;
86 if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
87 {
88 Perl_warn(aTHX_ " ptr %p(%p)%p",
89 e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
90 abort();
91 }
92 if (SvLEN(e->bufsv) < e->base.bufsiz)
93 {
94 SSize_t poff = e->base.ptr - e->base.buf;
95 SSize_t eoff = e->base.end - e->base.buf;
96 e->base.buf = SvGROW(e->bufsv,e->base.bufsiz);
97 e->base.ptr = e->base.buf + poff;
98 e->base.end = e->base.buf + eoff;
99 }
100 if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
101 {
102 Perl_warn(aTHX_ " ptr %p(%p)%p",
103 e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
104 abort();
105 }
106 return e->base.buf;
107}
108
109static void
110Break(void)
111{
112
113}
114
115IV
116PerlIOEncode_fill(PerlIO *f)
117{
118 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
119 dTHX;
120 dSP;
121 IV code;
122 Break();
123 code = PerlIOBuf_fill(f);
124 if (code == 0)
125 {
126 SV *uni;
127 SvCUR_set(e->bufsv, e->base.end - e->base.buf);
128 SvUTF8_off(e->bufsv);
129 ENTER;
130 SAVETMPS;
131 PUSHMARK(sp);
132 XPUSHs(e->enc);
133 XPUSHs(e->bufsv);
134 XPUSHs(&PL_sv_yes);
135 PUTBACK;
136 if (perl_call_method("toUnicode",G_SCALAR) != 1)
137 code = -1;
138 SPAGAIN;
139 uni = POPs;
140 PUTBACK;
141 sv_setsv(e->bufsv,uni);
142 sv_utf8_upgrade(e->bufsv);
143 e->base.buf = SvPVX(e->bufsv);
144 e->base.end = e->base.buf+SvCUR(e->bufsv);
145 e->base.ptr = e->base.buf;
146 FREETMPS;
147 LEAVE;
148 }
149 return code;
150}
151
152IV
153PerlIOEncode_flush(PerlIO *f)
154{
155 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
156 IV code = 0;
157 dTHX;
158 if (e->bufsv && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF|PERLIO_F_WRBUF)))
159 {
160 dSP;
161 SV *str;
162 char *s;
163 STRLEN len;
164 ENTER;
165 SAVETMPS;
166 PUSHMARK(sp);
167 XPUSHs(e->enc);
168 SvCUR_set(e->bufsv, e->base.end - e->base.buf);
169 SvUTF8_on(e->bufsv);
170 XPUSHs(e->bufsv);
171 XPUSHs(&PL_sv_yes);
172 PUTBACK;
173 if (perl_call_method("fromUnicode",G_SCALAR) != 1)
174 code = -1;
175 SPAGAIN;
176 str = POPs;
177 PUTBACK;
178 sv_setsv(e->bufsv,str);
179 SvUTF8_off(e->bufsv);
180 e->base.buf = SvPVX(e->bufsv);
181 e->base.ptr = e->base.buf+SvCUR(e->bufsv);
182 FREETMPS;
183 LEAVE;
184 if (PerlIOBuf_flush(f) != 0)
185 code = -1;
186 }
187 return code;
188}
189
190IV
191PerlIOEncode_close(PerlIO *f)
192{
193 PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
194 IV code = PerlIOBase_close(f);
195 dTHX;
196 if (e->bufsv)
197 {
198 SvREFCNT_dec(e->bufsv);
199 e->bufsv = Nullsv;
200 }
201 e->base.buf = NULL;
202 e->base.ptr = NULL;
203 e->base.end = NULL;
204 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF|PERLIO_F_WRBUF);
205 return code;
206}
207
208PerlIO_funcs PerlIO_encode = {
209 "encode",
210 sizeof(PerlIOEncode),
211 PERLIO_K_BUFFERED,
212 PerlIOBase_fileno,
213 PerlIOBuf_fdopen,
214 PerlIOBuf_open,
215 PerlIOBuf_reopen,
216 PerlIOEncode_pushed,
217 PerlIOEncode_popped,
218 PerlIOBuf_read,
219 PerlIOBuf_unread,
220 PerlIOBuf_write,
221 PerlIOBuf_seek,
222 PerlIOBuf_tell,
223 PerlIOEncode_close,
224 PerlIOEncode_flush,
225 PerlIOEncode_fill,
226 PerlIOBase_eof,
227 PerlIOBase_error,
228 PerlIOBase_clearerr,
229 PerlIOBuf_setlinebuf,
230 PerlIOEncode_get_base,
231 PerlIOBuf_bufsiz,
232 PerlIOBuf_get_ptr,
233 PerlIOBuf_get_cnt,
234 PerlIOBuf_set_ptrcnt,
235};
236#endif
237
183a2d84 238void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {}
67e989fb 239
240MODULE = Encode PACKAGE = Encode
2c674647 241
242PROTOTYPES: ENABLE
243
67e989fb 244I32
2c674647 245_bytes_to_utf8(sv, ...)
67e989fb 246 SV * sv
2c674647 247 CODE:
67e989fb 248 {
249 SV * encoding = items == 2 ? ST(1) : Nullsv;
250
251 if (encoding)
252 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
253 else {
254 STRLEN len;
183a2d84 255 U8* s = (U8*)SvPV(sv, len);
67e989fb 256 U8* converted;
257
258 converted = bytes_to_utf8(s, &len); /* This allocs */
183a2d84 259 sv_setpvn(sv, (char *)converted, len);
67e989fb 260 SvUTF8_on(sv); /* XXX Should we? */
261 Safefree(converted); /* ... so free it */
262 RETVAL = len;
263 }
264 }
2c674647 265 OUTPUT:
67e989fb 266 RETVAL
2c674647 267
67e989fb 268I32
2c674647 269_utf8_to_bytes(sv, ...)
67e989fb 270 SV * sv
2c674647 271 CODE:
67e989fb 272 {
273 SV * to = items > 1 ? ST(1) : Nullsv;
274 SV * check = items > 2 ? ST(2) : Nullsv;
87714904 275
67e989fb 276 if (to)
277 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
278 else {
67e989fb 279 STRLEN len;
b113ac0e 280 U8 *s = (U8*)SvPV(sv, len);
67e989fb 281
282 if (SvTRUE(check)) {
283 /* Must do things the slow way */
284 U8 *dest;
87714904 285 U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */
67e989fb 286 U8 *send = s + len;
287
288 New(83, dest, len, U8); /* I think */
289
290 while (s < send) {
291 if (*s < 0x80)
292 *dest++ = *s++;
293 else {
b113ac0e 294 STRLEN ulen;
295 UV uv = *s++;
87714904 296
67e989fb 297 /* Have to do it all ourselves because of error routine,
298 aargh. */
299 if (!(uv & 0x40))
300 goto failure;
301 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
302 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
303 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
304 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
305 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
306 else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
307 else { ulen = 13; uv = 0; }
87714904 308
67e989fb 309 /* Note change to utf8.c variable naming, for variety */
310 while (ulen--) {
311 if ((*s & 0xc0) != 0x80)
312 goto failure;
87714904 313
67e989fb 314 else
315 uv = (uv << 6) | (*s++ & 0x3f);
87714904 316 }
67e989fb 317 if (uv > 256) {
318 failure:
319 call_failure(check, s, dest, src);
320 /* Now what happens? */
321 }
322 *dest++ = (U8)uv;
323 }
324 }
325 } else
326 RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
327 }
2c674647 328 }
329 OUTPUT:
330 RETVAL
331
332SV *
333_chars_to_utf8(sv, from, ...)
334 SV * sv
335 SV * from
336 CODE:
337 {
338 SV * check = items == 3 ? ST(2) : Nullsv;
339 RETVAL = &PL_sv_undef;
340 }
341 OUTPUT:
342 RETVAL
343
344SV *
345_utf8_to_chars(sv, to, ...)
346 SV * sv
347 SV * to
348 CODE:
349 {
350 SV * check = items == 3 ? ST(2) : Nullsv;
351 RETVAL = &PL_sv_undef;
352 }
353 OUTPUT:
354 RETVAL
355
356SV *
357_utf8_to_chars_check(sv, ...)
358 SV * sv
359 CODE:
360 {
361 SV * check = items == 2 ? ST(1) : Nullsv;
362 RETVAL = &PL_sv_undef;
363 }
364 OUTPUT:
365 RETVAL
366
367SV *
368_bytes_to_chars(sv, from, ...)
369 SV * sv
370 SV * from
371 CODE:
372 {
373 SV * check = items == 3 ? ST(2) : Nullsv;
374 RETVAL = &PL_sv_undef;
375 }
376 OUTPUT:
377 RETVAL
378
379SV *
380_chars_to_bytes(sv, to, ...)
381 SV * sv
382 SV * to
383 CODE:
384 {
385 SV * check = items == 3 ? ST(2) : Nullsv;
386 RETVAL = &PL_sv_undef;
387 }
388 OUTPUT:
389 RETVAL
390
391SV *
392_from_to(sv, from, to, ...)
393 SV * sv
394 SV * from
395 SV * to
396 CODE:
397 {
398 SV * check = items == 4 ? ST(3) : Nullsv;
399 RETVAL = &PL_sv_undef;
400 }
401 OUTPUT:
402 RETVAL
403
404bool
405_is_utf8(sv, ...)
406 SV * sv
407 CODE:
408 {
409 SV * check = items == 2 ? ST(1) : Nullsv;
410 if (SvPOK(sv)) {
067a85ef 411 RETVAL = SvUTF8(sv) ? 1 : 0;
2c674647 412 if (RETVAL &&
413 SvTRUE(check) &&
414 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
415 RETVAL = FALSE;
416 } else {
417 RETVAL = FALSE;
418 }
419 }
420 OUTPUT:
421 RETVAL
422
423SV *
424_on_utf8(sv)
425 SV * sv
426 CODE:
427 {
428 if (SvPOK(sv)) {
87714904 429 SV *rsv = newSViv(SvUTF8(sv));
2c674647 430 RETVAL = rsv;
431 SvUTF8_on(sv);
432 } else {
433 RETVAL = &PL_sv_undef;
434 }
435 }
436 OUTPUT:
437 RETVAL
438
439SV *
440_off_utf8(sv)
441 SV * sv
442 CODE:
443 {
444 if (SvPOK(sv)) {
87714904 445 SV *rsv = newSViv(SvUTF8(sv));
2c674647 446 RETVAL = rsv;
447 SvUTF8_off(sv);
448 } else {
449 RETVAL = &PL_sv_undef;
450 }
451 }
452 OUTPUT:
453 RETVAL
454
455SV *
456_utf_to_utf(sv, from, to, ...)
457 SV * sv
458 SV * from
459 SV * to
460 CODE:
461 {
462 SV * check = items == 4 ? ST(3) : Nullsv;
463 RETVAL = &PL_sv_undef;
464 }
465 OUTPUT:
466 RETVAL
467
33af2bc7 468BOOT:
469{
470#ifdef USE_PERLIO
471 PerlIO_define_layer(&PerlIO_encode);
472#endif
473}