Commit | Line | Data |
85982a32 |
1 | /* |
05ef2f67 |
2 | $Id: Encode.xs,v 1.55 2003/02/28 01:40:27 dankogai Exp $ |
85982a32 |
3 | */ |
4 | |
fcf2db38 |
5 | #define PERL_NO_GET_CONTEXT |
2c674647 |
6 | #include "EXTERN.h" |
7 | #include "perl.h" |
8 | #include "XSUB.h" |
2f2b4ff2 |
9 | #define U8 U8 |
10 | #include "encode.h" |
10c5ecbb |
11 | |
12 | # define PERLIO_MODNAME "PerlIO::encoding" |
0ab8f81e |
13 | # define PERLIO_FILENAME "PerlIO/encoding.pm" |
aae85ceb |
14 | |
85982a32 |
15 | /* set 1 or more to profile. t/encoding.t dumps core because of |
16 | Perl_warner and PerlIO don't work well */ |
c6a7db43 |
17 | #define ENCODE_XS_PROFILE 0 |
39cf9a5e |
18 | |
85982a32 |
19 | /* set 0 to disable floating point to calculate buffer size for |
20 | encode_method(). 1 is recommended. 2 restores NI-S original */ |
c6a7db43 |
21 | #define ENCODE_XS_USEFP 1 |
39cf9a5e |
22 | |
fcf2db38 |
23 | #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \ |
2f5768b8 |
24 | Perl_croak(aTHX_ "panic_unimplemented"); \ |
4a83738a |
25 | return (y)0; /* fool picky compilers */ \ |
87714904 |
26 | } |
85982a32 |
27 | /**/ |
011b2d2f |
28 | |
7e9a885a |
29 | UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) |
30 | UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) |
33af2bc7 |
31 | |
b0b300a3 |
32 | void |
aa0053b7 |
33 | Encode_XSEncoding(pTHX_ encode_t * enc) |
2f2b4ff2 |
34 | { |
aa0053b7 |
35 | dSP; |
36 | HV *stash = gv_stashpv("Encode::XS", TRUE); |
37 | SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash); |
38 | int i = 0; |
39 | PUSHMARK(sp); |
40 | XPUSHs(sv); |
41 | while (enc->name[i]) { |
42 | const char *name = enc->name[i++]; |
43 | XPUSHs(sv_2mortal(newSVpvn(name, strlen(name)))); |
44 | } |
45 | PUTBACK; |
46 | call_pv("Encode::define_encoding", G_DISCARD); |
47 | SvREFCNT_dec(sv); |
2f2b4ff2 |
48 | } |
49 | |
aa0053b7 |
50 | void |
51 | call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) |
52 | { |
85982a32 |
53 | /* Exists for breakpointing */ |
aa0053b7 |
54 | } |
67e989fb |
55 | |
85982a32 |
56 | |
2fc614e0 |
57 | #define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s" |
58 | #define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode" |
59 | |
2f2b4ff2 |
60 | static SV * |
aa0053b7 |
61 | encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, |
220e2d4e |
62 | int check, STRLEN * offset, SV * term, int * retcode) |
2f2b4ff2 |
63 | { |
aa0053b7 |
64 | STRLEN slen; |
65 | U8 *s = (U8 *) SvPV(src, slen); |
3aececda |
66 | STRLEN tlen = slen; |
67 | STRLEN ddone = 0; |
68 | STRLEN sdone = 0; |
39cf9a5e |
69 | |
3c49ab08 |
70 | /* We allocate slen+1. |
85982a32 |
71 | PerlIO dumps core if this value is smaller than this. */ |
3c49ab08 |
72 | SV *dst = sv_2mortal(newSV(slen+1)); |
85982a32 |
73 | U8 *d = (U8 *)SvPVX(dst); |
74 | STRLEN dlen = SvLEN(dst)-1; |
220e2d4e |
75 | int code = 0; |
76 | STRLEN trmlen = 0; |
cc7dbc11 |
77 | U8 *trm = term ? (U8*) SvPV(term, trmlen) : NULL; |
220e2d4e |
78 | |
79 | if (offset) { |
80 | s += *offset; |
6be7c101 |
81 | if (slen > *offset){ /* safeguard against slen overflow */ |
82 | slen -= *offset; |
83 | }else{ |
84 | slen = 0; |
85 | } |
220e2d4e |
86 | tlen = slen; |
87 | } |
85982a32 |
88 | |
6be7c101 |
89 | if (slen == 0){ |
85982a32 |
90 | SvCUR_set(dst, 0); |
91 | SvPOK_only(dst); |
92 | goto ENCODE_END; |
93 | } |
94 | |
220e2d4e |
95 | while( (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check, |
96 | trm, trmlen)) ) |
85982a32 |
97 | { |
98 | SvCUR_set(dst, dlen+ddone); |
99 | SvPOK_only(dst); |
0b3236bb |
100 | |
220e2d4e |
101 | if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL || |
102 | code == ENCODE_FOUND_TERM) { |
85982a32 |
103 | break; |
104 | } |
105 | switch (code) { |
106 | case ENCODE_NOSPACE: |
107 | { |
108 | STRLEN more = 0; /* make sure you initialize! */ |
109 | STRLEN sleft; |
110 | sdone += slen; |
111 | ddone += dlen; |
112 | sleft = tlen - sdone; |
fcb875d4 |
113 | #if ENCODE_XS_PROFILE >= 2 |
85982a32 |
114 | Perl_warn(aTHX_ |
115 | "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n", |
116 | more, sdone, sleft, SvLEN(dst)); |
fcb875d4 |
117 | #endif |
85982a32 |
118 | if (sdone != 0) { /* has src ever been processed ? */ |
39cf9a5e |
119 | #if ENCODE_XS_USEFP == 2 |
85982a32 |
120 | more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone |
121 | - SvLEN(dst); |
39cf9a5e |
122 | #elif ENCODE_XS_USEFP |
6e21dc91 |
123 | more = (STRLEN)((1.0*SvLEN(dst)+1)/sdone * sleft); |
39cf9a5e |
124 | #else |
85982a32 |
125 | /* safe until SvLEN(dst) == MAX_INT/16 */ |
126 | more = (16*SvLEN(dst)+1)/sdone/16 * sleft; |
39cf9a5e |
127 | #endif |
39cf9a5e |
128 | } |
85982a32 |
129 | more += UTF8_MAXLEN; /* insurance policy */ |
130 | d = (U8 *) SvGROW(dst, SvLEN(dst) + more); |
131 | /* dst need to grow need MORE bytes! */ |
132 | if (ddone >= SvLEN(dst)) { |
133 | Perl_croak(aTHX_ "Destination couldn't be grown."); |
134 | } |
135 | dlen = SvLEN(dst)-ddone-1; |
136 | d += ddone; |
137 | s += slen; |
138 | slen = tlen-sdone; |
139 | continue; |
140 | } |
141 | case ENCODE_NOREP: |
142 | /* encoding */ |
c6a7db43 |
143 | if (dir == enc->f_utf8) { |
85982a32 |
144 | STRLEN clen; |
145 | UV ch = |
3e952a88 |
146 | utf8n_to_uvuni(s+slen, (SvCUR(src)-slen), |
b0b300a3 |
147 | &clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY); |
85982a32 |
148 | if (check & ENCODE_DIE_ON_ERR) { |
2fc614e0 |
149 | Perl_croak(aTHX_ ERR_ENCODE_NOMAP, |
150 | (UV)ch, enc->name[0]); |
4089adc4 |
151 | return &PL_sv_undef; /* never reaches but be safe */ |
152 | } |
153 | if (check & ENCODE_WARN_ON_ERR){ |
154 | Perl_warner(aTHX_ packWARN(WARN_UTF8), |
2fc614e0 |
155 | ERR_ENCODE_NOMAP, (UV)ch, enc->name[0]); |
4089adc4 |
156 | } |
157 | if (check & ENCODE_RETURN_ON_ERR){ |
158 | goto ENCODE_SET_SRC; |
159 | } |
160 | if (check & ENCODE_PERLQQ){ |
161 | SV* perlqq = |
162 | sv_2mortal(newSVpvf("\\x{%04"UVxf"}", (UV)ch)); |
163 | sdone += slen + clen; |
164 | ddone += dlen + SvCUR(perlqq); |
165 | sv_catsv(dst, perlqq); |
166 | }else if (check & ENCODE_HTMLCREF){ |
167 | SV* htmlcref = |
168 | sv_2mortal(newSVpvf("&#%" UVuf ";", (UV)ch)); |
169 | sdone += slen + clen; |
170 | ddone += dlen + SvCUR(htmlcref); |
171 | sv_catsv(dst, htmlcref); |
172 | }else if (check & ENCODE_XMLCREF){ |
173 | SV* xmlcref = |
174 | sv_2mortal(newSVpvf("&#x%" UVxf ";", (UV)ch)); |
175 | sdone += slen + clen; |
176 | ddone += dlen + SvCUR(xmlcref); |
177 | sv_catsv(dst, xmlcref); |
178 | } else { |
179 | /* fallback char */ |
180 | sdone += slen + clen; |
181 | ddone += dlen + enc->replen; |
182 | sv_catpvn(dst, (char*)enc->rep, enc->replen); |
c6a7db43 |
183 | } |
b2704119 |
184 | } |
85982a32 |
185 | /* decoding */ |
c6a7db43 |
186 | else { |
85982a32 |
187 | if (check & ENCODE_DIE_ON_ERR){ |
2fc614e0 |
188 | Perl_croak(aTHX_ ERR_DECODE_NOMAP, |
436c6dd3 |
189 | enc->name[0], (UV)s[slen]); |
4089adc4 |
190 | return &PL_sv_undef; /* never reaches but be safe */ |
191 | } |
192 | if (check & ENCODE_WARN_ON_ERR){ |
193 | Perl_warner( |
194 | aTHX_ packWARN(WARN_UTF8), |
2fc614e0 |
195 | ERR_DECODE_NOMAP, |
436c6dd3 |
196 | enc->name[0], (UV)s[slen]); |
4089adc4 |
197 | } |
198 | if (check & ENCODE_RETURN_ON_ERR){ |
199 | goto ENCODE_SET_SRC; |
200 | } |
201 | if (check & |
202 | (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ |
203 | SV* perlqq = |
204 | sv_2mortal(newSVpvf("\\x%02" UVXf, (UV)s[slen])); |
205 | sdone += slen + 1; |
206 | ddone += dlen + SvCUR(perlqq); |
207 | sv_catsv(dst, perlqq); |
208 | } else { |
209 | sdone += slen + 1; |
210 | ddone += dlen + strlen(FBCHAR_UTF8); |
211 | sv_catpv(dst, FBCHAR_UTF8); |
aa0053b7 |
212 | } |
b2704119 |
213 | } |
85982a32 |
214 | /* settle variables when fallback */ |
b0b300a3 |
215 | d = (U8 *)SvEND(dst); |
216 | dlen = SvLEN(dst) - ddone - 1; |
3e952a88 |
217 | s = (U8*)SvPVX(src) + sdone; |
b2704119 |
218 | slen = tlen - sdone; |
219 | break; |
2f2b4ff2 |
220 | |
85982a32 |
221 | default: |
222 | Perl_croak(aTHX_ "Unexpected code %d converting %s %s", |
223 | code, (dir == enc->f_utf8) ? "to" : "from", |
224 | enc->name[0]); |
225 | return &PL_sv_undef; |
aa0053b7 |
226 | } |
85982a32 |
227 | } |
228 | ENCODE_SET_SRC: |
ca777f1c |
229 | if (check && !(check & ENCODE_LEAVE_SRC)){ |
230 | sdone = SvCUR(src) - (slen+sdone); |
85982a32 |
231 | if (sdone) { |
232 | sv_setpvn(src, (char*)s+slen, sdone); |
aa0053b7 |
233 | } |
85982a32 |
234 | SvCUR_set(src, sdone); |
2f2b4ff2 |
235 | } |
85982a32 |
236 | /* warn("check = 0x%X, code = 0x%d\n", check, code); */ |
c6a7db43 |
237 | |
85982a32 |
238 | SvCUR_set(dst, dlen+ddone); |
239 | SvPOK_only(dst); |
c6a7db43 |
240 | |
39cf9a5e |
241 | #if ENCODE_XS_PROFILE |
242 | if (SvCUR(dst) > SvCUR(src)){ |
85982a32 |
243 | Perl_warn(aTHX_ |
244 | "SvLEN(dst)=%d, SvCUR(dst)=%d. %d bytes unused(%f %%)\n", |
245 | SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst), |
246 | (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0); |
39cf9a5e |
247 | } |
3c49ab08 |
248 | #endif |
c6a7db43 |
249 | |
220e2d4e |
250 | if (offset) |
251 | *offset += sdone + slen; |
252 | |
85982a32 |
253 | ENCODE_END: |
0b3236bb |
254 | *SvEND(dst) = '\0'; |
220e2d4e |
255 | if (retcode) *retcode = code; |
aa0053b7 |
256 | return dst; |
2f2b4ff2 |
257 | } |
258 | |
ab3374e4 |
259 | MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_ |
260 | |
261 | void |
b536bf57 |
262 | Method_decode_xs(obj,src,check = 0) |
ab3374e4 |
263 | SV * obj |
264 | SV * src |
265 | int check |
266 | CODE: |
267 | { |
268 | STRLEN slen; |
269 | U8 *s = (U8 *) SvPV(src, slen); |
270 | U8 *e = (U8 *) SvEND(src); |
b536bf57 |
271 | SV *dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */ |
ab3374e4 |
272 | SvPOK_only(dst); |
273 | SvCUR_set(dst,0); |
274 | if (SvUTF8(src)) { |
275 | s = utf8_to_bytes(s,&slen); |
276 | if (s) { |
277 | SvCUR_set(src,slen); |
278 | SvUTF8_off(src); |
279 | e = s+slen; |
280 | } |
281 | else { |
282 | croak("Cannot decode string with wide characters"); |
283 | } |
284 | } |
285 | while (s < e) { |
286 | if (UTF8_IS_INVARIANT(*s) || UTF8_IS_START(*s)) { |
287 | U8 skip = UTF8SKIP(s); |
288 | if ((s + skip) > e) { |
289 | /* Partial character - done */ |
290 | break; |
291 | } |
292 | else if (is_utf8_char(s)) { |
293 | /* Whole char is good */ |
294 | sv_catpvn(dst,(char *)s,skip); |
295 | s += skip; |
296 | continue; |
297 | } |
298 | else { |
299 | /* starts ok but isn't "good" */ |
300 | } |
301 | } |
302 | else { |
303 | /* Invalid start byte */ |
304 | } |
305 | /* If we get here there is something wrong with alleged UTF-8 */ |
306 | if (check & ENCODE_DIE_ON_ERR){ |
307 | Perl_croak(aTHX_ ERR_DECODE_NOMAP, "utf8", (UV)*s); |
308 | XSRETURN(0); |
309 | } |
310 | if (check & ENCODE_WARN_ON_ERR){ |
311 | Perl_warner(aTHX_ packWARN(WARN_UTF8), |
312 | ERR_DECODE_NOMAP, "utf8", (UV)*s); |
313 | } |
314 | if (check & ENCODE_RETURN_ON_ERR) { |
315 | break; |
316 | } |
317 | if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ |
318 | SV* perlqq = newSVpvf("\\x%02" UVXf, (UV)*s); |
319 | sv_catsv(dst, perlqq); |
320 | SvREFCNT_dec(perlqq); |
321 | } else { |
322 | sv_catpv(dst, FBCHAR_UTF8); |
323 | } |
324 | s++; |
325 | } |
326 | *SvEND(dst) = '\0'; |
327 | |
328 | /* Clear out translated part of source unless asked not to */ |
329 | if (check && !(check & ENCODE_LEAVE_SRC)){ |
330 | slen = e-s; |
331 | if (slen) { |
332 | sv_setpvn(src, (char*)s, slen); |
333 | } |
334 | SvCUR_set(src, slen); |
335 | } |
336 | SvUTF8_on(dst); |
337 | ST(0) = sv_2mortal(dst); |
338 | XSRETURN(1); |
339 | } |
340 | |
341 | void |
b536bf57 |
342 | Method_encode_xs(obj,src,check = 0) |
ab3374e4 |
343 | SV * obj |
344 | SV * src |
345 | int check |
346 | CODE: |
347 | { |
348 | STRLEN slen; |
349 | U8 *s = (U8 *) SvPV(src, slen); |
350 | U8 *e = (U8 *) SvEND(src); |
b536bf57 |
351 | SV *dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */ |
ab3374e4 |
352 | if (SvUTF8(src)) { |
353 | /* Already encoded - trust it and just copy the octets */ |
354 | sv_setpvn(dst,(char *)s,(e-s)); |
355 | s = e; |
356 | } |
357 | else { |
358 | /* Native bytes - can always encode */ |
b536bf57 |
359 | U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */ |
ab3374e4 |
360 | while (s < e) { |
361 | UV uv = NATIVE_TO_UNI((UV) *s++); |
362 | if (UNI_IS_INVARIANT(uv)) |
363 | *d++ = (U8)UTF_TO_NATIVE(uv); |
364 | else { |
365 | *d++ = (U8)UTF8_EIGHT_BIT_HI(uv); |
366 | *d++ = (U8)UTF8_EIGHT_BIT_LO(uv); |
367 | } |
368 | } |
369 | SvCUR_set(dst, d- (U8 *)SvPVX(dst)); |
370 | *SvEND(dst) = '\0'; |
371 | } |
372 | |
373 | /* Clear out translated part of source unless asked not to */ |
374 | if (check && !(check & ENCODE_LEAVE_SRC)){ |
375 | slen = e-s; |
376 | if (slen) { |
377 | sv_setpvn(src, (char*)s, slen); |
378 | } |
379 | SvCUR_set(src, slen); |
380 | } |
381 | SvPOK_only(dst); |
382 | SvUTF8_off(dst); |
383 | ST(0) = sv_2mortal(dst); |
384 | XSRETURN(1); |
385 | } |
386 | |
50d26985 |
387 | MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ |
2f2b4ff2 |
388 | |
389 | PROTOTYPES: ENABLE |
390 | |
391 | void |
0a95303c |
392 | Method_name(obj) |
393 | SV * obj |
394 | CODE: |
85982a32 |
395 | { |
396 | encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
397 | ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0]))); |
398 | XSRETURN(1); |
399 | } |
0a95303c |
400 | |
401 | void |
220e2d4e |
402 | Method_cat_decode(obj, dst, src, off, term, check = 0) |
403 | SV * obj |
404 | SV * dst |
405 | SV * src |
406 | SV * off |
407 | SV * term |
408 | int check |
409 | CODE: |
410 | { |
411 | encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
412 | STRLEN offset = (STRLEN)SvIV(off); |
413 | int code = 0; |
414 | if (SvUTF8(src)) { |
415 | sv_utf8_downgrade(src, FALSE); |
416 | } |
417 | sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, check, |
418 | &offset, term, &code)); |
419 | SvIVX(off) = (IV)offset; |
420 | if (code == ENCODE_FOUND_TERM) { |
421 | ST(0) = &PL_sv_yes; |
422 | }else{ |
423 | ST(0) = &PL_sv_no; |
424 | } |
425 | XSRETURN(1); |
426 | } |
427 | |
428 | void |
b2704119 |
429 | Method_decode(obj,src,check = 0) |
2f2b4ff2 |
430 | SV * obj |
431 | SV * src |
b2704119 |
432 | int check |
2f2b4ff2 |
433 | CODE: |
aae85ceb |
434 | { |
85982a32 |
435 | encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
ab3374e4 |
436 | if (SvUTF8(src)) { |
437 | sv_utf8_downgrade(src, FALSE); |
438 | } |
220e2d4e |
439 | ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check, |
440 | NULL, Nullsv, NULL); |
85982a32 |
441 | SvUTF8_on(ST(0)); |
aae85ceb |
442 | XSRETURN(1); |
443 | } |
444 | |
445 | void |
85982a32 |
446 | Method_encode(obj,src,check = 0) |
aae85ceb |
447 | SV * obj |
85982a32 |
448 | SV * src |
449 | int check |
aae85ceb |
450 | CODE: |
451 | { |
85982a32 |
452 | encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
453 | sv_utf8_upgrade(src); |
220e2d4e |
454 | ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check, |
455 | NULL, Nullsv, NULL); |
aae85ceb |
456 | XSRETURN(1); |
457 | } |
458 | |
0ab8f81e |
459 | void |
460 | Method_needs_lines(obj) |
461 | SV * obj |
462 | CODE: |
463 | { |
b32afa7c |
464 | /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ |
0ab8f81e |
465 | ST(0) = &PL_sv_no; |
466 | XSRETURN(1); |
467 | } |
468 | |
469 | void |
470 | Method_perlio_ok(obj) |
471 | SV * obj |
472 | CODE: |
473 | { |
b32afa7c |
474 | /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ |
10c5ecbb |
475 | /* require_pv(PERLIO_FILENAME); */ |
476 | |
477 | eval_pv("require PerlIO::encoding", 0); |
478 | |
479 | if (SvTRUE(get_sv("@", 0))) { |
0ab8f81e |
480 | ST(0) = &PL_sv_no; |
10c5ecbb |
481 | }else{ |
482 | ST(0) = &PL_sv_yes; |
0ab8f81e |
483 | } |
484 | XSRETURN(1); |
485 | } |
486 | |
67e989fb |
487 | MODULE = Encode PACKAGE = Encode |
2c674647 |
488 | |
489 | PROTOTYPES: ENABLE |
490 | |
67e989fb |
491 | I32 |
2c674647 |
492 | _bytes_to_utf8(sv, ...) |
85982a32 |
493 | SV * sv |
494 | CODE: |
495 | { |
496 | SV * encoding = items == 2 ? ST(1) : Nullsv; |
c6a7db43 |
497 | |
85982a32 |
498 | if (encoding) |
499 | RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); |
500 | else { |
501 | STRLEN len; |
502 | U8* s = (U8*)SvPV(sv, len); |
503 | U8* converted; |
504 | |
505 | converted = bytes_to_utf8(s, &len); /* This allocs */ |
506 | sv_setpvn(sv, (char *)converted, len); |
507 | SvUTF8_on(sv); /* XXX Should we? */ |
508 | Safefree(converted); /* ... so free it */ |
509 | RETVAL = len; |
510 | } |
511 | } |
512 | OUTPUT: |
513 | RETVAL |
2c674647 |
514 | |
67e989fb |
515 | I32 |
2c674647 |
516 | _utf8_to_bytes(sv, ...) |
85982a32 |
517 | SV * sv |
518 | CODE: |
519 | { |
520 | SV * to = items > 1 ? ST(1) : Nullsv; |
521 | SV * check = items > 2 ? ST(2) : Nullsv; |
522 | |
523 | if (to) { |
524 | RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); |
525 | } else { |
526 | STRLEN len; |
527 | U8 *s = (U8*)SvPV(sv, len); |
528 | |
529 | RETVAL = 0; |
530 | if (SvTRUE(check)) { |
531 | /* Must do things the slow way */ |
532 | U8 *dest; |
533 | /* We need a copy to pass to check() */ |
c6a7db43 |
534 | U8 *src = (U8*)savepv((char *)s); |
85982a32 |
535 | U8 *send = s + len; |
536 | |
537 | New(83, dest, len, U8); /* I think */ |
538 | |
539 | while (s < send) { |
540 | if (*s < 0x80){ |
541 | *dest++ = *s++; |
542 | } else { |
543 | STRLEN ulen; |
544 | UV uv = *s++; |
545 | |
546 | /* Have to do it all ourselves because of error routine, |
547 | aargh. */ |
548 | if (!(uv & 0x40)){ goto failure; } |
549 | if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } |
550 | else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } |
551 | else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } |
552 | else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } |
553 | else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } |
554 | else if (!(uv & 0x01)) { ulen = 7; uv = 0; } |
555 | else { ulen = 13; uv = 0; } |
87714904 |
556 | |
85982a32 |
557 | /* Note change to utf8.c variable naming, for variety */ |
558 | while (ulen--) { |
c6a7db43 |
559 | if ((*s & 0xc0) != 0x80){ |
560 | goto failure; |
85982a32 |
561 | } else { |
562 | uv = (uv << 6) | (*s++ & 0x3f); |
563 | } |
87714904 |
564 | } |
67e989fb |
565 | if (uv > 256) { |
566 | failure: |
85982a32 |
567 | call_failure(check, s, dest, src); |
568 | /* Now what happens? */ |
67e989fb |
569 | } |
570 | *dest++ = (U8)uv; |
85982a32 |
571 | } |
572 | } |
573 | } else { |
574 | RETVAL = (utf8_to_bytes(s, &len) ? len : 0); |
2c674647 |
575 | } |
85982a32 |
576 | } |
577 | } |
578 | OUTPUT: |
579 | RETVAL |
2c674647 |
580 | |
2c674647 |
581 | bool |
b2704119 |
582 | is_utf8(sv, check = 0) |
4411f3b6 |
583 | SV * sv |
b2704119 |
584 | int check |
85982a32 |
585 | CODE: |
586 | { |
587 | if (SvGMAGICAL(sv)) /* it could be $1, for example */ |
588 | sv = newSVsv(sv); /* GMAGIG will be done */ |
589 | if (SvPOK(sv)) { |
590 | RETVAL = SvUTF8(sv) ? TRUE : FALSE; |
591 | if (RETVAL && |
592 | check && |
593 | !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) |
2c674647 |
594 | RETVAL = FALSE; |
85982a32 |
595 | } else { |
596 | RETVAL = FALSE; |
597 | } |
598 | if (sv != ST(0)) |
599 | SvREFCNT_dec(sv); /* it was a temp copy */ |
600 | } |
601 | OUTPUT: |
602 | RETVAL |
2c674647 |
603 | |
604 | SV * |
4411f3b6 |
605 | _utf8_on(sv) |
85982a32 |
606 | SV * sv |
607 | CODE: |
608 | { |
609 | if (SvPOK(sv)) { |
610 | SV *rsv = newSViv(SvUTF8(sv)); |
611 | RETVAL = rsv; |
612 | SvUTF8_on(sv); |
613 | } else { |
614 | RETVAL = &PL_sv_undef; |
615 | } |
616 | } |
617 | OUTPUT: |
618 | RETVAL |
2c674647 |
619 | |
620 | SV * |
4411f3b6 |
621 | _utf8_off(sv) |
85982a32 |
622 | SV * sv |
623 | CODE: |
624 | { |
625 | if (SvPOK(sv)) { |
626 | SV *rsv = newSViv(SvUTF8(sv)); |
627 | RETVAL = rsv; |
628 | SvUTF8_off(sv); |
629 | } else { |
630 | RETVAL = &PL_sv_undef; |
631 | } |
632 | } |
633 | OUTPUT: |
634 | RETVAL |
635 | |
85982a32 |
636 | int |
637 | DIE_ON_ERR() |
638 | CODE: |
639 | RETVAL = ENCODE_DIE_ON_ERR; |
640 | OUTPUT: |
641 | RETVAL |
642 | |
c6a7db43 |
643 | int |
85982a32 |
644 | WARN_ON_ERR() |
645 | CODE: |
646 | RETVAL = ENCODE_WARN_ON_ERR; |
647 | OUTPUT: |
648 | RETVAL |
649 | |
650 | int |
651 | LEAVE_SRC() |
652 | CODE: |
653 | RETVAL = ENCODE_LEAVE_SRC; |
654 | OUTPUT: |
655 | RETVAL |
656 | |
657 | int |
658 | RETURN_ON_ERR() |
659 | CODE: |
660 | RETVAL = ENCODE_RETURN_ON_ERR; |
661 | OUTPUT: |
662 | RETVAL |
663 | |
664 | int |
665 | PERLQQ() |
666 | CODE: |
667 | RETVAL = ENCODE_PERLQQ; |
668 | OUTPUT: |
669 | RETVAL |
670 | |
671 | int |
af1f55d9 |
672 | HTMLCREF() |
673 | CODE: |
674 | RETVAL = ENCODE_HTMLCREF; |
675 | OUTPUT: |
676 | RETVAL |
677 | |
678 | int |
679 | XMLCREF() |
680 | CODE: |
681 | RETVAL = ENCODE_XMLCREF; |
682 | OUTPUT: |
683 | RETVAL |
684 | |
685 | int |
85982a32 |
686 | FB_DEFAULT() |
687 | CODE: |
688 | RETVAL = ENCODE_FB_DEFAULT; |
689 | OUTPUT: |
690 | RETVAL |
691 | |
692 | int |
693 | FB_CROAK() |
694 | CODE: |
695 | RETVAL = ENCODE_FB_CROAK; |
696 | OUTPUT: |
697 | RETVAL |
698 | |
699 | int |
700 | FB_QUIET() |
701 | CODE: |
702 | RETVAL = ENCODE_FB_QUIET; |
703 | OUTPUT: |
704 | RETVAL |
705 | |
706 | int |
707 | FB_WARN() |
708 | CODE: |
709 | RETVAL = ENCODE_FB_WARN; |
710 | OUTPUT: |
711 | RETVAL |
712 | |
713 | int |
714 | FB_PERLQQ() |
715 | CODE: |
716 | RETVAL = ENCODE_FB_PERLQQ; |
717 | OUTPUT: |
718 | RETVAL |
2c674647 |
719 | |
af1f55d9 |
720 | int |
721 | FB_HTMLCREF() |
722 | CODE: |
723 | RETVAL = ENCODE_FB_HTMLCREF; |
724 | OUTPUT: |
725 | RETVAL |
726 | |
727 | int |
728 | FB_XMLCREF() |
729 | CODE: |
730 | RETVAL = ENCODE_FB_XMLCREF; |
731 | OUTPUT: |
732 | RETVAL |
733 | |
33af2bc7 |
734 | BOOT: |
735 | { |
85982a32 |
736 | #include "def_t.h" |
e7cbefb8 |
737 | #include "def_t.exh" |
33af2bc7 |
738 | } |