Commit | Line | Data |
2c674647 |
1 | #include "EXTERN.h" |
2 | #include "perl.h" |
3 | #include "XSUB.h" |
4 | |
67e989fb |
5 | #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) { \ |
6 | Perl_croak("panic_unimplemented"); \ |
7 | } |
8 | UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) |
9 | UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) |
10 | |
11 | void call_failure (SV *routine, U8* done, U8* dest, U8* orig); |
12 | |
13 | MODULE = Encode PACKAGE = Encode |
2c674647 |
14 | |
15 | PROTOTYPES: ENABLE |
16 | |
67e989fb |
17 | I32 |
2c674647 |
18 | _bytes_to_utf8(sv, ...) |
67e989fb |
19 | SV * sv |
2c674647 |
20 | CODE: |
67e989fb |
21 | { |
22 | SV * encoding = items == 2 ? ST(1) : Nullsv; |
23 | |
24 | if (encoding) |
25 | RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); |
26 | else { |
27 | STRLEN len; |
28 | U8* s = SvPV(sv, len); |
29 | U8* converted; |
30 | |
31 | converted = bytes_to_utf8(s, &len); /* This allocs */ |
32 | sv_setpvn(sv, converted, len); |
33 | SvUTF8_on(sv); /* XXX Should we? */ |
34 | Safefree(converted); /* ... so free it */ |
35 | RETVAL = len; |
36 | } |
37 | } |
2c674647 |
38 | OUTPUT: |
67e989fb |
39 | RETVAL |
2c674647 |
40 | |
67e989fb |
41 | I32 |
2c674647 |
42 | _utf8_to_bytes(sv, ...) |
67e989fb |
43 | SV * sv |
2c674647 |
44 | CODE: |
67e989fb |
45 | { |
46 | SV * to = items > 1 ? ST(1) : Nullsv; |
47 | SV * check = items > 2 ? ST(2) : Nullsv; |
48 | |
49 | if (to) |
50 | RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); |
51 | else { |
52 | U8 *s; |
53 | STRLEN len; |
54 | s = SvPV(sv, len); |
55 | |
56 | if (SvTRUE(check)) { |
57 | /* Must do things the slow way */ |
58 | U8 *dest; |
59 | U8 *src = savepv(s); /* We need a copy to pass to check() */ |
60 | U8 *send = s + len; |
61 | |
62 | New(83, dest, len, U8); /* I think */ |
63 | |
64 | while (s < send) { |
65 | if (*s < 0x80) |
66 | *dest++ = *s++; |
67 | else { |
68 | I32 ulen; |
69 | I32 byte; |
70 | I32 uv = *s++; |
71 | |
72 | /* Have to do it all ourselves because of error routine, |
73 | aargh. */ |
74 | if (!(uv & 0x40)) |
75 | goto failure; |
76 | if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } |
77 | else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } |
78 | else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } |
79 | else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } |
80 | else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } |
81 | else if (!(uv & 0x01)) { ulen = 7; uv = 0; } |
82 | else { ulen = 13; uv = 0; } |
83 | |
84 | /* Note change to utf8.c variable naming, for variety */ |
85 | while (ulen--) { |
86 | if ((*s & 0xc0) != 0x80) |
87 | goto failure; |
88 | |
89 | else |
90 | uv = (uv << 6) | (*s++ & 0x3f); |
91 | } |
92 | if (uv > 256) { |
93 | failure: |
94 | call_failure(check, s, dest, src); |
95 | /* Now what happens? */ |
96 | } |
97 | *dest++ = (U8)uv; |
98 | } |
99 | } |
100 | } else |
101 | RETVAL = (utf8_to_bytes(s, &len) ? len : 0); |
102 | } |
2c674647 |
103 | } |
104 | OUTPUT: |
105 | RETVAL |
106 | |
107 | SV * |
108 | _chars_to_utf8(sv, from, ...) |
109 | SV * sv |
110 | SV * from |
111 | CODE: |
112 | { |
113 | SV * check = items == 3 ? ST(2) : Nullsv; |
114 | RETVAL = &PL_sv_undef; |
115 | } |
116 | OUTPUT: |
117 | RETVAL |
118 | |
119 | SV * |
120 | _utf8_to_chars(sv, to, ...) |
121 | SV * sv |
122 | SV * to |
123 | CODE: |
124 | { |
125 | SV * check = items == 3 ? ST(2) : Nullsv; |
126 | RETVAL = &PL_sv_undef; |
127 | } |
128 | OUTPUT: |
129 | RETVAL |
130 | |
131 | SV * |
132 | _utf8_to_chars_check(sv, ...) |
133 | SV * sv |
134 | CODE: |
135 | { |
136 | SV * check = items == 2 ? ST(1) : Nullsv; |
137 | RETVAL = &PL_sv_undef; |
138 | } |
139 | OUTPUT: |
140 | RETVAL |
141 | |
142 | SV * |
143 | _bytes_to_chars(sv, from, ...) |
144 | SV * sv |
145 | SV * from |
146 | CODE: |
147 | { |
148 | SV * check = items == 3 ? ST(2) : Nullsv; |
149 | RETVAL = &PL_sv_undef; |
150 | } |
151 | OUTPUT: |
152 | RETVAL |
153 | |
154 | SV * |
155 | _chars_to_bytes(sv, to, ...) |
156 | SV * sv |
157 | SV * to |
158 | CODE: |
159 | { |
160 | SV * check = items == 3 ? ST(2) : Nullsv; |
161 | RETVAL = &PL_sv_undef; |
162 | } |
163 | OUTPUT: |
164 | RETVAL |
165 | |
166 | SV * |
167 | _from_to(sv, from, to, ...) |
168 | SV * sv |
169 | SV * from |
170 | SV * to |
171 | CODE: |
172 | { |
173 | SV * check = items == 4 ? ST(3) : Nullsv; |
174 | RETVAL = &PL_sv_undef; |
175 | } |
176 | OUTPUT: |
177 | RETVAL |
178 | |
179 | bool |
180 | _is_utf8(sv, ...) |
181 | SV * sv |
182 | CODE: |
183 | { |
184 | SV * check = items == 2 ? ST(1) : Nullsv; |
185 | if (SvPOK(sv)) { |
186 | RETVAL = SvUTF8(sv); |
187 | if (RETVAL && |
188 | SvTRUE(check) && |
189 | !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) |
190 | RETVAL = FALSE; |
191 | } else { |
192 | RETVAL = FALSE; |
193 | } |
194 | } |
195 | OUTPUT: |
196 | RETVAL |
197 | |
198 | SV * |
199 | _on_utf8(sv) |
200 | SV * sv |
201 | CODE: |
202 | { |
203 | if (SvPOK(sv)) { |
204 | SV *rsv = newSViv(SvUTF8(sv)); |
205 | sv_2mortal(rsv); |
206 | RETVAL = rsv; |
207 | SvUTF8_on(sv); |
208 | } else { |
209 | RETVAL = &PL_sv_undef; |
210 | } |
211 | } |
212 | OUTPUT: |
213 | RETVAL |
214 | |
215 | SV * |
216 | _off_utf8(sv) |
217 | SV * sv |
218 | CODE: |
219 | { |
220 | if (SvPOK(sv)) { |
221 | SV *rsv = newSViv(SvUTF8(sv)); |
222 | sv_2mortal(rsv); |
223 | RETVAL = rsv; |
224 | SvUTF8_off(sv); |
225 | } else { |
226 | RETVAL = &PL_sv_undef; |
227 | } |
228 | } |
229 | OUTPUT: |
230 | RETVAL |
231 | |
232 | SV * |
233 | _utf_to_utf(sv, from, to, ...) |
234 | SV * sv |
235 | SV * from |
236 | SV * to |
237 | CODE: |
238 | { |
239 | SV * check = items == 4 ? ST(3) : Nullsv; |
240 | RETVAL = &PL_sv_undef; |
241 | } |
242 | OUTPUT: |
243 | RETVAL |
244 | |