Commit | Line | Data |
a0ed51b3 |
1 | /* utf8.c |
2 | * |
3 | * Copyright (c) 1998, Larry Wall |
4 | * |
5 | * You may distribute under the terms of either the GNU General Public |
6 | * License or the Artistic License, as specified in the README file. |
7 | * |
8 | */ |
9 | |
10 | /* |
11 | * 'What a fix!' said Sam. 'That's the one place in all the lands we've ever |
12 | * heard of that we don't want to see any closer; and that's the one place |
13 | * we're trying to get to! And that's just where we can't get, nohow.' |
14 | * |
15 | * 'Well do I understand your speech,' he answered in the same language; |
16 | * 'yet few strangers do so. Why then do you not speak in the Common Tongue, |
17 | * as is the custom in the West, if you wish to be answered?' |
18 | * |
19 | * ...the travellers perceived that the floor was paved with stones of many |
20 | * hues; branching runes and strange devices intertwined beneath their feet. |
21 | */ |
22 | |
23 | #include "EXTERN.h" |
24 | #include "perl.h" |
25 | |
26 | /* Unicode support */ |
27 | |
dfe13c55 |
28 | U8 * |
29 | uv_to_utf8(U8 *d, UV uv) |
a0ed51b3 |
30 | { |
31 | if (uv < 0x80) { |
32 | *d++ = uv; |
33 | return d; |
34 | } |
35 | if (uv < 0x800) { |
36 | *d++ = (( uv >> 6) | 0xc0); |
37 | *d++ = (( uv & 0x3f) | 0x80); |
38 | return d; |
39 | } |
40 | if (uv < 0x10000) { |
41 | *d++ = (( uv >> 12) | 0xe0); |
42 | *d++ = (((uv >> 6) & 0x3f) | 0x80); |
43 | *d++ = (( uv & 0x3f) | 0x80); |
44 | return d; |
45 | } |
46 | if (uv < 0x200000) { |
47 | *d++ = (( uv >> 18) | 0xf0); |
48 | *d++ = (((uv >> 12) & 0x3f) | 0x80); |
49 | *d++ = (((uv >> 6) & 0x3f) | 0x80); |
50 | *d++ = (( uv & 0x3f) | 0x80); |
51 | return d; |
52 | } |
53 | if (uv < 0x4000000) { |
54 | *d++ = (( uv >> 24) | 0xf8); |
55 | *d++ = (((uv >> 18) & 0x3f) | 0x80); |
56 | *d++ = (((uv >> 12) & 0x3f) | 0x80); |
57 | *d++ = (((uv >> 6) & 0x3f) | 0x80); |
58 | *d++ = (( uv & 0x3f) | 0x80); |
59 | return d; |
60 | } |
61 | if (uv < 0x80000000) { |
62 | *d++ = (( uv >> 30) | 0xfc); |
63 | *d++ = (((uv >> 24) & 0x3f) | 0x80); |
64 | *d++ = (((uv >> 18) & 0x3f) | 0x80); |
65 | *d++ = (((uv >> 12) & 0x3f) | 0x80); |
66 | *d++ = (((uv >> 6) & 0x3f) | 0x80); |
67 | *d++ = (( uv & 0x3f) | 0x80); |
68 | return d; |
69 | } |
70 | #ifdef Quad_t |
71 | if (uv < 0x2000000000) |
72 | #endif |
73 | { |
74 | *d++ = 0xfe; /* Can't match U+FEFF! */ |
75 | *d++ = (((uv >> 30) & 0x3f) | 0x80); |
76 | *d++ = (((uv >> 24) & 0x3f) | 0x80); |
77 | *d++ = (((uv >> 18) & 0x3f) | 0x80); |
78 | *d++ = (((uv >> 12) & 0x3f) | 0x80); |
79 | *d++ = (((uv >> 6) & 0x3f) | 0x80); |
80 | *d++ = (( uv & 0x3f) | 0x80); |
81 | return d; |
82 | } |
83 | #ifdef Quad_t |
84 | { |
85 | *d++ = 0xff; /* Can't match U+FFFE! */ |
86 | *d++ = (((uv >> 36) & 0x3f) | 0x80); |
87 | *d++ = (((uv >> 30) & 0x3f) | 0x80); |
88 | *d++ = (((uv >> 24) & 0x3f) | 0x80); |
89 | *d++ = (((uv >> 18) & 0x3f) | 0x80); |
90 | *d++ = (((uv >> 12) & 0x3f) | 0x80); |
91 | *d++ = (((uv >> 6) & 0x3f) | 0x80); |
92 | *d++ = (( uv & 0x3f) | 0x80); |
93 | return d; |
94 | } |
95 | #endif |
96 | } |
97 | |
98 | UV |
dfe13c55 |
99 | utf8_to_uv(U8* s, I32* retlen) |
a0ed51b3 |
100 | { |
101 | UV uv = *s; |
102 | int len; |
103 | if (!(uv & 0x80)) { |
104 | if (retlen) |
105 | *retlen = 1; |
106 | return *s; |
107 | } |
108 | if (!(uv & 0x40)) { |
109 | warn("Malformed UTF-8 character"); |
110 | if (retlen) |
111 | *retlen = 1; |
112 | return *s; |
113 | } |
114 | |
115 | if (!(uv & 0x20)) { len = 2; uv &= 0x1f; } |
116 | else if (!(uv & 0x10)) { len = 3; uv &= 0x0f; } |
117 | else if (!(uv & 0x08)) { len = 4; uv &= 0x07; } |
118 | else if (!(uv & 0x04)) { len = 5; uv &= 0x03; } |
119 | else if (!(uv & 0x02)) { len = 6; uv &= 0x01; } |
120 | else if (!(uv & 0x01)) { len = 7; uv &= 0x00; } |
121 | else len = 8; /* whoa! */ |
122 | |
123 | if (retlen) |
124 | *retlen = len; |
125 | --len; |
126 | s++; |
127 | while (len--) { |
128 | if ((*s & 0xc0) != 0x80) { |
129 | warn("Malformed UTF-8 character"); |
130 | if (retlen) |
131 | *retlen -= len + 1; |
132 | return 0xfffd; |
133 | } |
134 | else |
135 | uv = (uv << 6) | (*s++ & 0x3f); |
136 | } |
137 | return uv; |
138 | } |
139 | |
140 | /* utf8_distance(a,b) is intended to be a - b in pointer arithmetic */ |
141 | |
142 | I32 |
dfe13c55 |
143 | utf8_distance(U8 *a, U8 *b) |
a0ed51b3 |
144 | { |
145 | I32 off = 0; |
146 | if (a < b) { |
147 | while (a < b) { |
148 | a += UTF8SKIP(a); |
149 | off--; |
150 | } |
151 | } |
152 | else { |
153 | while (b < a) { |
154 | b += UTF8SKIP(b); |
155 | off++; |
156 | } |
157 | } |
158 | return off; |
159 | } |
160 | |
161 | /* WARNING: do not use the following unless you *know* off is within bounds */ |
162 | |
163 | U8 * |
dfe13c55 |
164 | utf8_hop(U8 *s, I32 off) |
a0ed51b3 |
165 | { |
166 | if (off >= 0) { |
167 | while (off--) |
168 | s += UTF8SKIP(s); |
169 | } |
170 | else { |
171 | while (off++) { |
172 | s--; |
173 | if (*s & 0x80) { |
174 | while ((*s & 0xc0) == 0x80) |
175 | s--; |
176 | } |
177 | } |
178 | } |
179 | return s; |
180 | } |
181 | |
182 | /* XXX NOTHING CALLS THE FOLLOWING TWO ROUTINES YET!!! */ |
183 | /* |
184 | * Convert native or reversed UTF-16 to UTF-8. |
185 | * |
186 | * Destination must be pre-extended to 3/2 source. Do not use in-place. |
187 | * We optimize for native, for obvious reasons. */ |
188 | |
189 | U8* |
190 | utf16_to_utf8(U16* p, U8* d, I32 bytelen) |
191 | { |
192 | U16* pend = p + bytelen / 2; |
193 | while (p < pend) { |
194 | UV uv = *p++; |
195 | if (uv < 0x80) { |
196 | *d++ = uv; |
197 | continue; |
198 | } |
199 | if (uv < 0x800) { |
200 | *d++ = (( uv >> 6) | 0xc0); |
201 | *d++ = (( uv & 0x3f) | 0x80); |
202 | continue; |
203 | } |
204 | if (uv >= 0xd800 && uv < 0xdbff) { /* surrogates */ |
205 | int low = *p++; |
206 | if (low < 0xdc00 || low >= 0xdfff) { |
207 | warn("Malformed UTF-16 surrogate"); |
208 | p--; |
209 | uv = 0xfffd; |
210 | } |
211 | uv = ((uv - 0xd800) << 10) + (low - 0xdc00) + 0x10000; |
212 | } |
213 | if (uv < 0x10000) { |
214 | *d++ = (( uv >> 12) | 0xe0); |
215 | *d++ = (((uv >> 6) & 0x3f) | 0x80); |
216 | *d++ = (( uv & 0x3f) | 0x80); |
217 | continue; |
218 | } |
219 | else { |
220 | *d++ = (( uv >> 18) | 0xf0); |
221 | *d++ = (((uv >> 12) & 0x3f) | 0x80); |
222 | *d++ = (((uv >> 6) & 0x3f) | 0x80); |
223 | *d++ = (( uv & 0x3f) | 0x80); |
224 | continue; |
225 | } |
226 | } |
227 | return d; |
228 | } |
229 | |
230 | /* Note: this one is slightly destructive of the source. */ |
231 | |
232 | U8* |
233 | utf16_to_utf8_reversed(U16* p, U8* d, I32 bytelen) |
234 | { |
235 | U8* s = (U8*)p; |
236 | U8* send = s + bytelen; |
237 | while (s < send) { |
238 | U8 tmp = s[0]; |
239 | s[0] = s[1]; |
240 | s[1] = tmp; |
241 | s += 2; |
242 | } |
243 | return utf16_to_utf8(p, d, bytelen); |
244 | } |
245 | |
246 | /* for now these are all defined (inefficiently) in terms of the utf8 versions */ |
247 | |
248 | bool |
249 | is_uni_alnum(U32 c) |
250 | { |
dfe13c55 |
251 | U8 tmpbuf[10]; |
a0ed51b3 |
252 | uv_to_utf8(tmpbuf, (UV)c); |
253 | return is_utf8_alnum(tmpbuf); |
254 | } |
255 | |
256 | bool |
257 | is_uni_idfirst(U32 c) |
258 | { |
dfe13c55 |
259 | U8 tmpbuf[10]; |
a0ed51b3 |
260 | uv_to_utf8(tmpbuf, (UV)c); |
261 | return is_utf8_idfirst(tmpbuf); |
262 | } |
263 | |
264 | bool |
265 | is_uni_alpha(U32 c) |
266 | { |
dfe13c55 |
267 | U8 tmpbuf[10]; |
a0ed51b3 |
268 | uv_to_utf8(tmpbuf, (UV)c); |
269 | return is_utf8_alpha(tmpbuf); |
270 | } |
271 | |
272 | bool |
273 | is_uni_space(U32 c) |
274 | { |
dfe13c55 |
275 | U8 tmpbuf[10]; |
a0ed51b3 |
276 | uv_to_utf8(tmpbuf, (UV)c); |
277 | return is_utf8_space(tmpbuf); |
278 | } |
279 | |
280 | bool |
281 | is_uni_digit(U32 c) |
282 | { |
dfe13c55 |
283 | U8 tmpbuf[10]; |
a0ed51b3 |
284 | uv_to_utf8(tmpbuf, (UV)c); |
285 | return is_utf8_digit(tmpbuf); |
286 | } |
287 | |
288 | bool |
289 | is_uni_upper(U32 c) |
290 | { |
dfe13c55 |
291 | U8 tmpbuf[10]; |
a0ed51b3 |
292 | uv_to_utf8(tmpbuf, (UV)c); |
293 | return is_utf8_upper(tmpbuf); |
294 | } |
295 | |
296 | bool |
297 | is_uni_lower(U32 c) |
298 | { |
dfe13c55 |
299 | U8 tmpbuf[10]; |
a0ed51b3 |
300 | uv_to_utf8(tmpbuf, (UV)c); |
301 | return is_utf8_lower(tmpbuf); |
302 | } |
303 | |
304 | bool |
305 | is_uni_print(U32 c) |
306 | { |
dfe13c55 |
307 | U8 tmpbuf[10]; |
a0ed51b3 |
308 | uv_to_utf8(tmpbuf, (UV)c); |
309 | return is_utf8_print(tmpbuf); |
310 | } |
311 | |
312 | U32 |
313 | to_uni_upper(U32 c) |
314 | { |
dfe13c55 |
315 | U8 tmpbuf[10]; |
a0ed51b3 |
316 | uv_to_utf8(tmpbuf, (UV)c); |
317 | return to_utf8_upper(tmpbuf); |
318 | } |
319 | |
320 | U32 |
321 | to_uni_title(U32 c) |
322 | { |
dfe13c55 |
323 | U8 tmpbuf[10]; |
a0ed51b3 |
324 | uv_to_utf8(tmpbuf, (UV)c); |
325 | return to_utf8_title(tmpbuf); |
326 | } |
327 | |
328 | U32 |
329 | to_uni_lower(U32 c) |
330 | { |
dfe13c55 |
331 | U8 tmpbuf[10]; |
a0ed51b3 |
332 | uv_to_utf8(tmpbuf, (UV)c); |
333 | return to_utf8_lower(tmpbuf); |
334 | } |
335 | |
336 | /* for now these all assume no locale info available for Unicode > 255 */ |
337 | |
338 | bool |
339 | is_uni_alnum_lc(U32 c) |
340 | { |
341 | return is_uni_alnum(c); /* XXX no locale support yet */ |
342 | } |
343 | |
344 | bool |
345 | is_uni_idfirst_lc(U32 c) |
346 | { |
347 | return is_uni_idfirst(c); /* XXX no locale support yet */ |
348 | } |
349 | |
350 | bool |
351 | is_uni_alpha_lc(U32 c) |
352 | { |
353 | return is_uni_alpha(c); /* XXX no locale support yet */ |
354 | } |
355 | |
356 | bool |
357 | is_uni_space_lc(U32 c) |
358 | { |
359 | return is_uni_space(c); /* XXX no locale support yet */ |
360 | } |
361 | |
362 | bool |
363 | is_uni_digit_lc(U32 c) |
364 | { |
365 | return is_uni_digit(c); /* XXX no locale support yet */ |
366 | } |
367 | |
368 | bool |
369 | is_uni_upper_lc(U32 c) |
370 | { |
371 | return is_uni_upper(c); /* XXX no locale support yet */ |
372 | } |
373 | |
374 | bool |
375 | is_uni_lower_lc(U32 c) |
376 | { |
377 | return is_uni_lower(c); /* XXX no locale support yet */ |
378 | } |
379 | |
380 | bool |
381 | is_uni_print_lc(U32 c) |
382 | { |
383 | return is_uni_print(c); /* XXX no locale support yet */ |
384 | } |
385 | |
386 | U32 |
387 | to_uni_upper_lc(U32 c) |
388 | { |
389 | return to_uni_upper(c); /* XXX no locale support yet */ |
390 | } |
391 | |
392 | U32 |
393 | to_uni_title_lc(U32 c) |
394 | { |
395 | return to_uni_title(c); /* XXX no locale support yet */ |
396 | } |
397 | |
398 | U32 |
399 | to_uni_lower_lc(U32 c) |
400 | { |
401 | return to_uni_lower(c); /* XXX no locale support yet */ |
402 | } |
403 | |
404 | |
405 | bool |
dfe13c55 |
406 | is_utf8_alnum(U8 *p) |
a0ed51b3 |
407 | { |
408 | if (!PL_utf8_alnum) |
e24b16f9 |
409 | PL_utf8_alnum = swash_init("utf8", "IsAlnum", &PL_sv_undef, 0, 0); |
a0ed51b3 |
410 | return swash_fetch(PL_utf8_alnum, p); |
411 | /* return *p == '_' || is_utf8_alpha(p) || is_utf8_digit(p); */ |
412 | #ifdef SURPRISINGLY_SLOWER /* probably because alpha is usually true */ |
413 | if (!PL_utf8_alnum) |
414 | PL_utf8_alnum = swash_init("utf8", "", |
415 | sv_2mortal(newSVpv("+utf8::IsAlpha\n+utf8::IsDigit\n005F\n",0)), 0, 0); |
416 | return swash_fetch(PL_utf8_alnum, p); |
417 | #endif |
418 | } |
419 | |
420 | bool |
dfe13c55 |
421 | is_utf8_idfirst(U8 *p) |
a0ed51b3 |
422 | { |
423 | return *p == '_' || is_utf8_alpha(p); |
424 | } |
425 | |
426 | bool |
dfe13c55 |
427 | is_utf8_alpha(U8 *p) |
a0ed51b3 |
428 | { |
429 | if (!PL_utf8_alpha) |
e24b16f9 |
430 | PL_utf8_alpha = swash_init("utf8", "IsAlpha", &PL_sv_undef, 0, 0); |
a0ed51b3 |
431 | return swash_fetch(PL_utf8_alpha, p); |
432 | } |
433 | |
434 | bool |
dfe13c55 |
435 | is_utf8_space(U8 *p) |
a0ed51b3 |
436 | { |
437 | if (!PL_utf8_space) |
e24b16f9 |
438 | PL_utf8_space = swash_init("utf8", "IsSpace", &PL_sv_undef, 0, 0); |
a0ed51b3 |
439 | return swash_fetch(PL_utf8_space, p); |
440 | } |
441 | |
442 | bool |
dfe13c55 |
443 | is_utf8_digit(U8 *p) |
a0ed51b3 |
444 | { |
445 | if (!PL_utf8_digit) |
e24b16f9 |
446 | PL_utf8_digit = swash_init("utf8", "IsDigit", &PL_sv_undef, 0, 0); |
a0ed51b3 |
447 | return swash_fetch(PL_utf8_digit, p); |
448 | } |
449 | |
450 | bool |
dfe13c55 |
451 | is_utf8_upper(U8 *p) |
a0ed51b3 |
452 | { |
453 | if (!PL_utf8_upper) |
e24b16f9 |
454 | PL_utf8_upper = swash_init("utf8", "IsUpper", &PL_sv_undef, 0, 0); |
a0ed51b3 |
455 | return swash_fetch(PL_utf8_upper, p); |
456 | } |
457 | |
458 | bool |
dfe13c55 |
459 | is_utf8_lower(U8 *p) |
a0ed51b3 |
460 | { |
461 | if (!PL_utf8_lower) |
e24b16f9 |
462 | PL_utf8_lower = swash_init("utf8", "IsLower", &PL_sv_undef, 0, 0); |
a0ed51b3 |
463 | return swash_fetch(PL_utf8_lower, p); |
464 | } |
465 | |
466 | bool |
dfe13c55 |
467 | is_utf8_print(U8 *p) |
a0ed51b3 |
468 | { |
469 | if (!PL_utf8_print) |
e24b16f9 |
470 | PL_utf8_print = swash_init("utf8", "IsPrint", &PL_sv_undef, 0, 0); |
a0ed51b3 |
471 | return swash_fetch(PL_utf8_print, p); |
472 | } |
473 | |
474 | bool |
dfe13c55 |
475 | is_utf8_mark(U8 *p) |
a0ed51b3 |
476 | { |
477 | if (!PL_utf8_mark) |
e24b16f9 |
478 | PL_utf8_mark = swash_init("utf8", "IsM", &PL_sv_undef, 0, 0); |
a0ed51b3 |
479 | return swash_fetch(PL_utf8_mark, p); |
480 | } |
481 | |
2104c8d9 |
482 | UV |
dfe13c55 |
483 | to_utf8_upper(U8 *p) |
a0ed51b3 |
484 | { |
485 | UV uv; |
486 | |
487 | if (!PL_utf8_toupper) |
e24b16f9 |
488 | PL_utf8_toupper = swash_init("utf8", "ToUpper", &PL_sv_undef, 4, 0); |
a0ed51b3 |
489 | uv = swash_fetch(PL_utf8_toupper, p); |
490 | return uv ? uv : utf8_to_uv(p,0); |
491 | } |
492 | |
2104c8d9 |
493 | UV |
dfe13c55 |
494 | to_utf8_title(U8 *p) |
a0ed51b3 |
495 | { |
496 | UV uv; |
497 | |
498 | if (!PL_utf8_totitle) |
e24b16f9 |
499 | PL_utf8_totitle = swash_init("utf8", "ToTitle", &PL_sv_undef, 4, 0); |
a0ed51b3 |
500 | uv = swash_fetch(PL_utf8_totitle, p); |
501 | return uv ? uv : utf8_to_uv(p,0); |
502 | } |
503 | |
2104c8d9 |
504 | UV |
dfe13c55 |
505 | to_utf8_lower(U8 *p) |
a0ed51b3 |
506 | { |
507 | UV uv; |
508 | |
509 | if (!PL_utf8_tolower) |
e24b16f9 |
510 | PL_utf8_tolower = swash_init("utf8", "ToLower", &PL_sv_undef, 4, 0); |
a0ed51b3 |
511 | uv = swash_fetch(PL_utf8_tolower, p); |
512 | return uv ? uv : utf8_to_uv(p,0); |
513 | } |
514 | |
515 | /* a "swash" is a swatch hash */ |
516 | |
517 | SV* |
518 | swash_init(char* pkg, char* name, SV *listsv, I32 minbits, I32 none) |
519 | { |
520 | SV* retval; |
521 | char tmpbuf[256]; |
522 | dSP; |
523 | PUSHSTACKi(PERLSI_MAGIC); |
524 | PUSHMARK(SP); |
525 | EXTEND(SP,5); |
526 | PUSHs(sv_2mortal(newSVpvn(pkg, strlen(pkg)))); |
527 | PUSHs(sv_2mortal(newSVpvn(name, strlen(name)))); |
528 | PUSHs(listsv); |
529 | PUSHs(sv_2mortal(newSViv(minbits))); |
530 | PUSHs(sv_2mortal(newSViv(none))); |
531 | PUTBACK; |
532 | ENTER; |
533 | SAVEI32(PL_hints); |
534 | PL_hints = 0; |
535 | save_re_context(); |
e24b16f9 |
536 | if (PL_curcop == &PL_compiling) /* XXX ought to be handled by lex_start */ |
a0ed51b3 |
537 | strncpy(tmpbuf, PL_tokenbuf, sizeof tmpbuf); |
538 | if (perl_call_method("SWASHNEW", G_SCALAR)) |
e24b16f9 |
539 | retval = newSVsv(*PL_stack_sp--); |
a0ed51b3 |
540 | else |
e24b16f9 |
541 | retval = &PL_sv_undef; |
a0ed51b3 |
542 | LEAVE; |
543 | POPSTACK; |
e24b16f9 |
544 | if (PL_curcop == &PL_compiling) { |
a0ed51b3 |
545 | strncpy(PL_tokenbuf, tmpbuf, sizeof tmpbuf); |
e24b16f9 |
546 | PL_curcop->op_private = PL_hints; |
a0ed51b3 |
547 | } |
548 | if (!SvROK(retval) || SvTYPE(SvRV(retval)) != SVt_PVHV) |
549 | croak("SWASHNEW didn't return an HV ref"); |
550 | return retval; |
551 | } |
552 | |
553 | UV |
dfe13c55 |
554 | swash_fetch(SV *sv, U8 *ptr) |
a0ed51b3 |
555 | { |
556 | HV* hv = (HV*)SvRV(sv); |
557 | U32 klen = UTF8SKIP(ptr) - 1; |
558 | U32 off = ptr[klen] & 127; /* NB: 64 bit always 0 when len > 1 */ |
559 | STRLEN slen; |
560 | STRLEN needents = (klen ? 64 : 128); |
dfe13c55 |
561 | U8 *tmps; |
a0ed51b3 |
562 | U32 bit; |
563 | SV *retval; |
564 | |
565 | /* |
566 | * This single-entry cache saves about 1/3 of the utf8 overhead in test |
567 | * suite. (That is, only 7-8% overall over just a hash cache. Still, |
568 | * it's nothing to sniff at.) Pity we usually come through at least |
569 | * two function calls to get here... |
570 | * |
571 | * NB: this code assumes that swatches are never modified, once generated! |
572 | */ |
573 | |
574 | if (hv == PL_last_swash_hv && |
575 | klen == PL_last_swash_klen && |
576 | (!klen || memEQ(ptr,PL_last_swash_key,klen)) ) |
577 | { |
578 | tmps = PL_last_swash_tmps; |
579 | slen = PL_last_swash_slen; |
580 | } |
581 | else { |
582 | /* Try our second-level swatch cache, kept in a hash. */ |
dfe13c55 |
583 | SV** svp = hv_fetch(hv, (char*)ptr, klen, FALSE); |
a0ed51b3 |
584 | |
585 | /* If not cached, generate it via utf8::SWASHGET */ |
dfe13c55 |
586 | if (!svp || !SvPOK(*svp) || !(tmps = (U8*)SvPV(*svp, slen))) { |
a0ed51b3 |
587 | dSP; |
588 | ENTER; |
589 | SAVETMPS; |
590 | save_re_context(); |
591 | PUSHSTACKi(PERLSI_MAGIC); |
592 | PUSHMARK(SP); |
593 | EXTEND(SP,3); |
594 | PUSHs((SV*)sv); |
595 | PUSHs(sv_2mortal(newSViv(utf8_to_uv(ptr, 0) & ~(needents - 1)))); |
596 | PUSHs(sv_2mortal(newSViv(needents))); |
597 | PUTBACK; |
598 | if (perl_call_method("SWASHGET", G_SCALAR)) |
e24b16f9 |
599 | retval = newSVsv(*PL_stack_sp--); |
a0ed51b3 |
600 | else |
e24b16f9 |
601 | retval = &PL_sv_undef; |
a0ed51b3 |
602 | POPSTACK; |
603 | FREETMPS; |
604 | LEAVE; |
e24b16f9 |
605 | if (PL_curcop == &PL_compiling) |
606 | PL_curcop->op_private = PL_hints; |
a0ed51b3 |
607 | |
dfe13c55 |
608 | svp = hv_store(hv, (char*)ptr, klen, retval, 0); |
a0ed51b3 |
609 | |
dfe13c55 |
610 | if (!svp || !(tmps = (U8*)SvPV(*svp, slen)) || slen < 8) |
a0ed51b3 |
611 | croak("SWASHGET didn't return result of proper length"); |
612 | } |
613 | |
614 | PL_last_swash_hv = hv; |
615 | PL_last_swash_klen = klen; |
616 | PL_last_swash_tmps = tmps; |
617 | PL_last_swash_slen = slen; |
618 | if (klen) |
619 | Copy(ptr, PL_last_swash_key, klen, U8); |
620 | } |
621 | |
622 | switch ((slen << 3) / needents) { |
623 | case 1: |
624 | bit = 1 << (off & 7); |
625 | off >>= 3; |
626 | return (tmps[off] & bit) != 0; |
627 | case 8: |
628 | return tmps[off]; |
629 | case 16: |
630 | off <<= 1; |
631 | return (tmps[off] << 8) + tmps[off + 1] ; |
632 | case 32: |
633 | off <<= 2; |
634 | return (tmps[off] << 24) + (tmps[off+1] << 16) + (tmps[off+2] << 8) + tmps[off + 3] ; |
635 | } |
636 | croak("panic: swash_fetch"); |
637 | return 0; |
638 | } |