Commit | Line | Data |
ac5ea531 |
1 | |
2 | #include "EXTERN.h" |
3 | #include "perl.h" |
4 | #include "XSUB.h" |
5 | |
6 | /* These 5 files are prepared by mkheader */ |
7 | #include "unfcmb.h" |
8 | #include "unfcan.h" |
9 | #include "unfcpt.h" |
10 | #include "unfcmp.h" |
11 | #include "unfexc.h" |
12 | |
/*
 * Compatibility shims (the original author guessed these are needed for
 * Perl 5.6.1 -- unconfirmed): when the perl headers do not already define
 * the uvuni/utf8n names, fall back to the uv_to_utf8 / utf8_to_uv names.
 */
#ifndef uvuni_to_utf8
#  define uvuni_to_utf8   uv_to_utf8
#endif /* uvuni_to_utf8 */

#ifndef utf8n_to_uvuni
#  define utf8n_to_uvuni  utf8_to_uv
#endif /* utf8n_to_uvuni */

/*
 * Highest code point the lookup tables are consulted for.
 * At present, char > 0x10ffff are unaffected without complaint, right?
 */
#define VALID_UTF_MAX     (0x10ffff)

/* True when uv lies above VALID_UTF_MAX. */
#define OVER_UTF_MAX(uv)  ((uv) > VALID_UTF_MAX)
26 | |
/* HANGUL_H */

/* Precomposed Hangul syllables: first, last, and how many there are. */
#define Hangul_SBase   0xAC00
#define Hangul_SFinal  0xD7A3
#define Hangul_SCount  11172

/* Syllables per leading consonant (Hangul_VCount * Hangul_TCount). */
#define Hangul_NCount  588

/* Leading consonants (L). */
#define Hangul_LBase   0x1100
#define Hangul_LFinal  0x1112
#define Hangul_LCount  19

/* Vowels (V). */
#define Hangul_VBase   0x1161
#define Hangul_VFinal  0x1175
#define Hangul_VCount  21

/*
 * Trailing consonants (T).  Hangul_TBase itself is not a trailing
 * consonant (index 0 means "no trailing consonant"), which is why
 * Hangul_IsT uses a strict lower bound.
 */
#define Hangul_TBase   0x11A7
#define Hangul_TFinal  0x11C2
#define Hangul_TCount  28

/* Range predicates; each argument may be evaluated more than once. */
#define Hangul_IsS(u)  (((u) >= Hangul_SBase) && ((u) <= Hangul_SFinal))
/* Offset from SBase is a multiple of TCount; only meaningful with IsS. */
#define Hangul_IsN(u)  (0 == ((u) - Hangul_SBase) % Hangul_TCount)
/* Syllable made of L + V only (no trailing consonant). */
#define Hangul_IsLV(u) (Hangul_IsS(u) && Hangul_IsN(u))
#define Hangul_IsL(u)  (((u) >= Hangul_LBase) && ((u) <= Hangul_LFinal))
#define Hangul_IsV(u)  (((u) >= Hangul_VBase) && ((u) <= Hangul_VFinal))
#define Hangul_IsT(u)  (((u) >  Hangul_TBase) && ((u) <= Hangul_TFinal))

/* HANGUL_H */
53 | |
54 | /* this is used for canonical ordering of combining characters (c.c.). */ |
55 | typedef struct { |
56 | U8 cc; /* combining class */ |
57 | UV uv; /* codepoint */ |
58 | STRLEN pos; /* position */ |
59 | } UNF_cc; |
60 | |
6c941e0c |
61 | int compare_cc (const void *a, const void *b) |
ac5ea531 |
62 | { |
63 | int ret_cc; |
6c941e0c |
64 | ret_cc = ((UNF_cc*) a)->cc - ((UNF_cc*) b)->cc; |
8f118dcd |
65 | if (ret_cc) |
66 | return ret_cc; |
6c941e0c |
67 | |
68 | return ( ((UNF_cc*) a)->pos > ((UNF_cc*) b)->pos ) |
69 | - ( ((UNF_cc*) a)->pos < ((UNF_cc*) b)->pos ); |
ac5ea531 |
70 | } |
71 | |
72 | U8* dec_canonical (UV uv) |
73 | { |
74 | U8 ***plane, **row; |
8f118dcd |
75 | if (OVER_UTF_MAX(uv)) |
76 | return NULL; |
ac5ea531 |
77 | plane = (U8***)UNF_canon[uv >> 16]; |
8f118dcd |
78 | if (! plane) |
79 | return NULL; |
ac5ea531 |
80 | row = plane[(uv >> 8) & 0xff]; |
81 | return row ? row[uv & 0xff] : NULL; |
82 | } |
83 | |
84 | U8* dec_compat (UV uv) |
85 | { |
86 | U8 ***plane, **row; |
8f118dcd |
87 | if (OVER_UTF_MAX(uv)) |
88 | return NULL; |
ac5ea531 |
89 | plane = (U8***)UNF_compat[uv >> 16]; |
8f118dcd |
90 | if (! plane) |
91 | return NULL; |
ac5ea531 |
92 | row = plane[(uv >> 8) & 0xff]; |
93 | return row ? row[uv & 0xff] : NULL; |
94 | } |
95 | |
2a204b45 |
96 | UV composite_uv (UV uv, UV uv2) |
ac5ea531 |
97 | { |
98 | UNF_complist ***plane, **row, *cell, *i; |
99 | |
8f118dcd |
100 | if (! uv2 || OVER_UTF_MAX(uv) || OVER_UTF_MAX(uv2)) |
101 | return 0; |
ac5ea531 |
102 | |
8f118dcd |
103 | if (Hangul_IsL(uv) && Hangul_IsV(uv2)) { |
ac5ea531 |
104 | uv -= Hangul_LBase; /* lindex */ |
105 | uv2 -= Hangul_VBase; /* vindex */ |
106 | return(Hangul_SBase + (uv * Hangul_VCount + uv2) * Hangul_TCount); |
107 | } |
8f118dcd |
108 | if (Hangul_IsLV(uv) && Hangul_IsT(uv2)) { |
ac5ea531 |
109 | uv2 -= Hangul_TBase; /* tindex */ |
2a204b45 |
110 | return(uv + uv2); |
ac5ea531 |
111 | } |
112 | plane = UNF_compos[uv >> 16]; |
8f118dcd |
113 | if (! plane) |
114 | return 0; |
ac5ea531 |
115 | row = plane[(uv >> 8) & 0xff]; |
8f118dcd |
116 | if (! row) |
117 | return 0; |
ac5ea531 |
118 | cell = row[uv & 0xff]; |
8f118dcd |
119 | if (! cell) |
120 | return 0; |
121 | for (i = cell; i->nextchar; i++) { |
122 | if (uv2 == i->nextchar) |
123 | return i->composite; |
ac5ea531 |
124 | } |
125 | return 0; |
126 | } |
127 | |
128 | U8 getCombinClass (UV uv) |
129 | { |
130 | U8 **plane, *row; |
8f118dcd |
131 | if (OVER_UTF_MAX(uv)) |
132 | return 0; |
ac5ea531 |
133 | plane = (U8**)UNF_combin[uv >> 16]; |
8f118dcd |
134 | if (! plane) |
135 | return 0; |
ac5ea531 |
136 | row = plane[(uv >> 8) & 0xff]; |
137 | return row ? row[uv & 0xff] : 0; |
138 | } |
139 | |
140 | void sv_cat_decompHangul (SV* sv, UV uv) |
141 | { |
142 | UV sindex, lindex, vindex, tindex; |
2a204b45 |
143 | U8 *t, tmp[3 * UTF8_MAXLEN + 1]; |
ac5ea531 |
144 | |
8f118dcd |
145 | if (! Hangul_IsS(uv)) |
146 | return; |
ac5ea531 |
147 | |
148 | sindex = uv - Hangul_SBase; |
149 | lindex = sindex / Hangul_NCount; |
150 | vindex = (sindex % Hangul_NCount) / Hangul_TCount; |
151 | tindex = sindex % Hangul_TCount; |
152 | |
2a204b45 |
153 | t = tmp; |
ac5ea531 |
154 | t = uvuni_to_utf8(t, (lindex + Hangul_LBase)); |
155 | t = uvuni_to_utf8(t, (vindex + Hangul_VBase)); |
8f118dcd |
156 | if (tindex) |
157 | t = uvuni_to_utf8(t, (tindex + Hangul_TBase)); |
ac5ea531 |
158 | *t = '\0'; |
2a204b45 |
159 | sv_catpvn(sv, (char *)tmp, strlen((char *)tmp)); |
ac5ea531 |
160 | } |
161 | |
162 | MODULE = Unicode::Normalize PACKAGE = Unicode::Normalize |
163 | |
ac5ea531 |
SV*
decompose(arg, compat = &PL_sv_no)
    SV * arg
    SV * compat
  PROTOTYPE: $;$
  PREINIT:
    UV uv;
    SV *src, *dst;
    STRLEN srclen, retlen;
    U8 *s, *e, *p, *r;
    bool iscompat;
  CODE:
    /*
     * Return a new UTF-8 string in which every character of arg is
     * replaced by its decomposition: the compatibility mapping when
     * compat is true, the canonical mapping otherwise.  Hangul
     * syllables are decomposed arithmetically; characters without a
     * mapping are copied through unchanged.  arg itself is never
     * modified: a non-UTF-8 arg is upgraded on a mortal copy.
     */
    if (SvUTF8(arg)) {
        src = arg;
    } else {
        src = sv_mortalcopy(arg);
        sv_utf8_upgrade(src);
    }
    iscompat = SvTRUE(compat);

    /*
     * Start from a real empty string.  newSV(1) + SvPOK_only() left the
     * buffer uninitialised, so an empty input produced a POK string
     * without a trailing NUL.
     */
    dst = newSVpvn("", 0);
    SvUTF8_on(dst);

    s = (U8*)SvPV(src,srclen);
    e = s + srclen;
    for (p = s; p < e;) {
        uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
        p += retlen;

        if (Hangul_IsS(uv)) {
            sv_cat_decompHangul(dst, uv);
            continue;
        }

        r = iscompat ? dec_compat(uv) : dec_canonical(uv);
        if (r)
            sv_catpv(dst, (char *)r);
        else    /* no mapping: copy the original bytes of this character */
            sv_catpvn(dst, (char *)p - retlen, retlen);
    }
    RETVAL = dst;
  OUTPUT:
    RETVAL
206 | |
207 | |
208 | |
SV*
reorder(arg)
    SV * arg
  PROTOTYPE: $
  PREINIT:
    SV *src, *dst;
    STRLEN srclen, dstlen, retlen, stk_cc_max;
    U8 *s, *e, *p, *d, curCC;
    UV uv;
    UNF_cc * stk_cc;
  CODE:
    /*
     * Return a copy of arg (as UTF-8) in which every run of two or more
     * consecutive characters with a non-zero combining class has been
     * sorted by combining class; characters of equal class keep their
     * original relative order.  The copy is rewritten in place: a run
     * holds the same characters before and after sorting, so its byte
     * length does not change.
     */
    if (SvUTF8(arg)) {
        src = arg;
    } else {
        src = sv_mortalcopy(arg);
        sv_utf8_upgrade(src);
    }

    s = (U8*)SvPV(src, srclen);

    dstlen = srclen + 1;
    dst = newSV(dstlen);
    sv_setpvn(dst,(const char*)s,srclen);
    SvUTF8_on(dst);

    stk_cc_max = 10; /* initial capacity; doubled on demand below */
    New(0, stk_cc, stk_cc_max, UNF_cc);

    d = (U8*)SvPV(dst,dstlen);
    e = d + dstlen;

    for (p = d; p < e;) {
        U8 *cc_in;                  /* first byte of the current run */
        STRLEN cc_iter, cc_pos;     /* cc_pos: index of the run's last entry */

        uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
        curCC = getCombinClass(uv);
        p += retlen;

        /* a starter, or a combining character ending the string */
        if (! (curCC && p < e))
            continue;
        else
            cc_in = p - retlen;

        cc_pos = 0;
        stk_cc[cc_pos].cc  = curCC;
        stk_cc[cc_pos].uv  = uv;
        stk_cc[cc_pos].pos = cc_pos;

        /* collect the rest of the run, up to the next starter */
        while (p < e) {
            uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
            curCC = getCombinClass(uv);
            if (!curCC)
                break;
            p += retlen;
            cc_pos++;
            if (stk_cc_max <= cc_pos) {
                /*
                 * cc_pos advances by one, so it equals stk_cc_max here
                 * and doubling always makes room.  Growing by a single
                 * element would reallocate for every further character.
                 */
                stk_cc_max *= 2;
                Renew(stk_cc, stk_cc_max, UNF_cc);
            }
            stk_cc[cc_pos].cc  = curCC;
            stk_cc[cc_pos].uv  = uv;
            stk_cc[cc_pos].pos = cc_pos;
        }

        /* a run of a single combining character needs no reordering */
        if (!cc_pos)
            continue;

        qsort((void*)stk_cc, cc_pos + 1, sizeof(UNF_cc), compare_cc);

        /* re-encode the sorted run over the bytes it came from */
        p = cc_in;
        for (cc_iter = 0; cc_iter <= cc_pos; cc_iter++) {
            p = uvuni_to_utf8(p, stk_cc[cc_iter].uv);
        }
    }
    Safefree(stk_cc);
    RETVAL = dst;
  OUTPUT:
    RETVAL
290 | |
291 | |
292 | |
2a204b45 |
SV*
compose(arg)
    SV * arg
  PROTOTYPE: $
  PREINIT:
    SV  *src, *dst, *tmp;
    U8  *s, *p, *e, *d, *t, *tmp_start, curCC, preCC;
    UV uv = 0, uvS = 0, uvComp;
    STRLEN srclen, dstlen, tmplen, retlen;
    bool beginning = TRUE;
  CODE:
    /*
     * Return a new UTF-8 string in which each starter of arg has been
     * combined with the following characters wherever composite_uv()
     * yields a composite that isExclusion() does not reject and the
     * character is not blocked.  Characters that could not be combined
     * are collected in tmp and written out after their starter.
     * Combining characters before the first starter are copied as-is.
     */
    if (SvUTF8(arg)) {
        src = arg;
    } else {
        src = sv_mortalcopy(arg);
        sv_utf8_upgrade(src);
    }

    s = (U8*)SvPV(src, srclen);
    e = s + srclen;
    dstlen = srclen + 1;
    dst = newSV(dstlen);
    (void)SvPOK_only(dst);
    SvUTF8_on(dst);
    d = (U8*)SvPVX(dst);

    /* for uncomposed combining char */
    tmp = sv_2mortal(newSV(dstlen));
    (void)SvPOK_only(tmp);
    SvUTF8_on(tmp);

    for (p = s; p < e;) {
        if (beginning) {
            uvS = utf8n_to_uvuni(p, e - p, &retlen, 0);
            p += retlen;

            if (getCombinClass(uvS)) { /* no Starter found yet */
                d = uvuni_to_utf8(d, uvS);
                continue;
            }
            beginning = FALSE;
        }

        /* Starter */
        t = tmp_start = (U8*)SvPVX(tmp);
        preCC = 0;

        /* to the next Starter */
        while (p < e) {
            uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
            p += retlen;
            curCC = getCombinClass(uv);

            if (preCC && preCC == curCC) {
                /* same class as the previous uncomposed char: keep as-is */
                preCC = curCC;
                t = uvuni_to_utf8(t, uv);
            } else {
                uvComp = composite_uv(uvS, uv);

                if (uvComp && ! isExclusion(uvComp) && preCC <= curCC) {
                    STRLEN leftcur, rightcur, dstcur;
                    leftcur  = UNISKIP(uvComp);
                    rightcur = UNISKIP(uvS) + UNISKIP(uv);

                    /* composite longer than its two parts: enlarge dst */
                    if (leftcur > rightcur) {
                        dstcur = d - (U8*)SvPVX(dst);
                        dstlen += leftcur - rightcur;
                        d = (U8*)SvGROW(dst,dstlen) + dstcur;
                    }
                    /* preCC not changed to curCC */
                    uvS = uvComp;
                } else if (! curCC && p < e) { /* blocked */
                    break;
                } else {
                    preCC = curCC;
                    t = uvuni_to_utf8(t, uv);
                }
            }
        }
        d = uvuni_to_utf8(d, uvS); /* starter (composed or not) */
        tmplen = t - tmp_start;
        if (tmplen) { /* uncomposed combining char */
            t = (U8*)SvPVX(tmp);
            while (tmplen--)
                *d++ = *t++;
        }
        /*
         * After a "blocked" break, uv is the next starter.  Otherwise the
         * input is exhausted and this value is never used; uv is
         * initialised in PREINIT so that a lone final starter does not
         * cause a read of an indeterminate value here.
         */
        uvS = uv;
    } /* for */

    /*
     * Terminate the string before publishing its length.  At most
     * dstlen - 1 bytes have been written, so the terminator always
     * fits in the buffer.
     */
    *d = '\0';
    SvCUR_set(dst, d - (U8*)SvPVX(dst));
    RETVAL = dst;
  OUTPUT:
    RETVAL
ac5ea531 |
385 | |
386 | |
387 | |
8f118dcd |
void
checkNFD(arg)
    SV * arg
  PROTOTYPE: $
  ALIAS:
    checkNFKD = 1
  PREINIT:
    UV uv;
    SV *src;
    STRLEN srclen, retlen;
    U8 *s, *e, *p, curCC, preCC;
  PPCODE:
    /*
     * Quick check: returns YES when no character of arg is a Hangul
     * syllable or has a decomposition mapping (canonical for checkNFD,
     * compatibility for checkNFKD), and no non-zero combining class
     * follows a larger one.  Returns NO at the first violation.
     */
    src = arg;
    if (! SvUTF8(arg)) {
        src = sv_mortalcopy(arg);
        sv_utf8_upgrade(src);
    }

    s = (U8*)SvPV(src,srclen);
    e = s + srclen;

    p = s;
    preCC = 0;
    while (p < e) {
        uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
        curCC = getCombinClass(uv);

        /* canonical ordering violated */
        if (curCC != 0 && curCC < preCC)
            XSRETURN_NO;

        /* the character itself decomposes */
        if (Hangul_IsS(uv))
            XSRETURN_NO;
        if (ix ? dec_compat(uv) : dec_canonical(uv))
            XSRETURN_NO;

        preCC = curCC;
        p += retlen;
    }
    XSRETURN_YES;
421 | |
422 | |
423 | |
void
checkNFC(arg)
    SV * arg
  PROTOTYPE: $
  ALIAS:
    checkNFKC = 1
  PREINIT:
    UV uv;
    SV *src;
    STRLEN srclen, retlen;
    U8 *s, *e, *p, curCC, preCC;
    bool isMAYBE;
  PPCODE:
    /*
     * Quick check for the composed forms (checkNFC, or checkNFKC when
     * called through the alias).  Returns NO at the first character that
     * rules the form out, undef when no such character exists but at
     * least one isComp2nd() character was seen, and YES otherwise.
     */
    src = arg;
    if (! SvUTF8(arg)) {
        src = sv_mortalcopy(arg);
        sv_utf8_upgrade(src);
    }

    s = (U8*)SvPV(src,srclen);
    e = s + srclen;

    isMAYBE = FALSE;
    preCC = 0;
    p = s;
    while (p < e) {
        uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
        curCC = getCombinClass(uv);

        /* canonical ordering violated */
        if (curCC != 0 && curCC < preCC)
            XSRETURN_NO;

        /* get NFC/NFKC property */
        /* Hangul syllables are canonical composites: always YES */
        if (! Hangul_IsS(uv)) {
            if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv))
                XSRETURN_NO;

            if (isComp2nd(uv)) {
                isMAYBE = TRUE;
            }
            else if (ix) {
                char *canon, *compat;
                /* NFKC_NO when having compatibility mapping. */
                canon  = (char *) dec_canonical(uv);
                compat = (char *) dec_compat(uv);
                if (compat && (!canon || strNE(canon, compat)))
                    XSRETURN_NO;
            }
        } /* end of get NFC/NFKC property */

        preCC = curCC;
        p += retlen;
    }

    if (! isMAYBE)
        XSRETURN_YES;
    XSRETURN_UNDEF;
478 | |
479 | |
480 | |
ac5ea531 |
# Declarative XSUBs: none of them has a CODE section, so each one simply
# forwards its UV argument to the C function or macro of the same name.
# getCombinClass is the helper defined earlier in this file; the is*
# predicates are not defined in the visible source (presumably they
# come from one of the generated headers).

U8
getCombinClass(uv)
    UV uv
  PROTOTYPE: $

bool
isExclusion(uv)
    UV uv
  PROTOTYPE: $

bool
isSingleton(uv)
    UV uv
  PROTOTYPE: $

bool
isNonStDecomp(uv)
    UV uv
  PROTOTYPE: $

# isNFC_MAYBE and isNFKC_MAYBE are further Perl-level names for isComp2nd.
bool
isComp2nd(uv)
    UV uv
  PROTOTYPE: $
  ALIAS:
    isNFC_MAYBE = 1
    isNFKC_MAYBE = 2
508 | |
509 | |
510 | |
void
isNFD_NO(uv)
    UV uv
  PROTOTYPE: $
  ALIAS:
    isNFKD_NO = 1
  PPCODE:
    /*
     * YES when uv is a Hangul syllable or has a decomposition mapping
     * (canonical for isNFD_NO, compatibility for isNFKD_NO); NO otherwise.
     */
    if (Hangul_IsS(uv))
        XSRETURN_YES; /* NFD_NO or NFKD_NO */
    if (ix ? dec_compat(uv) : dec_canonical(uv))
        XSRETURN_YES; /* NFD_NO or NFKD_NO */
    XSRETURN_NO;
522 | |
523 | |
524 | |
void
isComp_Ex(uv)
    UV uv
  PROTOTYPE: $
  ALIAS:
    isNFC_NO = 0
    isNFKC_NO = 1
  PPCODE:
    /*
     * YES when uv is a composition exclusion, a singleton or a
     * non-starter decomposition.  Under the isNFKC_NO alias, also YES
     * when uv has a compatibility mapping that differs from its
     * canonical mapping or has no canonical mapping at all.
     * NO otherwise.
     */
    if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv))
        XSRETURN_YES; /* NFC_NO or NFKC_NO */

    if (ix) {
        char *canon, *compat;
        canon  = (char *) dec_canonical(uv);
        compat = (char *) dec_compat(uv);
        if (compat && !(canon && strEQ(canon, compat)))
            XSRETURN_YES; /* NFC_NO or NFKC_NO */
    }
    XSRETURN_NO;
546 | |
547 | |
ac5ea531 |
548 | |
2a204b45 |
SV*
getComposite(uv, uv2)
    UV uv
    UV uv2
  PROTOTYPE: $$
  PREINIT:
    UV composite;
  CODE:
    /*
     * Return the code point that uv followed by uv2 composes to, as
     * reported by composite_uv(), or undef when the pair does not compose.
     */
    composite = composite_uv(uv, uv2);
    if (composite)
        RETVAL = newSVuv(composite);
    else
        RETVAL = &PL_sv_undef;
  OUTPUT:
    RETVAL
ac5ea531 |
561 | |
8f118dcd |
562 | |
563 | |
ac5ea531 |
SV*
getCanon(uv)
    UV uv
  PROTOTYPE: $
  ALIAS:
    getCompat = 1
  PREINIT:
    U8 * rstr;
  CODE:
    /*
     * Return the decomposition of uv as a UTF-8 string: the canonical
     * mapping for getCanon, the compatibility mapping for getCompat.
     * Hangul syllables are decomposed arithmetically.  Returns undef
     * when uv has no mapping.
     */
    if (! Hangul_IsS(uv)) {
        rstr = ix ? dec_compat(uv) : dec_canonical(uv);
        if (rstr == NULL)
            XSRETURN_UNDEF;
        RETVAL = newSVpvn((char *)rstr, strlen((char *)rstr));
    } else {
        SV * dst;
        dst = newSV(1);
        (void)SvPOK_only(dst);
        sv_cat_decompHangul(dst, uv);
        RETVAL = dst;
    }
    SvUTF8_on(RETVAL);
  OUTPUT:
    RETVAL
588 | |