Down with C++ reserved names
[p5sagit/p5-mst-13.2.git] / ext / Unicode / Normalize / Normalize.xs
CommitLineData
ac5ea531 1
2#include "EXTERN.h"
3#include "perl.h"
4#include "XSUB.h"
5
6/* These 5 files are prepared by mkheader */
7#include "unfcmb.h"
8#include "unfcan.h"
9#include "unfcpt.h"
10#include "unfcmp.h"
11#include "unfexc.h"
12
13/* Perl 5.6.1 ? */
14#ifndef uvuni_to_utf8
15#define uvuni_to_utf8 uv_to_utf8
6c941e0c 16#endif /* uvuni_to_utf8 */
ac5ea531 17
18/* Perl 5.6.1 ? */
ab8fe378 19#ifndef utf8n_to_uvuni
20#define utf8n_to_uvuni utf8_to_uv
6c941e0c 21#endif /* utf8n_to_uvuni */
ac5ea531 22
e524f5b2 23/* UTF8_ALLOW_BOM is used before Perl 5.8.0 */
24#ifdef UTF8_ALLOW_BOM
25#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FFFF)
26#else
27#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_FFFF)
28#endif
29
/* croak message used when utf8n_to_uvuni() reports a character length (retlen) of 0 */
82e740b6 31#define ErrRetlenIsZero "panic (Unicode::Normalize): zero-length character"
32
/* croak message used when utf8_hop() steps back past the start of the buffer (possibly malformed UTF-8) */
34#define ErrHopBeforeStart "panic (Unicode::Normalize): hopping before start"
35
/* At present, code points above 0x10ffff are left unchanged and no complaint is raised. */
37#define VALID_UTF_MAX (0x10ffff)
38#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv))
39
/* HANGUL_H: constants and range tests for algorithmic Hangul handling */

/* Number of leading consonants (L), vowels (V) and trailing slots (T). */
#define Hangul_LCount 19
#define Hangul_VCount 21
#define Hangul_TCount 28

/* Syllables per leading consonant, and total number of syllables. */
#define Hangul_NCount (Hangul_VCount * Hangul_TCount)
#define Hangul_SCount (Hangul_LCount * Hangul_NCount)

/* First and last code point of each range. */
#define Hangul_SBase 0xAC00
#define Hangul_SFinal (Hangul_SBase + Hangul_SCount - 1)

#define Hangul_LBase 0x1100
#define Hangul_LFinal (Hangul_LBase + Hangul_LCount - 1)

#define Hangul_VBase 0x1161
#define Hangul_VFinal (Hangul_VBase + Hangul_VCount - 1)

#define Hangul_TBase 0x11A7
#define Hangul_TFinal (Hangul_TBase + Hangul_TCount - 1)

/* Precomposed syllable? */
#define Hangul_IsS(u) (((u) >= Hangul_SBase) && ((u) <= Hangul_SFinal))
/* Offset from SBase is a multiple of TCount; only meaningful together
 * with Hangul_IsS(), which is how Hangul_IsLV() uses it. */
#define Hangul_IsN(u) (((u) - Hangul_SBase) % Hangul_TCount == 0)
/* Syllable with a zero trailing index (LV form)? */
#define Hangul_IsLV(u) (Hangul_IsS(u) && Hangul_IsN(u))
/* Leading consonant jamo? */
#define Hangul_IsL(u) (((u) >= Hangul_LBase) && ((u) <= Hangul_LFinal))
/* Vowel jamo? */
#define Hangul_IsV(u) (((u) >= Hangul_VBase) && ((u) <= Hangul_VFinal))
/* Trailing consonant jamo?  The lower bound is strict: Hangul_TBase
 * itself is excluded. */
#define Hangul_IsT(u) (((u) > Hangul_TBase) && ((u) <= Hangul_TFinal))
/* HANGUL_H */
66
67/* this is used for canonical ordering of combining characters (c.c.). */
68typedef struct {
69 U8 cc; /* combining class */
70 UV uv; /* codepoint */
71 STRLEN pos; /* position */
72} UNF_cc;
73
e524f5b2 74static int compare_cc (const void *a, const void *b)
ac5ea531 75{
76 int ret_cc;
6c941e0c 77 ret_cc = ((UNF_cc*) a)->cc - ((UNF_cc*) b)->cc;
8f118dcd 78 if (ret_cc)
79 return ret_cc;
6c941e0c 80
81 return ( ((UNF_cc*) a)->pos > ((UNF_cc*) b)->pos )
82 - ( ((UNF_cc*) a)->pos < ((UNF_cc*) b)->pos );
ac5ea531 83}
84
e524f5b2 85static U8* dec_canonical (UV uv)
ac5ea531 86{
87 U8 ***plane, **row;
8f118dcd 88 if (OVER_UTF_MAX(uv))
89 return NULL;
ac5ea531 90 plane = (U8***)UNF_canon[uv >> 16];
8f118dcd 91 if (! plane)
92 return NULL;
ac5ea531 93 row = plane[(uv >> 8) & 0xff];
94 return row ? row[uv & 0xff] : NULL;
95}
96
e524f5b2 97static U8* dec_compat (UV uv)
ac5ea531 98{
99 U8 ***plane, **row;
8f118dcd 100 if (OVER_UTF_MAX(uv))
101 return NULL;
ac5ea531 102 plane = (U8***)UNF_compat[uv >> 16];
8f118dcd 103 if (! plane)
104 return NULL;
ac5ea531 105 row = plane[(uv >> 8) & 0xff];
106 return row ? row[uv & 0xff] : NULL;
107}
108
e524f5b2 109static UV composite_uv (UV uv, UV uv2)
ac5ea531 110{
111 UNF_complist ***plane, **row, *cell, *i;
112
8f118dcd 113 if (! uv2 || OVER_UTF_MAX(uv) || OVER_UTF_MAX(uv2))
114 return 0;
ac5ea531 115
8f118dcd 116 if (Hangul_IsL(uv) && Hangul_IsV(uv2)) {
ac5ea531 117 uv -= Hangul_LBase; /* lindex */
118 uv2 -= Hangul_VBase; /* vindex */
119 return(Hangul_SBase + (uv * Hangul_VCount + uv2) * Hangul_TCount);
120 }
8f118dcd 121 if (Hangul_IsLV(uv) && Hangul_IsT(uv2)) {
ac5ea531 122 uv2 -= Hangul_TBase; /* tindex */
2a204b45 123 return(uv + uv2);
ac5ea531 124 }
125 plane = UNF_compos[uv >> 16];
8f118dcd 126 if (! plane)
127 return 0;
ac5ea531 128 row = plane[(uv >> 8) & 0xff];
8f118dcd 129 if (! row)
130 return 0;
ac5ea531 131 cell = row[uv & 0xff];
8f118dcd 132 if (! cell)
133 return 0;
134 for (i = cell; i->nextchar; i++) {
135 if (uv2 == i->nextchar)
136 return i->composite;
ac5ea531 137 }
138 return 0;
139}
140
e524f5b2 141static U8 getCombinClass (UV uv)
ac5ea531 142{
143 U8 **plane, *row;
8f118dcd 144 if (OVER_UTF_MAX(uv))
145 return 0;
ac5ea531 146 plane = (U8**)UNF_combin[uv >> 16];
8f118dcd 147 if (! plane)
148 return 0;
ac5ea531 149 row = plane[(uv >> 8) & 0xff];
150 return row ? row[uv & 0xff] : 0;
151}
152
e524f5b2 153static void sv_cat_decompHangul (SV* sv, UV uv)
ac5ea531 154{
155 UV sindex, lindex, vindex, tindex;
2a204b45 156 U8 *t, tmp[3 * UTF8_MAXLEN + 1];
ac5ea531 157
8f118dcd 158 if (! Hangul_IsS(uv))
159 return;
ac5ea531 160
161 sindex = uv - Hangul_SBase;
162 lindex = sindex / Hangul_NCount;
163 vindex = (sindex % Hangul_NCount) / Hangul_TCount;
164 tindex = sindex % Hangul_TCount;
165
2a204b45 166 t = tmp;
ac5ea531 167 t = uvuni_to_utf8(t, (lindex + Hangul_LBase));
168 t = uvuni_to_utf8(t, (vindex + Hangul_VBase));
8f118dcd 169 if (tindex)
170 t = uvuni_to_utf8(t, (tindex + Hangul_TBase));
ac5ea531 171 *t = '\0';
e524f5b2 172 sv_catpvn(sv, (char *)tmp, t - tmp);
173}
174
175static void sv_cat_uvuni (SV* sv, UV uv)
176{
177 U8 *t, tmp[UTF8_MAXLEN + 1];
178
179 t = tmp;
180 t = uvuni_to_utf8(t, uv);
181 *t = '\0';
182 sv_catpvn(sv, (char *)tmp, t - tmp);
ac5ea531 183}
184
a092bcfd 185static char * sv_2pvunicode(SV *sv, STRLEN *lp)
186{
187 char *s;
188 STRLEN len;
189 s = (char*)SvPV(sv,len);
190 if (!SvUTF8(sv)) {
191 SV* tmpsv = sv_mortalcopy(sv);
192 if (!SvPOK(tmpsv))
193 (void)sv_pvn_force(tmpsv,&len);
194 sv_utf8_upgrade(tmpsv);
195 s = (char*)SvPV(tmpsv,len);
196 }
197 *lp = len;
198 return s;
199}
200
ac5ea531 201MODULE = Unicode::Normalize PACKAGE = Unicode::Normalize
202
ac5ea531 203SV*
a092bcfd 204decompose(src, compat = &PL_sv_no)
205 SV * src
ac5ea531 206 SV * compat
d85850a7 207 PROTOTYPE: $;$
ac5ea531 208 PREINIT:
a092bcfd 209 SV *dst;
73263a9c 210 STRLEN srclen, retlen;
211 U8 *s, *e, *p, *r;
a092bcfd 212 UV uv;
ac5ea531 213 bool iscompat;
214 CODE:
ac5ea531 215 iscompat = SvTRUE(compat);
a092bcfd 216 s = (U8*)sv_2pvunicode(src,&srclen);
217 e = s + srclen;
ac5ea531 218
219 dst = newSV(1);
220 (void)SvPOK_only(dst);
221 SvUTF8_on(dst);
222
82e740b6 223 for (p = s; p < e; p += retlen) {
e524f5b2 224 uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
82e740b6 225 if (!retlen)
226 croak(ErrRetlenIsZero);
227
8f118dcd 228 if (Hangul_IsS(uv))
229 sv_cat_decompHangul(dst, uv);
ac5ea531 230 else {
231 r = iscompat ? dec_compat(uv) : dec_canonical(uv);
8f118dcd 232 if (r)
233 sv_catpv(dst, (char *)r);
234 else
e524f5b2 235 sv_cat_uvuni(dst, uv);
ac5ea531 236 }
237 }
238 RETVAL = dst;
239 OUTPUT:
240 RETVAL
241
242
243
244SV*
a092bcfd 245reorder(src)
246 SV * src
ac5ea531 247 PROTOTYPE: $
248 PREINIT:
a092bcfd 249 SV *dst;
8f118dcd 250 STRLEN srclen, dstlen, retlen, stk_cc_max;
251 U8 *s, *e, *p, *d, curCC;
e524f5b2 252 UV uv, uvlast;
ac5ea531 253 UNF_cc * stk_cc;
e524f5b2 254 STRLEN i, cc_pos;
255 bool valid_uvlast;
ac5ea531 256 CODE:
a092bcfd 257 s = (U8*)sv_2pvunicode(src,&srclen);
e524f5b2 258 e = s + srclen;
a092bcfd 259
8f118dcd 260 dstlen = srclen + 1;
261 dst = newSV(dstlen);
e524f5b2 262 (void)SvPOK_only(dst);
8f118dcd 263 SvUTF8_on(dst);
e524f5b2 264 d = (U8*)SvPVX(dst);
ac5ea531 265
266 stk_cc_max = 10; /* enough as an initial value? */
267 New(0, stk_cc, stk_cc_max, UNF_cc);
268
e524f5b2 269 for (p = s; p < e;) {
270 uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
82e740b6 271 if (!retlen)
272 croak(ErrRetlenIsZero);
2a204b45 273 p += retlen;
274
82e740b6 275 curCC = getCombinClass(uv);
e524f5b2 276 if (curCC == 0) {
277 d = uvuni_to_utf8(d, uv);
8f118dcd 278 continue;
e524f5b2 279 }
ac5ea531 280
2a204b45 281 cc_pos = 0;
ac5ea531 282 stk_cc[cc_pos].cc = curCC;
283 stk_cc[cc_pos].uv = uv;
284 stk_cc[cc_pos].pos = cc_pos;
285
e524f5b2 286 valid_uvlast = FALSE;
8f118dcd 287 while (p < e) {
e524f5b2 288 uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
82e740b6 289 if (!retlen)
290 croak(ErrRetlenIsZero);
291 p += retlen;
292
ac5ea531 293 curCC = getCombinClass(uv);
e524f5b2 294 if (curCC == 0) {
295 uvlast = uv;
296 valid_uvlast = TRUE;
8f118dcd 297 break;
e524f5b2 298 }
82e740b6 299
ac5ea531 300 cc_pos++;
8f118dcd 301 if (stk_cc_max <= cc_pos) { /* extend if need */
ac5ea531 302 stk_cc_max = cc_pos + 1;
303 Renew(stk_cc, stk_cc_max, UNF_cc);
304 }
305 stk_cc[cc_pos].cc = curCC;
306 stk_cc[cc_pos].uv = uv;
307 stk_cc[cc_pos].pos = cc_pos;
308 }
309
e524f5b2 310 /* reordered if there are two c.c.'s */
311 if (cc_pos) {
312 qsort((void*)stk_cc, cc_pos + 1, sizeof(UNF_cc), compare_cc);
313 }
ac5ea531 314
e524f5b2 315 for (i = 0; i <= cc_pos; i++) {
316 d = uvuni_to_utf8(d, stk_cc[i].uv);
317 }
318 if (valid_uvlast)
319 {
320 d = uvuni_to_utf8(d, uvlast);
ac5ea531 321 }
322 }
e524f5b2 323 *d = '\0';
324 SvCUR_set(dst, d - (U8*)SvPVX(dst));
ac5ea531 325 Safefree(stk_cc);
8f118dcd 326 RETVAL = dst;
ac5ea531 327 OUTPUT:
328 RETVAL
329
330
331
2a204b45 332SV*
a092bcfd 333compose(src)
334 SV * src
ac5ea531 335 PROTOTYPE: $
82e740b6 336 ALIAS:
337 composeContiguous = 1
ac5ea531 338 PREINIT:
a092bcfd 339 SV *dst, *tmp;
ac5ea531 340 U8 *s, *p, *e, *d, *t, *tmp_start, curCC, preCC;
341 UV uv, uvS, uvComp;
2a204b45 342 STRLEN srclen, dstlen, tmplen, retlen;
ac5ea531 343 bool beginning = TRUE;
2a204b45 344 CODE:
a092bcfd 345 s = (U8*)sv_2pvunicode(src,&srclen);
ac5ea531 346 e = s + srclen;
a092bcfd 347
d85850a7 348 dstlen = srclen + 1;
2a204b45 349 dst = newSV(dstlen);
ac5ea531 350 (void)SvPOK_only(dst);
351 SvUTF8_on(dst);
352 d = (U8*)SvPVX(dst);
353
354 /* for uncomposed combining char */
355 tmp = sv_2mortal(newSV(dstlen));
356 (void)SvPOK_only(tmp);
357 SvUTF8_on(tmp);
358
8f118dcd 359 for (p = s; p < e;) {
360 if (beginning) {
e524f5b2 361 uvS = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
82e740b6 362 if (!retlen)
363 croak(ErrRetlenIsZero);
ac5ea531 364 p += retlen;
365
8f118dcd 366 if (getCombinClass(uvS)) { /* no Starter found yet */
ac5ea531 367 d = uvuni_to_utf8(d, uvS);
368 continue;
369 }
370 beginning = FALSE;
371 }
372
373 /* Starter */
374 t = tmp_start = (U8*)SvPVX(tmp);
375 preCC = 0;
376
377 /* to the next Starter */
8f118dcd 378 while (p < e) {
e524f5b2 379 uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
82e740b6 380 if (!retlen)
381 croak(ErrRetlenIsZero);
ac5ea531 382 p += retlen;
82e740b6 383
ac5ea531 384 curCC = getCombinClass(uv);
385
8f118dcd 386 if (preCC && preCC == curCC) {
ac5ea531 387 preCC = curCC;
388 t = uvuni_to_utf8(t, uv);
389 } else {
2a204b45 390 uvComp = composite_uv(uvS, uv);
ac5ea531 391
82e740b6 392 if (uvComp && ! isExclusion(uvComp) &&
393 (ix ? (t == tmp_start) : (preCC <= curCC))) {
d85850a7 394 STRLEN leftcur, rightcur, dstcur;
395 leftcur = UNISKIP(uvComp);
396 rightcur = UNISKIP(uvS) + UNISKIP(uv);
397
398 if (leftcur > rightcur) {
399 dstcur = d - (U8*)SvPVX(dst);
400 dstlen += leftcur - rightcur;
401 d = (U8*)SvGROW(dst,dstlen) + dstcur;
402 }
ac5ea531 403 /* preCC not changed to curCC */
404 uvS = uvComp;
1efaba7f 405 } else if (! curCC && p < e) { /* blocked */
ac5ea531 406 break;
407 } else {
408 preCC = curCC;
409 t = uvuni_to_utf8(t, uv);
410 }
411 }
412 }
2a204b45 413 d = uvuni_to_utf8(d, uvS); /* starter (composed or not) */
d85850a7 414 tmplen = t - tmp_start;
415 if (tmplen) { /* uncomposed combining char */
ac5ea531 416 t = (U8*)SvPVX(tmp);
8f118dcd 417 while (tmplen--)
418 *d++ = *t++;
ac5ea531 419 }
420 uvS = uv;
421 } /* for */
00f2676f 422 *d = '\0';
d85850a7 423 SvCUR_set(dst, d - (U8*)SvPVX(dst));
2a204b45 424 RETVAL = dst;
425 OUTPUT:
426 RETVAL
ac5ea531 427
428
8f118dcd 429void
a092bcfd 430checkNFD(src)
431 SV * src
8f118dcd 432 PROTOTYPE: $
433 ALIAS:
434 checkNFKD = 1
435 PREINIT:
8f118dcd 436 STRLEN srclen, retlen;
437 U8 *s, *e, *p, curCC, preCC;
a092bcfd 438 UV uv;
82e740b6 439 CODE:
a092bcfd 440 s = (U8*)sv_2pvunicode(src,&srclen);
8f118dcd 441 e = s + srclen;
442
443 preCC = 0;
444 for (p = s; p < e; p += retlen) {
e524f5b2 445 uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
82e740b6 446 if (!retlen)
447 croak(ErrRetlenIsZero);
448
8f118dcd 449 curCC = getCombinClass(uv);
450 if (preCC > curCC && curCC != 0) /* canonical ordering violated */
451 XSRETURN_NO;
452 if (Hangul_IsS(uv) || (ix ? dec_compat(uv) : dec_canonical(uv)))
453 XSRETURN_NO;
454 preCC = curCC;
455 }
456 XSRETURN_YES;
457
458
459
460void
a092bcfd 461checkNFC(src)
462 SV * src
8f118dcd 463 PROTOTYPE: $
464 ALIAS:
465 checkNFKC = 1
466 PREINIT:
8f118dcd 467 STRLEN srclen, retlen;
468 U8 *s, *e, *p, curCC, preCC;
a092bcfd 469 UV uv;
8f118dcd 470 bool isMAYBE;
82e740b6 471 CODE:
a092bcfd 472 s = (U8*)sv_2pvunicode(src,&srclen);
8f118dcd 473 e = s + srclen;
474
475 preCC = 0;
476 isMAYBE = FALSE;
477 for (p = s; p < e; p += retlen) {
e524f5b2 478 uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
82e740b6 479 if (!retlen)
480 croak(ErrRetlenIsZero);
481
8f118dcd 482 curCC = getCombinClass(uv);
483
484 if (preCC > curCC && curCC != 0) /* canonical ordering violated */
485 XSRETURN_NO;
486
487 /* get NFC/NFKC property */
488 if (Hangul_IsS(uv)) /* Hangul syllables are canonical composites */
489 ; /* YES */
490 else if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv))
491 XSRETURN_NO;
492 else if (isComp2nd(uv))
493 isMAYBE = TRUE;
494 else if (ix) {
495 char *canon, *compat;
6c941e0c 496 /* NFKC_NO when having compatibility mapping. */
8f118dcd 497 canon = (char *) dec_canonical(uv);
498 compat = (char *) dec_compat(uv);
6c941e0c 499 if (compat && !(canon && strEQ(canon, compat)))
8f118dcd 500 XSRETURN_NO;
501 } /* end of get NFC/NFKC property */
502
503 preCC = curCC;
504 }
505 if (isMAYBE)
506 XSRETURN_UNDEF;
507 else
508 XSRETURN_YES;
509
510
511
82e740b6 512void
a092bcfd 513checkFCD(src)
514 SV * src
82e740b6 515 PROTOTYPE: $
516 ALIAS:
517 checkFCC = 1
518 PREINIT:
82e740b6 519 STRLEN srclen, retlen, canlen, canret;
520 U8 *s, *e, *p, curCC, preCC;
a092bcfd 521 UV uv, uvLead, uvTrail;
82e740b6 522 U8 *sCan, *pCan, *eCan;
523 bool isMAYBE;
524 CODE:
a092bcfd 525 s = (U8*)sv_2pvunicode(src,&srclen);
82e740b6 526 e = s + srclen;
527
528 preCC = 0;
529 isMAYBE = FALSE;
530 for (p = s; p < e; p += retlen) {
e524f5b2 531 uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
82e740b6 532 if (!retlen)
533 croak(ErrRetlenIsZero);
534
535 sCan = (U8*) dec_canonical(uv);
536
537 if (sCan) {
538 canlen = (STRLEN)strlen((char *) sCan);
e524f5b2 539 uvLead = utf8n_to_uvuni(sCan, canlen, &canret, AllowAnyUTF);
82e740b6 540 }
541 else {
542 uvLead = uv;
543 }
544
545 curCC = getCombinClass(uvLead);
546
547 if (curCC != 0 && curCC < preCC) /* canonical ordering violated */
548 XSRETURN_NO;
549
550 if (ix) {
551 if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv))
552 XSRETURN_NO;
553 else if (isComp2nd(uv))
554 isMAYBE = TRUE;
555 }
556
557 if (sCan) {
558 eCan = sCan + canlen;
559 pCan = utf8_hop(eCan, -1);
560 if (pCan < sCan)
561 croak(ErrHopBeforeStart);
e524f5b2 562 uvTrail = utf8n_to_uvuni(pCan, eCan - pCan, &canret, AllowAnyUTF);
82e740b6 563 preCC = getCombinClass(uvTrail);
564 }
565 else {
566 preCC = curCC;
567 }
568 }
569 if (isMAYBE)
570 XSRETURN_UNDEF;
571 else
572 XSRETURN_YES;
573
574
575
ac5ea531 576U8
577getCombinClass(uv)
578 UV uv
8f118dcd 579 PROTOTYPE: $
ac5ea531 580
581bool
2a204b45 582isExclusion(uv)
ac5ea531 583 UV uv
8f118dcd 584 PROTOTYPE: $
585
586bool
587isSingleton(uv)
588 UV uv
589 PROTOTYPE: $
590
591bool
592isNonStDecomp(uv)
593 UV uv
594 PROTOTYPE: $
595
596bool
597isComp2nd(uv)
598 UV uv
599 PROTOTYPE: $
600 ALIAS:
601 isNFC_MAYBE = 1
602 isNFKC_MAYBE = 2
603
604
605
606void
607isNFD_NO(uv)
608 UV uv
609 PROTOTYPE: $
610 ALIAS:
611 isNFKD_NO = 1
82e740b6 612 CODE:
8f118dcd 613 if (Hangul_IsS(uv) || (ix ? dec_compat(uv) : dec_canonical(uv)))
614 XSRETURN_YES; /* NFD_NO or NFKD_NO */
615 else
616 XSRETURN_NO;
617
618
619
620void
621isComp_Ex(uv)
622 UV uv
623 PROTOTYPE: $
624 ALIAS:
625 isNFC_NO = 0
626 isNFKC_NO = 1
82e740b6 627 CODE:
8f118dcd 628 if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv))
629 XSRETURN_YES; /* NFC_NO or NFKC_NO */
630 else if (ix) {
631 char *canon, *compat;
632 canon = (char *) dec_canonical(uv);
633 compat = (char *) dec_compat(uv);
634 if (compat && (!canon || strNE(canon, compat)))
635 XSRETURN_YES; /* NFC_NO or NFKC_NO */
636 else
637 XSRETURN_NO;
638 }
639 else
640 XSRETURN_NO;
641
642
ac5ea531 643
2a204b45 644SV*
ac5ea531 645getComposite(uv, uv2)
646 UV uv
647 UV uv2
2a204b45 648 PROTOTYPE: $$
649 PREINIT:
bcdb689b 650 UV composite;
2a204b45 651 CODE:
bcdb689b 652 composite = composite_uv(uv, uv2);
653 RETVAL = composite ? newSVuv(composite) : &PL_sv_undef;
2a204b45 654 OUTPUT:
655 RETVAL
ac5ea531 656
8f118dcd 657
658
ac5ea531 659SV*
660getCanon(uv)
661 UV uv
662 PROTOTYPE: $
663 ALIAS:
664 getCompat = 1
665 PREINIT:
666 U8 * rstr;
667 CODE:
8f118dcd 668 if (Hangul_IsS(uv)) {
ac5ea531 669 SV * dst;
670 dst = newSV(1);
671 (void)SvPOK_only(dst);
672 sv_cat_decompHangul(dst, uv);
673 RETVAL = dst;
674 } else {
675 rstr = ix ? dec_compat(uv) : dec_canonical(uv);
8f118dcd 676 if (!rstr)
677 XSRETURN_UNDEF;
ac5ea531 678 RETVAL = newSVpvn((char *)rstr, strlen((char *)rstr));
679 }
680 SvUTF8_on(RETVAL);
681 OUTPUT:
682 RETVAL
683
82e740b6 684
685void
a092bcfd 686splitOnLastStarter(src)
687 SV * src
82e740b6 688 PREINIT:
a092bcfd 689 SV *svp;
82e740b6 690 STRLEN srclen, retlen;
691 U8 *s, *e, *p;
a092bcfd 692 UV uv;
82e740b6 693 PPCODE:
a092bcfd 694 s = (U8*)sv_2pvunicode(src,&srclen);
82e740b6 695 e = s + srclen;
696
697 for (p = e; s < p; ) {
698 p = utf8_hop(p, -1);
699 if (p < s)
700 croak(ErrHopBeforeStart);
e524f5b2 701 uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
82e740b6 702 if (getCombinClass(uv) == 0) /* Last Starter found */
703 break;
704 }
705
706 svp = sv_2mortal(newSVpvn((char*)s, p - s));
707 SvUTF8_on(svp);
708 XPUSHs(svp);
709
710 svp = sv_2mortal(newSVpvn((char*)p, e - p));
711 SvUTF8_on(svp);
712 XPUSHs(svp);
713