#define Hangul_TCount 28
#define Hangul_IsS(u) ((Hangul_SBase <= (u)) && ((u) <= Hangul_SFinal))
-#define Hangul_IsN(u) (! (((u) - Hangul_SBase) % Hangul_TCount))
+#define Hangul_IsN(u) (((u) - Hangul_SBase) % Hangul_TCount == 0)
#define Hangul_IsLV(u) (Hangul_IsS(u) && Hangul_IsN(u))
#define Hangul_IsL(u) ((Hangul_LBase <= (u)) && ((u) <= Hangul_LFinal))
#define Hangul_IsV(u) ((Hangul_VBase <= (u)) && ((u) <= Hangul_VFinal))
return row ? row[uv & 0xff] : NULL;
}
-UV getComposite (UV uv, UV uv2)
+UV composite_uv (UV uv, UV uv2)
{
UNF_complist ***plane, **row, *cell, *i;
}
if(Hangul_IsLV(uv) && Hangul_IsT(uv2)) {
uv2 -= Hangul_TBase; /* tindex */
- return (uv + uv2);
+ return(uv + uv2);
}
plane = UNF_compos[uv >> 16];
if(! plane) return 0;
row = plane[(uv >> 8) & 0xff];
- if(! row) return 0;
+ if(! row) return 0;
cell = row[uv & 0xff];
- if(! cell) return 0;
+ if(! cell) return 0;
for(i = cell; i->nextchar; i++) {
if(uv2 == i->nextchar) return i->composite;
}
void sv_cat_decompHangul (SV* sv, UV uv)
{
UV sindex, lindex, vindex, tindex;
- U8 *t, temp[3 * UTF8_MAXLEN + 1];
+ U8 *t, tmp[3 * UTF8_MAXLEN + 1];
if(! Hangul_IsS(uv)) return;
vindex = (sindex % Hangul_NCount) / Hangul_TCount;
tindex = sindex % Hangul_TCount;
- t = temp;
+ t = tmp;
t = uvuni_to_utf8(t, (lindex + Hangul_LBase));
t = uvuni_to_utf8(t, (vindex + Hangul_VBase));
if (tindex) t = uvuni_to_utf8(t, (tindex + Hangul_TBase));
*t = '\0';
- sv_catpvn(sv, (char *)temp, strlen((char *)temp));
+ sv_catpvn(sv, (char *)tmp, strlen((char *)tmp));
}
MODULE = Unicode::Normalize PACKAGE = Unicode::Normalize
-
SV*
decompose(arg, compat)
SV * arg
SV * compat
PROTOTYPE: $
PREINIT:
- SV *src, *dst;
- STRLEN srclen, dstlen, retlen;
- U8 *s, *e, *p, *d, *r;
UV uv;
+ SV *src, *dst;
+ STRLEN srclen, retlen;
+ U8 *s, *e, *p, *r;
bool iscompat;
CODE:
if(SvUTF8(arg)) {
src = sv_mortalcopy(arg);
sv_utf8_upgrade(src);
}
-
iscompat = SvTRUE(compat);
dst = newSV(1);
s = (U8*)SvPV(src,srclen);
e = s + srclen;
+
for(p = s; p < e;){
U8 *cc_in;
STRLEN cc_len, cc_iter, cc_pos;
uv = utf8n_to_uvchr(p, e - p, &retlen, 0);
- p += retlen;
- cc_pos = 0;
curCC = getCombinClass(uv);
+ p += retlen;
+
if(! (curCC && p < e)) continue; else cc_in = p - retlen;
+ cc_pos = 0;
stk_cc[cc_pos].cc = curCC;
stk_cc[cc_pos].uv = uv;
stk_cc[cc_pos].pos = cc_pos;
-void
+SV*
compose(arg)
SV * arg
PROTOTYPE: $
SV *src, *dst, *tmp;
U8 *s, *p, *e, *d, *t, *tmp_start, curCC, preCC;
UV uv, uvS, uvComp;
- STRLEN srclen, dstlen, tmplen, dstcur, retlen;
+ STRLEN srclen, dstlen, tmplen, retlen;
bool beginning = TRUE;
- PPCODE:
+ CODE:
if(SvUTF8(arg)) {
src = arg;
} else {
src = sv_mortalcopy(arg);
sv_utf8_upgrade(src);
}
+
s = (U8*)SvPV(src, srclen);
e = s + srclen;
dstlen = srclen + 1; /* equal or shorter, XXX */
- dst = sv_2mortal(newSV(dstlen));
+ dst = newSV(dstlen);
(void)SvPOK_only(dst);
SvUTF8_on(dst);
d = (U8*)SvPVX(dst);
preCC = curCC;
t = uvuni_to_utf8(t, uv);
} else {
- uvComp = getComposite(uvS, uv);
+ uvComp = composite_uv(uvS, uv);
/* S + C + S => S-S + C would be also blocked. */
- if( uvComp && ! getExclusion(uvComp) && preCC <= curCC)
+ if( uvComp && ! isExclusion(uvComp) && preCC <= curCC)
{
/* preCC not changed to curCC */
uvS = uvComp;
}
}
}
- d = uvuni_to_utf8(d, uvS); /* composed char */
- if(tmplen = t - tmp_start) { /* uncomposed combining char */
+ d = uvuni_to_utf8(d, uvS); /* starter (composed or not) */
+ if((tmplen = t - tmp_start)) { /* uncomposed combining char */
t = (U8*)SvPVX(tmp);
while(tmplen--) *d++ = *t++;
}
uvS = uv;
} /* for */
- dstcur = d - (U8*)SvPVX(dst);
- SvCUR_set(dst, dstcur);
- XPUSHs(dst);
+ e = d; /* end of dst */
+ d = (U8*)SvPVX(dst);
+ SvCUR_set(dst, e - d);
+ RETVAL = dst;
+ OUTPUT:
+ RETVAL
UV uv
bool
-getExclusion(uv)
+isExclusion(uv)
UV uv
-UV
+SV*
getComposite(uv, uv2)
UV uv
UV uv2
+ PROTOTYPE: $$
+ PREINIT:
+ UV comp;
+ CODE:
+ comp = composite_uv(uv, uv2);
+ RETVAL = comp ? newSVuv(comp) : &PL_sv_undef;
+ OUTPUT:
+ RETVAL
SV*
getCanon(uv)