X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=ext%2FUnicode%2FNormalize%2FNormalize.xs;h=4d5b0b8e8f1efe9bebd781b20086eb989f4d3e47;hb=2a204b451f774baa0cfb5a7a330513820530f0bf;hp=aca08538fbf0b5ea1fe30a1bc2b5b37d478f9e56;hpb=9000ce8c302c8d13490096392c4b2e8f21f767eb;p=p5sagit%2Fp5-mst-13.2.git diff --git a/ext/Unicode/Normalize/Normalize.xs b/ext/Unicode/Normalize/Normalize.xs index aca0853..4d5b0b8 100644 --- a/ext/Unicode/Normalize/Normalize.xs +++ b/ext/Unicode/Normalize/Normalize.xs @@ -44,7 +44,7 @@ #define Hangul_TCount 28 #define Hangul_IsS(u) ((Hangul_SBase <= (u)) && ((u) <= Hangul_SFinal)) -#define Hangul_IsN(u) (! (((u) - Hangul_SBase) % Hangul_TCount)) +#define Hangul_IsN(u) (((u) - Hangul_SBase) % Hangul_TCount == 0) #define Hangul_IsLV(u) (Hangul_IsS(u) && Hangul_IsN(u)) #define Hangul_IsL(u) ((Hangul_LBase <= (u)) && ((u) <= Hangul_LFinal)) #define Hangul_IsV(u) ((Hangul_VBase <= (u)) && ((u) <= Hangul_VFinal)) @@ -86,7 +86,7 @@ U8* dec_compat (UV uv) return row ? row[uv & 0xff] : NULL; } -UV getComposite (UV uv, UV uv2) +UV composite_uv (UV uv, UV uv2) { UNF_complist ***plane, **row, *cell, *i; @@ -99,14 +99,14 @@ UV getComposite (UV uv, UV uv2) } if(Hangul_IsLV(uv) && Hangul_IsT(uv2)) { uv2 -= Hangul_TBase; /* tindex */ - return (uv + uv2); + return(uv + uv2); } plane = UNF_compos[uv >> 16]; if(! plane) return 0; row = plane[(uv >> 8) & 0xff]; - if(! row) return 0; + if(! row) return 0; cell = row[uv & 0xff]; - if(! cell) return 0; + if(! cell) return 0; for(i = cell; i->nextchar; i++) { if(uv2 == i->nextchar) return i->composite; } @@ -126,7 +126,7 @@ U8 getCombinClass (UV uv) void sv_cat_decompHangul (SV* sv, UV uv) { UV sindex, lindex, vindex, tindex; - U8 *t, temp[3 * UTF8_MAXLEN + 1]; + U8 *t, tmp[3 * UTF8_MAXLEN + 1]; if(! Hangul_IsS(uv)) return; @@ -135,27 +135,26 @@ void sv_cat_decompHangul (SV* sv, UV uv) vindex = (sindex % Hangul_NCount) / Hangul_TCount; tindex = sindex % Hangul_TCount; - t = temp; + t = tmp; t = uvuni_to_utf8(t, (lindex + Hangul_LBase)); t = uvuni_to_utf8(t, (vindex + Hangul_VBase)); if (tindex) t = uvuni_to_utf8(t, (tindex + Hangul_TBase)); *t = '\0'; - sv_catpvn(sv, (char *)temp, strlen((char *)temp)); + sv_catpvn(sv, (char *)tmp, strlen((char *)tmp)); } MODULE = Unicode::Normalize PACKAGE = Unicode::Normalize - SV* decompose(arg, compat) SV * arg SV * compat PROTOTYPE: $ PREINIT: + UV uv; SV *src, *dst; STRLEN srclen, dstlen, retlen; U8 *s, *e, *p, *d, *r; - UV uv; bool iscompat; CODE: if(SvUTF8(arg)) { @@ -164,7 +163,6 @@ decompose(arg, compat) src = sv_mortalcopy(arg); sv_utf8_upgrade(src); } - iscompat = SvTRUE(compat); dst = newSV(1); @@ -208,16 +206,18 @@ reorder(arg) s = (U8*)SvPV(src,srclen); e = s + srclen; + for(p = s; p < e;){ U8 *cc_in; STRLEN cc_len, cc_iter, cc_pos; uv = utf8n_to_uvchr(p, e - p, &retlen, 0); - p += retlen; - cc_pos = 0; curCC = getCombinClass(uv); + p += retlen; + if(! (curCC && p < e)) continue; else cc_in = p - retlen; + cc_pos = 0; stk_cc[cc_pos].cc = curCC; stk_cc[cc_pos].uv = uv; stk_cc[cc_pos].pos = cc_pos; @@ -255,7 +255,7 @@ reorder(arg) -void +SV* compose(arg) SV * arg PROTOTYPE: $ @@ -263,19 +263,20 @@ compose(arg) SV *src, *dst, *tmp; U8 *s, *p, *e, *d, *t, *tmp_start, curCC, preCC; UV uv, uvS, uvComp; - STRLEN srclen, dstlen, tmplen, dstcur, retlen; + STRLEN srclen, dstlen, tmplen, retlen; bool beginning = TRUE; - PPCODE: + CODE: if(SvUTF8(arg)) { src = arg; } else { src = sv_mortalcopy(arg); sv_utf8_upgrade(src); } + s = (U8*)SvPV(src, srclen); e = s + srclen; dstlen = srclen + 1; /* equal or shorter, XXX */ - dst = sv_2mortal(newSV(dstlen)); + dst = newSV(dstlen); (void)SvPOK_only(dst); SvUTF8_on(dst); d = (U8*)SvPVX(dst); @@ -311,10 +312,10 @@ compose(arg) preCC = curCC; t = uvuni_to_utf8(t, uv); } else { - uvComp = getComposite(uvS, uv); + uvComp = composite_uv(uvS, uv); /* S + C + S => S-S + C would be also blocked. */ - if( uvComp && ! getExclusion(uvComp) && preCC <= curCC) + if( uvComp && ! isExclusion(uvComp) && preCC <= curCC) { /* preCC not changed to curCC */ uvS = uvComp; @@ -326,16 +327,19 @@ compose(arg) } } } - d = uvuni_to_utf8(d, uvS); /* composed char */ + d = uvuni_to_utf8(d, uvS); /* starter (composed or not) */ if(tmplen = t - tmp_start) { /* uncomposed combining char */ t = (U8*)SvPVX(tmp); while(tmplen--) *d++ = *t++; } uvS = uv; } /* for */ - dstcur = d - (U8*)SvPVX(dst); - SvCUR_set(dst, dstcur); - XPUSHs(dst); + e = d; /* end of dst */ + d = (U8*)SvPVX(dst); + SvCUR_set(dst, e - d); + RETVAL = dst; + OUTPUT: + RETVAL @@ -344,13 +348,21 @@ getCombinClass(uv) UV uv bool -getExclusion(uv) +isExclusion(uv) UV uv -UV +SV* getComposite(uv, uv2) UV uv UV uv2 + PROTOTYPE: $$ + PREINIT: + UV comp; + CODE: + comp = composite_uv(uv, uv2); + RETVAL = comp ? newSVuv(comp) : &PL_sv_undef; + OUTPUT: + RETVAL SV* getCanon(uv)