From: Rafael Garcia-Suarez Date: Tue, 8 Jun 2004 18:42:29 +0000 (+0000) Subject: Upgrade to Unicode::Normalize 0.30. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=a092bcfd0f738c1ea0511ee4fa058ee96e488fcb;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Unicode::Normalize 0.30. p4raw-id: //depot/perl@22911 --- diff --git a/ext/Unicode/Normalize/Changes b/ext/Unicode/Normalize/Changes index e63656c..bb1b693 100644 --- a/ext/Unicode/Normalize/Changes +++ b/ext/Unicode/Normalize/Changes @@ -1,5 +1,9 @@ Revision history for Perl extension Unicode::Normalize. +0.30 Sun May 2 14:35:00 2004 + - XSUB: (perl 5.8.1 or later) improved utf8 upgrade of non-POK (private POK) + values like tied scalars, overloaded objects, etc. + 0.28 Sat Nov 22 23:46:24 2003 - XSUB: even if string contains a malformed, "short" Unicode character, decompose() and reorder() will be safe. Garbage will be no longer added. diff --git a/ext/Unicode/Normalize/Normalize.pm b/ext/Unicode/Normalize/Normalize.pm index ef8ec68..09ef371 100644 --- a/ext/Unicode/Normalize/Normalize.pm +++ b/ext/Unicode/Normalize/Normalize.pm @@ -13,7 +13,7 @@ use Carp; no warnings 'utf8'; -our $VERSION = '0.28'; +our $VERSION = '0.30'; our $PACKAGE = __PACKAGE__; require Exporter; @@ -117,6 +117,8 @@ Unicode::Normalize - Unicode Normalization Forms =head1 SYNOPSIS +(1) using function names exported by default: + use Unicode::Normalize; $NFD_string = NFD($string); # Normalization Form D @@ -124,7 +126,7 @@ Unicode::Normalize - Unicode Normalization Forms $NFKD_string = NFKD($string); # Normalization Form KD $NFKC_string = NFKC($string); # Normalization Form KC - or +(2) using function names exported on request: use Unicode::Normalize 'normalize'; @@ -382,11 +384,11 @@ C and other some functions: on request. =head1 AUTHOR -SADAHIRO Tomoyuki, +SADAHIRO Tomoyuki http://homepage1.nifty.com/nomenclator/perl/ - Copyright(C) 2001-2003, SADAHIRO Tomoyuki. Japan. All rights reserved. + Copyright(C) 2001-2004, SADAHIRO Tomoyuki. Japan. All rights reserved. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. diff --git a/ext/Unicode/Normalize/Normalize.xs b/ext/Unicode/Normalize/Normalize.xs index 2ce7cbc..13544c9 100644 --- a/ext/Unicode/Normalize/Normalize.xs +++ b/ext/Unicode/Normalize/Normalize.xs @@ -182,34 +182,44 @@ static void sv_cat_uvuni (SV* sv, UV uv) sv_catpvn(sv, (char *)tmp, t - tmp); } +static char * sv_2pvunicode(SV *sv, STRLEN *lp) +{ + char *s; + STRLEN len; + s = (char*)SvPV(sv,len); + if (!SvUTF8(sv)) { + SV* tmpsv = sv_mortalcopy(sv); + if (!SvPOK(tmpsv)) + (void)sv_pvn_force(tmpsv,&len); + sv_utf8_upgrade(tmpsv); + s = (char*)SvPV(tmpsv,len); + } + *lp = len; + return s; +} + MODULE = Unicode::Normalize PACKAGE = Unicode::Normalize SV* -decompose(arg, compat = &PL_sv_no) - SV * arg +decompose(src, compat = &PL_sv_no) + SV * src SV * compat PROTOTYPE: $;$ PREINIT: - UV uv; - SV *src, *dst; + SV *dst; STRLEN srclen, retlen; U8 *s, *e, *p, *r; + UV uv; bool iscompat; CODE: - if (SvUTF8(arg)) { - src = arg; - } else { - src = sv_mortalcopy(arg); - sv_utf8_upgrade(src); - } iscompat = SvTRUE(compat); + s = (U8*)sv_2pvunicode(src,&srclen); + e = s + srclen; dst = newSV(1); (void)SvPOK_only(dst); SvUTF8_on(dst); - s = (U8*)SvPV(src,srclen); - e = s + srclen; for (p = s; p < e; p += retlen) { uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF); if (!retlen) @@ -232,11 +242,11 @@ decompose(arg, compat = &PL_sv_no) SV* -reorder(arg) - SV * arg +reorder(src) + SV * src PROTOTYPE: $ PREINIT: - SV *src, *dst; + SV *dst; STRLEN srclen, dstlen, retlen, stk_cc_max; U8 *s, *e, *p, *d, curCC; UV uv, uvlast; @@ -244,15 +254,9 @@ reorder(arg) STRLEN i, cc_pos; bool valid_uvlast; CODE: - if (SvUTF8(arg)) { - src = arg; - } else { - src = sv_mortalcopy(arg); - sv_utf8_upgrade(src); - } - - s = (U8*)SvPV(src, srclen); + s = (U8*)sv_2pvunicode(src,&srclen); e = s + srclen; + dstlen = srclen + 1; dst = newSV(dstlen); (void)SvPOK_only(dst); @@ -326,27 +330,21 @@ reorder(arg) SV* -compose(arg) - SV * arg +compose(src) + SV * src PROTOTYPE: $ ALIAS: composeContiguous = 1 PREINIT: - SV *src, *dst, *tmp; + SV *dst, *tmp; U8 *s, *p, *e, *d, *t, *tmp_start, curCC, preCC; UV uv, uvS, uvComp; STRLEN srclen, dstlen, tmplen, retlen; bool beginning = TRUE; CODE: - if (SvUTF8(arg)) { - src = arg; - } else { - src = sv_mortalcopy(arg); - sv_utf8_upgrade(src); - } - - s = (U8*)SvPV(src, srclen); + s = (U8*)sv_2pvunicode(src,&srclen); e = s + srclen; + dstlen = srclen + 1; dst = newSV(dstlen); (void)SvPOK_only(dst); @@ -429,25 +427,17 @@ compose(arg) void -checkNFD(arg) - SV * arg +checkNFD(src) + SV * src PROTOTYPE: $ ALIAS: checkNFKD = 1 PREINIT: - UV uv; - SV *src; STRLEN srclen, retlen; U8 *s, *e, *p, curCC, preCC; + UV uv; CODE: - if (SvUTF8(arg)) { - src = arg; - } else { - src = sv_mortalcopy(arg); - sv_utf8_upgrade(src); - } - - s = (U8*)SvPV(src,srclen); + s = (U8*)sv_2pvunicode(src,&srclen); e = s + srclen; preCC = 0; @@ -468,26 +458,18 @@ checkNFD(arg) void -checkNFC(arg) - SV * arg +checkNFC(src) + SV * src PROTOTYPE: $ ALIAS: checkNFKC = 1 PREINIT: - UV uv; - SV *src; STRLEN srclen, retlen; U8 *s, *e, *p, curCC, preCC; + UV uv; bool isMAYBE; CODE: - if (SvUTF8(arg)) { - src = arg; - } else { - src = sv_mortalcopy(arg); - sv_utf8_upgrade(src); - } - - s = (U8*)SvPV(src,srclen); + s = (U8*)sv_2pvunicode(src,&srclen); e = s + srclen; preCC = 0; @@ -528,27 +510,19 @@ checkNFC(arg) void -checkFCD(arg) - SV * arg +checkFCD(src) + SV * src PROTOTYPE: $ ALIAS: checkFCC = 1 PREINIT: - UV uv, uvLead, uvTrail; - SV *src; STRLEN srclen, retlen, canlen, canret; U8 *s, *e, *p, curCC, preCC; + UV uv, uvLead, uvTrail; U8 *sCan, *pCan, *eCan; bool isMAYBE; CODE: - if (SvUTF8(arg)) { - src = arg; - } else { - src = sv_mortalcopy(arg); - sv_utf8_upgrade(src); - } - - s = (U8*)SvPV(src,srclen); + s = (U8*)sv_2pvunicode(src,&srclen); e = s + srclen; preCC = 0; @@ -709,22 +683,15 @@ getCanon(uv) void -splitOnLastStarter(arg) - SV * arg +splitOnLastStarter(src) + SV * src PREINIT: - UV uv; - SV *src, *svp; + SV *svp; STRLEN srclen, retlen; U8 *s, *e, *p; + UV uv; PPCODE: - if (SvUTF8(arg)) { - src = arg; - } else { - src = sv_mortalcopy(arg); - sv_utf8_upgrade(src); - } - - s = (U8*)SvPV(src,srclen); + s = (U8*)sv_2pvunicode(src,&srclen); e = s + srclen; for (p = e; s < p; ) { diff --git a/ext/Unicode/Normalize/t/illegal.t b/ext/Unicode/Normalize/t/illegal.t index 76cd833..9d18aad 100644 --- a/ext/Unicode/Normalize/t/illegal.t +++ b/ext/Unicode/Normalize/t/illegal.t @@ -14,9 +14,16 @@ BEGIN { } } +BEGIN { + unless (5.006001 <= $]) { + print "1..0 # skipped: Perl 5.6.1 or later". + " needed for this test\n"; + exit; + } +} + ######################### -use 5.006001; use Test; use strict; use warnings; diff --git a/ext/Unicode/Normalize/t/short.t b/ext/Unicode/Normalize/t/short.t index 1f185ac..d799f4a 100644 --- a/ext/Unicode/Normalize/t/short.t +++ b/ext/Unicode/Normalize/t/short.t @@ -14,6 +14,14 @@ BEGIN { } } +BEGIN { + unless (5.006001 <= $]) { + print "1..0 # skipped: Perl 5.6.1 or later". + " needed for this test\n"; + exit; + } +} + ######################### use strict; @@ -26,11 +34,8 @@ print "ok 1\n"; no warnings qw(utf8); -our $a = "\x{3042}"; # 3-byte length (in UTF-8/UTF-EBCDIC) -{ - use bytes; - substr($a,1,length($a), ''); # remove trailing octets -} +# U+3042 is 3-byte length (in UTF-8/UTF-EBCDIC) +our $a = pack 'U0C', unpack 'C', "\x{3042}"; print NFD($a) eq "\0" ? "ok" : "not ok", " 2\n"; diff --git a/ext/Unicode/Normalize/t/split.t b/ext/Unicode/Normalize/t/split.t index 03b599e..6bf7ff6 100644 --- a/ext/Unicode/Normalize/t/split.t +++ b/ext/Unicode/Normalize/t/split.t @@ -14,6 +14,14 @@ BEGIN { } } +BEGIN { + unless (5.006001 <= $]) { + print "1..0 # skipped: Perl 5.6.1 or later". + " needed for this test\n"; + exit; + } +} + ######################### use Test;