From: Rafael Garcia-Suarez Date: Tue, 20 Jun 2006 12:44:27 +0000 (+0000) Subject: Upgrade to Unicode::Normalize 1.01 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=39f4556fa6ad8437dfb47fa415d469ee118f4f4d;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Unicode::Normalize 1.01 p4raw-id: //depot/perl@28406 --- diff --git a/MANIFEST b/MANIFEST index dbd71ac..72352a2 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1212,6 +1212,7 @@ ext/Unicode/Normalize/t/proto.t Unicode::Normalize ext/Unicode/Normalize/t/short.t Unicode::Normalize ext/Unicode/Normalize/t/split.t Unicode::Normalize ext/Unicode/Normalize/t/test.t Unicode::Normalize +ext/Unicode/Normalize/t/tie.t Unicode::Normalize ext/util/make_ext Used by Makefile to execute extension Makefiles ext/XS/APItest/APItest.pm XS::APItest extension ext/XS/APItest/APItest.xs XS::APItest extension diff --git a/ext/Unicode/Normalize/Changes b/ext/Unicode/Normalize/Changes index 8d05a3d..b7eb1d5 100644 --- a/ext/Unicode/Normalize/Changes +++ b/ext/Unicode/Normalize/Changes @@ -1,5 +1,10 @@ Revision history for Perl extension Unicode::Normalize. +1.01 Tue Jun 13 22:01:53 2006 + - XSUB: sv_setpvn() needs cast to (char*). + - XSUB: avoid double FETCH for tied scalar variables. + - added tie.t. + 1.00 Thu May 25 20:35:06 2006 - Pure Perl: compose($not_canonically_reordered) works like that in XSUB, where an intervening character with higher combining class blocks diff --git a/ext/Unicode/Normalize/Normalize.pm b/ext/Unicode/Normalize/Normalize.pm index 16d7664..b51177d 100644 --- a/ext/Unicode/Normalize/Normalize.pm +++ b/ext/Unicode/Normalize/Normalize.pm @@ -13,7 +13,7 @@ use Carp; no warnings 'utf8'; -our $VERSION = '1.00'; +our $VERSION = '1.01'; our $PACKAGE = __PACKAGE__; require Exporter; diff --git a/ext/Unicode/Normalize/Normalize.xs b/ext/Unicode/Normalize/Normalize.xs index 3dea0f4..b051e2a 100644 --- a/ext/Unicode/Normalize/Normalize.xs +++ b/ext/Unicode/Normalize/Normalize.xs @@ -181,17 +181,17 @@ static U8* pv_cat_decompHangul(U8* d, UV uv) return d; } -static char * sv_2pvunicode(SV *sv, STRLEN *lp) +static char* sv_2pvunicode(SV *sv, STRLEN *lp) { char *s; STRLEN len; - s = (char*)SvPV(sv,len); + s = SvPV(sv,len); if (!SvUTF8(sv)) { - SV* tmpsv = sv_mortalcopy(sv); + SV* tmpsv = sv_2mortal(newSVpvn(s, len)); if (!SvPOK(tmpsv)) - (void)sv_pvn_force(tmpsv,&len); + s = SvPV_force(tmpsv,len); sv_utf8_upgrade(tmpsv); - s = (char*)SvPV(tmpsv,len); + s = SvPV(tmpsv,len); } if (lp) *lp = len; @@ -292,7 +292,7 @@ U8* pv_utf8_reorder(U8* s, STRLEN slen, U8* d, STRLEN dlen) else { Renew(seq_ext, seq_max, UNF_cc); } - seq_ptr = seq_ext; /* till now use seq_ext */ + seq_ptr = seq_ext; /* use seq_ext from now */ } seq_ptr[cc_pos].cc = curCC; @@ -336,7 +336,7 @@ U8* pv_utf8_compose(U8* s, STRLEN slen, U8* d, STRLEN dlen, bool iscontig) U8* e = s + slen; U8* dend = d + dlen; - UV uvS; /* code point of the starter */ + UV uvS = 0; /* code point of the starter */ bool valid_uvS = FALSE; /* if FALSE, uvS isn't initialized yet */ U8 preCC = 0; @@ -464,7 +464,7 @@ decompose(src, compat = &PL_sv_no) dst = newSVpvn("", 0); dlen = slen; New(0, d, dlen+1, U8); - dend = pv_utf8_decompose(s, slen, &d, dlen, SvTRUE(compat)); + dend = pv_utf8_decompose(s, slen, &d, dlen, (bool)SvTRUE(compat)); sv_setpvn(dst, (char *)d, dend - d); SvUTF8_on(dst); Safefree(d); @@ -691,7 +691,7 @@ checkFCD(src) for (p = s; p < e; p += retlen) { U8 *sCan; UV uvLead; - STRLEN canlen, canret; + STRLEN canlen = 0; UV uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF); if (!retlen) croak(ErrRetlenIsZero, "checkFCD or -FCC"); @@ -699,6 +699,7 @@ checkFCD(src) sCan = (U8*) dec_canonical(uv); if (sCan) { + STRLEN canret; canlen = (STRLEN)strlen((char *) sCan); uvLead = utf8n_to_uvuni(sCan, canlen, &canret, AllowAnyUTF); if (!canret) @@ -721,6 +722,7 @@ checkFCD(src) } if (sCan) { + STRLEN canret; UV uvTrail; U8* eCan = sCan + canlen; U8* pCan = utf8_hop(eCan, -1); diff --git a/ext/Unicode/Normalize/README b/ext/Unicode/Normalize/README index e70d7ea..79a2b5e 100644 --- a/ext/Unicode/Normalize/README +++ b/ext/Unicode/Normalize/README @@ -1,10 +1,12 @@ -Unicode/Normalize version 1.00 +Unicode/Normalize version 1.01 =================================== Unicode::Normalize - Unicode Normalization Forms SYNOPSIS +(1) using function names exported by default: + use Unicode::Normalize; $NFD_string = NFD($string); # Normalization Form D @@ -12,7 +14,7 @@ SYNOPSIS $NFKD_string = NFKD($string); # Normalization Form KD $NFKC_string = NFKC($string); # Normalization Form KC - or +(2) using function names exported on request: use Unicode::Normalize 'normalize'; @@ -21,11 +23,10 @@ SYNOPSIS $NFKD_string = normalize('KD', $string); # Normalization Form KD $NFKC_string = normalize('KC', $string); # Normalization Form KC - INSTALLATION -Perl 5.6.1 or later. -(Perl 5.8.0 or later is recommended.) +Perl 5.6.1 or later is required. +Perl 5.8.1 or later is recommended. To install this module (XSUB: needs a C compiler), type the following: @@ -55,42 +56,34 @@ type the following (!! "enableXS" must run before "Makefile.PL" !!): DEPENDENCIES -This module requires other modules and libraries following: - -Carp -Exporter -File::Copy -File::Spec +This module requires some core modules and pragmas, +and the following files from Unicode Character Database, +which are included in recent perl core distributions. -unicore/CombiningClass.pl (or unicode/CombiningClass.pl) -unicore/Decomposition.pl (or unicode/Decomposition.pl) -unicore/CompositionExclusions.txt (or unicode/CompExcl.txt) +- unicore/CombiningClass.pl (or unicode/CombiningClass.pl) +- unicore/Decomposition.pl (or unicode/Decomposition.pl) +- unicore/CompositionExclusions.txt (or unicode/CompExcl.txt) -CAVEAT +CAVEATS -(1) In the perl-current, unicore/CompExcl.txt - is renamed unicore/CompositionExclusions.txt. +(1) After these unicore/*.* files are updated: -(2) After these unicore/*.* files are updated. - - In the case of an XS edition: +In the case of XSUB: You must rebuild the module, as the data will be compiled on building. - In the case of a pure Perl edition: +In the case of pure perl: Rebuilding is not necessary, as the data will be read on requirement. -(3) Pure Perl edition, Normalize.pmN, may work without any other file +(2) Normalize.pmN (pure perl module) may work without any other file in this distribution (it must be renamed Normalize.pm, though) -COPYRIGHT AND LICENCE - - SADAHIRO Tomoyuki +COPYRIGHT AND LICENSE - http://homepage1.nifty.com/nomenclator/perl/ +SADAHIRO Tomoyuki - Copyright(C) 2001-2006, SADAHIRO Tomoyuki. Japan. All rights reserved. +Copyright(C) 2001-2006, SADAHIRO Tomoyuki. Japan. All rights reserved. - This module is free software; you can redistribute it - and/or modify it under the same terms as Perl itself. +This module is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. diff --git a/ext/Unicode/Normalize/t/func.t b/ext/Unicode/Normalize/t/func.t index 81421ce..794fff6 100644 --- a/ext/Unicode/Normalize/t/func.t +++ b/ext/Unicode/Normalize/t/func.t @@ -101,9 +101,9 @@ sub uprops { return $r; } -ok(uprops(0x0000), 'xsnfbdmckyg'); -ok(uprops(0x0029), 'xsnfbdmckyg'); -ok(uprops(0x0041), 'xsnfbdmckyg'); +ok(uprops(0x0000), 'xsnfbdmckyg'); # NULL +ok(uprops(0x0029), 'xsnfbdmckyg'); # RIGHT PARENTHESIS +ok(uprops(0x0041), 'xsnfbdmckyg'); # LATIN CAPITAL LETTER A ok(uprops(0x00A0), 'xsnfbdmcKyG'); # NO-BREAK SPACE ok(uprops(0x00C0), 'xsnfbDmcKyg'); # LATIN CAPITAL LETTER A WITH GRAVE ok(uprops(0x0300), 'xsnfBdMckYg'); # COMBINING GRAVE ACCENT @@ -113,7 +113,7 @@ ok(uprops(0x0958), 'XsnFbDmCKyG'); # DEVANAGARI LETTER QA ok(uprops(0x0F43), 'XsnFbDmCKyG'); # TIBETAN LETTER GHA ok(uprops(0x1100), 'xsnfbdmckyg'); # HANGUL CHOSEONG KIYEOK ok(uprops(0x1161), 'xsnfBdMckYg'); # HANGUL JUNGSEONG A -ok(uprops(0x11AF), 'xsnfBdMckYg'); # HANGUL JONGSEONG RIEU +ok(uprops(0x11AF), 'xsnfBdMckYg'); # HANGUL JONGSEONG RIEUL ok(uprops(0x212B), 'xSnFbDmCKyG'); # ANGSTROM SIGN ok(uprops(0xAC00), 'xsnfbDmcKyg'); # HANGUL SYLLABLE GA ok(uprops(0xF900), 'xSnFbDmCKyG'); # CJK COMPATIBILITY IDEOGRAPH-F900 diff --git a/ext/Unicode/Normalize/t/illegal.t b/ext/Unicode/Normalize/t/illegal.t index 9d18aad..976e509 100644 --- a/ext/Unicode/Normalize/t/illegal.t +++ b/ext/Unicode/Normalize/t/illegal.t @@ -24,10 +24,6 @@ BEGIN { ######################### -use Test; -use strict; -use warnings; - BEGIN { use Unicode::Normalize qw(:all); @@ -39,6 +35,10 @@ BEGIN { } } +use Test; +use strict; +use warnings; + BEGIN { plan tests => 112 }; ######################### diff --git a/ext/Unicode/Normalize/t/tie.t b/ext/Unicode/Normalize/t/tie.t new file mode 100644 index 0000000..c721491 --- /dev/null +++ b/ext/Unicode/Normalize/t/tie.t @@ -0,0 +1,69 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; + exit 0; + } +} + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +######################### + +BEGIN { + use Unicode::Normalize qw(:all); + + unless (exists &Unicode::Normalize::bootstrap or 5.008 <= $]) { + print "1..0 # skipped: XSUB, or Perl 5.8.0 or later". + " needed for this test\n"; + print $@; + exit; + } +} + +use Test; +use strict; +use warnings; +BEGIN { plan tests => 16 }; + +package tiescalar; +sub TIESCALAR { + my ($class, $instance) = @_; + return bless \$instance => $class; +} +sub FETCH { return ${$_[0]}++ } +sub STORE { return ${$_[0]} = $_[1] } +sub DESTROY { undef ${$_[0]} } + +######################### + +package main; + +tie my $tie1, 'tiescalar', "123"; +ok(NFD($tie1), 123); +ok(NFC($tie1), 124); +ok(NFKD($tie1), 125); +ok(NFKC($tie1), 126); +ok(FCD($tie1), 127); +ok(FCC($tie1), 128); + +tie my $tie2, 'tiescalar', "256"; +ok(normalize('NFD', $tie2), 256); +ok(normalize('NFC', $tie2), 257); +ok(normalize('NFKD', $tie2), 258); +ok(normalize('NFKC', $tie2), 259); +ok(normalize('FCD', $tie2), 260); +ok(normalize('FCC', $tie2), 261); + +tie my $tie3, 'tiescalar', "315"; +ok(decompose($tie3), 315); +ok(reorder($tie3), 316); +ok(compose($tie3), 317); +ok(composeContiguous($tie3), 318); +