ext/Unicode/Normalize/t/short.t Unicode::Normalize
ext/Unicode/Normalize/t/split.t Unicode::Normalize
ext/Unicode/Normalize/t/test.t Unicode::Normalize
+ext/Unicode/Normalize/t/tie.t Unicode::Normalize
ext/util/make_ext Used by Makefile to execute extension Makefiles
ext/XS/APItest/APItest.pm XS::APItest extension
ext/XS/APItest/APItest.xs XS::APItest extension
Revision history for Perl extension Unicode::Normalize.
+1.01 Tue Jun 13 22:01:53 2006
+ - XSUB: sv_setpvn() needs cast to (char*).
+ - XSUB: avoid double FETCH for tied scalar variables.
+ - added tie.t.
+
1.00 Thu May 25 20:35:06 2006
- Pure Perl: compose($not_canonically_reordered) works like that in XSUB,
where an intervening character with higher combining class blocks
no warnings 'utf8';
-our $VERSION = '1.00';
+our $VERSION = '1.01';
our $PACKAGE = __PACKAGE__;
require Exporter;
return d;
}
-static char * sv_2pvunicode(SV *sv, STRLEN *lp)
+static char* sv_2pvunicode(SV *sv, STRLEN *lp)
{
char *s;
STRLEN len;
- s = (char*)SvPV(sv,len);
+ s = SvPV(sv,len);
if (!SvUTF8(sv)) {
- SV* tmpsv = sv_mortalcopy(sv);
+ SV* tmpsv = sv_2mortal(newSVpvn(s, len));
if (!SvPOK(tmpsv))
- (void)sv_pvn_force(tmpsv,&len);
+ s = SvPV_force(tmpsv,len);
sv_utf8_upgrade(tmpsv);
- s = (char*)SvPV(tmpsv,len);
+ s = SvPV(tmpsv,len);
}
if (lp)
*lp = len;
else {
Renew(seq_ext, seq_max, UNF_cc);
}
- seq_ptr = seq_ext; /* till now use seq_ext */
+ seq_ptr = seq_ext; /* use seq_ext from now */
}
seq_ptr[cc_pos].cc = curCC;
U8* e = s + slen;
U8* dend = d + dlen;
- UV uvS; /* code point of the starter */
+ UV uvS = 0; /* code point of the starter */
bool valid_uvS = FALSE; /* if FALSE, uvS isn't initialized yet */
U8 preCC = 0;
dst = newSVpvn("", 0);
dlen = slen;
New(0, d, dlen+1, U8);
- dend = pv_utf8_decompose(s, slen, &d, dlen, SvTRUE(compat));
+ dend = pv_utf8_decompose(s, slen, &d, dlen, (bool)SvTRUE(compat));
sv_setpvn(dst, (char *)d, dend - d);
SvUTF8_on(dst);
Safefree(d);
for (p = s; p < e; p += retlen) {
U8 *sCan;
UV uvLead;
- STRLEN canlen, canret;
+ STRLEN canlen = 0;
UV uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
if (!retlen)
croak(ErrRetlenIsZero, "checkFCD or -FCC");
sCan = (U8*) dec_canonical(uv);
if (sCan) {
+ STRLEN canret;
canlen = (STRLEN)strlen((char *) sCan);
uvLead = utf8n_to_uvuni(sCan, canlen, &canret, AllowAnyUTF);
if (!canret)
}
if (sCan) {
+ STRLEN canret;
UV uvTrail;
U8* eCan = sCan + canlen;
U8* pCan = utf8_hop(eCan, -1);
-Unicode/Normalize version 1.00
+Unicode/Normalize version 1.01
===================================
Unicode::Normalize - Unicode Normalization Forms
SYNOPSIS
+(1) using function names exported by default:
+
use Unicode::Normalize;
$NFD_string = NFD($string); # Normalization Form D
$NFKD_string = NFKD($string); # Normalization Form KD
$NFKC_string = NFKC($string); # Normalization Form KC
- or
+(2) using function names exported on request:
use Unicode::Normalize 'normalize';
$NFKD_string = normalize('KD', $string); # Normalization Form KD
$NFKC_string = normalize('KC', $string); # Normalization Form KC
-
INSTALLATION
-Perl 5.6.1 or later.
-(Perl 5.8.0 or later is recommended.)
+Perl 5.6.1 or later is required.
+Perl 5.8.1 or later is recommended.
To install this module (XSUB: needs a C compiler), type the following:
DEPENDENCIES
-This module requires other modules and libraries following:
-
-Carp
-Exporter
-File::Copy
-File::Spec
+This module requires some core modules and pragmas,
+and the following files from the Unicode Character Database,
+which are included in recent Perl core distributions.
-unicore/CombiningClass.pl (or unicode/CombiningClass.pl)
-unicore/Decomposition.pl (or unicode/Decomposition.pl)
-unicore/CompositionExclusions.txt (or unicode/CompExcl.txt)
+- unicore/CombiningClass.pl (or unicode/CombiningClass.pl)
+- unicore/Decomposition.pl (or unicode/Decomposition.pl)
+- unicore/CompositionExclusions.txt (or unicode/CompExcl.txt)
-CAVEAT
+CAVEATS
-(1) In the perl-current, unicore/CompExcl.txt
- is renamed unicore/CompositionExclusions.txt.
+(1) After these unicore/*.* files are updated:
-(2) After these unicore/*.* files are updated.
-
- In the case of an XS edition:
+In the case of XSUB:
You must rebuild the module,
as the data will be compiled on building.
- In the case of a pure Perl edition:
+In the case of pure perl:
Rebuilding is not necessary,
as the data will be read on requirement.
-(3) Pure Perl edition, Normalize.pmN, may work without any other file
+(2) Normalize.pmN (pure perl module) may work without any other file
in this distribution (it must be renamed Normalize.pm, though)
-COPYRIGHT AND LICENCE
-
- SADAHIRO Tomoyuki <SADAHIRO@cpan.org>
+COPYRIGHT AND LICENSE
- http://homepage1.nifty.com/nomenclator/perl/
+SADAHIRO Tomoyuki <SADAHIRO@cpan.org>
- Copyright(C) 2001-2006, SADAHIRO Tomoyuki. Japan. All rights reserved.
+Copyright(C) 2001-2006, SADAHIRO Tomoyuki. Japan. All rights reserved.
- This module is free software; you can redistribute it
- and/or modify it under the same terms as Perl itself.
+This module is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
return $r;
}
-ok(uprops(0x0000), 'xsnfbdmckyg');
-ok(uprops(0x0029), 'xsnfbdmckyg');
-ok(uprops(0x0041), 'xsnfbdmckyg');
+ok(uprops(0x0000), 'xsnfbdmckyg'); # NULL
+ok(uprops(0x0029), 'xsnfbdmckyg'); # RIGHT PARENTHESIS
+ok(uprops(0x0041), 'xsnfbdmckyg'); # LATIN CAPITAL LETTER A
ok(uprops(0x00A0), 'xsnfbdmcKyG'); # NO-BREAK SPACE
ok(uprops(0x00C0), 'xsnfbDmcKyg'); # LATIN CAPITAL LETTER A WITH GRAVE
ok(uprops(0x0300), 'xsnfBdMckYg'); # COMBINING GRAVE ACCENT
ok(uprops(0x0F43), 'XsnFbDmCKyG'); # TIBETAN LETTER GHA
ok(uprops(0x1100), 'xsnfbdmckyg'); # HANGUL CHOSEONG KIYEOK
ok(uprops(0x1161), 'xsnfBdMckYg'); # HANGUL JUNGSEONG A
-ok(uprops(0x11AF), 'xsnfBdMckYg'); # HANGUL JONGSEONG RIEU
+ok(uprops(0x11AF), 'xsnfBdMckYg'); # HANGUL JONGSEONG RIEUL
ok(uprops(0x212B), 'xSnFbDmCKyG'); # ANGSTROM SIGN
ok(uprops(0xAC00), 'xsnfbDmcKyg'); # HANGUL SYLLABLE GA
ok(uprops(0xF900), 'xSnFbDmCKyG'); # CJK COMPATIBILITY IDEOGRAPH-F900
#########################
-use Test;
-use strict;
-use warnings;
-
BEGIN {
use Unicode::Normalize qw(:all);
}
}
+use Test;
+use strict;
+use warnings;
+
BEGIN { plan tests => 112 };
#########################
--- /dev/null
+
+BEGIN {
+ unless ("A" eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Normalize " .
+ "cannot stringify a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+#########################
+
+BEGIN {
+ use Unicode::Normalize qw(:all);
+
+ unless (exists &Unicode::Normalize::bootstrap or 5.008 <= $]) {
+ print "1..0 # skipped: XSUB, or Perl 5.8.0 or later".
+ " needed for this test\n";
+ print $@;
+ exit;
+ }
+}
+
+use Test;
+use strict;
+use warnings;
+BEGIN { plan tests => 16 };
+
+package tiescalar; # minimal tied-scalar class whose reads are observable
+sub TIESCALAR {
+ my ($class, $instance) = @_; # $instance: the extra argument given to tie()
+ return bless \$instance => $class; # the object is a reference to that value
+}
+sub FETCH { return ${$_[0]}++ } # yield the current value, then increment it, so each read changes the next result
+sub STORE { return ${$_[0]} = $_[1] } # replace the stored value with the assigned one
+sub DESTROY { undef ${$_[0]} } # clear the stored value when the object is destroyed
+
+#########################
+
+package main;
+
+tie my $tie1, 'tiescalar', "123";
+ok(NFD($tie1), 123);
+ok(NFC($tie1), 124);
+ok(NFKD($tie1), 125);
+ok(NFKC($tie1), 126);
+ok(FCD($tie1), 127);
+ok(FCC($tie1), 128);
+
+tie my $tie2, 'tiescalar', "256";
+ok(normalize('NFD', $tie2), 256);
+ok(normalize('NFC', $tie2), 257);
+ok(normalize('NFKD', $tie2), 258);
+ok(normalize('NFKC', $tie2), 259);
+ok(normalize('FCD', $tie2), 260);
+ok(normalize('FCC', $tie2), 261);
+
+tie my $tie3, 'tiescalar', "315";
+ok(decompose($tie3), 315);
+ok(reorder($tie3), 316);
+ok(compose($tie3), 317);
+ok(composeContiguous($tie3), 318);
+