From: Rafael Garcia-Suarez Date: Sat, 5 Apr 2003 11:28:22 +0000 (+0000) Subject: Upgrade to Unicode::Normalize 0.21 and Unicode::Collate 0.24, X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=9f1f04a17d93e8b8afa26e6ca9144732df879671;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Unicode::Normalize 0.21 and Unicode::Collate 0.24, by SADAHIRO Tomoyuki. p4raw-id: //depot/perl@19144 --- diff --git a/ext/Unicode/Normalize/Changes b/ext/Unicode/Normalize/Changes index 30f5c4a..92b944e 100644 --- a/ext/Unicode/Normalize/Changes +++ b/ext/Unicode/Normalize/Changes @@ -1,5 +1,8 @@ Revision history for Perl extension Unicode::Normalize. +0.21 Thu Apr 02 23:12:54 2003 + - internal tweak: for (?un)pack 'U'. + 0.20 Sun Mar 02 13:29:25 2003 - decompose Hangul syllables in a decomposition mapping. diff --git a/ext/Unicode/Normalize/Normalize.pm b/ext/Unicode/Normalize/Normalize.pm index e0232d3..14c121a 100644 --- a/ext/Unicode/Normalize/Normalize.pm +++ b/ext/Unicode/Normalize/Normalize.pm @@ -1,8 +1,8 @@ package Unicode::Normalize; BEGIN { - if (ord("A") == 193) { - die "Unicode::Normalize not ported to EBCDIC\n"; + unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) { + die "Unicode::Normalize cannot stringify a Unicode code point\n"; } } @@ -11,7 +11,7 @@ use strict; use warnings; use Carp; -our $VERSION = '0.20'; +our $VERSION = '0.21'; our $PACKAGE = __PACKAGE__; require Exporter; @@ -35,6 +35,29 @@ our %EXPORT_TAGS = ( bootstrap Unicode::Normalize $VERSION; +use constant UNICODE_FOR_PACK => "A" eq pack('U', 0x41); +use constant NATIVE_FOR_PACK => "A" eq pack('U', ord("A")); + +use constant UNICODE_FOR_UNPACK => 0x41 == unpack('U', "A"); +use constant NATIVE_FOR_UNPACK => ord("A") == unpack('U', "A"); + +sub pack_U { + return UNICODE_FOR_PACK + ? pack('U*', @_) + : NATIVE_FOR_PACK + ? pack('U*', map utf8::unicode_to_native($_), @_) + : die "$PACKAGE, a Unicode code point cannot be stringified.\n"; +} + +sub unpack_U { + return UNICODE_FOR_UNPACK + ? unpack('U*', shift) + : NATIVE_FOR_UNPACK + ? map(utf8::native_to_unicode($_), unpack 'U*', shift) + : die "$PACKAGE, a code point returned from unpack U " . + "cannot be converted into Unicode.\n"; +} + use constant COMPAT => 1; sub NFD ($) { reorder(decompose($_[0])) } @@ -136,7 +159,7 @@ As C<$form_name>, one of the following names must be given. =item C<$decomposed_string = decompose($string, $useCompatMapping)> -Decompose the specified string and returns the result. +Decomposes the specified string and returns the result. If the second parameter (a boolean) is omitted or false, decomposes it using the Canonical Decomposition Mapping. @@ -150,7 +173,7 @@ Reordering may be required. =item C<$reordered_string = reorder($string)> -Reorder the combining characters and the like in the canonical ordering +Reorders the combining characters and the like in the canonical ordering and returns the result. E.g., when you have a list of NFD/NFKD strings, diff --git a/ext/Unicode/Normalize/README b/ext/Unicode/Normalize/README index f1b1754..8447502 100644 --- a/ext/Unicode/Normalize/README +++ b/ext/Unicode/Normalize/README @@ -1,4 +1,4 @@ -Unicode/Normalize version 0.20 +Unicode/Normalize version 0.21 =================================== Unicode::Normalize - Unicode Normalization Forms diff --git a/ext/Unicode/Normalize/mkheader b/ext/Unicode/Normalize/mkheader index 6cac390..e2c4f12 100644 --- a/ext/Unicode/Normalize/mkheader +++ b/ext/Unicode/Normalize/mkheader @@ -15,7 +15,11 @@ use warnings; use Carp; use File::Spec; -our $IsEBCDIC = ord("A") != 0x41; +BEGIN { + unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) { + die "Unicode::Normalize cannot stringify a Unicode code point\n"; + } +} our $PACKAGE = 'Unicode::Normalize, mkheader'; @@ -197,12 +201,17 @@ foreach my $key (keys %Compat) { $Compat{$key} = [ getCompatList($key) ]; } +sub _pack_U { + return "A" eq pack('U', 0x41) + ? pack('U*', @_) + : "A" eq pack('U', ord("A")) + ? pack('U*', map utf8::unicode_to_native($_), @_) + : die "$PACKAGE, a Unicode code point cannot be stringified.\n"; +} + sub _U_stringify { sprintf '"%s"', join '', - map sprintf("\\x%02x", $_), unpack 'C*', - $IsEBCDIC - ? pack('U*', map utf8::unicode_to_native($_), @_) - : pack('U*', @_); + map sprintf("\\x%02x", $_), unpack 'C*', _pack_U(@_); } foreach my $hash (\%Canon, \%Compat) { diff --git a/ext/Unicode/Normalize/t/func.t b/ext/Unicode/Normalize/t/func.t index d540d99..81e092a 100644 --- a/ext/Unicode/Normalize/t/func.t +++ b/ext/Unicode/Normalize/t/func.t @@ -1,7 +1,8 @@ BEGIN { - if (ord("A") == 193) { - print "1..0 # Unicode::Normalize not ported to EBCDIC\n"; + unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; exit 0; } } @@ -9,7 +10,7 @@ BEGIN { BEGIN { if ($ENV{PERL_CORE}) { chdir('t') if -d 't'; - @INC = qw(../lib); + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); } } @@ -22,19 +23,8 @@ BEGIN { plan tests => 13 }; use Unicode::Normalize qw(:all); ok(1); # If we made it this far, we're ok. -our $IsEBCDIC = ord("A") != 0x41; - -sub _pack_U { - return $IsEBCDIC - ? pack('U*', map utf8::unicode_to_native($_), @_) - : pack('U*', @_); -} - -sub _unpack_U { - return $IsEBCDIC - ? map(utf8::native_to_unicode($_), unpack 'U*', shift) - : unpack('U*', shift); -} +sub _pack_U { Unicode::Normalize::pack_U(@_) } +sub _unpack_U { Unicode::Normalize::unpack_U(@_) } ######################### @@ -50,7 +40,7 @@ print ! defined getCanon( 0) && getCanon(0x00EF) eq _pack_U(0x0069, 0x0308) && getCanon(0x304C) eq _pack_U(0x304B, 0x3099) && getCanon(0x1EA4) eq _pack_U(0x0041, 0x0302, 0x0301) - && getCanon(0x1F82) eq "\x{03B1}\x{0313}\x{0300}\x{0345}" + && getCanon(0x1F82) eq _pack_U(0x03B1, 0x0313, 0x0300, 0x0345) && getCanon(0x1FAF) eq _pack_U(0x03A9, 0x0314, 0x0342, 0x0345) && getCanon(0xAC00) eq _pack_U(0x1100, 0x1161) && getCanon(0xAE00) eq _pack_U(0x1100, 0x1173, 0x11AF) diff --git a/ext/Unicode/Normalize/t/norm.t b/ext/Unicode/Normalize/t/norm.t index 77ca218..76ee255 100644 --- a/ext/Unicode/Normalize/t/norm.t +++ b/ext/Unicode/Normalize/t/norm.t @@ -1,7 +1,8 @@ BEGIN { - if (ord("A") == 193) { - print "1..0 # Unicode::Normalize not ported to EBCDIC\n"; + unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; exit 0; } } @@ -9,7 +10,7 @@ BEGIN { BEGIN { if ($ENV{PERL_CORE}) { chdir('t') if -d 't'; - @INC = qw(../lib); + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); } } @@ -22,19 +23,8 @@ BEGIN { plan tests => 18 }; use Unicode::Normalize qw(normalize); ok(1); # If we made it this far, we're ok. -our $IsEBCDIC = ord("A") != 0x41; - -sub _pack_U { - return $IsEBCDIC - ? pack('U*', map utf8::unicode_to_native($_), @_) - : pack('U*', @_); -} - -sub _unpack_U { - return $IsEBCDIC - ? map(utf8::native_to_unicode($_), unpack 'U*', shift) - : unpack('U*', shift); -} +sub _pack_U { Unicode::Normalize::pack_U(@_) } +sub _unpack_U { Unicode::Normalize::unpack_U(@_) } ######################### diff --git a/ext/Unicode/Normalize/t/test.t b/ext/Unicode/Normalize/t/test.t index db1a536..b98a8b8 100644 --- a/ext/Unicode/Normalize/t/test.t +++ b/ext/Unicode/Normalize/t/test.t @@ -1,7 +1,8 @@ BEGIN { - if (ord("A") == 193) { - print "1..0 # Unicode::Normalize not ported to EBCDIC\n"; + unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; exit 0; } } @@ -9,7 +10,7 @@ BEGIN { BEGIN { if ($ENV{PERL_CORE}) { chdir('t') if -d 't'; - @INC = qw(../lib); + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); } } @@ -22,19 +23,8 @@ BEGIN { plan tests => 20 }; use Unicode::Normalize; ok(1); # If we made it this far, we're ok. -our $IsEBCDIC = ord("A") != 0x41; - -sub _pack_U { - return $IsEBCDIC - ? pack('U*', map utf8::unicode_to_native($_), @_) - : pack('U*', @_); -} - -sub _unpack_U { - return $IsEBCDIC - ? map(utf8::native_to_unicode($_), unpack 'U*', shift) - : unpack('U*', shift); -} +sub _pack_U { Unicode::Normalize::pack_U(@_) } +sub _unpack_U { Unicode::Normalize::unpack_U(@_) } ######################### diff --git a/lib/Unicode/Collate.pm b/lib/Unicode/Collate.pm index 5193559..a753808 100644 --- a/lib/Unicode/Collate.pm +++ b/lib/Unicode/Collate.pm @@ -1,8 +1,8 @@ package Unicode::Collate; BEGIN { - if (ord("A") == 193) { - die "Unicode::Collate not ported to EBCDIC\n"; + unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) { + die "Unicode::Collate cannot stringify a Unicode code point\n"; } } @@ -14,11 +14,7 @@ use File::Spec; require Exporter; -# Supporting on EBCDIC platform is not tested. -# Tester(s) welcome! -our $IsEBCDIC = ord("A") != 0x41; - -our $VERSION = '0.23'; +our $VERSION = '0.24'; our $PACKAGE = __PACKAGE__; our @ISA = qw(Exporter); @@ -37,7 +33,7 @@ eval { require Unicode::UCD }; unless ($@) { $UNICODE_VERSION = Unicode::UCD::UnicodeVersion(); } -else { # XXX, Perl 5.6.1 +else { # Perl 5.6.1 my($f, $fh); foreach my $d (@INC) { $f = File::Spec->catfile($d, "unicode", "Unicode.301"); @@ -59,17 +55,21 @@ use constant NOMATCHPOS => -1; # This is also used as a HAS_UNICODE_NORMALIZE flag. our $getCombinClass; +# Supported Levels +use constant MinLevel => 1; +use constant MaxLevel => 4; + # Minimum weights at level 2 and 3, respectively -use constant Min2 => 0x20; -use constant Min3 => 0x02; +use constant Min2Wt => 0x20; +use constant Min3Wt => 0x02; # Shifted weight at 4th level -use constant Shift4 => 0xFFFF; +use constant Shift4Wt => 0xFFFF; # Variable weight at 1st level. # This is a negative value but should be regarded as zero on collation. # This is for distinction of variable chars from level 3 ignorable chars. -use constant Var1 => -1; +use constant Var1Wt => -1; # A boolean for Variable and 16-bit weights at 4 levels of Collation Element @@ -79,10 +79,6 @@ use constant Var1 => -1; # other than "shift" (as well as "shift-trimmed") is unreliable. use constant VCE_TEMPLATE => 'Cn4'; -# Unicode encoding of strings to be collated -# TODO: 'N*' for UTF-32BE, 'V*' for UTF-32LE. -use constant UTF_TEMPLATE => 'U*'; - # A sort key: 16-bit weights # See also the PROBLEM on VCE_TEMPLATE above. use constant KEY_TEMPLATE => 'n*'; @@ -113,6 +109,33 @@ sub UCA_Version { "9" } sub Base_Unicode_Version { $UNICODE_VERSION || 'unknown' } +###### + +use constant UNICODE_FOR_PACK => ("A" eq pack('U', 0x41)); +use constant NATIVE_FOR_PACK => ("A" eq pack('U', ord("A"))); + +use constant UNICODE_FOR_UNPACK => (0x41 == unpack('U', "A")); +use constant NATIVE_FOR_UNPACK => (ord("A") == unpack('U', "A")); + +sub pack_U { + return UNICODE_FOR_PACK + ? pack('U*', @_) + : NATIVE_FOR_PACK + ? pack('U*', map utf8::unicode_to_native($_), @_) + : die "$PACKAGE, a Unicode code point cannot be stringified.\n"; +} + +sub unpack_U { + return UNICODE_FOR_UNPACK + ? unpack('U*', shift) + : NATIVE_FOR_UNPACK + ? map(utf8::native_to_unicode($_), unpack 'U*', shift) + : die "$PACKAGE, a code point returned from unpack U " . + "cannot be converted into Unicode.\n"; +} + +###### + my (%AlternateOK); @AlternateOK{ qw/ blanked non-ignorable shifted shift-trimmed @@ -125,13 +148,15 @@ our @ChangeOK = qw/ /; our @ChangeNG = qw/ - entry entries table combining maxlength + entry entries table maxlength ignoreChar ignoreName undefChar undefName versionTable alternateTable backwardsTable forwardsTable rearrangeTable derivCode normCode rearrangeHash L3_ignorable + backwardsFlag /; -# The hash key 'ignored' is deleted at VERSION 0.21. -# The hash key 'isShift' are deleted at VERSION 0.23. +# The hash key 'ignored' is deleted at v 0.21. +# The hash key 'isShift' is deleted at v 0.23. +# The hash key 'combining' is deleted at v 0.24. my (%ChangeOK, %ChangeNG); @ChangeOK{ @ChangeOK } = (); @@ -155,12 +180,18 @@ sub change { return wantarray ? %old : $self; } +sub _checkLevel { + my $level = shift; + my $key = shift; + croak sprintf "Illegal level %d (in \$self->{%s}) lower than %d.", + $level, $key, MinLevel if MinLevel > $level; + croak sprintf "Unsupported level %d (in \$self->{%s}) higher than %d ", + $level, $key, MaxLevel if MaxLevel < $level; +} + sub checkCollator { my $self = shift; - croak "Illegal level lower than 1 (passed $self->{level})." - if $self->{level} < 1; - croak "A level higher than 4 (passed $self->{level}) is not supported." - if 4 < $self->{level}; + _checkLevel($self->{level}, "level"); $self->{derivCode} = $self->{UCA_Version} == 8 ? \&_derivCE_8 : @@ -171,10 +202,24 @@ sub checkCollator { croak "$PACKAGE unknown alternate tag name: $self->{alternate}" unless exists $AlternateOK{ $self->{alternate} }; - $self->{backwards} = [] - if ! defined $self->{backwards}; - $self->{backwards} = [ $self->{backwards} ] - if ! ref $self->{backwards}; + if (! defined $self->{backwards}) { + $self->{backwardsFlag} = 0; + } + elsif (! ref $self->{backwards}) { + _checkLevel($self->{backwards}, "backwards"); + $self->{backwardsFlag} = 1 << $self->{backwards}; + } + else { + my %level; + $self->{backwardsFlag} = 0; + for my $b (@{ $self->{backwards} }) { + _checkLevel($b, "backwards"); + $level{$b} = 1; + } + for my $v (sort keys %level) { + $self->{backwardsFlag} += 1 << $v; + } + } $self->{rearrange} = [] if ! defined $self->{rearrange}; @@ -223,7 +268,7 @@ sub new $self->parseEntry($_) foreach split /\n/, $self->{entry}; } - $self->{level} ||= 4; + $self->{level} ||= MaxLevel; $self->{UCA_Version} ||= UCA_Version(); $self->{overrideHangul} = '' @@ -305,10 +350,7 @@ sub parseEntry $entry = join(CODE_SEP, @uv); # in JCPS if (defined $self->{undefChar} || defined $self->{ignoreChar}) { - # Do not use UTF_TEMPLATE; Perl' RE is only for utf8. - my $ele = $IsEBCDIC - ? pack('U*', map utf8::unicode_to_native($_), @uv) - : pack('U*', @uv); + my $ele = pack_U(@uv); # regarded as if it were not entried in the table return @@ -323,15 +365,12 @@ sub parseEntry $k = '[.0000.0000.0000.0000]' if defined $self->{ignoreName} && $name =~ /$self->{ignoreName}/; - my $combining = TRUE; # primary = 0, secondary != 0; my $is_L3_ignorable; foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient. my @wt = _getHexArray($arr); push @key, pack(VCE_TEMPLATE, $var, @wt); - $combining = FALSE - unless $wt[0] == 0 && $wt[1] != 0; $is_L3_ignorable = TRUE if $wt[0] + $wt[1] + $wt[2] == 0; # if $arr !~ /[1-9A-Fa-f]/; NG @@ -340,11 +379,6 @@ sub parseEntry $self->{entries}{$entry} = \@key; - $self->{combining}{$entry} = TRUE - if $combining; - - # The key is a string representing a numeral code point. - $self->{L3_ignorable}{$uv[0]} = TRUE if @uv == 1 && $is_L3_ignorable; @@ -353,8 +387,9 @@ sub parseEntry if @uv > 1; } + ## -## arrayref[weights] = altCE(bool variable?, list[num] weights) +## arrayref[weights] = altCE(VCE) ## sub altCE { @@ -362,26 +397,29 @@ sub altCE my($var, @wt) = unpack(VCE_TEMPLATE, shift); $self->{alternate} eq 'blanked' ? - $var ? [Var1, 0, 0, $wt[3]] : \@wt : + $var ? [Var1Wt, 0, 0, $wt[3]] : \@wt : $self->{alternate} eq 'non-ignorable' ? \@wt : $self->{alternate} eq 'shifted' ? - $var ? [Var1, 0, 0, $wt[0] ] - : [ @wt[0..2], $wt[0]+$wt[1]+$wt[2] ? Shift4 : 0 ] : + $var ? [Var1Wt, 0, 0, $wt[0] ] + : [ @wt[0..2], $wt[0]+$wt[1]+$wt[2] ? Shift4Wt : 0 ] : $self->{alternate} eq 'shift-trimmed' ? - $var ? [Var1, 0, 0, $wt[0] ] : [ @wt[0..2], 0 ] : + $var ? [Var1Wt, 0, 0, $wt[0] ] : [ @wt[0..2], 0 ] : croak "$PACKAGE unknown alternate name: $self->{alternate}"; } sub viewSortKey { my $self = shift; - my $ver = $self->{UCA_Version}; + $self->visualizeSortKey($self->getSortKey(@_)); +} - my $key = $self->getSortKey(@_); - my $view = join " ", map sprintf("%04X", $_), unpack(KEY_TEMPLATE, $key); +sub visualizeSortKey +{ + my $self = shift; + my $view = join " ", map sprintf("%04X", $_), unpack(KEY_TEMPLATE, shift); - if ($ver <= 8) { + if ($self->{UCA_Version} <= 8) { $view =~ s/ ?0000 ?/|/g; } else { $view =~ s/\b0000\b/|/g; @@ -423,9 +461,7 @@ sub splitCE } # get array of Unicode code point of string. - my @src = $IsEBCDIC - ? map(utf8::native_to_unicode($_), unpack UTF_TEMPLATE, $str) - : unpack(UTF_TEMPLATE, $str); + my @src = unpack_U($str); # rearrangement: # Character positions are not kept if rearranged, @@ -529,7 +565,7 @@ sub getWt $cjk ? map(pack(VCE_TEMPLATE, NON_VAR, @$_), &$cjk($u)) : defined $cjk && $self->{UCA_Version} <= 8 && $u < 0x10000 - ? pack(VCE_TEMPLATE, NON_VAR, $u, Min2, Min3, $u) + ? pack(VCE_TEMPLATE, NON_VAR, $u, Min2Wt, Min3Wt, $u) : $der->($u); } else { @@ -557,7 +593,7 @@ sub getSortKey if ($wt->[0] == 0) { # ignorable next if $last_is_variable; } else { - $last_is_variable = ($wt->[0] == Var1); + $last_is_variable = ($wt->[0] == Var1Wt); } } push @buf, $wt; @@ -571,10 +607,6 @@ sub getSortKey if 0 < $b->[$v]; } } - foreach (@{ $self->{backwards} }) { - my $v = $_ - 1; - @{ $ret[$v] } = reverse @{ $ret[$v] }; - } # modification of tertiary weights if ($self->{upper_before_lower}) { @@ -591,6 +623,15 @@ sub getSortKey elsif (0x0D <= $_ && $_ <= 0x0E) { $_ += 5 } # hiragana } } + + if ($self->{backwardsFlag}) { + for (my $v = MinLevel; $v <= MaxLevel; $v++) { + if ($self->{backwardsFlag} & (1 << $v)) { + @{ $ret[$v-1] } = reverse @{ $ret[$v-1] }; + } + } + } + join LEVEL_SEP, map pack(KEY_TEMPLATE, @$_), @ret; } @@ -630,8 +671,8 @@ sub _derivCE_9 { my $aaaa = $base + ($u >> 15); my $bbbb = ($u & 0x7FFF) | 0x8000; return - pack(VCE_TEMPLATE, NON_VAR, $aaaa, Min2, Min3, $u), - pack(VCE_TEMPLATE, NON_VAR, $bbbb, 0, 0, $u); + pack(VCE_TEMPLATE, NON_VAR, $aaaa, Min2Wt, Min3Wt, $u), + pack(VCE_TEMPLATE, NON_VAR, $bbbb, 0, 0, $u); } sub _derivCE_8 { @@ -684,7 +725,7 @@ sub _nonIgnorAtLevel($$) my $wt = shift; return if ! defined $wt; my $lv = shift; - return grep($wt->[$_] != 0, 0..$lv-1) ? TRUE : FALSE; + return grep($wt->[$_-1] != 0, MinLevel..$lv) ? TRUE : FALSE; } ## @@ -731,7 +772,6 @@ sub index $pos = 0 if $pos < 0; my $grob = shift; - my $comb = $self->{combining}; my $lev = $self->{level}; my $ver9 = $self->{UCA_Version} > 8; my $v2i = $self->{alternate} ne 'non-ignorable'; @@ -760,14 +800,14 @@ sub index if ($wt->[0] == 0) { $to_be_pushed = FALSE if $last_is_variable; } else { - $last_is_variable = ($wt->[0] == Var1); + $last_is_variable = ($wt->[0] == Var1Wt); } } if (@subWt && $wt->[0] == 0) { push @{ $subWt[-1] }, $wt if $to_be_pushed; } else { - $wt->[0] = 0 if $wt->[0] == Var1; + $wt->[0] = 0 if $wt->[0] == Var1Wt; push @subWt, [ $wt ]; } } @@ -789,7 +829,7 @@ sub index if ($wt->[0] == 0) { $to_be_pushed = FALSE if $last_is_variable; } else { - $last_is_variable = ($wt->[0] == Var1); + $last_is_variable = ($wt->[0] == Var1Wt); } } @@ -797,7 +837,7 @@ sub index push @{ $strWt[-1] }, $wt if $to_be_pushed; $finPos[-1] = $strCE->[$i][2]; } elsif ($to_be_pushed) { - $wt->[0] = 0 if $wt->[0] == Var1; + $wt->[0] = 0 if $wt->[0] == Var1Wt; push @strWt, [ $wt ]; push @iniPos, $found_base ? NOMATCHPOS : $strCE->[$i][1]; $finPos[-1] = NOMATCHPOS if $found_base; @@ -1217,9 +1257,9 @@ If the tag is made true, this is reversed. B: These tags simplemindedly assume any lowercase/uppercase or hiragana/katakana distinctions -should occur in level 3, and their weights at level 3 -should be same as those mentioned in 7.3.1, UTS #10. -If you define your collation elements which violates this, +must occur in level 3, and their weights at level 3 +must be same as those mentioned in 7.3.1, UTS #10. +If you define your collation elements which violate this requirement, these tags don't work validly. =back diff --git a/lib/Unicode/Collate/Changes b/lib/Unicode/Collate/Changes index 3e60f0b..2f7b6e7 100644 --- a/lib/Unicode/Collate/Changes +++ b/lib/Unicode/Collate/Changes @@ -1,5 +1,8 @@ Revision history for Perl extension Unicode::Collate. +0.24 Thu Apr 02 23:12:54 2003 + - internal tweak for (?un)pack 'U'. + 0.23 Wed Sep 04 19:25:20 2002 - fix: scalar match() no longer returns an lvalue substr ref. - fix: "Ignorable after variable" should be made level 3 ignorable diff --git a/lib/Unicode/Collate/README b/lib/Unicode/Collate/README index d829c77..fc0f68f 100644 --- a/lib/Unicode/Collate/README +++ b/lib/Unicode/Collate/README @@ -1,4 +1,4 @@ -Unicode/Collate version 0.23 +Unicode/Collate version 0.24 =============================== NAME diff --git a/lib/Unicode/Collate/t/index.t b/lib/Unicode/Collate/t/index.t index e759ef2..550cbe3 100644 --- a/lib/Unicode/Collate/t/index.t +++ b/lib/Unicode/Collate/t/index.t @@ -1,7 +1,8 @@ BEGIN { - if (ord("A") == 193) { - print "1..0 # Unicode::Collate not ported to EBCDIC\n"; + unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; exit 0; } } diff --git a/lib/Unicode/Collate/t/test.t b/lib/Unicode/Collate/t/test.t index 777e9fb..502e0b1 100644 --- a/lib/Unicode/Collate/t/test.t +++ b/lib/Unicode/Collate/t/test.t @@ -1,7 +1,8 @@ BEGIN { - if (ord("A") == 193) { - print "1..0 # Unicode::Collate not ported to EBCDIC\n"; + unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) { + print "1..0 # Unicode::Collate " . + "cannot stringify a Unicode code point\n"; exit 0; } } @@ -53,10 +54,12 @@ ok($Collator->cmp("", "perl"), -1); ############## -# Use pack('U'), not chr(), for Perl 5.6.1. -my $A_acute = pack('U', $IsEBCDIC ? 0x65 : 0xC1); -my $a_acute = pack('U', $IsEBCDIC ? 0x45 : 0xE1); -my $acute = pack('U', 0x0301); +sub _pack_U { Unicode::Collate::pack_U(@_) } +sub _unpack_U { Unicode::Collate::unpack_U(@_) } + +my $A_acute = _pack_U(0xC1); +my $a_acute = _pack_U(0xE1); +my $acute = _pack_U(0x0301); ok($Collator->cmp("A$acute", $A_acute), 0); # @version 3.1.1 (prev: -1) ok($Collator->cmp($a_acute, $A_acute), -1);