From: Jarkko Hietaniemi Date: Sun, 2 Sep 2001 12:41:12 +0000 (+0000) Subject: Special casing had become a little bit more complex in Unicode 3.1.1. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=f499c3861b7c35f971fe2027563597c0f31f3f45;p=p5sagit%2Fp5-mst-13.2.git Special casing had become a little bit more complex in Unicode 3.1.1. p4raw-id: //depot/perl@11832 --- diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index d4525cc..841c373 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -681,6 +681,11 @@ list overrides the normal behavior if all of the listed conditions are true. Case distinctions in the condition list are not significant. Conditions preceded by "NON_" represent the negation of the condition +Note that when there are multiple case folding definitions for a +single code point because of different locales, the value returned by +casespec() is a hash reference which has the locales as the keys and +hash references as described above as the values. + A I is defined as a 2-letter ISO 3166 country code, possibly followed by a "_" and a 2-letter ISO language code (possibly followed by a "_" and a variant code). You can find the lists of those codes, @@ -705,12 +710,49 @@ sub _casespec { if (openunicode(\$CASESPECFH, "SpecCase.txt")) { while (<$CASESPECFH>) { if (/^([0-9A-F]+); ([0-9A-F]+(?: [0-9A-F]+)*)?; ([0-9A-F]+(?: [0-9A-F]+)*)?; ([0-9A-F]+(?: [0-9A-F]+)*)?; (\w+(?: \w+)*)?/) { - my $code = hex($1); - $CASESPEC{$code} = { code => $1, - lower => $2, - title => $3, - upper => $4, - condition => $5 }; + my ($hexcode, $lower, $title, $upper, $condition) = + ($1, $2, $3, $4, $5); + my $code = hex($hexcode); + if (exists $CASESPEC{$code}) { + if (exists $CASESPEC{$code}->{code}) { + my ($oldlower, + $oldtitle, + $oldupper, + $oldcondition) = + @{$CASESPEC{$code}}{qw(lower + title + upper + condition)}; + my ($oldlocale) = + ($oldcondition =~ /^([a-z][a-z](?:_\S+)?)/); + if (defined $oldlocale) { + delete $CASESPEC{$code}; + $CASESPEC{$code}->{$oldlocale} = + { code => $hexcode, + lower => $oldlower, + title => $oldtitle, + upper => $oldupper, + condition => $oldcondition }; + } else { + warn __PACKAGE__, ": SpecCase.txt:", $., ": No oldlocale for 0x$hexcode\n" + } + } + my ($locale) = + ($condition =~ /^([a-z][a-z](?:_\S+)?)/); + $CASESPEC{$code}->{$locale} = + { code => $hexcode, + lower => $lower, + title => $title, + upper => $upper, + condition => $condition }; + } else { + $CASESPEC{$code} = + { code => $hexcode, + lower => $lower, + title => $title, + upper => $upper, + condition => $condition }; + } } } close($CASESPECFH); diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index 67e99d0..7536e72 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -223,7 +223,7 @@ ok( charinrange($ranges, "13a0")); ok( charinrange($ranges, "13f4")); ok(!charinrange($ranges, "13f5")); -ok(Unicode::UCD::UnicodeVersion, 3.1); +ok(Unicode::UCD::UnicodeVersion, '3.1.1'); use Unicode::UCD qw(compexcl); @@ -264,8 +264,8 @@ ok($casespec->{code} eq '00DF' && $casespec = casespec(0x307); -ok($casespec->{code} eq '0307' && - $casespec->{lower} eq '0307' && - $casespec->{title} eq '' && - $casespec->{upper} eq '' && - $casespec->{condition} eq 'lt AFTER_i'); +ok($casespec->{az}->{code} eq '0307' && + $casespec->{az}->{lower} eq '' && + $casespec->{az}->{title} eq '0307' && + $casespec->{az}->{upper} eq '0307' && + $casespec->{az}->{condition} eq 'az AFTER_i NOT_MORE_ABOVE');