Special casing had become a little bit more complex in Unicode 3.1.1.
Jarkko Hietaniemi [Sun, 2 Sep 2001 12:41:12 +0000 (12:41 +0000)]
p4raw-id: //depot/perl@11832

lib/Unicode/UCD.pm
lib/Unicode/UCD.t

index d4525cc..841c373 100644 (file)
@@ -681,6 +681,11 @@ list overrides the normal behavior if all of the listed conditions are
 true.  Case distinctions in the condition list are not significant.
 Conditions preceded by "NON_" represent the negation of the condition
 
+Note that when there are multiple case folding definitions for a
+single code point because of different locales, the value returned by
+casespec() is a hash reference which has the locales as the keys and
+hash references as described above as the values.
+
 A I<locale> is defined as a 2-letter ISO 3166 country code, possibly
 followed by a "_" and a 2-letter ISO language code (possibly followed
 by a "_" and a variant code).  You can find the lists of those codes,
@@ -705,12 +710,49 @@ sub _casespec {
        if (openunicode(\$CASESPECFH, "SpecCase.txt")) {
            while (<$CASESPECFH>) {
                if (/^([0-9A-F]+); ([0-9A-F]+(?: [0-9A-F]+)*)?; ([0-9A-F]+(?: [0-9A-F]+)*)?; ([0-9A-F]+(?: [0-9A-F]+)*)?; (\w+(?: \w+)*)?/) {
-                   my $code = hex($1);
-                   $CASESPEC{$code} = { code      => $1,
-                                        lower     => $2,
-                                        title     => $3,
-                                        upper     => $4,
-                                        condition => $5 };
+                   my ($hexcode, $lower, $title, $upper, $condition) =
+                       ($1, $2, $3, $4, $5);
+                   my $code = hex($hexcode);
+                   if (exists $CASESPEC{$code}) {
+                       if (exists $CASESPEC{$code}->{code}) {
+                           my ($oldlower,
+                               $oldtitle,
+                               $oldupper,
+                               $oldcondition) =
+                                   @{$CASESPEC{$code}}{qw(lower
+                                                          title
+                                                          upper
+                                                          condition)};
+                           my ($oldlocale) =
+                               ($oldcondition =~ /^([a-z][a-z](?:_\S+)?)/);
+                           if (defined $oldlocale) {
+                               delete $CASESPEC{$code};
+                               $CASESPEC{$code}->{$oldlocale} =
+                               { code      => $hexcode,
+                                 lower     => $oldlower,
+                                 title     => $oldtitle,
+                                 upper     => $oldupper,
+                                 condition => $oldcondition };
+                           } else {
+                               warn __PACKAGE__, ": SpecCase.txt:", $., ": No oldlocale for 0x$hexcode\n"
+                           }
+                       }
+                       my ($locale) =
+                           ($condition =~ /^([a-z][a-z](?:_\S+)?)/);
+                       $CASESPEC{$code}->{$locale} =
+                       { code      => $hexcode,
+                         lower     => $lower,
+                         title     => $title,
+                         upper     => $upper,
+                         condition => $condition };
+                   } else {
+                       $CASESPEC{$code} =
+                       { code      => $hexcode,
+                         lower     => $lower,
+                         title     => $title,
+                         upper     => $upper,
+                         condition => $condition };
+                   }
                }
            }
            close($CASESPECFH);
index 67e99d0..7536e72 100644 (file)
@@ -223,7 +223,7 @@ ok( charinrange($ranges, "13a0"));
 ok( charinrange($ranges, "13f4"));
 ok(!charinrange($ranges, "13f5"));
 
-ok(Unicode::UCD::UnicodeVersion, 3.1);
+ok(Unicode::UCD::UnicodeVersion, '3.1.1');
 
 use Unicode::UCD qw(compexcl);
 
@@ -264,8 +264,8 @@ ok($casespec->{code} eq '00DF' &&
 
 $casespec = casespec(0x307);
 
-ok($casespec->{code} eq '0307' &&
-   $casespec->{lower} eq '0307'  &&
-   $casespec->{title} eq ''  &&
-   $casespec->{upper} eq '' &&
-   $casespec->{condition} eq 'lt AFTER_i');
+ok($casespec->{az}->{code} eq '0307' &&
+   $casespec->{az}->{lower} eq ''  &&
+   $casespec->{az}->{title} eq '0307'  &&
+   $casespec->{az}->{upper} eq '0307' &&
+   $casespec->{az}->{condition} eq 'az AFTER_i NOT_MORE_ABOVE');