lib/legacy.t

   1 use warnings;
   2 use strict;
   3
   4 BEGIN {
   5     chdir 't' if -d 't';
   6     @INC = '../lib';
   7     require './test.pl';
   8 }
   9
  10 plan(13312);    # Determined by experimentation
  11
  12 # Test the upper/lower/title case mappings for all characters 0-255.
  13
  14 # First compute the case mappings without resorting to the functions we're
  15 # testing.
  16
  17 # Initialize the arrays so each $i maps to itself.
  18 my @posix_to_upper;
  19 for my $i (0 .. 255) {
  20     $posix_to_upper[$i] = chr($i);
  21 }
  22 my @posix_to_lower
  23 = my @posix_to_title
  24 = my @latin1_to_upper
  25 = my @latin1_to_lower
  26 = my @latin1_to_title
  27 = @posix_to_upper;
  28
  29 # Override the elements in the to_lower arrays that have different lower case
  30 # mappings
  31 for my $i (0x41 .. 0x5A) {
  32     $posix_to_lower[$i] = chr(ord($posix_to_lower[$i]) + 32);
  33     $latin1_to_lower[$i] = chr(ord($latin1_to_lower[$i]) + 32);
  34 }
  35
  36 # Same for upper and title
  37 for my $i (0x61 .. 0x7A) {
  38     $posix_to_upper[$i] = chr(ord($posix_to_upper[$i]) - 32);
  39     $latin1_to_upper[$i] = chr(ord($latin1_to_upper[$i]) - 32);
  40     $posix_to_title[$i] = chr(ord($posix_to_title[$i]) - 32);
  41     $latin1_to_title[$i] = chr(ord($latin1_to_title[$i]) - 32);
  42 }
  43
  44 # And the same for those in the latin1 range
  45 for my $i (0xC0 .. 0xD6, 0xD8 .. 0xDE) {
  46     $latin1_to_lower[$i] = chr(ord($latin1_to_lower[$i]) + 32);
  47 }
  48 for my $i (0xE0 .. 0xF6, 0xF8 .. 0xFE) {
  49     $latin1_to_upper[$i] = chr(ord($latin1_to_upper[$i]) - 32);
  50     $latin1_to_title[$i] = chr(ord($latin1_to_title[$i]) - 32);
  51 }
  52
  53 # Override the abnormal cases.
  54 $latin1_to_upper[0xB5] = chr(0x39C);
  55 $latin1_to_title[0xB5] = chr(0x39C);
  56 $latin1_to_upper[0xDF] = 'SS';
  57 $latin1_to_title[0xDF] = 'Ss';
  58 $latin1_to_upper[0xFF] = chr(0x178);
  59 $latin1_to_title[0xFF] = chr(0x178);
  60
  61 my $repeat = 25;    # Length to make strings.
  62
  63 # Create hashes of strings in several ranges, both for uc and lc.
  64 my %posix;
  65 $posix{'uc'} = 'A' x $repeat;
  66 $posix{'lc'} = 'a' x $repeat ;
  67
  68 my %cyrillic;
  69 $cyrillic{'uc'} = chr(0x42F) x $repeat;
  70 $cyrillic{'lc'} = chr(0x44F) x $repeat;
  71
  72 my %latin1;
  73 $latin1{'uc'} = chr(0xD8) x $repeat;
  74 $latin1{'lc'} = chr(0xF8) x $repeat;
  75
  76 my %empty;
  77 $empty{'lc'} = $empty{'uc'} = "";
  78
  79 # Loop so prefix each character being tested with nothing, and the various
  80 # strings; then loop for suffixes of those strings as well.
  81 for my  $prefix (\%empty, \%posix, \%cyrillic, \%latin1) {
  82     for my  $suffix (\%empty, \%posix, \%cyrillic, \%latin1) {
  83         for my $i (0 .. 255) {  # For each possible posix or latin1 character
  84             my $cp = sprintf "U+%04X", $i;
  85
  86             # First try using latin1 (Unicode) semantics.
  87             no legacy "unicode8bit";
  88
  89             my $phrase = 'with uni8bit';
  90             my $char = chr($i);
  91             my $pre_lc = $prefix->{'lc'};
  92             my $pre_uc = $prefix->{'uc'};
  93             my $post_lc = $suffix->{'lc'};
  94             my $post_uc = $suffix->{'uc'};
  95             my $to_upper = $pre_lc . $char . $post_lc;
  96             my $expected_upper = $pre_uc . $latin1_to_upper[$i] . $post_uc;
  97             my $to_lower = $pre_uc . $char . $post_uc;
  98             my $expected_lower = $pre_lc . $latin1_to_lower[$i] . $post_lc;
  99
 100             is (uc($to_upper), $expected_upper,
 101                 display("$cp: $phrase: uc($to_upper) eq $expected_upper"));
 102             is (lc($to_lower), $expected_lower,
 103                 display("$cp: $phrase: lc($to_lower) eq $expected_lower"));
 104
 105             if ($pre_uc eq "") {    # Title case if null prefix.
 106                 my $expected_title = $latin1_to_title[$i] . $post_lc;
 107                 is (ucfirst($to_upper), $expected_title,
 108                     display("$cp: $phrase: ucfirst($to_upper) eq $expected_title"));
 109                 my $expected_lcfirst = $latin1_to_lower[$i] . $post_uc;
 110                 is (lcfirst($to_lower), $expected_lcfirst,
 111                     display("$cp: $phrase: lcfirst($to_lower) eq $expected_lcfirst"));
 112             }
 113
 114             # Then try with posix semantics.
 115             use legacy "unicode8bit";
 116             $phrase = 'no uni8bit';
 117
 118             # These don't contribute anything in this case.
 119             next if $suffix == \%cyrillic;
 120             next if $suffix == \%latin1;
 121             next if $prefix == \%cyrillic;
 122             next if $prefix == \%latin1;
 123
 124             $expected_upper = $pre_uc . $posix_to_upper[$i] . $post_uc;
 125             $expected_lower = $pre_lc . $posix_to_lower[$i] . $post_lc;
 126
 127             is (uc($to_upper), $expected_upper,
 128                 display("$cp: $phrase: uc($to_upper) eq $expected_upper"));
 129             is (lc($to_lower), $expected_lower,
 130                 display("$cp: $phrase: lc($to_lower) eq $expected_lower"));
 131
 132             if ($pre_uc eq "") {
 133                 my $expected_title = $posix_to_title[$i] . $post_lc;
 134                 is (ucfirst($to_upper), $expected_title,
 135                     display("$cp: $phrase: ucfirst($to_upper) eq $expected_title"));
 136                 my $expected_lcfirst = $posix_to_lower[$i] . $post_uc;
 137                 is (lcfirst($to_lower), $expected_lcfirst,
 138                     display("$cp: $phrase: lcfirst($to_lower) eq $expected_lcfirst"));
 139             }
 140         }
 141     }
 142 }