Commit | Line | Data |
19692e8d |
1 | #!./perl -w |
2 | |
cb0a5b5c |
3 | BEGIN { |
4 | chdir 't' if -d 't'; |
5 | @INC = '../lib'; |
6 | require './test.pl'; |
7 | |
19692e8d |
8 | plan(tests => 91); |
cb0a5b5c |
9 | } |
10 | |
19692e8d |
11 | use strict; |
12 | |
cb0a5b5c |
13 | # Two hashes one will all keys 8-bit possible (initially), other |
14 | # with a utf8 requiring key from the outset. |
15 | |
16 | my %hash8 = ( "\xff" => 0xff, |
17 | "\x7f" => 0x7f, |
18 | ); |
19 | my %hashu = ( "\xff" => 0xff, |
20 | "\x7f" => 0x7f, |
21 | "\x{1ff}" => 0x1ff, |
22 | ); |
23 | |
24 | # Check that we can find the 8-bit things by various litterals |
25 | is($hash8{"\x{00ff}"},0xFF); |
26 | is($hash8{"\x{007f}"},0x7F); |
27 | is($hash8{"\xff"},0xFF); |
28 | is($hash8{"\x7f"},0x7F); |
29 | is($hashu{"\x{00ff}"},0xFF); |
30 | is($hashu{"\x{007f}"},0x7F); |
31 | is($hashu{"\xff"},0xFF); |
32 | is($hashu{"\x7f"},0x7F); |
33 | |
34 | # Now try same thing with variables forced into various forms. |
35 | foreach my $a ("\x7f","\xff") |
36 | { |
37 | utf8::upgrade($a); |
38 | is($hash8{$a},ord($a)); |
39 | is($hashu{$a},ord($a)); |
40 | utf8::downgrade($a); |
41 | is($hash8{$a},ord($a)); |
42 | is($hashu{$a},ord($a)); |
43 | my $b = $a.chr(100); |
44 | chop($b); |
45 | is($hash8{$b},ord($b)); |
46 | is($hashu{$b},ord($b)); |
47 | } |
48 | |
49 | # Check we have not got an spurious extra keys |
20b5b8d0 |
50 | is(join('',sort { ord $a <=> ord $b } keys %hash8),"\x7f\xff"); |
51 | is(join('',sort { ord $a <=> ord $b } keys %hashu),"\x7f\xff\x{1ff}"); |
cb0a5b5c |
52 | |
53 | # Now add a utf8 key to the 8-bit hash |
54 | $hash8{chr(0x1ff)} = 0x1ff; |
55 | |
56 | # Check we have not got an spurious extra keys |
20b5b8d0 |
57 | is(join('',sort { ord $a <=> ord $b } keys %hash8),"\x7f\xff\x{1ff}"); |
cb0a5b5c |
58 | |
59 | foreach my $a ("\x7f","\xff","\x{1ff}") |
60 | { |
61 | utf8::upgrade($a); |
62 | is($hash8{$a},ord($a)); |
63 | my $b = $a.chr(100); |
64 | chop($b); |
65 | is($hash8{$b},ord($b)); |
66 | } |
67 | |
68 | # and remove utf8 from the other hash |
69 | is(delete $hashu{chr(0x1ff)},0x1ff); |
70 | is(join('',sort keys %hashu),"\x7f\xff"); |
71 | |
72 | foreach my $a ("\x7f","\xff") |
73 | { |
74 | utf8::upgrade($a); |
75 | is($hashu{$a},ord($a)); |
76 | utf8::downgrade($a); |
77 | is($hashu{$a},ord($a)); |
78 | my $b = $a.chr(100); |
79 | chop($b); |
80 | is($hashu{$b},ord($b)); |
81 | } |
82 | |
83 | |
4c79aee6 |
84 | |
85 | { |
19692e8d |
86 | print "# Unicode hash keys and \\w\n"; |
87 | # This is not really a regex test but regexes bring |
88 | # out the issue nicely. |
89 | use strict; |
90 | my $u3 = "f\x{df}\x{100}"; |
91 | my $u2 = substr($u3,0,2); |
92 | my $u1 = substr($u2,0,1); |
93 | my $u0 = chr (0xdf)x4; # Make this 4 chars so that all lengths are distinct. |
94 | |
95 | my @u = ($u0, $u1, $u2, $u3); |
96 | |
97 | while (@u) { |
98 | my %u = (map {( $_, $_)} @u); |
99 | my $keys = scalar @u; |
100 | $keys .= ($keys == 1) ? " key" : " keys"; |
4c79aee6 |
101 | |
102 | for (keys %u) { |
19692e8d |
103 | my $l = 0 + /^\w+$/; |
104 | my $r = 0 + $u{$_} =~ /^\w+$/; |
105 | is ($l, $r, "\\w on keys with $keys, key of length " . length $_); |
106 | } |
107 | |
108 | my $more; |
109 | do { |
110 | $more = 0; |
111 | # Want to do this direct, rather than copying to a temporary variable |
112 | # The first time each will return key and value at the start of the hash. |
113 | # each will return () after we've done the last pair. $more won't get |
114 | # set then, and the do will exit. |
115 | for (each %u) { |
116 | $more = 1; |
117 | my $l = 0 + /^\w+$/; |
118 | my $r = 0 + $u{$_} =~ /^\w+$/; |
119 | is ($l, $r, "\\w on each, with $keys, key of length " . length $_); |
120 | } |
121 | } while ($more); |
4c79aee6 |
122 | |
123 | for (%u) { |
19692e8d |
124 | my $l = 0 + /^\w+$/; |
125 | my $r = 0 + $u{$_} =~ /^\w+$/; |
126 | is ($l, $r, "\\w on hash with $keys, key of length " . length $_); |
127 | } |
128 | pop @u; |
129 | undef %u; |
130 | } |
131 | } |
132 | |
133 | { |
134 | my $utf8_sz = my $bytes_sz = "\x{df}"; |
135 | $utf8_sz .= chr 256; |
136 | chop ($utf8_sz); |
137 | |
138 | my (%bytes_first, %utf8_first); |
139 | |
140 | $bytes_first{$bytes_sz} = $bytes_sz; |
141 | |
142 | for (keys %bytes_first) { |
143 | my $l = 0 + /^\w+$/; |
144 | my $r = 0 + $bytes_first{$_} =~ /^\w+$/; |
145 | is ($l, $r, "\\w on each, bytes"); |
146 | } |
147 | |
148 | $bytes_first{$utf8_sz} = $utf8_sz; |
149 | |
150 | for (keys %bytes_first) { |
151 | my $l = 0 + /^\w+$/; |
152 | my $r = 0 + $bytes_first{$_} =~ /^\w+$/; |
153 | is ($l, $r, "\\w on each, bytes now utf8"); |
154 | } |
155 | |
156 | $utf8_first{$utf8_sz} = $utf8_sz; |
157 | |
158 | for (keys %utf8_first) { |
159 | my $l = 0 + /^\w+$/; |
160 | my $r = 0 + $utf8_first{$_} =~ /^\w+$/; |
161 | is ($l, $r, "\\w on each, utf8"); |
162 | } |
163 | |
164 | $utf8_first{$bytes_sz} = $bytes_sz; |
165 | |
166 | for (keys %utf8_first) { |
167 | my $l = 0 + /^\w+$/; |
168 | my $r = 0 + $utf8_first{$_} =~ /^\w+$/; |
169 | is ($l, $r, "\\w on each, utf8 now bytes"); |
170 | } |
171 | |
4c79aee6 |
172 | } |