add code for Unicode semantics for non-utf8 latin1 chars
[p5sagit/p5-mst-13.2.git] / lib / legacy.pm
CommitLineData
66060160 1package legacy;
2
3our $VERSION = '1.00';
4
00f254e2 5$unicode8bit::hint_uni8bit = 0x00000800;
66060160 6
7my %legacy_bundle = (
8 "5.10" => [qw(unicode8bit)],
9 "5.11" => [qw(unicode8bit)],
10);
11
12my %legacy = ( 'unicode8bit' => '0' );
13
14=head1 NAME
15
16legacy - Perl pragma to preserve legacy behaviors or enable new non-default
17behaviors
18
19=head1 SYNOPSIS
20
21 use legacy ':5.10'; # Keeps semantics the same as in perl 5.10
22
00f254e2 23 use legacy qw(unicode8bit);
66060160 24
00f254e2 25 no legacy;
66060160 26
00f254e2 27 no legacy qw(unicode8bit);
66060160 28
29=head1 DESCRIPTION
30
31Some programs may rely on behaviors that for others are problematic or
32even wrong. A new version of Perl may change behaviors from past ones,
33and when it is viewed that the old way of doing things may be required
00f254e2 34to still be supported, the new behavior will be able to be turned off by using
35this pragma.
66060160 36
37Additionally, a new behavior may be supported in a new version of Perl, but
38for whatever reason the default remains the old one. This pragma can enable
39the new behavior.
40
41Like other pragmas (C<use feature>, for example), C<use legacy qw(foo)> will
42only make the legacy behavior for "foo" available from that point to the end of
43the enclosing block.
44
66060160 45=head2 B<use legacy>
46
47Preserve the old way of doing things when a new version of Perl is
00f254e2 48released that would otherwise change the behavior.
49
50The one current possibility is:
51
52=head3 unicode8bit
53
54THIS IS SUBJECT TO CHANGE
55
56Use legacy semantics for the 128 characters on ASCII systems that have the 8th
57bit set. (See L</EBCDIC platforms> below for EBCDIC systems.) Unless
58C<S<use locale>> is specified, or the scalar containing such a character is
59known by Perl to be encoded in UTF8, the semantics are essentially that the
60characters have an ordinal number, and that's it. They are caseless, and
61aren't anything: they're not controls, not letters, not punctuation, ..., not
62anything.
63
64This behavior stems from when Perl did not support Unicode, and ASCII was the
65only known character set outside of C<S<use locale>>. In order to not
66possibly break pre_Unicode programs, these characters have retained their old
67non-meanings, except when it is clear to Perl that Unicode is what is meant,
68for example by calling utf::upgrade() on a scalar, or if the scalar also
69contains characters that are only available in Unicode. Then these 128
70characters take on their Unicode meanings.
71
72The problem with this behavior is that a scalar that encodes these characters
73has a different meaning depending on if it is stored as utf8 or not.
74In general, the internal storage method should not affect the
75external behavior.
76
77The behavior is known to have effects on these areas:
78
79=over 4
80
81=item
82
83Changing the case of a scalar, that is, using C<uc()>,
84C<ucfirst()>,
85C<lc()>,
86and C<lcfirst()>, or C<\L>, C<\U>, C<\u> and C<\l> in regular expression substitutions.
87
88=item
89
90Using caseless (C</i>) regular expression matching
91
92=item
93
94Matching a number of properties in regular expressions, such as C<\w>
95
96=item
97
98User-defined case change mappings. You can create a C<ToUpper()> function, for
99example, which overrides Perl's built-in case mappings. The scalar must be
100encoded in utf8 for your function to actually be invoked.
101
102=back
103
104B<This lack of semantics for these characters is currently the default,>
105outside of C<use locale>. See below for EBCDIC.
106To turn on B<case changing semantics only> for these characters, use
107C<S<no legacy>>.
108The other legacy behaviors regarding these characters are currently
109unaffected by this pragma.
110
111=head4 EBCDIC platforms
112
113On EBCDIC platforms, the situation is somewhat different. The legacy
114semantics are whatever the underlying semantics of the native C language
115library are. Each of the three EBCDIC encodings currently known by Perl is an
116isomorph of the Latin-1 character set. That means every character in Latin-1
117has a corresponding EBCDIC equivalent, and vice-versa. Specifying C<S<no
118legacy>> currently makes sure that all EBCDIC characters have the same
119B<casing only> semantics as their corresponding Latin-1 characters.
66060160 120
121=head2 B<no legacy>
122
123Turn on a new behavior in a version of Perl that understands
124it but has it turned off by default. For example, C<no legacy 'foo'> turns on
00f254e2 125behavior C<foo> in the lexical scope of the pragma. C<no legacy>
126without any modifier turns on all new behaviors known to the pragma.
66060160 127
128=head1 LEGACY BUNDLES
129
00f254e2 130It's possible to turn off all new behaviors past a given release by
66060160 131using a I<legacy bundle>, which is the name of the release prefixed with
132a colon, to distinguish it from an individual legacy behavior.
133
134Specifying sub-versions such as the C<0> in C<5.10.0> in legacy bundles has
135no effect: legacy bundles are guaranteed to be the same for all sub-versions.
136
137Legacy bundles are not allowed with C<no legacy>
138
139=cut
140
141sub import {
142 my $class = shift;
143 if (@_ == 0) {
144 croak("No legacy behaviors specified");
145 }
146 while (@_) {
147 my $name = shift(@_);
148 if (substr($name, 0, 1) eq ":") {
149 my $v = substr($name, 1);
150 if (!exists $legacy_bundle{$v}) {
151 $v =~ s/^([0-9]+)\.([0-9]+).[0-9]+$/$1.$2/;
152 if (!exists $legacy_bundle{$v}) {
153 unknown_legacy_bundle(substr($name, 1));
154 }
155 }
156 unshift @_, @{$legacy_bundle{$v}};
157 next;
158 }
159 if (!exists $legacy{$name}) {
160 unknown_legacy($name);
161 }
00f254e2 162 $^H &= ~$unicode8bit::hint_uni8bit; # The only valid thing as of yet
66060160 163 }
164}
165
166
167sub unimport {
168 my $class = shift;
169
170 # A bare C<no legacy> should disable *all* legacy behaviors
171 if (!@_) {
172 unshift @_, keys(%legacy);
173 }
174
175 while (@_) {
176 my $name = shift;
177 if (substr($name, 0, 1) eq ":") {
178 croak(sprintf('Legacy bundles (%s) are not allowed in "no legacy"',
179 $name));
180 }
181 if (!exists($legacy{$name})) {
182 unknown_legacy($name);
183 }
184 else {
00f254e2 185 $^H |= $unicode8bit::hint_uni8bit; # The only valid thing as of yet
66060160 186 }
187 }
188}
189
190sub unknown_legacy {
191 my $legacy = shift;
192 croak(sprintf('Legacy "%s" is not supported by Perl %vd', $legacy, $^V));
193}
194
195sub unknown_legacy_bundle {
196 my $legacy = shift;
197 croak(sprintf('Legacy bundle "%s" is not supported by Perl %vd',
198 $legacy, $^V));
199}
200
201sub croak {
202 require Carp;
203 Carp::croak(@_);
204}
205
2061;