* Synced the perlfaq
[p5sagit/p5-mst-13.2.git] / lib / legacy.pm
CommitLineData
package legacy;

our $VERSION = '1.00';

# Bit in $^H that import() sets and unimport() clears.
$unicode8bit::hint_not_uni8bit = 0x0000_0800;

# Maps a release number to the individual behaviors that the corresponding
# ":RELEASE" bundle expands to in import().
my %legacy_bundle = (
    '5.10' => ['unicode8bit'],
    '5.11' => ['unicode8bit'],
);

# Names of the individual behaviors this pragma recognizes.  The code in this
# file only looks at the keys.
my %legacy = ( unicode8bit => '0' );
13
14=head1 NAME
15
a994735a 16legacy - Perl pragma to preserve legacy behaviors or enable new non-default behaviors
66060160 17
18=head1 SYNOPSIS
19
20 use legacy ':5.10'; # Keeps semantics the same as in perl 5.10
21
00f254e2 22 use legacy qw(unicode8bit);
66060160 23
00f254e2 24 no legacy;
66060160 25
00f254e2 26 no legacy qw(unicode8bit);
66060160 27
28=head1 DESCRIPTION
29
Some programs may rely on behaviors that for others are problematic or
even wrong.  A new version of Perl may change behaviors from past ones,
and when it is judged that the old way of doing things may still need to
be supported, the new behavior can be turned off by using this pragma.
66060160 35
36Additionally, a new behavior may be supported in a new version of Perl, but
37for whatever reason the default remains the old one. This pragma can enable
38the new behavior.
39
40Like other pragmas (C<use feature>, for example), C<use legacy qw(foo)> will
41only make the legacy behavior for "foo" available from that point to the end of
42the enclosing block.
43
66060160 44=head2 B<use legacy>
45
46Preserve the old way of doing things when a new version of Perl is
00f254e2 47released that would otherwise change the behavior.
48
49The one current possibility is:
50
51=head3 unicode8bit
52
00f254e2 53Use legacy semantics for the 128 characters on ASCII systems that have the 8th
54bit set. (See L</EBCDIC platforms> below for EBCDIC systems.) Unless
55C<S<use locale>> is specified, or the scalar containing such a character is
56known by Perl to be encoded in UTF8, the semantics are essentially that the
57characters have an ordinal number, and that's it. They are caseless, and
58aren't anything: they're not controls, not letters, not punctuation, ..., not
59anything.
60
61This behavior stems from when Perl did not support Unicode, and ASCII was the
62only known character set outside of C<S<use locale>>. In order to not
e1e0053f 63possibly break pre-Unicode programs, these characters have retained their old
00f254e2 64non-meanings, except when it is clear to Perl that Unicode is what is meant,
a994735a 65for example by calling utf8::upgrade() on a scalar, or if the scalar also
00f254e2 66contains characters that are only available in Unicode. Then these 128
67characters take on their Unicode meanings.
68
The problem with this behavior is that a scalar that encodes these characters
has a different meaning depending on whether or not it is stored as utf8.
In general, the internal storage method should not affect the
external behavior.
73
74The behavior is known to have effects on these areas:
75
76=over 4
77
a994735a 78=item *
00f254e2 79
Changing the case of a scalar, that is, using C<uc()>, C<ucfirst()>, C<lc()>,
and C<lcfirst()>, or C<\L>, C<\U>, C<\u> and C<\l> in double-quoted strings
and regular expression substitutions.
00f254e2 83
a994735a 84=item *
00f254e2 85
86Using caseless (C</i>) regular expression matching
87
a994735a 88=item *
00f254e2 89
90Matching a number of properties in regular expressions, such as C<\w>
91
a994735a 92=item *
00f254e2 93
94User-defined case change mappings. You can create a C<ToUpper()> function, for
95example, which overrides Perl's built-in case mappings. The scalar must be
96encoded in utf8 for your function to actually be invoked.
97
98=back
99
100B<This lack of semantics for these characters is currently the default,>
101outside of C<use locale>. See below for EBCDIC.
102To turn on B<case changing semantics only> for these characters, use
103C<S<no legacy>>.
104The other legacy behaviors regarding these characters are currently
105unaffected by this pragma.
106
107=head4 EBCDIC platforms
108
109On EBCDIC platforms, the situation is somewhat different. The legacy
110semantics are whatever the underlying semantics of the native C language
111library are. Each of the three EBCDIC encodings currently known by Perl is an
112isomorph of the Latin-1 character set. That means every character in Latin-1
113has a corresponding EBCDIC equivalent, and vice-versa. Specifying C<S<no
114legacy>> currently makes sure that all EBCDIC characters have the same
115B<casing only> semantics as their corresponding Latin-1 characters.
66060160 116
117=head2 B<no legacy>
118
119Turn on a new behavior in a version of Perl that understands
120it but has it turned off by default. For example, C<no legacy 'foo'> turns on
00f254e2 121behavior C<foo> in the lexical scope of the pragma. C<no legacy>
122without any modifier turns on all new behaviors known to the pragma.
66060160 123
124=head1 LEGACY BUNDLES
125
00f254e2 126It's possible to turn off all new behaviors past a given release by
66060160 127using a I<legacy bundle>, which is the name of the release prefixed with
128a colon, to distinguish it from an individual legacy behavior.
129
130Specifying sub-versions such as the C<0> in C<5.10.0> in legacy bundles has
131no effect: legacy bundles are guaranteed to be the same for all sub-versions.
132
e1e0053f 133Legacy bundles are not allowed with C<no legacy>.
66060160 134
135=cut
136
# Called for C<use legacy LIST>.
#
# Each element of LIST is either the name of an individual legacy behavior
# or a bundle of the form ":RELEASE" (for example ":5.10").  A bundle is
# replaced by the behaviors listed for it in %legacy_bundle; a three-part
# release such as "5.10.0" falls back to its two-part form "5.10".
#
# Croaks when LIST is empty, when a bundle is unknown, or when an individual
# behavior name is not a key of %legacy.
sub import {
    my $class = shift;
    if (@_ == 0) {
        croak("No legacy behaviors specified");
    }
    while (@_) {
        my $name = shift(@_);
        if (substr($name, 0, 1) eq ":") {
            my $v = substr($name, 1);
            if (!exists $legacy_bundle{$v}) {
                # Drop the sub-version.  Both separators must be literal
                # dots, so that something like "5.10x0" is not accepted.
                $v =~ s/^([0-9]+)\.([0-9]+)\.[0-9]+$/$1.$2/;
                if (!exists $legacy_bundle{$v}) {
                    unknown_legacy_bundle(substr($name, 1));
                }
            }
            # Queue the bundle's members so they are handled (and
            # validated) like explicitly requested behaviors.
            unshift @_, @{$legacy_bundle{$v}};
            next;
        }
        # Reject unknown names, as unimport() does, instead of silently
        # switching on the only behavior currently implemented.
        if (!exists $legacy{$name}) {
            unknown_legacy($name);
        }
        $^H |= $unicode8bit::hint_not_uni8bit; # The only valid thing as of yet
    }
}
158
159
# Called for C<no legacy LIST>.
#
# Clears the hint bit for every named behavior.  An empty LIST stands for
# every behavior in %legacy.  Bundles (names starting with ":") and names
# that are not keys of %legacy are rejected with croak.
sub unimport {
    my ($class, @names) = @_;

    # A bare "no legacy" applies to everything the pragma knows about.
    @names = keys(%legacy) unless @names;

    for my $name (@names) {
        if (substr($name, 0, 1) eq ":") {
            my $message = sprintf(
                'Legacy bundles (%s) are not allowed in "no legacy"', $name);
            croak($message);
        }

        if (exists($legacy{$name})) {
            # The only valid thing now
            $^H &= ~ $unicode8bit::hint_not_uni8bit;
        }
        else {
            unknown_legacy($name);
        }
    }
}
182
# Croak with a message saying that the named individual legacy behavior is
# not supported by the running Perl (whose version is taken from $^V).
sub unknown_legacy {
    my ($name) = @_;
    my $message
        = sprintf('Legacy "%s" is not supported by Perl %vd', $name, $^V);
    croak($message);
}
187
# Croak with a message saying that the named legacy bundle is not supported
# by the running Perl (whose version is taken from $^V).
sub unknown_legacy_bundle {
    my ($bundle) = @_;
    my $message = sprintf('Legacy bundle "%s" is not supported by Perl %vd',
        $bundle, $^V);
    croak($message);
}
193
# Thin wrapper around Carp::croak.  Carp is loaded on demand, only when an
# error is actually being raised.
sub croak {
    my @message = @_;
    require Carp;
    Carp::croak(@message);
}
198
1991;