lib/legacy.pm

   1 package legacy;
   2
   3 our $VERSION = '1.00';
   4
   5 $unicode8bit::hint_uni8bit = 0x00000800;
   6
   7 my %legacy_bundle = (
   8     "5.10" => [qw(unicode8bit)],
   9     "5.11" => [qw(unicode8bit)],
  10 );
  11
  12 my %legacy = ( 'unicode8bit' => '0' );
  13
  14 =head1 NAME
  15
  16 legacy - Perl pragma to preserve legacy behaviors or enable new non-default
  17 behaviors
  18
  19 =head1 SYNOPSIS
  20
  21  use legacy ':5.10'; # Keeps semantics the same as in perl 5.10
  22
  23  use legacy qw(unicode8bit);
  24
  25  no legacy;
  26
  27  no legacy qw(unicode8bit);
  28
  29 =head1 DESCRIPTION
  30
  31 Some programs may rely on behaviors that for others are problematic or
  32 even wrong.  A new version of Perl may change behaviors from past ones,
  33 and when it is viewed that the old way of doing things may be required
  34 to still be supported, the new behavior will be able to be turned off by using
  35 this pragma.
  36
  37 Additionally, a new behavior may be supported in a new version of Perl, but
  38 for whatever reason the default remains the old one.  This pragma can enable
  39 the new behavior.
  40
  41 Like other pragmas (C<use feature>, for example), C<use legacy qw(foo)> will
  42 only make the legacy behavior for "foo" available from that point to the end of
  43 the enclosing block.
  44
  45 =head2 B<use legacy>
  46
  47 Preserve the old way of doing things when a new version of Perl is
  48 released that would otherwise change the behavior.
  49
  50 The one current possibility is:
  51
  52 =head3 unicode8bit
  53
  54 THIS IS SUBJECT TO CHANGE
  55
  56 Use legacy semantics for the 128 characters on ASCII systems that have the 8th
  57 bit set.  (See L</EBCDIC platforms> below for EBCDIC systems.)  Unless
  58 C<S<use locale>> is specified, or the scalar containing such a character is
  59 known by Perl to be encoded in UTF8, the semantics are essentially that the
  60 characters have an ordinal number, and that's it.  They are caseless, and
  61 aren't anything: they're not controls, not letters, not punctuation, ..., not
  62 anything.
  63
  64 This behavior stems from when Perl did not support Unicode, and ASCII was the
  65 only known character set outside of C<S<use locale>>.  In order to not
  66 possibly break pre_Unicode programs, these characters have retained their old
  67 non-meanings, except when it is clear to Perl that Unicode is what is meant,
  68 for example by calling utf::upgrade() on a scalar, or if the scalar also
  69 contains characters that are only available in Unicode.  Then these 128
  70 characters take on their Unicode meanings.
  71
  72 The problem with this behavior is that a scalar that encodes these characters
  73 has a different meaning depending on if it is stored as utf8 or not.
  74 In general, the internal storage method should not affect the
  75 external behavior.
  76
  77 The behavior is known to have effects on these areas:
  78
  79 =over 4
  80
  81 =item
  82
  83 Changing the case of a scalar, that is, using C<uc()>,
  84 C<ucfirst()>,
  85 C<lc()>,
  86 and C<lcfirst()>, or C<\L>, C<\U>, C<\u> and C<\l> in regular expression substitutions.
  87
  88 =item
  89
  90 Using caseless (C</i>) regular expression matching
  91
  92 =item
  93
  94 Matching a number of properties in regular expressions, such as C<\w>
  95
  96 =item
  97
  98 User-defined case change mappings.  You can create a C<ToUpper()> function, for
  99 example, which overrides Perl's built-in case mappings.  The scalar must be
 100 encoded in utf8 for your function to actually be invoked.
 101
 102 =back
 103
 104 B<This lack of semantics for these characters is currently the default,>
 105 outside of C<use locale>.  See below for EBCDIC.
 106 To turn on B<case changing semantics only> for these characters, use
 107 C<S<no legacy>>.
 108 The other legacy behaviors regarding these characters are currently
 109 unaffected by this pragma.
 110
 111 =head4 EBCDIC platforms
 112
 113 On EBCDIC platforms, the situation is somewhat different.  The legacy
 114 semantics are whatever the underlying semantics of the native C language
 115 library are.  Each of the three EBCDIC encodings currently known by Perl is an
 116 isomorph of the Latin-1 character set.  That means every character in Latin-1
 117 has a corresponding EBCDIC equivalent, and vice-versa.  Specifying C<S<no
 118 legacy>> currently makes sure that all EBCDIC characters have the same
 119 B<casing only> semantics as their corresponding Latin-1 characters.
 120
 121 =head2 B<no legacy>
 122
 123 Turn on a new behavior in a version of Perl that understands
 124 it but has it turned off by default.  For example, C<no legacy 'foo'> turns on
 125 behavior C<foo> in the lexical scope of the pragma.  C<no legacy>
 126 without any modifier turns on all new behaviors known to the pragma.
 127
 128 =head1 LEGACY BUNDLES
 129
 130 It's possible to turn off all new behaviors past a given release by
 131 using a I<legacy bundle>, which is the name of the release prefixed with
 132 a colon, to distinguish it from an individual legacy behavior.
 133
 134 Specifying sub-versions such as the C<0> in C<5.10.0> in legacy bundles has
 135 no effect: legacy bundles are guaranteed to be the same for all sub-versions.
 136
 137 Legacy bundles are not allowed with C<no legacy>
 138
 139 =cut
 140
 141 sub import {
 142     my $class = shift;
 143     if (@_ == 0) {
 144         croak("No legacy behaviors specified");
 145     }
 146     while (@_) {
 147         my $name = shift(@_);
 148         if (substr($name, 0, 1) eq ":") {
 149             my $v = substr($name, 1);
 150             if (!exists $legacy_bundle{$v}) {
 151                 $v =~ s/^([0-9]+)\.([0-9]+).[0-9]+$/$1.$2/;
 152                 if (!exists $legacy_bundle{$v}) {
 153                     unknown_legacy_bundle(substr($name, 1));
 154                 }
 155             }
 156             unshift @_, @{$legacy_bundle{$v}};
 157             next;
 158         }
 159         if (!exists $legacy{$name}) {
 160             unknown_legacy($name);
 161         }
 162         $^H &= ~$unicode8bit::hint_uni8bit;    # The only valid thing as of yet
 163     }
 164 }
 165
 166
 167 sub unimport {
 168     my $class = shift;
 169
 170     # A bare C<no legacy> should disable *all* legacy behaviors
 171     if (!@_) {
 172         unshift @_, keys(%legacy);
 173     }
 174
 175     while (@_) {
 176         my $name = shift;
 177         if (substr($name, 0, 1) eq ":") {
 178             croak(sprintf('Legacy bundles (%s) are not allowed in "no legacy"',
 179                 $name));
 180         }
 181         if (!exists($legacy{$name})) {
 182             unknown_legacy($name);
 183         }
 184         else {
 185             $^H |= $unicode8bit::hint_uni8bit; # The only valid thing as of yet
 186         }
 187     }
 188 }
 189
 190 sub unknown_legacy {
 191     my $legacy = shift;
 192     croak(sprintf('Legacy "%s" is not supported by Perl %vd', $legacy, $^V));
 193 }
 194
 195 sub unknown_legacy_bundle {
 196     my $legacy = shift;
 197     croak(sprintf('Legacy bundle "%s" is not supported by Perl %vd',
 198         $legacy, $^V));
 199 }
 200
 201 sub croak {
 202     require Carp;
 203     Carp::croak(@_);
 204 }
 205
 206 1;