lib/feature.pm

   1 package feature;
   2
   3 our $VERSION = '1.14';
   4
   5 # (feature name) => (internal name, used in %^H)
   6 my %feature = (
   7     switch          => 'feature_switch',
   8     say             => "feature_say",
   9     state           => "feature_state",
  10     unicode_strings => "feature_unicode",
  11 );
  12
  13 # This gets set (for now) in $^H as well as in %^H,
  14 # for runtime speed of the uc/lc/ucfirst/lcfirst functions.
  15 our $hint_uni8bit = 0x00000800;
  16
  17 # NB. the latest bundle must be loaded by the -E switch (see toke.c)
  18
  19 my %feature_bundle = (
  20     "5.10" => [qw(switch say state)],
  21     "5.11" => [qw(switch say state unicode_strings)],
  22 );
  23
  24 # special case
  25 $feature_bundle{"5.9.5"} = $feature_bundle{"5.10"};
  26
  27 # TODO:
  28 # - think about versioned features (use feature switch => 2)
  29
  30 =head1 NAME
  31
  32 feature - Perl pragma to enable new syntactic features
  33
  34 =head1 SYNOPSIS
  35
  36     use feature qw(switch say);
  37     given ($foo) {
  38         when (1)          { say "\$foo == 1" }
  39         when ([2,3])      { say "\$foo == 2 || \$foo == 3" }
  40         when (/^a[bc]d$/) { say "\$foo eq 'abd' || \$foo eq 'acd'" }
  41         when ($_ > 100)   { say "\$foo > 100" }
  42         default           { say "None of the above" }
  43     }
  44
  45     use feature ':5.10'; # loads all features available in perl 5.10
  46
  47 =head1 DESCRIPTION
  48
  49 It is usually impossible to add new syntax to Perl without breaking
  50 some existing programs. This pragma provides a way to minimize that
  51 risk. New syntactic constructs, or new semantic meanings to older
  52 constructs, can be enabled by C<use feature 'foo'>, and will be parsed
  53 only when the appropriate feature pragma is in scope.
  54
  55 =head2 Lexical effect
  56
  57 Like other pragmas (C<use strict>, for example), features have a lexical
  58 effect. C<use feature qw(foo)> will only make the feature "foo" available
  59 from that point to the end of the enclosing block.
  60
  61     {
  62         use feature 'say';
  63         say "say is available here";
  64     }
  65     print "But not here.\n";
  66
  67 =head2 C<no feature>
  68
  69 Features can also be turned off by using C<no feature "foo">. This too
  70 has lexical effect.
  71
  72     use feature 'say';
  73     say "say is available here";
  74     {
  75         no feature 'say';
  76         print "But not here.\n";
  77     }
  78     say "Yet it is here.";
  79
  80 C<no feature> with no features specified will turn off all features.
  81
  82 =head2 The 'switch' feature
  83
  84 C<use feature 'switch'> tells the compiler to enable the Perl 6
  85 given/when construct.
  86
  87 See L<perlsyn/"Switch statements"> for details.
  88
  89 =head2 The 'say' feature
  90
  91 C<use feature 'say'> tells the compiler to enable the Perl 6
  92 C<say> function.
  93
  94 See L<perlfunc/say> for details.
  95
  96 =head2 The 'state' feature
  97
  98 C<use feature 'state'> tells the compiler to enable C<state>
  99 variables.
 100
 101 See L<perlsub/"Persistent Private Variables"> for details.
 102
 103 =head2 The 'unicode_strings' feature
 104
 105 C<use feature 'unicode_strings'> tells the compiler to treat
 106 strings with codepoints larger than 127 as Unicode. It is available
 107 starting with Perl 5.11.3.
 108
 109 In greater detail:
 110
 111 This feature modifies the semantics for the 128 characters on ASCII
 112 systems that have the 8th bit set.  (See L</EBCDIC platforms> below for
 113 EBCDIC systems.) By default, unless C<S<use locale>> is specified, or the
 114 scalar containing such a character is known by Perl to be encoded in UTF8,
 115 the semantics are essentially that the characters have an ordinal number,
 116 and that's it.  They are caseless, and aren't anything: they're not
 117 controls, not letters, not punctuation, ..., not anything.
 118
 119 This behavior stems from when Perl did not support Unicode, and ASCII was the
 120 only known character set outside of C<S<use locale>>.  In order to not
 121 possibly break pre-Unicode programs, these characters have retained their old
 122 non-meanings, except when it is clear to Perl that Unicode is what is meant,
 123 for example by calling utf8::upgrade() on a scalar, or if the scalar also
 124 contains characters that are only available in Unicode.  Then these 128
 125 characters take on their Unicode meanings.
 126
 127 The problem with this behavior is that a scalar that encodes these characters
 128 has a different meaning depending on if it is stored as utf8 or not.
 129 In general, the internal storage method should not affect the
 130 external behavior.
 131
 132 The behavior is known to have effects on these areas:
 133
 134 =over 4
 135
 136 =item *
 137
 138 Changing the case of a scalar, that is, using C<uc()>, C<ucfirst()>, C<lc()>,
 139 and C<lcfirst()>, or C<\L>, C<\U>, C<\u> and C<\l> in regular expression
 140 substitutions.
 141
 142 =item *
 143
 144 Using caseless (C</i>) regular expression matching
 145
 146 =item *
 147
 148 Matching a number of properties in regular expressions, such as C<\w>
 149
 150 =item *
 151
 152 User-defined case change mappings.  You can create a C<ToUpper()> function, for
 153 example, which overrides Perl's built-in case mappings.  The scalar must be
 154 encoded in utf8 for your function to actually be invoked.
 155
 156 =back
 157
 158 B<This lack of semantics for these characters is currently the default,>
 159 outside of C<use locale>.  See below for EBCDIC.
 160
 161 To turn on B<case changing semantics only> for these characters, use
 162 C<use feature "unicode_strings">.
 163
 164 The other old (legacy) behaviors regarding these characters are currently
 165 unaffected by this pragma.
 166
 167 =head4 EBCDIC platforms
 168
 169 On EBCDIC platforms, the situation is somewhat different.  The legacy
 170 semantics are whatever the underlying semantics of the native C language
 171 library are.  Each of the three EBCDIC encodings currently known by Perl is an
 172 isomorph of the Latin-1 character set.  That means every character in Latin-1
 173 has a corresponding EBCDIC equivalent, and vice-versa.  Specifying C<S<use
 174 feature "unicode_strings">> currently makes sure that all EBCDIC characters
 175 have the same B<casing only> semantics as their corresponding Latin-1 characters.
 176
 177 =head1 FEATURE BUNDLES
 178
 179 It's possible to load a whole slew of features in one go, using
 180 a I<feature bundle>. The name of a feature bundle is prefixed with
 181 a colon, to distinguish it from an actual feature. The bundles are
 182 C<:5.10>, equivalent to C<use feature qw(switch say state)>, and
 183 C<:5.11>, which also enables C<unicode_strings>.
 184
 185 Specifying sub-versions such as the C<0> in C<5.10.0> in feature bundles has
 186 no effect: feature bundles are guaranteed to be the same for all sub-versions.
 187
 188 =head1 IMPLICIT LOADING
 189
 190 There are two ways to load the C<feature> pragma implicitly:
 191
 192 =over 4
 193
 194 =item *
 195
 196 By using the C<-E> switch on the command-line instead of C<-e>. It enables
 197 all available features in the main compilation unit (that is, the one-liner.)
 198
 199 =item *
 200
 201 By explicitly requiring a minimum Perl version number for your program, with
 202 the C<use VERSION> construct, and when the version is higher than or equal to
 203 5.10.0. That is,
 204
 205     use 5.10.0;
 206
 207 will do an implicit
 208
 209     use feature ':5.10';
 210
 211 and so on. Note how the trailing sub-version is automatically stripped from the
 212 version.
 213
 214 But to avoid portability warnings (see L<perlfunc/use>), you may prefer:
 215
 216     use 5.010;
 217
 218 with the same effect.
 219
 220 =back
 221
 222 =cut
 223
 224 sub import {
 225     my $class = shift;
 226     if (@_ == 0) {
 227         croak("No features specified");
 228     }
 229     while (@_) {
 230         my $name = shift(@_);
 231         if (substr($name, 0, 1) eq ":") {
 232             my $v = substr($name, 1);
 233             if (!exists $feature_bundle{$v}) {
 234                 $v =~ s/^([0-9]+)\.([0-9]+).[0-9]+$/$1.$2/;
 235                 if (!exists $feature_bundle{$v}) {
 236                     unknown_feature_bundle(substr($name, 1));
 237                 }
 238             }
 239             unshift @_, @{$feature_bundle{$v}};
 240             next;
 241         }
 242         if (!exists $feature{$name}) {
 243             unknown_feature($name);
 244         }
 245         $^H{$feature{$name}} = 1;
 246         $^H |= $hint_uni8bit if $name eq 'unicode_strings';
 247     }
 248 }
 249
 250 sub unimport {
 251     my $class = shift;
 252
 253     # A bare C<no feature> should disable *all* features
 254     if (!@_) {
 255         delete @^H{ values(%feature) };
 256         $^H &= ~ $hint_uni8bit;
 257         return;
 258     }
 259
 260     while (@_) {
 261         my $name = shift;
 262         if (substr($name, 0, 1) eq ":") {
 263             my $v = substr($name, 1);
 264             if (!exists $feature_bundle{$v}) {
 265                 $v =~ s/^([0-9]+)\.([0-9]+).[0-9]+$/$1.$2/;
 266                 if (!exists $feature_bundle{$v}) {
 267                     unknown_feature_bundle(substr($name, 1));
 268                 }
 269             }
 270             unshift @_, @{$feature_bundle{$v}};
 271             next;
 272         }
 273         if (!exists($feature{$name})) {
 274             unknown_feature($name);
 275         }
 276         else {
 277             delete $^H{$feature{$name}};
 278             $^H &= ~ $hint_uni8bit if $name eq 'unicode_strings';
 279         }
 280     }
 281 }
 282
 283 sub unknown_feature {
 284     my $feature = shift;
 285     croak(sprintf('Feature "%s" is not supported by Perl %vd',
 286             $feature, $^V));
 287 }
 288
 289 sub unknown_feature_bundle {
 290     my $feature = shift;
 291     croak(sprintf('Feature bundle "%s" is not supported by Perl %vd',
 292             $feature, $^V));
 293 }
 294
 295 sub croak {
 296     require Carp;
 297     Carp::croak(@_);
 298 }
 299
 300 1;