Make a dummy OPpPAD_STATE and a dummy PL_unitcheck_save available to

[p5sagit/p5-mst-13.2.git] / pod / perlsub.pod
diff --git a/pod/perlsub.pod b/pod/perlsub.pod

index 376ba12..5ecd346 100644 (file)
--- a/pod/perlsub.pod
+++ b/pod/perlsub.pod
@@ -1,10 +1,12 @@
 =head1 NAME
+X<subroutine> X<function>
 
 perlsub - Perl subroutines
 
 =head1 SYNOPSIS
 
 To declare subroutines:
+X<subroutine, declaration> X<sub>
 
     sub NAME;                    # A "forward" declaration.
     sub NAME(PROTO);             #  ditto, but with prototypes
@@ -17,6 +19,7 @@ To declare subroutines:
     sub NAME(PROTO) : ATTRS BLOCK #  with prototypes and attributes
 
 To define an anonymous subroutine at runtime:
+X<subroutine, anonymous>
 
     $subref = sub BLOCK;                # no proto
     $subref = sub (PROTO) BLOCK;        # with proto
@@ -24,10 +27,12 @@ To define an anonymous subroutine at runtime:
     $subref = sub (PROTO) : ATTRS BLOCK; # with proto and attributes
 
 To import subroutines:
+X<import>
 
     use MODULE qw(NAME1 NAME2 NAME3);
 
 To call subroutines:
+X<subroutine, call> X<call>
 
     NAME(LIST);           # & is optional with parentheses.
     NAME LIST;    # Parentheses optional if predeclared/imported.
@@ -52,6 +57,7 @@ pass-by-reference instead to avoid this.  Both call and return lists may
 contain as many or as few scalar elements as you'd like.  (Often a
 function without an explicit return statement is called a subroutine, but
 there's really no difference from Perl's perspective.)
+X<subroutine, parameter> X<parameter>
 
 Any arguments passed in show up in the array C<@_>.  Therefore, if
 you called a function with two arguments, those would be stored in
@@ -65,16 +71,22 @@ or a reference to it is taken.  (Some earlier versions of Perl
 created the element whether or not the element was assigned to.)
 Assigning to the whole array C<@_> removes that aliasing, and does
 not update any arguments.
-
-The return value of a subroutine is the value of the last expression
-evaluated.  More explicitly, a C<return> statement may be used to exit the
-subroutine, optionally specifying the returned value, which will be
-evaluated in the appropriate context (list, scalar, or void) depending
-on the context of the subroutine call.  If you specify no return value,
-the subroutine returns an empty list in list context, the undefined
-value in scalar context, or nothing in void context.  If you return
-one or more aggregates (arrays and hashes), these will be flattened
-together into one large indistinguishable list.
+X<subroutine, argument> X<argument> X<@_>
+
+A C<return> statement may be used to exit a subroutine, optionally
+specifying the returned value, which will be evaluated in the
+appropriate context (list, scalar, or void) depending on the context of
+the subroutine call.  If you specify no return value, the subroutine
+returns an empty list in list context, the undefined value in scalar
+context, or nothing in void context.  If you return one or more
+aggregates (arrays and hashes), these will be flattened together into
+one large indistinguishable list.
+
+If no C<return> is found and if the last statement is an expression, its
+value is returned. If the last statement is a loop control structure
+like a C<foreach> or a C<while>, the returned value is unspecified. The
+empty sub returns the empty list.
+X<subroutine, return value> X<return value> X<return>
 
 Perl does not have named formal parameters.  In practice all you
 do is assign to a C<my()> list of these.  Variables that aren't
@@ -83,6 +95,7 @@ on creating private variables, see L<"Private Variables via my()">
 and L<"Temporary Values via local()">.  To create protected
 environments for a set of functions in a separate package (and
 probably a separate file), see L<perlmod/"Packages">.
+X<formal parameter> X<parameter, formal>
 
 Example:
 
@@ -129,6 +142,7 @@ Because the assignment copies the values, this also has the effect
 of turning call-by-reference into call-by-value.  Otherwise a
 function is free to do in-place modifications of C<@_> and change
 its caller's values.
+X<call-by-reference> X<call-by-value>
 
     upcase_in($v1, $v2);  # this changes $v1 and $v2
     sub upcase_in {
@@ -138,6 +152,7 @@ its caller's values.
 You aren't allowed to modify constants in this way, of course.  If an
 argument were actually literal and you tried to change it, you'd take a
 (presumably fatal) exception.   For example, this won't work:
+X<call-by-reference> X<call-by-value>
 
     upcase_in("frederick");
 
@@ -181,12 +196,14 @@ want to do an indirect subroutine call with a subroutine name or
 reference using the C<&$subref()> or C<&{$subref}()> constructs,
 although the C<< $subref->() >> notation solves that problem.
 See L<perlref> for more about all that.
+X<&>
 
 Subroutines may be called recursively.  If a subroutine is called
 using the C<&> form, the argument list is optional, and if omitted,
 no C<@_> array is set up for the subroutine: the C<@_> array at the
 time of the call is visible to subroutine instead.  This is an
 efficiency mechanism that new users may wish to avoid.
+X<recursion>
 
     &foo(1,2,3);       # pass three arguments
     foo(1,2,3);                # the same
@@ -201,16 +218,23 @@ Not only does the C<&> form make the argument list optional, it also
 disables any prototype checking on arguments you do provide.  This
 is partly for historical reasons, and partly for having a convenient way
 to cheat if you know what you're doing.  See L<Prototypes> below.
+X<&>
+
+Subroutines whose names are in all upper case are reserved to the Perl
+core, as are modules whose names are in all lower case.  A subroutine in
+all capitals is a loosely-held convention meaning it will be called
+indirectly by the run-time system itself, usually due to a triggered event.
+Subroutines that do special, pre-defined things include C<AUTOLOAD>, C<CLONE>,
+and C<DESTROY>, plus all functions mentioned in L<perltie> and L<PerlIO::via>.
 
-Functions whose names are in all upper case are reserved to the Perl
-core, as are modules whose names are in all lower case.  A
-function in all capitals is a loosely-held convention meaning it
-will be called indirectly by the run-time system itself, usually
-due to a triggered event.  Functions that do special, pre-defined
-things include C<BEGIN>, C<CHECK>, C<INIT>, C<END>, C<AUTOLOAD>,
-C<CLONE> and C<DESTROY>--plus all functions mentioned in L<perltie>.
+The C<BEGIN>, C<UNITCHECK>, C<CHECK>, C<INIT> and C<END> subroutines
+are not so much subroutines as named special code blocks, of which you
+can have more than one in a package, and which you can B<not> call
+explicitly.  See L<perlmod/"BEGIN, UNITCHECK, CHECK, INIT and END">.
 
 =head2 Private Variables via my()
+X<my> X<variable, lexical> X<lexical> X<lexical variable> X<scope, lexical>
+X<lexical scope> X<attributes, my>
 
 Synopsis:
 
@@ -238,6 +262,7 @@ variables declared with C<my> are totally hidden from the outside
 world, including any called subroutines.  This is true if it's the
 same subroutine called from itself or elsewhere--every call gets
 its own copy.
+X<local>
 
 This doesn't mean that a C<my> variable declared in a statically
 enclosing lexical scope would be invisible.  Only dynamic scopes
@@ -251,6 +276,7 @@ occurred at the same scope, presumably file scope.
 An C<eval()>, however, can see lexical variables of the scope it is
 being evaluated in, so long as the names aren't hidden by declarations within
 the C<eval()> itself.  See L<perlref>.
+X<eval, scope of>
 
 The parameter list to my() may be assigned to if desired, which allows you
 to initialize your variables.  (If no initializer is given for a
@@ -333,6 +359,7 @@ in the manner of C<local>.  However, if the index variable is
 prefixed with the keyword C<my>, or if there is already a lexical
 by that name in scope, then a new lexical is created instead.  Thus
 in the loop
+X<foreach> X<for>
 
     for my $i (1, 2, 3) {
         some_function();
@@ -340,6 +367,7 @@ in the loop
 
 the scope of $i extends to the end of the loop, but not beyond it,
 rendering the value of $i inaccessible within C<some_function()>.
+X<foreach> X<for>
 
 Some users may wish to encourage the use of lexically scoped variables.
 As an aid to catching implicit uses to package variables,
@@ -366,7 +394,6 @@ never fully qualified with the package name.  In particular, you're not
 allowed to try to make a package variable (or other global) lexical:
 
     my $pack::var;     # ERROR!  Illegal syntax
-    my $_;             # also illegal (currently)
 
 In fact, a dynamic variable (also known as package or global variables)
 are still accessible using the fully qualified C<::> notation even while a
@@ -404,6 +431,50 @@ L<perlref/"Function Templates"> for something of a work-around to
 this.
 
 =head2 Persistent Private Variables
+X<state> X<state variable> X<static> X<variable, persistent> X<variable, static> X<closure>
+
+There are two ways to build persistent private variables in Perl 5.10.
+First, you can simply use the C<state> feature. Or, you can use closures,
+if you want to stay compatible with releases older than 5.10.
+
+=head3 Persistent variables via state()
+
+Beginning with perl 5.9.4, you can declare variables with the C<state>
+keyword in place of C<my>. For that to work, though, you must have
+enabled that feature beforehand, either by using the C<feature> pragma, or
+by using C<-E> on one-liners.  (See L<feature>.)
+
+For example, the following code maintains a private counter, incremented
+each time the gimme_another() function is called:
+
+    use feature 'state';
+    sub gimme_another { state $x; return ++$x }
+
+Also, since C<$x> is lexical, it can't be reached or modified by any Perl
+code outside.
+
+You can initialize state variables, and the assignment will be executed
+only once:
+
+    sub starts_from_42 { state $x = 42; return ++$x }
+
+You can also, as a syntactic shortcut, initialize more than one if they're
+all declared within the same state() clause:
+
+    state ($a, $b, $c) = ( 'one', 'two', 'three' );
+
+However, be warned that state variables declared as part of a list will
+get assigned each time the statement is executed, since it will be
+considered as a regular list assignment, not one to be executed only once:
+
+    (state $x, my $y) = (1, 2); # $x gets reinitialized every time !
+
+B<Caveat>: the code at the right side of the assignment to a state
+variable will be executed every time; only the assignment is disabled. So,
+avoid code that has side-effects, or that is slow to execute. This might
+be optimized out in a future version of Perl.
+
+=head3 Persistent variables with closures
 
 Just because a lexical variable is lexically (also called statically)
 scoped to its enclosing block, C<eval>, or C<do> FILE, this doesn't mean that
@@ -440,18 +511,19 @@ via C<require> or C<use>, then this is probably just fine.  If it's
 all in the main program, you'll need to arrange for the C<my>
 to be executed early, either by putting the whole block above
 your main program, or more likely, placing merely a C<BEGIN>
-sub around it to make sure it gets executed before your program
+code block around it to make sure it gets executed before your program
 starts to run:
 
-    sub BEGIN {
+    BEGIN {
        my $secret_val = 0;
        sub gimme_another {
            return ++$secret_val;
        }
     }
 
-See L<perlmod/"Package Constructors and Destructors"> about the
-special triggered functions, C<BEGIN>, C<CHECK>, C<INIT> and C<END>.
+See L<perlmod/"BEGIN, UNITCHECK, CHECK, INIT and END"> about the
+special triggered code blocks, C<BEGIN>, C<UNITCHECK>, C<CHECK>,
+C<INIT> and C<END>.
 
 If declared at the outermost scope (the file scope), then lexicals
 work somewhat like C's file statics.  They are available to all
@@ -460,6 +532,8 @@ from outside that file.  This strategy is sometimes used in modules
 to create private variables that the whole module can see.
 
 =head2 Temporary Values via local()
+X<local> X<scope, dynamic> X<dynamic scope> X<variable, local>
+X<variable, temporary>
 
 B<WARNING>: In general, you should be using C<my> instead of C<local>, because
 it's faster and safer.  Exceptions to this include the global punctuation
@@ -515,6 +589,7 @@ through a loop.  Consequently, it's more efficient to localize your
 variables outside the loop.
 
 =head3 Grammatical note on local()
+X<local, context>
 
 A C<local> is simply a modifier on an lvalue expression.  When you assign to
 a C<local>ized variable, the C<local> doesn't change whether its list is viewed
@@ -530,6 +605,7 @@ both supply a list context to the right-hand side, while
 supplies a scalar context.
 
 =head3 Localization of special variables
+X<local, special variable>
 
 If you localize a special variable, you'll be giving a new value to it,
 but its magic won't go away.  That means that all side-effects related
@@ -565,8 +641,10 @@ code that relies on any particular behaviour of localising tied arrays
 or hashes (localising individual elements is still okay).
 See L<perl58delta/"Localising Tied Arrays and Hashes Is Broken"> for more
 details.
+X<local, tie>
 
 =head3 Localization of globs
+X<local, glob> X<glob>
 
 The construct
 
@@ -584,8 +662,11 @@ separator.
 Notably, if you want to work with a brand new value of the default scalar
 $_, and avoid the potential problem listed above about $_ previously
 carrying a magic value, you should use C<local *_> instead of C<local $_>.
+As of perl 5.9.1, you can also use the lexical form of C<$_> (declaring it
+with C<my $_>), which completely avoids this problem.
 
 =head3 Localization of elements of composite types
+X<local, composite type element> X<local, array element> X<local, hash element>
 
 It's also worth taking a moment to explain what happens when you
 C<local>ize a member of a composite type (i.e. an array or hash element).
@@ -628,6 +709,7 @@ The behavior of local() on non-existent members of composite
 types is subject to change in future.
 
 =head2 Lvalue subroutines
+X<lvalue> X<subroutine, lvalue>
 
 B<WARNING>: Lvalue subroutines are still experimental and the
 implementation may change in future versions of Perl.
@@ -697,6 +779,7 @@ subroutine never gets that chance.  Consider;
 =back
 
 =head2 Passing Symbol Table Entries (typeglobs)
+X<typeglob> X<*>
 
 B<WARNING>: The mechanism described in this section was originally
 the only way to simulate pass-by-reference in older versions of
@@ -739,6 +822,7 @@ the individual arrays.  For more on typeglobs, see
 L<perldata/"Typeglobs and Filehandles">.
 
 =head2 When to Still Use local()
+X<local> X<variable, local>
 
 Despite the existence of C<my>, there are still three places where the
 C<local> operator still shines.  In fact, in these three places, you
@@ -816,6 +900,7 @@ this operation could on occasion misbehave.
 =back
 
 =head2 Pass by Reference
+X<pass by reference> X<pass-by-reference> X<reference>
 
 If you want to pass more than one array or hash into a function--or
 return them from it--and have them maintain their integrity, then
@@ -929,6 +1014,7 @@ Notice to pass back just the bare *FH, not its reference.
     }
 
 =head2 Prototypes
+X<prototype> X<subroutine, prototype>
 
 Perl supports a very limited kind of compile-time argument checking
 using function prototyping.  If you declare
@@ -969,7 +1055,7 @@ corresponding built-in.
     sub myopen (*;$)        myopen HANDLE, $name
     sub mypipe (**)         mypipe READHANDLE, WRITEHANDLE
     sub mygrep (&@)         mygrep { /foo/ } $a, $b, $c
-    sub myrand ($)          myrand 42
+    sub myrand (;$)         myrand 42
     sub mytime ()           mytime
 
 Any backslashed prototype character represents an actual argument
@@ -1013,9 +1099,13 @@ follows:
        ...
     }
 
-A semicolon separates mandatory arguments from optional arguments.
+A semicolon (C<;>) separates mandatory arguments from optional arguments.
 It is redundant before C<@> or C<%>, which gobble up everything else.
 
+As the last character of a prototype, or just before a semicolon, you can
+use C<_> in place of C<$>: if this argument is not provided, C<$_> will be
+used instead.
+
 Note how the last three examples in the table above are treated
 specially by the parser.  C<mygrep()> is parsed as a true list
 operator, C<myrand()> is parsed as a true unary operator with unary
@@ -1029,6 +1119,7 @@ without a prototype.
 
 The interesting thing about C<&> is that you can generate new syntax with it,
 provided it's in the initial position:
+X<&>
 
     sub try (&@) {
        my($try,$catch) = @_;
@@ -1053,6 +1144,7 @@ scoped, those anonymous subroutines can act like closures... (Gee,
 is this sounding a little Lispish?  (Never mind.))))
 
 And here's a reimplementation of the Perl C<grep> operator:
+X<grep>
 
     sub mygrep (&@) {
        my $code = shift;
@@ -1106,6 +1198,7 @@ This is all very powerful, of course, and should be used only in moderation
 to make the world a better place.
 
 =head2 Constant Functions
+X<constant>
 
 Functions with a prototype of C<()> are potential candidates for
 inlining.  If the result after optimization and constant folding
@@ -1127,7 +1220,17 @@ The following functions would all be inlined:
     sub FLAG_MASK ()   { FLAG_FOO | FLAG_BAR }
 
     sub OPT_BAZ ()     { not (0x1B58 & FLAG_MASK) }
-    sub BAZ_VAL () {
+
+    sub N () { int(OPT_BAZ) / 3 }
+
+    sub FOO_SET () { 1 if FLAG_MASK & FLAG_FOO }
+
+Be aware that the following will not be inlined; as they contain inner scopes,
+the constant folding doesn't reduce them to a single constant:
+
+    sub foo_set () { if (FLAG_MASK & FLAG_FOO) { 1 } }
+
+    sub baz_val () {
        if (OPT_BAZ) {
            return 23;
        }
@@ -1136,13 +1239,6 @@ The following functions would all be inlined:
        }
     }
 
-    sub N () { int(BAZ_VAL) / 3 }
-    BEGIN {
-       my $prod = 1;
-       for (1..N) { $prod *= $_ }
-       sub N_FACTORIAL () { $prod }
-    }
-
 If you redefine a subroutine that was eligible for inlining, you'll get
 a mandatory warning.  (You can use this warning to tell whether or not a
 particular subroutine is considered constant.)  The warning is
@@ -1158,6 +1254,7 @@ inlining mechanism in some other way, such as
     }
 
 =head2 Overriding Built-in Functions
+X<built-in> X<override> X<CORE> X<CORE::GLOBAL>
 
 Many built-in functions may be overridden, though this should be tried
 only occasionally and for good reason.  Typically this might be
@@ -1275,11 +1372,13 @@ And, as you'll have noticed from the previous example, if you override
 C<glob>, the C<< <*> >> glob operator is overridden as well.
 
 In a similar fashion, overriding the C<readline> function also overrides
-the equivalent I/O operator C<< <FILEHANDLE> >>.
+the equivalent I/O operator C<< <FILEHANDLE> >>. Also, overriding
+C<readpipe> overrides the operators C<``> and C<qx//>.
 
 Finally, some built-ins (e.g. C<exists> or C<grep>) can't be overridden.
 
 =head2 Autoloading
+X<autoloading> X<AUTOLOAD>
 
 If you call a subroutine that is undefined, you would ordinarily
 get an immediate, fatal error complaining that the subroutine doesn't
@@ -1331,6 +1430,7 @@ SelfLoader modules in L<SelfLoader>, and the document on adding C
 functions to Perl code in L<perlxs>.
 
 =head2 Subroutine Attributes
+X<attribute> X<subroutine, attribute> X<attrs>
 
 A subroutine declaration or definition may have a list of attributes
 associated with it.  If such an attribute list is present, it is
@@ -1348,17 +1448,17 @@ nest properly.
 
 Examples of valid syntax (even though the attributes are unknown):
 
-    sub fnord (&\%) : switch(10,foo(7,3))  :  expensive ;
-    sub plugh () : Ugly('\(") :Bad ;
+    sub fnord (&\%) : switch(10,foo(7,3))  :  expensive;
+    sub plugh () : Ugly('\(") :Bad;
     sub xyzzy : _5x5 { ... }
 
 Examples of invalid syntax:
 
-    sub fnord : switch(10,foo() ; # ()-string not balanced
-    sub snoid : Ugly('(') ;      # ()-string not balanced
-    sub xyzzy : 5x5 ;            # "5x5" not a valid identifier
-    sub plugh : Y2::north ;      # "Y2::north" not a simple identifier
-    sub snurt : foo + bar ;      # "+" not a colon or space
+    sub fnord : switch(10,foo(); # ()-string not balanced
+    sub snoid : Ugly('(');       # ()-string not balanced
+    sub xyzzy : 5x5;             # "5x5" not a valid identifier
+    sub plugh : Y2::north;       # "Y2::north" not a simple identifier
+    sub snurt : foo + bar;       # "+" not a colon or space
 
 The attribute list is passed as a list of constant strings to the code
 which associates them with the subroutine.  In particular, the second example