From: Gurusamy Sarathy Date: Mon, 24 May 1999 07:24:11 +0000 (+0000) Subject: major pod update from Tom Christiansen X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=19799a22062ef658e4ac543ea06fa9193323512a;p=p5sagit%2Fp5-mst-13.2.git major pod update from Tom Christiansen p4raw-id: //depot/perl@3460 --- diff --git a/MANIFEST b/MANIFEST index 10a1bcc..2678567 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1001,6 +1001,7 @@ pod/perltie.pod Tieing an object class into a simple variable pod/perltoc.pod Table of Contents info pod/perltodo.pod Todo list explained pod/perltoot.pod Tom's object-oriented tutorial +pod/perltootc.pod Tom's object-oriented tutorial (more on class data) pod/perltrap.pod Trap info pod/perlvar.pod Variable info pod/perlxs.pod XS api info diff --git a/lib/Pod/Functions.pm b/lib/Pod/Functions.pm index 033c579..5e6551f 100644 --- a/lib/Pod/Functions.pm +++ b/lib/Pod/Functions.pm @@ -90,9 +90,9 @@ __DATA__ abs Math absolute value function accept Socket accept an incoming socket connect alarm Process schedule a SIGALRM -atan2 Math arctangent of Y/X +atan2 Math arctangent of Y/X in the range -PI to PI bind Socket binds an address to a socket -binmode I/O prepare binary files on old systems +binmode I/O prepare binary files for I/O bless Objects create an object caller Flow,Namespace get context of the current subroutine call chdir File change your current working directory @@ -104,7 +104,7 @@ chr String get character this number represents chroot File make directory new root for path lookups close I/O close file (or pipe or socket) handle closedir I/O close directory handle -connect Socket connect to a remove socket +connect Socket connect to a remote socket continue Flow optional trailing block in a while or foreach cos Math cosine function crypt String one-way passwd-style encryption @@ -123,12 +123,12 @@ endprotoent Network be done using protocols file endpwent User be done using passwd file endservent Network be done using 
services file eof I/O test a filehandle for its end -eval Flow,Misc catch exceptions or compile code +eval Flow,Misc catch exceptions or compile and run code exec Process abandon this program to run another exists HASH test whether a hash key is present exit Flow terminate this program exp Math raise I to a power -fcntl File file control system all +fcntl File file control system call fileno I/O return file descriptor from filehandle flock I/O lock an entire file with an advisory lock fork Process create a new process just like this one @@ -145,7 +145,7 @@ getlogin User return who logged in at this tty getnetbyaddr Network get network record given its address getnetbyname Network get networks record given name getnetent Network get next networks record -getpeername Socket find the other hend of a socket connection +getpeername Socket find the other end of a socket connection getpgrp Process get process group getppid Process get parent process ID getpriority Process get current nice value @@ -180,6 +180,7 @@ link File create a hard link in the filesytem listen Socket register your socket as a server local Misc,Namespace create a temporary value for a global variable (dynamic scoping) localtime Time convert UNIX time into record or string using local time +lock Threads get a thread lock on a variable, subroutine, or method log Math retrieve the natural logarithm for a number lstat File stat a symbolic link m// Regexp match a string with a regular expression pattern @@ -251,7 +252,7 @@ shmget SysV get SysV shared memory segment identifier shmread SysV read SysV shared memory shmwrite SysV write SysV shared memory shutdown Socket close down just half of a socket connection -sin Math return the sin of a number +sin Math return the sine of a number sleep Process block for some number of seconds socket Socket create a socket socketpair Socket create a pair of sockets diff --git a/pod/Makefile b/pod/Makefile index 7db379c..f28b9d4 100644 --- a/pod/Makefile +++ 
b/pod/Makefile @@ -38,6 +38,7 @@ POD = \ perldsc.pod \ perllol.pod \ perltoot.pod \ + perltootc.pod \ perlobj.pod \ perltie.pod \ perlbot.pod \ @@ -96,6 +97,7 @@ MAN = \ perldsc.man \ perllol.man \ perltoot.man \ + perltootc.man \ perlobj.man \ perltie.man \ perlbot.man \ @@ -154,6 +156,7 @@ HTML = \ perldsc.html \ perllol.html \ perltoot.html \ + perltootc.html \ perlobj.html \ perltie.html \ perlbot.html \ @@ -212,6 +215,7 @@ TEX = \ perldsc.tex \ perllol.tex \ perltoot.tex \ + perltootc.tex \ perlobj.tex \ perltie.tex \ perlbot.tex \ diff --git a/pod/buildtoc b/pod/buildtoc index 62df02b..2574b10 100644 --- a/pod/buildtoc +++ b/pod/buildtoc @@ -10,7 +10,8 @@ sub output ($); perlsyn perlop perlre perlrun perlfunc perlvar perlsub perlmod perlmodlib perlmodinstall perlform perllocale perlref perlreftut perldsc - perllol perltoot perlobj perltie perlbot perlipc perldbmfilter perldebug + perllol perltoot perltootc perlobj perltie perlbot perlipc + perldbmfilter perldebug perldiag perlsec perltrap perlport perlstyle perlpod perlbook perlembed perlapio perlxs perlxstut perlguts perlcall perlhist diff --git a/pod/perl.pod b/pod/perl.pod index 8f688c7..3b4d785 100644 --- a/pod/perl.pod +++ b/pod/perl.pod @@ -4,16 +4,13 @@ perl - Practical Extraction and Report Language =head1 SYNOPSIS -B S<[ B<-sTuU> ]> - S<[ B<-hv> ] [ B<-V>[:I] ]> - S<[ B<-cw> ] [ B<-d>[:I] ] [ B<-D>[I] ]> - S<[ B<-pna> ] [ B<-F>I ] [ B<-l>[I] ] [ B<-0>[I] ]> - S<[ B<-I>I ] [ B<-m>[B<->]I ] [ B<-M>[B<->]I<'module...'> ]> - S<[ B<-P> ]> - S<[ B<-S> ]> - S<[ B<-x>[I] ]> - S<[ B<-i>[I] ]> - S<[ B<-e> I<'command'> ] [ B<--> ] [ I ] [ I ]...> +B S<[ B<-sTuU> ]> S<[ B<-hv> ] [ B<-V>[:I] ]> + S<[ B<-cw> ] [ B<-d>[:I] ] [ B<-D>[I] ]> + S<[ B<-pna> ] [ B<-F>I ] [ B<-l>[I] ] [ B<-0>[I] ]> + S<[ B<-I>I ] [ B<-m>[B<->]I ] [ B<-M>[B<->]I<'module...'> ]> + S<[ B<-P> ]> S<[ B<-S> ]> S<[ B<-x>[I] ]> + S<[ B<-i>[I] ]> S<[ B<-e> I<'command'> ] + [ B<--> ] [ I ] [ I ]...> For ease of access, the Perl manual has been 
split up into a number of sections: @@ -40,11 +37,12 @@ of sections: perlform Perl formats perllocale Perl locale support - perlref Perl references perlreftut Perl references short introduction + perlref Perl references, the rest of the story perldsc Perl data structures intro - perllol Perl data structures: lists of lists - perltoot Perl OO tutorial + perllol Perl data structures: arrays of arrays + perltoot Perl OO tutorial, part 1 + perltootc Perl OO tutorial, part 2 perlobj Perl objects perltie Perl objects hidden behind simple variables perlbot Perl OO tricks and examples @@ -75,7 +73,7 @@ of sections: (If you're intending to read these straight through for the first time, the suggested order will tend to reduce the number of forward references.) -By default, all of the above manpages are installed in the +By default, the manpages listed above are installed in the F directory. Extensive additional documentation for Perl modules is available. The @@ -140,107 +138,63 @@ scripts into Perl scripts. But wait, there's more... -Perl version 5 is nearly a complete rewrite, and provides -the following additional benefits: +Begun in 1993 (see L), Perl version 5 is nearly a complete +rewrite that provides the following additional benefits: -=over 5 +=over -=item * Many usability enhancements +=item * modularity and reusability using innumerable modules -It is now possible to write much more readable Perl code (even within -regular expressions). Formerly cryptic variable names can be replaced -by mnemonic identifiers. Error messages are more informative, and the -optional warnings will catch many of the mistakes a novice might make. -This cannot be stressed enough. Whenever you get mysterious behavior, -try the B<-w> switch!!! Whenever you don't get mysterious behavior, -try using B<-w> anyway. +Described in L, L, and L. -=item * Simplified grammar +=item * embeddable and extensible -The new yacc grammar is one half the size of the old one. 
Many of the -arbitrary grammar rules have been regularized. The number of reserved -words has been cut by 2/3. Despite this, nearly all old Perl scripts -will continue to work unchanged. +Described in L, L, L, L, +L, and L. -=item * Lexical scoping +=item * roll-your-own magic variables (including multiple simultaneous DBM implementations) -Perl variables may now be declared within a lexical scope, like "auto" -variables in C. Not only is this more efficient, but it contributes -to better privacy for "programming in the large". Anonymous -subroutines exhibit deep binding of lexical variables (closures). +Described in L and L. -=item * Arbitrarily nested data structures +=item * subroutines can now be overridden, autoloaded, and prototyped -Any scalar value, including any array element, may now contain a -reference to any other variable or subroutine. You can easily create -anonymous variables and subroutines. Perl manages your reference -counts for you. +Described in L. -=item * Modularity and reusability +=item * arbitrarily nested data structures and anonymous functions -The Perl library is now defined in terms of modules which can be easily -shared among various packages. A package may choose to import all or a -portion of a module's published interface. Pragmas (that is, compiler -directives) are defined and used by the same mechanism. +Described in L, L, L, and L. -=item * Object-oriented programming +=item * object-oriented programming -A package can function as a class. Dynamic multiple inheritance and -virtual methods are supported in a straightforward manner and with very -little new syntax. Filehandles may now be treated as objects. +Described in L, L, and L. -=item * Embeddable and Extensible +=item * compilability into C code or Perl bytecode -Perl may now be embedded easily in your C or C++ application, and can -either call or be called by your routines through a documented -interface. 
The XS preprocessor is provided to make it easy to glue -your C or C++ routines into Perl. Dynamic loading of modules is -supported, and Perl itself can be made into a dynamic library. +Described in L and L. -=item * POSIX compliant +=item * support for light-weight processes (threads) -A major new module is the POSIX module, which provides access to all -available POSIX routines and definitions, via object classes where -appropriate. +Described in L and L. -=item * Package constructors and destructors +=item * support for internationalization, localization, and Unicode -The new BEGIN and END blocks provide means to capture control as -a package is being compiled, and after the program exits. As a -degenerate case they work just like awk's BEGIN and END when you -use the B<-p> or B<-n> switches. +Described in L and L. -=item * Multiple simultaneous DBM implementations +=item * lexical scoping -A Perl program may now access DBM, NDBM, SDBM, GDBM, and Berkeley DB -files from the same script simultaneously. In fact, the old dbmopen -interface has been generalized to allow any variable to be tied -to an object class which defines its access methods. +Described in L. -=item * Subroutine definitions may now be autoloaded +=item * regular expression enhancements -In fact, the AUTOLOAD mechanism also allows you to define any arbitrary -semantics for undefined subroutine calls. It's not for just autoloading. +Described in L, with additional examples in L. -=item * Regular expression enhancements +=item * enhanced debugger and interactive Perl environment, with integrated editor support -You can now specify nongreedy quantifiers. You can now do grouping -without creating a backreference. You can now write regular expressions -with embedded whitespace and comments for readability. A consistent -extensibility mechanism has been added that is upwardly compatible with -all old regular expressions. +Described in L. 
-=item * Innumerable Unbundled Modules +=item * POSIX 1003.1 compliant library -The Comprehensive Perl Archive Network described in L -contains hundreds of plug-and-play modules full of reusable code. -See F for a site near you. - -=item * Compilability - -While not yet in full production mode, a working perl-to-C compiler -does exist. It can generate portable byte code, simple C, or -optimized C code. +Described in L. =back @@ -254,7 +208,7 @@ February 1999 and Perl 5.005_03. The following platforms are able to build Perl from the standard source code distribution available at -F +http://www.perl.com/CPAN/src/index.html AIX Linux SCO ODT/OSR A/UX MachTen Solaris @@ -278,7 +232,7 @@ F The following platforms have been known to build Perl from the source but for the Perl release 5.005_03 we haven't been able to verify them, either because the hardware/software platforms are rather rare or -because we don't have an active champion on these platforms, or both. +because we don't have an active champion on these platforms--or both. 3b1 FPS Plan 9 AmigaOS GENIX PowerUX @@ -301,7 +255,7 @@ supported in the Perl release 5.005_03: VM/ESA The following platforms have their own source code distributions and -binaries available via F. +binaries available via http://www.perl.com/CPAN/ports/index.html. Perl release @@ -311,7 +265,7 @@ binaries available via F. Tandem Guardian 5.004 The following platforms have only binaries available via -F. +http://www.perl.com/CPAN/ports/index.html. Perl release @@ -325,12 +279,12 @@ See L. =head1 AUTHOR -Larry Wall >, with the help of oodles of other folks. +Larry Wall , with the help of oodles of other folks. If your Perl success stories and testimonials may be of help to others who wish to advocate the use of Perl in their applications, or if you wish to simply express your gratitude to Larry and the -Perl developers, please write to >. +Perl developers, please write to perl-thanks@perl.org . 
=head1 FILES @@ -339,9 +293,11 @@ Perl developers, please write to >. =head1 SEE ALSO a2p awk to perl translator - s2p sed to perl translator + http://www.perl.com/ the Perl Home Page + http://www.perl.com/CPAN the Comprehensive Perl Archive + =head1 DIAGNOSTICS The B<-w> switch produces some lovely diagnostics. @@ -381,10 +337,10 @@ so they are limited to a maximum of 65535 (higher numbers usually being affected by wraparound). You may mail your bug reports (be sure to include full configuration -information as output by the myconfig program in the perl source tree, -or by C) to >. -If you've succeeded in compiling perl, the perlbug script in the utils/ -subdirectory can be used to help mail in a bug report. +information as output by the myconfig program in the perl source +tree, or by C) to perlbug@perl.com . If you've succeeded +in compiling perl, the perlbug script in the utils/ subdirectory +can be used to help mail in a bug report. Perl actually stands for Pathologically Eclectic Rubbish Lister, but don't tell anyone I said that. diff --git a/pod/perl5004delta.pod b/pod/perl5004delta.pod index 323830b..43bfb51 100644 --- a/pod/perl5004delta.pod +++ b/pod/perl5004delta.pod @@ -268,7 +268,7 @@ referenced subroutine, with the given parameters (if any). This new syntax follows the pattern of S{FOO}>> and S[$foo]>>: You may now write S> as -S($foo)>>. All of these arrow terms may be chained; +S($foo)>>. All these arrow terms may be chained; thus, S{FOO}}($bar)>> may now be written S{FOO}-E($bar)>>. @@ -758,7 +758,7 @@ details on how to get started with building this port. There is also support for building perl under the Cygwin32 environment. Cygwin32 is a set of GNU tools that make it possible to compile and run -many UNIX programs under Windows NT by providing a mostly UNIX-like +many Unix programs under Windows NT by providing a mostly Unix-like interface for compilation and execution. 
See F in the perl distribution for more details on this port and how to obtain the Cygwin32 toolkit. @@ -936,7 +936,7 @@ requested with the ":flock" tag (e.g. C). =head2 IO -The IO module provides a simple mechanism to load all of the IO modules at one +The IO module provides a simple mechanism to load all the IO modules at one go. Currently this includes: IO::Handle diff --git a/pod/perlcall.pod b/pod/perlcall.pod index 2b83780..35c0f05 100644 --- a/pod/perlcall.pod +++ b/pod/perlcall.pod @@ -116,7 +116,7 @@ subroutine are stored on the Perl stack. As a general rule you should I check the return value from these functions. Even if you are expecting only a particular number of values to be returned from the Perl subroutine, there is nothing to -stop someone from doing something unexpected - don't say you haven't +stop someone from doing something unexpected--don't say you haven't been warned. =head1 FLAG VALUES @@ -505,9 +505,9 @@ returned from I. It will always be 0. =head2 Passing Parameters Now let's make a slightly more complex example. This time we want to -call a Perl subroutine, C, which will take 2 parameters - a -string (C<$s>) and an integer (C<$n>). The subroutine will simply -print the first C<$n> characters of the string. +call a Perl subroutine, C, which will take 2 parameters--a +string ($s) and an integer ($n). The subroutine will simply +print the first $n characters of the string. So the Perl subroutine would look like this @@ -555,7 +555,7 @@ as C. =item 2. If you are going to put something onto the Perl stack, you need to know -where to put it. This is the purpose of the macro C - it declares +where to put it. This is the purpose of the macro C--it declares and initializes a I copy of the Perl stack pointer. All the other macros which will be used in this example require you to @@ -563,7 +563,7 @@ have used this macro. The exception to this rule is if you are calling a Perl subroutine directly from an XSUB function. 
In this case it is not necessary to -use the C macro explicitly - it will be declared for you +use the C macro explicitly--it will be declared for you automatically. =item 3. @@ -578,12 +578,12 @@ The C macro tells Perl to make a mental note of the current stack pointer. Even if you aren't passing any parameters (like the example shown in the section I) you must still call the C macro before you can call any of the -I functions - Perl still needs to know that there are no +I functions--Perl still needs to know that there are no parameters. The C macro sets the global copy of the stack pointer to be the same as our local copy. If we didn't do this I -wouldn't know where the two parameters we pushed were - remember that +wouldn't know where the two parameters we pushed were--remember that up to now all the stack pointer manipulation we have done is with our local copy, I the global copy. @@ -922,7 +922,7 @@ and here is a C function to call it. To be able to access the two parameters that were pushed onto the stack after they return from I it is necessary to make a note -of their addresses - thus the two variables C and C. +of their addresses--thus the two variables C and C. The reason this is necessary is that the area of the Perl stack which held them will very likely have been overwritten by something else by @@ -1175,11 +1175,11 @@ the version of Perl you are using) Not a CODE reference at ... Undefined subroutine &main::47 called ... -The variable C<$ref> may have referred to the subroutine C +The variable $ref may have referred to the subroutine C whenever the call to C was made but by the time C gets called it now holds the number C<47>. Because we saved only a pointer to the original SV in C, any changes to -C<$ref> will be tracked by the pointer C. This means that +$ref will be tracked by the pointer C. This means that whenever C gets called, it will attempt to execute the code which is referenced by the SV* C. 
In this case though, it now refers to the integer C<47>, so expect Perl to complain @@ -1351,7 +1351,7 @@ So the methods C and C can be invoked like this call_PrintID('Mine', 'PrintID') ; The only thing to note is that in both the static and virtual methods, -the method name is not passed via the stack - it is used as the first +the method name is not passed via the stack--it is used as the first parameter to I. =head2 Using GIMME_V @@ -1485,9 +1485,9 @@ enclosing scope at some stage. In the event driven scenario that may never happen. This means that as time goes on, your program will create more and more temporaries, none of which will ever be freed. As each of these temporaries consumes some memory your program will -eventually consume all the available memory in your system - kapow! +eventually consume all the available memory in your system--kapow! -So here is the bottom line - if you are sure that control will revert +So here is the bottom line--if you are sure that control will revert back to the enclosing Perl scope fairly quickly after the end of your callback, then it isn't absolutely necessary to dispose explicitly of any temporaries you may have created. Mind you, if you are at all @@ -1579,7 +1579,7 @@ require is a means of storing the mapping between the opened file and the Perl subroutine we want to be called for that file. Say the i/o library has a function C which associates a C -function C with a file handle C - this assumes that it +function C with a file handle C--this assumes that it has also provided some routine to open the file and so obtain the file handle. diff --git a/pod/perldata.pod b/pod/perldata.pod index ad27db1..f4c660d 100644 --- a/pod/perldata.pod +++ b/pod/perldata.pod @@ -8,9 +8,9 @@ perldata - Perl data types Perl has three built-in data types: scalars, arrays of scalars, and associative arrays of scalars, known as "hashes". 
Normal arrays -are ordered lists indexed by number, starting with 0 and with +are ordered lists of scalars indexed by number, starting with 0 and with negative subscripts counting from the end. Hashes are unordered -collections of values indexed by their associated string key. +collections of scalar values indexed by their associated string key. Values are usually referred to by name, or through a named reference. The first character of the name tells you to what sort of data @@ -165,7 +165,7 @@ references are strongly-typed, uncastable pointers with builtin reference-counting and destructor invocation. A scalar value is interpreted as TRUE in the Boolean sense if it is not -the empty string or the number 0 (or its string equivalent, "0"). The +the null string or the number 0 (or its string equivalent, "0"). The Boolean context is just a special kind of scalar context where no conversion to a string or a number is ever performed. @@ -220,7 +220,7 @@ had to break this to make sure destructors were called when expected.) You can also gain some miniscule measure of efficiency by pre-extending an array that is going to get big. You can also extend an array by assigning to an element that is off the end of the array. You -can truncate an array down to nothing by assigning the empty list +can truncate an array down to nothing by assigning the null list () to it. The following are equivalent: @whatever = (); @@ -278,8 +278,8 @@ integer formats: String literals are usually delimited by either single or double quotes. They work much like quotes in the standard Unix shells: double-quoted string literals are subject to backslash and variable -substitution; single-quoted strings are not (except for "C<\'>" and -"C<\\>"). The usual C-style backslash rules apply for making +substitution; single-quoted strings are not (except for C<\'> and +C<\\>). The usual C-style backslash rules apply for making characters such as newline, tab, etc., as well as some more exotic forms. 
See L for a list. @@ -490,7 +490,7 @@ followed by all the elements returned by the subroutine named SomeSub called in list context, followed by the key/value pairs of %glarch. To make a list reference that does I interpolate, see L. -The empty list is represented by (). Interpolating it in a list +The null list is represented by (). Interpolating it in a list has no effect. Thus ((),(),()) is equivalent to (). Similarly, interpolating an array with no elements is the same as if no array had been interpolated at that point. @@ -530,7 +530,7 @@ produced by the expression on the right side of the assignment: $x = (($foo,$bar) = f()); # set $x to f()'s return count This is handy when you want to do a list assignment in a Boolean -context, because most list functions return a empty list when finished, +context, because most list functions return a null list when finished, which when assigned produces a 0, which is interpreted as FALSE. The final element may be an array or a hash: @@ -639,9 +639,10 @@ You couldn't just loop through C to do this because that function produces a new list which is a copy of the values, so changing them doesn't change the original. -As a special rule, if a slice would produce a list consisting entirely -of undefined values, the empty list is produced instead. This makes -it easy to write loops that terminate when an empty list is returned: +As a special rule, if a list slice would produce a list consisting +entirely of undefined values, the null list is produced instead. +This makes it easy to write loops that terminate when a null list +is returned: while ( ($home, $user) = (getpwent)[7,0]) { printf "%-8s %s\n", $user, $home; @@ -649,7 +650,7 @@ it easy to write loops that terminate when an empty list is returned: As noted earlier in this document, the scalar sense of list assignment is the number of elements on the right-hand side of the assignment. 
-The empty list contains no elements, so when the password file is +The null list contains no elements, so when the password file is exhausted, the result is 0, not 2. If you're confused about why you use an '@' there on a hash slice diff --git a/pod/perldebug.pod b/pod/perldebug.pod index ed77fd3..5699732 100644 --- a/pod/perldebug.pod +++ b/pod/perldebug.pod @@ -557,7 +557,7 @@ Quit. ("quit" doesn't work for this.) This is the only supported way to exit the debugger, though typing C twice may do it too. Set an Cption C to 0 if you want to be able to I the end the script. You may also need to set C<$finished> to 0 at +off> the end the script. You may also need to set $finished to 0 at some moment if you want to step through global destruction. =item R @@ -968,7 +968,7 @@ application. The array C<@{"_E$filename"}> is the line-by-line contents of $filename for all the compiled files. Same for Ced strings which -contain subroutines, or which are currently executed. The C<$filename> +contain subroutines, or which are currently executed. The $filename for Ced strings looks like C<(eval 34)>. =item * diff --git a/pod/perldelta.pod b/pod/perldelta.pod index fe4f29f..7ffaf74 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -117,7 +117,7 @@ extent of 64-bit support. Depending on the platform (hints file) more or less 64-awareness becomes available. As of 5.005_54 at least somewhat 64-bit aware platforms are HP-UX 11 or better, Solaris 2.6 or better, IRIX 6.2 or better. Naturally 64-bit platforms like Digital -UNIX and UNICOS also have 64-bit support. +Unix and UNICOS also have 64-bit support. =head2 Better syntax checks on parenthesized unary operators @@ -335,8 +335,8 @@ O_ACCMODE: the mask of O_RDONLY, O_WRONLY, and O_RDWR. =item File::Spec New methods have been added to the File::Spec module: devnull() returns -the name of the null device (/dev/null on UNIX) and tmpdir() the name of -the temp directory (normally /tmp on UNIX). 
There are now also methods +the name of the null device (/dev/null on Unix) and tmpdir() the name of +the temp directory (normally /tmp on Unix). There are now also methods to convert between absolute and relative filenames: abs2rel() and rel2abs(). For compatibility with operating systems that specify volume names in file paths, the splitpath(), splitdir() and catdir() methods diff --git a/pod/perldsc.pod b/pod/perldsc.pod index ef3ae75..5ab97e1 100644 --- a/pod/perldsc.pod +++ b/pod/perldsc.pod @@ -8,8 +8,8 @@ The single feature most sorely lacking in the Perl programming language prior to its 5.0 release was complex data structures. Even without direct language support, some valiant programmers did manage to emulate them, but it was hard work and not for the faint of heart. You could occasionally -get away with the C<$m{$LoL,$b}> notation borrowed from I in which the -keys are actually more like a single concatenated string C<"$LoL$b">, but +get away with the C<$m{$AoA,$b}> notation borrowed from B in which the +keys are actually more like a single concatenated string C<"$AoA$b">, but traversal and sorting were difficult. More desperate programmers even hacked Perl's internal symbol table directly, a strategy that proved hard to develop and maintain--to put it mildly. @@ -21,7 +21,7 @@ with three dimensions! for $x (1 .. 10) { for $y (1 .. 10) { for $z (1 .. 10) { - $LoL[$x][$y][$z] = + $AoA[$x][$y][$z] = $x ** $y + $z; } } @@ -30,7 +30,7 @@ with three dimensions! Alas, however simple this may appear, underneath it's a much more elaborate construct than meets the eye! -How do you print it out? Why can't you say just C? How do +How do you print it out? Why can't you say just C? How do you sort it? How can you pass it to a function or get one of these back from a function? Is is an object? Can you save it to disk to read back later? How do you access whole rows or columns of that matrix? Do @@ -93,8 +93,8 @@ level. 
It's just that you can I it as though it were a two-dimensional one. This is actually the way almost all C multidimensional arrays work as well. - $list[7][12] # array of arrays - $list[7]{string} # array of hashes + $array[7][12] # array of arrays + $array[7]{string} # array of hashes $hash{string}[7] # hash of arrays $hash{string}{'another string'} # hash of hashes @@ -102,10 +102,10 @@ Now, because the top level contains only references, if you try to print out your array in with a simple print() function, you'll get something that doesn't look very nice, like this: - @LoL = ( [2, 3], [4, 5, 7], [0] ); - print $LoL[1][2]; + @AoA = ( [2, 3], [4, 5, 7], [0] ); + print $AoA[1][2]; 7 - print @LoL; + print @AoA; ARRAY(0x83c38)ARRAY(0x8b194)ARRAY(0x8b1d0) @@ -124,25 +124,25 @@ repeatedly. Here's the case where you just get the count instead of a nested array: for $i (1..10) { - @list = somefunc($i); - $LoL[$i] = @list; # WRONG! + @array = somefunc($i); + $AoA[$i] = @array; # WRONG! } -That's just the simple case of assigning a list to a scalar and getting +That's just the simple case of assigning an array to a scalar and getting its element count. If that's what you really and truly want, then you might do well to consider being a tad more explicit about it, like this: for $i (1..10) { - @list = somefunc($i); - $counts[$i] = scalar @list; + @array = somefunc($i); + $counts[$i] = scalar @array; } Here's the case of taking a reference to the same memory location again and again: for $i (1..10) { - @list = somefunc($i); - $LoL[$i] = \@list; # WRONG! + @array = somefunc($i); + $AoA[$i] = \@array; # WRONG! } So, what's the big problem with that? It looks right, doesn't it? @@ -150,8 +150,8 @@ After all, I just told you that you need an array of references, so by golly, you've made me one! Unfortunately, while this is true, it's still broken. All the references -in @LoL refer to the I, and they will therefore all hold -whatever was last in @list! 
It's similar to the problem demonstrated in +the following C program: #include @@ -176,40 +176,40 @@ hash constructor C<{}> instead. Here's the right way to do the preceding broken code fragments: for $i (1..10) { - @list = somefunc($i); - $LoL[$i] = [ @list ]; + @array = somefunc($i); + $AoA[$i] = [ @array ]; } The square brackets make a reference to a new array with a I -of what's in @list at the time of the assignment. This is what +of what's in @array at the time of the assignment. This is what you want. Note that this will produce something similar, but it's much harder to read: for $i (1..10) { - @list = 0 .. $i; - @{$LoL[$i]} = @list; + @array = 0 .. $i; + @{$AoA[$i]} = @array; } Is it the same? Well, maybe so--and maybe not. The subtle difference is that when you assign something in square brackets, you know for sure it's always a brand new reference with a new I of the data. -Something else could be going on in this new case with the C<@{$LoL[$i]}}> +Something else could be going on in this new case with the C<@{$AoA[$i]}> dereference on the left-hand-side of the assignment. It all depends on -whether C<$LoL[$i]> had been undefined to start with, or whether it -already contained a reference. If you had already populated @LoL with +whether C<$AoA[$i]> had been undefined to start with, or whether it +already contained a reference. If you had already populated @AoA with references, as in - $LoL[3] = \@another_list; + $AoA[3] = \@another_array; Then the assignment with the indirection on the left-hand-side would use the existing reference that was already there: - @{$LoL[3]} = @list; + @{$AoA[3]} = @array; Of course, this I have the "interesting" effect of clobbering -@another_list. +@another_array. 
(Have you ever noticed how when a programmer says something is "interesting", that rather than meaning "intriguing", they're disturbingly more apt to mean that it's "annoying", "difficult", or both? :-) @@ -222,8 +222,8 @@ Surprisingly, the following dangerous-looking construct will actually work out fine: for $i (1..10) { - my @list = somefunc($i); - $LoL[$i] = \@list; + my @array = somefunc($i); + $AoA[$i] = \@array; } That's because my() is more of a run-time statement than it is a @@ -242,18 +242,18 @@ do the right thing behind the scenes. In summary: - $LoL[$i] = [ @list ]; # usually best - $LoL[$i] = \@list; # perilous; just how my() was that list? - @{ $LoL[$i] } = @list; # way too tricky for most programmers + $AoA[$i] = [ @array ]; # usually best + $AoA[$i] = \@array; # perilous; just how my() was that array? + @{ $AoA[$i] } = @array; # way too tricky for most programmers =head1 CAVEAT ON PRECEDENCE -Speaking of things like C<@{$LoL[$i]}>, the following are actually the +Speaking of things like C<@{$AoA[$i]}>, the following are actually the same thing: - $listref->[2][2] # clear - $$listref[2][2] # confusing + $aref->[2][2] # clear + $$aref[2][2] # confusing That's because Perl's precedence rules on its five prefix dereferencers (which look like someone swearing: C<$ @ * % &>) make them bind more @@ -263,11 +263,11 @@ accustomed to using C<*a[i]> to mean what's pointed to by the I element of C. That is, they first take the subscript, and only then dereference the thing at that subscript. That's fine in C, but this isn't C. -The seemingly equivalent construct in Perl, C<$$listref[$i]> first does -the deref of C<$listref>, making it take $listref as a reference to an +The seemingly equivalent construct in Perl, C<$$aref[$i]> first does +the deref of $aref, making it take $aref as a reference to an array, and then dereference that, and finally tell you the I value -of the array pointed to by $LoL. 
If you wanted the C notion, you'd have to -write C<${$LoL[$i]}> to force the C<$LoL[$i]> to get evaluated first +of the array pointed to by $AoA. If you wanted the C notion, you'd have to +write C<${$AoA[$i]}> to force the C<$AoA[$i]> to get evaluated first before the leading C<$> dereferencer. =head1 WHY YOU SHOULD ALWAYS C @@ -283,19 +283,19 @@ This way, you'll be forced to declare all your variables with my() and also disallow accidental "symbolic dereferencing". Therefore if you'd done this: - my $listref = [ + my $aref = [ [ "fred", "barney", "pebbles", "bambam", "dino", ], [ "homer", "bart", "marge", "maggie", ], [ "george", "jane", "elroy", "judy", ], ]; - print $listref[2][2]; + print $aref[2][2]; The compiler would immediately flag that as an error I, -because you were accidentally accessing C<@listref>, an undeclared +because you were accidentally accessing C<@aref>, an undeclared variable, and it would thereby remind you to write instead: - print $listref->[2][2] + print $aref->[2][2] =head1 DEBUGGING @@ -303,10 +303,10 @@ Before version 5.002, the standard Perl debugger didn't do a very nice job of printing out complex data structures. With 5.002 or above, the debugger includes several new features, including command line editing as well as the C command to dump out complex data structures. For -example, given the assignment to $LoL above, here's the debugger output: +example, given the assignment to $AoA above, here's the debugger output: - DB<1> x $LoL - $LoL = ARRAY(0x13b5a0) + DB<1> x $AoA + $AoA = ARRAY(0x13b5a0) 0 ARRAY(0x1f0a24) 0 'fred' 1 'barney' @@ -330,79 +330,79 @@ Presented with little comment (these will get their own manpages someday) here are short code examples illustrating access of various types of data structures. 
-=head1 LISTS OF LISTS +=head1 ARRAYS OF ARRAYS -=head2 Declaration of a LIST OF LISTS +=head2 Declaration of an ARRAY OF ARRAYS - @LoL = ( + @AoA = ( [ "fred", "barney" ], [ "george", "jane", "elroy" ], [ "homer", "marge", "bart" ], ); -=head2 Generation of a LIST OF LISTS +=head2 Generation of an ARRAY OF ARRAYS # reading from file while ( <> ) { - push @LoL, [ split ]; + push @AoA, [ split ]; } # calling a function for $i ( 1 .. 10 ) { - $LoL[$i] = [ somefunc($i) ]; + $AoA[$i] = [ somefunc($i) ]; } # using temp vars for $i ( 1 .. 10 ) { @tmp = somefunc($i); - $LoL[$i] = [ @tmp ]; + $AoA[$i] = [ @tmp ]; } # add to an existing row - push @{ $LoL[0] }, "wilma", "betty"; + push @{ $AoA[0] }, "wilma", "betty"; -=head2 Access and Printing of a LIST OF LISTS +=head2 Access and Printing of an ARRAY OF ARRAYS # one element - $LoL[0][0] = "Fred"; + $AoA[0][0] = "Fred"; # another element - $LoL[1][1] =~ s/(\w)/\u$1/; + $AoA[1][1] =~ s/(\w)/\u$1/; # print the whole thing with refs - for $aref ( @LoL ) { + for $aref ( @AoA ) { print "\t [ @$aref ],\n"; } # print the whole thing with indices - for $i ( 0 .. $#LoL ) { - print "\t [ @{$LoL[$i]} ],\n"; + for $i ( 0 .. $#AoA ) { + print "\t [ @{$AoA[$i]} ],\n"; } # print the whole thing one at a time - for $i ( 0 .. $#LoL ) { - for $j ( 0 .. $#{ $LoL[$i] } ) { - print "elt $i $j is $LoL[$i][$j]\n"; + for $i ( 0 .. $#AoA ) { + for $j ( 0 .. 
$#{ $AoA[$i] } ) { + print "elt $i $j is $AoA[$i][$j]\n"; } } -=head1 HASHES OF LISTS +=head1 HASHES OF ARRAYS -=head2 Declaration of a HASH OF LISTS +=head2 Declaration of a HASH OF ARRAYS - %HoL = ( + %HoA = ( flintstones => [ "fred", "barney" ], jetsons => [ "george", "jane", "elroy" ], simpsons => [ "homer", "marge", "bart" ], ); -=head2 Generation of a HASH OF LISTS +=head2 Generation of a HASH OF ARRAYS # reading from file # flintstones: fred barney wilma dino while ( <> ) { next unless s/^(.*?):\s*//; - $HoL{$1} = [ split ]; + $HoA{$1} = [ split ]; } # reading from file; more temps @@ -410,65 +410,65 @@ types of data structures. while ( $line = <> ) { ($who, $rest) = split /:\s*/, $line, 2; @fields = split ' ', $rest; - $HoL{$who} = [ @fields ]; + $HoA{$who} = [ @fields ]; } # calling a function that returns a list for $group ( "simpsons", "jetsons", "flintstones" ) { - $HoL{$group} = [ get_family($group) ]; + $HoA{$group} = [ get_family($group) ]; } # likewise, but using temps for $group ( "simpsons", "jetsons", "flintstones" ) { @members = get_family($group); - $HoL{$group} = [ @members ]; + $HoA{$group} = [ @members ]; } # append new members to an existing family - push @{ $HoL{"flintstones"} }, "wilma", "betty"; + push @{ $HoA{"flintstones"} }, "wilma", "betty"; -=head2 Access and Printing of a HASH OF LISTS +=head2 Access and Printing of a HASH OF ARRAYS # one element - $HoL{flintstones}[0] = "Fred"; + $HoA{flintstones}[0] = "Fred"; # another element - $HoL{simpsons}[1] =~ s/(\w)/\u$1/; + $HoA{simpsons}[1] =~ s/(\w)/\u$1/; # print the whole thing - foreach $family ( keys %HoL ) { - print "$family: @{ $HoL{$family} }\n" + foreach $family ( keys %HoA ) { + print "$family: @{ $HoA{$family} }\n" } # print the whole thing with indices - foreach $family ( keys %HoL ) { + foreach $family ( keys %HoA ) { print "family: "; - foreach $i ( 0 .. $#{ $HoL{$family} } ) { - print " $i = $HoL{$family}[$i]"; + foreach $i ( 0 .. 
$#{ $HoA{$family} } ) { + print " $i = $HoA{$family}[$i]"; } print "\n"; } # print the whole thing sorted by number of members - foreach $family ( sort { @{$HoL{$b}} <=> @{$HoL{$a}} } keys %HoL ) { - print "$family: @{ $HoL{$family} }\n" + foreach $family ( sort { @{$HoA{$b}} <=> @{$HoA{$a}} } keys %HoA ) { + print "$family: @{ $HoA{$family} }\n" } # print the whole thing sorted by number of members and name foreach $family ( sort { - @{$HoL{$b}} <=> @{$HoL{$a}} + @{$HoA{$b}} <=> @{$HoA{$a}} || $a cmp $b - } keys %HoL ) + } keys %HoA ) { - print "$family: ", join(", ", sort @{ $HoL{$family} }), "\n"; + print "$family: ", join(", ", sort @{ $HoA{$family} }), "\n"; } -=head1 LISTS OF HASHES +=head1 ARRAYS OF HASHES -=head2 Declaration of a LIST OF HASHES +=head2 Declaration of an ARRAY OF HASHES - @LoH = ( + @AoH = ( { Lead => "fred", Friend => "barney", @@ -485,7 +485,7 @@ types of data structures. } ); -=head2 Generation of a LIST OF HASHES +=head2 Generation of an ARRAY OF HASHES # reading from file # format: LEAD=fred FRIEND=barney @@ -495,7 +495,7 @@ types of data structures. ($key, $value) = split /=/, $field; $rec->{$key} = $value; } - push @LoH, $rec; + push @AoH, $rec; } @@ -503,34 +503,34 @@ types of data structures. 
# format: LEAD=fred FRIEND=barney # no temp while ( <> ) { - push @LoH, { split /[\s+=]/ }; + push @AoH, { split /[\s+=]/ }; } - # calling a function that returns a key,value list, like + # calling a function that returns a key/value pair list, like # "lead","fred","daughter","pebbles" while ( %fields = getnextpairset() ) { - push @LoH, { %fields }; + push @AoH, { %fields }; } # likewise, but using no temp vars while (<>) { - push @LoH, { parsepairs($_) }; + push @AoH, { parsepairs($_) }; } # add key/value to an element - $LoH[0]{pet} = "dino"; - $LoH[2]{pet} = "santa's little helper"; + $AoH[0]{pet} = "dino"; + $AoH[2]{pet} = "santa's little helper"; -=head2 Access and Printing of a LIST OF HASHES +=head2 Access and Printing of an ARRAY OF HASHES # one element - $LoH[0]{lead} = "fred"; + $AoH[0]{lead} = "fred"; # another element - $LoH[1]{lead} =~ s/(\w)/\u$1/; + $AoH[1]{lead} =~ s/(\w)/\u$1/; # print the whole thing with refs - for $href ( @LoH ) { + for $href ( @AoH ) { print "{ "; for $role ( keys %$href ) { print "$role=$href->{$role} "; @@ -539,18 +539,18 @@ types of data structures. } # print the whole thing with indices - for $i ( 0 .. $#LoH ) { + for $i ( 0 .. $#AoH ) { print "$i is { "; - for $role ( keys %{ $LoH[$i] } ) { - print "$role=$LoH[$i]{$role} "; + for $role ( keys %{ $AoH[$i] } ) { + print "$role=$AoH[$i]{$role} "; } print "}\n"; } # print the whole thing one at a time - for $i ( 0 .. $#LoH ) { - for $role ( keys %{ $LoH[$i] } ) { - print "elt $i $role is $LoH[$i]{$role}\n"; + for $i ( 0 .. 
$#AoH ) { + for $role ( keys %{ $AoH[$i] } ) { + print "elt $i $role is $AoH[$i]{$role}\n"; } } @@ -767,9 +767,9 @@ many different sorts: ########################################################### # now, you might want to make interesting extra fields that # include pointers back into the same data structure so if - # change one piece, it changes everywhere, like for examples - # if you wanted a {kids} field that was an array reference - # to a list of the kids' records without having duplicate + # change one piece, it changes everywhere, like for example + # if you wanted a {kids} field that was a reference + # to an array of the kids' records without having duplicate # records and thus update problems. ########################################################### foreach $family (keys %TV) { @@ -784,7 +784,7 @@ many different sorts: $rec->{kids} = [ @kids ]; } - # you copied the list, but the list itself contains pointers + # you copied the array, but the array itself contains pointers # to uncopied objects. this means that if you make bart get # older via diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index d409319..650493a 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -30,7 +30,7 @@ Elements of the LIST should be separated by commas. Any function in the list below may be used either with or without parentheses around its arguments. (The syntax descriptions omit the parentheses.) If you use the parentheses, the simple (but occasionally -surprising) rule is this: It I like a function, therefore it I a +surprising) rule is this: It I like a function, therefore it I a function, and precedence doesn't matter. Otherwise it's a list operator or unary operator, and precedence does matter. And whitespace between the function and left parenthesis doesn't count--so you need to @@ -80,8 +80,8 @@ In general, functions in Perl that serve as wrappers for system calls of the same name (like chown(2), fork(2), closedir(2), etc.) 
all return true when they succeed and C otherwise, as is usually mentioned in the descriptions below. This is different from the C interfaces, -which return C<-1> on failure. Exceptions to this rule are C, -C, and C. System calls also set the special C<$!> +which return C<-1> on failure. Exceptions to this rule are C, +C, and C. System calls also set the special C<$!> variable on failure. Other functions do not, except accidentally. =head2 Perl Functions by Category @@ -255,7 +255,7 @@ A file test, where X is one of the letters listed below. This unary operator takes one argument, either a filename or a filehandle, and tests the associated file to see if something is true about it. If the argument is omitted, tests C<$_>, except for C<-t>, which tests STDIN. -Unless otherwise documented, it returns C<1> for TRUE and C<''> for FALSE, or +Unless otherwise documented, it returns C<1> for true and C<''> for false, or the undefined value if the file doesn't exist. Despite the funny names, precedence is the same as any other named unary operator, and the argument may be parenthesized like any other unary operator. The @@ -339,12 +339,12 @@ characters with the high bit set. If too many strange characters (E30%) are found, it's a C<-B> file, otherwise it's a C<-T> file. Also, any file containing null in the first block is considered a binary file. If C<-T> or C<-B> is used on a filehandle, the current stdio buffer is examined -rather than the first block. Both C<-T> and C<-B> return TRUE on a null +rather than the first block. Both C<-T> and C<-B> return true on a null file, or a file at EOF when testing a filehandle. Because you have to read a file to do the C<-T> test, on most occasions you want to use a C<-f> against the file first, as in C. 
-If any of the file tests (or either the C or C operators) are given +If any of the file tests (or either the C or C operators) are given the special filehandle consisting of a solitary underline, then the stat structure of the previous file test (or stat operator) is used, saving a system call. (This doesn't work with C<-t>, and you need to remember @@ -373,7 +373,7 @@ If VALUE is omitted, uses C<$_>. =item accept NEWSOCKET,GENERICSOCKET Accepts an incoming socket connect, just as the accept(2) system call -does. Returns the packed address if it succeeded, FALSE otherwise. +does. Returns the packed address if it succeeded, false otherwise. See the example in L. =item alarm SECONDS @@ -391,18 +391,18 @@ starting a new one. The returned value is the amount of time remaining on the previous timer. For delays of finer granularity than one second, you may use Perl's -four-arugment version of select() leaving the first three arguments -undefined, or you might be able to use the C interface to +four-argument version of select() leaving the first three arguments +undefined, or you might be able to use the C interface to access setitimer(2) if your system supports it. The Time::HiRes module from CPAN may also prove useful. -It is usually a mistake to intermix C -and C calls. +It is usually a mistake to intermix C +and C calls. -If you want to use C to time out a system call you need to use an -C/C pair. You can't rely on the alarm causing the system call to +If you want to use C to time out a system call you need to use an +C/C pair. You can't rely on the alarm causing the system call to fail with C<$!> set to C because Perl sets up signal handlers to -restart system calls on some systems. Using C/C always works, +restart system calls on some systems. Using C/C always works, modulo the caveats given in L. eval { @@ -431,29 +431,51 @@ function, or use the familiar relation: =item bind SOCKET,NAME Binds a network address to a socket, just as the bind system call -does. 
Returns TRUE if it succeeded, FALSE otherwise. NAME should be a +does. Returns true if it succeeded, false otherwise. NAME should be a packed address of the appropriate type for the socket. See the examples in L. =item binmode FILEHANDLE -Arranges for the file to be read or written in "binary" mode in operating -systems that distinguish between binary and text files. Files that -are not in binary mode have CR LF sequences translated to LF on input -and LF translated to CR LF on output. Binmode has no effect under -many sytems, but in MS-DOS and similarly archaic systems, it may be -imperative--otherwise your MS-DOS-damaged C library may mangle your file. -The key distinction between systems that need C and those -that don't is their text file formats. Systems like Unix, MacOS, and -Plan9 that delimit lines with a single character, and that encode that -character in C as C<"\n">, do not need C. The rest may need it. -If FILEHANDLE is an expression, the value is taken as the name of the -filehandle. - -If the system does care about it, using it when you shouldn't is just as -perilous as failing to use it when you should. Fortunately for most of -us, you can't go wrong using binmode() on systems that don't care about -it, though. +Arranges for FILEHANDLE to be read or written in "binary" mode on +systems whose run-time libraries force the programmer to guess +between binary and text files. If FILEHANDLE is an expression, the +value is taken as the name of the filehandle. binmode() should be +called after the C but before any I/O is done on the filehandle. +The only way to reset binary mode on a filehandle is to reopen the +file. + +The operating system, device drivers, C libraries, and Perl run-time +system all conspire to let the programmer conveniently treat a +simple, one-byte C<\n> as the line terminator, irrespective of its +external representation. 
On Unix and its brethren, the native file +representation exactly matches the internal representation, making +everyone's lives unbelievably simpler. Consequently, L +has no effect under Unix, Plan9, or Mac OS, all of which use C<\n> +to end each line. (Unix and Plan9 think C<\n> means C<\cJ> and +C<\r> means C<\cM>, whereas the Mac goes the other way--it uses +C<\cM> for C<\n> and C<\cJ> to mean C<\r>. But that's ok, because +it's only one byte, and the internal and external representations +match.) + +In legacy systems like MS-DOS and its embellishments, your program +sees a C<\n> as a simple C<\cJ> (just as in Unix), but oddly enough, +that's not what's physically stored on disk. What's worse, these +systems refuse to help you with this; it's up to you to remember +what to do. And you mustn't go applying binmode() with wild abandon, +either, because if your system does care about binmode(), then using +it when you shouldn't is just as perilous as failing to use it when +you should. + +That means that on any version of Microsoft WinXX that you might +care to name (or not), binmode() causes C<\cM\cJ> sequences on disk +to be converted to C<\n> when read into your program, and causes +any C<\n> in your program to be converted back to C<\cM\cJ> on +output to disk. This sad discrepancy leads to no end of +problems in not just the readline operator, but also when using +seek(), tell(), and read() calls. See L for other painful +details. See the C<$/> and C<$\> variables in L for how +to manually set your input and output line-termination sequences. =item bless REF,CLASSNAME @@ -461,7 +483,7 @@ it, though. This function tells the thingy referenced by REF that it is now an object in the CLASSNAME package. If CLASSNAME is omitted, the current package -is used. Because a C is often the last thing in a constructor. +is used. Because a C is often the last thing in a constructor, it returns the reference for convenience. 
Always use the two-argument version if the function doing the blessing might be inherited by a derived class. See L and L for more about the blessing @@ -481,7 +503,7 @@ See L. Returns the context of the current subroutine call. In scalar context, returns the caller's package name if there is a caller, that is, if -we're in a subroutine or C or C, and the undefined value +we're in a subroutine or C or C, and the undefined value otherwise. In list context, returns ($package, $filename, $line) = caller; @@ -493,12 +515,12 @@ to go back before the current one. ($package, $filename, $line, $subroutine, $hasargs, $wantarray, $evaltext, $is_require) = caller($i); -Here C<$subroutine> may be C<"(eval)"> if the frame is not a subroutine -call, but an C. In such a case additional elements C<$evaltext> and +Here $subroutine may be C<"(eval)"> if the frame is not a subroutine +call, but an C. In such a case additional elements $evaltext and C<$is_require> are set: C<$is_require> is true if the frame is created by a -C or C statement, C<$evaltext> contains the text of the +C or C statement, $evaltext contains the text of the C statement. In particular, for a C statement, -C<$filename> is C<"(eval)">, but C<$evaltext> is undefined. (Note also that +$filename is C<"(eval)">, but $evaltext is undefined. (Note also that each C statement creates a C frame inside an C) frame. @@ -507,16 +529,16 @@ detailed information: it sets the list variable C<@DB::args> to be the arguments with which the subroutine was invoked. Be aware that the optimizer might have optimized call frames away before -C had a chance to get the information. That means that C +C had a chance to get the information. That means that C might not return information about the call frame you expect it do, for -C 1>. In particular, C<@DB::args> might have information from the -previous time C was called. +C 1>. In particular, C<@DB::args> might have information from the +previous time C was called. 
=item chdir EXPR Changes the working directory to EXPR, if possible. If EXPR is omitted, -changes to the user's home directory. Returns TRUE upon success, -FALSE otherwise. See the example under C. +changes to the user's home directory. Returns true upon success, +false otherwise. See the example under C. =item chmod LIST @@ -548,7 +570,8 @@ that the final record may be missing its newline. When in paragraph mode (C<$/ = "">), it removes all trailing newlines from the string. When in slurp mode (C<$/ = undef>) or fixed-length record mode (C<$/> is a reference to an integer or the like, see L) chomp() won't -remove anything. If VARIABLE is omitted, it chomps C<$_>. Example: +remove anything. +If VARIABLE is omitted, it chomps C<$_>. Example: while (<>) { chomp; # avoid \n on last field @@ -588,16 +611,18 @@ You can actually chop anything that's an lvalue, including an assignment: chop($answer = ); If you chop a list, each element is chopped. Only the value of the -last C is returned. +last C is returned. -Note that C returns the last character. To return all but the last +Note that C returns the last character. To return all but the last character, use C. =item chown LIST Changes the owner (and group) of a list of files. The first two -elements of the list must be the I uid and gid, in that order. -Returns the number of files successfully changed. +elements of the list must be the I uid and gid, in that +order. A value of -1 in either position is interpreted by most +systems to leave that value unchanged. Returns the number of files +successfully changed. $cnt = chown $uid, $gid, 'foo', 'bar'; chown $uid, $gid, @filenames; @@ -605,9 +630,9 @@ Returns the number of files successfully changed. 
Here's an example that looks up nonnumeric uids in the passwd file: print "User: "; - chop($user = ); + chomp($user = ); print "Files: "; - chop($pattern = ); + chomp($pattern = ); ($login,$pass,$uid,$gid) = getpwnam($user) or die "$user not in passwd file"; @@ -619,6 +644,10 @@ On most systems, you are not allowed to change the ownership of the file unless you're the superuser, although you should be able to change the group to any of your secondary groups. On insecure systems, these restrictions may be relaxed, but this is not a portable assumption. +On POSIX systems, you can detect this condition this way: + + use POSIX qw(sysconf _PC_CHOWN_RESTRICTED); + $can_chown_giveaway = not sysconf(_PC_CHOWN_RESTRICTED); =item chr NUMBER @@ -641,24 +670,24 @@ named directory the new root directory for all further pathnames that begin with a C<"/"> by your process and all its children. (It doesn't change your current working directory, which is unaffected.) For security reasons, this call is restricted to the superuser. If FILENAME is -omitted, does a C to C<$_>. +omitted, does a C to C<$_>. =item close FILEHANDLE =item close -Closes the file or pipe associated with the file handle, returning TRUE +Closes the file or pipe associated with the file handle, returning true only if stdio successfully flushes buffers and closes the system file -descriptor. Closes the currently selected filehandle if the argument +descriptor. Closes the currently selected filehandle if the argument is omitted. You don't have to close FILEHANDLE if you are immediately going to do -another C on it, because C will close it for you. (See -C.) However, an explicit C on an input file resets the line -counter (C<$.>), while the implicit close done by C does not. +another C on it, because C will close it for you. (See +C.) However, an explicit C on an input file resets the line +counter (C<$.>), while the implicit close done by C does not. 
-If the file handle came from a piped open C will additionally -return FALSE if one of the other system calls involved fails or if the +If the file handle came from a piped open C will additionally +return false if one of the other system calls involved fails or if the program exits with non-zero status. (If the only problem was that the program exited non-zero C<$!> will be set to C<0>.) Closing a pipe also waits for the process executing on the pipe to complete, in case you @@ -681,7 +710,7 @@ filehandle, usually the real filehandle name. =item closedir DIRHANDLE -Closes a directory opened by C and returns the success of that +Closes a directory opened by C and returns the success of that system call. DIRHANDLE may be an expression whose value can be used as an indirect @@ -690,7 +719,7 @@ dirhandle, usually the real dirhandle name. =item connect SOCKET,NAME Attempts to connect to a remote socket, just as the connect system call -does. Returns TRUE if it succeeded, FALSE otherwise. NAME should be a +does. Returns true if it succeeded, false otherwise. NAME should be a packed address of the appropriate type for the socket. See the examples in L. @@ -705,8 +734,8 @@ continued via the C statement (which is similar to the C C statement). C, C, or C may appear within a C -block. C and C will behave as if they had been executed within -the main block. So will C, but since it will execute a C +block. C and C will behave as if they had been executed within +the main block. So will C, but since it will execute a C block, it may be more entertaining. while (EXPR) { @@ -720,7 +749,7 @@ block, it may be more entertaining. ### last always comes here Omitting the C section is semantically equivalent to using an -empty one, logically enough. In that case, C goes directly back +empty one, logically enough. In that case, C goes directly back to check the condition at the top of the loop. =item cos EXPR @@ -741,14 +770,14 @@ extirpated as a potential munition). 
This can prove useful for checking the password file for lousy passwords, amongst other things. Only the guys wearing white hats should do this. -Note that C is intended to be a one-way function, much like breaking +Note that C is intended to be a one-way function, much like breaking eggs to make an omelette. There is no (known) corresponding decrypt function. As a result, this function isn't all that useful for cryptography. (For that, see your nearby CPAN mirror.) When verifying an existing encrypted string you should use the encrypted text as the salt (like C). This -allows your code to work with the standard C and with more +allows your code to work with the standard C and with more exotic implementations. When choosing a new salt create a random two character string whose characters come from the set C<[./0-9A-Za-z]> (like C). @@ -773,34 +802,40 @@ their own password: Of course, typing in your own password to whoever asks you for it is unwise. +The L function is unsuitable for encrypting large quantities +of data, not least of all because you can't get the information +back. Look at the F and F directories +on your favorite CPAN mirror for a slew of potentially useful +modules. + =item dbmclose HASH -[This function has been largely superseded by the C function.] +[This function has been largely superseded by the C function.] Breaks the binding between a DBM file and a hash. -=item dbmopen HASH,DBNAME,MODE +=item dbmopen HASH,DBNAME,MASK -[This function has been largely superseded by the C function.] +[This function has been largely superseded by the C function.] This binds a dbm(3), ndbm(3), sdbm(3), gdbm(3), or Berkeley DB file to a -hash. HASH is the name of the hash. (Unlike normal C, the first -argument is I a filehandle, even though it looks like one). DBNAME +hash. HASH is the name of the hash. (Unlike normal C, the first +argument is I a filehandle, even though it looks like one). 
DBNAME is the name of the database (without the F<.dir> or F<.pag> extension if any). If the database does not exist, it is created with protection -specified by MODE (as modified by the C). If your system supports -only the older DBM functions, you may perform only one C in your +specified by MASK (as modified by the C). If your system supports +only the older DBM functions, you may perform only one C in your program. In older versions of Perl, if your system had neither DBM nor -ndbm, calling C produced a fatal error; it now falls back to +ndbm, calling C produced a fatal error; it now falls back to sdbm(3). If you don't have write access to the DBM file, you can only read hash variables, not set them. If you want to test whether you can write, -either use file tests or try setting a dummy hash entry inside an C, +either use file tests or try setting a dummy hash entry inside an C, which will trap the error. -Note that functions such as C and C may return huge lists -when used on large DBM files. You may prefer to use the C +Note that functions such as C and C may return huge lists +when used on large DBM files. You may prefer to use the C function to iterate over large DBM files. Example: # print out history file offsets @@ -835,13 +870,13 @@ conditions. This function allows you to distinguish C from other values. (A simple Boolean test will not distinguish among C, zero, the empty string, and C<"0">, which are all equally false.) Note that since C is a valid scalar, its presence -doesn't I indicate an exceptional condition: C +doesn't I indicate an exceptional condition: C returns C when its argument is an empty array, I when the element to return happens to be C. -You may also use C to check whether a subroutine exists, by +You may also use C to check whether a subroutine exists, by saying C without parentheses. 
On the other hand, use -of C upon aggregates (hashes and arrays) is not guaranteed to +of C upon aggregates (hashes and arrays) is not guaranteed to produce intuitive results, and should probably be avoided. When used on a hash element, it tells you whether the value is defined, @@ -857,7 +892,7 @@ Examples: sub foo { defined &$bar ? &$bar(@_) : die "No bar"; } $debugging = 0 unless defined $debugging; -Note: Many folks tend to overuse C, and then are surprised to +Note: Many folks tend to overuse C, and then are surprised to discover that the number C<0> and C<""> (the zero-length string) are, in fact, defined values. For example, if you say @@ -868,11 +903,11 @@ matched "nothing". But it didn't really match nothing--rather, it matched something that happened to be zero characters long. This is all very above-board and honest. When a function returns an undefined value, it's an admission that it couldn't give you an honest answer. So you -should use C only when you're questioning the integrity of what +should use C only when you're questioning the integrity of what you're trying to do. At other times, a simple comparison to C<0> or C<""> is what you want. -Currently, using C on an entire array or hash reports whether +Currently, using C on an entire array or hash reports whether memory for that aggregate has ever been allocated. So an array you set to the empty list appears undefined initially, and one that once was full and that you then set to the empty list still appears defined. You @@ -881,13 +916,13 @@ should instead use a simple test for size: if (@an_array) { print "has array elements\n" } if (%a_hash) { print "has hash members\n" } -Using C on these, however, does clear their memory and then report +Using C on these, however, does clear their memory and then report them as not defined anymore, but you shouldn't do that unless you don't plan to use them again, because it saves time when you load them up again to have memory already ready to be filled. 
The normal way to free up space used by an aggregate is to assign the empty list. -This counterintuitive behavior of C on aggregates may be +This counterintuitive behavior of C on aggregates may be changed, fixed, or broken in a future release of Perl. See also L, L, L. @@ -898,7 +933,7 @@ Deletes the specified key(s) and their associated values from a hash. For each key, returns the deleted value associated with that key, or the undefined value if there was no such key. Deleting from C<$ENV{}> modifies the environment. Deleting from a hash tied to a DBM file -deletes the entry from the DBM file. (But deleting from a Cd hash +deletes the entry from the DBM file. (But deleting from a Cd hash doesn't necessarily return anything.) The following deletes all the values of a hash: @@ -925,12 +960,13 @@ operation is a hash element lookup or hash slice: =item die LIST -Outside an C, prints the value of LIST to C and exits with -the current value of C<$!> (errno). If C<$!> is C<0>, exits with the value of -C<($? EE 8)> (backtick `command` status). If C<($? EE 8)> -is C<0>, exits with C<255>. Inside an C the error message is stuffed into -C<$@> and the C is terminated with the undefined value. This makes -C the way to raise an exception. +Outside an C, prints the value of LIST to C and +exits with the current value of C<$!> (errno). If C<$!> is C<0>, +exits with the value of C<($? EE 8)> (backtick `command` +status). If C<($? EE 8)> is C<0>, exits with C<255>. Inside +an C the error message is stuffed into C<$@> and the +C is terminated with the undefined value. This makes +C the way to raise an exception. Equivalent examples: @@ -984,25 +1020,26 @@ regular expressions. Here's an example: } } -Since perl will stringify uncaught exception messages before displaying +Because perl will stringify uncaught exception messages before displaying them, you may want to overload stringification operations on such custom exception objects. See L for details about that. 
-You can arrange for a callback to be run just before the C does -its deed, by setting the C<$SIG{__DIE__}> hook. The associated handler -will be called with the error text and can change the error message, if -it sees fit, by calling C again. See L for details on -setting C<%SIG> entries, and L<"eval BLOCK"> for some examples. - -Note that the C<$SIG{__DIE__}> hook is currently called even inside -eval()ed blocks/strings! If one wants the hook to do nothing in such -situations, put +You can arrange for a callback to be run just before the C +does its deed, by setting the C<$SIG{__DIE__}> hook. The associated +handler will be called with the error text and can change the error +message, if it sees fit, by calling C again. See +L for details on setting C<%SIG> entries, and +L<"eval BLOCK"> for some examples. Although this feature was meant +to be run only right before your program was to exit, this is not +currently the case--the C<$SIG{__DIE__}> hook is currently called +even inside eval()ed blocks/strings! If one wants the hook to do +nothing in such situations, put die @_ if $^S; -as the first line of the handler (see L). Because this -promotes action at a distance, this counterintuitive behavior may be fixed -in a future release. +as the first line of the handler (see L). Because +this promotes strange action at a distance, this counterintuitive +behavior may be fixed in a future release. =item do BLOCK @@ -1046,7 +1083,7 @@ successfully compiled, C returns the value of the last expression evaluated. Note that inclusion of library modules is better done with the -C and C operators, which also do automatic error checking +C and C operators, which also do automatic error checking and raise an exception if there's a problem. You might like to use C to read in a program configuration @@ -1067,40 +1104,31 @@ file. Manual error checking can be done this way: =item dump -This causes an immediate core dump. 
Primarily this is so that you can -use the B program to turn your core dump into an executable binary -after having initialized all your variables at the beginning of the -program. When the new binary is executed it will begin by executing a -C (with all the restrictions that C suffers). Think of -it as a goto with an intervening core dump and reincarnation. If C
package, it's @articles = sort {$main::b <=> $main::a} @files; @@ -3758,7 +3832,7 @@ replaces them with the elements of LIST, if any. In list context, returns the elements removed from the array. In scalar context, returns the last element removed, or C if no elements are removed. The array grows or shrinks as necessary. -If OFFSET is negative then it start that far from the end of the array. +If OFFSET is negative then it starts that far from the end of the array. If LENGTH is omitted, removes everything from OFFSET onward. If LENGTH is negative, leave that many elements off the end of the array. The following equivalences hold (assuming C<$[ == 0>): @@ -3790,7 +3864,7 @@ Example, assuming array lengths are passed before arrays: =item split -Splits a string into an array of strings, and returns it. By default, +Splits a string into a list of strings and returns that list. By default, empty leading fields are preserved, and empty trailing ones are deleted. If not in list context, returns the number of fields found and splits into @@ -3807,7 +3881,7 @@ that the delimiter may be longer than one character.) If LIMIT is specified and positive, splits into no more than that many fields (though it may split into fewer). If LIMIT is unspecified or zero, trailing null fields are stripped (which potential users -of C would do well to remember). If LIMIT is negative, it is +of C would do well to remember). If LIMIT is negative, it is treated as if an arbitrarily large LIMIT had been specified. A pattern matching the null string (not to be confused with @@ -3829,7 +3903,7 @@ unnecessary work. For the list above LIMIT would have been 4 by default. In time critical applications it behooves you not to split into more fields than you really need. -If the PATTERN contains parentheses, additional array elements are +If the PATTERN contains parentheses, additional list elements are created from each matching substring in the delimiter. 
split(/([,-])/, "1-10,20", 3); @@ -3838,7 +3912,7 @@ produces the list value (1, '-', 10, ',', 20) -If you had the entire header of a normal Unix email message in C<$header>, +If you had the entire header of a normal Unix email message in $header, you could split it up into fields and their values this way: $header =~ s/\n\s+/ /g; # fix continuation lines @@ -3849,11 +3923,11 @@ patterns that vary at runtime. (To do runtime compilation only once, use C.) As a special case, specifying a PATTERN of space (C<' '>) will split on -white space just as C with no arguments does. Thus, C can +white space just as C with no arguments does. Thus, C can be used to emulate B's default behavior, whereas C will give you as many null initial fields as there are leading spaces. -A C on C is like a C except that any leading -whitespace produces a null first field. A C with no arguments +A C on C is like a C except that any leading +whitespace produces a null first field. A C with no arguments really does a C internally. Example: @@ -3865,22 +3939,22 @@ Example: #... } -(Note that C<$shell> above will still have a newline on it. See L, +(Note that $shell above will still have a newline on it. See L, L, and L.) =item sprintf FORMAT, LIST -Returns a string formatted by the usual C conventions of the -C library function C. See L or L +Returns a string formatted by the usual C conventions of the +C library function C. See L or L on your system for an explanation of the general principles. -Perl does its own C formatting -- it emulates the C -function C, but it doesn't use it (except for floating-point +Perl does its own C formatting--it emulates the C +function C, but it doesn't use it (except for floating-point numbers, and even then only the standard modifiers are allowed). As a -result, any non-standard extensions in your local C are not +result, any non-standard extensions in your local C are not available from Perl. 
-Perl's C permits the following universally-known conversions: +Perl's C permits the following universally-known conversions: %% a percent sign %c a character with the given number @@ -3931,11 +4005,11 @@ There is also one Perl-specific flag: V interpret integer as Perl's standard integer type -Where a number would appear in the flags, an asterisk ("C<*>") may be +Where a number would appear in the flags, an asterisk (C<*>) may be used instead, in which case Perl uses the next item in the parameter list as the given number (that is, as the field width or precision). -If a field width obtained through "C<*>" is negative, it has the same -effect as the "C<->" flag: left-justification. +If a field width obtained through C<*> is negative, it has the same +effect as the C<-> flag: left-justification. If C is in effect, the character used for the decimal point in formatted real numbers is affected by the LC_NUMERIC locale. @@ -3956,19 +4030,19 @@ loaded the standard Math::Complex module. =item srand -Sets the random number seed for the C operator. If EXPR is +Sets the random number seed for the C operator. If EXPR is omitted, uses a semi-random value supplied by the kernel (if it supports the F device) or based on the current time and process ID, among other things. In versions of Perl prior to 5.004 the default -seed was just the current C. This isn't a particularly good seed, +seed was just the current C
-package is assumed. That is, C<$::sail> is equivalent to C<$main::sail>. +Perl provides a mechanism for alternative namespaces to protect +packages from stomping on each other's variables. In fact, there's +really no such thing as a global variable in Perl . The package +statement declares the compilation unit as being in the given +namespace. The scope of the package declaration is from the +declaration itself through the end of the enclosing block, C, +or file, whichever comes first (the same scope as the my() and +local() operators). Unqualified dynamic identifiers will be in +this namespace, except for those few identifiers that if unqualified, +default to the main package instead of the current one as described +below. A package statement affects only dynamic variables--including +those you've used local() on--but I lexical variables created +with my(). Typically it would be the first declaration in a file +included by the C, C, or C operators. You can +switch into a package in more than one place; it merely influences +which symbol table is used by the compiler for the rest of that +block. You can refer to variables and filehandles in other packages +by prefixing the identifier with the package name and a double +colon: C<$Package::Variable>. If the package name is null, the +C
package is assumed. That is, C<$::sail> is equivalent to +C<$main::sail>. The old package delimiter was a single quote, but double colon is now the preferred delimiter, in part because it's more readable to humans, and @@ -37,35 +39,38 @@ C<"This is $owner's house">, you'll be accessing C<$owner::s>; that is, the $s variable in package C, which is probably not what you meant. Use braces to disambiguate, as in C<"This is ${owner}'s house">. -Packages may be nested inside other packages: C<$OUTER::INNER::var>. This -implies nothing about the order of name lookups, however. All symbols +Packages may themselves contain package separators, as in +C<$OUTER::INNER::var>. This implies nothing about the order of +name lookups, however. There are no relative packages: all symbols are either local to the current package, or must be fully qualified from the outer package name down. For instance, there is nowhere -within package C that C<$INNER::var> refers to C<$OUTER::INNER::var>. -It would treat package C as a totally separate global package. - -Only identifiers starting with letters (or underscore) are stored in a -package's symbol table. All other symbols are kept in package C
, -including all of the punctuation variables like $_. In addition, when -unqualified, the identifiers STDIN, STDOUT, STDERR, ARGV, ARGVOUT, ENV, -INC, and SIG are forced to be in package C
, even when used for other -purposes than their builtin one. Note also that, if you have a package -called C, C, or C, then you can't use the qualified form of an -identifier because it will be interpreted instead as a pattern match, -a substitution, or a transliteration. - -(Variables beginning with underscore used to be forced into package +within package C that C<$INNER::var> refers to +C<$OUTER::INNER::var>. It would treat package C as a totally +separate global package. + +Only identifiers starting with letters (or underscore) are stored +in a package's symbol table. All other symbols are kept in package +C
, including all punctuation variables, like $_. In addition, +when unqualified, the identifiers STDIN, STDOUT, STDERR, ARGV, +ARGVOUT, ENV, INC, and SIG are forced to be in package C
, +even when used for other purposes than their built-in one. If you +have a package called C, C, or C, then you can't use the +qualified form of an identifier because it would be instead interpreted +as a pattern match, a substitution, or a transliteration. + +Variables beginning with underscore used to be forced into package main, but we decided it was more useful for package writers to be able to use leading underscore to indicate private variables and method names. -$_ is still global though.) +$_ is still global though. See also L. -Eval()ed strings are compiled in the package in which the eval() was +Ced strings are compiled in the package in which the eval() was compiled. (Assignments to C<$SIG{}>, however, assume the signal handler specified is in the C
package. Qualify the signal handler name if you wish to have a signal handler in a package.) For an example, examine F in the Perl library. It initially switches to the C package so that the debugger doesn't interfere with variables -in the script you are trying to debug. At various points, however, it +in the program you are trying to debug. At various points, however, it temporarily switches back to the C
package to evaluate various expressions in the context of the C
package (or wherever you came from). See L. @@ -92,8 +97,8 @@ table lookups at compile time: local $main::{foo} = $main::{bar}; You can use this to print out all the variables in a package, for -instance. The standard F library and the CPAN module -Devel::Symdump make use of this. +instance. The standard but antiquated F library and +the CPAN module Devel::Symdump make use of this. Assignment to a typeglob performs an aliasing operation, i.e., @@ -102,7 +107,7 @@ Assignment to a typeglob performs an aliasing operation, i.e., causes variables, subroutines, formats, and file and directory handles accessible via the identifier C also to be accessible via the identifier C. If you want to alias only a particular variable or -subroutine, you can assign a reference instead: +subroutine, assign a reference instead: *dick = \$richard; @@ -130,7 +135,7 @@ is a somewhat tricky way of passing around references cheaply when you won't want to have to remember to dereference variables explicitly. -Another use of symbol tables is for making "constant" scalars. +Another use of symbol tables is for making "constant" scalars. *PI = \3.14159265358979; @@ -157,14 +162,14 @@ This prints You gave me main::foo You gave me bar::baz -The *foo{THING} notation can also be used to obtain references to the +The C<*foo{THING}> notation can also be used to obtain references to the individual elements of *foo, see L. =head2 Package Constructors and Destructors There are two special subroutine definitions that function as package -constructors and destructors. These are the C and C -routines. The C is optional for these routines. +constructors and destructors. These are the C, C, and +C routines. The C is optional for these routines. A C subroutine is executed as soon as possible, that is, the moment it is completely defined, even before the rest of the containing file @@ -183,39 +188,45 @@ trap that yourself (if you can).)
You may have multiple C blocks within a file--they will execute in reverse order of definition; that is: last in, first out (LIFO). -Inside an C subroutine, C<$?> contains the value that the script is +Inside an C subroutine, C<$?> contains the value that the program is going to pass to C. You can modify C<$?> to change the exit -value of the script. Beware of changing C<$?> by accident (e.g. by +value of the program. Beware of changing C<$?> by accident (e.g. by running something via C). -Note that when you use the B<-n> and B<-p> switches to Perl, C and +When you use the B<-n> and B<-p> switches to Perl, C and C work just as they do in B, as a degenerate case. As currently implemented (and subject to change, since it's inconvenient at best), -both C I C blocks are run when you use the B<-c> switch +both C and C blocks are run when you use the B<-c> switch for a compile-only syntax check, although your main code is not. +Similar to C blocks, C blocks are run just before the +Perl runtime begins execution. For example, the code generators +documented in L make use of C blocks to initialize +and resolve pointers to XSUBs. + =head2 Perl Classes -There is no special class syntax in Perl, but a package may function +There is no special class syntax in Perl, but a package may act as a class if it provides subroutines to act as methods. Such a package may also derive some of its methods from another class (package) -by listing the other package name in its global @ISA array (which +by listing the other package name(s) in its global @ISA array (which must be a package global, not a lexical). For more on this, see L and L. =head2 Perl Modules -A module is just a package that is defined in a library file of -the same name, and is designed to be reusable. It may do this by -providing a mechanism for exporting some of its symbols into the symbol -table of any package using it.
Or it may function as a class -definition and make its semantics available implicitly through method -calls on the class and its objects, without explicit exportation of any -symbols. Or it can do a little of both. +A module is just a set of related functions in a library file, i.e., a Perl +package with the same name as the file. It is specifically designed +to be reusable by other modules or programs. It may do this by +providing a mechanism for exporting some of its symbols into the +symbol table of any package using it. Or it may function as a class +definition and make its semantics available implicitly through +method calls on the class and its objects, without explicitly +exporting anything. Or it can do a little of both. -For example, to start a normal module called Some::Module, create -a file called Some/Module.pm and start with this template: +For example, to start a traditional, non-OO module called Some::Module, +create a file called F and start with this template: package Some::Module; # assumes Some/Module.pm @@ -275,10 +286,13 @@ a file called Some/Module.pm and start with this template: END { } # module clean-up code here (global destructor) -Then go on to declare and use your variables in functions -without any qualifications. -See L and the L for details on -mechanics and style issues in module creation. + ## YOUR CODE GOES HERE + + 1; # don't forget to return a true value from the file + +Then go on to declare and use your variables in functions without +any qualifications. See L and the L for +details on mechanics and style issues in module creation. Perl modules are included into your program by saying @@ -304,12 +318,13 @@ is exactly equivalent to BEGIN { require Module; } -All Perl module files have the extension F<.pm>. C assumes this so -that you don't have to spell out "F" in quotes. This also -helps to differentiate new modules from old F<.pl> and F<.ph> files.
-Module names are also capitalized unless they're functioning as pragmas, -"Pragmas" are in effect compiler directives, and are sometimes called -"pragmatic modules" (or even "pragmata" if you're a classicist). +All Perl module files have the extension F<.pm>. The C operator +assumes this so you don't have to spell out "F" in quotes. +This also helps to differentiate new modules from old F<.pl> and +F<.ph> files. Module names are also capitalized unless they're +functioning as pragmas; pragmas are in effect compiler directives, +and are sometimes called "pragmatic modules" (or even "pragmata" +if you're a classicist). The two statements: @@ -319,18 +334,19 @@ The two statements: differ from each other in two ways. In the first case, any double colons in the module name, such as C, are translated into your system's directory separator, usually "/". The second -case does not, and would have to be specified literally. The other difference -is that seeing the first C clues in the compiler that uses of -indirect object notation involving "SomeModule", as in C<$ob = purge SomeModule>, -are method calls, not function calls. (Yes, this really can make a difference.) - -Because the C statement implies a C block, the importation -of semantics happens at the moment the C statement is compiled, +case does not, and would have to be specified literally. The other +difference is that seeing the first C clues in the compiler +that uses of indirect object notation involving "SomeModule", as +in C<$ob = purge SomeModule>, are method calls, not function calls. +(Yes, this really can make a difference.) + +Because the C statement implies a C block, the importing +of semantics happens as soon as the C statement is compiled, before the rest of the file is compiled. 
This is how it is able to function as a pragma mechanism, and also how modules are able to -declare subroutines that are then visible as list operators for +declare subroutines that are then visible as list or unary operators for the rest of the current file. This will not work if you use C -instead of C. With require you can get into this problem: +instead of C. With C you can get into this problem: require Cwd; # make Cwd:: accessible $here = Cwd::getcwd(); @@ -354,22 +370,22 @@ filenames on some systems. Therefore, if a module's name is, say, C, then its definition is actually found in the library file F. -Perl modules always have a F<.pm> file, but there may also be dynamically -linked executables or autoloaded subroutine definitions associated with -the module. If so, these will be entirely transparent to the user of -the module. It is the responsibility of the F<.pm> file to load (or -arrange to autoload) any additional functionality. The POSIX module -happens to do both dynamic loading and autoloading, but the user can -say just C to get it all. - -For more information on writing extension modules, see L -and L. +Perl modules always have a F<.pm> file, but there may also be +dynamically linked executables (often ending in F<.so>) or autoloaded +subroutine definitions (often ending in F<.al>) associated with the +module. If so, these will be entirely transparent to the user of +the module. It is the responsibility of the F<.pm> file to load +(or arrange to autoload) any additional functionality. For example, +although the POSIX module happens to do both dynamic loading and +autoloading, the user can say just C to get it all.
=head1 SEE ALSO See L for general style issues related to building Perl -modules and classes as well as descriptions of the standard library and -CPAN, L for how Perl's standard import/export mechanism works, -L for an in-depth tutorial on creating classes, L -for a hard-core reference document on objects, and L for an -explanation of functions and scoping. +modules and classes, as well as descriptions of the standard library +and CPAN, L for how Perl's standard import/export mechanism +works, L and L for an in-depth tutorial on +creating classes, L for a hard-core reference document on +objects, L for an explanation of functions and scoping, +and L and L for more information on writing +extension modules. diff --git a/pod/perlmodinstall.pod b/pod/perlmodinstall.pod index b6176f0..4076254 100644 --- a/pod/perlmodinstall.pod +++ b/pod/perlmodinstall.pod @@ -5,21 +5,23 @@ perlmodinstall - Installing CPAN Modules =head1 DESCRIPTION You can think of a module as the fundamental unit of reusable Perl -code; see L for details. Whenever anyone creates a chunk of -Perl code that they think will be useful to the world, they register -as a Perl developer at http://www.perl.com/CPAN/modules/04pause.html -so that they can then upload their code to the CPAN. The CPAN is the -Comprehensive Perl Archive Network and can be accessed at -http://www.perl.com/CPAN/. +code; See L for details. Whenever anyone creates a chunk +of Perl code that they think will be useful to the world, they +register as a Perl developer at +http://www.perl.com/CPAN/modules/04pause.html so that they can then +upload their code to CPAN. CPAN is the Comprehensive Perl Archive +Network and can be accessed at http://www.perl.com/CPAN/, or searched +via http://cpan.perl.com/ and +http://theory.uwinnipeg.ca/mod_perl/cpan-search.pl . This documentation is for people who want to download CPAN modules and install them on their own computer. =head2 PREAMBLE -You have a file ending in .tar.gz (or, less often, .zip). 
You know -there's a tasty module inside. There are four steps you must now -take: +You have a file ending in F<.tar.gz> (or, less often, F<.zip>). +You know there's a tasty module inside. You must now take four +steps: =over 5 @@ -44,8 +46,8 @@ say C, you can substitute C to install the modules into C. Then you can use the modules from your Perl programs with C or sometimes just C. +"/my/perl_directory/lib/site_perl"> or sometimes just C. =over 4 @@ -54,7 +56,8 @@ from your Perl programs with C You can use Andreas Koenig's CPAN module -( http://www.perl.com/CPAN/modules/by-module/CPAN ) +(which comes standard with Perl, or can itself be downloaded +from http://www.perl.com/CPAN/modules/by-module/CPAN) to automate the following steps, from DECOMPRESS through INSTALL. A. DECOMPRESS @@ -85,12 +88,12 @@ While still in that directory, type: make install -Make sure you have the appropriate permissions to install the module +Make sure you have appropriate permissions to install the module in your Perl 5 library directory. Often, you'll need to be root. That's all you need to do on Unix systems with dynamic linking. -Most Unix systems have dynamic linking -- if yours doesn't, or if for -another reason you have a statically-linked perl, B the +Most Unix systems have dynamic linking--if yours doesn't, or if for +another reason you have a statically-linked perl, I the module requires compilation, you'll need to build a new Perl binary that includes the module. Again, you'll probably need to be root. @@ -100,7 +103,7 @@ B A. DECOMPRESS -You can use the shareware Winzip ( http://www.winzip.com ) to +You can use the shareware B program ( http://www.winzip.com ) to decompress and unpack modules. B. UNPACK @@ -112,7 +115,7 @@ If you used WinZip, this was already done for you. Does the module require compilation (i.e. does it have files that end in .xs, .c, .h, .y, .cc, .cxx, or .C)? If it does, you're on your own. You can try compiling it yourself if you have a C compiler. 
-If you're successful, consider uploading the resulting binary to the +If you're successful, consider uploading the resulting binary to CPAN for others to use. If it doesn't, go to INSTALL. D. INSTALL @@ -129,11 +132,11 @@ B or C<.zip>. Windows browsers sometimes +F<.tar.gz> or F<.zip>. Windows browsers sometimes download C<.tar.gz> files as C<_tar.tar>, because early versions of Windows prohibited more than one dot in a filename. -You can use the shareware WinZip ( http://www.winzip.com ) to +You can use the shareware B program ( http://www.winzip.com ) to decompress and unpack modules. Or, you can use InfoZip's C utility ( @@ -151,7 +154,7 @@ UNPACK your module as well. B. UNPACK -All of the methods in DECOMPRESS will have done this for you. +The methods in DECOMPRESS will have done this for you. C. BUILD @@ -185,18 +188,18 @@ Specificly the "Commpress & Translate" listing ( http://hyperarchive.lcs.mit.edu/HyperArchive/Abstracts/cmp/HyperArchive.html ). -You can either use the shareware StuffIt Expander +You can either use the shareware B program ( http://hyperarchive.lcs.mit.edu/HyperArchive/Archive/cmp/stuffit-expander-401.hqx ) in combination with I ( http://hyperarchive.lcs.mit.edu/HyperArchive/Archive/cmp/drop-stuff-with-ee-40.hqx ) -or the freeware MacGzip ( +or the freeware B program ( http://persephone.cps.unizar.es/general/gente/spd/gzip/gzip.html ). B. UNPACK If you're using DropStuff or Stuffit, you can just extract the tar -archive. Otherwise, you can use the freeware I +archive. Otherwise, you can use the freeware B ( http://hyperarchive.lcs.mit.edu/HyperArchive/Archive/cmp/suntar-221.hqx ) or I ( http://hyperarchive.lcs.mit.edu/HyperArchive/Archive/cmp/tar-40b.hqx ). @@ -208,9 +211,9 @@ Does the module require compilation? Overview: You need MPW and a combination of new and old CodeWarrior compilers for MPW and libraries. Makefiles created for building under -MPW use the Metrowerks compilers. 
It's most likely possible to build +MPW use Metrowerks compilers. It's most likely possible to build without other compilers, but it has not been done successfully, to our -knowledge. Read the documentation in MacPerl: Power and Ease ( +knowledge. Read the documentation in I ( http://www.ptf.com/macperl/ ) on porting/building extensions, or find an existing precompiled binary, or hire someone to build it for you. @@ -226,9 +229,10 @@ Make sure the newlines for the modules are in Mac format, not Unix format. If they are not then you might have decompressed them incorrectly. Check your decompression and unpacking utilities settings to make sure they are translating text files properly. -As a last resort, you can use the perl one-liner: - perl -i.bak -pe 's/(?:\015)?\012/\015/g' filenames +As a last resort, you can use the perl one-liner: + + perl -i.bak -pe 's/(?:\015)?\012/\015/g' on the source files. @@ -275,7 +279,7 @@ Go into the newly-created directory and type: make make test -You will need the packages mentioned in C +You will need the packages mentioned in F in the Perl distribution. D. INSTALL @@ -284,7 +288,7 @@ While still in that directory, type: make install -You will need the packages mentioned in Readme.dos in the Perl distribution. +You will need the packages mentioned in F in the Perl distribution. =item * @@ -298,8 +302,8 @@ the instructions for Unix. B -When downloading from CPAN, save your file with a C<.tgz> -extension instead of C<.tar.gz>. All other periods in the +When downloading from CPAN, save your file with a F<.tgz> +extension instead of F<.tar.gz>. All other periods in the filename should be replaced with underscores. For example, C should be downloaded as C. @@ -361,7 +365,7 @@ Substitute C for C above if you're using MMK. B, -Introduce the .tar.gz file into an HFS as binary; don't translate from +Introduce the F<.tar.gz> file into an HFS as binary; don't translate from ASCII to EBCDIC. A. 
DECOMPRESS diff --git a/pod/perlmodlib.pod b/pod/perlmodlib.pod index 2dc38df..4cee455 100644 --- a/pod/perlmodlib.pod +++ b/pod/perlmodlib.pod @@ -6,54 +6,76 @@ perlmodlib - constructing new Perl modules and finding existing ones =head1 THE PERL MODULE LIBRARY -A number of modules are included the Perl distribution. These are -described below, and all end in F<.pm>. You may also discover files in -the library directory that end in either F<.pl> or F<.ph>. These are old -libraries supplied so that old programs that use them still run. The -F<.pl> files will all eventually be converted into standard modules, and -the F<.ph> files made by B will probably end up as extension modules -made by B. (Some F<.ph> values may already be available through the -POSIX module.) The B file in the distribution may help in your -conversion, but it's just a mechanical process and therefore far from -bulletproof. +Many modules are included in the Perl distribution. These are described +below, and all end in F<.pm>. You may discover compiled library +files (usually ending in F<.so>) or small pieces of modules to be +autoloaded (ending in F<.al>); these were automatically generated +by the installation process. You may also discover files in the +library directory that end in either F<.pl> or F<.ph>. These are +old libraries supplied so that old programs that use them still +run. The F<.pl> files will all eventually be converted into standard +modules, and the F<.ph> files made by B will probably end up +as extension modules made by B. (Some F<.ph> values may +already be available through the POSIX, Errno, or Fcntl modules.) +The B file in the distribution may help in your conversion, +but it's just a mechanical process and therefore far from bulletproof. =head2 Pragmatic Modules -They work somewhat like pragmas in that they tend to affect the compilation of -your program, and thus will usually work well only when used within a -C, or C.
Most of these are lexically scoped, so an inner BLOCK -may countermand any of these by saying: +They work somewhat like compiler directives (pragmata) in that they +tend to affect the compilation of your program, and thus will usually +work well only when used within a C, or C. Most of these +are lexically scoped, so an inner BLOCK may countermand them +by saying: no integer; no strict 'refs'; which lasts until the end of that BLOCK. -Unlike the pragmas that effect the C<$^H> hints variable, the C and C declarations are not BLOCK-scoped. They allow -you to predeclare a variables or subroutines within a particular -I rather than just a block. Such declarations are effective -for the entire file for which they were declared. You cannot rescind -them with C or C. +Some pragmas are lexically scoped--typically those that affect the +C<$^H> hints variable. Others affect the current package instead, +like C and C, which allow you to predeclare +variables or subroutines within a particular I rather than +just a block. Such declarations are effective for the entire file +for which they were declared. You cannot rescind them with C or C. The following pragmas are defined (and have their own documentation). =over 12 -=item use autouse MODULE => qw(sub1 sub2 sub3) +=item attrs -Defers C until someone calls one of the specified -subroutines (which must be exported by MODULE). This pragma should be -used with caution, and only when necessary.
+set/get attributes of a subroutine + +=item autouse + +postpone load of modules until a function is used + +=item base + +Establish IS-A relationship with base class at compile time =item blib -manipulate @INC at compile time to use MakeMaker's uninstalled version -of a package +Use MakeMaker's uninstalled version of a package + +=item constant + +declare constants =item diagnostics -force verbose warning diagnostics +Perl compiler pragma to force verbose warning diagnostics + +=item fields + +compile-time class fields + +=item filetest + +control the filetest permission operators =item integer @@ -61,7 +83,7 @@ compute arithmetic in integer instead of double =item less -request less of something from the compiler +perl pragma to request less of something from the compiler =item lib @@ -69,19 +91,19 @@ manipulate @INC at compile time =item locale -use or ignore current locale for builtin operations (see L) +use and avoid POSIX locales for built-in operations =item ops -restrict named opcodes when compiling or running Perl code +restrict unsafe operations when compiling =item overload -overload basic Perl operations +Package for overloading perl operations =item re -alter behaviour of regular expressions +alter regular expression behavior =item sigtrap @@ -95,14 +117,22 @@ restrict unsafe constructs predeclare sub names -=item vmsish +=item utf8 -adopt certain VMS-specific behaviors +turn on UTF-8 and Unicode support =item vars predeclare global variable names +=item vmsish + +control VMS-specific language features + +=item warning + +control optional warnings + =back =head2 Standard Modules @@ -119,27 +149,115 @@ provide framework for multiple DBMs =item AutoLoader -load functions only on demand +load subroutines only on demand =item AutoSplit split a package for autoloading +=item B + +The Perl Compiler; See also L. 
+ +=item B::Asmdata + +Autogenerated data about Perl ops, used to generate bytecode + +=item B::Assembler + +Assemble Perl bytecode + +=item B::Bblock + +Walk basic blocks + +=item B::Bytecode + +Perl compiler's bytecode backend + +=item B::C + +Perl compiler's C backend + +=item B::CC + +Perl compiler's optimized C translation backend + +=item B::Debug + +Walk Perl syntax tree, printing debug info about ops + +=item B::Deparse + +Perl compiler backend to produce perl code + +=item B::Disassembler + +Disassemble Perl bytecode + +=item B::Lint + +Perl lint + +=item B::Showlex + +Show lexical variables used in functions or files + +=item B::Stackobj + +Helper module for CC backend + +=item B::Terse + +Walk Perl syntax tree, printing terse info about ops + +=item B::Xref + +Generates cross reference reports for Perl programs + =item Benchmark benchmark running times of code +=item CGI + +Simple Common Gateway Interface Class + +=item CGI::Apache + +Make things work with CGI.pm against Perl-Apache API + +=item CGI::Carp + +CGI routines for writing to the HTTPD (or other) error log + +=item CGI::Cookie + +Interface to Netscape Cookies + +=item CGI::Fast + +CGI Interface for Fast CGI + +=item CGI::Push + +Simple Interface to Server Push + +=item CGI::Switch + +Try more than one constructors and return the first object available + =item CPAN -interface to Comprehensive Perl Archive Network +query, download and build perl modules from CPAN sites =item CPAN::FirstTime -create a CPAN configuration file +Utility for CPAN::Config file Initialization =item CPAN::Nox -run CPAN while avoiding compiled extensions +Wrapper around CPAN.pm without using any XS module =item Carp @@ -147,7 +265,7 @@ warn of errors (from perspective of caller) =item Class::Struct -declare struct-like datatypes +declare struct-like datatypes as Perl classes =item Config @@ -157,13 +275,21 @@ access Perl configuration information get pathname of current working directory +=item DB + +programmatic 
interface to the Perl debugging API + =item DB_File -access to Berkeley DB +Perl5 access to Berkeley DB version 1.x + +=item Data::Dumper + +stringified perl data structures, suitable for both printing and C =item Devel::Peek -data debugging tool for the XS programmer +A data debugging tool for the XS programmer =item Devel::SelfStubber @@ -173,9 +299,13 @@ generate stubs for a SelfLoading module supply object methods for directory handles +=item Dumpvalue + +provides screen dump of Perl data. + =item DynaLoader -dynamically load C libraries into Perl code +Dynamically load C libraries into Perl code =item English @@ -183,27 +313,39 @@ use nice English (or awk) names for ugly punctuation variables =item Env -import environment variables +perl module that imports environment variables + +=item Errno + +System errno constants =item Exporter -implements default import method for modules +Implements default import method for modules + +=item ExtUtils::Command + +utilities to replace common UNIX commands in Makefiles etc. 
=item ExtUtils::Embed -utilities for embedding Perl in C/C++ applications +Utilities for embedding Perl in C/C++ applications =item ExtUtils::Install install files from here to there +=item ExtUtils::Installed + +Inventory management of installed modules + =item ExtUtils::Liblist determine libraries to use and how to use them =item ExtUtils::MM_OS2 -methods to override Unix behaviour in ExtUtils::MakeMaker +methods to override UN*X behavior in ExtUtils::MakeMaker =item ExtUtils::MM_Unix @@ -211,7 +353,11 @@ methods used by ExtUtils::MakeMaker =item ExtUtils::MM_VMS -methods to override Unix behaviour in ExtUtils::MakeMaker +methods to override UN*X behavior in ExtUtils::MakeMaker + +=item ExtUtils::MM_Win32 + +methods to override UN*X behavior in ExtUtils::MakeMaker =item ExtUtils::MakeMaker @@ -221,6 +367,10 @@ create an extension Makefile utilities to write and check a MANIFEST file +=item ExtUtils::Miniperl + +write the C code for perlmain.c + =item ExtUtils::Mkbootstrap make a bootstrap file for use by DynaLoader @@ -229,13 +379,17 @@ make a bootstrap file for use by DynaLoader write linker options files for dynamic extension +=item ExtUtils::Packlist + +manage .packlist files + =item ExtUtils::testlib add blib/* directories to @INC =item Fatal -make errors in builtins or Perl functions fatal +replace functions with equivalents which succeed or die =item Fcntl @@ -245,17 +399,17 @@ load the C Fcntl.h defines split a pathname into pieces -=item File::CheckTree - -run many filetest checks on a tree - =item File::Compare -compare files or filehandles +Compare files or filehandles =item File::Copy -copy files or filehandles +Copy files or filehandles + +=item File::DosGlob + +DOS like globbing and then some =item File::Find @@ -271,11 +425,31 @@ portably perform operations on file names =item File::Spec::Functions -function call interface to File::Spec module +portably perform operations on file names + +=item File::Spec::Mac + +File::Spec for MacOS + +=item 
File::Spec::OS2 + +methods for OS/2 file specs + +=item File::Spec::Unix + +methods used by File::Spec + +=item File::Spec::VMS + +methods for VMS file specs + +=item File::Spec::Win32 + +methods for Win32 file specs =item File::stat -by-name interface to Perl's builtin stat() functions +by-name interface to Perl's built-in stat() functions =item FileCache @@ -287,11 +461,11 @@ supply object methods for filehandles =item FindBin -locate directory of original Perl script +Locate directory of original perl script =item GDBM_File -access to the gdbm library +Perl5 access to the gdbm library. =item Getopt::Long @@ -299,7 +473,7 @@ extended processing of command line options =item Getopt::Std -process single-character switches with switch clustering +Process single-character switches with switch clustering =item I18N::Collate @@ -309,6 +483,10 @@ compare 8-bit scalar data according to the current locale load various IO modules +=item IO::Dir + +supply object methods for directory handles + =item IO::File supply object methods for filehandles @@ -321,6 +499,10 @@ supply object methods for I/O handles supply object methods for pipes +=item IO::Poll + +Object interface to system poll call + =item IO::Seekable supply seek based methods for I/O objects @@ -331,7 +513,19 @@ OO interface to the select system call =item IO::Socket -object interface to socket communications +Object interface to socket communications + +=item IO::Socket::INET + +Object interface for AF_INET domain sockets + +=item IO::Socket::UNIX + +Object interface for AF_UNIX domain sockets + +=item IPC::Msg + +SysV Msg IPC object class =item IPC::Open2 @@ -341,13 +535,21 @@ open a process for both reading and writing open a process for reading, writing, and error handling +=item IPC::Semaphore + +SysV Semaphore IPC object class + +=item IPC::SysV + +SysV IPC constants + =item Math::BigFloat -arbitrary length float math package +Arbitrary length float math package =item Math::BigInt -arbitrary size integer 
math package +Arbitrary size integer math package =item Math::Complex @@ -355,52 +557,59 @@ complex numbers and associated mathematical functions =item Math::Trig -simple interface to parts of Math::Complex for those who -need trigonometric functions only for real numbers +trigonometric functions =item NDBM_File -tied access to ndbm files +Tied access to ndbm files =item Net::Ping -Hello, anybody home? +check a remote host for reachability =item Net::hostent -by-name interface to Perl's builtin gethost*() functions +by-name interface to Perl's built-in gethost*() functions =item Net::netent -by-name interface to Perl's builtin getnet*() functions +by-name interface to Perl's built-in getnet*() functions =item Net::protoent -by-name interface to Perl's builtin getproto*() functions +by-name interface to Perl's built-in getproto*() functions =item Net::servent -by-name interface to Perl's builtin getserv*() functions +by-name interface to Perl's built-in getserv*() functions -=item Opcode +=item O -disable named opcodes when compiling or running Perl code +Generic interface to Perl Compiler backends -=item Pod::Text +=item Opcode -convert POD data to formatted ASCII text +Disable named opcodes when compiling perl code =item POSIX -interface to IEEE Standard 1003.1 +Perl interface to IEEE Std 1003.1 + +=item Pod::Html + +module to convert pod files to HTML + +=item Pod::Text + +convert POD data to formatted ASCII text =item SDBM_File -tied access to sdbm files +Tied access to sdbm files =item Safe -compile and execute code in restricted compartments +Compile and execute code in restricted compartments =item Search::Dict @@ -416,7 +625,7 @@ load functions only on demand =item Shell -run shell commands transparently within Perl +run shell commands transparently within perl =item Socket @@ -428,27 +637,31 @@ manipulate Perl symbols and their names =item Sys::Hostname -try every conceivable way to get hostname +Try every conceivable way to get hostname =item Sys::Syslog 
-interface to the Unix syslog(3) calls +Perl interface to the UNIX syslog(3) calls =item Term::Cap -termcap interface +Perl termcap interface =item Term::Complete -word completion module +Perl word completion module =item Term::ReadLine -interface to various C packages +Perl interface to various C packages. + +=item Test + +provides a simple framework for writing test scripts =item Test::Harness -run Perl standard test scripts with statistics +run perl standard test scripts with statistics =item Text::Abbrev @@ -456,35 +669,61 @@ create an abbreviation table from a list =item Text::ParseWords -parse text into an array of tokens +parse text into an array of tokens or array of arrays =item Text::Soundex -implementation of the Soundex Algorithm as described by Knuth - -=item Text::Tabs +Implementation of the Soundex Algorithm as Described by Knuth -expand and unexpand tabs per the Unix expand(1) and unexpand(1) +=item Text::Tabs -- expand and unexpand tabs per the unix expand(1) and unexpand(1) =item Text::Wrap line wrapping to form simple paragraphs -=item Tie::Hash +=item Thread + +multithreading + +=item Thread::Queue + +thread-safe queues + +=item Thread::Semaphore + +thread-safe semaphores + +=item Thread::Signal + +Start a thread which runs signal handlers reliably + +=item Thread::Specific + +thread-specific keys + +=item Tie::Array + +base class for tied arrays + +=item Tie::Handle + +base class definitions for tied handles + +=item Tie::Hash, Tie::StdHash base class definitions for tied hashes =item Tie::RefHash -base class definitions for tied hashes with references as keys +use references as hash keys -=item Tie::Scalar +=item Tie::Scalar, Tie::StdScalar base class definitions for tied scalars =item Tie::SubstrHash -fixed-table-size, fixed-key-length hashing +Fixed-table-size, fixed-key-length hashing =item Time::Local @@ -492,11 +731,11 @@ efficiently compute time from local and GMT time =item Time::gmtime -by-name interface to Perl's builtin gmtime() 
function +by-name interface to Perl's built-in gmtime() function =item Time::localtime -by-name interface to Perl's builtin localtime() function +by-name interface to Perl's built-in localtime() function =item Time::tm @@ -508,42 +747,54 @@ base class for ALL classes (blessed references) =item User::grent -by-name interface to Perl's builtin getgr*() functions +by-name interface to Perl's built-in getgr*() functions =item User::pwent -by-name interface to Perl's builtin getpw*() functions +by-name interface to Perl's built-in getpw*() functions =back -To find out I the modules installed on your system, including -those without documentation or outside the standard release, do this: +To find out I modules installed on your system, including +those without documentation or outside the standard release, +just do this: % find `perl -e 'print "@INC"'` -name '*.pm' -print -They should all have their own documentation installed and accessible via -your system man(1) command. If that fails, try the I program. +They should all have their own documentation installed and accessible +via your system man(1) command. If you do not have a B +program, you can use the Perl B program instead, which +generates Perl code as output you can run through perl. If you +have a B program but it doesn't find your modules, you'll have +to fix your manpath. See L for details. If you have no +system B command, you might try the B program. =head2 Extension Modules -Extension modules are written in C (or a mix of Perl and C) and may be -statically linked or in general are -dynamically loaded into Perl if and when you need them. Supported -extension modules include the Socket, Fcntl, and POSIX modules. +Extension modules are written in C (or a mix of Perl and C). They +are usually dynamically loaded into Perl if and when you need them, +but may also be linked in statically. Supported extension modules +include Socket, Fcntl, and POSIX.
Many popular C extension modules do not come bundled (at least, not -completely) due to their sizes, volatility, or simply lack of time for -adequate testing and configuration across the multitude of platforms on -which Perl was beta-tested. You are encouraged to look for them in -archie(1L), the Perl FAQ or Meta-FAQ, the WWW page, and even with their -authors before randomly posting asking for their present condition and -disposition. +completely) due to their sizes, volatility, or simply lack of time +for adequate testing and configuration across the multitude of +platforms on which Perl was beta-tested. You are encouraged to +look for them on CPAN (described below), or using web search engines +like Alta Vista or Deja News. =head1 CPAN -CPAN stands for the Comprehensive Perl Archive Network. This is a globally -replicated collection of all known Perl materials, including hundreds -of unbundled modules. Here are the major categories of modules: +CPAN stands for Comprehensive Perl Archive Network; it's a globally +replicated trove of Perl materials, including documentation, style +guides, tricks and traps, alternate ports to non-Unix systems and +occasional binary distributions for these. Search engines for +CPAN can be found at http://cpan.perl.com/ and at +http://theory.uwinnipeg.ca/mod_perl/cpan-search.pl . + +Most importantly, CPAN includes around a thousand unbundled modules, +some of which require a C compiler to build. Major categories of +modules are: =over @@ -612,21 +863,18 @@ Miscellaneous Modules =back -The registered CPAN sites as of this writing include the following. +Registered CPAN sites as of this writing include the following.
You should try to choose one close to you: =over -=item * -Africa +=item Africa South Africa ftp://ftp.is.co.za/programming/perl/CPAN/ ftp://ftpza.co.za/pub/mirrors/cpan/ -=item * -Asia +=item Asia - Armenia ftp://sunsite.aua.am/pub/CPAN/ China ftp://freesoft.cei.gov.cn/pub/languages/perl/CPAN/ Hong Kong ftp://ftp.hkstar.com/pub/CPAN/ Israel ftp://bioinfo.weizmann.ac.il/pub/software/perl/CPAN/ @@ -634,6 +882,7 @@ Asia ftp://ftp.jaist.ac.jp/pub/lang/perl/CPAN/ ftp://ftp.lab.kdd.co.jp/lang/perl/CPAN/ ftp://ftp.meisei-u.ac.jp/pub/CPAN/ + ftp://ftp.ring.gr.jp/pub/lang/perl/CPAN/ ftp://mirror.nucba.ac.jp/mirror/Perl/ Singapore ftp://ftp.nus.edu.sg/pub/unix/perl/CPAN/ South Korea ftp://ftp.bora.net/pub/CPAN/ @@ -643,8 +892,7 @@ Asia Thailand ftp://ftp.cs.riubon.ac.th/pub/mirrors/CPAN/ ftp://ftp.nectec.or.th/pub/mirrors/CPAN/ -=item * -Australasia +=item Australasia Australia ftp://cpan.topend.com.au/pub/CPAN/ ftp://ftp.labyrinth.net.au/pub/perl/CPAN/ @@ -653,13 +901,11 @@ Australasia New Zealand ftp://ftp.auckland.ac.nz/pub/perl/CPAN/ ftp://sunsite.net.nz/pub/languages/perl/CPAN/ -=item * Central America Costa Rica ftp://ftp.ucr.ac.cr/pub/Unix/CPAN/ -=item * -Europe +=item Europe Austria ftp://ftp.tuwien.ac.at/pub/languages/perl/CPAN/ Belgium ftp://ftp.kulnet.kuleuven.ac.be/pub/mirror/CPAN/ @@ -686,8 +932,10 @@ Europe Ireland ftp://sunsite.compapp.dcu.ie/pub/perl/ Italy ftp://cis.uniRoma2.it/CPAN/ ftp://ftp.flashnet.it/pub/CPAN/ + ftp://ftp.unina.it/pub/Other/CPAN/ ftp://ftp.unipi.it/pub/mirror/perl/CPAN/ Netherlands ftp://ftp.cs.uu.nl/mirror/CPAN/ + ftp://ftp.EU.net/packages/cpan/ ftp://ftp.nluug.nl/pub/languages/perl/CPAN/ Norway ftp://ftp.uit.no/pub/languages/perl/cpan/ ftp://sunsite.uio.no/pub/languages/perl/CPAN/ @@ -696,10 +944,11 @@ Europe ftp://ftp.pk.edu.pl/pub/lang/perl/CPAN/ ftp://sunsite.icm.edu.pl/pub/CPAN/ Portugal ftp://ftp.ci.uminho.pt/pub/mirrors/cpan/ + ftp://ftp.ist.utl.pt/pub/CPAN/ ftp://ftp.ua.pt/pub/CPAN/ Romania 
ftp://ftp.dntis.ro/pub/mirrors/perl-cpan/ ftp://ftp.dnttm.ro/pub/CPAN/ - Russia ftp://cpan.npi.msu.su/CPAN/ + Russia ftp://ftp.chg.ru/pub/lang/perl/CPAN/ ftp://ftp.sai.msu.su/pub/lang/perl/CPAN/ Slovakia ftp://ftp.entry.sk/pub/languages/perl/CPAN/ Slovenia ftp://ftp.arnes.si/software/perl/CPAN/ @@ -714,11 +963,11 @@ Europe ftp://sunsite.doc.ic.ac.uk/packages/CPAN/ ftp://unix.hensa.ac.uk/mirrors/perl-CPAN/ -=item * -North America +=item North America Alberta ftp://sunsite.ualberta.ca/pub/Mirror/CPAN/ - California ftp://ftp.cdrom.com/pub/perl/CPAN/ + California ftp://cpan.nas.nasa.gov/pub/perl/CPAN/ + ftp://ftp.cdrom.com/pub/perl/CPAN/ ftp://ftp.digital.com/pub/plan/perl/CPAN/ Colorado ftp://ftp.cs.colorado.edu/pub/perl/CPAN/ Florida ftp://ftp.cise.ufl.edu/pub/perl/CPAN/ @@ -728,30 +977,30 @@ North America Manitoba ftp://theory.uwinnipeg.ca/pub/CPAN/ Massachusetts ftp://ftp.ccs.neu.edu/net/mirrors/ftp.funet.fi/pub/languages/perl/CPAN/ ftp://ftp.iguide.com/pub/mirrors/packages/perl/CPAN/ - Mexico D.F. 
ftp://ftp.msg.com.mx/pub/CPAN/ + Mexico ftp://ftp.msg.com.mx/pub/CPAN/ + Minnesota ftp://ftp.midearthbbs.com/CPAN/ New York ftp://ftp.rge.com/pub/languages/perl/ North Carolina ftp://ftp.duke.edu/pub/perl/ Oklahoma ftp://ftp.ou.edu/mirrors/CPAN/ - Ontario ftp://ftp.crc.ca/pub/packages/perl/CPAN/ + Ontario ftp://ftp.crc.ca/pub/packages/lang/perl/CPAN/ Oregon ftp://ftp.orst.edu/pub/packages/CPAN/ Pennsylvania ftp://ftp.epix.net/pub/languages/perl/ Texas ftp://ftp.sedl.org/pub/mirrors/CPAN/ Utah ftp://mirror.xmission.com/CPAN/ Virginia ftp://ftp.perl.org/pub/perl/CPAN/ ftp://ruff.cs.jmu.edu/pub/CPAN/ - Washington ftp://ftp.spu.edu/pub/CPAN/ + Washington ftp://ftp-mirror.internap.com/pub/CPAN/ + ftp://ftp.spu.edu/pub/CPAN/ -=item * -South America +=item South America Brazil ftp://cpan.if.usp.br/pub/mirror/CPAN/ - Chile ftp://ftp.ing.puc.cl/pub/unix/perl/CPAN/ - ftp://sunsite.dcc.uchile.cl/pub/Lang/perl/CPAN/ + Chile ftp://sunsite.dcc.uchile.cl/pub/Lang/perl/CPAN/ =back For an up-to-date listing of CPAN sites, -see F or F. +see http://www.perl.com/perl/CPAN or ftp://www.perl.com/perl/ . =head1 Modules: Creation, Use, and Abuse @@ -795,6 +1044,8 @@ scheme as the original author. =item Try to design the new module to be easy to extend and reuse. +Always use B<-w>. + Use blessed references. Use the two argument form of bless to bless into the class name given as the first parameter of the constructor, e.g.,: @@ -819,7 +1070,7 @@ appropriate. Split large methods into smaller more flexible ones. Inherit methods from other modules if appropriate. Avoid class name tests like: C. -Generally you can delete the "C" part with no harm at all. +Generally you can delete the C part with no harm at all. Let the objects look after themselves! Generally, avoid hard-wired class names as far as possible. @@ -833,7 +1084,7 @@ the module after __END__ either using AutoSplit or by saying: eval join('',) || die $@ unless caller(); Does your module pass the 'empty subclass' test? 
If you say -"C<@SUBCLASS::ISA = qw(YOURCLASS);>" your applications should be able +C<@SUBCLASS::ISA = qw(YOURCLASS);> your applications should be able to use SUBCLASS in exactly the same way as YOURCLASS. For example, does your application still work if you change: C<$obj = new YOURCLASS;> into: C<$obj = new SUBCLASS;> ? @@ -842,11 +1093,18 @@ Avoid keeping any state information in your packages. It makes it difficult for multiple other packages to use yours. Keep state information in objects. -Always use B<-w>. Try to C (or C). +Always use B<-w>. + +Try to C (or C). Remember that you can add C to individual blocks -of code that need less strictness. Always use B<-w>. Always use B<-w>! +of code that need less strictness. + +Always use B<-w>. + Follow the guidelines in the perlstyle(1) manual. +Always use B<-w>. + =item Some simple style guidelines The perlstyle manual supplied with Perl has many helpful points. @@ -1016,7 +1274,7 @@ should store your module's version number in a non-my package variable called $VERSION. This should be a floating point number with at least two digits after the decimal (i.e., hundredths, e.g, C<$VERSION = "0.01">). Don't use a "1.3.2" style version. -See Exporter.pm in Perl5.001m or later for details. +See L for details. It may be handy to add a function or method to retrieve the number. Use the number in announcements and archive file names when @@ -1030,7 +1288,7 @@ module (or the module itself if small) to the comp.lang.perl.announce Usenet newsgroup. This will at least ensure very wide once-off distribution. -If possible you should place the module into a major ftp archive and +If possible, register the module with CPAN. You should include details of its location in your announcement. Some notes about ftp archives: Please use a long descriptive file @@ -1065,7 +1323,7 @@ Please remember to send me an updated entry for the Module list! Always strive to remain compatible with previous released versions. 
Otherwise try to add a mechanism to revert to the -old behaviour if people rely on it. Document incompatible changes. +old behavior if people rely on it. Document incompatible changes. =back @@ -1091,8 +1349,8 @@ it worth it unless you plan to make other changes at the same time? =item Make the most of the opportunity. If you are going to convert the script to a module you can use the -opportunity to redesign the interface. The 'Guidelines for Module -Creation' above include many of the issues you should consider. +opportunity to redesign the interface. The guidelines for module +creation above include many of the issues you should consider. =item The pl2pm utility will get you started. diff --git a/pod/perlobj.pod b/pod/perlobj.pod index a997ae0..137896f 100644 --- a/pod/perlobj.pod +++ b/pod/perlobj.pod @@ -7,7 +7,7 @@ perlobj - Perl objects First of all, you need to understand what references are in Perl. See L for that. Second, if you still find the following reference work too complicated, a tutorial on object-oriented programming -in Perl can be found in L. +in Perl can be found in L and L. If you're still with us, then here are three very simple definitions that you should find reassuring. @@ -115,12 +115,13 @@ reference as an ordinary reference. Outside the class package, the reference is generally treated as an opaque value that may be accessed only through the class's methods. -A constructor may re-bless a referenced object currently belonging to -another class, but then the new class is responsible for all cleanup -later. The previous blessing is forgotten, as an object may belong -to only one class at a time. (Although of course it's free to -inherit methods from many classes.) If you find yourself having to -do this, the parent class is probably misbehaving, though. +Although a constructor can in theory re-bless a referenced object +currently belonging to another class, this is almost certainly going +to get you into trouble.
The new class is responsible for all +cleanup later. The previous blessing is forgotten, as an object +may belong to only one class at a time. (Although of course it's +free to inherit methods from many classes.) If you find yourself +having to do this, the parent class is probably misbehaving, though. A clarification: Perl objects are blessed. References are not. Objects know which package they belong to. References do not. The bless() @@ -186,16 +187,16 @@ is to prepend your fieldname in the hash with the package name. Unlike say C++, Perl doesn't provide any special syntax for method definition. (It does provide a little syntax for method invocation though. More on that later.) A method expects its first argument -to be the object (reference) or package (string) it is being invoked on. There are just two -types of methods, which we'll call class and instance. -(Sometimes you'll hear these called static and virtual, in honor of -the two C++ method types they most closely resemble.) +to be the object (reference) or package (string) it is being invoked +on. There are two ways of calling methods, which we'll call class +methods and instance methods. A class method expects a class name as the first argument. It -provides functionality for the class as a whole, not for any individual -object belonging to the class. Constructors are typically class -methods. Many class methods simply ignore their first argument, because -they already know what package they're in, and don't care what package +provides functionality for the class as a whole, not for any +individual object belonging to the class. Constructors are often +class methods, but see L and L for alternatives. +Many class methods simply ignore their first argument, because they +already know what package they're in and don't care what package they were invoked via. (These aren't necessarily the same, because class methods follow the inheritance tree just like ordinary instance methods.) 
Another typical use for class methods is to look up an @@ -310,10 +311,59 @@ class. Sometimes you want to call a method when you don't know the method name ahead of time. You can use the arrow form, replacing the method name -with a simple scalar variable containing the method name: +with a simple scalar variable containing the method name or a +reference to the function. $method = $fast ? "findfirst" : "findbest"; - $fred->$method(@args); + $fred->$method(@args); # call by name + + if ($coderef = $fred->can($parent . "::findbest")) { + $self->$coderef(@args); # call by coderef + } + +=head2 WARNING + +While indirect object syntax may well be appealing to English speakers and +to C++ programmers, be not seduced! It suffers from two grave problems. + +The first problem is that an indirect object is limited to a name, +a scalar variable, or a block, because it would have to do too much +lookahead otherwise, just like any other postfix dereference in the +language. (These are the same quirky rules as are used for the filehandle +slot in functions like C and C.) This can lead to horribly +confusing precedence problems, as in these next two lines: + + move $obj->{FIELD}; # probably wrong! + move $ary[$i]; # probably wrong! + +Those actually parse as the very surprising: + + $obj->move->{FIELD}; # Well, lookee here + $ary->move->[$i]; # Didn't expect this one, eh? + +Rather than what you might have expected: + + $obj->{FIELD}->move(); # You should be so lucky. + $ary[$i]->move; # Yeah, sure. + +The left side of ``-E'' is not so limited, because it's an infix operator, +not a postfix operator. + +As if that weren't bad enough, think about this: Perl must guess I whether C and C above are functions or methods. +Usually Perl gets it right, but when it doesn't, you get a function +call compiled as a method, or vice versa. This can introduce subtle +bugs that are hard to unravel.
For example, calling a method C +in indirect notation--as C++ programmers are so wont to do--can +be miscompiled into a subroutine call if there's already a C +function in scope. You'd end up calling the current package's C +as a subroutine, rather than the desired class's method. The compiler +tries to cheat by remembering bareword Cs, but the grief if it +messes up just isn't worth the years of debugging it would likely take +you to track such subtle bugs down. + +The infix arrow notation using ``C<-E>'' doesn't suffer from either +of these disturbing ambiguities, so we recommend you use it exclusively. =head2 Default UNIVERSAL methods @@ -391,50 +441,6 @@ one are destroyed. Such objects will be freed and destroyed automatically when the current object is freed, provided no other references to them exist elsewhere. -=head2 WARNING - -While indirect object syntax may well be appealing to English speakers and -to C++ programmers, be not seduced! It suffers from two grave problems. - -The first problem is that an indirect object is limited to a name, -a scalar variable, or a block, because it would have to do too much -lookahead otherwise, just like any other postfix dereference in the -language. (These are the same quirky rules as are used for the filehandle -slot in functions like C and C.) This can lead to horribly -confusing precedence problems, as in these next two lines: - - move $obj->{FIELD}; # probably wrong! - move $ary[$i]; # probably wrong! - -Those actually parse as the very surprising: - - $obj->move->{FIELD}; # Well, lookee here - $ary->move->[$i]; # Didn't expect this one, eh? - -Rather than what you might have expected: - - $obj->{FIELD}->move(); # You should be so lucky. - $ary[$i]->move; # Yeah, sure. - -The left side of ``-E'' is not so limited, because it's an infix operator, -not a postfix operator. - -As if that weren't bad enough, think about this: Perl must guess I whether C and C above are functions or methods.
-Usually Perl gets it right, but when it doesn't it, you get a function -call compiled as a method, or vice versa. This can introduce subtle -bugs that are hard to unravel. For example, calling a method C -in indirect notation--as C++ programmers are so wont to do--can -be miscompiled into a subroutine call if there's already a C -function in scope. You'd end up calling the current package's C -as a subroutine, rather than the desired class's method. The compiler -tries to cheat by remembering bareword Cs, but the grief if it -messes up just isn't worth the years of debugging it would likely take -you to to track such subtle bugs down. - -The infix arrow notation using ``C<-E>'' doesn't suffer from either -of these disturbing ambiguities, so we recommend you use it exclusively. - =head2 Summary That's about all there is to it. Now you need just to go off and buy a @@ -547,8 +553,8 @@ breaks the circularities in the self-referential structure. =head1 SEE ALSO -A kinder, gentler tutorial on object-oriented programming in Perl can -be found in L. -You should also check out L for other object tricks, traps, and tips, -as well as L for some style guides on constructing both modules +A kinder, gentler tutorial on object-oriented programming in Perl +can be found in L and L. You should also check +out L for other object tricks, traps, and tips, as well +as L for some style guides on constructing both modules and classes. diff --git a/pod/perlop.pod b/pod/perlop.pod index 106b9a9..0f8117c 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -5,11 +5,11 @@ perlop - Perl operators and precedence =head1 SYNOPSIS Perl operators have the following associativity and precedence, -listed from highest precedence to lowest. Note that all operators -borrowed from C keep the same precedence relationship with each other, -even where C's precedence is slightly screwy. (This makes learning -Perl easier for C folks.) 
With very few exceptions, these all -operate on scalar values only, not array values. +listed from highest precedence to lowest. Operators borrowed from +C keep the same precedence relationship with each other, even where +C's precedence is slightly screwy. (This makes learning Perl easier +for C folks.) With very few exceptions, these all operate on scalar +values only, not array values. left terms and list operators (leftward) left -> @@ -64,11 +64,11 @@ For example, in @ary = (1, 3, sort 4, 2); print @ary; # prints 1324 -the commas on the right of the sort are evaluated before the sort, but -the commas on the left are evaluated after. In other words, list -operators tend to gobble up all the arguments that follow them, and +the commas on the right of the sort are evaluated before the sort, +but the commas on the left are evaluated after. In other words, +list operators tend to gobble up all arguments that follow, and then act like a simple TERM with regard to the preceding expression. -Note that you have to be careful with parentheses: +Be careful with parentheses: # These evaluate exit before doing the print: print($foo, exit); # Obviously not what you want. @@ -95,16 +95,18 @@ as well as L<"I/O Operators">. =head2 The Arrow Operator -Just as in C and C++, "C<-E>" is an infix dereference operator. If the -right side is either a C<[...]> or C<{...}> subscript, then the left side -must be either a hard or symbolic reference to an array or hash (or -a location capable of holding a hard reference, if it's an lvalue (assignable)). -See L. +"C<-E>" is an infix dereference operator, just as it is in C +and C++. If the right side is either a C<[...]>, C<{...}>, or a +C<(...)> subscript, then the left side must be either a hard or +symbolic reference to an array, a hash, or a subroutine respectively. +(Or technically speaking, a location capable of holding a hard +reference, if it's an array or hash reference being used for +assignment.) See L and L. 
-Otherwise, the right side is a method name or a simple scalar variable -containing the method name, and the left side must either be an object -(a blessed reference) or a class name (that is, a package name). -See L. +Otherwise, the right side is a method name or a simple scalar +variable containing either the method name or a subroutine reference, +and the left side must be either an object (a blessed reference) +or a class name (that is, a package name). See L. =head2 Auto-increment and Auto-decrement @@ -129,7 +131,7 @@ The auto-decrement operator is not magical. =head2 Exponentiation -Binary "**" is the exponentiation operator. Note that it binds even more +Binary "**" is the exponentiation operator. It binds even more tightly than unary minus, so -2**4 is -(2**4), not (-2)**4. (This is implemented using C's pow(3) function, which actually works on doubles internally.) @@ -155,10 +157,10 @@ syntactically for separating a function name from a parenthesized expression that would otherwise be interpreted as the complete list of function arguments. (See examples above under L.) -Unary "\" creates a reference to whatever follows it. See L. -Do not confuse this behavior with the behavior of backslash within a -string, although both forms do convey the notion of protecting the next -thing from interpretation. +Unary "\" creates a reference to whatever follows it. See L +and L. Do not confuse this behavior with the behavior of +backslash within a string, although both forms do convey the notion +of protecting the next thing from interpolation. =head2 Binding Operators @@ -384,23 +386,26 @@ of B, B, and various editors. Each ".." operator maintains its own boolean state. It is false as long as its left operand is false. Once the left operand is true, the range operator stays true until the right operand is true, I which the range operator becomes false -again. (It doesn't become false till the next time the range operator is +again. 
It doesn't become false till the next time the range operator is evaluated. It can test the right operand and become false on the same evaluation it became true (as in B), but it still returns true once. -If you don't want it to test the right operand till the next evaluation -(as in B), use three dots ("...") instead of two.) The right -operand is not evaluated while the operator is in the "false" state, and -the left operand is not evaluated while the operator is in the "true" -state. The precedence is a little lower than || and &&. The value -returned is either the empty string for false, or a sequence number -(beginning with 1) for true. The sequence number is reset for each range -encountered. The final sequence number in a range has the string "E0" -appended to it, which doesn't affect its numeric value, but gives you -something to search for if you want to exclude the endpoint. You can -exclude the beginning point by waiting for the sequence number to be -greater than 1. If either operand of scalar ".." is a constant expression, -that operand is implicitly compared to the C<$.> variable, the current -line number. Examples: +If you don't want it to test the right operand till the next +evaluation, as in B, just use three dots ("...") instead of +two. In all other regards, "..." behaves just like ".." does. + +The right operand is not evaluated while the operator is in the +"false" state, and the left operand is not evaluated while the +operator is in the "true" state. The precedence is a little lower +than || and &&. The value returned is either the empty string for +false, or a sequence number (beginning with 1) for true. The +sequence number is reset for each range encountered. The final +sequence number in a range has the string "E0" appended to it, which +doesn't affect its numeric value, but gives you something to search +for if you want to exclude the endpoint. 
You can exclude the +beginning point by waiting for the sequence number to be greater +than 1. If either operand of scalar ".." is a constant expression, +that operand is implicitly compared to the C<$.> variable, the +current line number. Examples: As a scalar operator: @@ -429,7 +434,7 @@ can say @alphabet = ('A' .. 'Z'); -to get all the letters of the alphabet, or +to get all normal letters of the alphabet, or $hexdigit = (0 .. 9, 'a' .. 'f')[$num & 15]; @@ -464,8 +469,6 @@ legal lvalues (meaning that you can assign to them): ($a_or_b ? $a : $b) = $c; -This is not necessarily guaranteed to contribute to the readability of your program. - Because this operator produces an assignable result, using assignments without parentheses will get you in trouble. For example, this: @@ -479,6 +482,10 @@ Rather than this: ($a % 2) ? ($a += 10) : ($a += 2) +That should probably be written more simply as: + + $a += ($a % 2) ? 10 : 2; + =head2 Assignment Operators "=" is the ordinary assignment operator. @@ -500,7 +507,7 @@ The following are recognized: .= %= ^= x= -Note that while these are grouped by family, they all have the precedence +Although these are grouped by family, they all have the precedence of assignment. Unlike in C, the assignment operator produces a valid lvalue. Modifying @@ -573,14 +580,14 @@ probably avoid using this for assignment, only for control flow. ($a = $b) or $c; # really means this $a = $b || $c; # better written this way -However, when it's a list context assignment and you're trying to use +However, when it's a list-context assignment and you're trying to use "||" for control flow, you probably need "or" so that the assignment takes higher precedence. @info = stat($file) || die; # oops, scalar sense of stat! @info = stat($file) or die; # better, now @info gets its due -Then again, you could always use parentheses. +Then again, you could always use parentheses. Binary "xor" returns the exclusive-OR of the two surrounding expressions. 
It cannot short circuit, of course. @@ -602,7 +609,7 @@ operators are typed: $, @, %, and &.) =item (TYPE) -Type casting operator. +Type-casting operator. =back @@ -627,17 +634,17 @@ the same character fore and aft, but the 4 sorts of brackets s{}{} Substitution yes (unless '' is delimiter) tr{}{} Transliteration no (but see below) -Note that there can be whitespace between the operator and the quoting +There can be whitespace between the operator and the quoting characters, except when C<#> is being used as the quoting character. -C is parsed as being the string C, while C is the -operator C followed by a comment. Its argument will be taken from the -next line. This allows you to write: +C is parsed as the string C, while C is the +operator C followed by a comment. Its argument will be taken +from the next line. This allows you to write: s {foo} # Replace foo {bar} # with bar. -For constructs that do interpolation, variables beginning with "C<$>" -or "C<@>" are interpolated, as are the following sequences. Within +For constructs that do interpolate, variables beginning with "C<$>" +or "C<@>" are interpolated, as are the following escape sequences. Within a transliteration, the first eleven of these sequences may be used. \t tab (HT, TAB) @@ -650,7 +657,7 @@ a transliteration, the first eleven of these sequences may be used. \033 octal char (ESC) \x1b hex char (ESC) \x{263a} wide hex char (SMILEY) - \c[ control char + \c[ control char (ESC) \l lowercase next char \u uppercase next char @@ -664,7 +671,7 @@ and C<\U> is taken from the current locale. See L. All systems use the virtual C<"\n"> to represent a line terminator, called a "newline". There is no such thing as an unvarying, physical -newline character. It is an illusion that the operating system, +newline character. It is only an illusion that the operating system, device drivers, C libraries, and Perl all conspire to preserve. Not all systems read C<"\r"> as ASCII CR and C<"\n"> as ASCII LF. 
For example, on a Mac, these are reversed, and on systems without line terminator, @@ -687,28 +694,17 @@ interpolated, so that regular expressions may be incorporated into the pattern from the variables. If this is not what you want, use C<\Q> to interpolate a variable literally. -Apart from the above, there are no multiple levels of interpolation. In -particular, contrary to the expectations of shell programmers, back-quotes -do I interpolate within double quotes, nor do single quotes impede -evaluation of variables when used within double quotes. +Apart from the behavior described above, Perl does not expand +multiple levels of interpolation. In particular, contrary to the +expectations of shell programmers, back-quotes do I interpolate +within double quotes, nor do single quotes impede evaluation of +variables when used within double quotes. =head2 Regexp Quote-Like Operators Here are the quote-like operators that apply to pattern matching and related activities. -Most of this section is related to use of regular expressions from Perl. -Such a use may be considered from two points of view: Perl handles a -a string and a "pattern" to RE (regular expression) engine to match, -RE engine finds (or does not find) the match, and Perl uses the findings -of RE engine for its operation, possibly asking the engine for other matches. - -RE engine has no idea what Perl is going to do with what it finds, -similarly, the rest of Perl has no idea what a particular regular expression -means to RE engine. This creates a clean separation, and in this section -we discuss matching from Perl point of view only. The other point of -view may be found in L. - =over 8 =item ?PATTERN? @@ -727,21 +723,22 @@ patterns local to the current package are reset. reset if eof; # clear ?? status for next file } -This usage is vaguely deprecated, and may be removed in some future -version of Perl. 
+This usage is vaguely deprecated, which means it just might possibly +be removed in some distant future version of Perl, perhaps somewhere +around the year 2168. =item m/PATTERN/cgimosx =item /PATTERN/cgimosx Searches a string for a pattern match, and in scalar context returns -true (1) or false (''). If no string is specified via the C<=~> or -C operator, the $_ string is searched. (The string specified with -C<=~> need not be an lvalue--it may be the result of an expression -evaluation, but remember the C<=~> binds rather tightly.) See also -L. -See L for discussion of additional considerations that apply -when C is in effect. +true if it succeeds, false if it fails. If no string is specified +via the C<=~> or C operator, the $_ string is searched. (The +string specified with C<=~> need not be an lvalue--it may be the +result of an expression evaluation, but remember the C<=~> binds +rather tightly.) See also L. See L for +discussion of additional considerations that apply when C +is in effect. Options are: @@ -755,11 +752,10 @@ Options are: If "/" is the delimiter then the initial C is optional. With the C you can use any pair of non-alphanumeric, non-whitespace characters -as delimiters. This is particularly useful for matching Unix path names -that contain "/", to avoid LTS (leaning toothpick syndrome). If "?" is +as delimiters. This is particularly useful for matching path names +that contain "/", to avoid LTS (leaning toothpick syndrome). If "?" is the delimiter, then the match-only-once rule of C applies. -If "'" is the delimiter, no variable interpolation is performed on the -PATTERN. +If "'" is the delimiter, no interpolation is performed on the PATTERN. PATTERN may contain variables, which will be interpolated (and the pattern recompiled) every time the pattern search is evaluated, except @@ -770,12 +766,12 @@ the trailing delimiter.
This avoids expensive run-time recompilations, and is useful when the value you are interpolating won't change over the life of the script. However, mentioning C constitutes a promise that you won't change the variables in the pattern. If you change them, -Perl won't even notice. +Perl won't even notice. See also L. If the PATTERN evaluates to the empty string, the last I matched regular expression is used instead. -If the C option is not used, C in a list context returns a +If the C option is not used, C in list context returns a list consisting of the subexpressions matched by the parentheses in the pattern, i.e., (C<$1>, C<$2>, C<$3>...). (Note that here C<$1> etc. are also set, and that this differs from Perl 4's behavior.) When there are @@ -805,15 +801,16 @@ remainder of the line, and assigns those three fields to $F1, $F2, and $Etc. The conditional is true if any variables were assigned, i.e., if the pattern matched. -The C modifier specifies global pattern matching--that is, matching -as many times as possible within the string. How it behaves depends on -the context. In list context, it returns a list of all the -substrings matched by all the parentheses in the regular expression. -If there are no parentheses, it returns a list of all the matched -strings, as if there were parentheses around the whole pattern. +The C modifier specifies global pattern matching--that is, +matching as many times as possible within the string. How it behaves +depends on the context. In list context, it returns a list of the +substrings matched by any capturing parentheses in the regular +expression. If there are no parentheses, it returns a list of all +the matched strings, as if there were parentheses around the whole +pattern. In scalar context, each execution of C finds the next match, -returning TRUE if it matches, and FALSE if there is no further match. +returning true if it matches, and false if there is no further match. 
The position after the last match can be read or set using the pos() function; see L. A failed match normally resets the search position to the beginning of the string, but you can avoid that @@ -823,8 +820,8 @@ string also resets the search position. You can intermix C matches with C, where C<\G> is a zero-width assertion that matches the exact position where the previous C, if any, left off. The C<\G> assertion is not supported without -the C modifier; currently, without C, C<\G> behaves just like -C<\A>, but that's accidental and may change in the future. +the C modifier. (Currently, without C, C<\G> behaves just like +C<\A>, but that's accidental and may change in the future.) Examples: @@ -832,12 +829,10 @@ Examples: ($one,$five,$fifteen) = (`uptime` =~ /(\d+\.\d+)/g); # scalar context - { - local $/ = ""; - while (defined($paragraph = <>)) { - while ($paragraph =~ /[a-z]['")]*[.!?]+['")]*\s/g) { - $sentences++; - } + $/ = ""; $* = 1; # $* deprecated in modern perls + while (defined($paragraph = <>)) { + while ($paragraph =~ /[a-z]['")]*[.!?]+['")]*\s/g) { + $sentences++; } } print "$sentences\n"; @@ -893,7 +888,7 @@ Here is the output (split into several lines): =item C<'STRING'> -A single-quoted, literal string. A backslash represents a backslash +A single-quoted, literal string. A backslash represents a backslash unless followed by the delimiter or another backslash, in which case the delimiter or backslash is interpolated. @@ -909,15 +904,16 @@ A double-quoted, interpolated string. $_ .= qq (*** The previous line contains the naughty word "$1".\n) - if /(tcl|rexx|python)/; # :-) + if /\b(tcl|java|python)\b/i; # :-) $baz = "\n"; # a one-character string =item qr/STRING/imosx -Quote-as-a-regular-expression operator. I is interpolated the -same way as I in C. If "'" is used as the -delimiter, no variable interpolation is done. Returns a Perl value -which may be used instead of the corresponding C expression. 
+This operator quotes--and compiles--its I as a regular +expression. I is interpolated the same way as I +in C. If "'" is used as the delimiter, no interpolation +is done. Returns a Perl value which may be used instead of the +corresponding C expression. For example, @@ -936,7 +932,7 @@ The result may be used as a subpattern in a match: $string =~ /$re/; # or this way Since Perl may compile the pattern at the moment of execution of qr() -operator, using qr() may have speed advantages in I situations, +operator, using qr() may have speed advantages in some situations, notably if the result of qr() is used standalone: sub match { @@ -951,11 +947,11 @@ notably if the result of qr() is used standalone: } @_; } -Precompilation of the pattern into an internal representation at the -moment of qr() avoids a need to recompile the pattern every time a -match C is attempted. (Note that Perl has many other -internal optimizations, but none would be triggered in the above -example if we did not use qr() operator.) +Precompilation of the pattern into an internal representation at +the moment of qr() avoids a need to recompile the pattern every +time a match C is attempted. (Perl has many other internal +optimizations, but none would be triggered in the above example if +we did not use qr() operator.) Options are: @@ -1012,7 +1008,7 @@ double-quote interpolation, passing it on to the shell instead: $perl_info = qx(ps $$); # that's Perl's $$ $shell_info = qx'ps $$'; # that's the new shell's $$ -Note that how the string gets evaluated is entirely subject to the command +How that string gets evaluated is entirely subject to the command interpreter on your system. On most platforms, you will have to protect shell metacharacters if you want them treated literally. This is in practice difficult to do, as it's unclear how to escape which characters.
@@ -1064,10 +1060,10 @@ Some frequently seen examples: use POSIX qw( setlocale localeconv ) @EXPORT = qw( foo bar baz ); -A common mistake is to try to separate the words with comma or to put -comments into a multi-line C-string. For this reason the C<-w> -switch produce warnings if the STRING contains the "," or the "#" -character. +A common mistake is to try to separate the words with comma or to +put comments into a multi-line C-string. For this reason, the +B<-w> switch (that is, the C<$^W> variable) produces warnings if +the STRING contains the "," or the "#" character. =item s/PATTERN/REPLACEMENT/egimosx @@ -1080,7 +1076,7 @@ variable is searched and modified. (The string specified with C<=~> must be scalar variable, an array element, a hash element, or an assignment to one of those, i.e., an lvalue.) -If the delimiter chosen is a single quote, no variable interpolation is +If the delimiter chosen is a single quote, no interpolation is done on either the PATTERN or the REPLACEMENT. Otherwise, if the PATTERN contains a $ that looks like a variable rather than an end-of-string test, the variable will be interpolated into the pattern @@ -1163,16 +1159,14 @@ B, we use the \EIE form in only the left hand side. Anywhere else it's $EIE. Occasionally, you can't use just a C to get all the changes -to occur. Here are two common cases: +to occur that you might want. Here are two common cases: # put commas in the right places in an integer - 1 while s/(.*\d)(\d\d\d)/$1,$2/g; # perl4 - 1 while s/(\d)(\d\d\d)(?!\d)/$1,$2/g; # perl5 + 1 while s/(\d)(\d\d\d)(?!\d)/$1,$2/g; # expand tabs to 8-column spacing 1 while s/\t+/' ' x (length($&)*8 - length($`)%8)/e; - =item tr/SEARCHLIST/REPLACEMENTLIST/cdsUC =item y/SEARCHLIST/REPLACEMENTLIST/cdsUC @@ -1206,14 +1200,14 @@ Options: U Translate to/from UTF-8. C Translate to/from 8-bit char (octet). -If the C modifier is specified, the SEARCHLIST character set is -complemented. 
If the C modifier is specified, any characters specified -by SEARCHLIST not found in REPLACEMENTLIST are deleted. (Note -that this is slightly more flexible than the behavior of some B -programs, which delete anything they find in the SEARCHLIST, period.) -If the C modifier is specified, sequences of characters that were -transliterated to the same character are squashed down to a single instance of the -character. +If the C modifier is specified, the SEARCHLIST character set +is complemented. If the C modifier is specified, any characters +specified by SEARCHLIST not found in REPLACEMENTLIST are deleted. +(Note that this is slightly more flexible than the behavior of some +B programs, which delete anything they find in the SEARCHLIST, +period.) If the C modifier is specified, sequences of characters +that were transliterated to the same character are squashed down +to a single instance of the character. If the C modifier is used, the REPLACEMENTLIST is always interpreted exactly as specified. Otherwise, if the REPLACEMENTLIST is shorter @@ -1245,19 +1239,20 @@ Examples: tr [\200-\377] [\000-\177]; # delete 8th bit - tr/\0-\xFF//CU; # translate Latin-1 to Unicode - tr/\0-\x{FF}//UC; # translate Unicode to Latin-1 + tr/\0-\xFF//CU; # change Latin-1 to Unicode + tr/\0-\x{FF}//UC; # change Unicode to Latin-1 -If multiple transliterations are given for a character, only the first one is used: +If multiple transliterations are given for a character, only the +first one is used: tr/AAA/XYZ/ will transliterate any A to X. -Note that because the transliteration table is built at compile time, neither +Because the transliteration table is built at compile time, neither the SEARCHLIST nor the REPLACEMENTLIST are subjected to double quote -interpolation. That means that if you want to use variables, you must use -an eval(): +interpolation. 
That means that if you want to use variables, you +must use an eval(): eval "tr/$oldlist/$newlist/"; die $@ if $@; @@ -1268,52 +1263,52 @@ an eval(): =head2 Gory details of parsing quoted constructs -When presented with something which may have several different -interpretations, Perl uses the principle B (expanded to Do What I Mean -- not what I wrote) to pick up the most probable interpretation of the -source. This strategy is so successful that Perl users usually do not -suspect ambivalence of what they write. However, time to time Perl's ideas -differ from what the author meant. - -The target of this section is to clarify the Perl's way of interpreting -quoted constructs. The most frequent reason one may have to want to know the -details discussed in this section is hairy regular expressions. However, the -first steps of parsing are the same for all Perl quoting operators, so here -they are discussed together. - -The most important detail of Perl parsing rules is the first one -discussed below; when processing a quoted construct, Perl I -finds the end of the construct, then it interprets the contents of the -construct. If you understand this rule, you may skip the rest of this -section on the first reading. The other rules would -contradict user's expectations much less frequently than the first one. - -Some of the passes discussed below are performed concurrently, but as -far as results are the same, we consider them one-by-one. For different -quoting constructs Perl performs different number of passes, from -one to five, but they are always performed in the same order. +When presented with something that might have several different +interpretations, Perl uses the B (that's "Do What I Mean") +principle to pick the most probable interpretation. This strategy +is so successful that Perl programmers often do not suspect the +ambivalence of what they write. But from time to time, Perl's +notions differ substantially from what the author honestly meant. 
+ +This section hopes to clarify how Perl handles quoted constructs. +Although the most common reason to learn this is to unravel labyrinthine +regular expressions, because the initial steps of parsing are the +same for all quoting operators, they are all discussed together. + +The most important Perl parsing rule is the first one discussed +below: when processing a quoted construct, Perl first finds the end +of that construct, then interprets its contents. If you understand +this rule, you may skip the rest of this section on the first +reading. The other rules are likely to contradict the user's +expectations much less frequently than this first one. + +Some passes discussed below are performed concurrently, but because +their results are the same, we consider them individually. For different +quoting constructs, Perl performs different numbers of passes, from +one to five, but these passes are always performed in the same order. =over =item Finding the end -First pass is finding the end of the quoted construct, be it -a multichar delimiter -C<"\nEOF\n"> of C<< construct, C which terminates C construct, -C<]> which terminates C construct, or C> which terminates a -fileglob started with C<<>. +The first pass is finding the end of the quoted construct, whether +it be a multicharacter delimiter C<"\nEOF\n"> in the C<< +construct, a C that terminates a C construct, a C<]> which +terminates C construct, or a C> which terminates a +fileglob started with C>. -When searching for one-char non-matching delimiter, such as C, combinations -C<\\> and C<\/> are skipped. When searching for one-char matching delimiter, -such as C<]>, combinations C<\\>, C<\]> and C<\[> are skipped, and -nested C<[>, C<]> are skipped as well. When searching for multichar delimiter -no skipping is performed. +When searching for single-character non-pairing delimiters, such +as C, combinations of C<\\> and C<\/> are skipped. 
However, +when searching for single-character pairing delimiters like C<[>, +combinations of C<\\>, C<\]>, and C<\[> are all skipped, and nested +C<[>, C<]> are skipped as well. When searching for multicharacter +delimiters, nothing is skipped. -For constructs with 3-part delimiters (C etc.) the search is -repeated once more. +For constructs with three-part delimiters (C, C, and +C), the search is repeated once more. -During this search no attention is paid to the semantic of the construct, -thus: +During this search no attention is paid to the semantics of the construct. +Thus: "$hash{"$foo/$bar"}" @@ -1323,30 +1318,28 @@ or: bar # NOT a comment, this slash / terminated m//! /x -do not form legal quoted expressions, the quoted part ends on the first C<"> -and C, and the rest happens to be a syntax error. Note that since the slash -which terminated C was followed by a C, the above is not C, -but rather C with no 'x' switch. So the embedded C<#> is interpreted -as a literal C<#>. +do not form legal quoted expressions. The quoted part ends on the +first C<"> and C, and the rest happens to be a syntax error. +Because the slash that terminated C was followed by a C, +the example above is not C, but rather C with no C +modifier. So the embedded C<#> is interpreted as a literal C<#>. =item Removal of backslashes before delimiters -During the second pass the text between the starting delimiter and -the ending delimiter is copied to a safe location, and the C<\> is -removed from combinations consisting of C<\> and delimiter(s) (both starting -and ending delimiter if they differ). - -The removal does not happen for multi-char delimiters. - -Note that the combination C<\\> is left as it was! +During the second pass, text between the starting and ending +delimiters is copied to a safe location, and the C<\> is removed +from combinations consisting of C<\> and delimiter--or delimiters, +meaning both starting and ending delimiters will be handled, should these differ.
+This removal does not happen for multi-character delimiters. +Note that the combination C<\\> is left intact, just as it was. -Starting from this step no information about the delimiter(s) is used in the -parsing. +Starting from this step no information about the delimiters is +used in parsing. =item Interpolation -Next step is interpolation in the obtained delimiter-independent text. -There are four different cases. +The next step is interpolation in the text obtained, which is now +delimiter-independent. There are four different cases. =over @@ -1360,44 +1353,40 @@ The only interpolation is removal of C<\> from pairs C<\\>. =item C<"">, C<``>, C, C, C<> -C<\Q>, C<\U>, C<\u>, C<\L>, C<\l> (possibly paired with C<\E>) are converted -to corresponding Perl constructs, thus C<"$foo\Qbaz$bar"> is converted to : - - $foo . (quotemeta("baz" . $bar)); - -Other combinations of C<\> with following chars are substituted with -appropriate expansions. +C<\Q>, C<\U>, C<\u>, C<\L>, C<\l> (possibly paired with C<\E>) are +converted to corresponding Perl constructs. Thus, C<"$foo\Qbaz$bar"> +is converted to C<$foo . (quotemeta("baz" . $bar))> internally. +The other combinations are replaced with appropriate expansions. -Let it be stressed that I and C<\E>> is interpolated -in the usual way. Say, C<"\Q\\E"> has no C<\E> inside: it has C<\Q>, C<\\>, -and C, thus the result is the same as for C<"\\\\E">. Generally speaking, -having backslashes between C<\Q> and C<\E> may lead to counterintuitive -results. So, C<"\Q\t\E"> is converted to: - - quotemeta("\t") - -which is the same as C<"\\\t"> (since TAB is not alphanumerical). Note also -that: +Let it be stressed that I and C<\E>> +is interpolated in the usual way. Something like C<"\Q\\E"> has +no C<\E> inside. Instead, it has C<\Q>, C<\\>, and C, so the +result is the same as for C<"\\\\E">. As a general rule, backslashes +between C<\Q> and C<\E> may lead to counterintuitive results.
So, +C<"\Q\t\E"> is converted to C, which is the same +as C<"\\\t"> (since TAB is not alphanumeric). Note also that: $str = '\t'; return "\Q$str"; may be closer to the conjectural I of the writer of C<"\Q\t\E">. -Interpolated scalars and arrays are internally converted to the C and -C<.> Perl operations, thus C<"$foo >>> '@arr'"> becomes: +Interpolated scalars and arrays are converted internally to the C<join> and +C<.> concatenation operations. Thus, C<"$foo XXX '@arr'"> becomes: - $foo . " >>> '" . (join $", @arr) . "'"; + $foo . " XXX '" . (join $", @arr) . "'"; -All the operations in the above are performed simultaneously left-to-right. +All operations above are performed simultaneously, left to right. -Since the result of "\Q STRING \E" has all the metacharacters quoted -there is no way to insert a literal C<$> or C<@> inside a C<\Q\E> pair: if -protected by C<\> C<$> will be quoted to became "\\\$", if not, it is -interpreted as starting an interpolated scalar. +Because the result of C<"\Q STRING \E"> has all metacharacters +quoted, there is no way to insert a literal C<$> or C<@> inside a +C<\Q\E> pair. If protected by C<\>, C<$> will be quoted to become +C<"\\\$">; if not, it is interpreted as the start of an interpolated +scalar. -Note also that the interpolating code needs to make a decision on where the -interpolated scalar ends. For instance, whether C<"a $b -E {c}"> means: +Note also that the interpolation code needs to make a decision on +where the interpolated scalar ends. For instance, whether +C<"a $b -E {c}"> really means: "a " . $b . " -> {c}"; @@ -1405,99 +1394,108 @@ or: "a " . $b -> {c}; -I the decision is to take the longest possible text which -does not include spaces between components and contains matching -braces/brackets. Since the outcome may be determined by I based -on heuristic estimators, the result I, but -is usually correct for the ambiguous cases.
+Most of the time, the decision is to take the longest possible text that does not include +spaces between components and which contains matching braces or +brackets. Because the outcome may be determined by voting based +on heuristic estimators, the result is not strictly predictable. +Fortunately, it's usually correct for ambiguous cases. =item C, C, C, C, -Processing of C<\Q>, C<\U>, C<\u>, C<\L>, C<\l> and interpolation happens -(almost) as with C constructs, but I followed by -RE-special chars (including C<\>) is not performed>! Moreover, -inside C<(?{BLOCK})>, C<(?# comment )>, and C<#>-comment of -C-regular expressions no processing is performed at all. -This is the first step where presence of the C switch is relevant. - -Interpolation has several quirks: C<$|>, C<$(> and C<$)> are not interpolated, and -constructs C<$var[SOMETHING]> are I (by several different estimators) -to be an array element or C<$var> followed by a RE alternative. This is -the place where the notation C<${arr[$bar]}> comes handy: C -is interpreted as an array element C<-9>, not as a regular expression from -variable C<$arr> followed by a digit, which is the interpretation of -C. Since voting among different estimators may be performed, -the result I. - -It is on this step that C<\1> is converted to C<$1> in the replacement -text of C. - -Note that absence of processing of C<\\> creates specific restrictions on the -post-processed text: if the delimiter is C, one cannot get the combination -C<\/> into the result of this step: C will finish the regular expression, -C<\/> will be stripped to C on the previous step, and C<\\/> will be left -as is.
Since C is equivalent to C<\/> inside a regular expression, this -does not matter unless the delimiter is a special character for the RE engine, -as in C, C, or C, or an alphanumeric char, as in: +Processing of C<\Q>, C<\U>, C<\u>, C<\L>, C<\l>, and interpolation +happens (almost) as with C constructs, but the substitution +of C<\> followed by RE-special chars (including C<\>) is not +performed. Moreover, inside C<(?{BLOCK})>, C<(?# comment )>, and +a C<#>-comment in a C-regular expression, no processing is +performed whatsoever. This is the first step at which the presence +of the C modifier is relevant. + +Interpolation has several quirks: C<$|>, C<$(>, and C<$)> are not +interpolated, and constructs C<$var[SOMETHING]> are voted (by several +different estimators) to be either an array element or C<$var> +followed by an RE alternative. This is where the notation +C<${arr[$bar]}> comes handy: C is interpreted as +array element C<-9>, not as a regular expression from the variable +C<$arr> followed by a digit, which would be the interpretation of +C. Since voting among different estimators may occur, +the result is not predictable. + +It is at this step that C<\1> is begrudgingly converted to C<$1> in +the replacement text of C to correct the incorrigible +I hackers who haven't picked up the saner idiom yet. A warning +is emitted if the B<-w> command-line flag (that is, the C<$^W> variable) +was set. + +The lack of processing of C<\\> creates specific restrictions on +the post-processed text. If the delimiter is C, one cannot get +the combination C<\/> into the result of this step. C will +finish the regular expression, C<\/> will be stripped to C on +the previous step, and C<\\/> will be left as is. 
Because C is +equivalent to C<\/> inside a regular expression, this does not +matter unless the delimiter happens to be character special to the +RE engine, such as in C, C, or C; or an +alphanumeric char, as in: m m ^ a \s* b mmx; -In the above RE, which is intentionally obfuscated for illustration, the +In the RE above, which is intentionally obfuscated for illustration, the delimiter is C, the modifier is C, and after backslash-removal the -RE is the same as for C). +RE is the same as for C). There's more than one +reason you're encouraged to restrict your delimiters to non-alphanumeric, +non-whitespace choices. =back -This step is the last one for all the constructs except regular expressions, +This step is the last one for all constructs except regular expressions, which are processed further. =item Interpolation of regular expressions -All the previous steps were performed during the compilation of Perl code, -this one happens in run time (though it may be optimized to be calculated -at compile time if appropriate). After all the preprocessing performed -above (and possibly after evaluation if catenation, joining, up/down-casing -and Cing are involved) the resulting I is passed to RE -engine for compilation. - -Whatever happens in the RE engine is better be discussed in L, -but for the sake of continuity let us do it here. - -This is another step where presence of the C switch is relevant. -The RE engine scans the string left-to-right, and converts it to a finite -automaton. - -Backslashed chars are either substituted by corresponding literal -strings (as with C<\{>), or generate special nodes of the finite automaton -(as with C<\b>). Characters which are special to the RE engine (such as -C<|>) generate corresponding nodes or groups of nodes. C<(?#...)> -comments are ignored. All the rest is either converted to literal strings -to match, or is ignored (as is whitespace and C<#>-style comments if -C is present). 
- -Note that the parsing of the construct C<[...]> is performed using -rather different rules than for the rest of the regular expression. -The terminator of this construct is found using the same rules as for -finding a terminator of a C<{}>-delimited construct, the only exception -being that C<]> immediately following C<[> is considered as if preceded -by a backslash. Similarly, the terminator of C<(?{...})> is found using -the same rules as for finding a terminator of a C<{}>-delimited construct. - -It is possible to inspect both the string given to RE engine, and the -resulting finite automaton. See arguments C/C -of C> directive, and/or B<-Dr> option of Perl in -L. +Previous steps were performed during the compilation of Perl code, +but this one happens at run time--although it may be optimized to +be calculated at compile time if appropriate. After preprocessing +described above, and possibly after evaluation if catenation, +joining, casing translation, or metaquoting are involved, the +resulting I is passed to the RE engine for compilation. + +Whatever happens in the RE engine might be better discussed in L, +but for the sake of continuity, we shall do so here. + +This is another step where the presence of the C modifier is +relevant. The RE engine scans the string from left to right and +converts it to a finite automaton. + +Backslashed characters are either replaced with corresponding +literal strings (as with C<\{>), or else they generate special nodes +in the finite automaton (as with C<\b>). Characters special to the +RE engine (such as C<|>) generate corresponding nodes or groups of +nodes. C<(?#...)> comments are ignored. All the rest is either +converted to literal strings to match, or else is ignored (as is +whitespace and C<#>-style comments if C is present). + +Parsing of the bracketed character class construct, C<[...]>, is +rather different than the rule used for the rest of the pattern. 
+The terminator of this construct is found using the same rules as +for finding the terminator of a C<{}>-delimited construct, the only +exception being that C<]> immediately following C<[> is treated as +though preceded by a backslash. Similarly, the terminator of +C<(?{...})> is found using the same rules as for finding the +terminator of a C<{}>-delimited construct. + +It is possible to inspect both the string given to RE engine and the +resulting finite automaton. See the arguments C/C +in the C> pragma, as well as Perl's B<-Dr> command-line +switch documented in L. =item Optimization of regular expressions This step is listed for completeness only. Since it does not change semantics, details of this step are not documented and are subject -to change. This step is performed over the finite automaton generated -during the previous pass. +to change without notice. This step is performed over the finite +automaton that was generated during the previous pass. -However, in older versions of Perl C> used to silently -optimize C to mean C. This behaviour, though present -in current versions of Perl, may be deprecated in future. +It is at this stage that C silently optimizes C to +mean C. =back @@ -1506,39 +1504,40 @@ in current versions of Perl, may be deprecated in future. There are several I/O operators you should know about. A string enclosed by backticks (grave accents) first undergoes -variable substitution just like a double quoted string. It is then -interpreted as a command, and the output of that command is the value -of the pseudo-literal, like in a shell. In scalar context, a single -string consisting of all the output is returned. In list context, -a list of values is returned, one for each line of output. (You can -set C<$/> to use a different line terminator.) The command is executed +double-quote interpolation. 
It is then interpreted as an external +command, and the output of that command is the value of the +pseudo-literal, like in a shell. In scalar context, a single +string consisting of all output is returned. In list context, a +list of values is returned, one per line of output. (You can set +C<$/> to use a different line terminator.) The command is executed each time the pseudo-literal is evaluated. The status value of the command is returned in C<$?> (see L for the interpretation of C<$?>). Unlike in B, no translation is done on the return data--newlines remain newlines. Unlike in any of the shells, single quotes do not hide variable names in the command from interpretation. -To pass a $ through to the shell you need to hide it with a backslash. -The generalized form of backticks is C. (Because backticks -always undergo shell expansion as well, see L for -security concerns.) - -In a scalar context, evaluating a filehandle in angle brackets yields the -next line from that file (newline, if any, included), or C at -end-of-file. When C<$/> is set to C (i.e. file slurp mode), -and the file is empty, it returns C<''> the first time, followed by -C subsequently. - -Ordinarily you must assign the returned value to a variable, but there is one -situation where an automatic assignment happens. I the -input symbol is the only thing inside the conditional of a C or -C loop, the value is automatically assigned to the variable -C<$_>. In these loop constructs, the assigned value (whether assignment -is automatic or explicit) is then tested to see if it is defined. -The defined test avoids problems where line has a string value -that would be treated as false by perl e.g. "" or "0" with no trailing -newline. (This may seem like an odd thing to you, but you'll use the -construct in almost every Perl script you write.) Anyway, the following -lines are equivalent to each other: +To pass a literal dollar-sign through to the shell you need to hide +it with a backslash. The generalized form of backticks is C.
+(Because backticks always undergo shell expansion as well, see +L for security concerns.) + +In scalar context, evaluating a filehandle in angle brackets yields +the next line from that file (the newline, if any, included), or +C at end-of-file or on error. When C<$/> is set to C +(sometimes known as file-slurp mode) and the file is empty, it +returns C<''> the first time, followed by C subsequently. + +Ordinarily you must assign the returned value to a variable, but +there is one situation where an automatic assignment happens. If +and only if the input symbol is the only thing inside the conditional +of a C statement (even if disguised as a C loop), +the value is automatically assigned to the global variable $_, +destroying whatever was there previously. (This may seem like an +odd thing to you, but you'll use the construct in almost every Perl +script you write.) The $_ variable is not implicitly localized. +You'll have to put a C before the loop if you want that +to happen. + +The following lines are equivalent: while (defined($_ = )) { print; } while ($_ = ) { print; } @@ -1548,34 +1547,40 @@ lines are equivalent to each other: print while ($_ = ); print while ; -and this also behaves similarly, but avoids the use of $_ : +This also behaves similarly, but avoids $_ : while (my $line = ) { print $line } -If you really mean such values to terminate the loop they should be -tested for explicitly: +In these loop constructs, the assigned value (whether assignment +is automatic or explicit) is then tested to see whether it is +defined. The defined test avoids problems where a line has a string +value that would be treated as false by Perl, for example a "" or +a "0" with no trailing newline. If you really mean for such values +to terminate the loop, they should be tested for explicitly: while (($_ = ) ne '0') { ... } while () { last unless $_; ... } -In other boolean contexts, CIE> without explicit C -test or comparison will solicit a warning if C<-w> is in effect.
+In other boolean contexts, C<E<lt>I<filehandle>E<gt>> without an +explicit C<defined> test or comparison elicits a warning if the B<-w> +command-line switch (the C<$^W> variable) is in effect. The filehandles STDIN, STDOUT, and STDERR are predefined. (The -filehandles C, C, and C will also work except in -packages, where they would be interpreted as local identifiers rather -than global.) Additional filehandles may be created with the open() -function. See L for details on this. +filehandles C, C, and C will also work except +in packages, where they would be interpreted as local identifiers +rather than global.) Additional filehandles may be created with +the open() function, amongst others. See L and +L for details on this. -If a EFILEHANDLEE is used in a context that is looking for a list, a -list consisting of all the input lines is returned, one line per list -element. It's easy to make a I data space this way, so use with -care. +If a EFILEHANDLEE is used in a context that is looking for +a list, a list comprising all input lines is returned, one line per +list element. It's easy to grow to a rather large data space this +way, so use with care. -EFILEHANDLEE may also be spelt readline(FILEHANDLE). See -L. +EFILEHANDLEE may also be spelled C. +See L. -The null filehandle EE is special and can be used to emulate the +The null filehandle EE is special: it can be used to emulate the behavior of B and B. Input from EE comes either from standard input, or from each file listed on the command line. Here's how it works: the first time EE is evaluated, the @ARGV array is @@ -1597,16 +1602,17 @@ is equivalent to the following Perl-like pseudo code: } } -except that it isn't so cumbersome to say, and will actually work. It -really does shift array @ARGV and put the current filename into variable -$ARGV. It also uses filehandle I internally--EE is just a -synonym for EARGVE, which is magical. (The pseudo code above -doesn't work because it treats EARGVE as non-magical.)
+except that it isn't so cumbersome to say, and will actually work. +It really does shift the @ARGV array and put the current filename +into the $ARGV variable. It also uses filehandle I +internally--EE is just a synonym for EARGVE, which +is magical. (The pseudo code above doesn't work because it treats +EARGVE as non-magical.) You can modify @ARGV before the first EE as long as the array ends up containing the list of filenames you really want. Line numbers (C<$.>) -continue as if the input were one big happy file. (But see example -under C for how to reset line numbers on each file.) +continue as though the input were one big happy file. See the example +in L for how to reset line numbers on each file. If you want to set @ARGV to your own list of files, go right ahead. This sets @ARGV to all plain text files if no @ARGV was given: @@ -1634,12 +1640,13 @@ Getopts modules or put a loop on the front like this: } The EE symbol will return C for end-of-file only once. -If you call it again after this it will assume you are processing another -@ARGV list, and if you haven't set @ARGV, will input from STDIN. +If you call it again after this, it will assume you are processing another +@ARGV list, and if you haven't set @ARGV, will read input from STDIN. -If the string inside the angle brackets is a reference to a scalar -variable (e.g., E$fooE), then that variable contains the name of the -filehandle to input from, or its typeglob, or a reference to the same. For example: +If what the angle brackets contain is a simple scalar variable (e.g., +E<lt>$fooE<gt>), then that variable contains the name of the +filehandle to input from, or its typeglob, or a reference to the +same.
For example: $fh = \*STDIN; $line = <$fh>; @@ -1648,9 +1655,9 @@ If what's within the angle brackets is neither a filehandle nor a simple scalar variable containing a filehandle name, typeglob, or typeglob reference, it is interpreted as a filename pattern to be globbed, and either a list of filenames or the next filename in the list is returned, -depending on context. This distinction is determined on syntactic -grounds alone. That means C$xE> is always a readline from -an indirect handle, but C$hash{key}E> is always a glob. +depending on context. This distinction is determined on syntactic +grounds alone. That means C$xE> is always a readline() from +an indirect handle, but C$hash{key}E> is always a glob(). That's because $x is a simple scalar variable, but C<$hash{key}> is not--it's a hash element. @@ -1660,7 +1667,7 @@ in the previous paragraph. (In older versions of Perl, programmers would insert curly brackets to force interpretation as a filename glob: C${foo}E>. These days, it's considered cleaner to call the internal function directly as C, which is probably the right -way to have done it in the first place.) Example: +way to have done it in the first place.) For example: while (<*.c>) { chmod 0644, $_; @@ -1674,27 +1681,31 @@ is equivalent to chmod 0644, $_; } -In fact, it's currently implemented that way. (Which means it will not -work on filenames with spaces in them unless you have csh(1) on your -machine.) Of course, the shortest way to do the above is: +In fact, it's currently implemented that way, but this is expected +to be made completely internal in the near future. (Which means +it will not work on filenames with spaces in them unless you have +csh(1) on your machine.) Of course, the shortest way to do the +above is: chmod 0644, <*.c>; -Because globbing invokes a shell, it's often faster to call readdir() yourself -and do your own grep() on the filenames. 
Furthermore, due to its current -implementation of using a shell, the glob() routine may get "Arg list too -long" errors (unless you've installed tcsh(1L) as F). - -A glob evaluates its (embedded) argument only when it is starting a new -list. All values must be read before it will start over. In a list -context this isn't important, because you automatically get them all -anyway. In scalar context, however, the operator returns the next value -each time it is called, or a C value if you've just run out. As -for filehandles an automatic C is generated when the glob -occurs in the test part of a C or C - because legal glob returns -(e.g. a file called F<0>) would otherwise terminate the loop. -Again, C is returned only once. So if you're expecting a single value -from a glob, it is much better to say +Because globbing currently invokes a shell, it's often faster to +call readdir() yourself and do your own grep() on the filenames. +Furthermore, due to its current implementation of using a shell, +the glob() routine may get "Arg list too long" errors (unless you've +installed tcsh(1L) as F or hacked your F). + +A (file)glob evaluates its (embedded) argument only when it is +starting a new list. All values must be read before it will start +over. In list context, this isn't important because you automatically +get them all anyway. However, in scalar context the operator returns +the next value each time it's called, or C<undef> if you've just +run out. As with filehandle reads, an automatic C is +generated when the glob occurs in the test part of a C, +because legal glob returns (e.g. a file called F<0>) would otherwise +terminate the loop. Again, C is returned only once. So if +you're expecting a single value from a glob, it is much better to +say ($file) = ; @@ -1703,7 +1714,7 @@ than $file = ; because the latter will alternate between returning a filename and -returning FALSE. +returning false.
If you're trying to do variable interpolation, it's definitely better to use the glob() function, because the older notation can cause people @@ -1715,10 +1726,10 @@ to become confused with the indirect filehandle notation. =head2 Constant Folding Like C, Perl does a certain amount of expression evaluation at -compile time, whenever it determines that all arguments to an +compile time whenever it determines that all arguments to an operator are static and have no side effects. In particular, string concatenation happens at compile time between literals that don't do -variable substitution. Backslash interpretation also happens at +variable substitution. Backslash interpolation also happens at compile time. You can say 'Now is the time for all' . "\n" . @@ -1731,20 +1742,20 @@ you say if (-s $file > 5 + 100 * 2**16) { } } -the compiler will precompute the number that -expression represents so that the interpreter -won't have to. +the compiler will precompute the number which that expression +represents so that the interpreter won't have to. =head2 Bitwise String Operators Bitstrings of any size may be manipulated by the bitwise operators (C<~ | & ^>). -If the operands to a binary bitwise op are strings of different sizes, -B<|> and B<^> ops will act as if the shorter operand had additional -zero bits on the right, while the B<&> op will act as if the longer -operand were truncated to the length of the shorter. Note that the -granularity for such extension or truncation is one or more I. +If the operands to a binary bitwise op are strings of different +sizes, B<|> and B<^> ops act as though the shorter operand had +additional zero bits on the right, while the B<&> op acts as though +the longer operand were truncated to the length of the shorter. +The granularity for such extension or truncation is one or more +bytes. # ASCII-based examples print "j p \n" ^ " a h"; # prints "JAPH\n" @@ -1752,9 +1763,9 @@ granularity for such extension or truncation is one or more I.
print "japh\nJunk" & '_____'; # prints "JAPH\n"; print 'p N$' ^ " E bitwise operation. You may explicitly show which type of +a B bitwise operation. You may explicitly show which type of operation you intend by using C<""> or C<0+>, as in the examples below. $foo = 150 | 105 ; # yields 255 (0x96 | 0x69 is 0xFF) @@ -1770,33 +1781,39 @@ in a bit vector. =head2 Integer Arithmetic -By default Perl assumes that it must do most of its arithmetic in +By default, Perl assumes that it must do most of its arithmetic in floating point. But by saying use integer; you may tell the compiler that it's okay to use integer operations -from here to the end of the enclosing BLOCK. An inner BLOCK may -countermand this by saying +(if it feels like it) from here to the end of the enclosing BLOCK. +An inner BLOCK may countermand this by saying no integer; -which lasts until the end of that BLOCK. - -The bitwise operators ("&", "|", "^", "~", "<<", and ">>") always -produce integral results. (But see also L.) -However, C still has meaning -for them. By default, their results are interpreted as unsigned -integers. However, if C is in effect, their results are -interpreted as signed integers. For example, C<~0> usually evaluates -to a large integral value. However, C is -1 on twos-complement machines. +which lasts until the end of that BLOCK. Note that this doesn't +mean everything is only an integer, merely that Perl may use integer +operations if it is so inclined. For example, even under C, if you take the C, you'll still get C<1.4142135623731> +or so. + +Used on numbers, the bitwise operators ("&", "|", "^", "~", "<<", +and ">>") always produce integral results. (But see also L.) However, C still has meaning for +them. By default, their results are interpreted as unsigned integers, but +if C is in effect, their results are interpreted +as signed integers. For example, C<~0> usually evaluates to a large +integral value. However, C is C<-1> on twos-complement +machines. 
=head2 Floating-point Arithmetic While C provides integer-only arithmetic, there is no -similar ways to provide rounding or truncation at a certain number of -decimal places. For rounding to a certain number of digits, sprintf() -or printf() is usually the easiest route. +analogous mechanism to provide automatic rounding or truncation to a +certain number of decimal places. For rounding to a certain number +of digits, sprintf() or printf() is usually the easiest route. +See L. Floating-point numbers are only approximations to what a mathematician would call real numbers. There are infinitely more reals than floats, @@ -1820,10 +1837,10 @@ this topic. } The POSIX module (part of the standard perl distribution) implements -ceil(), floor(), and a number of other mathematical and trigonometric -functions. The Math::Complex module (part of the standard perl -distribution) defines a number of mathematical functions that can also -work on real numbers. Math::Complex not as efficient as POSIX, but +ceil(), floor(), and other mathematical and trigonometric functions. +The Math::Complex module (part of the standard perl distribution) +defines mathematical functions that work on both the reals and the +imaginary numbers. Math::Complex is not as efficient as POSIX, but POSIX can't work with complex numbers. Rounding in financial applications can have serious implications, and @@ -1835,13 +1852,17 @@ need yourself. =head2 Bigger Numbers The standard Math::BigInt and Math::BigFloat modules provide -variable precision arithmetic and overloaded operators. -At the cost of some space and considerable speed, they -avoid the normal pitfalls associated with limited-precision -representations. +variable-precision arithmetic and overloaded operators, although +they're currently pretty slow. At the cost of some space and +considerable speed, they avoid the normal pitfalls associated with +limited-precision representations.
use Math::BigInt; $x = Math::BigInt->new('123456789123456789'); print $x * $x; # prints +15241578780673678515622620750190521 + +The non-standard modules SSLeay::BN and Math::Pari provide +equivalent functionality (and much more) with a substantial +performance savings. diff --git a/pod/perlopentut.pod b/pod/perlopentut.pod index 6e6091a..ae622a6 100644 --- a/pod/perlopentut.pod +++ b/pod/perlopentut.pod @@ -73,7 +73,7 @@ from a different file, and forget to trim it before opening: This is not a bug, but a feature. Because C mimics the shell in its style of using redirection arrows to specify how to open the file, it also does so with respect to extra white space around the filename itself -as well. For accessing files with naughty names, see L. =head2 Pipe Opens diff --git a/pod/perlpod.pod b/pod/perlpod.pod index 7fa8290..731a0fb 100644 --- a/pod/perlpod.pod +++ b/pod/perlpod.pod @@ -11,7 +11,6 @@ L, L, and L. - =head2 Verbatim Paragraph A verbatim paragraph, distinguished by being indented (that is, @@ -20,7 +19,6 @@ with tabs assumed to be on 8-column boundaries. There are no special formatting escapes, so you can't italicize or anything like that. A \ means \, and nothing else. - =head2 Command Paragraph All command paragraphs start with "=", followed by an @@ -75,7 +73,6 @@ or use "=item 1.", "=item 2.", etc., to produce numbered lists, or use or numbers. If you start with bullets or numbers, stick with them, as many formatters use the first "=item" type to decide how to format the list. - =item =for =item =begin @@ -149,10 +146,8 @@ Some examples of lists include: =back - =back - =head2 Ordinary Block of Text It will be filled, and maybe even @@ -193,7 +188,6 @@ here and in commands: E Some non-numeric HTML entity, such as E - =head2 The Intent That's it. The intent is simplicity, not power. I wanted paragraphs @@ -223,7 +217,6 @@ TeX, and other markup languages, as used for online documentation. 
Translators exist for B (that's for nroff(1) and troff(1)), B, B, B, and B. - =head2 Embedding Pods in Perl Modules You can embed pod documentation in your Perl scripts. Start your @@ -236,7 +229,6 @@ directive. __END__ - =head1 NAME modern - I am a modern module @@ -244,7 +236,6 @@ directive. If you had not had that empty line there, then the translators wouldn't have seen it. - =head2 Common Pod Pitfalls =over 4 diff --git a/pod/perlport.pod b/pod/perlport.pod index a2c798f..4234fcd 100644 --- a/pod/perlport.pod +++ b/pod/perlport.pod @@ -196,6 +196,8 @@ transfer and store numbers always in text format, instead of raw binary, or consider using modules like Data::Dumper (included in the standard distribution as of Perl 5.005) and Storable. +Also, don't depend on specific values returned from C and +C. These in some cases may differ from platform to platform. =head2 Files and Filesystems @@ -257,7 +259,7 @@ Also of use is File::Basename, from the standard distribution, which splits a pathname into pieces (base filename, full path to directory, and file suffix). -Even when on a single platform (if you can call UNIX a single platform), +Even when on a single platform (if you can call Unix a single platform), remember not to count on the existence or the contents of system-specific files or directories, like F, F, F, or even F. For @@ -343,27 +345,25 @@ often behave differently, and often represent their results in a platform-dependent way. Thus you should seldom depend on them to produce consistent results. -The UNIX System V IPC (msg*(), sem*(), shm*()) is not available -even in all UNIX platforms. - One especially common bit of Perl code is opening a pipe to sendmail: - open(MAIL, '| /usr/lib/sendmail -t') or die $!; + open(MAIL, '|/usr/lib/sendmail -t') or die $!; This is fine for systems programming when sendmail is known to be available. But it is not fine for many non-Unix systems, and even some Unix systems that may not have sendmail installed. 
If a portable -solution is needed, see the various distributions on CPAN that deal with -it. Mail::Mailer and Mail::Send in the MailTools distribution -are commonly used, and provide several mailing methods, including mail, -sendmail, and direct SMTP (via Net::SMTP) if a mail transfer agent is -not available. Mail::Sendmail is a standalone module that provides -simple, platform-independent mailing. +solution is needed, see the C<Mail::Mailer> and C<Mail::Send> modules +in the C<MailTools> distribution. C<Mail::Mailer> provides several +mailing methods, including mail, sendmail, and direct SMTP +(via C<Net::SMTP>) if a mail transfer agent is not available. The rule of thumb for portable code is: Do it all in portable Perl, or use a module (that may internally implement it with platform-specific code, but expose a common interface). +The Unix System V IPC (C<msg*(), sem*(), shm*()>) is not available +even in all Unix platforms. + =head2 External Subroutines (XS) @@ -415,6 +415,13 @@ Date::Parse. An array of values, such as those returned by C, can be converted to an OS-specific representation using Time::Local. +When calculating specific times, such as for tests in time or date modules, +it may be appropriate to calculate an offset for the epoch. + require Time::Local; + $offset = Time::Local::timegm(0, 0, 0, 1, 0, 70); +The value for C<$offset> in Unix will be C<0>, but in Mac OS will be +some large number. C<$offset> can then be added to a Unix time value +to get what should be the proper value on any system. =head2 Character sets and character encoding @@ -432,7 +439,7 @@ before the 'b'. =head2 Internationalisation If you may assume POSIX (a rather large assumption, that in practice -means UNIX), you may read more about the POSIX locale system (see +means Unix), you may read more about the POSIX locale system from L. The locale system at least attempts to make things a little bit more portable, or at least more convenient and native-friendly for non-English users. The system affects character @@ -479,6 +486,11 @@ to other platforms easier. 
Use the Config module and the special variable C<$^O> to differentiate platforms, as described in L<"PLATFORMS">. +Be careful in the tests you supply with your module or programs. Often +module code is fully portable, but the tests are not. This occurs +often when tests spawn off other processes or call external programs +to aid in the testing, or when (as noted above) the tests assume certain +things about the filesystem and paths. Be careful not to depend on a specific output style for errors, such as when checking C<$@> after an C. Some platforms expect a certain output format, and perl on those platforms may @@ -535,30 +547,29 @@ field of the string returned by typing C (or a similar command) at the shell prompt. Here, for example, are a few of the more popular Unix flavors: - uname $^O $Config{'archname'} + uname $^O $Config{'archname'} -------------------------------------------- - AIX aix aix + AIX aix aix BSD/OS bsdos i386-bsdos dgux dgux AViiON-dgux DYNIX/ptx dynixptx i386-dynixptx - FreeBSD freebsd freebsd-i386 - Linux linux i386-linux + FreeBSD freebsd freebsd-i386 + Linux linux i386-linux Linux linux i586-linux Linux linux ppc-linux - HP-UX hpux PA-RISC1.1 - IRIX irix irix + HP-UX hpux PA-RISC1.1 + IRIX irix irix openbsd openbsd i386-openbsd - OSF1 dec_osf alpha-dec_osf + OSF1 dec_osf alpha-dec_osf reliantunix-n svr4 RM400-svr4 SCO_SV sco_sv i386-sco_sv SINIX-N svr4 RM400-svr4 sn4609 unicos CRAY_C90-unicos sn6521 unicosmk t3e-unicosmk sn9617 unicos CRAY_J90-unicos - sn9716 unicos CRAY_J90-unicos - SunOS solaris sun4-solaris - SunOS solaris i86pc-solaris - SunOS4 sunos sun4-sunos + SunOS solaris sun4-solaris + SunOS solaris i86pc-solaris + SunOS4 sunos sun4-sunos Note that because the C<$Config{'archname'}> may depend on the hardware architecture it may vary quite a lot, much more than the C<$^O>. @@ -1603,6 +1614,8 @@ Not useful. (S) =over 4 +=item v1.42, 22 May 1999 +Added notes about tests, sprintf/printf, and epoch offsets. 
=item v1.41, 19 May 1999 Lots more little changes to formatting and content. @@ -1675,6 +1688,7 @@ Nick Ing-Simmons Enick@ni-s.u-net.comE, Andreas J. KEnig Ekoenig@kulturbox.deE, Markus Laker Emlaker@contax.co.ukE, Andrew M. Langmead Eaml@world.std.comE, +Larry Moore Eljmoore@freespace.netE, Paul Moore EPaul.Moore@uk.origin-it.comE, Chris Nandor Epudge@pobox.comE, Matthias Neeracher Eneeri@iis.ee.ethz.chE, @@ -1693,4 +1707,4 @@ Epudge@pobox.comE. =head1 VERSION -Version 1.41, last modified 19 May 1999 +Version 1.42, last modified 22 May 1999 diff --git a/pod/perlre.pod b/pod/perlre.pod index 95d4734..98d7b35 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -6,13 +6,13 @@ perlre - Perl regular expressions This page describes the syntax of regular expressions in Perl. For a description of how to I regular expressions in matching -operations, plus various examples of the same, see discussion +operations, plus various examples of the same, see discussions of C, C, C and C in L. -The matching operations can have various modifiers. The modifiers +Matching operations can have various modifiers. Modifiers that relate to the interpretation of the regular expression inside -are listed below. For the modifiers that alter the way a regular expression -is used by Perl, see L and +are listed below. Modifiers that alter the way a regular expression +is used by Perl are detailed in L and L. =over 4 @@ -33,14 +33,15 @@ line anywhere within the string. =item s Treat string as single line. That is, change "." to match any character -whatsoever, even a newline, which it normally would not match. +whatsoever, even a newline, which normally it would not match. -The C and C modifiers both override the C<$*> setting. That is, no matter -what C<$*> contains, C without C will force "^" to match only at the -beginning of the string and "$" to match only at the end (or just before a -newline at the end) of the string. Together, as /ms, they let the "." 
match -any character whatsoever, while yet allowing "^" and "$" to match, -respectively, just after and just before newlines within the string. +The C</s> and C</m> modifiers both override the C<$*> setting. That +is, no matter what C<$*> contains, C</s> without C</m> will force +"^" to match only at the beginning of the string and "$" to match +only at the end (or just before a newline at the end) of the string. +Together, as /ms, they let the "." match any character whatsoever, +while yet allowing "^" and "$" to match, respectively, just after +and just before newlines within the string. =item x @@ -70,11 +71,11 @@ in L. =head2 Regular Expressions -The patterns used in pattern matching are regular expressions such as -those supplied in the Version 8 regex routines. (In fact, the -routines are derived (distantly) from Henry Spencer's freely -redistributable reimplementation of the V8 routines.) -See L for details. +The patterns used in Perl pattern matching derive from those supplied in +the Version 8 regex routines. (In fact, the routines are derived +(distantly) from Henry Spencer's freely redistributable reimplementation +of the V8 routines.) See L for +details. In particular the following metacharacters have their standard I-ish meanings: @@ -177,12 +178,13 @@ In addition, Perl defines the following: equivalent to C<(?:\PM\pM*)> \C Match a single C char (octet) even under utf8. -A C<\w> matches a single alphanumeric character, not a whole -word. To match a word you'd need to say C<\w+>. If C is in -effect, the list of alphabetic characters generated by C<\w> is taken -from the current locale. See L. You may use C<\w>, C<\W>, -C<\s>, C<\S>, C<\d>, and C<\D> within character classes (though not as -either end of a range). +A C<\w> matches a single alphanumeric character, not a whole word. +To match a word you'd need to say C<\w+>. If C<use locale> is in +effect, the list of alphabetic characters generated by C<\w> is +taken from the current locale. See L. 
You may use +C<\w>, C<\W>, C<\s>, C<\S>, C<\d>, and C<\D> within character classes +(though not as either end of a range). See L for details +about C<\pP>, C<\PP>, and C<\X>. Perl defines the following zero-width assertions: @@ -193,41 +195,46 @@ Perl defines the following zero-width assertions: \z Match only at end of string \G Match only where previous m//g left off (works only with /g) -A word boundary (C<\b>) is defined as a spot between two characters that -has a C<\w> on one side of it and a C<\W> on the other side of it (in -either order), counting the imaginary characters off the beginning and -end of the string as matching a C<\W>. (Within character classes C<\b> -represents backspace rather than a word boundary.) The C<\A> and C<\Z> are -just like "^" and "$", except that they won't match multiple times when the -C modifier is used, while "^" and "$" will match at every internal line -boundary. To match the actual end of the string, not ignoring newline, -you can use C<\z>. The C<\G> assertion can be used to chain global -matches (using C), as described in -L. - -It is also useful when writing C-like scanners, when you have several -patterns that you want to match against consequent substrings of your -string, see the previous reference. -The actual location where C<\G> will match can also be influenced -by using C as an lvalue. See L. - -When the bracketing construct C<( ... )> is used, \EdigitE matches the -digit'th substring. Outside of the pattern, always use "$" instead of "\" -in front of the digit. (While the \EdigitE notation can on rare occasion work -outside the current pattern, this should not be relied upon. See the -WARNING below.) The scope of $EdigitE (and C<$`>, C<$&>, and C<$'>) -extends to the end of the enclosing BLOCK or eval string, or to the next -successful pattern match, whichever comes first. 
If you want to use -parentheses to delimit a subpattern (e.g., a set of alternatives) without -saving it as a subpattern, follow the ( with a ?:. +A word boundary (C<\b>) is defined as a spot between two characters +that has a C<\w> on one side of it and a C<\W> on the other side +of it (in either order), counting the imaginary characters off the +beginning and end of the string as matching a C<\W>. (Within +character classes C<\b> represents backspace rather than a word +boundary, just as it normally does in any double-quoted string.) +The C<\A> and C<\Z> are just like "^" and "$", except that they +won't match multiple times when the C modifier is used, while +"^" and "$" will match at every internal line boundary. To match +the actual end of the string and not ignore an optional trailing +newline, use C<\z>. + +The C<\G> assertion can be used to chain global matches (using +C), as described in L. +It is also useful when writing C-like scanners, when you have +several patterns that you want to match against consequent substrings +of your string, see the previous reference. The actual location +where C<\G> will match can also be influenced by using C as +an lvalue. See L. + +When the bracketing construct C<( ... )> is used to create a capture +buffer, \EdigitE matches the digit'th substring. Outside +of the pattern, always use "$" instead of "\" in front of the digit. +(While the \EdigitE notation can on rare occasion work +outside the current pattern, this should not be relied upon. See +the WARNING below.) The scope of $EdigitE (and C<$`>, +C<$&>, and C<$'>) extends to the end of the enclosing BLOCK or eval +string, or to the next successful pattern match, whichever comes +first. If you want to use parentheses to delimit a subpattern +(e.g., a set of alternatives) without saving it as a subpattern, +follow the ( with a ?:. You may have as many parentheses as you wish. If you have more -than 9 substrings, the variables $10, $11, ... 
refer to the -corresponding substring. Within the pattern, \10, \11, etc. refer back -to substrings if there have been at least that many left parentheses before -the backreference. Otherwise (for backward compatibility) \10 is the -same as \010, a backspace, and \11 the same as \011, a tab. And so -on. (\1 through \9 are always backreferences.) +than 9 captured substrings, the variables $10, $11, ... refer to +the corresponding substring. Within the pattern, \10, \11, etc. +refer back to substrings if there have been at least that many left +parentheses before the backreference. Otherwise (for backward +compatibility) \10 is the same as \010, a backspace, and \11 the +same as \011, a tab. And so on. (\1 through \9 are always +backreferences.) C<$+> returns whatever the last bracket match matched. C<$&> returns the entire matched string. (C<$0> used to return the same thing, but not any @@ -242,50 +249,88 @@ everything after the matched string. Examples: $seconds = $3; } -Once perl sees that you need one of C<$&>, C<$`> or C<$'> anywhere in +Once Perl sees that you need one of C<$&>, C<$`> or C<$'> anywhere in the program, it has to provide them on each and every pattern match. This can slow your program down. The same mechanism that handles these provides for the use of $1, $2, etc., so you pay the same price -for each pattern that contains capturing parentheses. But if you never +for each pattern that contains capturing parentheses. But if you never use $&, etc., in your script, then patterns I capturing -parentheses won't be penalized. So avoid $&, $', and $` if you can, +parentheses won't be penalized. So avoid $&, $', and $` if you can, but if you can't (and some algorithms really appreciate them), once you've used them once, use them at will, because you've already paid the price. As of 5.005, $& is not so costly as the other two. -Backslashed metacharacters in Perl are -alphanumeric, such as C<\b>, C<\w>, C<\n>. 
Unlike some other regular -expression languages, there are no backslashed symbols that aren't -alphanumeric. So anything that looks like \\, \(, \), \E, \E, -\{, or \} is always interpreted as a literal character, not a -metacharacter. This was once used in a common idiom to disable or -quote the special meanings of regular expression metacharacters in a -string that you want to use for a pattern. Simply quote all -non-alphanumeric characters: +Backslashed metacharacters in Perl are alphanumeric, such as C<\b>, +C<\w>, C<\n>. Unlike some other regular expression languages, there +are no backslashed symbols that aren't alphanumeric. So anything +that looks like \\, \(, \), \E, \E, \{, or \} is always +interpreted as a literal character, not a metacharacter. This was +once used in a common idiom to disable or quote the special meanings +of regular expression metacharacters in a string that you want to +use for a pattern. Simply quote all non-alphanumeric characters: $pattern =~ s/(\W)/\\$1/g; -Now it is much more common to see either the quotemeta() function or -the C<\Q> escape sequence used to disable all metacharacters' special -meanings like this: +In modern days, it is more common to see either the quotemeta() +function or the C<\Q> metaquoting escape sequence used to disable +all metacharacters' special meanings like this: /$unquoted\Q$quoted\E$unquoted/ -Perl defines a consistent extension syntax for regular expressions. -The syntax is a pair of parentheses with a question mark as the first -thing within the parentheses (this was a syntax error in older -versions of Perl). The character after the question mark gives the -function of the extension. Several extensions are already supported: +=head2 Extended Patterns + +For those situations where simple regular expression patterns are +not enough, Perl defines a consistent extension syntax for venturing +beyond simple patterns such as are found in standard tools like +B and B. 
That syntax is a pair of parentheses with a +question mark as the first thing within the parentheses (this was +a syntax error in older versions of Perl). The character after the +question mark gives the function of the extension. + +Many extensions are already supported, some for almost five years +now. Other, more exotic forms are very new, and should be considered +highly experimental, and are so marked. + +A question mark was chosen for this and for the new minimal-matching +construct because 1) question mark is pretty rare in older regular +expressions, and 2) whenever you see one, you should stop and "question" +exactly what is going on. That's psychology... =over 10 =item C<(?#text)> -A comment. The text is ignored. If the C switch is used to enable -whitespace formatting, a simple C<#> will suffice. Note that perl closes +A comment. The text is ignored. If the C</x> modifier is used to enable +whitespace formatting, a simple C<#> will suffice. Note that Perl closes the comment as soon as it sees a C<)>, so there is no way to put a literal C<)> in the comment. +=item C<(?imsx-imsx)> + +One or more embedded pattern-match modifiers. This is particularly +useful for dynamic patterns, such as those read in from a configuration +file, taken from an argument, or specified in a table somewhere. +Consider the case where some patterns want to be case sensitive +and some do not. The case insensitive ones need to include merely +C<(?i)> at the front of the pattern. For example: + + $pattern = "foobar"; + if ( /$pattern/i ) { } + + # more flexible: + + $pattern = "(?i)foobar"; + if ( /$pattern/ ) { } + +Letters after a C<-> turn those modifiers off. These modifiers are +localized inside an enclosing group (if any). For example, + + ( (?i) blah ) \s+ \1 + +will match a repeated (I<including the case>!) word C<blah> in any +case, assuming C<x> modifier, and no C<i> modifier outside of this +group. 
+ =item C<(?:pattern)> =item C<(?imsx-imsx:pattern)> @@ -299,10 +344,11 @@ is like @fields = split(/\b(a|b|c)\b/) -but doesn't spit out extra fields. +but doesn't spit out extra fields. It's also cheaper not to capture +characters if you don't need to. -The letters between C and C<:> act as flags modifiers, see -L>. In particular, +Any letters between C and C<:> act as flags modifiers as with +C<(?imsx-imsx)>. For example, /(?s-i:more.*than).*million/i @@ -312,15 +358,15 @@ is equivalent to more verbose =item C<(?=pattern)> -A zero-width positive lookahead assertion. For example, C +A zero-width positive look-ahead assertion. For example, C matches a word followed by a tab, without including the tab in C<$&>. =item C<(?!pattern)> -A zero-width negative lookahead assertion. For example C +A zero-width negative look-ahead assertion. For example C matches any occurrence of "foo" that isn't followed by "bar". Note -however that lookahead and lookbehind are NOT the same thing. You cannot -use this for lookbehind. +however that look-ahead and look-behind are NOT the same thing. You cannot +use this for look-behind. If you are looking for a "bar" that isn't preceded by a "foo", C will not do what you want. That's because the C<(?!foo)> is just saying that @@ -332,29 +378,32 @@ Sometimes it's still easier just to say: if (/bar/ && $` !~ /foo$/) -For lookbehind see below. +For look-behind see below. =item C<(?E=pattern)> -A zero-width positive lookbehind assertion. For example, C=\t)\w+/> -matches a word following a tab, without including the tab in C<$&>. -Works only for fixed-width lookbehind. +A zero-width positive look-behind assertion. For example, C=\t)\w+/> +matches a word that follows a tab, without including the tab in C<$&>. +Works only for fixed-width look-behind. =item C<(? -A zero-width negative lookbehind assertion. For example C -matches any occurrence of "foo" that isn't following "bar". -Works only for fixed-width lookbehind. 
+A zero-width negative look-behind assertion. For example C +matches any occurrence of "foo" that does not follow "bar". Works +only for fixed-width look-behind. =item C<(?{ code })> -Experimental "evaluate any Perl code" zero-width assertion. Always -succeeds. C is not interpolated. Currently the rules to -determine where the C ends are somewhat convoluted. +B<WARNING>: This extended regular expression feature is considered +highly experimental, and may be changed or deleted without notice. -The C is properly scoped in the following sense: if the assertion -is backtracked (compare L<"Backtracking">), all the changes introduced after -Cisation are undone, so +This zero-width assertion evaluates any embedded Perl code. It +always succeeds, and its C<code> is not interpolated. Currently, +the rules to determine where the C<code> ends are somewhat convoluted. + +The C<code> is properly scoped in the following sense: If the assertion +is backtracked (compare L<"Backtracking">), all changes introduced after +C<local>ization are undone, so that $_ = 'a' x 8; m< @@ -370,51 +419,55 @@ Cisation are undone, so # location. >x; -will set C<$res = 4>. Note that after the match $cnt returns to the globally -introduced value 0, since the scopes which restrict C statements +will set C<$res = 4>. Note that after the match, $cnt returns to the globally +introduced value, since the scopes which restrict C<local> operators are unwound. -This assertion may be used as L> -switch. If I used in this way, the result of evaluation of C -is put into variable $^R. This happens immediately, so $^R can be used from -other C<(?{ code })> assertions inside the same regular expression. +This assertion may be used as a C<(?(condition)yes-pattern|no-pattern)> +switch. If I<not> used in this way, the result of evaluation of +C<code> is put into the special variable C<$^R>. This happens +immediately, so C<$^R> can be used from other C<(?{ code })> assertions +inside the same regular expression. 
-The above assignment to $^R is properly localized, thus the old value of $^R -is restored if the assertion is backtracked (compare L<"Backtracking">). +The assignment to C<$^R> above is properly localized, so the old +value of C<$^R> is restored if the assertion is backtracked; compare +L<"Backtracking">. -Due to security concerns, this construction is not allowed if the regular -expression involves run-time interpolation of variables, unless -C pragma is used (see L), or the variables contain -results of qr() operator (see L). +For reasons of security, this construct is forbidden if the regular +expression involves run-time interpolation of variables, unless the +perilous C pragma has been used (see L), or the +variables contain results of C operator (see +L). -This restriction is due to the wide-spread (questionable) practice of -using the construct +This restriction is due to the wide-spread and remarkably convenient +custom of using run-time determined strings as patterns. For example: $re = <>; chomp $re; $string =~ /$re/; -without tainting. While this code is frowned upon from security point -of view, when C<(?{})> was introduced, it was considered bad to add -I security holes to existing scripts. - -B Use of the above insecure snippet without also enabling taint mode -is to be severely frowned upon. C does not disable tainting -checks, thus to allow $re in the above snippet to contain C<(?{})> -I, one needs both C and untaint -the $re. +Prior to the execution of code in a pattern, this was completely +safe from a security point of view, although it could of course +raise an exception from an illegal pattern. If you turn on the +C, though, it is no longer secure, so you should +only do so if you are also using taint checking. Better yet, use +the carefully constrained evaluation within a Safe module. See +L for details about both these mechanisms. =item C<(?p{ code })> -I "postponed" regular subexpression. 
C is evaluated -at runtime, at the moment this subexpression may match. The result of -evaluation is considered as a regular expression, and matched as if it -were inserted instead of this construct. +B: This extended regular expression feature is considered +highly experimental, and may be changed or deleted without notice. -C is not interpolated. Currently the rules to -determine where the C ends are somewhat convoluted. +This is a "postponed" regular subexpression. The C is evaluated +at run time, at the moment this subexpression may match. The result +of evaluation is considered as a regular expression and matched as +if it were inserted instead of this construct. -The following regular expression matches matching parenthesized group: +C is not interpolated. As before, the rules to determine +where the C ends are currently somewhat convoluted. + +The following pattern matches a parenthesized group: $re = qr{ \( @@ -428,31 +481,33 @@ The following regular expression matches matching parenthesized group: =item C<(?Epattern)> -An "independent" subexpression. Matches the substring that a -I C would match if anchored at the given position, -B. - -Say, C<^(?Ea*)ab> will never match, since C<(?Ea*)> (anchored -at the beginning of string, as above) will match I characters -C at the beginning of string, leaving no C for C to match. -In contrast, C will match the same as C, since the match of -the subgroup C is influenced by the following group C (see -L<"Backtracking">). In particular, C inside C will match -fewer characters than a standalone C, since this makes the tail match. - -An effect similar to C<(?Epattern)> may be achieved by - - (?=(pattern))\1 - -since the lookahead is in I<"logical"> context, thus matches the same -substring as a standalone C. The following C<\1> eats the matched -string, thus making a zero-length assertion into an analogue of -C<(?E...)>. 
(The difference between these two constructs is that the -second one uses a catching group, thus shifting ordinals of -backreferences in the rest of a regular expression.) - -This construct is useful for optimizations of "eternal" -matches, because it will not backtrack (see L<"Backtracking">). +B: This extended regular expression feature is considered +highly experimental, and may be changed or deleted without notice. + +An "independent" subexpression, one which matches the substring +that a I C would match if anchored at the given +position -- but it matches no more than this substring. This +construct is useful for optimizations of what would otherwise be +"eternal" matches, because it will not backtrack (see L<"Backtracking">). + +For example: C<^(?Ea*)ab> will never match, since C<(?Ea*)> +(anchored at the beginning of string, as above) will match I +characters C at the beginning of string, leaving no C for +C to match. In contrast, C will match the same as C, +since the match of the subgroup C is influenced by the following +group C (see L<"Backtracking">). In particular, C inside +C will match fewer characters than a standalone C, since +this makes the tail match. + +An effect similar to C<(?Epattern)> may be achieved by writing +C<(?=(pattern))\1>. This matches the same substring as a standalone +C, and the following C<\1> eats the matched string; it therefore +makes a zero-length assertion into an analogue of C<(?E...)>. +(The difference between these two constructs is that the second one +uses a capturing group, thus shifting ordinals of backreferences +in the rest of a regular expression.) + +Consider this pattern: m{ \( ( @@ -463,17 +518,16 @@ matches, because it will not backtrack (see L<"Backtracking">). \) }x -That will efficiently match a nonempty group with matching -two-or-less-level-deep parentheses. However, if there is no such group, -it will take virtually forever on a long string. 
That's because there are -so many different ways to split a long string into several substrings. -This is what C<(.+)+> is doing, and C<(.+)+> is similar to a subpattern -of the above pattern. Consider that the above pattern detects no-match -on C<((()aaaaaaaaaaaaaaaaaa> in several seconds, but that each extra -letter doubles this time. This exponential performance will make it -appear that your program has hung. - -However, a tiny modification of this pattern +That will efficiently match a nonempty group with matching parentheses +two levels deep or less. However, if there is no such group, it +will take virtually forever on a long string. That's because there +are so many different ways to split a long string into several +substrings. This is what C<(.+)+> is doing, and C<(.+)+> is similar +to a subpattern of the above pattern. Consider how the pattern +above detects no-match on C<((()aaaaaaaaaaaaaaaaaa> in several +seconds, but each extra letter doubles this time. This +exponential performance will make it appear that your program has +hung. However, a tiny modification of this pattern m{ \( ( @@ -491,18 +545,21 @@ however, that this pattern currently triggers a warning message under B<-w> saying it C<"matches the null string many times">): On simple groups, such as the pattern C<(?E [^()]+ )>, a comparable -effect may be achieved by negative lookahead, as in C<[^()]+ (?! [^()] )>. +effect may be achieved by negative look-ahead, as in C<[^()]+ (?! [^()] )>. This was only 4 times slower on a string with 1000000 Cs. =item C<(?(condition)yes-pattern|no-pattern)> =item C<(?(condition)yes-pattern)> +B<WARNING>: This extended regular expression feature is considered +highly experimental, and may be changed or deleted without notice. + Conditional expression. C<(condition)> should be either an integer in parentheses (which is valid if the corresponding pair of parentheses -matched), or lookahead/lookbehind/evaluate zero-width assertion. 
+matched), or look-ahead/look-behind/evaluate zero-width assertion. -Say, +For example: m{ ( \( )? [^()]+ @@ -512,39 +569,8 @@ Say, matches a chunk of non-parentheses, possibly included in parentheses themselves. -=item C<(?imsx-imsx)> - -One or more embedded pattern-match modifiers. This is particularly -useful for patterns that are specified in a table somewhere, some of -which want to be case sensitive, and some of which don't. The case -insensitive ones need to include merely C<(?i)> at the front of the -pattern. For example: - - $pattern = "foobar"; - if ( /$pattern/i ) { } - - # more flexible: - - $pattern = "(?i)foobar"; - if ( /$pattern/ ) { } - -Letters after C<-> switch modifiers off. - -These modifiers are localized inside an enclosing group (if any). Say, - - ( (?i) blah ) \s+ \1 - -(assuming C modifier, and no C modifier outside of this group) -will match a repeated (I!) word C in any -case. - =back -A question mark was chosen for this and for the new minimal-matching -construct because 1) question mark is pretty rare in older regular -expressions, and 2) whenever you see one, you should stop and "question" -exactly what is going on. That's psychology... - =head2 Backtracking A fundamental feature of regular expression matching involves the @@ -652,7 +678,7 @@ definition might succeed against a particular string. And if there are multiple ways it might succeed, you need to understand backtracking to know which variety of success you will achieve. -When using lookahead assertions and negations, this can all get even +When using look-ahead assertions and negations, this can all get even trickier. Imagine you'd like to find a sequence of non-digits not followed by "123". You might try to write that as @@ -702,7 +728,7 @@ time. Now there's indeed something following "AB" that is not We can deal with this by using both an assertion and a negation.
We'll say that the first part in $1 must be followed by a digit, and in fact, it must also be followed by something that's not "123". Remember that the -lookaheads are zero-width expressions--they only look, but don't consume +look-aheads are zero-width expressions--they only look, but don't consume any of the string in their match. So rewriting this way produces what you'd expect; that is, case 5 will fail, but case 6 succeeds: @@ -712,7 +738,7 @@ you'd expect; that is, case 5 will fail, but case 6 succeeds: 6: got ABC In other words, the two zero-width assertions next to each other work as though -they're ANDed together, just as you'd use any builtin assertions: C +they're ANDed together, just as you'd use any built-in assertions: C matches only if you're at the beginning of the line AND the end of the line simultaneously. The deeper underlying truth is that juxtaposition in regular expressions always means AND, except when you write an explicit OR @@ -720,10 +746,10 @@ using the vertical bar. C means match "a" AND (then) match "b", although the attempted matches are made at different positions because "a" is not a zero-width assertion, but a one-width assertion. -One warning: particularly complicated regular expressions can take -exponential time to solve due to the immense number of possible ways they -can use backtracking to try match. For example this will take a very long -time to run +B: particularly complicated regular expressions can take +exponential time to solve due to the immense number of possible +ways they can use backtracking to try to match. For example, this will +take a very long time to run /((a{0,5}){0,5}){0,5}/ @@ -732,10 +758,10 @@ it would take literally forever--or until you ran out of stack space. A powerful tool for optimizing such beasts is "independent" groups, which do not backtrace (see Lpattern)>>).
Note also that -zero-length lookahead/lookbehind assertions will not backtrace to make +zero-length look-ahead/look-behind assertions will not backtrace to make the tail match, since they are in "logical" context: only the fact whether they match or not is considered relevant. For an example -where side-effects of a lookahead I have influenced the +where side-effects of a look-ahead I have influenced the following match, see Lpattern)>>. =head2 Version 8 Regular Expressions @@ -810,7 +836,7 @@ match "0x1234 0x4321", but not "0x1234 01234", because subpattern 1 actually matched "0x", even though the rule C<0|0x> could potentially match the leading 0 in the second number. -=head2 WARNING on \1 vs $1 +=head2 Warning on \1 vs $1 Some people get too used to writing things like: @@ -837,7 +863,7 @@ different things on the I side of the C. =head2 Repeated patterns matching zero-length substring -WARNING: Difficult material (and prose) ahead. This section needs a rewrite. +B: Difficult material (and prose) ahead. This section needs a rewrite. Regular expressions provide a terse and powerful programming language. As with most other power tools, power comes together with the ability @@ -873,8 +899,9 @@ the infinite loop>. The rules for this are different for lower-level loops given by the greedy modifiers C<*+{}>, and for higher-level ones like the C modifier or split() operator. -The lower-level loops are I when it is detected that a -repeated expression did match a zero-length substring, thus +The lower-level loops are I (that is, the loop is +broken) when Perl detects that a repeated expression matched a +zero-length substring. Thus m{ (?: NON_ZERO_LENGTH | ZERO_LENGTH )* }x; @@ -892,7 +919,7 @@ This prohibition interacts with backtracking (see L<"Backtracking">), and so the I match is chosen if the I match is of zero length. -Say, +For example: $_ = 'bar'; s/\w??/<$&>/g; @@ -905,7 +932,7 @@ alternate with one-character-long matches. 
Similarly, for repeated C the second-best match is the match at the position one notch further in the string. -The additional state of being I is associated to +The additional state of being I is associated with the matched string, and is reset by each assignment to pos(). =head2 Creating custom RE engines @@ -955,7 +982,12 @@ part of this regular expression needs to be converted explicitly $re = customre::convert $re; /\Y|$re\Y|/; -=head2 SEE ALSO +=head1 BUGS + +This manpage varies from difficult to understand to completely +and utterly opaque. + +=head1 SEE ALSO L. @@ -965,4 +997,4 @@ L. L. -I (see L) by Jeffrey Friedl. +I by Jeffrey Friedl. diff --git a/pod/perlref.pod b/pod/perlref.pod index 596ff72..6ec6055 100644 --- a/pod/perlref.pod +++ b/pod/perlref.pod @@ -57,7 +57,7 @@ References can be created in several ways. By using the backslash operator on a variable, subroutine, or value. (This works much like the & (address-of) operator in C.) Note -that this typically creates I reference to a variable, because +that this typically creates I reference to a variable, because there's already a reference to the variable in the symbol table. But the symbol table reference might go away, and you'll still have the reference that the backslash returned. Here are some examples: @@ -150,7 +150,7 @@ Note the presence of the semicolon. Except for the fact that the code inside isn't executed immediately, a C is not so much a declaration as it is an operator, like C or C. (However, no matter how many times you execute that particular line (unless you're in an -C), C<$coderef> will still have a reference to the I +C), $coderef will still have a reference to the I anonymous subroutine.)
Anonymous subroutines act as closures with respect to my() variables, @@ -299,9 +299,9 @@ a simple scalar variable containing a reference of the correct type: &$coderef(1,2,3); print $globref "output\n"; -It's important to understand that we are specifically I dereferencing +It's important to understand that we are specifically I dereferencing C<$arrayref[0]> or C<$hashref{"KEY"}> there. The dereference of the -scalar variable happens I it does any key lookups. Anything more +scalar variable happens I it does any key lookups. Anything more complicated than a simple scalar variable must use methods 2 or 3 below. However, a "simple scalar" includes an identifier that itself uses method 1 recursively. Therefore, the following prints "howdy". @@ -334,7 +334,7 @@ people often make the mistake of viewing the dereferencing symbols as proper operators, and wonder about their precedence. If they were, though, you could use parentheses instead of braces. That's not the case. Consider the difference below; case 0 is a short-hand version of case 1, -I case 2: +I case 2: $$hashref{"KEY"} = "VALUE"; # CASE 0 ${$hashref}{"KEY"} = "VALUE"; # CASE 1 @@ -356,7 +356,7 @@ syntactic sugar, the examples for method 2 may be written: $coderef->(1,2,3); # Subroutine call The left side of the arrow can be any expression returning a reference, -including a previous dereference. Note that C<$array[$x]> is I the +including a previous dereference. Note that C<$array[$x]> is I the same thing as C<$array-E[$x]> here: $array[$x]->{"foo"}->[0] = "January"; @@ -369,7 +369,7 @@ C<{"foo"}> in it. Likewise C<$array[$x]-E{"foo"}> will automatically get defined with an array reference so that we can look up C<[0]> in it. This process is called I. -One more thing here. The arrow is optional I brackets +One more thing here. 
The arrow is optional I brackets subscripts, so you can shrink the above down to $array[$x]{"foo"}[0] = "January"; @@ -421,9 +421,9 @@ chicanery is also useful for arbitrary expressions: We said that references spring into existence as necessary if they are undefined, but we didn't say what happens if a value used as a -reference is already defined, but I a hard reference. If you +reference is already defined, but I a hard reference. If you use it as a reference in this case, it'll be treated as a symbolic -reference. That is, the value of the scalar is taken to be the I +reference. That is, the value of the scalar is taken to be the I of a variable, rather than a direct link to a (possibly) anonymous value. diff --git a/pod/perlrun.pod b/pod/perlrun.pod index 7cb9aed..c71b9f3 100644 --- a/pod/perlrun.pod +++ b/pod/perlrun.pod @@ -17,7 +17,11 @@ B S<[ B<-sTuU> ]> =head1 DESCRIPTION -Upon startup, Perl looks for your script in one of the following +The normal way to run a Perl program is by making it directly +executable, or else by passing the name of the source file as an +argument on the command line. (An interactive Perl environment +is also possible--see L for details on how to do that.) +Upon startup, Perl looks for your program in one of the following places: =over 4 @@ -35,61 +39,71 @@ way. See L.) =item 3. Passed in implicitly via standard input. This works only if there are -no filename arguments--to pass arguments to a STDIN script you -must explicitly specify a "-" for the script name. +no filename arguments--to pass arguments to a STDIN-read program you +must explicitly specify a "-" for the program name. =back With methods 2 and 3, Perl starts parsing the input file from the beginning, unless you've specified a B<-x> switch, in which case it scans for the first line starting with #! and containing the word -"perl", and starts there instead. This is useful for running a script +"perl", and starts there instead. 
This is useful for running a program embedded in a larger message. (In this case you would indicate the end -of the script using the C<__END__> token.) +of the program using the C<__END__> token.) The #! line is always examined for switches as the line is being parsed. Thus, if you're on a machine that allows only one argument with the #! line, or worse, doesn't even recognize the #! line, you still can get consistent switch behavior regardless of how Perl was -invoked, even if B<-x> was used to find the beginning of the script. - -Because many operating systems silently chop off kernel interpretation of -the #! line after 32 characters, some switches may be passed in on the -command line, and some may not; you could even get a "-" without its -letter, if you're not careful. You probably want to make sure that all -your switches fall either before or after that 32 character boundary. -Most switches don't actually care if they're processed redundantly, but -getting a - instead of a complete switch could cause Perl to try to -execute standard input instead of your script. And a partial B<-I> switch +invoked, even if B<-x> was used to find the beginning of the program. + +Because historically some operating systems silently chopped off +kernel interpretation of the #! line after 32 characters, some +switches may be passed in on the command line, and some may not; +you could even get a "-" without its letter, if you're not careful. +You probably want to make sure that all your switches fall either +before or after that 32-character boundary. Most switches don't +actually care if they're processed redundantly, but getting a "-" +instead of a complete switch could cause Perl to try to execute +standard input instead of your program. And a partial B<-I> switch could also cause odd results. -Some switches do care if they are processed twice, for instance combinations -of B<-l> and B<-0>. 
Either put all the switches after the 32 character -boundary (if applicable), or replace the use of B<-0>I by -C. +Some switches do care if they are processed twice, for instance +combinations of B<-l> and B<-0>. Either put all the switches after +the 32-character boundary (if applicable), or replace the use of +B<-0>I by C. Parsing of the #! switches starts wherever "perl" is mentioned in the line. The sequences "-*" and "- " are specifically ignored so that you could, if you were so inclined, say #!/bin/sh -- # -*- perl -*- -p - eval 'exec /usr/bin/perl -wS $0 ${1+"$@"}' + eval 'exec perl -wS $0 ${1+"$@"}' if $running_under_some_shell; -to let Perl see the B<-p> switch. +to let Perl see the B<-p> switch. + +A similar trick involves the B program, if you have it. + + #!/usr/bin/env perl + +The examples above use a relative path to the perl interpreter, +getting whatever version is first in the user's path. If you want +a specific version of Perl, say, perl5.005_57, you should place +that directly in the #! line's path. If the #! line does not contain the word "perl", the program named after the #! is executed instead of the Perl interpreter. This is slightly bizarre, but it helps people on machines that don't do #!, because they -can tell a program that their SHELL is /usr/bin/perl, and Perl will then +can tell a program that their SHELL is F, and Perl will then dispatch the program to the correct interpreter for them. -After locating your script, Perl compiles the entire script to an +After locating your program, Perl compiles the entire program to an internal form. If there are any compilation errors, execution of the -script is not attempted. (This is unlike the typical shell script, +program is not attempted. (This is unlike the typical shell script, which might run part-way through before finding a syntax error.) -If the script is syntactically correct, it is executed. If the script +If the program is syntactically correct, it is executed. 
If the program runs off the end without hitting an exit() or die() operator, an implicit C is provided to indicate successful completion. @@ -105,12 +119,12 @@ Put extproc perl -S -your_switches -as the first line in C<*.cmd> file (C<-S> due to a bug in cmd.exe's +as the first line in C<*.cmd> file (B<-S> due to a bug in cmd.exe's `extproc' handling). =item MS-DOS -Create a batch file to run your script, and codify it in +Create a batch file to run your program, and codify it in C (see the F file in the source distribution for more information). @@ -126,7 +140,7 @@ and a Perl library file. =item Macintosh -Macintosh perl scripts will have the appropriate Creator and +A Macintosh perl program will have the appropriate Creator and Type, so that double-clicking them will invoke the perl application. =item VMS @@ -136,10 +150,10 @@ Put $ perl -mysw 'f$env("procedure")' 'p1' 'p2' 'p3' 'p4' 'p5' 'p6' 'p7' 'p8' ! $ exit++ + ++$status != 0 and $exit = $status = undef; -at the top of your script, where C<-mysw> are any command line switches you -want to pass to Perl. You can now invoke the script directly, by saying -C, or as a DCL procedure, by saying C<@script> (or implicitly -via F by just using the name of the script). +at the top of your program, where B<-mysw> are any command line switches you +want to pass to Perl. You can now invoke the program directly, by saying +C, or as a DCL procedure, by saying C<@program> (or implicitly +via F by just using the name of the program). This incantation is a bit much to remember, but Perl will display it for you if you say C. @@ -150,10 +164,10 @@ Command-interpreters on non-Unix systems have rather different ideas on quoting than Unix shells. You'll need to learn the special characters in your command-interpreter (C<*>, C<\> and C<"> are common) and how to protect whitespace and these characters to run -one-liners (see C<-e> below). +one-liners (see B<-e> below). 
On some systems, you may have to change single-quotes to double ones, -which you must I do on Unix or Plan9 systems. You might also +which you must I do on Unix or Plan9 systems. You might also have to change a single % to a %%. For example: @@ -171,13 +185,13 @@ For example: # VMS perl -e "print ""Hello world\n""" -The problem is that none of this is reliable: it depends on the command -and it is entirely possible neither works. If 4DOS was the command shell, this would -probably work better: +The problem is that none of this is reliable: it depends on the +command and it is entirely possible neither works. If B<4DOS> were +the command shell, this would probably work better: perl -e "print "Hello world\n"" -CMD.EXE in Windows NT slipped a lot of standard Unix functionality in +B in Windows NT slipped a lot of standard Unix functionality in when nobody was looking, but just try to find documentation for its quoting rules. @@ -191,22 +205,30 @@ There is no general solution to all of this. It's just a mess. =head2 Location of Perl It may seem obvious to say, but Perl is useful only when users can -easily find it. When possible, it's good for both B and -B to be symlinks to the actual binary. If that -can't be done, system administrators are strongly encouraged to put -(symlinks to) perl and its accompanying utilities, such as perldoc, into -a directory typically found along a user's PATH, or in another obvious -and convenient place. +easily find it. When possible, it's good for both F +and F to be symlinks to the actual binary. If +that can't be done, system administrators are strongly encouraged +to put (symlinks to) perl and its accompanying utilities into a +directory typically found along a user's PATH, or in some other +obvious and convenient place. + +In this documentation, C<#!/usr/bin/perl> on the first line of the program +will stand in for whatever method works on your system. You are +advised to use a specific path if you care about a specific version. 
In this documentation, C<#!/usr/bin/perl> on the first line of the script -will stand in for whatever method works on your system. + #!/usr/local/bin/perl5.00554 -=head2 Switches +or if you just want to be running at least a particular version, place a statement +like this at the top of your program: -A single-character switch may be combined with the following switch, if -any. + use 5.005_54; - #!/usr/bin/perl -spi.bak # same as -s -p -i.bak +=head2 Command Switches + +As with all standard commands, a single-character switch may be +clustered with the following switch, if any. + + #!/usr/bin/perl -spi.orig # same as -s -p -i.orig Switches include: @@ -220,7 +242,7 @@ precede or follow the digits. For example, if you have a version of B which can print filenames terminated by the null character, you can say this: - find . -name '*.bak' -print0 | perl -n0e unlink + find . -name '*.orig' -print0 | perl -n0e unlink The special value 00 will cause Perl to slurp files in paragraph mode. The value 0777 will cause Perl to slurp files whole because there is no @@ -245,26 +267,26 @@ An alternate delimiter may be specified using B<-F>. =item B<-c> -causes Perl to check the syntax of the script and then exit without +causes Perl to check the syntax of the program and then exit without executing it. Actually, it I execute C, C, and C blocks, because these are considered as occurring outside the execution of -your program. +your program. C blocks, however, will be skipped. =item B<-d> -runs the script under the Perl debugger. See L. +runs the program under the Perl debugger. See L. =item B<-d:>I -runs the script under the control of a debugging or tracing module -installed as Devel::foo. E.g., B<-d:DProf> executes the script using the -Devel::DProf profiler. See L. +runs the program under the control of a debugging, profiling, or +tracing module installed as Devel::foo. E.g., B<-d:DProf> executes +the program using the Devel::DProf profiler. See L.
=item B<-D>I =item B<-D>I -sets debugging flags. To watch how it executes your script, use +sets debugging flags. To watch how it executes your program, use B<-Dtls>. (This works only if debugging is compiled into your Perl.) Another nice value is B<-Dx>, which lists your compiled syntax tree. And B<-Dr> displays compiled regular expressions. As an @@ -283,24 +305,35 @@ equivalent to B<-Dtls>): 512 r Regular expression parsing and execution 1024 x Syntax tree dump 2048 u Tainting checks - 4096 L Memory leaks (needs C<-DLEAKTEST> when compiling Perl) + 4096 L Memory leaks (needs -DLEAKTEST when compiling Perl) 8192 H Hash dump -- usurps values() 16384 X Scratchpad allocation 32768 D Cleaning up 65536 S Thread synchronization -All these flags require C<-DDEBUGGING> when you compile the Perl -executable. This flag is automatically set if you include C<-g> +All these flags require B<-DDEBUGGING> when you compile the Perl +executable. See the F file in the Perl source distribution +for how to do this. This flag is automatically set if you include B<-g> option when C asks you about optimizer/debugger flags. +If you're just trying to get a print out of each line of Perl code +as it executes, the way that C provides for shell scripts, +you can't use Perl's B<-D> switch. Instead do this + + # Bourne shell syntax + $ PERLDB_OPTS="NonStop=1 AutoTrace=1 frame=2" perl -dS program + + # csh syntax + % (setenv PERLDB_OPTS "NonStop=1 AutoTrace=1 frame=2"; perl -dS program) + +See L for details and variations. + =item B<-e> I -may be used to enter one line of script. -If B<-e> is given, Perl -will not look for a script filename in the argument list. -Multiple B<-e> commands may -be given to build up a multi-line script. -Make sure to use semicolons where you would in a normal program. +may be used to enter one line of program. If B<-e> is given, Perl +will not look for a filename in the argument list. Multiple B<-e> +commands may be given to build up a multi-line script. 
Make sure +to use semicolons where you would in a normal program. =item B<-F>I @@ -324,47 +357,46 @@ rules: If no extension is supplied, no backup is made and the current file is overwritten. -If the extension doesn't contain a C<*> then it is appended to the end -of the current filename as a suffix. - -If the extension does contain one or more C<*> characters, then each C<*> -is replaced with the current filename. In perl terms you could think of -this as: +If the extension doesn't contain a C<*>, then it is appended to the +end of the current filename as a suffix. If the extension does +contain one or more C<*> characters, then each C<*> is replaced +with the current filename. In Perl terms, you could think of this +as: ($backup = $extension) =~ s/\*/$file_name/g; This allows you to add a prefix to the backup file, instead of (or in addition to) a suffix: - $ perl -pi'bak_*' -e 's/bar/baz/' fileA # backup to 'bak_fileA' + $ perl -pi 'orig_*' -e 's/bar/baz/' fileA # backup to 'orig_fileA' Or even to place backup copies of the original files into another directory (provided the directory already exists): - $ perl -pi'old/*.bak' -e 's/bar/baz/' fileA # backup to 'old/fileA.bak' + $ perl -pi 'old/*.orig' -e 's/bar/baz/' fileA # backup to 'old/fileA.orig' These sets of one-liners are equivalent: $ perl -pi -e 's/bar/baz/' fileA # overwrite current file - $ perl -pi'*' -e 's/bar/baz/' fileA # overwrite current file + $ perl -pi '*' -e 's/bar/baz/' fileA # overwrite current file - $ perl -pi'.bak' -e 's/bar/baz/' fileA # backup to 'fileA.bak' - $ perl -pi'*.bak' -e 's/bar/baz/' fileA # backup to 'fileA.bak' + $ perl -pi '.orig' -e 's/bar/baz/' fileA # backup to 'fileA.orig' + $ perl -pi '*.orig' -e 's/bar/baz/' fileA # backup to 'fileA.orig' From the shell, saying - $ perl -p -i.bak -e "s/foo/bar/; ... " + $ perl -p -i.orig -e "s/foo/bar/; ... 
" -is the same as using the script: +is the same as using the program: - #!/usr/bin/perl -pi.bak + #!/usr/bin/perl -pi.orig s/foo/bar/; which is equivalent to #!/usr/bin/perl - $extension = '.bak'; - while (<>) { + $extension = '.orig'; + LINE: while (<>) { if ($ARGV ne $oldargv) { if ($extension !~ /\*/) { $backup = $ARGV . $extension; @@ -392,9 +424,9 @@ output filehandle after the loop. As shown above, Perl creates the backup file whether or not any output is actually changed. So this is just a fancy way to copy files: - $ perl -p -i'/some/file/path/*' -e 1 file1 file2 file3... - or - $ perl -p -i'.bak' -e 1 file1 file2 file3... + $ perl -p -i '/some/file/path/*' -e 1 file1 file2 file3... +or + $ perl -p -i '.orig' -e 1 file1 file2 file3... You can use C without parentheses to locate the end of each input file, in case you want to append to each file, or reset line numbering @@ -404,15 +436,19 @@ If, for a given file, Perl is unable to create the backup file as specified in the extension then it will skip that file and continue on with the next one (if it exists). -For a discussion of issues surrounding file permissions and C<-i>, see -L. +For a discussion of issues surrounding file permissions and B<-i>, +see L. You cannot use B<-i> to create directories or to strip extensions from files. -Perl does not expand C<~>, so don't do that. +Perl does not expand C<~> in filenames, which is good, since some +folks use it for their backup files: -Finally, note that the B<-i> switch does not impede execution when no + $ perl -pi~ -e 's/foo/bar/' file1 file2 file3... + +Finally, the B<-i> switch does not impede execution when no files are given on the command line. In this case, no backup is made (the original file cannot, of course, be determined) and processing proceeds from STDIN to STDOUT as might be expected. @@ -426,13 +462,13 @@ searches /usr/include and /usr/lib/perl. =item B<-l>[I] -enables automatic line-ending processing. 
It has two effects: first, -it automatically chomps "C<$/>" (the input record separator) when used -with B<-n> or B<-p>, and second, it assigns "C<$\>" -(the output record separator) to have the value of I so that -any print statements will have that separator added back on. If -I is omitted, sets "C<$\>" to the current value of "C<$/>". For -instance, to trim lines to 80 columns: +enables automatic line-ending processing. It has two separate +effects. First, it automatically chomps C<$/> (the input record +separator) when used with B<-n> or B<-p>. Second, it assigns C<$\> +(the output record separator) to have the value of I so +that any print statements will have that separator added back on. +If I is omitted, sets C<$\> to the current value of +C<$/>. For instance, to trim lines to 80 columns: perl -lpe 'substr($_, 80) = ""' @@ -452,55 +488,59 @@ This sets C<$\> to newline and then sets C<$/> to the null character. =item B<-[mM]>[B<->]I -C<-m>I executes C I C<();> before executing your -script. +B<-m>I executes C I C<();> before executing your +program. -C<-M>I executes C I C<;> before executing your -script. You can use quotes to add extra code after the module name, -e.g., C<-M'module qw(foo bar)'>. +B<-M>I executes C I C<;> before executing your +program. You can use quotes to add extra code after the module name, +e.g., C<'-Mmodule qw(foo bar)'>. -If the first character after the C<-M> or C<-m> is a dash (C<->) +If the first character after the B<-M> or B<-m> is a dash (C<->) then the 'use' is replaced with 'no'. A little builtin syntactic sugar means you can also say -C<-mmodule=foo,bar> or C<-Mmodule=foo,bar> as a shortcut for -C<-M'module qw(foo bar)'>. This avoids the need to use quotes when -importing symbols. The actual code generated by C<-Mmodule=foo,bar> is +B<-mmodule=foo,bar> or B<-Mmodule=foo,bar> as a shortcut for +C<'-Mmodule qw(foo bar)'>. This avoids the need to use quotes when +importing symbols. 
The actual code generated by B<-Mmodule=foo,bar> is C. Note that the C<=> form -removes the distinction between C<-m> and C<-M>. +removes the distinction between B<-m> and B<-M>. =item B<-n> -causes Perl to assume the following loop around your script, which +causes Perl to assume the following loop around your program, which makes it iterate over filename arguments somewhat like B or B: + LINE: while (<>) { - ... # your script goes here + ... # your program goes here } Note that the lines are not printed by default. See B<-p> to have lines printed. If a file named by an argument cannot be opened for -some reason, Perl warns you about it, and moves on to the next file. +some reason, Perl warns you about it and moves on to the next file. Here is an efficient way to delete all files older than a week: - find . -mtime +7 -print | perl -nle 'unlink;' + find . -mtime +7 -print | perl -nle unlink -This is faster than using the C<-exec> switch of B because you don't -have to start a process on every filename found. +This is faster than using the B<-exec> switch of B because you don't +have to start a process on every filename found. It does suffer from +the bug of mishandling newlines in pathnames, which you can fix if +you follow the example under B<-0>. C and C blocks may be used to capture control before or after -the implicit loop, just as in B. +the implicit program loop, just as in B. =item B<-p> -causes Perl to assume the following loop around your script, which +causes Perl to assume the following loop around your program, which makes it iterate over filename arguments somewhat like B: + LINE: while (<>) { - ... # your script goes here + ... # your program goes here } continue { print or die "-p destination: $!\n"; } @@ -512,30 +552,31 @@ treated as fatal. To suppress printing use the B<-n> switch. A B<-p> overrides a B<-n> switch. C and C blocks may be used to capture control before or after -the implicit loop, just as in awk. +the implicit loop, just as in B.
=item B<-P> -causes your script to be run through the C preprocessor before -compilation by Perl. (Because both comments and cpp directives begin +causes your program to be run through the C preprocessor before +compilation by Perl. (Because both comments and B directives begin with the # character, you should avoid starting comments with any words recognized by the C preprocessor such as "if", "else", or "define".) =item B<-s> -enables some rudimentary switch parsing for switches on the command -line after the script name but before any filename arguments (or before +enables rudimentary switch parsing for switches on the command +line after the program name but before any filename arguments (or before a B<-->). Any switch found there is removed from @ARGV and sets the -corresponding variable in the Perl script. The following script -prints "true" if and only if the script is invoked with a B<-xyz> switch. +corresponding variable in the Perl program. The following program +prints "true" if and only if the program is invoked with a B<-xyz> switch. #!/usr/bin/perl -s - if ($xyz) { print "true\n"; } + if ($xyz) { print "true\n" } =item B<-S> makes Perl use the PATH environment variable to search for the -script (unless the name of the script contains directory separators). +program (unless the name of the program contains directory separators). + On some platforms, this also makes Perl append suffixes to the filename while searching for it. For example, on Win32 platforms, the ".bat" and ".cmd" suffixes are appended if a lookup for the @@ -543,16 +584,6 @@ original name fails, and if the name does not already end in one of those suffixes. If your Perl was compiled with DEBUGGING turned on, using the -Dp switch to Perl shows how the search progresses. -If the filename supplied contains directory separators (i.e. 
it is an -absolute or relative pathname), and if the file is not found, -platforms that append file extensions will do so and try to look -for the file with those extensions added, one by one. - -On DOS-like platforms, if the script does not contain directory -separators, it will first be searched for in the current directory -before being searched for on the PATH. On Unix platforms, the -script will be searched for strictly on the PATH. - Typically this is used to emulate #! startup on platforms that don't support #!. This example works on many platforms that have a shell compatible with Bourne shell: @@ -561,94 +592,121 @@ have a shell compatible with Bourne shell: eval 'exec /usr/bin/perl -wS $0 ${1+"$@"}' if $running_under_some_shell; -The system ignores the first line and feeds the script to /bin/sh, -which proceeds to try to execute the Perl script as a shell script. +The system ignores the first line and feeds the program to F, +which proceeds to try to execute the Perl program as a shell script. The shell executes the second line as a normal shell command, and thus starts up the Perl interpreter. On some systems $0 doesn't always contain the full pathname, so the B<-S> tells Perl to search for the -script if necessary. After Perl locates the script, it parses the +program if necessary. After Perl locates the program, it parses the lines and ignores them because the variable $running_under_some_shell -is never true. If the script will be interpreted by csh, you will need +is never true. If the program will be interpreted by csh, you will need to replace C<${1+"$@"}> with C<$*>, even though that doesn't understand embedded spaces (and such) in the argument list. To start up sh rather than csh, some systems may have to replace the #! line with a line containing just a colon, which will be politely ignored by Perl. 
Other systems can't control that, and need a totally devious construct that -will work under any of csh, sh, or Perl, such as the following: +will work under any of B, B, or Perl, such as the following: - eval '(exit $?0)' && eval 'exec /usr/bin/perl -wS $0 ${1+"$@"}' + eval '(exit $?0)' && eval 'exec perl -wS $0 ${1+"$@"}' & eval 'exec /usr/bin/perl -wS $0 $argv:q' if $running_under_some_shell; +If the filename supplied contains directory separators (i.e., is an +absolute or relative pathname), and if that file is not found, +platforms that append file extensions will do so and try to look +for the file with those extensions added, one by one. + +On DOS-like platforms, if the program does not contain directory +separators, it will first be searched for in the current directory +before being searched for on the PATH. On Unix platforms, the +program will be searched for strictly on the PATH. + =item B<-T> forces "taint" checks to be turned on so you can test them. Ordinarily -these checks are done only when running setuid or setgid. It's a good -idea to turn them on explicitly for programs run on another's behalf, -such as CGI programs. See L. Note that (for security reasons) -this option must be seen by Perl quite early; usually this means it must -appear early on the command line or in the #! line (for systems which -support that). +these checks are done only when running setuid or setgid. It's a +good idea to turn them on explicitly for programs that run on behalf +of someone else whom you might not necessarily trust, such as CGI +programs or any internet servers you might write in Perl. See +L for details. For security reasons, this option must be +seen by Perl quite early; usually this means it must appear early +on the command line or in the #! line for systems which support +that construct. =item B<-u> -causes Perl to dump core after compiling your script. 
You can then -in theory take this core dump and turn it into an executable file by using the -B program (not supplied). This speeds startup at the expense of -some disk space (which you can minimize by stripping the executable). -(Still, a "hello world" executable comes out to about 200K on my -machine.) If you want to execute a portion of your script before dumping, -use the dump() operator instead. Note: availability of B is -platform specific and may not be available for a specific port of -Perl. It has been superseded by the new perl-to-C compiler, which is more -portable, even though it's still only considered beta. +This obsolete switch causes Perl to dump core after compiling your +program. You can then in theory take this core dump and turn it +into an executable file by using the B program (not supplied). +This speeds startup at the expense of some disk space (which you +can minimize by stripping the executable). (Still, a "hello world" +executable comes out to about 200K on my machine.) If you want to +execute a portion of your program before dumping, use the dump() +operator instead. Note: availability of B is platform +specific and may not be available for a specific port of Perl. + +This switch has been superseded in favor of the new Perl code +generator backends to the compiler. See L and L +for details. =item B<-U> allows Perl to do unsafe operations. Currently the only "unsafe" operations are the unlinking of directories while running as superuser, and running setuid programs with fatal taint checks turned into -warnings. Note that the B<-w> switch (or the C<$^W> variable) must -be used along with this option to actually B the +warnings. Note that the B<-w> switch (or the C<$^W> variable) must +be used along with this option to actually I the taint-check warnings. =item B<-v> -prints the version and patchlevel of your Perl executable. +prints the version and patchlevel of your perl executable. 
=item B<-V> prints summary of the major perl configuration values and the current -value of @INC. +values of @INC. =item B<-V:>I Prints to STDOUT the value of the named configuration variable. +For example, -=item B<-w> + $ perl -V:man.dir + +will provide strong clues about what your MANPATH variable should +be set to in order to access the Perl documentation. -prints warnings about variable names that are mentioned only once, and -scalar variables that are used before being set. Also warns about -redefined subroutines, and references to undefined filehandles or -filehandles opened read-only that you are attempting to write on. Also -warns you if you use values as a number that doesn't look like numbers, -using an array as though it were a scalar, if your subroutines recurse -more than 100 deep, and innumerable other things. +=item B<-w> -You can disable specific warnings using C<__WARN__> hooks, as described -in L and L. See also L and L. +prints warnings about dubious constructs, such as variable names +that are mentioned only once and scalar variables that are used +before being set, redefined subroutines, references to undefined +filehandles or filehandles opened read-only that you are attempting +to write on, values used as a number that don't look like numbers, +using an array as though it were a scalar, if your subroutines +recurse more than 100 deep, and innumerable other things. + +This switch really just enables the internal C<$^W> variable. You +can disable or promote into fatal errors specific warnings using +C<__WARN__> hooks, as described in L and L. +See also L and L. A new, fine-grained warning +facility is also available if you want to manipulate entire classes +of warnings; see L (or better yet, its source code) about +that. =item B<-x> I -tells Perl that the script is embedded in a message. Leading -garbage will be discarded until the first line that starts with #! and -contains the string "perl".
Any meaningful switches on that line will -be applied. If a directory name is specified, Perl will switch to -that directory before running the script. The B<-x> switch controls -only the disposal of leading garbage. The script must be -terminated with C<__END__> if there is trailing garbage to be ignored (the -script can process any or all of the trailing garbage via the DATA -filehandle if desired). +tells Perl that the program is embedded in a larger chunk of unrelated +ASCII text, such as in a mail message. Leading garbage will be +discarded until the first line that starts with #! and contains the +string "perl". Any meaningful switches on that line will be applied. +If a directory name is specified, Perl will switch to that directory +before running the program. The B<-x> switch controls only the +disposal of leading garbage. The program must be terminated with +C<__END__> if there is trailing garbage to be ignored (the program +can process any or all of the trailing garbage via the DATA filehandle +if desired). =back @@ -666,7 +724,7 @@ Used if chdir has no argument and HOME is not set. =item PATH -Used in executing subprocesses, and in finding the script if B<-S> is +Used in executing subprocesses, and in finding the program if B<-S> is used. =item PERL5LIB @@ -674,8 +732,8 @@ used. A colon-separated list of directories in which to look for Perl library files before looking in the standard library and the current directory. If PERL5LIB is not defined, PERLLIB is used. When running -taint checks (because the script was running setuid or setgid, or the -B<-T> switch was used), neither variable is used. The script should +taint checks (because the program was running setuid or setgid, or the +B<-T> switch was used), neither variable is used. The program should instead say use lib "/my/directory"; @@ -684,7 +742,7 @@ instead say Command-line options (switches). Switches in this variable are taken as if they were on every Perl command line. 
Only the B<-[DIMUdmw]> -switches are allowed. When running taint checks (because the script +switches are allowed. When running taint checks (because the program was running setuid or setgid, or the B<-T> switch was used), this variable is ignored. If PERL5OPT begins with B<-T>, tainting will be enabled, and any subsequent options ignored. @@ -701,12 +759,12 @@ The command used to load the debugger code. The default is: BEGIN { require 'perl5db.pl' } -=item PERL5SHELL (specific to WIN32 port) +=item PERL5SHELL (specific to the Win32 port) May be set to an alternative shell that perl must use internally for executing "backtick" commands or system(). Default is C on WindowsNT and C on Windows95. The value is considered -to be space delimited. Precede any character that needs to be protected +to be space-separated. Precede any character that needs to be protected (like a space or backslash) with a backslash. Note that Perl doesn't use COMSPEC for this purpose because @@ -736,12 +794,11 @@ Perl also has environment variables that control how Perl handles data specific to particular natural languages. See L. Apart from these, Perl uses no other environment variables, except -to make them available to the script being executed, and to child -processes. However, scripts running setuid would do well to execute +to make them available to the program being executed, and to child +processes. However, programs running setuid would do well to execute the following lines before doing anything else, just to keep people honest: - $ENV{PATH} = '/bin:/usr/bin'; # or whatever you need + $ENV{PATH} = '/bin:/usr/bin'; # or whatever you need $ENV{SHELL} = '/bin/sh' if exists $ENV{SHELL}; delete @ENV{qw(IFS CDPATH ENV BASH_ENV)}; - diff --git a/pod/perlsec.pod b/pod/perlsec.pod index 0b22acd..212879a 100644 --- a/pod/perlsec.pod +++ b/pod/perlsec.pod @@ -139,7 +139,7 @@ metacharacters, nor are dot, dash, or at going to mean something special to the shell. 
Use of C would have been insecure in theory because it lets everything through, but Perl doesn't check for that. The lesson is that when untainting, you must be exceedingly careful with your patterns. -Laundering data using regular expression is the I mechanism for +Laundering data using regular expression is the I mechanism for untainting dirty data, unless you use the strategy detailed below to fork a child of lesser privilege. diff --git a/pod/perlsub.pod b/pod/perlsub.pod index bfab0fe..2bd1cfd 100644 --- a/pod/perlsub.pod +++ b/pod/perlsub.pod @@ -19,22 +19,23 @@ To define an anonymous subroutine at runtime: To import subroutines: - use PACKAGE qw(NAME1 NAME2 NAME3); + use MODULE qw(NAME1 NAME2 NAME3); To call subroutines: NAME(LIST); # & is optional with parentheses. NAME LIST; # Parentheses optional if predeclared/imported. + &NAME(LIST); # Circumvent prototypes. &NAME; # Makes current @_ visible to called subroutine. =head1 DESCRIPTION -Like many languages, Perl provides for user-defined subroutines. These -may be located anywhere in the main program, loaded in from other files -via the C, C, or C keywords, or even generated on the -fly using C or anonymous subroutines (closures). You can even call -a function indirectly using a variable containing its name or a CODE reference -to it. +Like many languages, Perl provides for user-defined subroutines. +These may be located anywhere in the main program, loaded in from +other files via the C, C, or C keywords, or +generated on the fly using C or anonymous subroutines (closures). +You can even call a function indirectly using a variable containing +its name or a CODE reference. The Perl model for function call and return values is simple: all functions are passed as parameters one single flat list of scalars, and @@ -44,37 +45,38 @@ collapse, losing their identities--but you may always use pass-by-reference instead to avoid this. 
Both call and return lists may contain as many or as few scalar elements as you'd like. (Often a function without an explicit return statement is called a subroutine, but -there's really no difference from the language's perspective.) - -Any arguments passed to the routine come in as the array C<@_>. Thus if you -called a function with two arguments, those would be stored in C<$_[0]> -and C<$_[1]>. The array C<@_> is a local array, but its elements are -aliases for the actual scalar parameters. In particular, if an element -C<$_[0]> is updated, the corresponding argument is updated (or an error -occurs if it is not updatable). If an argument is an array or hash -element which did not exist when the function was called, that element is -created only when (and if) it is modified or if a reference to it is -taken. (Some earlier versions of Perl created the element whether or not -it was assigned to.) Note that assigning to the whole array C<@_> removes -the aliasing, and does not update any arguments. - -The return value of the subroutine is the value of the last expression -evaluated. Alternatively, a C statement may be used to exit the +there's really no difference from Perl's perspective.) + +Any arguments passed in show up in the array C<@_>. Therefore, if +you called a function with two arguments, those would be stored in +C<$_[0]> and C<$_[1]>. The array C<@_> is a local array, but its +elements are aliases for the actual scalar parameters. In particular, +if an element C<$_[0]> is updated, the corresponding argument is +updated (or an error occurs if it is not updatable). If an argument +is an array or hash element which did not exist when the function +was called, that element is created only when (and if) it is modified +or a reference to it is taken. (Some earlier versions of Perl +created the element whether or not the element was assigned to.) +Assigning to the whole array C<@_> removes that aliasing, and does +not update any arguments. 
+ +The return value of a subroutine is the value of the last expression +evaluated. More explicitly, a C statement may be used to exit the subroutine, optionally specifying the returned value, which will be evaluated in the appropriate context (list, scalar, or void) depending on the context of the subroutine call. If you specify no return value, -the subroutine will return an empty list in a list context, an undefined -value in a scalar context, or nothing in a void context. If you return -one or more arrays and/or hashes, these will be flattened together into -one large indistinguishable list. - -Perl does not have named formal parameters, but in practice all you do is -assign to a C list of these. Any variables you use in the function -that aren't declared private are global variables. For the gory details -on creating private variables, see -L<"Private Variables via my()"> and L<"Temporary Values via local()">. -To create protected environments for a set of functions in a separate -package (and probably a separate file), see L. +the subroutine returns an empty list in list context, the undefined +value in scalar context, or nothing in void context. If you return +one or more aggregates (arrays and hashes), these will be flattened +together into one large indistinguishable list. + +Perl does not have named formal parameters. In practice all you +do is assign to a C list of these. Variables that aren't +declared to be private are global variables. For gory details +on creating private variables, see L<"Private Variables via my()"> +and L<"Temporary Values via local()">. To create protected +environments for a set of functions in a separate package (and +probably a separate file), see L. Example: @@ -93,7 +95,7 @@ Example: # that start with whitespace sub get_line { - $thisline = $lookahead; # GLOBAL VARIABLES!! + $thisline = $lookahead; # global variables! 
LINE: while (defined($lookahead = )) { if ($lookahead =~ /^[ \t]/) { $thisline .= $lookahead; @@ -102,24 +104,25 @@ Example: last LINE; } } - $thisline; + return $thisline; } $lookahead = ; # get first line - while ($_ = get_line()) { + while (defined($line = get_line())) { ... } -Use array assignment to a local list to name your formal arguments: +Assign to a list of private variables to name your arguments: sub maybeset { my($key, $value) = @_; $Foo{$key} = $value unless $Foo{$key}; } -This also has the effect of turning call-by-reference into call-by-value, -because the assignment copies the values. Otherwise a function is free to -do in-place modifications of C<@_> and change its caller's values. +Because the assignment copies the values, this also has the effect +of turning call-by-reference into call-by-value. Otherwise a +function is free to do in-place modifications of C<@_> and change +its caller's values. upcase_in($v1, $v2); # this changes $v1 and $v2 sub upcase_in { @@ -136,7 +139,7 @@ It would be much safer if the C function were written to return a copy of its parameters instead of changing them in place: - ($v3, $v4) = upcase($v1, $v2); # this doesn't + ($v3, $v4) = upcase($v1, $v2); # this doesn't change $v1 and $v2 sub upcase { return unless defined wantarray; # void context, do nothing my @parms = @_; @@ -144,12 +147,12 @@ of changing them in place: return wantarray ? @parms : $parms[0]; } -Notice how this (unprototyped) function doesn't care whether it was passed -real scalars or arrays. Perl will see everything as one big long flat C<@_> -parameter list. This is one of the ways where Perl's simple -argument-passing style shines. The C function would work perfectly -well without changing the C definition even if we fed it things -like this: +Notice how this (unprototyped) function doesn't care whether it was +passed real scalars or arrays. Perl sees all arguments as one big, +long, flat parameter list in C<@_>.
This is one area where +Perl's simple argument-passing style shines. The C<upcase()> +function would work perfectly well without changing the C<upcase()> +definition even if we fed it things like this: @newlist = upcase(@list1, @list2); @newlist = upcase( split /:/, $var ); @@ -158,24 +161,26 @@ Do not, however, be tempted to do this: (@a, @b) = upcase(@list1, @list2); -Because like its flat incoming parameter list, the return list is also -flat. So all you have managed to do here is stored everything in C<@a> and -made C<@b> an empty list. See L for alternatives. - -A subroutine may be called using the "C<&>" prefix. The "C<&>" is optional -in modern Perls, and so are the parentheses if the subroutine has been -predeclared. (Note, however, that the "C<&>" is I optional when -you're just naming the subroutine, such as when it's used as an -argument to C or C. Nor is it optional when you want to -do an indirect subroutine call with a subroutine name or reference -using the C<&$subref()> or C<&{$subref}()> constructs. See L -for more on that.) - -Subroutines may be called recursively. If a subroutine is called using -the "C<&>" form, the argument list is optional, and if omitted, no C<@_> array is -set up for the subroutine: the C<@_> array at the time of the call is -visible to subroutine instead. This is an efficiency mechanism that -new users may wish to avoid. +Like the flattened incoming parameter list, the return list is also +flattened on return. So all you have managed to do here is store +everything in C<@a> and made C<@b> an empty list. See L<Pass by Reference> for alternatives. + +A subroutine may be called using an explicit C<&> prefix. The +C<&> is optional in modern Perl, as are parentheses if the +subroutine has been predeclared. The C<&> is I<not> optional +when just naming the subroutine, such as when it's used as +an argument to defined() or undef().
Nor is it optional when you +want to do an indirect subroutine call with a subroutine name or +reference using the C<&$subref()> or C<&{$subref}()> constructs, +although the C<$subref-E<gt>()> notation solves that problem. +See L for more about all that. + +Subroutines may be called recursively. If a subroutine is called +using the C<&> form, the argument list is optional, and if omitted, +no C<@_> array is set up for the subroutine: the C<@_> array at the +time of the call is visible to the subroutine instead. This is an +efficiency mechanism that new users may wish to avoid. &foo(1,2,3); # pass three arguments foo(1,2,3); # the same @@ -186,18 +191,19 @@ new users may wish to avoid. &foo; # foo() get current args, like foo(@_) !! foo; # like foo() IFF sub foo predeclared, else "foo" -Not only does the "C<&>" form make the argument list optional, but it also -disables any prototype checking on the arguments you do provide. This +Not only does the C<&> form make the argument list optional, it also +disables any prototype checking on arguments you do provide. This is partly for historical reasons, and partly for having a convenient way -to cheat if you know what you're doing. See the section on Prototypes below. +to cheat if you know what you're doing. See L<Prototypes> below. -Function whose names are in all upper case are reserved to the Perl core, -just as are modules whose names are in all lower case. A function in -all capitals is a loosely-held convention meaning it will be called -indirectly by the run-time system itself. Functions that do special, -pre-defined things are C, C, C, and C--plus all the -functions mentioned in L. The 5.005 release adds C -to this list. +Functions whose names are in all upper case are reserved to the Perl +core, as are modules whose names are in all lower case. A +function in all capitals is a loosely-held convention meaning it +will be called indirectly by the run-time system itself, usually +due to a triggered event.
Functions that do special, pre-defined +things include C, C, C, and C--plus +all functions mentioned in L. The 5.005 release adds +C to this list. =head2 Private Variables via my() @@ -208,36 +214,38 @@ Synopsis: my $foo = "flurp"; # declare $foo lexical, and init it my @oof = @bar; # declare @oof lexical, and init it -A "C" declares the listed variables to be confined (lexically) to the -enclosing block, conditional (C), loop -(C), subroutine, C, or -C'd file. If more than one value is listed, the list -must be placed in parentheses. All listed elements must be legal lvalues. -Only alphanumeric identifiers may be lexically scoped--magical -builtins like C<$/> must currently be Cize with "C" instead. - -Unlike dynamic variables created by the "C" operator, lexical -variables declared with "C" are totally hidden from the outside world, -including any called subroutines (even if it's the same subroutine called -from itself or elsewhere--every call gets its own copy). - -This doesn't mean that a C variable declared in a statically -I lexical scope would be invisible. Only the dynamic scopes -are cut off. For example, the C function below has access to the -lexical C<$x> variable because both the my and the sub occurred at the same -scope, presumably the file scope. +The C operator declares the listed variables to be lexically +confined to the enclosing block, conditional (C), +loop (C), subroutine, C, +or C'd file. If more than one value is listed, the +list must be placed in parentheses. All listed elements must be +legal lvalues. Only alphanumeric identifiers may be lexically +scoped--magical built-in like C<$/> must currently be Cize +with C instead. + +Unlike dynamic variables created by the C operator, lexical +variables declared with C are totally hidden from the outside +world, including any called subroutines. This is true if it's the +same subroutine called from itself or elsewhere--every call gets +its own copy. 
+ +This doesn't mean that a C variable declared in a statically +enclosing lexical scope would be invisible. Only dynamic scopes +are cut off. For example, the C function below has access +to the lexical $x variable because both the C and the C +occurred at the same scope, presumably file scope. my $x = 10; sub bumpx { $x++ } -(An C, however, can see the lexical variables of the scope it is -being evaluated in so long as the names aren't hidden by declarations within -the C itself. See L.) +An C, however, can see lexical variables of the scope it is +being evaluated in, so long as the names aren't hidden by declarations within +the C itself. See L. -The parameter list to C may be assigned to if desired, which allows you +The parameter list to my() may be assigned to if desired, which allows you to initialize your variables. (If no initializer is given for a particular variable, it is created with the undefined value.) Commonly -this is used to name the parameters to a subroutine. Examples: +this is used to name input parameters to a subroutine. Examples: $arg = "fred"; # "global" variable $n = cube_root(27); @@ -250,8 +258,8 @@ this is used to name the parameters to a subroutine. Examples: return $arg; } -The "C" is simply a modifier on something you might assign to. So when -you do assign to the variables in its argument list, the "C" doesn't +The C is simply a modifier on something you might assign to. So when +you do assign to variables in its argument list, C doesn't change whether those variables are viewed as a scalar or an array. So my ($foo) = ; # WRONG? @@ -275,24 +283,24 @@ the current statement. Thus, my $x = $x; -can be used to initialize the new $x with the value of the old C<$x>, and +can be used to initialize a new $x with the value of the old $x, and the expression my $x = 123 and $x == 123 -is false unless the old C<$x> happened to have the value C<123>. +is false unless the old $x happened to have the value C<123>. 
Lexical scopes of control structures are not bounded precisely by the braces that delimit their controlled blocks; control expressions are -part of the scope, too. Thus in the loop +part of that scope, too. Thus in the loop - while (defined(my $line = <>)) { + while (my $line = <>) { $line = lc $line; } continue { print $line; } -the scope of C<$line> extends from its declaration throughout the rest of +the scope of $line extends from its declaration throughout the rest of the loop construct (including the C clause), but not beyond it. Similarly, in the conditional @@ -305,44 +313,48 @@ it. Similarly, in the conditional die "'$answer' is neither 'yes' nor 'no'"; } -the scope of C<$answer> extends from its declaration throughout the rest -of the conditional (including C and C clauses, if any), +the scope of $answer extends from its declaration through the rest +of that conditional, including any C and C clauses, but not beyond it. -(None of the foregoing applies to C or C +None of the foregoing text applies to C or C modifiers appended to simple statements. Such modifiers are not -control structures and have no effect on scoping.) +control structures and have no effect on scoping. The C loop defaults to scoping its index variable dynamically -(in the manner of C; see below). However, if the index -variable is prefixed with the keyword "C", then it is lexically -scoped instead. Thus in the loop +in the manner of C. However, if the index variable is +prefixed with the keyword C, or if there is already a lexical +by that name in scope, then a new lexical is created instead. Thus +in the loop for my $i (1, 2, 3) { some_function(); } -the scope of C<$i> extends to the end of the loop, but not beyond it, and -so the value of C<$i> is unavailable in C. +the scope of $i extends to the end of the loop, but not beyond it, +rendering the value of $i inaccessible within C. Some users may wish to encourage the use of lexically scoped variables. 
-As an aid to catching implicit references to package variables, -if you say +As an aid to catching implicit uses of package variables, +which are always global, if you say use strict 'vars'; -then any variable reference from there to the end of the enclosing -block must either refer to a lexical variable, or must be fully -qualified with the package name. A compilation error results -otherwise. An inner block may countermand this with S<"C">. - -A C has both a compile-time and a run-time effect. At compile time, -the compiler takes notice of it; the principle usefulness of this is to -quiet S<"C">. The actual initialization is delayed until -run time, so it gets executed appropriately; every time through a loop, -for example. - -Variables declared with "C" are not part of any package and are therefore +then any variable mentioned from there to the end of the enclosing +block must either refer to a lexical variable, be predeclared via +C, or else must be fully qualified with the package name. +A compilation error results otherwise. An inner block may countermand +this with C. + +A C has both a compile-time and a run-time effect. At compile +time, the compiler takes notice of it. The principal usefulness +of this is to quiet C, but it is also essential +for generation of closures as detailed in L. Actual +initialization is delayed until run time, though, so it gets executed +at the appropriate time, such as each time through a loop, for +example. + +Variables declared with C are not part of any package and are therefore never fully qualified with the package name. In particular, you're not allowed to try to make a package variable (or other global) lexical: @@ -360,13 +372,14 @@ lexical of the same name is also visible: That will print out C<20> and C<10>. -You may declare "C" variables at the outermost scope of a file to hide -any such identifiers totally from the outside world. This is similar -to C's static variables at the file level.
To do this with a subroutine -requires the use of a closure (anonymous function with lexical access). -If a block (such as an C, function, or C) wants to create -a private subroutine that cannot be called from outside that block, -it can declare a lexical variable containing an anonymous sub reference: +You may declare C variables at the outermost scope of a file +to hide any such identifiers from the world outside that file. This +is similar in spirit to C's static variables when they are used at +the file level. To do this with a subroutine requires the use of +a closure (an anonymous function that accesses enclosing lexicals). +If you want to create a private subroutine that cannot be called +from outside that block, it can declare a lexical variable containing +an anonymous sub reference: my $secret_version = '1.001-beta'; my $secret_sub = sub { print $secret_version }; @@ -375,11 +388,13 @@ it can declare a lexical variable containing an anonymous sub reference: As long as the reference is never returned by any function within the module, no outside module can see the subroutine, because its name is not in any package's symbol table. Remember that it's not I called -C<$some_pack::secret_version> or anything; it's just C<$secret_version>, +C<$some_pack::secret_version> or anything; it's just $secret_version, unqualified and unqualifiable. -This does not work with object methods, however; all object methods have -to be in the symbol table of some package to be found. +This does not work with object methods, however; all object methods +have to be in the symbol table of some package to be found. See +L for something of a work-around to +this. =head2 Persistent Private Variables @@ -415,7 +430,7 @@ and put the static variable outside the function but in the block. If this function is being sourced in from a separate file via C or C, then this is probably just fine. 
If it's -all in the main program, you'll need to arrange for the C +all in the main program, you'll need to arrange for the C to be executed early, either by putting the whole block above your main program, or more likely, placing merely a C sub around it to make sure it gets executed before your program @@ -428,20 +443,21 @@ starts to run: } } -See L about the C function. +See L about the +special triggered functions, C and C. -If declared at the outermost scope, the file scope, then lexicals work -someone like C's file statics. They are available to all functions in -that same file declared below them, but are inaccessible from outside of -the file. This is sometimes used in modules to create private variables -for the whole module. +If declared at the outermost scope (the file scope), then lexicals +work somewhat like C's file statics. They are available to all +functions in that same file declared below them, but are inaccessible +from outside that file. This strategy is sometimes used in modules +to create private variables that the whole module can see. =head2 Temporary Values via local() -B: In general, you should be using "C" instead of "C", because +B: In general, you should be using C instead of C, because it's faster and safer. Exceptions to this include the global punctuation variables, filehandles and formats, and direct manipulation of the Perl -symbol table itself. Format variables often use "C" though, as do +symbol table itself. Format variables often use C though, as do other variables whose current value must be visible to called subroutines. @@ -458,14 +474,14 @@ Synopsis: local *merlyn = 'randal'; # SAME THING: promote 'randal' to *randal local *merlyn = \$randal; # just alias $merlyn, not @merlyn etc -A C modifies its listed variables to be "local" to the enclosing -block, C, or C--and to I. -A C just gives temporary values to global (meaning package) -variables. It does B create a local variable. This is known as -dynamic scoping. 
Lexical scoping is done with "C", which works more -like C's auto declarations. +A C modifies its listed variables to be "local" to the +enclosing block, C, or C--and to I. A C just gives temporary +values to global (meaning package) variables. It does I create +a local variable. This is known as dynamic scoping. Lexical scoping +is done with C, which works more like C's auto declarations. -If more than one variable is given to C, they must be placed in +If more than one variable is given to C, they must be placed in parentheses. All listed elements must be legal lvalues. This operator works by saving the current values of those variables in its argument list on a hidden stack and restoring them upon exiting the block, subroutine, or @@ -490,7 +506,7 @@ subroutine. Examples: } # old %digits restored here -Because C is a run-time command, it gets executed every time +Because C is a run-time operator, it gets executed each time through a loop. In releases of Perl previous to 5.0, this used more stack storage each time until the loop was exited. Perl now reclaims the space each time through, but it's still more efficient to declare your variables @@ -581,34 +597,15 @@ Perl will print This is a test only a test. The array has 6 elements: 0, 1, 2, undef, undef, 5 -Note also that when you Cize a member of a composite type that -B, the value is treated as though it were -in an lvalue context, i.e., it is first created and then Cized. -The consequence of this is that the hash or array is in fact permanently -modified. For instance, if you say - - %hash = ( 'This' => 'is', 'a' => 'test' ); - @ary = ( 0..5 ); - { - local($ary[8]) = 0; - local($hash{'b'}) = 'whatever'; - } - printf "%%hash has now %d keys, \@ary %d elements.\n", - scalar(keys(%hash)), scalar(@ary); - -Perl will print - - %hash has now 3 keys, @ary 9 elements. 
- -The above behavior of local() on non-existent members of composite +The behavior of local() on non-existent members of composite types is subject to change in future. =head2 Passing Symbol Table Entries (typeglobs) -[Note: The mechanism described in this section was originally the only -way to simulate pass-by-reference in older versions of Perl. While it -still works fine in modern versions, the new reference mechanism is -generally easier to work with. See below.] +B: The mechanism described in this section was originally +the only way to simulate pass-by-reference in older versions of +Perl. While it still works fine in modern versions, the new reference +mechanism is generally easier to work with. See below. Sometimes you don't want to pass the value of an array to a subroutine but rather the name of it, so that the subroutine can modify the global @@ -621,7 +618,7 @@ funny prefix characters on variables and subroutines and such. When evaluated, the typeglob produces a scalar value that represents all the objects of that name, including any filehandle, format, or subroutine. When assigned to, it causes the name mentioned to refer to -whatever "C<*>" value was assigned to it. Example: +whatever C<*> value was assigned to it. Example: sub doubleary { local(*someary) = @_; @@ -632,7 +629,7 @@ whatever "C<*>" value was assigned to it. Example: doubleary(*foo); doubleary(*bar); -Note that scalars are already passed by reference, so you can modify +Scalars are already passed by reference, so you can modify scalar arguments without using this mechanism by referring explicitly to C<$_[0]> etc. You can modify all the elements of an array by passing all the elements as scalars, but you have to use the C<*> mechanism (or @@ -647,13 +644,13 @@ L. =head2 When to Still Use local() -Despite the existence of C, there are still three places where the -C operator still shines. 
In fact, in these three places, you +Despite the existence of C, there are still three places where the +C operator still shines. In fact, in these three places, you I use C instead of C. =over -=item 1. You need to give a global variable a temporary value, especially C<$_>. +=item 1. You need to give a global variable a temporary value, especially $_. The global variables, like C<@ARGV> or the punctuation variables, must be Cized with C. This block reads in F, and splits @@ -667,7 +664,7 @@ in C<@Fields>. @Fields = split /^\s*=+\s*$/; } -It particular, it's important to Cize C<$_> in any routine that assigns +In particular, it's important to Cize $_ in any routine that assigns to it. Look out for implicit assignments in C conditionals. =item 2. You need to create a local file or directory handle or a local function. @@ -724,9 +721,9 @@ you're going to have to use an explicit pass-by-reference. Before you do that, you need to understand references as detailed in L. This section may not make much sense to you otherwise. -Here are a few simple examples. First, let's pass in several -arrays to a function and have it C all of then, return a new -list of all their former last elements: +Here are a few simple examples. First, let's pass in several arrays +to a function and have it C all of them, returning a new list +of all their former last elements: @tailings = popmany ( \@a, \@b, \@c, \@d ); @@ -765,9 +762,10 @@ Where people get into trouble is here: or (%a, %b) = func(%c, %d); -That syntax simply won't work. It sets just C<@a> or C<%a> and clears the C<@b> or -C<%b>. Plus the function didn't get passed into two separate arrays or -hashes: it got one long list in C<@_>, as always. +That syntax simply won't work. It sets just C<@a> or C<%a> and +clears the C<@b> or C<%b>. Plus the function didn't get passed +into two separate arrays or hashes: it got one long list in C<@_>, +as always.
If you can arrange for everyone to deal with this through references, it's cleaner code, although not so nice to look at. Here's a function that @@ -799,12 +797,13 @@ It turns out that you can actually do this also: } Here we're using the typeglobs to do symbol table aliasing. It's -a tad subtle, though, and also won't work if you're using C -variables, because only globals (well, and Cs) are in the symbol table. +a tad subtle, though, and also won't work if you're using C +variables, because only globals (even in disguise as C<local>s) +are in the symbol table. If you're passing around filehandles, you could usually just use the bare -typeglob, like C<*STDOUT>, but typeglobs references would be better because -they'll still work properly under S>. For example: +typeglob, like C<*STDOUT>, but typeglob references work, too. +For example: splutter(\*STDOUT); sub splutter { @@ -818,45 +817,41 @@ they'll still work properly under S>. For example: return scalar <$fh>; } -Another way to do this is using C<*HANDLE{IO}>, see L for usage -and caveats. - -If you're planning on generating new filehandles, you could do this: +If you're planning on generating new filehandles, you could do this. +Note that it passes back just the bare *FH, not its reference. sub openit { - my $name = shift; + my $path = shift; local *FH; return open (FH, $path) ? *FH : undef; } -Although that will actually produce a small memory leak. See the bottom -of L for a somewhat cleaner way using the C -package. - =head2 Prototypes -As of the 5.002 release of perl, if you declare +Perl supports a very limited kind of compile-time argument checking +using function prototyping. If you declare sub mypush (\@@) -then C takes arguments exactly like C does. The declaration -of the function to be called must be visible at compile time. The prototype -affects only the interpretation of new-style calls to the function, where -new-style is defined as not using the C<&> character.
In other words, -if you call it like a builtin function, then it behaves like a builtin -function. If you call it like an old-fashioned subroutine, then it -behaves like an old-fashioned subroutine. It naturally falls out from -this rule that prototypes have no influence on subroutine references -like C<\&foo> or on indirect subroutine calls like C<&{$subref}> or -C<$subref-E()>. +then C takes arguments exactly like C does. The +function declaration must be visible at compile time. The prototype +affects only interpretation of new-style calls to the function, +where new-style is defined as not using the C<&> character. In +other words, if you call it like a built-in function, then it behaves +like a built-in function. If you call it like an old-fashioned +subroutine, then it behaves like an old-fashioned subroutine. It +naturally falls out from this rule that prototypes have no influence +on subroutine references like C<\&foo> or on indirect subroutine +calls like C<&{$subref}> or C<$subref-E()>. Method calls are not influenced by prototypes either, because the -function to be called is indeterminate at compile time, because it depends -on inheritance. +function to be called is indeterminate at compile time, since +the exact code called depends on inheritance. -Because the intent is primarily to let you define subroutines that work -like builtin commands, here are the prototypes for some other functions -that parse almost exactly like the corresponding builtins. +Because the intent of this feature is primarily to let you define +subroutines that work like built-in functions, here are prototypes +for some other functions that parse almost exactly like the +corresponding built-in. Declared as Called as @@ -877,35 +872,35 @@ that parse almost exactly like the corresponding builtins. Any backslashed prototype character represents an actual argument that absolutely must start with that character. 
The value passed -to the subroutine (as part of C<@_>) will be a reference to the -actual argument given in the subroutine call, obtained by applying -C<\> to that argument. +as part of C<@_> will be a reference to the actual argument given +in the subroutine call, obtained by applying C<\> to that argument. Unbackslashed prototype characters have special meanings. Any -unbackslashed C<@> or C<%> eats all the rest of the arguments, and forces +unbackslashed C<@> or C<%> eats all remaining arguments, and forces list context. An argument represented by C<$> forces scalar context. An C<&> requires an anonymous subroutine, which, if passed as the first -argument, does not require the "C" keyword or a subsequent comma. A +argument, does not require the C keyword or a subsequent comma. A C<*> allows the subroutine to accept a bareword, constant, scalar expression, typeglob, or a reference to a typeglob in that slot. The value will be available to the subroutine either as a simple scalar, or (in the latter two cases) as a reference to the typeglob. A semicolon separates mandatory arguments from optional arguments. -(It is redundant before C<@> or C<%>.) +It is redundant before C<@> or C<%>, which gobble up everything else. -Note how the last three examples above are treated specially by the parser. -C is parsed as a true list operator, C is parsed as a -true unary operator with unary precedence the same as C, and -C is truly without arguments, just like C. That is, if you -say +Note how the last three examples in the table above are treated +specially by the parser. C is parsed as a true list +operator, C is parsed as a true unary operator with unary +precedence the same as C, and C is truly without +arguments, just like C. That is, if you say mytime +2; you'll get C, not C, which is how it would be parsed -without the prototype. +without a prototype. 
-The interesting thing about C<&> is that you can generate new syntax with it: +The interesting thing about C<&> is that you can generate new syntax with it, +provided it's in the initial position: sub try (&@) { my($try,$catch) = @_; @@ -924,12 +919,12 @@ The interesting thing about C<&> is that you can generate new syntax with it: }; That prints C<"unphooey">. (Yes, there are still unresolved -issues having to do with the visibility of C<@_>. I'm ignoring that +issues having to do with visibility of C<@_>. I'm ignoring that question for the moment. (But note that if we make C<@_> lexically scoped, those anonymous subroutines can act like closures... (Gee, is this sounding a little Lispish? (Never mind.)))) -And here's a reimplementation of C: +And here's a reimplementation of the Perl C operator: sub mygrep (&@) { my $code = shift; @@ -965,12 +960,12 @@ returning a list: func(@foo); func( split /:/ ); -Then you've just supplied an automatic C in front of their +Then you've just supplied an automatic C in front of their argument, which can be more than a bit surprising. The old C<@foo> which used to hold one thing doesn't get passed in. Instead, -the C now gets passed in C<1>, that is, the number of elements -in C<@foo>. And the C gets called in a scalar context and -starts scribbling on your C<@_> parameter list. +C now gets passed in a C<1>; that is, the number of elements +in C<@foo>. And the C gets called in scalar context so it +starts scribbling on your C<@_> parameter list. Ouch! This is all very powerful, of course, and should be used only in moderation to make the world a better place. @@ -978,12 +973,11 @@ to make the world a better place. =head2 Constant Functions Functions with a prototype of C<()> are potential candidates for -inlining. If the result after optimization and constant folding is -either a constant or a lexically-scoped scalar which has no other +inlining. 
If the result after optimization and constant folding +is either a constant or a lexically-scoped scalar which has no other references, then it will be used in place of function calls made -without C<&> or C. Calls made using C<&> or C are never -inlined. (See F for an easy way to declare most -constants.) +without C<&>. Calls made using C<&> are never inlined. (See +F for an easy way to declare most constants.) The following functions would all be inlined: @@ -1019,55 +1013,57 @@ a mandatory warning. (You can use this warning to tell whether or not a particular subroutine is considered constant.) The warning is considered severe enough not to be optional because previously compiled invocations of the function will still be using the old value of the -function. If you need to be able to redefine the subroutine you need to +function. If you need to be able to redefine the subroutine, you need to ensure that it isn't inlined, either by dropping the C<()> prototype -(which changes the calling semantics, so beware) or by thwarting the +(which changes calling semantics, so beware) or by thwarting the inlining mechanism in some other way, such as sub not_inlined () { 23 if $]; } -=head2 Overriding Builtin Functions +=head2 Overriding Built-in Functions -Many builtin functions may be overridden, though this should be tried +Many built-in functions may be overridden, though this should be tried only occasionally and for good reason. Typically this might be -done by a package attempting to emulate missing builtin functionality +done by a package attempting to emulate missing built-in functionality on a non-Unix system. Overriding may be done only by importing the name from a module--ordinary predeclaration isn't good enough. 
However, the -C pragma (compiler directive) lets you, in effect, predeclare subs -via the import syntax, and these names may then override the builtin ones: +C pragma lets you, in effect, predeclare subs +via the import syntax, and these names may then override built-in ones: use subs 'chdir', 'chroot', 'chmod', 'chown'; chdir $somewhere; sub chdir { ... } -To unambiguously refer to the builtin form, one may precede the -builtin name with the special package qualifier C. For example, -saying C will always refer to the builtin C, even +To unambiguously refer to the built-in form, precede the +built-in name with the special package qualifier C. For example, +saying C always refers to the built-in C, even if the current package has imported some other subroutine called -C<&open()> from elsewhere. +C<&open()> from elsewhere. Even though it looks like a regular +function call, it isn't: you can't take a reference to it, such as +the incorrect C<\&CORE::open> might appear to produce. -Library modules should not in general export builtin names like "C" -or "C" as part of their default C<@EXPORT> list, because these may +Library modules should not in general export built-in names like C +or C as part of their default C<@EXPORT> list, because these may sneak into someone else's namespace and change the semantics unexpectedly. -Instead, if the module adds the name to the C<@EXPORT_OK> list, then it's +Instead, if the module adds that name to C<@EXPORT_OK>, then it's possible for a user to import the name explicitly, but not implicitly. That is, they could say use Module 'open'; -and it would import the C override, but if they said +and it would import the C override. But if they said use Module; -they would get the default imports without the overrides. +they would get the default imports without overrides.
-The foregoing mechanism for overriding builtins is restricted, quite +The foregoing mechanism for overriding built-ins is restricted, quite deliberately, to the package that requests the import. There is a second -method that is sometimes applicable when you wish to override a builtin +method that is sometimes applicable when you wish to override a built-in everywhere, without regard to namespace boundaries. This is achieved by importing a sub into the special namespace C. Here is an example that quite brazenly replaces the C operator with something @@ -1089,9 +1085,12 @@ that understands regular expressions. sub glob { my $pat = shift; my @got; - local(*D); - if (opendir D, '.') { @got = grep /$pat/, readdir D; closedir D; } - @got; + local *D; + if (opendir D, '.') { + @got = grep /$pat/, readdir D; + closedir D; + } + return @got; } 1; @@ -1102,44 +1101,45 @@ And here's how it could be (ab)used: use REGlob 'glob'; # override glob() in Foo:: only print for <^[a-z_]+\.pm\$>; # show all pragmatic modules -Note that the initial comment shows a contrived, even dangerous example. +The initial comment shows a contrived, even dangerous example. By overriding C globally, you would be forcing the new (and -subversive) behavior for the C operator for B namespace, +subversive) behavior for the C operator for I namespace, without the complete cognizance or cooperation of the modules that own those namespaces. Naturally, this should be done with extreme caution--if it must be done at all. The C example above does not implement all the support needed to -cleanly override perl's C operator. The builtin C has +cleanly override perl's C operator. The built-in C has different behaviors depending on whether it appears in a scalar or list -context, but our C doesn't. Indeed, many perl builtins have such +context, but our C doesn't. Indeed, many perl built-ins have such context sensitive behaviors, and these must be adequately supported by a properly written override.
For a fully functional example of overriding C, study the implementation of C in the standard library. - =head2 Autoloading -If you call a subroutine that is undefined, you would ordinarily get an -immediate fatal error complaining that the subroutine doesn't exist. -(Likewise for subroutines being used as methods, when the method -doesn't exist in any base class of the class package.) If, -however, there is an C subroutine defined in the package or -packages that were searched for the original subroutine, then that -C subroutine is called with the arguments that would have been -passed to the original subroutine. The fully qualified name of the -original subroutine magically appears in the C<$AUTOLOAD> variable in the -same package as the C routine. The name is not passed as an -ordinary argument because, er, well, just because, that's why... - -Most C routines will load in a definition for the subroutine in -question using eval, and then execute that subroutine using a special -form of "goto" that erases the stack frame of the C routine -without a trace. (See the standard C module, for example.) -But an C routine can also just emulate the routine and never -define it. For example, let's pretend that a function that wasn't defined -should just call C with those arguments. All you'd do is this: +If you call a subroutine that is undefined, you would ordinarily +get an immediate, fatal error complaining that the subroutine doesn't +exist. (Likewise for subroutines being used as methods, when the +method doesn't exist in any base class of the class's package.) +However, if an C subroutine is defined in the package or +packages used to locate the original subroutine, then that +C subroutine is called with the arguments that would have +been passed to the original subroutine. The fully qualified name +of the original subroutine magically appears in the global $AUTOLOAD +variable of the same package as the C routine. 
The name +is not passed as an ordinary argument because, er, well, just +because, that's why... + +Many C routines load in a definition for the requested +subroutine using eval(), then execute that subroutine using a special +form of goto() that erases the stack frame of the C routine +without a trace. (See the source to the standard module documented +in L, for example.) But an C routine can +also just emulate the routine and never define it. For example, +let's pretend that a function that wasn't defined should just invoke +C with those arguments. All you'd do is: sub AUTOLOAD { my $program = $AUTOLOAD; @@ -1150,8 +1150,8 @@ should just call C with those arguments. All you'd do is this: who('am', 'i'); ls('-l'); -In fact, if you predeclare the functions you want to call that way, you don't -even need the parentheses: +In fact, if you predeclare functions you want to call that way, you don't +even need parentheses: use subs qw(date who ls); date; @@ -1159,16 +1159,19 @@ even need the parentheses: ls -l; A more complete example of this is the standard Shell module, which -can treat undefined subroutine calls as calls to Unix programs. +can treat undefined subroutine calls as calls to external programs. -Mechanisms are available for modules writers to help split the modules -up into autoloadable files. See the standard AutoLoader module +Mechanisms are available to help module writers split their modules +into autoloadable files. See the standard AutoLoader module described in L and in L, the standard SelfLoader modules in L, and the document on adding C -functions to perl code in L. +functions to Perl code in L. =head1 SEE ALSO -See L for more about references and closures. See L if -you'd like to learn about calling C subroutines from perl. See L -to learn about bundling up your functions in separate files. +See L for more about references and closures. +See L if you'd like to learn about calling C subroutines from Perl.
+See L if you'd like to learn about calling Perl subroutines from C. +See L to learn about bundling up your functions in separate files. +See L to learn what library modules come standard on your system. +See L to learn how to make object method calls. diff --git a/pod/perlsyn.pod b/pod/perlsyn.pod index a3bc5ab..ee668e1 100644 --- a/pod/perlsyn.pod +++ b/pod/perlsyn.pod @@ -44,7 +44,7 @@ subroutine without defining it by saying C, thus: sub myname; $me = myname $0 or die "can't get myname"; -Note that it functions as a list operator, not as a unary operator; so +Note that myname() functions as a list operator, not as a unary operator; so be careful to use C instead of C<||> in this case. However, if you were to declare the subroutine as C, then C would function as a unary operator, so either C or @@ -86,7 +86,7 @@ presuming you're a speaker of English. The C modifier is an iterator: For each value in EXPR, it aliases C<$_> to the value and executes the statement. The C and C modifiers have the usual "C loop" semantics (conditional evaluated first), except -when applied to a C-BLOCK (or to the now-deprecated C-SUBROUTINE +when applied to a C-BLOCK (or to the deprecated C-SUBROUTINE statement), in which case the block executes once before the conditional is evaluated. This is so that you can write loops like: @@ -289,9 +289,7 @@ is therefore visible only within the loop. Otherwise, the variable is implicitly local to the loop and regains its former value upon exiting the loop. If the variable was previously declared with C, it uses that variable instead of the global one, but it's still localized to -the loop. (Note that a lexically scoped variable can cause problems -if you have subroutine or format declarations within the loop which -refer to it.) +the loop. The C keyword is actually a synonym for the C keyword, so you can use C for readability or C for brevity. (Or because @@ -490,15 +488,15 @@ C envariable.
That kind of switch statement only works when you know the C<&&> clauses will be true. If you don't, the previous C example should be used. -You might also consider writing a hash instead of synthesizing a C -statement. +You might also consider writing a hash of subroutine references +instead of synthesizing a C statement. =head2 Goto -Although not for the faint of heart, Perl does support a C statement. -A loop's LABEL is not actually a valid target for a C; -it's just the name of the loop. There are three forms: C-LABEL, -C-EXPR, and C-&NAME. +Although not for the faint of heart, Perl does support a C +statement. There are three forms: C-LABEL, C-EXPR, and +C-&NAME. A loop's LABEL is not actually a valid target for +a C; it's just the name of the loop. The C-LABEL form finds the statement labeled with LABEL and resumes execution there. It may not be used to go into any construct that diff --git a/pod/perlthrtut.pod b/pod/perlthrtut.pod index f2ca3bd..fc88561 100644 --- a/pod/perlthrtut.pod +++ b/pod/perlthrtut.pod @@ -5,7 +5,7 @@ perlthrtut - tutorial on threads in Perl =head1 DESCRIPTION One of the most prominent new features of Perl 5.005 is the inclusion -of threads. Threads make a number of things a lot easier, and are a +of threads. Threads make a number of things a lot easier, and are a very useful addition to your bag of programming tricks. =head1 What Is A Thread Anyway? @@ -14,44 +14,44 @@ A thread is a flow of control through a program with a single execution point. Sounds an awful lot like a process, doesn't it? Well, it should. -Threads are one of the pieces of a process. Every process has at least +Threads are one of the pieces of a process. Every process has at least one thread and, up until now, every process running Perl had only one -thread. With 5.005, though, you can create extra threads. We're going +thread. With 5.005, though, you can create extra threads. We're going to show you how, when, and why. 
=head1 Threaded Program Models There are three basic ways that you can structure a threaded -program. Which model you choose depends on what you need your program -to do. For many non-trivial threaded programs you'll need to choose +program. Which model you choose depends on what you need your program +to do. For many non-trivial threaded programs you'll need to choose different models for different pieces of your program. =head2 Boss/Worker The boss/worker model usually has one `boss' thread and one or more -`worker' threads. The boss thread gathers or generates tasks that need +`worker' threads. The boss thread gathers or generates tasks that need to be done, then parcels those tasks out to the appropriate worker thread. This model is common in GUI and server programs, where a main thread waits for some event and then passes that event to the appropriate -worker threads for processing. Once the event has been passed on, the +worker threads for processing. Once the event has been passed on, the boss thread goes back to waiting for another event. -The boss thread does relatively little work. While tasks aren't +The boss thread does relatively little work. While tasks aren't necessarily performed faster than with any other method, it tends to have the best user-response times. =head2 Work Crew In the work crew model, several threads are created that do -essentially the same thing to different pieces of data. It closely +essentially the same thing to different pieces of data. It closely mirrors classical parallel processing and vector processors, where a large array of processors do the exact same thing to many pieces of data. This model is particularly useful if the system running the program -will distribute multiple threads across different processors. It can +will distribute multiple threads across different processors. 
It can also be useful in ray tracing or rendering engines, where the individual threads can pass on interim results to give the user visual feedback. @@ -60,29 +60,29 @@ feedback. The pipeline model divides up a task into a series of steps, and passes the results of one step on to the thread processing the -next. Each thread does one thing to each piece of data and passes the +next. Each thread does one thing to each piece of data and passes the results to the next thread in line. This model makes the most sense if you have multiple processors so two or more threads will be executing in parallel, though it can often -make sense in other contexts as well. It tends to keep the individual +make sense in other contexts as well. It tends to keep the individual tasks small and simple, as well as allowing some parts of the pipeline to block (on I/O or system calls, for example) while other parts keep -going. If you're running different parts of the pipeline on different +going. If you're running different parts of the pipeline on different processors you may also take advantage of the caches on each processor. This model is also handy for a form of recursive programming where, rather than having a subroutine call itself, it instead creates -another thread. Prime and Fibonacci generators both map well to this +another thread. Prime and Fibonacci generators both map well to this form of the pipeline model. (A version of a prime number generator is presented later on.) =head1 Native threads -There are several different ways to implement threads on a system. How +There are several different ways to implement threads on a system. How threads are implemented depends both on the vendor and, in some cases, -the version of the operating system. Often the first implementation +the version of the operating system. Often the first implementation will be relatively simple, but later versions of the OS will be more sophisticated. 
@@ -93,42 +93,42 @@ There are three basic categories of threads-user-mode threads, kernel threads, and multiprocessor kernel threads. User-mode threads are threads that live entirely within a program and -its libraries. In this model, the OS knows nothing about threads. As +its libraries. In this model, the OS knows nothing about threads. As far as it's concerned, your process is just a process. This is the easiest way to implement threads, and the way most OSes -start. The big disadvantage is that, since the OS knows nothing about -threads, if one thread blocks they all do. Typical blocking activities +start. The big disadvantage is that, since the OS knows nothing about +threads, if one thread blocks they all do. Typical blocking activities include most system calls, most I/O, and things like sleep(). -Kernel threads are the next step in thread evolution. The OS knows +Kernel threads are the next step in thread evolution. The OS knows about kernel threads, and makes allowances for them. The main difference between a kernel thread and a user-mode thread is -blocking. With kernel threads, things that block a single thread don't -block other threads. This is not the case with user-mode threads, +blocking. With kernel threads, things that block a single thread don't +block other threads. This is not the case with user-mode threads, where the kernel blocks at the process level and not the thread level. This is a big step forward, and can give a threaded program quite a performance boost over non-threaded programs. Threads that block performing I/O, for example, won't block threads that are doing other -things. Each process still has only one thread running at once, +things. Each process still has only one thread running at once, though, regardless of how many CPUs a system might have. Since kernel threading can interrupt a thread at any time, they will uncover some of the implicit locking assumptions you may make in your -program. 
For example, something as simple as C<$a = $a + 2> can behave -unpredictably with kernel threads if C<$a> is visible to other -threads, as another thread may have changed C<$a> between the time it +program. For example, something as simple as C<$a = $a + 2> can behave +unpredictably with kernel threads if $a is visible to other +threads, as another thread may have changed $a between the time it was fetched on the right hand side and the time the new value is stored. Multiprocessor Kernel Threads are the final step in thread -support. With multiprocessor kernel threads on a machine with multiple +support. With multiprocessor kernel threads on a machine with multiple CPUs, the OS may schedule two or more threads to run simultaneously on different CPUs. This can give a serious performance boost to your threaded program, -since more than one thread will be executing at the same time. As a +since more than one thread will be executing at the same time. As a tradeoff, though, any of those nagging synchronization issues that might not have shown with basic kernel threads will appear with a vengeance. @@ -138,14 +138,14 @@ different OSes (and different thread implementations for a particular OS) allocate CPU cycles to threads in different ways. Cooperative multitasking systems have running threads give up control -if one of two things happen. If a thread calls a yield function, it -gives up control. It also gives up control if the thread does -something that would cause it to block, such as perform I/O. In a +if one of two things happen. If a thread calls a yield function, it +gives up control. It also gives up control if the thread does +something that would cause it to block, such as perform I/O. In a cooperative multitasking implementation, one thread can starve all the others for CPU time if it so chooses. Preemptive multitasking systems interrupt threads at regular intervals -while the system decides which thread should run next. 
In a preemptive +while the system decides which thread should run next. In a preemptive multitasking system, one thread usually won't monopolize the CPU. On some systems, there can be cooperative and preemptive threads @@ -156,18 +156,18 @@ normal priorities behave preemptively.) =head1 What kind of threads are perl threads? If you have experience with other thread implementations, you might -find that things aren't quite what you expect. It's very important to +find that things aren't quite what you expect. It's very important to remember when dealing with Perl threads that Perl Threads Are Not X Threads, for all values of X. They aren't POSIX threads, or -DecThreads, or Java's Green threads, or Win32 threads. There are +DecThreads, or Java's Green threads, or Win32 threads. There are similarities, and the broad concepts are the same, but if you start looking for implementation details you're going to be either -disappointed or confused. Possibly both. +disappointed or confused. Possibly both. This is not to say that Perl threads are completely different from -everything that's ever come before--they're not. Perl's threading -model owes a lot to other thread models, especially POSIX. Just as -Perl is not C, though, Perl threads are not POSIX threads. So if you +everything that's ever come before--they're not. Perl's threading +model owes a lot to other thread models, especially POSIX. Just as +Perl is not C, though, Perl threads are not POSIX threads. So if you find yourself looking for mutexes, or thread priorities, it's time to step back a bit and think about what you want to do and how Perl can do it. @@ -175,28 +175,28 @@ do it. =head1 Threadsafe Modules The addition of threads has changed Perl's internals -substantially. There are implications for people who write -modules--especially modules with XS code or external libraries. While +substantially. There are implications for people who write +modules--especially modules with XS code or external libraries. 
While most modules won't encounter any problems, modules that aren't explicitly tagged as thread-safe should be tested before being used in production code. Not all modules that you might use are thread-safe, and you should always assume a module is unsafe unless the documentation says -otherwise. This includes modules that are distributed as part of the -core. Threads are a beta feature, and even some of the standard +otherwise. This includes modules that are distributed as part of the +core. Threads are a beta feature, and even some of the standard modules aren't thread-safe. If you're using a module that's not thread-safe for some reason, you can protect yourself by using semaphores and lots of programming -discipline to control access to the module. Semaphores are covered +discipline to control access to the module. Semaphores are covered later in the article. Perl Threads Are Different =head1 Thread Basics The core Thread module provides the basic functions you need to write -threaded programs. In the following sections we'll cover the basics, -showing you what you need to do to create a threaded program. After +threaded programs. In the following sections we'll cover the basics, +showing you what you need to do to create a threaded program. After that, we'll go over some of the features of the Thread module that make threaded programming easier. @@ -208,7 +208,7 @@ your programs are compiled. If your Perl wasn't compiled with thread support enabled, then any attempt to use threads will fail. Remember that the threading support in 5.005 is in beta release, and -should be treated as such. You should expect that it may not function +should be treated as such. You should expect that it may not function entirely properly, and the thread interface may well change some before it is a fully supported, production release. The beta version shouldn't be used for mission-critical projects. 
Having said that, @@ -237,13 +237,13 @@ have code like this: Since code that runs both with and without threads is usually pretty messy, it's best to isolate the thread-specific code in its own -module. In our example above, that's what MyMod_threaded is, and it's +module. In our example above, that's what MyMod_threaded is, and it's only imported if we're running on a threaded Perl. =head2 Creating Threads The Thread package provides the tools you need to create new -threads. Like any other module, you need to tell Perl you want to use +threads. Like any other module, you need to tell Perl you want to use it; use Thread imports all the pieces you need to create basic threads. @@ -258,11 +258,11 @@ The simplest, straightforward way to create a thread is with new(): } The new() method takes a reference to a subroutine and creates a new -thread, which starts executing in the referenced subroutine. Control +thread, which starts executing in the referenced subroutine. Control then passes both to the subroutine and the caller. If you need to, your program can pass parameters to the subroutine as -part of the thread startup. Just include the list of parameters as +part of the thread startup. Just include the list of parameters as part of the C<Thread::new> call, like this: use Thread; @@ -281,8 +281,8 @@ part of the C<Thread::new> call, like this: The subroutine runs like a normal Perl subroutine, and the call to new Thread returns whatever the subroutine returns. -The last example illustrates another feature of threads. You can spawn -off several threads using the same subroutine. Each thread executes +The last example illustrates another feature of threads. You can spawn +off several threads using the same subroutine. Each thread executes the same subroutine, but in a separate thread with a separate environment and potentially separate arguments. @@ -305,22 +305,22 @@ spin off a chunk of code like eval(), but into its own thread: You'll notice we did a use Thread qw(async) in that example.
async is not exported by default, so if you want it, you'll either need to import it before you use it or fully qualify it as -Thread::async. You'll also note that there's a semicolon after the -closing brace. That's because async() treats the following block as an +Thread::async. You'll also note that there's a semicolon after the +closing brace. That's because async() treats the following block as an anonymous subroutine, so the semicolon is necessary. Like eval(), the code executes in the same context as it would if it -weren't spun off. Since both the code inside and after the async start -executing, you need to be careful with any shared resources. Locking +weren't spun off. Since both the code inside and after the async start +executing, you need to be careful with any shared resources. Locking and other synchronization techniques are covered later. =head2 Giving up control There are times when you may find it useful to have a thread -explicitly give up the CPU to another thread. Your threading package +explicitly give up the CPU to another thread. Your threading package might not support preemptive multitasking for threads, for example, or you may be doing something compute-intensive and want to make sure -that the user-interface thread gets called frequently. Regardless, +that the user-interface thread gets called frequently. Regardless, there are times that you might want a thread to give up the processor. Perl's threading package provides the yield() function that does @@ -344,7 +344,7 @@ this. yield() is pretty straightforward, and works like this: =head2 Waiting For A Thread To Exit -Since threads are also subroutines, they can return values. To wait +Since threads are also subroutines, they can return values. To wait for a thread to exit and extract any scalars it might return, you can use the join() method. @@ -357,11 +357,11 @@ use the join() method. 
sub sub1 { return "Fifty-six", "foo", 2; } In the example above, the join() method returns as soon as the thread -ends. In addition to waiting for a thread to finish and gathering up +ends. In addition to waiting for a thread to finish and gathering up any values that the thread might have returned, join() also performs any OS cleanup necessary for the thread. That cleanup might be important, especially for long-running programs that spawn lots of -threads. If you don't want the return values and don't want to wait +threads. If you don't want the return values and don't want to wait for the thread to finish, you should call the detach() method instead. detach() is covered later in the article. @@ -369,7 +369,7 @@ instead. detach() is covered later in the article. So what happens when an error occurs in a thread? Any errors that could be caught with eval() are postponed until the thread is -joined. If your program never joins, the errors appear when your +joined. If your program never joins, the errors appear when your program exits. Errors deferred until a join() can be caught with eval(): @@ -390,12 +390,12 @@ to get them. =head2 Ignoring A Thread join() does three things: it waits for a thread to exit, cleans up -after it, and returns any data the thread may have produced. But what +after it, and returns any data the thread may have produced. But what if you're not interested in the thread's return values, and you don't really care when the thread finishes? All you want is for the thread to get cleaned up after when it's done. -In this case, you use the detach() method. Once a thread is detached, +In this case, you use the detach() method. Once a thread is detached, it'll run until it's finished, then Perl will clean up after it automatically. @@ -421,29 +421,29 @@ lost. =head1 Threads And Data Now that we've covered the basics of threads, it's time for our next -topic: data.
Threading introduces a couple of complications to data access that non-threaded programs never need to worry about. =head2 Shared And Unshared Data The single most important thing to remember when using threads is that all threads potentially have access to all the data anywhere in your -program. While this is true with a nonthreaded Perl program as well, +program. While this is true with a nonthreaded Perl program as well, it's especially important to remember with a threaded program, since more than one thread can be accessing this data at once. Perl's scoping rules don't change because you're using threads. If a subroutine (or block, in the case of async()) could see a variable if -you weren't running with threads, it can see it if you are. This is +you weren't running with threads, it can see it if you are. This is especially important for the subroutines that create, and makes my -variables even more important. Remember--if your variables aren't +variables even more important. Remember--if your variables aren't lexically scoped (declared with C<my>) you're probably sharing it between threads. =head2 Thread Pitfall: Races While threads bring a new set of useful tools, they also bring a -number of pitfalls. One pitfall is the race condition: +number of pitfalls. One pitfall is the race condition: use Thread; $a = 1; @@ -458,14 +458,14 @@ number of pitfalls. One pitfall is the race condition: What do you think $a will be? The answer, unfortunately, is "it depends." Both sub1() and sub2() access the global variable $a, once -to read and once to write. Depending on factors ranging from your +to read and once to write. Depending on factors ranging from your thread implementation's scheduling algorithm to the phase of the moon, $a can be 2 or 3. Race conditions are caused by unsynchronized access to shared -data.
Without explicit synchronization, there's no way to be sure that nothing has happened to the shared data between the time you access it -and the time you update it. Even this simple code fragment has the +and the time you update it. Even this simple code fragment has the possibility of error: use Thread qw(async); @@ -473,8 +473,8 @@ possibility of error: async{ $b = $a; $a = $b + 1; }; async{ $c = $a; $a = $c + 1; }; -Two threads both access $a. Each thread can potentially be interrupted -at any point, or be executed in any order. At the end, $a could be 3 +Two threads both access $a. Each thread can potentially be interrupted +at any point, or be executed in any order. At the end, $a could be 3 or 4, and both $b and $c could be 2 or 3. Whenever your program accesses data or resources that can be accessed @@ -484,9 +484,9 @@ data corruption and race conditions. =head2 Controlling access: lock() The lock() function takes a variable (or subroutine, but we'll get to -that later) and puts a lock on it. No other thread may lock the +that later) and puts a lock on it. No other thread may lock the variable until the locking thread exits the innermost block containing -the lock. Using lock() is straightforward: +the lock. Using lock() is straightforward: use Thread qw(async); $a = 4; @@ -513,29 +513,29 @@ the lock. Using lock() is straightforward: print "\$a is $a\n"; lock() blocks the thread until the variable being locked is -available. When lock() returns, your thread can be sure that no other +available. When lock() returns, your thread can be sure that no other thread can lock that variable until the innermost block containing the lock exits. It's important to note that locks don't prevent access to the variable -in question, only lock attempts. This is in keeping with Perl's +in question, only lock attempts. This is in keeping with Perl's longstanding tradition of courteous programming, and the advisory file -locking that flock() gives you. 
Locked subroutines behave differently, -however. We'll cover that later in the article. +locking that flock() gives you. Locked subroutines behave differently, +however. We'll cover that later in the article. -You may lock arrays and hashes as well as scalars. Locking an array, +You may lock arrays and hashes as well as scalars. Locking an array, though, will not block subsequent locks on array elements, just lock attempts on the array itself. Finally, locks are recursive, which means it's okay for a thread to -lock a variable more than once. The lock will last until the outermost +lock a variable more than once. The lock will last until the outermost lock() on the variable goes out of scope. =head2 Thread Pitfall: Deadlocks -Locks are a handy tool to synchronize access to data. Using them -properly is the key to safe shared data. Unfortunately, locks aren't -without their dangers. Consider the following code: +Locks are a handy tool to synchronize access to data. Using them +properly is the key to safe shared data. Unfortunately, locks aren't +without their dangers. Consider the following code: use Thread qw(async yield); $a = 4; @@ -553,34 +553,34 @@ without their dangers. Consider the following code: lock ($a); }; -This program will probably hang until you kill it. The only way it +This program will probably hang until you kill it. The only way it won't hang is if one of the two async() routines acquires both locks -first. A guaranteed-to-hang version is more complicated, but the +first. A guaranteed-to-hang version is more complicated, but the principle is the same. The first thread spawned by async() will grab a lock on $a then, a -second or two later, try to grab a lock on $b. Meanwhile, the second -thread grabs a lock on $b, then later tries to grab a lock on $a. The +second or two later, try to grab a lock on $b. Meanwhile, the second +thread grabs a lock on $b, then later tries to grab a lock on $a. 
The second lock attempt for both threads will block, each waiting for the other to release its lock. This condition is called a deadlock, and it occurs whenever two or more threads are trying to get locks on resources that the others -own. Each thread will block, waiting for the other to release a lock -on a resource. That never happens, though, since the thread with the +own. Each thread will block, waiting for the other to release a lock +on a resource. That never happens, though, since the thread with the resource is itself waiting for a lock to be released. -There are a number of ways to handle this sort of problem. The best +There are a number of ways to handle this sort of problem. The best way is to always have all threads acquire locks in the exact same -order. If, for example, you lock variables $a, $b, and $c, always lock -$a before $b, and $b before $c. It's also best to hold on to locks for +order. If, for example, you lock variables $a, $b, and $c, always lock +$a before $b, and $b before $c. It's also best to hold on to locks for as short a period of time to minimize the risks of deadlock. =head2 Queues: Passing Data Around A queue is a special thread-safe object that lets you put data in one end and take it out the other without having to worry about -synchronization issues. They're pretty straightforward, and look like +synchronization issues. They're pretty straightforward, and look like this: use Thread qw(async); @@ -599,13 +599,13 @@ this: sleep 10; $DataQueue->enqueue(undef); -You create the queue with new Thread::Queue. Then you can add lists of +You create the queue with new Thread::Queue. Then you can add lists of scalars onto the end with enqueue(), and pop scalars off the front of -it with dequeue(). A queue has no fixed size, and can grow as needed +it with dequeue(). A queue has no fixed size, and can grow as needed to hold everything pushed on to it. If a queue is empty, dequeue() blocks until another thread enqueues -something. 
This makes queues ideal for event loops and other +something. This makes queues ideal for event loops and other communications between threads. =head1 Threads And Code @@ -617,10 +617,10 @@ entire subroutines. =head2 Semaphores: Synchronizing Data Access -Semaphores are a kind of generic locking mechanism. Unlike lock, which +Semaphores are a kind of generic locking mechanism. Unlike lock, which gets a lock on a particular scalar, Perl doesn't associate any particular thing with a semaphore so you can use them to control -access to anything you like. In addition, semaphores can allow more +access to anything you like. In addition, semaphores can allow more than one thread to access a resource at once, though by default semaphores only allow one thread access at a time. @@ -630,7 +630,7 @@ semaphores only allow one thread access at a time. Semaphores have two methods, down and up. down decrements the resource count, while up increments it. down calls will block if the -semaphore's current count would decrement below zero. This program +semaphore's current count would decrement below zero. This program gives a quick demonstration: use Thread qw(yield); @@ -659,20 +659,20 @@ gives a quick demonstration: } } -The three invocations of the subroutine all operate in sync. The +The three invocations of the subroutine all operate in sync. The semaphore, though, makes sure that only one thread is accessing the global variable at once. =item Advanced Semaphores By default, semaphores behave like locks, letting only one thread -down() them at a time. However, there are other uses for semaphores. +down() them at a time. However, there are other uses for semaphores. Each semaphore has a counter attached to it. down() decrements the -counter and up() increments the counter. By default, semaphores are +counter and up() increments the counter. By default, semaphores are created with the counter set to one, down() decrements by one, and -up() increments by one. 
If down() attempts to decrement the counter -below zero, it blocks until the counter is large enough. Note that +up() increments by one. If down() attempts to decrement the counter +below zero, it blocks until the counter is large enough. Note that while a semaphore can be created with a starting count of zero, any up() or down() always changes the counter by at least one. $semaphore->down(0) is the same as $semaphore->down(1). @@ -680,21 +680,21 @@ one. $semaphore->down(0) is the same as $semaphore->down(1). The question, of course, is why would you do something like this? Why create a semaphore with a starting count that's not one, or why decrement/increment it by more than one? The answer is resource -availability. Many resources that you want to manage access for can be +availability. Many resources that you want to manage access for can be safely used by more than one thread at once. -For example, let's take a GUI driven program. It has a semaphore that +For example, let's take a GUI driven program. It has a semaphore that it uses to synchronize access to the display, so only one thread is -ever drawing at once. Handy, but of course you don't want any thread -to start drawing until things are properly set up. In this case, you +ever drawing at once. Handy, but of course you don't want any thread +to start drawing until things are properly set up. In this case, you can create a semaphore with a counter set to zero, and up it when things are ready for drawing. Semaphores with counters greater than one are also useful for -establishing quotas. Say, for example, that you have a number of -threads that can do I/O at once. You don't want all the threads +establishing quotas. Say, for example, that you have a number of +threads that can do I/O at once. You don't want all the threads reading or writing at once though, since that can potentially swamp -your I/O channels, or deplete your process' quota of filehandles. 
You +your I/O channels, or deplete your process' quota of filehandles. You can use a semaphore initialized to the number of concurrent I/O requests (or open files) that you want at any one time, and have your threads quietly block and unblock themselves. @@ -707,14 +707,14 @@ thread needs to check out or return a number of resources at once. =head2 Attributes: Restricting Access To Subroutines In addition to synchronizing access to data or resources, you might -find it useful to synchronize access to subroutines. You may be +find it useful to synchronize access to subroutines. You may be accessing a singular machine resource (perhaps a vector processor), or find it easier to serialize calls to a particular subroutine than to have a set of locks and semaphores. -One of the additions to Perl 5.005 is subroutine attributes. The +One of the additions to Perl 5.005 is subroutine attributes. The Thread package uses these to provide several flavors of -serialization. It's important to remember that these attributes are +serialization. It's important to remember that these attributes are used in the compilation phase of your program so you can't change a subroutine's behavior while your program is actually running. @@ -727,9 +727,9 @@ The basic subroutine lock looks like this: } This ensures that only one thread will be executing this subroutine at -any one time. Once a thread calls this subroutine, any other thread +any one time. Once a thread calls this subroutine, any other thread that calls it will block until the thread in the subroutine exits -it. A more elaborate example looks like this: +it. A more elaborate example looks like this: use Thread qw(yield); @@ -760,10 +760,10 @@ can see that only one thread is in it at any one time. =head2 Methods Locking an entire subroutine can sometimes be overkill, especially -when dealing with Perl objects.
When calling a method for an object, for example, you want to serialize calls to a method, so that only one thread will be in the subroutine for a particular object, but threads -calling that subroutine for a different object aren't blocked. The +calling that subroutine for a different object aren't blocked. The method attribute indicates whether the subroutine is really a method. use Thread; @@ -817,25 +817,25 @@ thread is ever in one_at_a_time() at once. =head2 Locking A Subroutine -You can lock a subroutine as you would lock a variable. Subroutine +You can lock a subroutine as you would lock a variable. Subroutine locks work the same as a C<use attrs qw(locked)> in the subroutine, and block all access to the subroutine for other threads until the -lock goes out of scope. When the subroutine isn't locked, any number +lock goes out of scope. When the subroutine isn't locked, any number of threads can be in it at once, and getting a lock on a subroutine -doesn't affect threads already in the subroutine. Getting a lock on a +doesn't affect threads already in the subroutine. Getting a lock on a subroutine looks like this: lock(\&sub_to_lock); -Simple enough. Unlike use attrs, which is a compile time option, +Simple enough. Unlike use attrs, which is a compile time option, locking and unlocking a subroutine can be done at runtime at your -discretion. There is some runtime penalty to using lock(\&sub) instead +discretion. There is some runtime penalty to using lock(\&sub) instead of use attrs qw(locked), so make sure you're choosing the proper method to do the locking. You'd choose lock(\&sub) when writing modules and code to run on both threaded and unthreaded Perl, especially for code that will run on -5.004 or earlier Perls. In that case, it's useful to have subroutines +5.004 or earlier Perls. In that case, it's useful to have subroutines that should be serialized lock themselves if they're running threaded, like so: @@ -855,20 +855,20 @@ version of Perl you're running.
We've covered the workhorse parts of Perl's threading package, and with these tools you should be well on your way to writing threaded -code and packages. There are a few useful little pieces that didn't +code and packages. There are a few useful little pieces that didn't really fit in anyplace else. =head2 What Thread Am I In? The Thread->self method provides your program with a way to get an -object representing the thread it's currently in. You can use this +object representing the thread it's currently in. You can use this object in the same way as the ones returned from the thread creation. =head2 Thread IDs tid() is a thread object method that returns the thread ID of the -thread the object represents. Thread IDs are integers, with the main -thread in a program being 0. Currently Perl assigns a unique tid to +thread the object represents. Thread IDs are integers, with the main +thread in a program being 0. Currently Perl assigns a unique tid to every thread ever created in your program, assigning the first thread to be created a tid of 1, and increasing the tid by 1 for each new thread that's created. @@ -881,7 +881,7 @@ if the objects represent the same thread, and false if they don't. =head2 What Threads Are Running? Thread->list returns a list of thread objects, one for each thread -that's currently running. Handy for a number of things, including +that's currently running. Handy for a number of things, including cleaning up at the end of your program: # Loop through all the threads @@ -892,14 +892,14 @@ cleaning up at the end of your program: } } -The example above is just for illustration. It isn't strictly +The example above is just for illustration. It isn't strictly necessary to join all the threads you create, since Perl detaches all the threads before it exits. =head1 A Complete Example Confused yet? It's time for an example program to show some of the -things we've covered. This program finds prime numbers using threads. +things we've covered. 
This program finds prime numbers using threads. 1 #!/usr/bin/perl -w 2 # prime-pthread, courtesy of Tom Christiansen @@ -936,12 +936,12 @@ things we've covered. This program finds prime numbers using threads. 33 $kid->join() if $kid; 34 } -This program uses the pipeline model to generate prime numbers. Each +This program uses the pipeline model to generate prime numbers. Each thread in the pipeline has an input queue that feeds numbers to be checked, a prime number that it's responsible for, and an output queue -that it funnels numbers that have failed the check into. If the thread +that it funnels numbers that have failed the check into. If the thread has a number that's failed its check and there's no child thread, then -the thread must have found a new prime number. In that case, a new +the thread must have found a new prime number. In that case, a new child thread is created for that prime and stuck on the end of the pipeline. @@ -952,20 +952,20 @@ number is, it's a number that's only evenly divisible by itself and 1) The bulk of the work is done by the check_num() subroutine, which takes a reference to its input queue and a prime number that it's -responsible for. After pulling in the input queue and the prime that +responsible for. After pulling in the input queue and the prime that the subroutine's checking (line 20), we create a new queue (line 22) and reserve a scalar for the thread that we're likely to create later (line 21). The while loop from lines 23 to line 31 grabs a scalar off the input queue and checks against the prime this thread is responsible -for. Line 24 checks to see if there's a remainder when we modulo the -number to be checked against our prime. If there is one, the number +for. Line 24 checks to see if there's a remainder when we modulo the +number to be checked against our prime. 
If there is one, the number must not be evenly divisible by our prime, so we need to either pass it on to the next thread if we've created one (line 26) or create a new thread if we haven't. -The new thread creation is line 29. We pass on to it a reference to +The new thread creation is line 29. We pass on to it a reference to the queue we've created, and the prime number we've found. Finally, once the loop terminates (because we got a 0 or undef in the @@ -975,18 +975,18 @@ child and wait for it to exit if we've created a child (Lines 32 and Meanwhile, back in the main thread, we create a queue (line 9) and the initial child thread (line 10), and pre-seed it with the first prime: -2. Then we queue all the numbers from 3 to 1000 for checking (lines +2. Then we queue all the numbers from 3 to 1000 for checking (lines 12-14), then queue a die notice (line 16) and wait for the first child -thread to terminate (line 17). Because a child won't die until its +thread to terminate (line 17). Because a child won't die until its child has died, we know that we're done once we return from the join. -That's how it works. It's pretty simple; as with many Perl programs, +That's how it works. It's pretty simple; as with many Perl programs, the explanation is much longer than the program. =head1 Conclusion A complete thread tutorial could fill a book (and has, many times), -but this should get you well on your way. The final authority on how +but this should get you well on your way. The final authority on how Perl's threads behave is the documention bundled with the Perl distribution, but with what we've covered in this article, you should be well on your way to becoming a threaded Perl expert. @@ -1046,7 +1046,7 @@ France, September 1992, Yves Bekkers and Jacques Cohen, eds. 
Springer, Thanks (in no particular order) to Chaim Frenkel, Steve Fink, Gurusamy Sarathy, Ilya Zakharevich, Benjamin Sugars, Jürgen Christoffel, Joshua Pritikin, and Alan Burlison, for their help in reality-checking and -polishing this article. Big thanks to Tom Christiansen for his rewrite +polishing this article. Big thanks to Tom Christiansen for his rewrite of the prime number generator. =head1 AUTHOR diff --git a/pod/perltie.pod b/pod/perltie.pod index 581b4ab..5611174 100644 --- a/pod/perltie.pod +++ b/pod/perltie.pod @@ -834,7 +834,7 @@ destructor (DESTROY) is called, which is normal for objects that have no more valid references; and thus the file is closed. In the second example, however, we have stored another reference to -the tied object in C<$x>. That means that when untie() gets called +the tied object in $x. That means that when untie() gets called there will still be a valid reference to the object in existence, so the destructor is not called at that time, and thus the file is not closed. The reason there is no output is because the file buffers diff --git a/pod/perltootc.pod b/pod/perltootc.pod new file mode 100644 index 0000000..f7157e8 --- /dev/null +++ b/pod/perltootc.pod @@ -0,0 +1,1337 @@ +=head1 NAME + +perltootc - Tom's OO Tutorial for Class Data in Perl + +=head1 DESCRIPTION + +When designing an object class, you are sometimes faced with the situation +of wanting common state shared by all objects of that class. +Such I<class attributes> act somewhat like global variables for the entire +class, but unlike program-wide globals, class attributes have meaning only to +the class itself. + +Here are a few examples where class attributes might come in handy: + +=over + +=item * + +to keep a count of the objects you've created, or how many are +still extant. + +=item * + +to extract the name or file descriptor for a logfile used by a debugging +method.
+ +=item * + +to access collective data, like the total amount of cash dispensed by +all ATMs in a network in a given day. + +=item * + +to access the last object created by a class, or the most accessed object, +or to retrieve a list of all objects. + +=back + +Unlike a true global, class attributes should not be accessed directly. +Instead, their state should be inspected, and perhaps altered, only +through the mediated access of I<class methods>. These class attributes +accessor methods are similar in spirit and function to accessors used +to manipulate the state of instance attributes on an object. They provide a +clear firewall between interface and implementation. + +You should allow access to class attributes through either the class +name or any object of that class. If we assume that $an_object is of +type Some_Class, and the &Some_Class::population_count method accesses +class attributes, then these two invocations should both be possible, +and almost certainly equivalent. + + Some_Class->population_count() + $an_object->population_count() + +The question is, where do you store the state which that method accesses? +Unlike more restrictive languages like C++, where these are called +static data members, Perl provides no syntactic mechanism to declare +class attributes, any more than it provides a syntactic mechanism to +declare instance attributes. Perl provides the developer with a broad +set of powerful but flexible features that can be uniquely crafted to +the particular demands of the situation. + +A class in Perl is typically implemented in a module. A module consists +of two complementary feature sets: a package for interfacing with the +outside world, and a lexical file scope for privacy. Either of these +two mechanisms can be used to implement class attributes. That means you +get to decide whether to put your class attributes in package variables +or to put them in lexical variables. + +And those aren't the only decisions to make.
If you choose to use package +variables, you can make your class attribute accessor methods either ignorant +of inheritance or sensitive to it. If you choose lexical variables, +you can elect to permit access to them from anywhere in the entire file +scope, or you can limit direct data access exclusively to the methods +implementing those attributes. + +=head1 Class Data as Package Variables + +Because a class in Perl is really just a package, using package variables +to hold class attributes is the most natural choice. This makes it simple +for each class to have its own class attributes. Let's say you have a class +called Some_Class that needs a couple of different attributes that you'd +like to be global to the entire class. The simplest thing to do is to +use package variables like $Some_Class::CData1 and $Some_Class::CData2 +to hold these attributes. But we certainly don't want to encourage +outsiders to touch those data directly, so we provide methods +to mediate access. + +In the accessor methods below, we'll for now just ignore the first +argument--that part to the left of the arrow on method invocation, which +is either a class name or an object reference. + + package Some_Class; + sub CData1 { + shift; # XXX: ignore calling class/object + $Some_Class::CData1 = shift if @_; + return $Some_Class::CData1; + } + sub CData2 { + shift; # XXX: ignore calling class/object + $Some_Class::CData2 = shift if @_; + return $Some_Class::CData2; + } + +This technique is highly legible and should be completely straightforward +to even the novice Perl programmer. By fully qualifying the package +variables, they stand out clearly when reading the code. Unfortunately, +if you misspell one of these, you've introduced an error that's hard +to catch. It's also somewhat disconcerting to see the class name itself +hard-coded in so many places. + +Both these problems can be easily fixed. Just add the C<use strict> +pragma, then pre-declare your package variables.
(The C<our> operator +will be new in 5.006, and will work for package globals just like C<my> +works for scoped lexicals.) + + package Some_Class; + use strict; + our($CData1, $CData2); # our() is new to perl5.006 + sub CData1 { + shift; # XXX: ignore calling class/object + $CData1 = shift if @_; + return $CData1; + } + sub CData2 { + shift; # XXX: ignore calling class/object + $CData2 = shift if @_; + return $CData2; + } + + +As with any other global variable, some programmers prefer to start their +package variables with capital letters. This helps clarity somewhat, but +by no longer fully qualifying the package variables, their significance +can be lost when reading the code. You can fix this easily enough by +choosing better names than were used here. + +=head2 Putting All Your Eggs in One Basket + +Just as the mindless enumeration of accessor methods for instance attributes +grows tedious after the first few (see L<perltoot>), so too does the +repetition begin to grate when listing out accessor methods for class +data. Repetition runs counter to the primary virtue of a programmer: +Laziness, here manifesting as that innate urge every programmer feels +to factor out duplicate code whenever possible. + +Here's what to do. First, make just one hash to hold all class attributes. + + package Some_Class; + use strict; + our %ClassData = ( # our() is new to perl5.006 + CData1 => "", + CData2 => "", + ); + +Using closures (see L<perlref>) and direct access to the package symbol +table (see L<perlmod>), now clone an accessor method for each key in +the %ClassData hash. Each of these methods is used to fetch or store +values to the specific, named class attribute.
+ + for my $datum (keys %ClassData) { + no strict "refs"; # to register new methods in package + *$datum = sub { + shift; # XXX: ignore calling class/object + $ClassData{$datum} = shift if @_; + return $ClassData{$datum}; + } + } + +It's true that you could work out a solution employing an &AUTOLOAD +method, but this approach is unlikely to prove satisfactory. Your +function would have to distinguish between class attributes and object +attributes; it could interfere with inheritance; and it would have to be +careful about DESTROY. Such complexity is uncalled for in most cases, +and certainly in this one. + +You may wonder why we're rescinding strict refs for the loop. We're +manipulating the package's symbol table to introduce new function names +using symbolic references (indirect naming), which the strict pragma +would otherwise forbid. Normally, symbolic references are a dodgy +notion at best. This isn't just because they can be used accidentally +when you aren't meaning to. It's also because for most uses +to which beginning Perl programmers attempt to put symbolic references, +we have much better approaches, like nested hashes or hashes of arrays. +But there's nothing wrong with using symbolic references to manipulate +something that is meaningful only from the perspective of the package +symbol table, like method names or package variables. In other +words, when you want to refer to the symbol table, use symbol references. + +Clustering all the class attributes in one place has several advantages. +They're easy to spot, initialize, and change. The aggregation also +makes them convenient to access externally, such as from a debugger +or a persistence package. The only possible problem is that we don't +automatically know the name of each class's class object, should it have +one. This issue is addressed below in L<"The Eponymous Meta-Object">.
+ +=head2 Inheritance Concerns + +Suppose you have an instance of a derived class, and you access class +data using an inherited method call. Should that end up referring +to the base class's attributes, or to those in the derived class? +How would it work in the earlier examples? The derived class inherits +all the base class's methods, including those that access class attributes. +But what package are the class attributes stored in? + +The answer is that, as written, class attributes are stored in the package into +which those methods were compiled. When you invoke the &CData1 method +on the name of the derived class or on one of that class's objects, the +version shown above is still run, so you'll access $Some_Class::CData1--or +in the method cloning version, C<$Some_Class::ClassData{CData1}>. + +Think of these class methods as executing in the context of their base +class, not in that of their derived class. Sometimes this is exactly +what you want. If Feline subclasses Carnivore, then the population of +Carnivores in the world should go up when a new Feline is born. +But what if you wanted to figure out how many Felines you have apart +from Carnivores? The current approach doesn't support that. + +You'll have to decide on a case-by-case basis whether it makes any sense +for class attributes to be package-relative. If you want it to be so, +then stop ignoring the first argument to the function. Either it will +be a package name if the method was invoked directly on a class name, +or else it will be an object reference if the method was invoked on an +object reference. In the latter case, the ref() function provides the +class of that object. + + package Some_Class; + sub CData1 { + my $obclass = shift; + my $class = ref($obclass) || $obclass; + my $varname = $class . 
"::CData1"; + no strict "refs"; # to access package data symbolically + $$varname = shift if @_; + return $$varname; + } + +And then do likewise for all other class attributes (such as CData2, +etc.) that you wish to access as package variables in the invoking package +instead of the compiling package as we had previously. + +Once again we temporarily disable the strict references ban, because +otherwise we couldn't use the fully-qualified symbolic name for +the package global. This is perfectly reasonable: since all package +variables by definition live in a package, there's nothing wrong with +accessing them via that package's symbol table. That's what it's there +for (well, somewhat). + +What about just using a single hash for everything and then cloning +methods? What would that look like? The only difference would be the +closure used to produce new method entries for the class's symbol table. + + no strict "refs"; + *$datum = sub { + my $obclass = shift; + my $class = ref($obclass) || $obclass; + my $varname = $class . "::ClassData"; + $varname->{$datum} = shift if @_; + return $varname->{$datum}; + } + +=head2 The Eponymous Meta-Object + +It could be argued that the %ClassData hash in the previous example is +neither the most imaginative nor the most intuitive of names. Is there +something else that might make more sense, be more useful, or both? + +As it happens, yes, there is. For the "class meta-object", we'll use +a package variable of the same name as the package itself. Within the +scope of a package Some_Class declaration, we'll use the eponymously +named hash %Some_Class as that class's meta-object. (Using an eponymously +named hash is somewhat reminiscent of classes that name their constructors +eponymously in the Python or C++ fashion. That is, class Some_Class would +use &Some_Class::Some_Class as a constructor, probably even exporting that +name as well. The StrNum class in Recipe 13.14 in I +does this, if you're looking for an example.) 
+ +This predictable approach has many benefits, including having a well-known +identifier to aid in debugging, transparent persistence, +or checkpointing. It's also the obvious name for monadic classes and +translucent attributes, discussed later. + +Here's an example of such a class. Notice how the name of the +hash storing the meta-object is the same as the name of the package +used to implement the class. + + package Some_Class; + use strict; + + # create class meta-object using that most perfect of names + our %Some_Class = ( # our() is new to perl5.006 + CData1 => "", + CData2 => "", + ); + + # this accessor is calling-package-relative + sub CData1 { + my $obclass = shift; + my $class = ref($obclass) || $obclass; + no strict "refs"; # to access eponymous meta-object + $class->{CData1} = shift if @_; + return $class->{CData1}; + } + + # but this accessor is not + sub CData2 { + shift; # XXX: ignore calling class/object + no strict "refs"; # to access eponymous meta-object + __PACKAGE__ -> {CData2} = shift if @_; + return __PACKAGE__ -> {CData2}; + } + +In the second accessor method, the __PACKAGE__ notation was used for +two reasons. First, to avoid hardcoding the literal package name +in the code in case we later want to change that name. Second, to +clarify to the reader that what matters here is the package currently +being compiled into, not the package of the invoking object or class. +If the long sequence of non-alphabetic characters bothers you, you can +always put the __PACKAGE__ in a variable first. + + sub CData2 { + shift; # XXX: ignore calling class/object + no strict "refs"; # to access eponymous meta-object + my $class = __PACKAGE__; + $class->{CData2} = shift if @_; + return $class->{CData2}; + } + +Even though we're using symbolic references for good not evil, some +folks tend to become unnerved when they see so many places with strict +ref checking disabled. 
Given a symbolic reference, you can always +produce a real reference (the reverse is not true, though). So we'll +create a subroutine that does this conversion for us. If invoked as a +function of no arguments, it returns a reference to the compiling class's +eponymous hash. Invoked as a class method, it returns a reference to +the eponymous hash of its caller. And when invoked as an object method, +this function returns a reference to the eponymous hash for whatever +class the object belongs to. + + package Some_Class; + use strict; + + our %Some_Class = ( # our() is new to perl5.006 + CData1 => "", + CData2 => "", + ); + + # tri-natured: function, class method, or object method + sub _classobj { + my $obclass = shift || __PACKAGE__; + my $class = ref($obclass) || $obclass; + no strict "refs"; # to convert sym ref to real one + return \%$class; + } + + for my $datum (keys %{ _classobj() } ) { + # turn off strict refs so that we can + # register a method in the symbol table + no strict "refs"; + *$datum = sub { + use strict "refs"; + my $self = shift->_classobj(); + $self->{$datum} = shift if @_; + return $self->{$datum}; + } + } + +=head2 Indirect References to Class Data + +A reasonably common strategy for handling class attributes is to store +a reference to each package variable on the object itself. This is +a strategy you've probably seen before, such as in L<perltoot> and +L<perlbot>, but there may be variations in the example below that you +haven't thought of before.
+ + package Some_Class; + our($CData1, $CData2); # our() is new to perl5.006 + + sub new { + my $obclass = shift; + return bless my $self = { + ObData1 => "", + ObData2 => "", + CData1 => \$CData1, + CData2 => \$CData2, + } => (ref $obclass || $obclass); + } + + sub ObData1 { + my $self = shift; + $self->{ObData1} = shift if @_; + return $self->{ObData1}; + } + + sub ObData2 { + my $self = shift; + $self->{ObData2} = shift if @_; + return $self->{ObData2}; + } + + sub CData1 { + my $self = shift; + my $dataref = ref $self + ? $self->{CData1} + : \$CData1; + $$dataref = shift if @_; + return $$dataref; + } + + sub CData2 { + my $self = shift; + my $dataref = ref $self + ? $self->{CData2} + : \$CData2; + $$dataref = shift if @_; + return $$dataref; + } + +As written above, a derived class will inherit these methods, which +will consequently access package variables in the base class's package. +This is not necessarily expected behavior in all circumstances. Here's an +example that uses a variable meta-object, taking care to access the +proper package's data. 
+ + package Some_Class; + use strict; + + our %Some_Class = ( # our() is new to perl5.006 + CData1 => "", + CData2 => "", + ); + + sub _classobj { + my $self = shift; + my $class = ref($self) || $self; + no strict "refs"; + # get (hard) ref to eponymous meta-object + return \%$class; + } + + sub new { + my $obclass = shift; + my $classobj = $obclass->_classobj(); + bless my $self = { + ObData1 => "", + ObData2 => "", + CData1 => \$classobj->{CData1}, + CData2 => \$classobj->{CData2}, + } => (ref $obclass || $obclass); + return $self; + } + + sub ObData1 { + my $self = shift; + $self->{ObData1} = shift if @_; + return $self->{ObData1}; + } + + sub ObData2 { + my $self = shift; + $self->{ObData2} = shift if @_; + return $self->{ObData2}; + } + + sub CData1 { + my $self = shift; + $self = $self->_classobj() unless ref $self; + my $dataref = $self->{CData1}; + $$dataref = shift if @_; + return $$dataref; + } + + sub CData2 { + my $self = shift; + $self = $self->_classobj() unless ref $self; + my $dataref = $self->{CData2}; + $$dataref = shift if @_; + return $$dataref; + } + +Not only are we now strict refs clean, using an eponymous meta-object +seems to make the code cleaner. Unlike the previous version, this one +does something interesting in the face of inheritance: it accesses the +class meta-object in the invoking class instead of the one into which +the method was initially compiled. + +You can easily access data in the class meta-object, making +it easy to dump the complete class state using an external mechanism such +as when debugging or implementing a persistent class. This works because +the class meta-object is a package variable, has a well-known name, and +clusters all its data together. (Transparent persistence +is not always feasible, but it's certainly an appealing idea.) + +There's still no check that object accessor methods have not been +invoked on a class name. If strict ref checking is enabled, you'd +blow up. 
If not, then you get the eponymous meta-object. What you do +with--or about--this is up to you. The next two sections demonstrate +innovative uses for this powerful feature. + +=head2 Monadic Classes + +Some of the standard modules shipped with Perl provide class interfaces +without any attribute methods whatsoever. The most commonly used module +not numbered amongst the pragmata, the Exporter module, is a class with +neither constructors nor attributes. Its job is simply to provide a +standard interface for modules wishing to export part of their namespace +into that of their caller. Modules use the Exporter's &import method by +setting their inheritance list in their package's @ISA array to mention +"Exporter". But class Exporter provides no constructor, so you can't +have several instances of the class. In fact, you can't have any--it +just doesn't make any sense. All you get is its methods. Its interface +contains no statefulness, so state data is wholly superfluous. + +Another sort of class that pops up from time to time is one that supports +a unique instance. Such classes are called I<monadic classes>, or less +formally, I<singletons> or I<highlander classes>. + +If a class is monadic, where do you store its state, that is, +its attributes? How do you make sure that there's never more than +one instance? While you could merely use a slew of package variables, +it's a lot cleaner to use the eponymously named hash. Here's a complete +example of a monadic class: + + package Cosmos; + %Cosmos = (); + + # accessor method for "name" attribute + sub name { + my $self = shift; + $self->{name} = shift if @_; + return $self->{name}; + } + + # read-only accessor method for "birthday" attribute + sub birthday { + my $self = shift; + die "can't reset birthday" if @_; # XXX: croak() is better + return $self->{birthday}; + } + + # accessor method for "stars" attribute + sub stars { + my $self = shift; + $self->{stars} = shift if @_; + return $self->{stars}; + } + + # oh my - one of our stars just went out!
+ sub supernova { + my $self = shift; + my $count = $self->stars(); + $self->stars($count - 1) if $count > 0; + } + + # constructor/initializer method - fix by reboot + sub bigbang { + my $self = shift; + %$self = ( + name => "the world according to tchrist", + birthday => time(), + stars => 0, + ); + return $self; # yes, it's probably a class. SURPRISE! + } + + # After the class is compiled, but before any use or require + # returns, we start off the universe with a bang. + __PACKAGE__ -> bigbang(); + +Hold on, that doesn't look like anything special. Those attribute +accessors look no different than they would if this were a regular class +instead of a monadic one. The crux of the matter is there's nothing +that says that $self must hold a reference to a blessed object. It merely +has to be something you can invoke methods on. Here the package name +itself, Cosmos, works as an object. Look at the &supernova method. Is that +a class method or an object method? The answer is that static analysis +cannot reveal the answer. Perl doesn't care, and neither should you. +In the three attribute methods, C<%$self> is really accessing the %Cosmos +package variable. + +If like Stephen Hawking, you posit the existence of multiple, sequential, +and unrelated universes, then you can invoke the &bigbang method yourself +at any time to start everything all over again. You might think of +&bigbang as more of an initializer than a constructor, since the function +doesn't allocate new memory; it only initializes what's already there. +But like any other constructor, it does return a scalar value to use +for later method invocations. + +Imagine that some day in the future, you decide that one universe just +isn't enough. You could write a new class from scratch, but you already +have an existing class that does what you want--except that it's monadic, +and you want more than just one cosmos. + +That's what code reuse via subclassing is all about. 
Look how short +the new code is: + + package Multiverse; + use Cosmos; + @ISA = qw(Cosmos); + + sub new { + my $protoverse = shift; + my $class = ref($protoverse) || $protoverse; + my $self = {}; + return bless($self, $class)->bigbang(); + } + 1; + +Because we were careful to be good little creators when we designed our +Cosmos class, we can now reuse it without touching a single line of code +when it comes time to write our Multiverse class. The same code that +worked when invoked as a class method continues to work perfectly well +when invoked against separate instances of a derived class. + +The astonishing thing about the Cosmos class above is that the value +returned by the &bigbang "constructor" is not a reference to a blessed +object at all. It's just the class's own name. A class name is, for +virtually all intents and purposes, a perfectly acceptable object. +It has state, behavior, and identity, the three crucial components +of an object system. It even manifests inheritance, polymorphism, +and encapsulation. And what more can you ask of an object? + +To understand object orientation in Perl, it's important to recognize the +unification of what other programming languages might think of as class +methods and object methods into just plain methods. "Class methods" +and "object methods" are distinct only in the compartmentalizing mind +of the Perl programmer, not in the Perl language itself. + +Along those same lines, a constructor is nothing special either, which +is one reason why Perl has no pre-ordained name for them. "Constructor" +is just an informal term loosely used to describe a method that returns +a scalar value that you can make further method calls against. So long +as it's either a class name or an object reference, that's good enough. +It doesn't even have to be a reference to a brand new object. + +You can have as many--or as few--constructors as you want, and you can +name them whatever you care to.
Blindly and obediently using new() +for each and every constructor you ever write is to speak Perl with +such a severe C++ accent that you do a disservice to both languages. +There's no reason to insist that each class have but one constructor, +or that that constructor be named new(), or that that constructor be +used solely as a class method and not an object method. + +The next section shows how useful it can be to further distance ourselves +from any formal distinction between class method calls and object method +calls, both in constructors and in accessor methods. + +=head2 Translucent Attributes + +A package's eponymous hash can be used for more than just containing +per-class, global state data. It can also serve as a sort of template +containing default settings for object attributes. These default +settings can then be used in constructors for initialization of a +particular object. The class's eponymous hash can also be used to +implement I<translucent attributes>. A translucent attribute is one +that has a class-wide default. Each object can set its own value for the +attribute, in which case C<$object-E<gt>attribute()> returns that value. +But if no value has been set, then C<$object-E<gt>attribute()> returns +the class-wide default. + +We'll apply something of a copy-on-write approach to these translucent +attributes. If you're just fetching values from them, you get +translucency. But if you store a new value to them, that new value is +set on the current object. On the other hand, if you use the class as +an object and store the attribute value directly on the class, then the +meta-object's value changes, and later fetch operations on objects with +uninitialized values for those attributes will retrieve the meta-object's +new values. Objects with their own initialized values, however, won't +see any change. + +Let's look at some concrete examples of using these properties before we +show how to implement them.
Suppose that a class named Vermin +had a translucent data attribute called "color". First you set the color +in the meta-object, then you create three objects using a constructor +that happens to be named &spawn. + + use Vermin; + Vermin->color("vermilion"); + + $ob1 = Vermin->spawn(); # so that's where Jedi come from + $ob2 = Vermin->spawn(); + $ob3 = Vermin->spawn(); + + print $ob3->color(); # prints "vermilion" + +Each of these objects' colors is now "vermilion", because that's the +meta-object's value for that attribute, and these objects do not have +individual color values set. + +Changing the attribute on one object has no effect on other objects +previously created. + + $ob3->color("chartreuse"); + print $ob3->color(); # prints "chartreuse" + print $ob1->color(); # prints "vermilion", translucently + +If you now use $ob3 to spawn off another object, the new object will +take the color its parent held, which now happens to be "chartreuse". +That's because the constructor uses the invoking object as its template +for initializing attributes. When that invoking object is the +class name, the object used as a template is the eponymous meta-object. +When the invoking object is a reference to an instantiated object, the +&spawn constructor uses that existing object as a template. + + $ob4 = $ob3->spawn(); # $ob3 now template, not %Vermin + print $ob4->color(); # prints "chartreuse" + +Any actual values set on the template object will be copied to the +new object. But attributes undefined in the template object, being +translucent, will remain undefined and consequently translucent in the +new one as well.
+ +Now let's change the color attribute on the entire class: + + Vermin->color("azure"); + print $ob1->color(); # prints "azure" + print $ob2->color(); # prints "azure" + print $ob3->color(); # prints "chartreuse" + print $ob4->color(); # prints "chartreuse" + +That color change took effect only in the first pair of objects, which +were still translucently accessing the meta-object's values. The second +pair had per-object initialized colors, and so didn't change. + +One important question remains. Changes to the meta-object are reflected +in translucent attributes in the entire class, but what about +changes to discrete objects? If you change the color of $ob3, does the +value of $ob4 see that change? Or vice-versa. If you change the color +of $ob4, does then the value of $ob3 shift? + + $ob3->color("amethyst"); + print $ob3->color(); # prints "amethyst" + print $ob4->color(); # hmm: "chartreuse" or "amethyst"? + +While one could argue that in certain rare cases it should, let's not +do that. Good taste aside, we want the answer to the question posed in +the comment above to be "chartreuse", not "amethyst". So we'll treat +these attributes similar to the way process attributes like environment +variables, user and group IDs, or the current working directory are +treated across a fork(). You can change only yourself, but you will see +those changes reflected in your unspawned children. Changes to one object +will propagate neither up to the parent nor down to any existing child objects. +Those objects made later, however, will see the changes. + +If you have an object with an actual attribute value, and you want to +make that object's attribute value translucent again, what do you do? +Let's design the class so that when you invoke an accessor method with +C<undef> as its argument, that attribute returns to translucency. + + $ob4->color(undef); # back to "azure" + +Here's a complete implementation of Vermin as described above.
+ + package Vermin; + + # here's the class meta-object, eponymously named. + # it holds all class attributes, and also all instance attributes + # so the latter can be used for both initialization + # and translucency. + + our %Vermin = ( # our() is new to perl5.006 + PopCount => 0, # capital for class attributes + color => "beige", # small for instance attributes + ); + + # constructor method + # invoked as class method or object method + sub spawn { + my $obclass = shift; + my $class = ref($obclass) || $obclass; + my $self = {}; + bless($self, $class); + $class->{PopCount}++; + # init fields from invoking object, or omit if + # invoking object is the class to provide translucency + %$self = %$obclass if ref $obclass; + return $self; + } + + # translucent accessor for "color" attribute + # invoked as class method or object method + sub color { + my $self = shift; + my $class = ref($self) || $self; + + # handle class invocation + unless (ref $self) { + $class->{color} = shift if @_; + return $class->{color} + } + + # handle object invocation + $self->{color} = shift if @_; + if (defined $self->{color}) { # not exists! + return $self->{color}; + } else { + return $class->{color}; + } + } + + # accessor for "PopCount" class attribute + # invoked as class method or object method + # but uses object solely to locate meta-object + sub population { + my $obclass = shift; + my $class = ref($obclass) || $obclass; + return $class->{PopCount}; + } + + # instance destructor + # invoked only as object method + sub DESTROY { + my $self = shift; + my $class = ref $self; + $class->{PopCount}--; + } + +Here are a couple of helper methods that might be convenient. They aren't +accessor methods at all. They're used to detect accessibility of data +attributes. The &is_translucent method determines whether a particular +object attribute is coming from the meta-object. The &has_attribute +method detects whether a class implements a particular property at all. 
+It could also be used to distinguish undefined properties from non-existent +ones. + + # detect whether an object attribute is translucent + # (typically?) invoked only as object method + sub is_translucent { + my($self, $attr) = @_; + return !defined $self->{$attr}; + } + + # test for presence of attribute in class + # invoked as class method or object method + sub has_attribute { + my($self, $attr) = @_; + my $class = ref($self) || $self; + return exists $class->{$attr}; + } + +If you prefer to install your accessors more generically, you can make +use of the upper-case versus lower-case convention to register into the +package appropriate methods cloned from generic closures. + + for my $datum (keys %{ +__PACKAGE__ }) { + *$datum = ($datum =~ /^[A-Z]/) + ? sub { # install class accessor + my $obclass = shift; + my $class = ref($obclass) || $obclass; + return $class->{$datum}; + } + : sub { # install translucent accessor + my $self = shift; + my $class = ref($self) || $self; + unless (ref $self) { + $class->{$datum} = shift if @_; + return $class->{$datum} + } + $self->{$datum} = shift if @_; + return defined $self->{$datum} + ? $self -> {$datum} + : $class -> {$datum} + } + } + +Translations of this closure-based approach into C++, Java, and Python +have been left as exercises for the reader. Be sure to send us mail as +soon as you're done. + +=head1 Class Data as Lexical Variables + +=head2 Privacy and Responsibility + +Unlike conventions used by some Perl programmers, in the previous +examples, we didn't prefix the package variables used for class attributes +with an underscore, nor did we do so for the names of the hash keys used +for instance attributes. You don't need little markers on data names to +suggest nominal privacy on attribute variables or hash keys, because these +are B<all> notionally private!
Outsiders have no business whatsoever +playing with anything within a class save through the mediated access of +its documented interface; in other words, through method invocations. +And not even through just any method, either. Methods that begin with +an underscore are traditionally considered off-limits outside the class. +If outsiders skip the documented method interface to poke around the +internals of your class and end up breaking something, that's not your +fault--it's theirs. + +Perl believes in individual responsibility rather than mandated control. +Perl respects you enough to let you choose your own preferred level of +pain, or of pleasure. Perl believes that you are creative, intelligent, +and capable of making your own decisions--and fully expects you to +take complete responsibility for your own actions. In a perfect world, +these admonitions alone would suffice, and everyone would be intelligent, +responsible, happy, and creative. And careful. One probably shouldn't +forget careful, and that's a good bit harder to expect. Even Einstein +would take wrong turns by accident and end up lost in the wrong part +of town. + +Some folks get the heebie-jeebies when they see package variables +hanging out there for anyone to reach over and alter them. Some folks +live in constant fear that someone somewhere might do something wicked. +The solution to that problem is simply to fire the wicked, of course. +But unfortunately, it's not as simple as all that. These cautious +types are also afraid that they or others will do something not so +much wicked as careless, whether by accident or out of desperation. +If we fire everyone who ever gets careless, pretty soon there won't be +anybody left to get any work done. + +Whether it's needless paranoia or sensible caution, this uneasiness can +be a problem for some people. 
We can take the edge off their discomfort +by providing the option of storing class attributes as lexical variables +instead of as package variables. The my() operator is the source of +all privacy in Perl, and it is a powerful form of privacy indeed. + +It is widely perceived, and indeed has often been written, that Perl +provides no data hiding, that it affords the class designer no privacy +nor isolation, merely a rag-tag assortment of weak and unenforcible +social conventions instead. This perception is demonstrably false and +easily disproven. In the next section, we show how to implement forms +of privacy that are far stronger than those provided in nearly any +other object-oriented language. + +=head2 File-Scoped Lexicals + +A lexical variable is visible only through the end of its static scope. +That means that the only code able to access that variable is code +residing textually below the my() operator through the end of its block +if it has one, or through the end of the current file if it doesn't. + +Starting again with our simplest example given at the start of this +document, we replace our() variables with my() versions. + + package Some_Class; + my($CData1, $CData2); # file scope, not in any package + sub CData1 { + shift; # XXX: ignore calling class/object + $CData1 = shift if @_; + return $CData1; + } + sub CData2 { + shift; # XXX: ignore calling class/object + $CData2 = shift if @_; + return $CData2; + } + +So much for that old $Some_Class::CData1 package variable and its brethren! +Those are gone now, replaced with lexicals. No one outside the +scope can reach in and alter the class state without resorting to the +documented interface. Not even subclasses or superclasses of +this one have unmediated access to $CData1. They have to invoke the &CData1 +method against Some_Class or an instance thereof, just like anybody else. 
+ +To be scrupulously honest, that last statement assumes you haven't packed +several classes together into the same file scope, nor strewn your class +implementation across several different files. Accessibility of those +variables is based uniquely on the static file scope. It has nothing to +do with the package. That means that code in a different file but +the same package (class) could not access those variables, yet code in the +same file but a different package (class) could. There are sound reasons +why we usually suggest a one-to-one mapping between files and packages +and modules and classes. You don't have to stick to this suggestion if +you really know what you're doing, but you're apt to confuse yourself +otherwise, especially at first. + +If you'd like to aggregate your class attributes into one lexically scoped, +composite structure, you're perfectly free to do so. + + package Some_Class; + my %ClassData = ( + CData1 => "", + CData2 => "", + ); + sub CData1 { + shift; # XXX: ignore calling class/object + $ClassData{CData1} = shift if @_; + return $ClassData{CData1}; + } + sub CData2 { + shift; # XXX: ignore calling class/object + $ClassData{CData2} = shift if @_; + return $ClassData{CData2}; + } + +To make this more scalable as other class attributes are added, we can +again register closures into the package symbol table to create accessor +methods for them. + + package Some_Class; + my %ClassData = ( + CData1 => "", + CData2 => "", + ); + for my $datum (keys %ClassData) { + no strict "refs"; + *$datum = sub { + shift; # XXX: ignore calling class/object + $ClassData{$datum} = shift if @_; + return $ClassData{$datum}; + }; + } + +Requiring even your own class to use accessor methods like anybody else is +probably a good thing. But demanding and expecting that everyone else, +be they subclass or superclass, friend or foe, will all come to your +object through mediation is more than just a good idea. It's absolutely +critical to the model. 
Let there be in your mind no such thing as +"public" data, nor even "protected" data, which is a seductive but +ultimately destructive notion. Both will come back to bite at you. +That's because as soon as you take that first step out of the solid +position in which all state is considered completely private, save from the +perspective of its own accessor methods, you have violated the envelope. +And, having pierced that encapsulating envelope, you shall doubtless +someday pay the price when future changes in the implementation break +unrelated code. Considering that avoiding this infelicitous outcome was +precisely why you consented to suffer the slings and arrows of obsequious +abstraction by turning to object orientation in the first place, such +breakage seems unfortunate in the extreme. + +=head2 More Inheritance Concerns + +Suppose that Some_Class were used as a base class from which to derive +Another_Class. If you invoke a &CData method on the derived class or +on an object of that class, what do you get? Would the derived class +have its own state, or would it piggyback on its base class's versions +of the class attributes? + +The answer is that under the scheme outlined above, the derived class +would B<not> have its own state data. As before, whether you consider +this a good thing or a bad one depends on the semantics of the classes +involved. + +The cleanest, sanest, simplest way to address per-class state in a +lexical is for the derived class to override its base class's version +of the method that accesses the class attributes. Since the actual method +called is the one in the object's derived class if this exists, you +automatically get per-class state this way. Any urge to provide an +unadvertised method to sneak out a reference to the %ClassData hash +should be strenuously resisted.
+ +As with any other overridden method, the implementation in the +derived class always has the option of invoking its base class's +version of the method in addition to its own. Here's an example: + + package Another_Class; + @ISA = qw(Some_Class); + + my %ClassData = ( + CData1 => "", + ); + + sub CData1 { + my($self, $newvalue) = @_; + if (@_ > 1) { + # set locally first + $ClassData{CData1} = $newvalue; + + # then pass the buck up to the first + # overridden version, if there is one + if ($self->can("SUPER::CData1")) { + $self->SUPER::CData1($newvalue); + } + } + return $ClassData{CData1}; + } + +Those dabbling in multiple inheritance might be concerned +about there being more than one override. + + for my $parent (@ISA) { + my $methname = $parent . "::CData1"; + if ($self->can($methname)) { + $self->$methname($newvalue); + } + } + +Because the &UNIVERSAL::can method returns a reference +to the function directly, you can use this directly +for a significant performance improvement: + + for my $parent (@ISA) { + if (my $coderef = $self->can($parent . "::CData1")) { + $self->$coderef($newvalue); + } + } + +=head2 Locking the Door and Throwing Away the Key + +As currently implemented, any code within the same scope as the +file-scoped lexical %ClassData can alter that hash directly. Is that +ok? Is it acceptable or even desirable to allow other parts of the +implementation of this class to access class attributes directly? + +That depends on how careful you want to be. Think back to the Cosmos +class. If the &supernova method had directly altered $Cosmos::Stars or +C<$Cosmos::Cosmos{stars}>, then we wouldn't have been able to reuse the +class when it came to inventing a Multiverse. So letting even the class +itself access its own class attributes without the mediating intervention of +properly designed accessor methods is probably not a good idea after all. 
+ +Restricting access to class attributes from the class itself is usually +not enforcible even in strongly object-oriented languages. But in Perl, +you can. + +Here's one way: + + package Some_Class; + + { # scope for hiding $CData1 + my $CData1; + sub CData1 { + shift; # XXX: unused + $CData1 = shift if @_; + return $CData1; + } + } + + { # scope for hiding $CData2 + my $CData2; + sub CData2 { + shift; # XXX: unused + $CData2 = shift if @_; + return $CData2; + } + } + +No one--absolutely no one--is allowed to read or write the class +attributes without the mediation of the managing accessor method, since +only that method has access to the lexical variable it's managing. +This use of mediated access to class attributes is a form of privacy far +stronger than most OO languages provide. + +The repetition of code used to create per-datum accessor methods chafes +at our Laziness, so we'll again use closures to create similar +methods. + + package Some_Class; + + { # scope for ultra-private meta-object for class attributes + my %ClassData = ( + CData1 => "", + CData2 => "", + ); + + for my $datum (keys %ClassData ) { + no strict "refs"; + *$datum = sub { + use strict "refs"; + my ($self, $newvalue) = @_; + $ClassData{$datum} = $newvalue if @_ > 1; + return $ClassData{$datum}; + } + } + + } + +The closure above can be modified to take inheritance into account using +the &UNIVERSAL::can method and SUPER as shown previously. + +=head2 Translucency Revisited + +The Vermin class used to demonstrate translucency used an eponymously +named package variable, %Vermin, as its meta-object. If you prefer to +use absolutely no package variables beyond those necessary to appease +inheritance or possibly the Exporter, this strategy is closed to you. +That's too bad, because translucent attributes are an appealing +technique, so it would be valuable to devise an implementation using +only lexicals. + +There's a second reason why you might wish to avoid the eponymous +package hash.
If you use class names with double-colons in them, you +would end up poking around somewhere you might not have meant to poke. + + package Vermin; + $class = "Vermin"; + $class->{PopCount}++; + # accesses $Vermin::Vermin{PopCount} + + package Vermin::Noxious; + $class = "Vermin::Noxious"; + $class->{PopCount}++; + # accesses $Vermin::Noxious{PopCount} + +In the first case, because the class name had no double-colons, we got +the hash in the current package. But in the second case, instead of +getting some hash in the current package, we got the hash %Noxious in +the Vermin package. (The noxious vermin just invaded another package and +sprayed their data around it. :-) Perl doesn't support relative packages +in its naming conventions, so any double-colons trigger a fully-qualified +lookup instead of just looking in the current package. + +In practice, it is unlikely that the Vermin class had an existing +package variable named %Noxious that you just blew away. If you're +still mistrustful, you could always stake out your own territory +where you know the rules, such as using Eponymous::Vermin::Noxious or +Hieronymus::Vermin::Boschious or Leave_Me_Alone::Vermin::Noxious as class +names instead. Sure, it's in theory possible that someone else has +a class named Eponymous::Vermin with its own %Noxious hash, but this +kind of thing is always true. There's no arbiter of package names. +It's always the case that globals like @Cwd::ISA would collide if more +than one class uses the same Cwd package. + +If this still leaves you with an uncomfortable twinge of paranoia, +we have another solution for you. There's nothing that says that you +have to have a package variable to hold a class meta-object, either for +monadic classes or for translucent attributes. Just code up the methods +so that they access a lexical instead. + +Here's another implementation of the Vermin class with semantics identical +to those given previously, but this time using no package variables. 
+ + package Vermin; + + + # Here's the class meta-object, eponymously named. + # It holds all class data, and also all instance data + # so the latter can be used for both initialization + # and translucency. it's a template. + my %ClassData = ( + PopCount => 0, # capital for class attributes + color => "beige", # small for instance attributes + ); + + # constructor method + # invoked as class method or object method + sub spawn { + my $obclass = shift; + my $class = ref($obclass) || $obclass; + my $self = {}; + bless($self, $class); + $ClassData{PopCount}++; + # init fields from invoking object, or omit if + # invoking object is the class to provide translucency + %$self = %$obclass if ref $obclass; + return $self; + } + + # translucent accessor for "color" attribute + # invoked as class method or object method + sub color { + my $self = shift; + + # handle class invocation + unless (ref $self) { + $ClassData{color} = shift if @_; + return $ClassData{color} + } + + # handle object invocation + $self->{color} = shift if @_; + if (defined $self->{color}) { # not exists! + return $self->{color}; + } else { + return $ClassData{color}; + } + } + + # class attribute accessor for "PopCount" attribute + # invoked as class method or object method + sub population { + return $ClassData{PopCount}; + } + + # instance destructor; invoked only as object method + sub DESTROY { + $ClassData{PopCount}--; + } + + # detect whether an object attribute is translucent + # (typically?) invoked only as object method + sub is_translucent { + my($self, $attr) = @_; + $self = \%ClassData if !ref $self; + return !defined $self->{$attr}; + } + + # test for presence of attribute in class + # invoked as class method or object method + sub has_attribute { + my($self, $attr) = @_; + return exists $ClassData{$attr}; + } + +=head1 NOTES + +Inheritance is a powerful but subtle device, best used only after careful +forethought and design. 
Aggregation instead of inheritance is often a +better approach. + +We use the hypothetical our() syntax for package variables. It works +like C<use vars>, but looks like my(). It should be in this summer's +major release (5.006) of perl--we hope. + +You can't use file-scoped lexicals in conjunction with the SelfLoader +or the AutoLoader, because they alter the lexical scope in which the +module's methods wind up getting compiled. + +The usual mealy-mouthed package-mungeing doubtless applies to setting +up names of object attributes. For example, C<$self-E<gt>{ObData1}> +should probably be C<$self-E<gt>{ __PACKAGE__ . "_ObData1" }>, but that +would just confuse the examples. + +=head1 SEE ALSO + +L<perltoot>, L<perlobj>, L<perlmod>, and L<perlbot>. + +The Tie::SecureHash module from CPAN is worth checking out. + +=head1 AUTHOR AND COPYRIGHT + +Copyright (c) 1999 Tom Christiansen. +All rights reserved. + +When included as part of the Standard Version of Perl, or as part of +its complete documentation whether printed or otherwise, this work +may be distributed only under the terms of Perl's Artistic License. +Any distribution of this file or derivatives thereof I<outside> +of that package require that special arrangements be made with +the copyright holder. + +Irrespective of its distribution, all code examples in this file +are hereby placed into the public domain. You are permitted and +encouraged to use this code in your own programs for fun +or for profit as you see fit. A simple comment in the code giving +credit would be courteous but is not required. + +=head1 ACKNOWLEDGEMENTS + +Russ Allbery, Jon Orwant, Randy Ray, Larry Rosler, Nat Torkington, +and Stephen Warren all contributed suggestions and corrections to this +piece. Thanks especially to Damian Conway for his ideas and feedback, +and without whose indirect prodding I might never have taken the time +to show others how much Perl has to offer in the way of objects once +you start thinking outside the tiny little box that today's "popular" +object-oriented languages enforce.
+ +=head1 HISTORY + +Last edit: Fri May 21 15:47:56 MDT 1999 diff --git a/pod/perltrap.pod b/pod/perltrap.pod index 852d8e9..321c86d 100644 --- a/pod/perltrap.pod +++ b/pod/perltrap.pod @@ -22,7 +22,7 @@ The English module, loaded via use English; allows you to refer to special variables (like C<$/>) with names (like -C<$RS>), as though they were in B; see L for details. +$RS), as though they were in B; see L for details. =item * @@ -160,7 +160,7 @@ You must use C rather than C. The C and C keywords from C become in Perl C and C, respectively. -Unlike in C, these do I work within a C construct. +Unlike in C, these do I work within a C construct. =item * @@ -305,7 +305,7 @@ file read is the sole condition in a while loop: =item * -Remember not to use "C<=>" when you need "C<=~>"; +Remember not to use C<=> when you need C<=~>; these two constructs are quite different: $x = /foo/; @@ -1056,7 +1056,7 @@ All types of RE traps. =item * Regular Expression C now does no interpolation on either side. It used to -interpolate C<$lhs> but not C<$rhs>. (And still does not match a literal +interpolate $lhs but not $rhs. (And still does not match a literal '$' in string) $a=1;$b=2; @@ -1095,7 +1095,7 @@ the very first time in any such closure. For instance, if you say } build_match() will always return a sub which matches the contents of -C<$left> and C<$right> as they were the I time that build_match() +$left and $right as they were the I time that build_match() was called, not as they are in the current call. This is probably a bug, and may change in future versions of Perl. @@ -1327,7 +1327,7 @@ Note that you can C to ward off such trappiness under perl5. =item * Interpolation The construct "this is $$x" used to interpolate the pid at that -point, but now apparently tries to dereference C<$x>. C<$$> by itself still +point, but now apparently tries to dereference $x. C<$$> by itself still works fine, however. 
print "this is $$x\n"; diff --git a/pod/perlvar.pod b/pod/perlvar.pod index 5c851d9..b9c8195 100644 --- a/pod/perlvar.pod +++ b/pod/perlvar.pod @@ -17,21 +17,12 @@ at the top of your program. This will alias all the short names to the long names in the current package. Some even have medium names, generally borrowed from B. -Due to an unfortunate accident of Perl's implementation, "C" -imposes a considerable performance penalty on all regular expression -matches in a program, regardless of whether they occur in the scope of -"C". For that reason, saying "C" in -libraries is strongly discouraged. See the Devel::SawAmpersand module -documentation from CPAN -(http://www.perl.com/CPAN/modules/by-module/Devel/Devel-SawAmpersand-0.10.readme) -for more information. - -To go a step further, those variables that depend on the currently -selected filehandle may instead (and preferably) be set by calling an -object method on the FileHandle object. (Summary lines below for this -contain the word HANDLE.) First you must say +If you don't mind the performance hit, variables that depend on the +currently selected filehandle may instead be set by calling an +appropriate object method on the IO::Handle object. (Summary lines +below for this contain the word HANDLE.) First you must say - use FileHandle; + use IO::Handle; after which you may use either @@ -41,11 +32,13 @@ or more safely, HANDLE->method(EXPR) -Each of the methods returns the old value of the FileHandle attribute. +Each of the methods returns the old value of the IO::Handle attribute. The methods each take an optional EXPR, which if supplied specifies the -new value for the FileHandle attribute in question. If not supplied, +new value for the IO::Handle attribute in question. If not supplied, most of the methods do nothing to the current value, except for autoflush(), which will assume a 1 for you, just to be different. 
+It costs quite a bit to load in the IO::Handle class, so you should +learn how to use the regular built-in variables. A few of these variables are considered "read-only". This means that if you try to assign to this variable, either directly or indirectly through @@ -56,7 +49,6 @@ arrays, then the hashes (except $^M was added in the wrong place). This is somewhat obscured by the fact that %ENV and %SIG are listed as $ENV{expr} and $SIG{expr}. - =over 8 =item $ARG @@ -66,7 +58,7 @@ $ENV{expr} and $SIG{expr}. The default input and pattern-searching space. The following pairs are equivalent: - while (<>) {...} # equivalent in only while! + while (<>) {...} # equivalent only in while! while (defined($_ = <>)) {...} /^Subject:/ @@ -75,8 +67,8 @@ equivalent: tr/a-z/A-Z/ $_ =~ tr/a-z/A-Z/ - chop - chop($_) + chomp + chomp($_) Here are the places where Perl will assume $_ even if you don't use it: @@ -111,7 +103,7 @@ The implicit iterator variable in the grep() and map() functions. The default place to put an input record when a CFHE> operation's result is tested by itself as the sole criterion of a C -test. Note that outside of a C test, this will not happen. +test. Outside of a C test, this will not happen. =back @@ -123,10 +115,11 @@ test. Note that outside of a C test, this will not happen. =item $EIE -Contains the subpattern from the corresponding set of parentheses in -the last pattern matched, not counting patterns matched in nested -blocks that have been exited already. (Mnemonic: like \digits.) -These variables are all read-only. +Contains the subpattern from the corresponding set of capturing +parentheses from the last pattern match, not counting patterns +matched in nested blocks that have been exited already. (Mnemonic: +like \digits.) These variables are all read-only and dynamically +scoped to the current BLOCK. =item $MATCH @@ -134,11 +127,11 @@ These variables are all read-only. 
The string matched by the last successful pattern match (not counting any matches hidden within a BLOCK or eval() enclosed by the current -BLOCK). (Mnemonic: like & in some editors.) This variable is read-only. +BLOCK). (Mnemonic: like & in some editors.) This variable is read-only +and dynamically scoped to the current BLOCK. The use of this variable anywhere in a program imposes a considerable -performance penalty on all regular expression matches. See the -Devel::SawAmpersand module from CPAN for more information. +performance penalty on all regular expression matches. See L. =item $PREMATCH @@ -150,8 +143,7 @@ enclosed by the current BLOCK). (Mnemonic: C<`> often precedes a quoted string.) This variable is read-only. The use of this variable anywhere in a program imposes a considerable -performance penalty on all regular expression matches. See the -Devel::SawAmpersand module from CPAN for more information. +performance penalty on all regular expression matches. See L. =item $POSTMATCH @@ -166,28 +158,27 @@ string.) Example: /def/; print "$`:$&:$'\n"; # prints abc:def:ghi -This variable is read-only. +This variable is read-only and dynamically scoped to the current BLOCK. The use of this variable anywhere in a program imposes a considerable -performance penalty on all regular expression matches. See the -Devel::SawAmpersand module from CPAN for more information. +performance penalty on all regular expression matches. See L. =item $LAST_PAREN_MATCH =item $+ The last bracket matched by the last search pattern. This is useful if -you don't know which of a set of alternative patterns matched. For +you don't know which one of a set of alternative patterns matched. For example: /Version: (.*)|Revision: (.*)/ && ($rev = $+); (Mnemonic: be positive and forward looking.) -This variable is read-only. +This variable is read-only and dynamically scoped to the current BLOCK. =item @+ -$+[0] is the offset of the end of the last successfull match. 
+$+[0] is the offset of the end of the last successful match. C<$+[>IC<]> is the offset of the end of the substring matched by I-th subpattern, or undef if the subpattern did not match. @@ -196,7 +187,7 @@ $+[0] - $-[0]>. Similarly, C<$>I coincides with CIC<], $+[>IC<] - $-[>IC<]> if C<$-[>IC<]> is defined, and $+ coincides with C. One can use C<$#+> to find the number of subgroups in the last successful match. Note the difference with -C<$#->, which is the last I subgroup. Compare with L<"@-">. +C<$#->, which is the last I subgroup. Compare with C<@->. =item $MULTILINE_MATCHING @@ -205,12 +196,12 @@ C<$#->, which is the last I subgroup. Compare with L<"@-">. Set to 1 to do multi-line matching within a string, 0 to tell Perl that it can assume that strings contain a single line, for the purpose of optimizing pattern matches. Pattern matches on strings containing -multiple newlines can produce confusing results when "C<$*>" is 0. Default -is 0. (Mnemonic: * matches multiple things.) Note that this variable -influences the interpretation of only "C<^>" and "C<$>". A literal newline can +multiple newlines can produce confusing results when C<$*> is 0. Default +is 0. (Mnemonic: * matches multiple things.) This variable +influences the interpretation of only C<^> and C<$>. A literal newline can be searched for even when C<$* == 0>. -Use of "C<$*>" is deprecated in modern Perls, supplanted by +Use of C<$*> is deprecated in modern Perl, supplanted by the C and C modifiers on pattern matching. =item input_line_number HANDLE EXPR @@ -221,17 +212,16 @@ the C and C modifiers on pattern matching. =item $. -The current input line number for the last file handle from -which you read (or performed a C or C on). The value +The current input record number for the last file handle from which +you just read() (or performed a C or C on). 
The value may be different from the actual physical line number in the file, -depending on what notion of "line" is in effect--see L<$/> on how -to affect that. An -explicit close on a filehandle resets the line number. Because -"CE>" never does an explicit close, line numbers increase -across ARGV files (but see examples under eof()). Localizing C<$.> has -the effect of also localizing Perl's notion of "the last read -filehandle". (Mnemonic: many programs use "." to mean the current line -number.) +depending on what notion of "line" is in effect--see C<$/> on how +to change that. An explicit close on a filehandle resets the line +number. Because CE> never does an explicit close, line +numbers increase across ARGV files (but see examples in L). +Consider this variable read-only: setting it does not reposition +the seek pointer; you'll have to do that on your own. (Mnemonic: +many programs use "." to mean the current line number.) =item input_record_separator HANDLE EXPR @@ -243,46 +233,48 @@ number.) The input record separator, newline by default. This is used to influence Perl's idea of what a "line" is. Works like B's RS -variable, including treating empty lines as delimiters if set to the -null string. (Note: An empty line cannot contain any spaces or tabs.) -You may set it to a multi-character string to match a multi-character -delimiter, or to C to read to end of file. Note that setting it -to C<"\n\n"> means something slightly different than setting it to -C<"">, if the file contains consecutive empty lines. Setting it to -C<""> will treat two or more consecutive empty lines as a single empty -line. Setting it to C<"\n\n"> will blindly assume that the next input -character belongs to the next paragraph, even if it's a newline. -(Mnemonic: / is used to delimit line boundaries when quoting poetry.) +variable, including treating empty lines as a terminator if set to +the null string. (Note: An empty line cannot contain any spaces +or tabs.) 
You may set it to a multi-character string to match a +multi-character terminator, or to C to read through the end +of file. Setting it to C<"\n\n"> means something slightly +different than setting to C<"">, if the file contains consecutive +empty lines. Setting to C<""> will treat two or more consecutive +empty lines as a single empty line. Setting to C<"\n\n"> will +blindly assume that the next input character belongs to the next +paragraph, even if it's a newline. (Mnemonic: / is used to delimit +line boundaries when quoting poetry.) undef $/; # enable "slurp" mode $_ = ; # whole file now here s/\n[ \t]+/ /g; -Remember: the value of $/ is a string, not a regexp. AWK has to be -better for something :-) +Remember: the value of C<$/> is a string, not a regex. B has to be +better for something. :-) -Setting $/ to a reference to an integer, scalar containing an integer, or -scalar that's convertable to an integer will attempt to read records +Setting C<$/> to a reference to an integer, scalar containing an integer, or +scalar that's convertible to an integer will attempt to read records instead of lines, with the maximum record size being the referenced -integer. So this: +integer. So this: $/ = \32768; # or \"32768", or \$var_containing_32768 open(FILE, $myfile); $_ = ; -will read a record of no more than 32768 bytes from FILE. If you're not -reading from a record-oriented file (or your OS doesn't have -record-oriented files), then you'll likely get a full chunk of data with -every read. If a record is larger than the record size you've set, you'll -get the record back in pieces. +will read a record of no more than 32768 bytes from FILE. If you're +not reading from a record-oriented file (or your OS doesn't have +record-oriented files), then you'll likely get a full chunk of data +with every read. If a record is larger than the record size you've +set, you'll get the record back in pieces. 
-On VMS, record reads are done with the equivalent of C, so it's -best not to mix record and non-record reads on the same file. (This is -likely not a problem, as any file you'd want to read in record mode is -probably usable in line mode) Non-VMS systems perform normal I/O, so -it's safe to mix record and non-record reads of a file. +On VMS, record reads are done with the equivalent of C, +so it's best not to mix record and non-record reads on the same +file. (This is unlikely to be a problem, because any file you'd +want to read in record mode is probably usable in line mode.) +Non-VMS systems perform normal I/O, so it's safe to mix record and +non-record reads of a file. -Also see L<$.>. +Also see C<$.>. =item autoflush HANDLE EXPR @@ -290,16 +282,17 @@ Also see L<$.>. =item $| -If set to nonzero, forces a flush right away and after every write or print on the -currently selected output channel. Default is 0 (regardless of whether -the channel is actually buffered by the system or not; C<$|> tells you -only whether you've asked Perl explicitly to flush after each write). -Note that STDOUT will typically be line buffered if output is to the -terminal and block buffered otherwise. Setting this variable is useful -primarily when you are outputting to a pipe, such as when you are running -a Perl script under rsh and want to see the output as it's happening. This -has no effect on input buffering. -(Mnemonic: when you want your pipes to be piping hot.) +If set to nonzero, forces a flush right away and after every write +or print on the currently selected output channel. Default is 0 +(regardless of whether the channel is actually buffered by the +system or not; C<$|> tells you only whether you've asked Perl +explicitly to flush after each write). STDOUT will +typically be line buffered if output is to the terminal and block +buffered otherwise. 
Setting this variable is useful primarily when +you are outputting to a pipe or socket, such as when you are running +a Perl program under B and want to see the output as it's +happening. This has no effect on input buffering. See L +for that. (Mnemonic: when you want your pipes to be piping hot.) =item output_field_separator HANDLE EXPR @@ -310,11 +303,11 @@ has no effect on input buffering. =item $, The output field separator for the print operator. Ordinarily the -print operator simply prints out the comma-separated fields you -specify. To get behavior more like B, set this variable -as you would set B's OFS variable to specify what is printed -between fields. (Mnemonic: what is printed when there is a , in your -print statement.) +print operator simply prints out its arguments without further +adornment. To get behavior more like B, set this variable as +you would set B's OFS variable to specify what is printed +between fields. (Mnemonic: what is printed when there is a "," in +your print statement.) =item output_record_separator HANDLE EXPR @@ -325,21 +318,21 @@ print statement.) =item $\ The output record separator for the print operator. Ordinarily the -print operator simply prints out the comma-separated fields you -specify, with no trailing newline or record separator assumed. -To get behavior more like B, set this variable as you would -set B's ORS variable to specify what is printed at the end of the -print. (Mnemonic: you set "C<$\>" instead of adding \n at the end of the -print. Also, it's just like C<$/>, but it's what you get "back" from -Perl.) +print operator simply prints out its arguments as is, with no +trailing newline or other end-of-record string added. To get +behavior more like B, set this variable as you would set +B's ORS variable to specify what is printed at the end of the +print. (Mnemonic: you set C<$\> instead of adding "\n" at the +end of the print. Also, it's just like C<$/>, but it's what you +get "back" from Perl.) 
=item $LIST_SEPARATOR =item $" -This is like "C<$,>" except that it applies to array values interpolated -into a double-quoted string (or similar interpreted string). Default -is a space. (Mnemonic: obvious, I think.) +This is like C<$,> except that it applies to array and slice values +interpolated into a double-quoted string (or similar interpreted +string). Default is a space. (Mnemonic: obvious, I think.) =item $SUBSCRIPT_SEPARATOR @@ -364,13 +357,14 @@ which means ($foo{$a},$foo{$b},$foo{$c}) -Default is "\034", the same as SUBSEP in B. Note that if your -keys contain binary data there might not be any safe value for "C<$;>". +Default is "\034", the same as SUBSEP in B. If your +keys contain binary data there might not be any safe value for C<$;>. (Mnemonic: comma (the syntactic subscript separator) is a -semi-semicolon. Yeah, I know, it's pretty lame, but "C<$,>" is already +semi-semicolon. Yeah, I know, it's pretty lame, but C<$,> is already taken for something more important.) -Consider using "real" multidimensional arrays. +Consider using "real" multidimensional arrays as described +in L. =item $OFMT @@ -379,12 +373,12 @@ Consider using "real" multidimensional arrays. The output format for printed numbers. This variable is a half-hearted attempt to emulate B's OFMT variable. There are times, however, when B and Perl have differing notions of what is in fact -numeric. The initial value is %.Ig, where I is the value +numeric. The initial value is "%.Ig", where I is the value of the macro DBL_DIG from your system's F. This is different from -B's default OFMT setting of %.6g, so you need to set "C<$#>" +B's default OFMT setting of "%.6g", so you need to set C<$#> explicitly to get B's value. (Mnemonic: # is the number sign.) -Use of "C<$#>" is deprecated. +Use of C<$#> is deprecated. =item format_page_number HANDLE EXPR @@ -393,6 +387,7 @@ Use of "C<$#>" is deprecated. =item $% The current page number of the currently selected output channel. 
+Used with formats. (Mnemonic: % is page number in B.) =item format_lines_per_page HANDLE EXPR @@ -402,7 +397,9 @@ The current page number of the currently selected output channel. =item $= The current page length (printable lines) of the currently selected -output channel. Default is 60. (Mnemonic: = has horizontal lines.) +output channel. Default is 60. +Used with formats. +(Mnemonic: = has horizontal lines.) =item format_lines_left HANDLE EXPR @@ -411,11 +408,13 @@ output channel. Default is 60. (Mnemonic: = has horizontal lines.) =item $- The number of lines left on the page of the currently selected output -channel. (Mnemonic: lines_on_page - lines_printed.) +channel. +Used with formats. +(Mnemonic: lines_on_page - lines_printed.) =item @- -$-[0] is the offset of the start of the last successfull match. +$-[0] is the offset of the start of the last successful match. C<$-[>IC<]> is the offset of the start of the substring matched by I-th subpattern, or undef if the subpattern did not match. @@ -425,7 +424,7 @@ $+[>IC<] - $-[>IC<]> if C<$-[>IC<]> is defined, and $+ coincides with C. One can use C<$#-> to find the last matched subgroup in the last successful match. Note the difference with C<$#+>, which is the number of subgroups in the regular expression. Compare -with L<"@+">. +with C<@+>. =item format_name HANDLE EXPR @@ -435,7 +434,7 @@ with L<"@+">. The name of the current report format for the currently selected output channel. Default is name of the filehandle. (Mnemonic: brother to -"C<$^>".) +C<$^>.) =item format_top_name HANDLE EXPR @@ -471,7 +470,7 @@ What formats output to perform a form feed. Default is \f. =item $^A The current value of the write() accumulator for format() lines. A format -contains formline() commands that put their result into C<$^A>. After +contains formline() calls that put their result into C<$^A>. After calling its format, write() prints out the contents of C<$^A> and empties. 
So you never actually see the contents of C<$^A> unless you call formline() yourself and then look at it. See L and @@ -482,21 +481,27 @@ L. =item $? The status returned by the last pipe close, backtick (C<``>) command, -or system() operator. Note that this is the status word returned by the -wait() system call (or else is made up to look like it). Thus, the exit -value of the subprocess is actually (C<$? EE 8>), and C<$? & 127> -gives which signal, if any, the process died from, and C<$? & 128> reports -whether there was a core dump. (Mnemonic: similar to B and B.) +successful call to wait() or waitpid(), or from the system() +operator. This is just the 16-bit status word returned by the +wait() system call (or else is made up to look like it). Thus, the +exit value of the subprocess is actually (C<$? EE 8>), and +C<$? & 127> gives which signal, if any, the process died from, and +C<$? & 128> reports whether there was a core dump. (Mnemonic: +similar to B and B.) Additionally, if the C variable is supported in C, its value is returned via $? if any of the C functions fail. -Note that if you have installed a signal handler for C, the +If you have installed a signal handler for C, the value of C<$?> will usually be wrong outside that handler. Inside an C subroutine C<$?> contains the value that is going to be given to C. You can modify C<$?> in an C subroutine to -change the exit status of the script. +change the exit status of your program. For example: + + END { + $? = 1 if $? == 255; # die would make it 255 + } Under VMS, the pragma C makes C<$?> reflect the actual VMS exit status, instead of the default emulation of POSIX @@ -510,14 +515,15 @@ Also see L. =item $! -If used in a numeric context, yields the current value of errno, with -all the usual caveats. (This means that you shouldn't depend on the -value of C<$!> to be anything in particular unless you've gotten a -specific error return indicating a system error.) 
If used in a string +context, yields the corresponding system error string. You can assign +to C<$!> to set I if, for instance, you want C<"$!"> to return the +string for error I, or you want to set the exit value for the die() +operator. (Mnemonic: What just went bang?) +If used numerically, yields the current value of the C C +variable, with all the usual caveats. (This means that you shouldn't +depend on the value of C<$!> to be anything in particular unless +you've gotten a specific error return indicating a system error.) +If used as a string, yields the corresponding system error string. +You can assign a number to C<$!> to set I if, for instance, +you want C<"$!"> to return the string for error I, or you want +to set the exit value for the die() operator. (Mnemonic: What just +went bang?) Also see L. @@ -541,7 +547,7 @@ OS/2 API either via CRT, or directly from perl. Under Win32, C<$^E> always returns the last error information reported by the Win32 call C which describes the last error from within the Win32 API. Most Win32-specific -code will report errors via C<$^E>. ANSI C and UNIX-like calls +code will report errors via C<$^E>. ANSI C and Unix-like calls set C and so most portable Perl code will report errors via C<$!>. @@ -554,12 +560,12 @@ Also see L. =item $@ -The Perl syntax error message from the last eval() command. If null, the +The Perl syntax error message from the last eval() operator. If null, the last eval() parsed and executed correctly (although the operations you invoked may have failed in the normal fashion). (Mnemonic: Where was the syntax error "at"?) -Note that warning messages are not collected in this variable. You can, +Warning messages are not collected in this variable. You can, however, set up a routine to process warnings by setting C<$SIG{__WARN__}> as described below. @@ -571,8 +577,9 @@ Also see L. =item $$ -The process number of the Perl running this script. (Mnemonic: same -as shells.)
+The process number of the Perl running this script. You should +consider this variable read-only, although it will be altered +across fork() calls. (Mnemonic: same as shells.) =item $REAL_USER_ID @@ -580,7 +587,7 @@ as shells.) =item $< -The real uid of this process. (Mnemonic: it's the uid you came I, +The real uid of this process. (Mnemonic: it's the uid you came I, if you're running setuid.) =item $EFFECTIVE_USER_ID @@ -594,8 +601,8 @@ The effective uid of this process. Example: $< = $>; # set real to effective uid ($<,$>) = ($>,$<); # swap real and effective uid -(Mnemonic: it's the uid you went I, if you're running setuid.) -Note: "C<$E>" and "C<$E>" can be swapped only on machines +(Mnemonic: it's the uid you went I, if you're running setuid.) +Note: C<$E> and C<$E> can be swapped only on machines supporting setreuid(). =item $REAL_GROUP_ID @@ -610,12 +617,12 @@ list of groups you are in. The first number is the one returned by getgid(), and the subsequent ones by getgroups(), one of which may be the same as the first number. -However, a value assigned to "C<$(>" must be a single number used to -set the real gid. So the value given by "C<$(>" should I be assigned -back to "C<$(>" without being forced numeric, such as by adding zero. +However, a value assigned to C<$(> must be a single number used to +set the real gid. So the value given by C<$(> should I be assigned +back to C<$(> without being forced numeric, such as by adding zero. -(Mnemonic: parentheses are used to I things. The real gid is the -group you I, if you're running setgid.) +(Mnemonic: parentheses are used to I things. The real gid is the +group you I, if you're running setgid.) =item $EFFECTIVE_GROUP_ID @@ -629,42 +636,41 @@ separated list of groups you are in. The first number is the one returned by getegid(), and the subsequent ones by getgroups(), one of which may be the same as the first number. 
-Similarly, a value assigned to "C<$)>" must also be a space-separated +Similarly, a value assigned to C<$)> must also be a space-separated list of numbers. The first number is used to set the effective gid, and the rest (if any) are passed to setgroups(). To get the effect of an empty list for setgroups(), just repeat the new effective gid; that is, to force an effective gid of 5 and an effectively empty setgroups() list, say C< $) = "5 5" >. -(Mnemonic: parentheses are used to I things. The effective gid -is the group that's I for you, if you're running setgid.) +(Mnemonic: parentheses are used to I things. The effective gid +is the group that's I for you, if you're running setgid.) -Note: "C<$E>", "C<$E>", "C<$(>" and "C<$)>" can be set only on -machines that support the corresponding I routine. "C<$(>" -and "C<$)>" can be swapped only on machines supporting setregid(). +Note: C<$E>, C<$E>, C<$(> and C<$)> can be set only on +machines that support the corresponding I routine. C<$(> +and C<$)> can be swapped only on machines supporting setregid(). =item $PROGRAM_NAME =item $0 -Contains the name of the file containing the Perl script being -executed. On some operating systems -assigning to "C<$0>" modifies the argument area that the ps(1) -program sees. This is more useful as a way of indicating the -current program state than it is for hiding the program you're running. +Contains the name of the program being executed. On some operating +systems assigning to C<$0> modifies the argument area that the B +program sees. This is more useful as a way of indicating the current +program state than it is for hiding the program you're running. (Mnemonic: same as B and B.) =item $[ The index of the first element in an array, and of the first character -in a substring. Default is 0, but you could set it to 1 to make -Perl behave more like B (or Fortran) when subscripting and when -evaluating the index() and substr() functions. (Mnemonic: [ begins -subscripts.) 
+in a substring. Default is 0, but you could theoretically set it +to 1 to make Perl behave more like B (or Fortran) when +subscripting and when evaluating the index() and substr() functions. +(Mnemonic: [ begins subscripts.) -As of Perl 5, assignment to "C<$[>" is treated as a compiler directive, -and cannot influence the behavior of any other file. Its use is -discouraged. +As of release 5 of Perl, assignment to C<$[> is treated as a compiler +directive, and cannot influence the behavior of any other file. +Its use is highly discouraged. =item $PERL_VERSION @@ -678,16 +684,17 @@ of perl in the right bracket?) Example: warn "No checksumming!\n" if $] < 3.019; See also the documentation of C and C -for a convenient way to fail if the Perl interpreter is too old. +for a convenient way to fail if the running Perl interpreter is too old. =item $COMPILING =item $^C -The current value of the flag associated with the B<-c> switch. Mainly -of use with B<-MO=...> to allow code to alter its behaviour when being compiled. -(For example to automatically AUTOLOADing at compile time rather than normal -deferred loading.) Setting C<$^C = 1> is similar to calling C. +The current value of the flag associated with the B<-c> switch. +Mainly of use with B<-MO=...> to allow code to alter its behavior +when being compiled, such as for example to AUTOLOAD at compile +time rather than normal, deferred loading. See L. Setting +C<$^C = 1> is similar to calling C. =item $DEBUGGING @@ -704,7 +711,7 @@ The maximum system file descriptor, ordinarily 2. System file descriptors are passed to exec()ed processes, while higher file descriptors are not. Also, during an open(), system file descriptors are preserved even if the open() fails. (Ordinary file descriptors are -closed before the open() is attempted.) Note that the close-on-exec +closed before the open() is attempted.) 
The close-on-exec status of a file descriptor will be decided according to the value of C<$^F> when the open() or pipe() was called, not the time of the exec(). @@ -722,17 +729,18 @@ inplace editing. (Mnemonic: value of B<-i> switch.) =item $^M -By default, running out of memory it is not trappable. However, if -compiled for this, Perl may use the contents of C<$^M> as an emergency -pool after die()ing with this message. Suppose that your Perl were -compiled with -DPERL_EMERGENCY_SBRK and used Perl's malloc. Then +By default, running out of memory is an untrappable, fatal error. +However, if suitably built, Perl can use the contents of C<$^M> +as an emergency memory pool after die()ing. Suppose that your Perl +were compiled with -DPERL_EMERGENCY_SBRK and used Perl's malloc. +Then - $^M = 'a' x (1<<16); + $^M = 'a' x (1 << 16); -would allocate a 64K buffer for use when in emergency. See the F -file for information on how to enable this option. As a disincentive to -casual use of this advanced feature, there is no L long name for -this variable. +would allocate a 64K buffer for use when in emergency. See the +F file in the Perl distribution for information on how to +enable this option. To discourage casual use of this advanced +feature, there is no L long name for this variable. =item $OSNAME @@ -740,14 +748,15 @@ this variable. The name of the operating system under which this copy of Perl was built, as determined during the configuration process. The value -is identical to C<$Config{'osname'}>. +is identical to C<$Config{'osname'}>. See also L and the +B<-V> command-line switch documented in L. =item $PERLDB =item $^P -The internal variable for debugging support. Different bits mean the -following (subject to change): +The internal variable for debugging support. The meanings of the +various bits are subject to change, but currently indicate: =over 6 @@ -777,42 +786,42 @@ Start with single-step on. 
=back -Note that some bits may be relevant at compile-time only, some at -run-time only. This is a new mechanism and the details may change. +Some bits may be relevant at compile-time only, some at +run-time only. This is a new mechanism and the details may change. =item $^R -The result of evaluation of the last successful L> -regular expression assertion. (Excluding those used as switches.) May -be written to. +The result of evaluation of the last successful C<(?{ code })> +regular expression assertion (see L). May be written to. =item $^S Current state of the interpreter. Undefined if parsing of the current module/eval is not finished (may happen in $SIG{__DIE__} and -$SIG{__WARN__} handlers). True if inside an eval, otherwise false. +$SIG{__WARN__} handlers). True if inside an eval(), otherwise false. =item $BASETIME =item $^T -The time at which the script began running, in seconds since the +The time at which the program began running, in seconds since the epoch (beginning of 1970). The values returned by the B<-M>, B<-A>, -and B<-C> filetests are -based on this value. +and B<-C> filetests are based on this value. =item $WARNING =item $^W -The current value of the warning switch, either TRUE or FALSE. -(Mnemonic: related to the B<-w> switch.) +The current value of the warning switch, initially true if B<-w> +was used, false otherwise, but directly modifiable. (Mnemonic: +related to the B<-w> switch.) See also L. =item $EXECUTABLE_NAME =item $^X The name that the Perl binary itself was executed as, from C's C. +This may not be a full pathname, nor even necessarily in your path. =item $ARGV @@ -820,20 +829,21 @@ contains the name of the current file when reading from EE. =item @ARGV -The array @ARGV contains the command line arguments intended for the -script. Note that C<$#ARGV> is the generally number of arguments minus -one, because C<$ARGV[0]> is the first argument, I the command name. See -"C<$0>" for the command name. 
+The array @ARGV contains the command-line arguments intended for +the script. C<$#ARGV> is generally the number of arguments minus +one, because C<$ARGV[0]> is the first argument, I<not> the program's +command name itself. See C<$0> for the command name. =item @INC -The array @INC contains the list of places to look for Perl scripts to -be evaluated by the C, C, or C constructs. It -initially consists of the arguments to any B<-I> command line switches, -followed by the default Perl library, probably F, -followed by ".", to represent the current directory. If you need to -modify this at runtime, you should use the C pragma -to get the machine-dependent library properly loaded also: +The array @INC contains the list of places that the C, +C, or C constructs look for their library files. It +initially consists of the arguments to any B<-I> command-line +switches, followed by the default Perl library, probably +F, followed by ".", to represent the current +directory. If you need to modify this at runtime, you should use +the C pragma to get the machine-dependent library properly +loaded also: use lib '/mypath/libdir/'; use SomeMod; @@ -841,29 +851,31 @@ to get the machine-dependent library properly loaded also: =item @_ Within a subroutine the array @_ contains the parameters passed to that -subroutine. See L. +subroutine. See L. =item %INC -The hash %INC contains entries for each filename that has -been included via C or C. The key is the filename you -specified, and the value is the location of the file actually found. -The C command uses this array to determine whether a given file -has already been included. +The hash %INC contains entries for each filename included via the +C, C, or C operators. The key is the filename +you specified (with module names converted to pathnames), and the +value is the location of the file actually found. The C +operator uses this hash to determine whether a particular file has +already been included.
=item %ENV =item $ENV{expr} The hash %ENV contains your current environment. Setting a -value in C changes the environment for child processes. +value in C changes the environment for any child processes +you subsequently fork() off. =item %SIG =item $SIG{expr} The hash %SIG is used to set signal handlers for various -signals. Example: +signals. For example: sub handler { # 1st argument is signal name my($sig) = @_; @@ -875,30 +887,27 @@ signals. Example: $SIG{'INT'} = \&handler; $SIG{'QUIT'} = \&handler; ... - $SIG{'INT'} = 'DEFAULT'; # restore default action + $SIG{'INT'} = 'DEFAULT'; # restore default action $SIG{'QUIT'} = 'IGNORE'; # ignore SIGQUIT Using a value of C<'IGNORE'> usually has the effect of ignoring the signal, except for the C signal. See L for more about this special case. -The %SIG array contains values for only the signals actually set within -the Perl script. Here are some other examples: +Here are some other examples: - $SIG{"PIPE"} = Plumber; # SCARY!! $SIG{"PIPE"} = "Plumber"; # assumes main::Plumber (not recommended) $SIG{"PIPE"} = \&Plumber; # just fine; assume current Plumber + $SIG{"PIPE"} = *Plumber; # somewhat esoteric $SIG{"PIPE"} = Plumber(); # oops, what did Plumber() return?? -The one marked scary is problematic because it's a bareword, which means -sometimes it's a string representing the function, and sometimes it's -going to call the subroutine call right then and there! Best to be sure -and quote it or take a reference to it. *Plumber works too. See L. +Be sure not to use a bareword as the name of a signal handler, +lest you inadvertently call it. If your system has the sigaction() function then signal handlers are installed using it. This means you get reliable signal handling. If your system has the SA_RESTART flag it is used when signals handlers are -installed. This means that system calls for which it is supported +installed. 
This means that system calls for which restarting is supported continue rather than returning when a signal arrives. If you want your system calls to be interrupted by signal delivery then do something like this: @@ -929,16 +938,20 @@ unless the hook routine itself exits via a C, a loop exit, or a die(). The C<__DIE__> handler is explicitly disabled during the call, so that you can die from a C<__DIE__> handler. Similarly for C<__WARN__>. -Note that the C<$SIG{__DIE__}> hook is called even inside eval()ed -blocks/strings. See L and L for how to -circumvent this. - -Note that C<__DIE__>/C<__WARN__> handlers are very special in one -respect: they may be called to report (probable) errors found by the -parser. In such a case the parser may be in inconsistent state, so -any attempt to evaluate Perl code from such a handler will probably -result in a segfault. This means that calls which result/may-result -in parsing Perl should be used with extreme caution, like this: +Due to an implementation glitch, the C<$SIG{__DIE__}> hook is called +even inside an eval(). Do not use this to rewrite a pending exception +in C<$@>, or as a bizarre substitute for overriding CORE::GLOBAL::die(). +This strange action at a distance may be fixed in a future release +so that C<$SIG{__DIE__}> is only called if your program is about +to exit, as was the original intent. Any other use is deprecated. + +C<__DIE__>/C<__WARN__> handlers are very special in one respect: +they may be called to report (probable) errors found by the parser. +In such a case the parser may be in inconsistent state, so any +attempt to evaluate Perl code from such a handler will probably +result in a segfault. This means that warnings or errors that +result from parsing Perl should be used with extreme caution, like +this: require Carp if defined $^S; Carp::confess("Something wrong") if defined &Carp::confess; @@ -950,94 +963,94 @@ called the handler. The second line will print backtrace and die if Carp was available. 
The third line will be executed only if Carp was not available. -See L, L and L for -additional info. +See L, L, L, and +L for additional information. =back =head2 Error Indicators -The variables L<$@>, L<$!>, L<$^E>, and L<$?> contain information about -different types of error conditions that may appear during execution of -Perl script. The variables are shown ordered by the "distance" between -the subsystem which reported the error and the Perl process, and -correspond to errors detected by the Perl interpreter, C library, -operating system, or an external program, respectively. +The variables C<$@>, C<$!>, C<$^E>, and C<$?> contain information +about different types of error conditions that may appear during +execution of a Perl program. The variables are shown ordered by +the "distance" between the subsystem which reported the error and +the Perl process. They correspond to errors detected by the Perl +interpreter, C library, operating system, or an external program, +respectively. To illustrate the differences between these variables, consider the -following Perl expression: +following Perl expression, which uses a single-quoted string: - eval ' - open PIPE, "/cdrom/install |"; - @res = ; - close PIPE or die "bad pipe: $?, $!"; - '; + eval q{ + open PIPE, "/cdrom/install |"; + @res = ; + close PIPE or die "bad pipe: $?, $!"; + }; After execution of this statement all 4 variables may have been set. -$@ is set if the string to be C-ed did not compile (this may happen if -C or C were imported with bad prototypes), or if Perl -code executed during evaluation die()d (either implicitly, say, -if C was imported from module L, or the C after -C was triggered). In these cases the value of $@ is the compile -error, or C error (which will interpolate C<$!>!), or the argument -to C (which will interpolate C<$!> and C<$?>!). - -When the above expression is executed, open(), C<>, and C -are translated to C run-time library calls. $! is set if one of these -calls fails. 
The value is a symbolic indicator chosen by the C run-time -library, say C. - -On some systems the above C library calls are further translated -to calls to the kernel. The kernel may have set more verbose error -indicator that one of the handful of standard C errors. In such cases $^E -contains this verbose error indicator, which may be, say, C. On systems where C library calls are identical to system calls -$^E is a duplicate of $!. - -Finally, $? may be set to non-C<0> value if the external program -C fails. Upper bits of the particular value may reflect -specific error conditions encountered by this program (this is -program-dependent), lower-bits reflect mode of failure (segfault, completion, -etc.). Note that in contrast to $@, $!, and $^E, which are set only -if error condition is detected, the variable $? is set on each C or -pipe C, overwriting the old value. - -For more details, see the individual descriptions at L<$@>, L<$!>, L<$^E>, -and L<$?>. +C<$@> is set if the string to be C-ed did not compile (this +may happen if C or C were imported with bad prototypes), +or if Perl code executed during evaluation die()d. In these cases +the value of $@ is the compile error, or the argument to C +(which will interpolate C<$!> and C<$?>!). (See also L, +though.) + +When the eval() expression above is executed, open(), C<>, +and C are translated to calls in the C run-time library and +thence to the operating system kernel. C<$!> is set to the C library's +C if one of these calls fails. + +Under a few operating systems, C<$^E> may contain a more verbose +error indicator, such as in this case, "CDROM tray not closed." +Systems that do not support extended error messages leave C<$^E> +the same as C<$!>. + +Finally, C<$?> may be set to a non-0 value if the external program +F fails. The upper eight bits reflect specific +error conditions encountered by the program (the program's exit() +value).
The lower eight bits reflect the mode of failure, like signal +death and core dump information. See wait(2) for details. In +contrast to C<$!> and C<$^E>, which are set only if an error condition +is detected, the variable C<$?> is set on each C or pipe +C, overwriting the old value. This is more like C<$@>, which +on every eval() is always set on failure and cleared on success. +For more details, see the individual descriptions at C<$@>, C<$!>, C<$^E>, +and C<$?>. =head2 Technical Note on the Syntax of Variable Names -Variable names in Perl can have several formats. Usually, they must -begin with a letter or underscore, in which case they can be -arbitrarily long (up to an internal limit of 256 characters) and may -contain letters, digits, underscores, or the special sequence C<::>. -In this case the part before the last C<::> is taken to be a I; see L. +Variable names in Perl can have several formats. Usually, they +must begin with a letter or underscore, in which case they can be +arbitrarily long (up to an internal limit of 251 characters) and +may contain letters, digits, underscores, or the special sequence +C<::> or C<'>. In this case, the part before the last C<::> or +C<'> is taken to be a I; see L. Perl variable names may also be a sequence of digits or a single punctuation or control character. These names are all reserved for -special uses by Perl; for example, the all-digits names are used to -hold backreferences after a regulare expression match. Perl has a -special syntax for the single-control-character names: It understands -C<^X> (caret C) to mean the control-C character. For example, -the notation C<$^W> (dollar-sign caret C) is the scalar variable -whose name is the single character control-C. This is better than -typing a literal control-C into your program. +special uses by Perl; for example, the all-digits names are used +to hold data captured by backreferences after a regular expression +match.
Perl has a special syntax for the single-control-character +names: It understands C<^X> (caret C) to mean the control-C +character. For example, the notation C<$^W> (dollar-sign caret +C) is the scalar variable whose name is the single character +control-C. This is better than typing a literal control-C +into your program. Finally, new in Perl 5.006, Perl variable names may be alphanumeric -strings that begin with control characters. These variables must be -written in the form C<${^Foo}>; the braces are not optional. -C<${^Foo}> denotes the scalar variable whose name is a control-C -followed by two C's. These variables are reserved for future -special uses by Perl, except for the ones that begin with C<^_> -(control-underscore). No control-character name that begins with -C<^_> will acquire a special meaning in any future version of Perl; -such names may therefore be used safely in programs. C<^_> itself, -however, I reserved. - -All Perl variables that begin with digits, control characters, or +strings that begin with control characters (or better yet, a caret). +These variables must be written in the form C<${^Foo}>; the braces +are not optional. C<${^Foo}> denotes the scalar variable whose +name is a control-C followed by two C's. These variables are +reserved for future special uses by Perl, except for the ones that +begin with C<^_> (control-underscore or caret-underscore). No +control-character name that begins with C<^_> will acquire a special +meaning in any future version of Perl; such names may therefore be +used safely in programs. C<$^_> itself, however, I reserved. + +Perl identifiers that begin with digits, control characters, or punctuation characters are exempt from the effects of the C declaration and are always forced to be in package C
. A few other names are also exempt: @@ -1049,7 +1062,21 @@ other names are also exempt: SIG In particular, the new special C<${^_XYZ}> variables are always taken -to be in package C
regardless of any C declarations +to be in package C
, regardless of any C declarations presently in scope. +=head1 BUGS + +Due to an unfortunate accident of Perl's implementation, C imposes a considerable performance penalty on all regular +expression matches in a program, regardless of whether they occur +in the scope of C. For that reason, saying C in libraries is strongly discouraged. See the +Devel::SawAmpersand module documentation from CPAN +(http://www.perl.com/CPAN/modules/by-module/Devel/Devel-SawAmpersand-0.10.readme) +for more information. +Having to even think about the C<$^S> variable in your exception +handlers is simply wrong. C<$SIG{__DIE__}> as currently implemented +invites grievous and difficult to track down errors. Avoid it +and use an C or CORE::GLOBAL::die override instead. diff --git a/pod/perlxs.pod b/pod/perlxs.pod index 98a9834..ee582e0 100644 --- a/pod/perlxs.pod +++ b/pod/perlxs.pod @@ -367,8 +367,8 @@ The following code demonstrates how to supply initialization code for function parameters. The initialization code is eval'd within double quotes by the compiler before it is added to the output so anything which should be interpreted literally [mainly C<$>, C<@>, or C<\\>] -must be protected with backslashes. The variables C<$var>, C<$arg>, -and C<$type> can be used as in typemaps. +must be protected with backslashes. The variables $var, $arg, +and $type can be used as in typemaps. bool_t rpcb_gettime(host,timep) diff --git a/pod/pod2man.PL b/pod/pod2man.PL index a673ea1..20610a8 100644 --- a/pod/pod2man.PL +++ b/pod/pod2man.PL @@ -785,7 +785,7 @@ while (<>) { } {I<$1>\\|$2}gx; # convert simple variable references - s/(\s+)([\$\@%][\w:]+)(?!\()/${1}C<$2>/g; + s/(\s+)([\$\@%&*][\w:]+)(?!\()/${1}C<$2>/g; if (m{ ( [\-\w]+